TCH_REFACTOR' | 'ARCHITECTURE_RESEARCH' | 'DOCUMENT_SYNTHESIS';
payload: Record<string, unknown>;
constraints: {
maxTokens: number;
requiresLocalFS: boolean;
requiresShellAccess: boolean;
};
}
/** Outcome record returned after a routed task has been handed to a tool. */
interface ExecutionResult {
/** Identifier of the task this result belongs to. */
taskId: string;
/**
 * Final state of the run.
 * NOTE(review): presumably 'PARTIAL' means only part of the work finished — confirm with the producers of this value.
 */
status: 'COMPLETED' | 'PARTIAL' | 'FAILED';
/** Human-readable summary of what the tool produced. */
output: string;
/** Token count reported for the run. NOTE(review): unit/source of the count is not shown here — confirm. */
tokenUsage: number;
}
### Step 2: Implement the Routing Logic
The router evaluates task constraints against tool capabilities. It enforces budget limits and prevents high-agency tools from executing low-complexity tasks.
```typescript
/**
 * Dispatches a development task to the tool layer that matches its type and constraints.
 *
 * Dispatch rules:
 * - `IDE_COMPLETION` always goes to the IDE route.
 * - `BATCH_REFACTOR` goes to the CLI route only when `constraints.requiresShellAccess` is true.
 * - `ARCHITECTURE_RESEARCH` always goes to the web research route.
 * - `DOCUMENT_SYNTHESIS` goes to the desktop route only when `constraints.requiresLocalFS` is true.
 *
 * Every other combination is rejected with an `Error`.
 */
class AnthropicTaskRouter implements TaskRouter {
  /**
   * Token budgets keyed by task id. Nothing in this class writes to the map,
   * so a lookup miss (and the 50000 fallback in `route`) is the normal case.
   */
  private readonly tokenBudget = new Map<string, number>();

  /**
   * Picks the execution route for `task` and returns that route's result.
   *
   * @param task - Task to dispatch; its `id` is used for the budget lookup.
   * @returns The result produced by the selected route.
   * @throws Error when no route accepts the task's type/constraint combination,
   *   or when the CLI route is selected with a budget below 10000.
   */
  async route(task: DevelopmentTask): Promise<ExecutionResult> {
    const allowance = this.tokenBudget.get(task.id) ?? 50000;

    switch (task.type) {
      case 'IDE_COMPLETION':
        return this.executeIDECompletion(task, allowance);

      case 'BATCH_REFACTOR':
        // Agency gate: the CLI route is only taken when shell access is requested.
        if (task.constraints.requiresShellAccess) {
          return this.executeCLIBatch(task, allowance);
        }
        break;

      case 'ARCHITECTURE_RESEARCH':
        return this.executeWebResearch(task, allowance);

      case 'DOCUMENT_SYNTHESIS':
        // Agency gate: the desktop route is only taken when local filesystem access is requested.
        if (task.constraints.requiresLocalFS) {
          return this.executeDesktopSynthesis(task, allowance);
        }
        break;
    }

    throw new Error(`No compatible tool found for task type: ${task.type}`);
  }

  /**
   * Placeholder for the IDE route (GitHub Copilot + claude-sonnet-4 via the IDE extension API).
   * Billing is described as a flat subscription, so `budget` is not consulted and usage is reported as 0.
   */
  private async executeIDECompletion(task: DevelopmentTask, budget: number): Promise<ExecutionResult> {
    const outcome: ExecutionResult = {
      taskId: task.id,
      status: 'COMPLETED',
      output: 'Inline suggestions applied',
      tokenUsage: 0,
    };
    return outcome;
  }

  /**
   * Placeholder for the CLI route (Claude Code, `@anthropic-ai/claude-code`).
   * The budget is checked before any CLI work would start; the returned figures are fixed sample values.
   *
   * @throws Error when `budget` is below 10000.
   */
  private async executeCLIBatch(task: DevelopmentTask, budget: number): Promise<ExecutionResult> {
    if (budget < 10000) {
      throw new Error('Insufficient budget for batch refactoring');
    }

    // The real CLI invocation and output capture would go here.
    const outcome: ExecutionResult = {
      taskId: task.id,
      status: 'COMPLETED',
      output: 'Refactored 14 controllers',
      tokenUsage: 42000,
    };
    return outcome;
  }

  /**
   * Placeholder for the web route (Claude.ai Deep Research / Projects).
   * Described as subscription-tier with per-project context; `budget` is not consulted and usage is reported as 0.
   */
  private async executeWebResearch(task: DevelopmentTask, budget: number): Promise<ExecutionResult> {
    const outcome: ExecutionResult = {
      taskId: task.id,
      status: 'COMPLETED',
      output: '2,400-word cited analysis',
      tokenUsage: 0,
    };
    return outcome;
  }

  /**
   * Placeholder for the desktop route (Claude Cowork desktop agent).
   * Described as requiring explicit folder grants and running in an isolated VM;
   * `budget` is not consulted and usage is reported as 0.
   */
  private async executeDesktopSynthesis(task: DevelopmentTask, budget: number): Promise<ExecutionResult> {
    const outcome: ExecutionResult = {
      taskId: task.id,
      status: 'COMPLETED',
      output: 'Generated Phase 3 readiness report',
      tokenUsage: 0,
    };
    return outcome;
  }
}
```

### Step 3: Parallel Model Orchestration for Legacy Analysis
When analyzing large codebases, single-model execution becomes inefficient. The following pattern fans out analysis across different Claude model tiers, then synthesizes results. This replaces sequential processing with parallel execution, reducing wall-clock time by 60-70%.
```typescript
/**
 * Sends three fixed analysis prompts to three Claude model tiers concurrently
 * and hands the collected responses to `synthesizeAnalysis`.
 *
 * All calls are started before any is awaited; because the results are joined
 * with `Promise.all`, the returned promise rejects as soon as one call rejects.
 *
 * @param repoPath - Passed through unchanged to `callClaudeModel` for every prompt.
 * @returns The value produced by `synthesizeAnalysis` for the per-model results,
 *   in the same order as the prompt list below.
 */
async function analyzeLegacyCodebase(repoPath: string) {
  const assignments = [
    { model: 'claude-opus-4-6', prompt: 'Generate service dependency graph and identify coupling hotspots' },
    { model: 'claude-sonnet-4-6', prompt: 'Scan for security vulnerabilities and deprecated API usage' },
    { model: 'claude-haiku-4-5', prompt: 'Inventory all external dependencies and version constraints' },
  ];

  // One model call per assignment (via the Anthropic API or a CLI wrapper), tagged with the model name.
  const runAssignment = async (assignment: { model: string; prompt: string }) => {
    const data = await callClaudeModel(assignment.model, assignment.prompt, repoPath);
    return { model: assignment.model, data };
  };

  const inFlight = assignments.map((assignment) => runAssignment(assignment));
  const perModel = await Promise.all(inFlight);

  return synthesizeAnalysis(perModel);
}
/**
 * Placeholder for the synthesis step that is meant to merge the per-model
 * outputs, resolve conflicts and produce one unified report.
 *
 * The intended division of labour is: Opus for architectural reasoning,
 * Sonnet for logic/security, Haiku for fast parsing.
 *
 * @param results - Per-model outputs; not inspected by the current placeholder.
 * @returns A fixed confirmation message.
 */
function synthesizeAnalysis(results: Array<{ model: string; data: string }>): string {
  const consolidatedReport = 'Consolidated legacy analysis report generated.';
  return consolidatedReport;
}
```

### Architecture Decisions and Rationale
- Separation of Billing Models: IDE tools use flat subscriptions, making them ideal for high-frequency, low-complexity tasks. CLI and API-driven tools use consumption-based billing, requiring explicit budget guards. The router enforces these boundaries to prevent unexpected expenditure.
- Context Isolation: Each tool layer maintains independent context windows. IDE chats reset per session, CLI agents read the full repository on invocation, web projects persist memory across conversations, and desktop agents operate within granted directories. Forcing context sharing across layers causes token waste and hallucination.
- Agency Gating: High-agency tools (CLI, Desktop) are restricted to tasks requiring filesystem or shell access. Low-agency tools (IDE, Web) handle suggestion and synthesis. This prevents autonomous agents from executing trivial tasks that drain tokens without adding value.
- Model Tier Routing: Parallel orchestration assigns tasks based on model strengths. `claude-opus-4-6` handles deep architectural reasoning, `claude-sonnet-4-6` manages complex logic and security scanning, and `claude-haiku-4-5` performs fast, repetitive parsing. This tiered approach optimizes both cost and accuracy.
Pitfall Guide
1. Context Window Overflow in IDE Chat
Explanation: Developers paste entire stack traces, configuration files, and error logs into IDE chat panels. The context window fills rapidly, causing the model to truncate earlier instructions and produce generic responses.
Fix: Implement context pruning. Extract only the relevant exception chain, configuration snippet, and surrounding code block. Use @workspace references instead of pasting full files. Set explicit token limits in your IDE extension settings.
2. Unbounded CLI Token Consumption
Explanation: Firing a CLI agent against a large monorepo without scoping the task causes it to traverse unnecessary directories, execute redundant tests, and consume excessive tokens.
Fix: Always scope CLI tasks with explicit file patterns or directory constraints. Use .gitignore-aware filtering. Implement a dry-run flag that outputs planned changes before execution. Monitor API usage dashboards and set hard budget alerts.
3. Treating Web Projects as Local Sandboxes
Explanation: Teams upload sensitive configuration files, database schemas, or PII to Claude.ai Projects, assuming the web interface provides the same security boundaries as local tools.
Fix: Never upload production secrets, customer data, or internal credentials to web interfaces. Use redacted schemas, mock data, or architecture diagrams. Treat web projects as design-time environments, not runtime sandboxes.
4. Network and Security Policy Conflicts with CLI Agents
Explanation: CLI agents require outbound network access to Anthropic APIs and may execute shell commands that trigger firewall rules or proxy restrictions in corporate environments.
Fix: Validate IT security policies before deploying CLI tools. Configure explicit proxy settings, allowlist required API endpoints, and run agents within approved development containers. Use network monitoring to detect unexpected egress.
5. Over-Agentic Tasks in Low-Agency Interfaces
Explanation: Asking an IDE assistant to refactor multiple files, run test suites, or generate project scaffolding forces it to provide step-by-step instructions instead of executing changes. This wastes developer time and creates context fragmentation.
Fix: Match task complexity to tool agency. Use IDE tools for inline completion, code explanation, and single-file edits. Delegate multi-file operations, test execution, and scaffolding to CLI agents. Reserve web interfaces for research, architecture planning, and artifact generation.
6. Model Tier Misalignment
Explanation: Routing simple boilerplate generation to high-reasoning models or complex security analysis to fast, low-cost models degrades output quality and inflates costs.
Fix: Establish a model routing matrix. Use claude-haiku-4-5 for parsing, inventory, and repetitive transformations. Use claude-sonnet-4-6 for debugging, logic implementation, and security scanning. Reserve claude-opus-4-6 for architectural reviews, system design, and complex reasoning tasks.
7. Ignoring Context Window Limits in Long Sessions
Explanation: Extended conversations in web or desktop interfaces eventually exceed context limits, causing the model to forget earlier instructions or project constraints.
Fix: Implement session checkpointing. Summarize key decisions at regular intervals and inject them as system prompts in new sessions. Use project-level memory features to persist critical constraints. Avoid keeping single conversations open for more than 2-3 hours of continuous interaction.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Daily feature development, inline completion, single-file edits | GitHub Copilot + Claude Sonnet | Flat subscription, low latency, IDE-native context | Predictable, included in team plan |
| Multi-file refactoring, test suite execution, legacy analysis | Claude Code CLI | High agency, shell access, batch execution | Consumption-based; requires budget guards |
| Architecture planning, technology comparison, research synthesis | Claude.ai Projects / Deep Research | Persistent memory, web search, cited outputs | Subscription tier; no token tracking |
| Document generation, file organization, non-dev knowledge work | Claude Cowork Desktop | Local filesystem access, isolated VM, Office/PDF support | Subscription tier; requires explicit folder grants |
| Parallel codebase analysis across multiple dimensions | Tiered model orchestration (Opus/Sonnet/Haiku) | Optimizes reasoning depth vs. speed vs. cost | Mixed; Haiku reduces overall spend by 40-60% |
Configuration Template
{
"ai_workflow": {
"routing": {
"ide_completion": {
"tool": "github_copilot",
"model": "claude-sonnet-4",
"billing": "flat_subscription",
"context_scope": "open_buffers"
},
"batch_refactor": {
"tool": "claude_code",
"model": "claude-sonnet-4-6",
"billing": "api_consumption",
"max_tokens": 75000,
"shell_access": true
},
"architecture_research": {
"tool": "claude_ai",
"features": ["projects", "deep_research"],
"billing": "subscription_tier",
"context_scope": "project_workspace"
},
"document_synthesis": {
"tool": "claude_cowork",
"billing": "subscription_tier",
"local_fs_access": true,
"isolated_vm": true
}
},
"security": {
"pii_allowed": false,
"network_egress": "allowlist_only",
"folder_grants": "explicit_opt_in"
},
"budgeting": {
"cli_monthly_cap": 500,
"alert_threshold_percent": 80,
"dry_run_enabled": true
}
}
}
Quick Start Guide
- Install and Authenticate: Deploy GitHub Copilot with Claude Sonnet routing in your IDE. Install `@anthropic-ai/claude-code` globally via npm and authenticate with your Anthropic API key. Verify network connectivity and proxy settings.
- Configure Routing Rules: Create a team routing matrix that maps task types to tool layers. Document budget limits, context pruning guidelines, and security constraints in your engineering runbook.
- Initialize Project Workspaces: Set up Claude.ai Projects for architecture planning and research. Upload redacted schemas, decision logs, and migration plans. Configure explicit folder grants for Claude Cowork if handling local documents.
- Execute First Batch Task: Run a scoped CLI refactoring task with a dry-run flag. Monitor token consumption, verify output accuracy, and adjust budget thresholds. Iterate until the workflow aligns with team velocity and cost targets.