me: string;
description: string;
parameters: Record<string, unknown>;
execute: (args: Record<string, unknown>) => Promise<string>;
}
class CapabilityRegistry {
private tools: Map<string, ToolDefinition> = new Map();
register(tool: ToolDefinition): void {
if (this.tools.has(tool.name)) {
throw new Error(Tool ${tool.name} is already registered.);
}
this.tools.set(tool.name, tool);
}
async invoke(name: string, args: Record<string, unknown>): Promise<string> {
const tool = this.tools.get(name);
if (!tool) throw new Error(Unknown tool: ${name});
try {
return await tool.execute(args);
} catch (err) {
return EXECUTION_ERROR: ${err instanceof Error ? err.message : 'Unknown failure'};
}
}
getSchemaArray(): Array<Record<string, unknown>> {
return Array.from(this.tools.values()).map(t => ({
name: t.name,
description: t.description,
parameters: t.parameters
}));
}
}
### Step 2: Implement Context Routing & Window Management
Context is the agent's entire operational boundary. Feeding raw conversation history into every iteration causes token bloat and attention degradation. Implement a sliding window with priority routing.
```typescript
class ContextRouter {
  /** Soft token budget for the window. NOTE(review): currently unused by
   *  prune(), which counts entries — see note there. */
  private maxTokens: number;
  /** Chronological conversation entries forming the current window. */
  private history: Array<{ role: 'user' | 'assistant' | 'tool'; content: string }> = [];

  constructor(maxTokens: number = 8000) {
    this.maxTokens = maxTokens;
  }

  /** Appends an entry and immediately prunes the window if it grew too large. */
  append(entry: { role: 'user' | 'assistant' | 'tool'; content: string }): void {
    this.history.push(entry);
    this.prune();
  }

  /**
   * Returns the current window as a fresh array.
   * Fixed: the original returned the internal array itself, letting callers
   * mutate the router's history by accident; a shallow copy prevents that.
   */
  getPayload(): Array<{ role: 'user' | 'assistant' | 'tool'; content: string }> {
    return [...this.history];
  }

  /**
   * Sliding-window pruning: keeps the very first entry (the original task
   * anchor) plus the most recent `keepCount` turns.
   * NOTE(review): this counts entries, not tokens — a production version
   * should estimate tokens (e.g. tiktoken) against `maxTokens`.
   */
  private prune(): void {
    const keepCount = 6;
    if (this.history.length > keepCount + 2) {
      const anchor = this.history[0];
      const recent = this.history.slice(-keepCount);
      this.history = [anchor, ...recent];
    }
  }
}
```

### Step 3: Build the ReAct Execution Loop
The loop handles reasoning, tool dispatch, observation, and reflection. It enforces iteration limits and integrates guardrails.
/**
 * Dependencies and limits for a single ReAct loop run.
 */
interface LoopConfig {
  // Hard cap on reason/act iterations; the orchestrator throws once exceeded.
  maxIterations: number;
  // Calls the LLM with the current context window. May return a structured
  // tool call alongside (or instead of) free-text content.
  modelProvider: (messages: Array<Record<string, unknown>>) => Promise<{ content: string; toolCall?: { name: string; args: Record<string, unknown> } }>;
  // Tool registry used to dispatch tool calls and map execution errors.
  registry: CapabilityRegistry;
  // Sliding-window conversation context shared across iterations.
  context: ContextRouter;
}
export class ReActOrchestrator {
  private config: LoopConfig;

  constructor(config: LoopConfig) {
    this.config = config;
  }

  /**
   * Drives the ReAct loop for a single task.
   *
   * Each iteration sends the current context window to the model. A tool call
   * is dispatched through the registry and its observation appended; a plain
   * answer goes through one reflection pass and is returned.
   *
   * Fix over the original: the assistant's own turns (its tool-call
   * decisions) are now recorded in the context, so subsequent iterations can
   * see what the model already tried instead of only the raw observations.
   *
   * @param initialPrompt The user task that seeds the loop.
   * @returns The reflected/validated final answer.
   * @throws Error when maxIterations elapses without a final answer.
   */
  async run(initialPrompt: string): Promise<string> {
    this.config.context.append({ role: 'user', content: initialPrompt });
    let iteration = 0;
    while (iteration < this.config.maxIterations) {
      iteration++;
      const response = await this.config.modelProvider(this.config.context.getPayload());
      if (response.toolCall) {
        // Persist the decision so later iterations retain the model's own
        // action history, not just the tool observations.
        this.config.context.append({
          role: 'assistant',
          content: `TOOL_CALL ${response.toolCall.name} ${JSON.stringify(response.toolCall.args)}`
        });
        // invoke() maps tool failures to "EXECUTION_ERROR: ..." strings, so
        // the loop survives failures and the model can adapt next iteration.
        const observation = await this.config.registry.invoke(response.toolCall.name, response.toolCall.args);
        this.config.context.append({ role: 'tool', content: observation });
        continue;
      }
      // Reflection step: validate output before returning
      const validated = await this.reflectAndRefine(response.content);
      return validated;
    }
    throw new Error('Loop exceeded maximum iterations without resolution.');
  }

  /**
   * Lightweight self-critique pass: asks the model to review its own draft
   * and returns the refined version, falling back to the original draft when
   * the critique yields empty content.
   */
  private async reflectAndRefine(output: string): Promise<string> {
    const critiquePrompt = `Review the following output for factual consistency, schema compliance, and completeness. Return only the corrected version.\n\nOutput: ${output}`;
    this.config.context.append({ role: 'user', content: critiquePrompt });
    const refined = await this.config.modelProvider(this.config.context.getPayload());
    return refined.content || output;
  }
}
Architecture Decisions & Rationale
- Explicit Tool Registry: Decouples model reasoning from execution. Prevents the LLM from hallucinating function names or bypassing validation.
- Context Pruning: Maintains attention quality by removing stale observations. Token bloat degrades reasoning accuracy faster than model capability limits.
- Bounded Iteration: Hard caps prevent infinite loops and cost spikes. Agents must fail fast or escalate, not loop indefinitely.
- Reflection Gate: A lightweight critique pass catches structural errors before they reach downstream systems. It trades ~15% latency for ~20% accuracy gains on complex outputs.
- Error Mapping in Tools: Tools return structured error strings instead of throwing. This keeps the loop intact and allows the model to adapt its next action based on failure context.
Pitfall Guide
1. Context Window Saturation
Explanation: Feeding every tool output, reflection pass, and intermediate thought into the prompt causes attention fragmentation. The model starts ignoring recent instructions or hallucinating missing data.
Fix: Implement token-aware pruning. Retain system directives, the original task, and the last 3-4 interaction turns. Offload long-term knowledge to vector retrieval or structured memory stores.
2. Unbounded Loop Execution
Explanation: Without iteration caps or cost budgets, agents can loop through tool calls indefinitely, especially when encountering ambiguous errors or conflicting observations.
Fix: Enforce maxIterations at the orchestrator level. Implement token/cost tracking per loop. Add early-exit conditions when confidence scores drop or repeated identical tool calls are detected.
3. Ambiguous Tool Schemas
Explanation: Vague descriptions or missing parameter constraints cause the model to select the wrong tool or pass malformed arguments. This breaks the observation step and corrupts context.
Fix: Use strict JSON schemas with explicit required fields. Add usage conditions in descriptions (e.g., "Use only when user provides a valid email format"). Map all execution errors to standardized strings the model can parse.
4. Silent Context Corruption
Explanation: Malformed tool outputs, HTML fragments, or unescaped characters pollute the conversation history. Subsequent reasoning passes inherit the corruption, leading to cascading failures.
Fix: Sanitize all tool outputs before appending to context. Validate against expected formats. Implement a fallback handler that replaces corrupted data with a placeholder and logs the incident.
5. Over-Engineering Multi-Agent Handoffs
Explanation: Teams split tasks into multiple agents before validating single-agent performance. This introduces coordination overhead, state synchronization bugs, and compounding latency.
Fix: Start with a single ReAct loop. Measure where context density or reasoning depth fails. Split only when a single context window cannot hold the necessary state for a subtask. Use sequential handoffs before attempting parallel execution.
6. Non-Idempotent Tool Side Effects
Explanation: Agent loops retry failed steps. If tools perform stateful operations (e.g., database writes, API payments), retries cause duplicate actions and data corruption.
Fix: Implement idempotency keys for all stateful tools. Track executed tool calls per session. Support dry-run modes for validation passes before committing changes.
7. Missing Observability Traces
Explanation: Black-box loops make debugging impossible. Teams cannot determine whether failures stem from reasoning errors, tool misfires, or context degradation.
Fix: Emit structured logs per iteration: token usage, tool selection, observation length, and reflection outcome. Integrate span-based tracing (OpenTelemetry) to visualize execution paths and bottlenecks.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Simple Q&A or single-step lookup | Single-Prompt LLM | Low complexity, deterministic output, minimal overhead | Lowest |
| Multi-step research or code generation | ReAct Loop Agent | Iterative reasoning improves accuracy, manageable latency | Moderate |
| High-volume customer support routing | Workflow Engine + LLM Router | Predictable paths, parallel execution, strict SLAs | Low-Moderate |
| Complex compliance or financial analysis | Multi-Agent Orchestrator | Specialized roles reduce context pollution, higher accuracy | Highest |
| Real-time interactive debugging | ReAct Loop with Human Checkpoints | Balances autonomy with safety, prevents irreversible errors | Moderate |
Configuration Template
import { ReActOrchestrator, CapabilityRegistry, ContextRouter } from './agent-core';

const registry = new CapabilityRegistry();

// Example tool registration: strict JSON schema plus defensive argument
// validation inside the handler itself.
registry.register({
  name: 'fetch_document',
  description: 'Retrieves internal documentation by ID. Use only when user provides a valid doc ID.',
  parameters: {
    type: 'object',
    properties: {
      docId: { type: 'string', pattern: '^DOC-[0-9]{4}$' }
    },
    required: ['docId']
  },
  execute: async (args) => {
    // Simulated fetch with error handling.
    // Fixed: the original cast `args.docId as string` then called
    // .startsWith on it, which throws a TypeError when docId is missing or
    // not a string. Check the runtime type explicitly so a bad argument
    // surfaces as a structured error string the model can parse.
    const id = args.docId;
    if (typeof id !== 'string' || !id.startsWith('DOC-')) {
      return 'EXECUTION_ERROR: Invalid document ID format';
    }
    return `Content for ${id}: [truncated for brevity]`;
  }
});

const orchestrator = new ReActOrchestrator({
  maxIterations: 5,
  context: new ContextRouter(6000),
  registry,
  modelProvider: async (messages) => {
    // Replace with actual API call (OpenAI, Anthropic, etc.)
    // Return structured response with optional toolCall
    return { content: 'Processed successfully.' };
  }
});

export { orchestrator };
Quick Start Guide
- Initialize the registry: Define your tools with strict schemas, descriptions, and execution handlers. Register them in the `CapabilityRegistry`.
- Configure the orchestrator: Set iteration limits, context window size, and inject your model provider. Ensure the provider returns structured tool calls when applicable.
- Attach observability: Wrap the `run()` method with logging/tracing. Capture iteration count, token usage, and tool selection per execution.
- Execute with guardrails: Pass the initial prompt. Monitor the loop for early exits or reflection triggers. Validate the final output against expected schemas before downstream consumption.