tch ? match.guard(ctx) : false;
}
/**
 * Moves the registry to `to` and then runs the matched transition's `onEnter` hook.
 *
 * @param to - State to enter.
 * @param ctx - Context handed to `canTransition` and to the `onEnter` hook.
 * @throws Error when `canTransition` rejects the move, or when no transition
 *   from the current state to `to` is registered.
 */
apply(to: AgentState, ctx: LoopContext): void {
  if (!this.canTransition(to, ctx)) {
    // The message must be a template literal; without the backticks this line does not parse.
    throw new Error(`Invalid transition: ${this.current} -> ${to}`);
  }
  const match = this.transitions.find(t => t.from === this.current && t.to === to);
  if (!match) {
    // Explicit guard instead of a non-null assertion, so a missing entry fails with the same clear error.
    throw new Error(`Invalid transition: ${this.current} -> ${to}`);
  }
  // The state is updated before the hook runs, so `onEnter` observes the new state.
  this.current = to;
  match.onEnter(ctx);
}
/**
 * Read accessor for the registry's current state.
 *
 * NOTE(review): the body returns `this.current`, which is this same accessor,
 * so calling it recurses without bound. A separate backing field looks
 * intended; confirm against the field declarations earlier in the class.
 */
get current(): AgentState { return this.current; }
}
**Why this matters:** Explicit state transitions prevent the model from attempting unauthorized actions. Guards enforce business rules before execution, and `onEnter` hooks trigger side effects like logging, notifications, or context pruning.
### Step 2: Enforce Bounded Iterations and Stop Conditions
The loop must terminate deterministically. Implement a hard iteration cap alongside semantic stop conditions: final answer, blocked state, escalation trigger, or max turns.
```typescript
/** Settings that bound a single agent loop run. */
interface LoopConfig {
  /** Hard ceiling on the number of turns the loop may execute. */
  maxIterations: number;

  /** Semantic exit checks evaluated in addition to the iteration ceiling. */
  stopConditions: StopCondition[];

  /** Max tokens to retain in prompt. */
  contextBudget: number;
}
/**
 * Counts loop turns and decides when the loop has to stop.
 *
 * The loop stops when the turn count reaches `config.maxIterations` or when
 * any configured stop condition evaluates to true for the given context.
 */
class LoopController {
  // Backing counter; exposed read-only through `iteration` so callers can
  // record the turn number without being able to tamper with it.
  private turnCount = 0;

  constructor(private readonly config: LoopConfig) {}

  /** Number of turns started so far. */
  get iteration(): number {
    return this.turnCount;
  }

  /** True once the turn count has reached `config.maxIterations`. */
  get exhausted(): boolean {
    return this.turnCount >= this.config.maxIterations;
  }

  /**
   * Reports whether the loop must stop before starting another turn.
   *
   * @param ctx - Context forwarded to each stop condition's `evaluate`.
   * @returns true when the iteration cap is reached or any stop condition fires.
   */
  checkStop(ctx: LoopContext): boolean {
    // The hard cap is checked first so stop conditions are not evaluated once the budget is spent.
    if (this.exhausted) return true;
    return this.config.stopConditions.some(condition => condition.evaluate(ctx));
  }

  /** Records that one more turn has started. */
  increment(): void {
    this.turnCount += 1;
  }
}
```
**Why this matters:** Unbounded loops are the primary driver of production cost overruns and compounding errors. A hard cap protects against runaway execution, while semantic conditions allow graceful exits for blocked or escalated tasks.
### Step 3: Curate Context Instead of Dumping It
The context window is a finite engineering resource. Raw tool outputs, intermediate reasoning, and stale retrieval chunks must be pruned before each turn. Implement a context curator that retains only decision-relevant data.
/**
 * Reduces a message history to the parts worth sending to the model.
 *
 * Keeps every system and user message, the last two assistant messages
 * flagged as reasoning, and the latest tool output per tool call, then trims
 * the result to a token budget. Assistant messages that are not flagged as
 * reasoning are not retained. The output is grouped (system/user, reasoning,
 * tool outputs) rather than kept in chronological order.
 */
class ContextPruner {
  // Rough rule of thumb for English text; this is an estimate, not a tokenizer.
  private static readonly CHARS_PER_TOKEN = 4;

  /**
   * @param rawMessages - Full message history; not mutated.
   * @param budget - Maximum estimated tokens to keep.
   * @returns The curated messages, trimmed to `budget` where possible.
   */
  prune(rawMessages: Message[], budget: number): Message[] {
    const essential = rawMessages.filter(m => ContextPruner.isPinned(m));
    const toolOutputs = rawMessages.filter(m => m.role === 'tool');
    const reasoning = rawMessages.filter(m => m.role === 'assistant' && m.metadata?.isReasoning);

    // Retain only the latest tool output per tool call; older ones are dropped.
    const prunedTools = this.collapseToolOutputs(toolOutputs);
    const prunedReasoning = reasoning.slice(-2); // Keep last 2 reasoning turns

    const assembled = [...essential, ...prunedReasoning, ...prunedTools];
    return this.trimToBudget(assembled, budget);
  }

  /** System and user messages are never dropped by pruning or trimming. */
  private static isPinned(message: Message): boolean {
    return message.role === 'system' || message.role === 'user';
  }

  /** Estimates the token cost of one message; non-string content counts as zero. */
  private static estimateTokens(message: Message): number {
    const content: unknown = message.content;
    return typeof content === 'string'
      ? Math.ceil(content.length / ContextPruner.CHARS_PER_TOKEN)
      : 0;
  }

  /**
   * Keeps the most recent output for each tool call id, positioned where that
   * id first appeared. Outputs without an id are all kept, because there is
   * nothing to tell which of them supersedes which.
   */
  private collapseToolOutputs(outputs: Message[]): Message[] {
    const latest = new Map<unknown, Message>();
    for (const output of outputs) {
      // An id-less output is keyed by the message object itself, so it never collides.
      const key: unknown = output.toolCallId ?? output;
      latest.set(key, output);
    }
    return Array.from(latest.values());
  }

  /**
   * Drops non-pinned messages, earliest first, until the estimated token total
   * fits `budget`. Pinned (system/user) messages are always kept, so the result
   * can still exceed the budget when they alone are too large.
   */
  private trimToBudget(messages: Message[], budget: number): Message[] {
    let total = messages.reduce((sum, m) => sum + ContextPruner.estimateTokens(m), 0);
    // Written as !(a > b) so a NaN budget disables trimming instead of dropping everything.
    if (!(total > budget)) return messages;

    const kept: Message[] = [];
    for (const message of messages) {
      if (total > budget && !ContextPruner.isPinned(message)) {
        total -= ContextPruner.estimateTokens(message);
        continue;
      }
      kept.push(message);
    }
    return kept;
  }
}
**Why this matters:** Context saturation degrades model accuracy and increases latency. Pruning preserves decision-critical data while discarding exhaust, maintaining reasoning quality across multi-turn executions.
### Step 4: Write Tool Descriptions as Decision Interfaces
The model does not read source code or API documentation. It reads the description string attached to each tool registration. That description is the decision boundary. Omit failure modes, usage constraints, or expected outputs, and the model will call tools incorrectly or retry on permanent errors.
/** Shape of a tool registration handed to the model. */
interface ToolDefinition {
  /** Identifier of the tool. */
  name: string;

  /** Decision interface: when to use, when to avoid, expected output. */
  description: string;

  /** Parameter descriptors, keyed by parameter name. */
  parameters: Record<string, unknown>;

  /** Runs the tool with the supplied parameters and resolves with its result. */
  execute: (params: unknown) => Promise<ToolResult>;
}
/**
 * Example tool registration for order-status lookups.
 *
 * The description is assembled from one sentence per concern (when to use,
 * what it returns, when to avoid, how it fails); the joined string is what
 * gets registered. `execute` is left as a placeholder.
 */
const orderLookupTool: ToolDefinition = {
  name: 'lookup_order_status',
  description: [
    'Use when the user references an order ID and needs fulfillment status.',
    'Returns shipping carrier, tracking, and delivery estimate.',
    'Do not use for refund eligibility or cancellation authority.',
    'Fails with 404 if order ID format is invalid.',
  ].join(' '),
  parameters: {
    orderId: { type: 'string' },
  },
  execute: async (params) => { /* implementation */ },
};
**Why this matters:** Tool descriptions directly shape the decide step. Well-scoped descriptions reduce hallucinated tool calls, prevent authority violations, and make failure modes predictable.
### Step 5: Execute the Loop with Turn-Level Tracing
Combine the components into a controller that runs the observe → decide → act → check → repeat cycle, logging each turn for auditability.
/**
 * Runs the observe → decide → act → check cycle and records one trace entry
 * per turn.
 *
 * NOTE(review): `executeTool`, `validateActionResult` and `deriveNextState`
 * are invoked on `this` but are not declared in this class body; they have to
 * be provided before the class compiles.
 */
class AgentOrchestrator {
  /**
   * @param state - Registry holding the current state and applying transitions.
   * @param controller - Decides when the loop stops and counts turns.
   * @param pruner - Curates the context passed to the model each turn.
   * @param tools - Tool registrations offered to the model.
   * @param llm - Client that selects the next action.
   * @param contextBudget - Token budget handed to the pruner each turn.
   *   Defaults to 8000, the value that was previously hard-coded.
   */
  constructor(
    private state: StateRegistry,
    private controller: LoopController,
    private pruner: ContextPruner,
    private tools: ToolDefinition[],
    private llm: LLMClient,
    private contextBudget: number = 8000
  ) {}

  /**
   * Executes turns until `controller.checkStop` reports true.
   *
   * @param initialPrompt - User request that seeds the context.
   * @returns The trace of all completed turns.
   */
  async run(initialPrompt: string): Promise<ExecutionTrace> {
    const trace: ExecutionTrace = { turns: [] };
    const context: Message[] = [{ role: 'user', content: initialPrompt }];

    while (!this.controller.checkStop({ state: this.state.current, context })) {
      this.controller.increment();
      const turnStart = Date.now();

      // Observe: curate context
      const observed = this.pruner.prune(context, this.contextBudget);

      // Decide: LLM selects next action
      const decision = await this.llm.decide(observed, this.tools);
      const toolCall = decision.toolCall;

      // Act: execute tool or generate response
      const actionResult = toolCall
        ? await this.executeTool(toolCall)
        : { content: decision.response, type: 'final' };

      // Check: validate result and update state
      this.validateActionResult(actionResult, this.state.current);
      this.state.apply(this.deriveNextState(actionResult, this.state.current), { context });

      // Record turn (the state recorded is the one entered during this turn)
      trace.turns.push({
        iteration: this.controller.iteration,
        state: this.state.current,
        decision: decision.reasoning,
        toolCall,
        result: actionResult,
        latency: Date.now() - turnStart
      });

      // NOTE(review): on tool-call turns `decision.response` may be unset, so this
      // can append an assistant message without content; confirm the intended field.
      context.push({ role: 'assistant', content: decision.response, metadata: { isReasoning: true } });
      // Narrowing on `toolCall` keeps the id access safe under strict null checks.
      if (toolCall && actionResult.type === 'tool') {
        context.push({ role: 'tool', content: actionResult.content, toolCallId: toolCall.id });
      }
    }
    return trace;
  }
}
Architecture Rationale: This design separates concerns cleanly. State transitions are guarded, iterations are bounded, context is pruned, and every turn is traced. The loop order enforces safety: observation precedes decision, execution precedes validation, and repetition only occurs when conditions permit. This structure eliminates the "script with hallucination" anti-pattern by making each step deterministic and auditable.
Pitfall Guide
1. Implicit State Drift
Explanation: Relying on the LLM to remember the current phase or pending dependencies. Natural language memory degrades across turns, causing the agent to repeat actions or violate authority boundaries.
Fix: Externalize state into a typed registry with explicit transitions. Pass only the current state tag and pending dependencies to the prompt. Use onEnter hooks to trigger side effects.
2. Unbounded Iterations
Explanation: No hard cap on loop turns. The agent retries failed actions, re-plans indefinitely, or enters recursive tool-calling cycles, multiplying token costs and potential damage.
Fix: Implement a configurable maxIterations limit. Log when the cap is hit and return a structured summary of attempted actions. Pair with exponential backoff for transient tool failures.
3. Context Window Saturation
Explanation: Appending every tool output, retrieval chunk, and reasoning step to the prompt. The window fills with exhaust, pushing critical instructions out of attention range and degrading accuracy.
Fix: Implement semantic pruning. Retain system prompts, user requests, latest tool outputs, and recent reasoning. Summarize or discard older intermediate data. Enforce a token budget per turn.
4. Vague Tool Descriptions
Explanation: Writing vague or purely technical descriptions. The model lacks decision boundaries, leading to incorrect tool selection, retries on permanent errors, or authority violations.
Fix: Write decision-oriented descriptions. Include: when to use, when to avoid, expected output format, and failure modes. Treat the description as the contract between the application and the model.
5. Upfront Planning Fallacy
Explanation: Generating a complete step sequence before execution. Plans become stale immediately after the first tool response, causing the agent to pursue unsafe or irrelevant paths.
Fix: Adopt turn-by-turn ReAct reasoning. Let the model plan only the next action based on current observations. Log reasoning per turn for debugging, but never treat it as a fixed script.
6. Silent Stop Conditions
Explanation: The agent declares completion while side effects remain incomplete or partially executed. This occurs when the model confuses "intent expressed" with "action verified."
Fix: Implement post-action verification. Check tool return codes, confirm idempotency, and validate business rules before transitioning to COMPLETE. Require explicit confirmation for destructive operations.
7. Missing Escalation Paths
Explanation: The agent continues attempting tasks outside its authority or capability, wasting tokens and frustrating users. No designed handoff mechanism exists.
Fix: Define authority boundaries in state guards. When a task exceeds scope, transition to ESCALATED, summarize findings, and route to a human or specialized system. Log escalation triggers for process improvement.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-volume customer support | Bounded loop (max 5 turns) + context pruning + auto-escalation | Prevents runaway tokens; ensures predictable SLA | Lowers cost by 60-80% vs unbounded |
| Complex multi-step workflow | Explicit state machine + turn-by-turn ReAct + post-action verification | Guarantees correct sequencing; prevents partial executions | Moderate increase in token spend; higher success rate |
| Research/analysis agent | Higher iteration cap (10-15) + retrieval summarization + reasoning trace | Allows deep exploration while maintaining auditability | Higher token cost; offset by reduced manual review |
| Destructive operations (deletes, refunds) | Strict state guards + human confirmation step + idempotency checks | Prevents irreversible errors; enforces compliance | Adds latency; eliminates financial risk |
Configuration Template
/**
 * Example loop configuration: six turns at most, an 8000-token context
 * budget, and one stop condition per terminal state.
 */
const productionLoopConfig: LoopConfig = {
  maxIterations: 6,
  contextBudget: 8000,
  stopConditions: [
    {
      name: 'finalAnswer',
      evaluate: (loopCtx) => loopCtx.state === 'COMPLETE',
    },
    {
      name: 'blocked',
      evaluate: (loopCtx) => loopCtx.state === 'BLOCKED',
    },
    {
      name: 'escalated',
      evaluate: (loopCtx) => loopCtx.state === 'ESCALATED',
    },
    {
      // Never fires here: the turn cap is handled by the controller.
      name: 'maxTurns',
      evaluate: (_loopCtx) => false,
    },
  ],
};
/**
 * Example transition table. Each entry names the source and target state, a
 * guard that must return true for the move to be allowed, and a hook to run
 * on entry.
 *
 * NOTE(review): the guards and hooks call `log`, `hasRequiredData`,
 * `verifySideEffects`, `exceedsAuthority` and `routeToHuman` on the context;
 * confirm the context object passed at the call site provides them.
 */
const stateTransitions: StateTransition[] = [
  {
    from: 'OPEN',
    to: 'AWAITING_INPUT',
    guard: () => true,
    onEnter: (hookCtx) => hookCtx.log('State: Awaiting user input'),
  },
  {
    from: 'AWAITING_INPUT',
    to: 'EXECUTING',
    guard: (guardCtx) => guardCtx.hasRequiredData(),
    onEnter: (hookCtx) => hookCtx.log('State: Executing task'),
  },
  {
    from: 'EXECUTING',
    to: 'COMPLETE',
    guard: (guardCtx) => guardCtx.verifySideEffects(),
    onEnter: (hookCtx) => hookCtx.log('State: Task complete'),
  },
  {
    from: 'EXECUTING',
    to: 'BLOCKED',
    guard: () => true,
    onEnter: (hookCtx) => hookCtx.log('State: Blocked - missing dependency'),
  },
  {
    from: 'EXECUTING',
    to: 'ESCALATED',
    guard: (guardCtx) => guardCtx.exceedsAuthority(),
    onEnter: (hookCtx) => hookCtx.routeToHuman(),
  },
];
Quick Start Guide
- Initialize the orchestrator: Instantiate `StateRegistry`, `LoopController`, `ContextPruner`, and register your tools with decision-oriented descriptions.
- Define state transitions: Map your business workflow to explicit states and guards. Attach `onEnter` hooks for logging, notifications, or context pruning.
- Configure bounds and pruning: Set `maxIterations` based on task complexity. Configure `contextBudget` to 60-70% of your model's window to leave room for reasoning.
- Run and trace: Execute the loop with an initial prompt. Inspect the `ExecutionTrace` for state transitions, tool calls, and latency. Adjust guards and pruning rules based on observed behavior.
- Deploy with telemetry: Ship turn-level logs, stop-condition triggers, and token consumption metrics to your observability stack. Set alerts for iteration cap breaches and escalation spikes.