ntics:** The replay engine is strictly observational. It never modifies the source log or attempts to reconstruct mutable state. This guarantees that forensic analysis does not introduce side effects or alter the ground truth.
4. Tolerant Parsing: Production logs may contain partial writes or malformed lines at the tail. The loader skips invalid lines with warnings rather than failing hard, ensuring that incomplete runs can still be analyzed.
Implementation Example
The following TypeScript implementation demonstrates the Trace Replay Pattern. This example uses distinct interfaces and variable names to illustrate the pattern independently of any specific library.
// core/types.ts
/**
 * A single record from an execution trace, as produced by the JSONL loader.
 *
 * Only `kind` and `timestamp` are treated as the core of the contract; every
 * other field is optional metadata that a logging layer may or may not emit.
 */
export interface TraceEvent {
/** Event category used for filtering, e.g. `'tool_call'`, `'llm_call'`, `'error'`. */
kind: string;
/** When the event occurred, parsed from the log entry's `timestamp` field. */
timestamp: Date;
/** Optional label for the event (the sample trace uses it for the tool name). */
name?: string;
/** Free-form payload; its shape is defined by the logging layer, not by this type. */
data: Record<string, unknown>;
/** Elapsed milliseconds, read from `duration_ms` or derived by the loader via `correlationId`. */
durationMs?: number;
/** Agent turn number the event belongs to, when the logging layer emits one. */
turn?: number;
/** Identifier pairing related events; the loader fills it from `correlation_id` or `request_id`. */
correlationId?: string;
}
/**
 * Chainable query over a sequence of trace events.
 *
 * `byKind` and `where` each return a further `TraceFilter`, so calls can be
 * chained before reading results with `first`, `last` or `toArray`.
 */
export interface TraceFilter {
/** Narrows to events whose `kind` equals the given value. */
byKind(kind: string): TraceFilter;
/** Narrows to events for which `predicate` returns true. */
where(predicate: (event: TraceEvent) => boolean): TraceFilter;
/** Returns the first matching event, or `null` when nothing matches. */
first(): TraceEvent | null;
/** Returns the last matching event, or `null` when nothing matches. */
last(): TraceEvent | null;
/** Returns the matching events as an array. */
toArray(): TraceEvent[];
}
/**
 * Cursor-based walker over a sequence of trace events.
 *
 * The behavior notes below describe the `DefaultTraceNavigator` implementation
 * in this file, where the cursor always points at the next event to be read.
 */
export interface TraceNavigator {
/** Returns the event at the cursor and advances past it; `null` once the end is reached. */
step(): TraceEvent | null;
/** Moves the cursor back one position and returns that event; `null` at the start. */
retreat(): TraceEvent | null;
/** Returns the event at `index` and leaves the cursor just after it; throws `RangeError` when out of bounds. */
seek(index: number): TraceEvent;
/** Returns the event at the cursor without advancing; `null` once the end is reached. */
peek(): TraceEvent | null;
/** Number of events from the cursor to the end. */
remaining(): number;
/** Moves the cursor back to the first event. */
reset(): void;
}
// core/loader.ts
import * as fs from 'fs';
/**
 * Read-only, in-memory view over an agent execution trace stored as JSONL.
 *
 * Instances are created through {@link ExecutionLog.loadFromJsonl}; the source
 * file is read once and never written to.
 */
export class ExecutionLog {
  private readonly events: TraceEvent[];

  private constructor(events: TraceEvent[]) {
    this.events = events;
  }

  /**
   * Loads a JSONL trace file, one JSON object per line.
   *
   * Blank lines are ignored. A line is skipped with a warning when it is not
   * valid JSON, is not a JSON object, or lacks a string `kind` or a parseable
   * `timestamp`, so a partially written tail does not prevent analysis of the
   * rest of the run.
   *
   * @param filePath Path of the JSONL file to read synchronously.
   * @returns A log holding every valid event in file order.
   * @throws Whatever `fs.readFileSync` throws when the file cannot be read.
   */
  static loadFromJsonl(filePath: string): ExecutionLog {
    const rawContent = fs.readFileSync(filePath, 'utf-8');
    const events: TraceEvent[] = [];

    for (const line of rawContent.split('\n')) {
      if (line.trim().length === 0) {
        continue;
      }

      const event = ExecutionLog.parseLine(line);
      if (event === null) {
        console.warn(`Skipping malformed line: ${line.substring(0, 50)}...`);
        continue;
      }

      // Derive a duration only when none was logged. An explicit 0 ms is a
      // real measurement and must not be overwritten.
      if (event.correlationId !== undefined && event.durationMs === undefined) {
        event.durationMs = ExecutionLog.computeDuration(events, event);
      }
      events.push(event);
    }

    return new ExecutionLog(events);
  }

  /** Parses one JSONL line into an event, or returns `null` when the line is unusable. */
  private static parseLine(line: string): TraceEvent | null {
    let payload: unknown;
    try {
      payload = JSON.parse(line);
    } catch {
      return null;
    }
    if (typeof payload !== 'object' || payload === null || Array.isArray(payload)) {
      return null;
    }
    const record = payload as Record<string, unknown>;

    // `kind` and `timestamp` are the only required fields.
    const kind = record.kind;
    if (typeof kind !== 'string' || kind.length === 0) {
      return null;
    }
    const rawTimestamp = record.timestamp;
    if (typeof rawTimestamp !== 'string' && typeof rawTimestamp !== 'number') {
      return null;
    }
    const timestamp = new Date(rawTimestamp);
    if (Number.isNaN(timestamp.getTime())) {
      return null;
    }

    const rawName = record.name;
    const rawData = record.data;
    return {
      kind,
      timestamp,
      name: typeof rawName === 'string' && rawName.length > 0 ? rawName : undefined,
      data:
        typeof rawData === 'object' && rawData !== null && !Array.isArray(rawData)
          ? (rawData as Record<string, unknown>)
          : {},
      durationMs: ExecutionLog.readNumber(record.duration_ms),
      turn: ExecutionLog.readNumber(record.turn),
      correlationId:
        ExecutionLog.readId(record.correlation_id) ?? ExecutionLog.readId(record.request_id),
    };
  }

  /** Returns `value` when it is a finite number (including 0), otherwise `undefined`. */
  private static readNumber(value: unknown): number | undefined {
    return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
  }

  /** Normalizes an identifier field: non-empty strings pass through, finite numbers are stringified. */
  private static readId(value: unknown): string | undefined {
    if (typeof value === 'string') {
      return value.length > 0 ? value : undefined;
    }
    if (typeof value === 'number' && Number.isFinite(value)) {
      return String(value);
    }
    return undefined;
  }

  /**
   * Simplified duration derivation: elapsed time since the first already-loaded
   * event that shares `current`'s correlation id and has an earlier timestamp.
   *
   * @returns Milliseconds between the two events, or `undefined` when no earlier match exists.
   */
  private static computeDuration(events: TraceEvent[], current: TraceEvent): number | undefined {
    const currentTime = current.timestamp.getTime();
    const startEvent = events.find(
      e => e.correlationId === current.correlationId && e.timestamp.getTime() < currentTime,
    );
    return startEvent === undefined ? undefined : currentTime - startEvent.timestamp.getTime();
  }

  /** Number of events that were loaded successfully. */
  get length(): number {
    return this.events.length;
  }

  /** Starts a chainable filter over all loaded events. */
  filter(): TraceFilter {
    return new DefaultTraceFilter(this.events);
  }

  /** Creates a navigator positioned at the first loaded event. */
  navigate(): TraceNavigator {
    return new DefaultTraceNavigator(this.events);
  }
}
// core/filter.ts
/**
 * Array-backed {@link TraceFilter}. Every narrowing call produces a new filter
 * over a fresh array, so the array handed to the constructor is never mutated.
 */
class DefaultTraceFilter implements TraceFilter {
  constructor(private source: TraceEvent[]) {}

  /** Keeps only events whose `kind` is exactly `kind`. */
  byKind(kind: string): TraceFilter {
    return this.where(candidate => candidate.kind === kind);
  }

  /** Keeps only events accepted by `predicate`. */
  where(predicate: (event: TraceEvent) => boolean): TraceFilter {
    return new DefaultTraceFilter(this.source.filter(predicate));
  }

  /** First event in the current selection, or `null` when it is empty. */
  first(): TraceEvent | null {
    if (this.source.length === 0) {
      return null;
    }
    return this.source[0];
  }

  /** Last event in the current selection, or `null` when it is empty. */
  last(): TraceEvent | null {
    const lastIndex = this.source.length - 1;
    if (lastIndex < 0) {
      return null;
    }
    return this.source[lastIndex];
  }

  /** Shallow copy of the current selection; mutating it does not affect the filter. */
  toArray(): TraceEvent[] {
    return Array.from(this.source);
  }
}
// core/navigator.ts
/**
 * Array-backed {@link TraceNavigator}.
 *
 * `cursor` is the index of the next event `step()` will return, so it ranges
 * from 0 (nothing consumed) to `events.length` (everything consumed).
 */
class DefaultTraceNavigator implements TraceNavigator {
  private readonly events: TraceEvent[];
  private cursor = 0;

  constructor(events: TraceEvent[]) {
    this.events = events;
  }

  /** Returns the event at the cursor and advances past it; `null` once the end is reached. */
  step(): TraceEvent | null {
    if (this.cursor < this.events.length) {
      return this.events[this.cursor++];
    }
    return null;
  }

  /** Moves the cursor back one position and returns the event there; `null` at the start. */
  retreat(): TraceEvent | null {
    if (this.cursor > 0) {
      return this.events[--this.cursor];
    }
    return null;
  }

  /**
   * Jumps to `index`, returning that event and leaving the cursor just after it
   * (as if `step()` had just returned it).
   *
   * @throws RangeError when `index` is not an integer within `[0, events.length)`.
   *   The cursor is left unchanged in that case.
   */
  seek(index: number): TraceEvent {
    // NaN and fractional values pass plain `<` / `>=` bounds checks, so they
    // must be rejected explicitly before they can corrupt the cursor.
    if (!Number.isInteger(index) || index < 0 || index >= this.events.length) {
      throw new RangeError(`Index ${index} out of bounds`);
    }
    this.cursor = index + 1;
    return this.events[index];
  }

  /** Returns the event at the cursor without advancing; `null` once the end is reached. */
  peek(): TraceEvent | null {
    if (this.cursor < this.events.length) {
      return this.events[this.cursor];
    }
    return null;
  }

  /** Number of events not yet consumed by `step()`. */
  remaining(): number {
    return this.events.length - this.cursor;
  }

  /** Moves the cursor back to the first event. */
  reset(): void {
    this.cursor = 0;
  }
}
Usage Patterns
Filtering Slow Tool Calls:
// Load the run once, then narrow to tool calls that took longer than five seconds.
const log = ExecutionLog.loadFromJsonl('agent-run-2026-05-24.jsonl');
const toolCalls = log.filter().byKind('tool_call');
// Events without a recorded duration count as 0 ms and are therefore excluded.
const slowCalls = toolCalls.where(e => (e.durationMs ?? 0) > 5000);
const slowCallCount = slowCalls.toArray().length;
console.log(`Found ${slowCallCount} slow tool calls.`);
Interactive Debugging Session:
const navigator = log.navigate();
// Step through events until an error is found (or the trace is exhausted)
let event = navigator.step();
while (event && event.kind !== 'error') {
  event = navigator.step();
}
if (event) {
  console.log(`Error at turn ${event.turn}:`, event.data);
  // step() leaves the cursor just past the event it returned, so the error
  // sits one position before the number of events consumed so far.
  const errorIndex = log.length - navigator.remaining() - 1;
  // Jump to the event preceding the error for context. The index is clamped
  // to 0 so seek() cannot throw when the error is the first event.
  const precedingEvent = navigator.seek(Math.max(0, errorIndex - 1));
  console.log('Context around error:', precedingEvent);
}
Programmatic Replay for Evaluation:
const llmTurns = log.filter().byKind('llm_call').toArray();
for (const turn of llmTurns) {
  // `data` is free-form metadata, so `messages` may be absent or not an array;
  // report zero messages in that case instead of crashing mid-replay.
  const { messages, model } = turn.data;
  const messageCount = Array.isArray(messages) ? messages.length : 0;
  console.log(`Turn ${turn.turn}: ${messageCount} messages, model: ${String(model)}`);
}
Pitfall Guide
When implementing or using a trace replay system, developers often encounter specific pitfalls that can undermine debugging efforts or degrade performance.
| Pitfall | Explanation | Fix |
|---|---|---|
| The State Illusion | Assuming the trace contains enough information to reconstruct the agent's internal memory or variable state. Traces record events, not state snapshots. | Emit explicit state-snapshot events if state reconstruction is required. Use companion libraries that capture state at key intervals. |
| The Memory Trap | Loading massive trace files (e.g., millions of events) into memory at once. This causes OOM errors in constrained environments. | Implement chunked loading or streaming filters for large files. Use fromDirectory patterns to process runs incrementally. |
| Duration Blindness | Failing to emit correlation_id or request_id fields, resulting in missing latency data. Without these IDs, the engine cannot match request/response pairs. | Enforce correlation ID injection in the logging middleware. Validate that all async operations emit matching IDs. |
| Live vs. Post Confusion | Attempting to use the replay engine for real-time monitoring. Replay engines are designed for post-hoc analysis and load the file at startup. | Use a streaming logger or live viewer for real-time monitoring. Reserve replay for historical analysis and eval harnesses. |
| Schema Rigidity | Designing the replay engine to require a strict schema. This breaks when logging layers evolve or when integrating with third-party agents. | Adopt a format-light contract. Require only kind and timestamp. Treat all other fields as optional metadata. |
| Malformed Line Crashes | The loader fails on a single bad JSON line, preventing analysis of the rest of the trace. Production logs often have partial writes. | Implement tolerant parsing. Skip malformed lines with warnings and continue loading valid events. |
| Ignoring Turn Context | Analyzing events in isolation without considering the agent's turn structure. This leads to misinterpretation of sequential dependencies. | Ensure the logging layer emits turn numbers. Use turn-based filtering to isolate specific reasoning cycles. |
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Single Run Debugging | Interactive Navigator | Allows stepping, retreating, and seeking through events to pinpoint failures. | Low (Dev time savings) |
| Batch Performance Audit | Filter & Aggregate | Programmatic filtering identifies slow patterns across hundreds of runs. | Medium (Compute for batch processing) |
| Live Monitoring | Streaming Logger | Replay is post-hoc; streaming provides real-time visibility. | High (Infrastructure for streaming) |
| State Reconstruction | State Snapshot Module | Replay shows events; snapshots capture memory. Use both for full forensics. | Medium (Storage for snapshots) |
| A/B Testing Prompts | Trace Diffing | Compare event sequences between runs to measure prompt impact. | Low (Analysis only) |
Configuration Template
Sample JSONL Trace Entry:
{
"kind": "tool_call",
"timestamp": "2026-05-24T10:15:30.123Z",
"name": "search_database",
"data": {
"query": "customer_order_status",
"params": {"order_id": "ORD-9921"}
},
"correlation_id": "req_abc123",
"turn": 4
}
Loader Configuration:
// Declarative description of the loader's contract for a JSONL trace entry.
const logConfig = {
  // Entries missing either of these are not usable events.
  requiredFields: ['kind', 'timestamp'],
  // `request_id` is listed because the loader accepts it as a fallback for
  // `correlation_id` when pairing events.
  optionalFields: ['name', 'data', 'duration_ms', 'turn', 'correlation_id', 'request_id'],
  // Malformed lines are logged and skipped rather than aborting the load.
  onMalformedLine: 'warn_and_skip',
  // When true, a missing `duration_ms` is derived from events sharing a correlation id.
  enableDurationDerivation: true,
};
Quick Start Guide
- Initialize Replay Engine:
import { ExecutionLog } from './core/loader';
// Parse the JSONL trace into an in-memory, read-only log.
const tracePath = 'trace.jsonl';
const log = ExecutionLog.loadFromJsonl(tracePath);
- Inspect Event Count:
// `length` counts only the lines that were loaded successfully.
const totalEvents = log.length;
console.log(`Total events: ${totalEvents}`);
- Filter for Errors:
// Select every event whose kind is 'error' and materialize the result.
const errorFilter = log.filter().byKind('error');
const errors = errorFilter.toArray();
console.log(`Found ${errors.length} errors.`);
- Navigate Interactively:
// A fresh navigator starts at the first event; step() returns null on an empty log.
const nav = log.navigate();
const firstEvent = nav.step();
const firstKind = firstEvent ? firstEvent.kind : undefined;
console.log('First event:', firstKind);
- Export Analysis:
// Collect tool calls slower than two seconds; events without a duration count as 0 ms.
const toolCallFilter = log.filter().byKind('tool_call');
const slowTools = toolCallFilter.where(e => (e.durationMs ?? 0) > 2000).toArray();
// Pretty-print with two-space indentation for export.
const report = JSON.stringify(slowTools, null, 2);
console.log(report);
Conclusion
The Trace Replay Pattern transforms agent debugging from a fragile, ad-hoc process into a deterministic, reusable workflow. By treating execution logs as navigable data structures, teams can rapidly diagnose failures, analyze performance, and validate agent behavior without writing custom parsing code. The pattern's format-light design ensures resilience against schema drift, while its read-only semantics preserve the integrity of the ground truth. Implementing this pattern is essential for any production system relying on complex, stochastic agent workflows.