ushes the new user message, estimates tokens, trims if necessary, calls the model, logs the response, and saves a checkpoint at defined intervals.
import { Anthropic } from "@anthropic-ai/sdk";
import { promises as fs } from "fs";
import path from "path";
import { createHash } from "crypto";
// Domain interfaces
/**
 * One message of the conversation. Records of this shape are held in the
 * in-memory context window and written by the turn logger as one JSON object
 * per line.
 */
interface TurnRecord {
// Who produced the message.
role: "user" | "assistant" | "system";
// Message text; its length is what the token estimate is computed from.
content: string;
// Creation time; the orchestrator fills this with Date.now() (epoch milliseconds).
timestamp: number;
}
/**
 * State written to checkpoint.json by the checkpoint store. It is serialized
 * with JSON.stringify, so every value must survive a JSON round trip.
 */
interface SessionSnapshot {
// Value of the orchestrator's turn counter when the snapshot was taken.
turnIndex: number;
// The orchestrator's tool state at snapshot time; restored verbatim on load.
toolState: Record<string, unknown>;
// Text of the assistant reply that completed the checkpointed turn.
lastAssistantText: string;
// Date.now() at the moment the snapshot was written (epoch milliseconds).
checkpointTime: number;
}
/**
 * Budget for the context window. The window evicts old turns once its
 * estimated token total exceeds maxTokens - safetyBuffer.
 */
interface ContextWindowConfig {
// Upper bound on the window, in estimated tokens.
maxTokens: number;
// Headroom subtracted from maxTokens before the limit is applied.
safetyBuffer: number;
}
// Token estimation utility (model-specific approximation)
/**
 * Approximates how many tokens a piece of text occupies.
 *
 * Uses a flat ratio of four characters per token and rounds up, so any
 * non-empty string counts as at least one token and the empty string as zero.
 * `text.length` counts UTF-16 code units, not user-perceived characters.
 *
 * @param text Text to measure.
 * @returns Estimated token count (a non-negative integer).
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const fractionalTokens = text.length / charsPerToken;
  return Math.ceil(fractionalTokens);
}
// Context window manager
/**
 * In-memory sliding window over the conversation.
 *
 * Turns are appended with `push`; whenever the estimated token total exceeds
 * `maxTokens - safetyBuffer`, the oldest turns are evicted (FIFO) until the
 * total fits or only one turn is left. The window never empties itself, so a
 * single turn larger than the whole budget is kept as-is.
 */
class ConversationWindow {
  // Each entry remembers the estimate made at push time, so eviction subtracts
  // exactly what was added even if the caller later mutates `record.content`.
  private buffer: Array<{ record: TurnRecord; tokens: number }> = [];
  private currentTokens: number = 0;

  /**
   * @param config Token budget; the effective limit is
   *   `config.maxTokens - config.safetyBuffer`.
   */
  constructor(private config: ContextWindowConfig) {}

  /**
   * Appends a turn and evicts old turns if the budget is exceeded.
   *
   * @param record Turn to add; its `content` length drives the token estimate.
   */
  push(record: TurnRecord): void {
    const tokens = estimateTokens(record.content);
    this.buffer.push({ record, tokens });
    this.currentTokens += tokens;
    this.trim();
  }

  /**
   * Returns the retained turns, oldest first, reduced to `role` and `content`.
   * The role keeps its literal union type instead of widening to `string`.
   */
  getMessages(): Array<{ role: TurnRecord["role"]; content: string }> {
    return this.buffer.map(({ record }) => ({
      role: record.role,
      content: record.content,
    }));
  }

  /** Evicts from the front until the estimate fits or one turn remains. */
  private trim(): void {
    const limit = this.config.maxTokens - this.config.safetyBuffer;
    let evicted = false;
    while (this.currentTokens > limit && this.buffer.length > 1) {
      this.dropOldest();
      evicted = true;
    }
    if (!evicted) {
      return;
    }
    // Eviction can cut an exchange in half and leave an assistant reply whose
    // prompt is gone at the head of the window. Drop such orphans so the
    // window opens with a prompt again. Windows that never overflowed are
    // left untouched.
    // NOTE(review): the Messages API has historically required the first
    // message to come from the user - confirm against the current API docs.
    while (this.buffer.length > 1 && this.buffer[0]?.record.role === "assistant") {
      this.dropOldest();
    }
  }

  /** Removes the oldest turn and gives its cached tokens back to the budget. */
  private dropOldest(): void {
    const removed = this.buffer.shift();
    if (removed) {
      this.currentTokens -= removed.tokens;
    }
  }
}
// JSONL turn logger
/**
 * Append-only JSONL log of conversation turns, stored as `turns.jsonl` inside
 * the session directory (one JSON-encoded TurnRecord per line).
 */
class TurnLogger {
  private filePath: string;

  /**
   * @param sessionDir Directory that holds this session's files. It does not
   *   need to exist yet; it is created on the first append.
   */
  constructor(sessionDir: string) {
    this.filePath = path.join(sessionDir, "turns.jsonl");
  }

  /**
   * Appends one turn as a single JSON line.
   *
   * @param record Turn to persist.
   * @throws Propagates file-system errors from creating the directory or
   *   appending to the file.
   */
  async append(record: TurnRecord): Promise<void> {
    // appendFile creates the file but not its parent directories; without this
    // the very first append of a new session fails with ENOENT.
    await fs.mkdir(path.dirname(this.filePath), { recursive: true });
    const line = JSON.stringify(record) + "\n";
    await fs.appendFile(this.filePath, line, "utf-8");
  }

  /**
   * Reads every well-formed turn from the log, in file order.
   *
   * Never throws. A missing file means an empty history. Lines that are not
   * valid JSON or do not have the TurnRecord shape (for example a line
   * truncated by a crash mid-write) are skipped with a warning instead of
   * discarding the whole history. Other read errors are reported with a
   * warning and also yield an empty history.
   *
   * @returns The turns recovered from the log; empty if there are none.
   */
  async loadAll(): Promise<TurnRecord[]> {
    let raw: string;
    try {
      raw = await fs.readFile(this.filePath, "utf-8");
    } catch (err: unknown) {
      if (!TurnLogger.isMissingFile(err)) {
        console.warn(`TurnLogger: could not read ${this.filePath}; starting with empty history`, err);
      }
      return [];
    }

    const records: TurnRecord[] = [];
    let skipped = 0;
    for (const line of raw.split("\n")) {
      if (line.trim() === "") {
        continue;
      }
      const record = TurnLogger.parseLine(line);
      if (record === null) {
        skipped++;
      } else {
        records.push(record);
      }
    }
    if (skipped > 0) {
      console.warn(`TurnLogger: skipped ${skipped} malformed line(s) in ${this.filePath}`);
    }
    return records;
  }

  /** Parses one log line; returns null when it is not a valid TurnRecord. */
  private static parseLine(line: string): TurnRecord | null {
    try {
      const parsed: unknown = JSON.parse(line);
      return TurnLogger.isTurnRecord(parsed) ? parsed : null;
    } catch {
      return null;
    }
  }

  /** Runtime shape check for data read back from disk. */
  private static isTurnRecord(value: unknown): value is TurnRecord {
    if (typeof value !== "object" || value === null) {
      return false;
    }
    const candidate = value as Record<string, unknown>;
    const role = candidate["role"];
    return (
      (role === "user" || role === "assistant" || role === "system") &&
      typeof candidate["content"] === "string" &&
      typeof candidate["timestamp"] === "number"
    );
  }

  /** True when the error is the file system's "no such file" (ENOENT). */
  private static isMissingFile(err: unknown): boolean {
    return (
      typeof err === "object" &&
      err !== null &&
      (err as { code?: unknown }).code === "ENOENT"
    );
  }
}
// Checkpoint store
/**
 * Persists the latest session snapshot as pretty-printed JSON in
 * `checkpoint.json` inside the session directory.
 */
class CheckpointStore {
  private filePath: string;

  /**
   * @param sessionDir Directory that holds this session's files. It does not
   *   need to exist yet; it is created on the first save.
   */
  constructor(sessionDir: string) {
    this.filePath = path.join(sessionDir, "checkpoint.json");
  }

  /**
   * Replaces the stored snapshot.
   *
   * The snapshot is written to a temporary sibling file and then renamed over
   * the real one, so a crash mid-write leaves the previous checkpoint intact
   * instead of a truncated file.
   *
   * @param snapshot State to persist; must be JSON-serializable.
   * @throws Propagates file-system and serialization errors.
   */
  async save(snapshot: SessionSnapshot): Promise<void> {
    // writeFile does not create parent directories.
    await fs.mkdir(path.dirname(this.filePath), { recursive: true });
    const tempPath = `${this.filePath}.${process.pid}.tmp`;
    try {
      await fs.writeFile(tempPath, JSON.stringify(snapshot, null, 2), "utf-8");
      await fs.rename(tempPath, this.filePath);
    } catch (err: unknown) {
      // Best-effort cleanup; the original failure is what the caller needs.
      await fs.unlink(tempPath).catch(() => undefined);
      throw err;
    }
  }

  /**
   * Loads the stored snapshot.
   *
   * Never throws. Returns null when no checkpoint exists. An unreadable,
   * unparsable or wrongly shaped checkpoint is also treated as absent, with a
   * warning so the loss of state is visible.
   *
   * @returns The snapshot, or null if none is usable.
   */
  async load(): Promise<SessionSnapshot | null> {
    let raw: string;
    try {
      raw = await fs.readFile(this.filePath, "utf-8");
    } catch (err: unknown) {
      const missing =
        typeof err === "object" &&
        err !== null &&
        (err as { code?: unknown }).code === "ENOENT";
      if (!missing) {
        console.warn(`CheckpointStore: could not read ${this.filePath}`, err);
      }
      return null;
    }

    try {
      const parsed: unknown = JSON.parse(raw);
      if (CheckpointStore.isSnapshot(parsed)) {
        return parsed;
      }
    } catch {
      // Fall through to the shared warning below.
    }
    console.warn(`CheckpointStore: ignoring malformed checkpoint at ${this.filePath}`);
    return null;
  }

  /** Runtime shape check for data read back from disk. */
  private static isSnapshot(value: unknown): value is SessionSnapshot {
    if (typeof value !== "object" || value === null) {
      return false;
    }
    const candidate = value as Record<string, unknown>;
    const toolState = candidate["toolState"];
    return (
      typeof candidate["turnIndex"] === "number" &&
      typeof toolState === "object" &&
      toolState !== null &&
      !Array.isArray(toolState) &&
      typeof candidate["lastAssistantText"] === "string" &&
      typeof candidate["checkpointTime"] === "number"
    );
  }
}
// Orchestrator
/**
 * Drives one persistent chat session.
 *
 * For every user message it records the turn, sends the current window to the
 * model, records the reply, and writes a checkpoint every
 * `checkpointInterval` completed turns. Session files live under
 * `/tmp/agent-sessions/<sessionId>`.
 */
class SessionOrchestrator {
  private window: ConversationWindow;
  private logger: TurnLogger;
  private store: CheckpointStore;
  private client: Anthropic;
  private turnCounter: number = 0;
  private toolState: Record<string, unknown> = {};
  private sessionDir: string;
  // Shared promise of the one-time restore; null until initialize() is called.
  private restoring: Promise<void> | null = null;

  /**
   * @param sessionId Identifier used as the session's directory name.
   * @param checkpointInterval Number of completed turns between checkpoints.
   */
  constructor(
    private sessionId: string,
    private checkpointInterval: number = 3
  ) {
    this.sessionDir = path.join("/tmp/agent-sessions", sessionId);
    this.window = new ConversationWindow({ maxTokens: 6000, safetyBuffer: 256 });
    this.logger = new TurnLogger(this.sessionDir);
    this.store = new CheckpointStore(this.sessionDir);
    this.client = new Anthropic();
  }

  /**
   * Restores persisted state exactly once per instance.
   *
   * Safe to call repeatedly and concurrently: later calls wait for the first
   * restore. Replaying the log on every call would push every logged turn into
   * the window again and rewind the turn counter to the last checkpoint, after
   * which the counter would never reach the checkpoint interval again.
   *
   * @throws Propagates a failure to create the session directory; a failed
   *   restore may be retried by calling this method again.
   */
  async initialize(): Promise<void> {
    if (this.restoring === null) {
      this.restoring = this.restore().catch((err: unknown) => {
        this.restoring = null;
        throw err;
      });
    }
    await this.restoring;
  }

  /**
   * Handles one user message end to end.
   *
   * @param userMessage Text of the user's message.
   * @returns The assistant's reply text (all text blocks concatenated; empty
   *   if the response contained none).
   * @throws Propagates API and file-system errors. The user turn is logged
   *   before the API call, so it stays in the log if the call fails.
   */
  async processTurn(userMessage: string): Promise<string> {
    await this.initialize();

    const userTurn: TurnRecord = {
      role: "user",
      content: userMessage,
      timestamp: Date.now(),
    };
    this.window.push(userTurn);
    await this.logger.append(userTurn);

    // Only user/assistant turns are sent as messages; system instructions go
    // through the request's top-level `system` field. Narrowing the role here
    // also gives the payload the literal role type instead of `string`.
    const messages: Array<{ role: "user" | "assistant"; content: string }> = [];
    for (const { role, content } of this.window.getMessages()) {
      if (role === "user" || role === "assistant") {
        messages.push({ role, content });
      }
    }

    const response = await this.client.messages.create({
      model: "claude-sonnet-4-6",
      max_tokens: 1024,
      system: "You are a persistent assistant. Maintain context across turns.",
      messages,
    });

    // Collect every text block rather than indexing content[0], which is
    // undefined for an empty response and may not be the only text block.
    let assistantText = "";
    for (const block of response.content) {
      if (block.type === "text") {
        assistantText += block.text;
      }
    }

    const assistantTurn: TurnRecord = {
      role: "assistant",
      content: assistantText,
      timestamp: Date.now(),
    };
    this.window.push(assistantTurn);
    await this.logger.append(assistantTurn);

    this.turnCounter++;
    if (this.turnCounter % this.checkpointInterval === 0) {
      await this.store.save({
        turnIndex: this.turnCounter,
        toolState: this.toolState,
        lastAssistantText: assistantText,
        checkpointTime: Date.now(),
      });
    }
    return assistantText;
  }

  /** Replays the turn log into the window and applies the latest checkpoint. */
  private async restore(): Promise<void> {
    // The logger and store write into this directory; create it up front so
    // the first turn of a brand-new session does not fail with ENOENT. This
    // runs before any state is touched, so a failure here is safe to retry.
    await fs.mkdir(this.sessionDir, { recursive: true });

    const history = await this.logger.loadAll();
    let completedTurns = 0;
    for (const turn of history) {
      this.window.push(turn);
      if (turn.role === "assistant") {
        completedTurns++;
      }
    }

    const snapshot = await this.store.load();
    if (snapshot) {
      this.toolState = snapshot.toolState;
    }
    // The checkpoint can lag behind the log by up to one interval, so resume
    // counting from whichever source has seen more completed turns.
    this.turnCounter = Math.max(snapshot?.turnIndex ?? 0, completedTurns);
  }
}
// Usage
/**
 * Demo entry point: runs two consecutive turns against the same session and
 * prints each reply.
 */
async function main(): Promise<void> {
  const orchestrator = new SessionOrchestrator("session-7742", 3);

  const r1 = await orchestrator.processTurn("What is the capital of France?");
  console.log("Turn 1:", r1);

  const r2 = await orchestrator.processTurn("And what is its population?");
  console.log("Turn 2:", r2);
}

// Report the failure and make the process exit non-zero; logging alone would
// let a failed run finish with status 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
Architecture Decisions & Rationale
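- Push-Time Token Estimation: Counting tokens when messages enter the buffer avoids last-minute recalculation during API assembly. Because the count is a character-based estimate, it keeps the payload within the configured budget in the common case rather than guaranteeing it; the safety buffer absorbs the estimation error and helps avoid 400 errors.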
- JSONL for Persistence: Append-only files are crash-safe. If a process terminates mid-write, the last complete line remains valid. JSONL also enables streaming reads and easy log rotation.
- Coarse Checkpointing: Saving state every N turns balances I/O cost against recovery granularity. Fine-grained checkpoints add disk latency to every turn. Coarse checkpoints accept minor re-execution in exchange for predictable throughput.
- Stateless Window Manager: The context window never reads from disk. It operates purely in memory, making eviction deterministic and fast. Persistence is delegated to the logger, enforcing separation of concerns.
Pitfall Guide
1. Checkpointing on Every Turn
Explanation: Saving state after each API call introduces synchronous disk I/O into the critical path. This increases latency and can exhaust file descriptor limits under concurrent load.
Fix: Use a threshold-based strategy (e.g., every 3–5 turns) or batch writes. If mid-tool recovery is required, checkpoint inside the tool dispatcher, not the main loop.
2. Token Estimation Drift
Explanation: Character-to-token ratios vary by model, language, and formatting. Using a fixed multiplier causes the window to either over-trim (losing context) or under-trim (triggering API errors).
Fix: Use model-specific tokenizers or add a 10–15% safety buffer. Validate estimates against actual API responses during load testing.
3. Blind Context Eviction
Explanation: Dropping the oldest messages indiscriminately removes system instructions, tool definitions, or critical user constraints. The agent loses behavioral grounding.
Fix: Preserve high-priority messages (system prompts, tool schemas) in a separate buffer. Only evict conversational turns. Consider summarization hooks for long-running sessions.
4. Blocking Async I/O
Explanation: Using synchronous file operations (fs.readFileSync, fs.writeFileSync) in an async orchestrator blocks the event loop, degrading throughput for concurrent sessions.
Fix: Use fs.promises or aiofiles-equivalent async APIs. Run heavy serialization in worker threads if checkpoint payloads exceed 50KB.
5. Session File Collisions
Explanation: Multiple processes writing to the same JSONL or checkpoint file causes interleaved records or corrupted JSON. This breaks session continuity.
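Fix: Isolate sessions by UUID. Implement file locking (Node has no built-in fs.lock; use a lock file created with the exclusive "wx" open flag, or a locking library) or use a session registry that enforces single-writer semantics per ID.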
6. Non-Serializable Tool State
Explanation: Storing database connections, open streams, or closure references in toolState causes serialization failures or stale state on resume.
Fix: Explicitly define serializable state shapes. Store only primitives, plain objects, and IDs. Re-establish connections on resume using stored identifiers.
7. Ignoring Rate Limits During Resume
Explanation: Restoring a long session and immediately firing multiple API calls can trigger provider rate limits, especially if the checkpoint contains pending tool calls.
Fix: Implement exponential backoff and token-aware batching during session restoration. Queue pending tool executions and respect provider concurrency caps.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Short-lived support chats (<10 turns) | In-memory window only | No persistence overhead; fast teardown | Lowest |
| Long-running research sessions (>50 turns) | JSONL logger + coarse checkpoints | Durable history, predictable recovery | Moderate (+15% I/O) |
| Multi-agent collaborative threads | Separate JSONL per agent + session registry | Prevents interleaved turns, maintains ordering | Higher (storage + routing) |
| High-frequency automation (>1k turns/hr) | Batched JSONL + Redis checkpoints | Reduces disk I/O, enables fast state sync | High (infrastructure) |
| Cost-sensitive production | Token-aware window + summarization hook | Compresses history, caps API spend | Lowest long-term |
Configuration Template
// config/session.config.ts
/**
 * Example settings for a session.
 *
 * model, maxTokens, safetyBuffer, checkpointInterval and storage.baseDir carry
 * the same values that SessionOrchestrator currently hard-codes.
 * NOTE(review): no code visible here reads this object, and nothing shown
 * implements the storage.format/compression, eviction or resilience options -
 * confirm what consumes them before relying on any of these fields.
 */
export const SessionConfig = {
// Model identifier passed to the Messages API.
model: "claude-sonnet-4-6",
// Context window budget and headroom, in estimated tokens.
maxTokens: 6000,
safetyBuffer: 256,
// Completed turns between checkpoints.
checkpointInterval: 3,
storage: {
// Parent directory; each session gets a subdirectory named after its ID.
baseDir: "/tmp/agent-sessions",
format: "jsonl",
compression: false, // Enable for sessions >100 turns
},
eviction: {
strategy: "fifo", // fifo, priority, or summarize
preserveSystemPrompts: true,
maxTurnsBeforeSummarize: 50,
},
resilience: {
retryAttempts: 3,
backoffMultiplier: 1.5,
maxConcurrency: 10,
},
};
Quick Start Guide
- Initialize the project: Create a new TypeScript project and install dependencies: `npm init -y && npm install @anthropic-ai/sdk typescript ts-node @types/node`
- Create the orchestrator: Copy the `SessionOrchestrator` class and supporting utilities into `src/orchestrator.ts`. Configure the session ID and checkpoint interval.
- Run a test session: Execute `ts-node src/orchestrator.ts`. Verify that `turns.jsonl` and `checkpoint.json` are created in the session directory.
- Simulate recovery: Kill the process mid-session, restart it, and send a follow-up prompt. Confirm the agent resumes from the last checkpoint without repeating context.
- Monitor metrics: Add logging for token usage, checkpoint latency, and eviction events. Adjust `safetyBuffer` and `checkpointInterval` based on observed API response times.