}
return true;
}
}
**Rationale:** Capability gating transforms security from a runtime heuristic into a structural guarantee. By defining boundaries at initialization, the system prevents privilege escalation and tool injection attacks regardless of model output.
### Step 2: AST-Only Policy Evaluation
Traditional agent runtimes rely on `eval()` or dynamic code execution to process conditions. This introduces ReDoS vulnerabilities, loop exhaustion, and arbitrary code execution risks. The solution is a sandboxed policy engine that parses model output into an Abstract Syntax Tree (AST) and evaluates it using a strictly limited operator set.
```typescript
/** The closed set of operators the policy engine will evaluate. */
type PolicyOperator = 'eq' | 'contains' | 'gt' | 'lt' | 'field';

/**
 * A single binary policy condition.
 *
 * `left` and `right` are either literals or, when they are strings that name
 * an own key of the evaluation context, references to that context value.
 * For the `field` operator, `left` is a dot-separated path into the context.
 */
interface PolicyNode {
  operator: PolicyOperator;
  left: string | number;
  right: string | number;
}

/**
 * Evaluates a {@link PolicyNode} against a context object using a fixed
 * operator set. No user-supplied code is executed.
 */
class ASTPolicyEngine {
  /**
   * Evaluates one policy node.
   *
   * @param node - The condition to evaluate.
   * @param context - Values that string operands may refer to by key.
   * @returns The boolean result of applying the operator.
   * @throws SyntaxError if `node.operator` is not a supported operator.
   */
  evaluate(node: PolicyNode, context: Record<string, unknown>): boolean {
    const leftVal = this.resolveValue(node.left, context);
    const rightVal = this.resolveValue(node.right, context);
    switch (node.operator) {
      case 'eq':
        return leftVal === rightVal;
      case 'contains':
        return String(leftVal).includes(String(rightVal));
      case 'gt':
        return Number(leftVal) > Number(rightVal);
      case 'lt':
        return Number(leftVal) < Number(rightVal);
      case 'field':
        // `left` is treated as a path here, not as a context key.
        return this.resolveField(String(node.left), context) === rightVal;
      default:
        throw new SyntaxError('Unsupported policy operator');
    }
  }

  /**
   * Resolves an operand: a string naming an own, defined context key yields
   * that context value; anything else is returned as a literal.
   *
   * Only own properties are consulted, so tokens such as `constructor` or
   * `toString` are never resolved to members inherited from the prototype.
   */
  private resolveValue(token: string | number, ctx: Record<string, unknown>): unknown {
    if (typeof token !== 'string' || !this.hasOwn(ctx, token)) {
      return token;
    }
    const value = ctx[token];
    return value !== undefined ? value : token;
  }

  /**
   * Walks a dot-separated path through the context.
   *
   * @returns The value at the path, or `undefined` as soon as a segment is
   * missing, the current value is null/undefined, or the segment is not an
   * own property of the current value.
   */
  private resolveField(path: string, ctx: Record<string, unknown>): unknown {
    let current: unknown = ctx;
    for (const key of path.split('.')) {
      if (current === null || current === undefined || !this.hasOwn(current, key)) {
        return undefined;
      }
      current = (current as Record<string, unknown>)[key];
    }
    return current;
  }

  /** Own-property test that also works for primitives and null-prototype objects. */
  private hasOwn(target: unknown, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(target, key);
  }
}
```
**Rationale:** The policy layer is intentionally less expressive than general-purpose languages. By excluding loops, recursion, and system calls, the engine structurally eliminates resource exhaustion attacks and non-terminating evaluations. Limitation becomes a security property.
### Step 3: Idempotent Execution Adapters
External side-effects must be keyed to prevent duplicate mutations during retries or state recovery. Each execution receives a unique execution_id that the adapter uses to track completion status.
/**
 * Contract for adapters that perform an external side effect at most once
 * per execution id.
 */
interface IdempotentAdapter<T> {
  /** Performs the side effect for `executionId`, or does nothing if it already completed. */
  execute(executionId: string, payload: T): Promise<void>;
  /** Reports the last known outcome for `executionId`. */
  getStatus(executionId: string): Promise<'pending' | 'completed' | 'failed'>;
}

/**
 * Payment adapter that de-duplicates gateway calls by execution id.
 *
 * State is kept in memory only, so it does not survive a process restart.
 */
class StripePaymentAdapter implements IdempotentAdapter<{ amount: number; currency: string }> {
  private readonly completedIds = new Set<string>();
  private readonly failedIds = new Set<string>();
  // One shared promise per id, so overlapping calls cannot both reach the gateway.
  private readonly inFlight = new Map<string, Promise<void>>();

  /**
   * Runs the payment for `executionId` at most once.
   *
   * - Already completed: returns immediately.
   * - Currently running: joins the in-flight attempt (and shares its outcome).
   * - Previously failed: retries with the same id.
   *
   * @throws Whatever the gateway call rejects with; the id is then reported as `'failed'`.
   */
  async execute(executionId: string, payload: { amount: number; currency: string }): Promise<void> {
    if (this.completedIds.has(executionId)) return;

    const running = this.inFlight.get(executionId);
    if (running !== undefined) return running;

    this.failedIds.delete(executionId);
    const attempt = this.runOnce(executionId, payload);
    this.inFlight.set(executionId, attempt);
    try {
      await attempt;
    } finally {
      this.inFlight.delete(executionId);
    }
  }

  /**
   * @returns `'completed'` after a successful execute, `'failed'` after the
   * most recent attempt rejected, and `'pending'` otherwise (including ids
   * this adapter has never seen).
   */
  async getStatus(executionId: string): Promise<'pending' | 'completed' | 'failed'> {
    if (this.completedIds.has(executionId)) return 'completed';
    if (this.failedIds.has(executionId)) return 'failed';
    return 'pending';
  }

  /** Calls the gateway once and records the outcome for `id`. */
  private async runOnce(id: string, payload: { amount: number; currency: string }): Promise<void> {
    try {
      // Simulate external API call with idempotency key
      await this.callPaymentGateway(id, payload);
      this.completedIds.add(id);
    } catch (err: unknown) {
      this.failedIds.add(id);
      throw err;
    }
  }

  private async callPaymentGateway(id: string, payload: { amount: number; currency: string }) {
    // Gateway integration logic
    console.log(`Processing payment ${id}: ${payload.amount} ${payload.currency}`);
  }
}
Rationale: Idempotency boundaries ensure that internal state recovery never duplicates external mutations. The adapter acts as a deterministic gate between the FSM and the outside world.
### Step 4: Cryptographic Hash Chaining & Absorbing States
Every state transition generates a cryptographic envelope containing the previous hash, the current state, the action, and a hash of the payload. Terminal states (SUCCESS, FAILED) are designed as absorbing states: once one is reached, every subsequent input is rejected (callers may choose to treat that rejection as a no-op).
import { createHash } from 'node:crypto';

/** One link in the audit chain. */
interface StateEnvelope {
  /** Zero-based position of this envelope in the chain. */
  sequence: number;
  currentState: string;
  action: string;
  /** Digest over state, action, payload and `previousHash`; also serves as this link's id. */
  payloadHash: string;
  /** `payloadHash` of the preceding envelope, or the genesis marker for the first one. */
  previousHash: string;
  /** `Date.now()` at append time. */
  timestamp: number;
}

/**
 * Append-only, hash-linked log of state transitions.
 *
 * Once a terminal state (`SUCCESS` or `FAILED`) has been appended, every
 * further `append` is rejected.
 */
class AuditChain {
  private chain: StateEnvelope[] = [];
  private terminalReached = false;

  /**
   * Appends a transition and links it to the previous envelope.
   *
   * @param state - State being entered.
   * @param action - Action that caused the transition.
   * @param payload - Data hashed into the envelope; must be JSON-serializable.
   * @returns The envelope that was stored.
   * @throws StateError if a terminal state was already appended.
   *   NOTE(review): `StateError` is not declared in this snippet; it is
   *   expected to be provided elsewhere — confirm.
   */
  append(state: string, action: string, payload: unknown): StateEnvelope {
    if (this.terminalReached) {
      throw new StateError('Cannot append to terminal state');
    }
    const last = this.chain[this.chain.length - 1];
    const previousHash = last !== undefined ? last.payloadHash : '0000000000000000';
    const payloadHash = this.computeHash(JSON.stringify({ state, action, payload, previousHash }));
    const envelope: StateEnvelope = {
      sequence: this.chain.length,
      currentState: state,
      action,
      payloadHash,
      previousHash,
      timestamp: Date.now(),
    };
    this.chain.push(envelope);
    if (state === 'SUCCESS' || state === 'FAILED') {
      this.terminalReached = true;
    }
    return envelope;
  }

  /**
   * Checks the structural integrity of the chain: sequence numbers are
   * contiguous from zero, the first envelope carries the genesis marker, and
   * every envelope's `previousHash` equals its predecessor's `payloadHash`.
   *
   * Payloads are not stored, so digests themselves cannot be recomputed here.
   */
  verifyIntegrity(): boolean {
    return this.chain.every((envelope, index) => {
      if (envelope.sequence !== index) return false;
      const expectedPrevious =
        index === 0 ? '0000000000000000' : this.chain[index - 1].payloadHash;
      return envelope.previousHash === expectedPrevious;
    });
  }

  /** SHA-256 of `data`, hex-encoded, so every byte of the input affects the digest. */
  private computeHash(data: string): string {
    return createHash('sha256').update(data, 'utf8').digest('hex');
  }
}
Rationale: Hash chaining provides tamper-evident audit trails. Absorbing states prevent replay attacks and double-execution by mathematically guaranteeing that terminal states cannot transition further.
### Step 5: GDPR-Compliant Redaction Without Chain Breakage
Regulatory requirements like GDPR Article 17 mandate data erasure, but cryptographic audit trails cannot be altered. The solution replaces PII pointers with tombstone markers while preserving hash continuity.
import * as nodeCrypto from 'node:crypto';

/** Pointer to data held in an external vault. Not referenced by the engine below. */
interface VaultReference {
  type: 'vault';
  path: string;
  data: string;
}

/**
 * Replaces values at the given dot-separated paths with a tombstone marker.
 *
 * NOTE(review): `redactPii` reads `envelope.payloadHash` as if it held the
 * JSON-serialized payload. When that field holds an opaque digest there is
 * nothing to redact and the envelope is returned unchanged. When it does
 * hold JSON, the returned envelope gets a new digest, which no longer
 * matches the `previousHash` stored by the following envelope — confirm
 * which of the two representations callers actually pass in.
 */
class GDPRRedactionEngine {
  /**
   * @param envelope - Envelope whose `payloadHash` may contain serialized JSON.
   * @param piiPaths - Dot-separated paths of the values to tombstone.
   * @returns A new envelope; the input is never modified. If `payloadHash`
   *   is not a JSON object/array, the copy is otherwise identical to the input.
   */
  redactPii(envelope: StateEnvelope, piiPaths: string[]): StateEnvelope {
    let parsed: unknown;
    try {
      parsed = JSON.parse(envelope.payloadHash);
    } catch {
      // Opaque digest: no embedded payload to redact.
      return { ...envelope };
    }
    if (typeof parsed !== 'object' || parsed === null) {
      return { ...envelope };
    }
    const redactedPayload = this.traverseAndRedact(parsed, piiPaths);
    return { ...envelope, payloadHash: this.computeHash(JSON.stringify(redactedPayload)) };
  }

  /**
   * Returns a copy of `obj` in which every existing value addressed by
   * `paths` is replaced with `'[REDACTED_TOMBSTONE]'`.
   *
   * Containers along each path are copied before being written, so `obj` and
   * its nested objects are left untouched. Paths that do not exist (or that
   * pass through a non-object) are skipped rather than created.
   */
  private traverseAndRedact(obj: unknown, paths: string[]): unknown {
    if (typeof obj !== 'object' || obj === null) return obj;
    const root = this.shallowCopy(obj);

    for (const path of paths) {
      const keys = path.split('.');
      // Never walk or write through the prototype accessor.
      if (keys.includes('__proto__')) continue;

      let cursor = root;
      let reachable = true;
      for (const key of keys.slice(0, -1)) {
        const next = this.hasOwn(cursor, key) ? cursor[key] : undefined;
        if (typeof next !== 'object' || next === null) {
          reachable = false;
          break;
        }
        const copy = this.shallowCopy(next);
        cursor[key] = copy;
        cursor = copy;
      }

      const leaf = keys[keys.length - 1];
      if (reachable && this.hasOwn(cursor, leaf)) {
        cursor[leaf] = '[REDACTED_TOMBSTONE]';
      }
    }
    return root;
  }

  /** One-level copy of an array or plain object, typed for keyed access. */
  private shallowCopy(value: object): Record<string, unknown> {
    const copy: unknown = Array.isArray(value) ? [...value] : { ...value };
    return copy as Record<string, unknown>;
  }

  private hasOwn(target: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(target, key);
  }

  /** SHA-256 of `data`, hex-encoded. */
  private computeHash(data: string): string {
    return nodeCrypto.createHash('sha256').update(data, 'utf8').digest('hex');
  }
}
Rationale: PII becomes cryptographically inaccessible while the mathematical proof of safe operation remains intact. Referential integrity and audit continuity are preserved without violating erasure mandates.
Pitfall Guide
1. Direct Model-to-Side-Effect Binding
Explanation: Routing LLM output directly to external APIs without an intermediate validation layer. This assumes the model's JSON output is safe to execute.
Fix: Always route through a capability gate that validates against a compile-time snapshot before any network call occurs.
2. Missing Idempotency Keys in External Calls
Explanation: Retrying failed transitions without unique execution identifiers causes duplicate charges, duplicate records, or infrastructure drift.
Fix: Generate a UUID per transition and pass it as an idempotency key to all external adapters. Track completion status in-memory or in a durable store.
3. Over-Expressive Policy Languages
Explanation: Using full scripting languages (Python, JavaScript eval()) for condition evaluation. This opens the system to ReDoS, infinite loops, and arbitrary code execution.
Fix: Restrict policy evaluation to a finite AST operator set. Explicitly exclude loops, recursion, and system calls. Treat limitation as a security feature.
4. Breaking Hash Chains for Data Deletion
Explanation: Attempting to satisfy GDPR erasure by deleting or altering audit log entries. This destroys cryptographic continuity and violates compliance requirements for tamper-evident logging.
Fix: Implement tombstone-based redaction. Replace PII values with [REDACTED_TOMBSTONE] while preserving the hash chain structure and sequence integrity.
5. Ignoring Absorbing State Semantics
Explanation: Allowing terminal states (SUCCESS, FAILED) to accept further transitions. This enables replay attacks and state corruption during recovery procedures.
Fix: Design terminal states as mathematically absorbing. Any input received after reaching a terminal state must be rejected or treated as a no-op at the runtime level.
6. Runtime Tool Registration
Explanation: Allowing agents to register or modify available tools during execution. This breaks compile-time verification and enables privilege escalation.
Fix: Define all capabilities during initialization. The runtime should reject any tool invocation not present in the original capability snapshot.
7. Assuming JSON Validation Equals Security
Explanation: Relying on JSON schema validation to ensure safe execution. Schema validation checks structure, not intent or capability boundaries.
Fix: Combine structural validation with semantic capability gating. Validate that the requested operation is permitted for the current state and actor context.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Read-only analytics & summarization | Dynamic Dispatch | Low blast radius; model output doesn't mutate state | Low (minimal infrastructure) |
| Financial transactions & payments | Deterministic FSM + Idempotent Adapters | Strict capability gating prevents unauthorized charges; absorbing states block double-execution | Medium (adapter development, hash chaining overhead) |
| Infrastructure provisioning | Deterministic FSM + Compile-Time Snapshots | Prevents privilege escalation; ensures reproducible state transitions | Medium-High (capability mapping, audit storage) |
| PII processing & regulated data | Deterministic FSM + Tombstone Redaction | Satisfies GDPR erasure while preserving cryptographic audit continuity | Medium (redaction engine, secure vault integration) |
| Rapid prototyping & internal tools | Hybrid (FSM for writes, dynamic for reads) | Balances development speed with safety for state-mutating operations | Low-Medium (modular architecture) |
Configuration Template
// agent-runtime.config.ts
import { CapabilityGate, ASTPolicyEngine, AuditChain, IdempotentAdapter } from './runtime-core';
// Tool allow-list plus payload and environment limits.
const capabilitySettings = {
  allowedTools: {
    query_database: true,
    initiate_payment: true,
    provision_resource: true,
    delete_record: false, // Explicitly disabled
  },
  maxPayloadSize: 4096,
  targetEnvironments: ['staging', 'production'],
};

// Operator set and evaluation limits for the policy engine.
const policySettings = {
  allowedOperators: ['eq', 'contains', 'gt', 'lt', 'field'],
  maxDepth: 5,
  timeoutMs: 150,
};

// Audit log hashing, retention and redaction marker.
const auditSettings = {
  hashAlgorithm: 'sha256',
  retentionDays: 365,
  redactionMarker: '[REDACTED_TOMBSTONE]',
};

// Terminal-state handling and transition cap.
const stateSettings = {
  terminal: ['SUCCESS', 'FAILED'],
  absorbing: true,
  maxTransitions: 50,
};

/** Static runtime configuration, grouped by subsystem. */
export const runtimeConfig = {
  capabilities: capabilitySettings,
  policyEngine: policySettings,
  audit: auditSettings,
  states: stateSettings,
};
/**
 * Builds the capability gate, policy engine and audit chain from
 * `runtimeConfig`, constructing them in that order.
 *
 * @returns The three components under the keys `capabilityGate`,
 *   `policyEngine` and `auditChain`.
 */
export function initializeRuntime() {
  const { capabilities, policyEngine: policySection, audit } = runtimeConfig;
  return {
    capabilityGate: new CapabilityGate(capabilities),
    policyEngine: new ASTPolicyEngine(policySection),
    auditChain: new AuditChain(audit),
  };
}
Quick Start Guide
- Initialize the runtime: Import the configuration module and instantiate the capability gate, policy engine, and audit chain using `initializeRuntime()`.
- Define state transitions: Map your agent's workflow to explicit states and allowed actions. Register external adapters with idempotency keys.
- Attach policy rules: Write AST-based conditions for each transition. Ensure no loops or system calls are present. Validate against the capability snapshot.
- Execute with validation: Route all model output through the capability gate before execution. Log transitions to the audit chain. Handle terminal states as absorbing boundaries.
- Verify integrity: Run `auditChain.verifyIntegrity()` after critical operations. Deploy tombstone redaction for any PII fields before long-term storage.