cy-engine.ts
import { z } from 'zod';
/** The closed set of operations an action request may name. */
const OperationSchema = z.enum(['read', 'write', 'execute', 'delegate']);

/**
 * Runtime schema for a single action request.
 *
 * `actorId` and `targetResource` are plain strings, `operation` must be one of
 * the values in `OperationSchema`, and `context` is an optional record of
 * string keys to values of unknown type.
 */
const ActionSchema = z.object({
  actorId: z.string(),
  targetResource: z.string(),
  operation: OperationSchema,
  context: z.optional(z.record(z.unknown())),
});

/** Static type inferred from `ActionSchema`. */
export type ActionRequest = z.infer<typeof ActionSchema>;
/**
 * In-memory policy gateway that decides whether an actor may perform an operation.
 *
 * A policy maps an actor ID to the operation names that actor may perform; the
 * entry `'*'` permits every operation. Actors without an entry are denied.
 *
 * NOTE(review): only `actorId` and `operation` take part in the decision.
 * `targetResource` is written to the audit event but is not evaluated.
 */
export class PolicyGateway {
  private readonly policyStore: Map<string, readonly string[]>;

  /**
   * @param initialPolicies Actor ID -> permitted operation names. Each list is
   *   copied, so mutating the caller's arrays after construction cannot change
   *   the policy this gateway enforces.
   */
  constructor(initialPolicies: Record<string, string[]>) {
    this.policyStore = new Map();
    for (const [actorId, operations] of Object.entries(initialPolicies)) {
      // Defensive copy: the gateway must not share array references with callers.
      this.policyStore.set(actorId, [...operations]);
    }
  }

  /**
   * Decides whether `request.actorId` may perform `request.operation` and emits
   * one audit event for the decision.
   *
   * @param request The action being requested.
   * @returns `true` when the actor's policy lists the operation or `'*'`,
   *   otherwise `false` (including when the actor has no policy entry).
   */
  async evaluate(request: ActionRequest): Promise<boolean> {
    const allowedOps = this.policyStore.get(request.actorId) ?? [];
    const isAllowed = allowedOps.includes(request.operation) || allowedOps.includes('*');
    // The decision is logged before it is returned to the caller.
    await this.logPolicyDecision(request, isAllowed);
    return isAllowed;
  }

  /**
   * Writes one JSON-encoded `policy_evaluation` event via `console.log`.
   *
   * @param request The evaluated request; actor, target and operation are recorded.
   * @param allowed The decision, recorded as `'permit'` or `'deny'`.
   */
  private async logPolicyDecision(request: ActionRequest, allowed: boolean): Promise<void> {
    // NOTE(review): presumably stdout is shipped to an append-only audit
    // stream by the runtime; nothing in this class enforces that.
    console.log(
      JSON.stringify({
        event: 'policy_evaluation',
        timestamp: new Date().toISOString(),
        actor: request.actorId,
        target: request.targetResource,
        operation: request.operation,
        decision: allowed ? 'permit' : 'deny',
      }),
    );
  }
}
**Rationale:** Externalizing policy prevents prompt injection from bypassing security controls. The engine acts as a gatekeeper that the orchestrator cannot circumvent, ensuring consistent enforcement across all execution paths.
### Step 2: Implement Runtime Delegation with Scoped Tokens
When a parent agent spawns a sub-agent, permissions must be narrowed, not inherited wholesale. Use short-lived, scoped delegation tokens that explicitly define allowable operations and resource boundaries.
```typescript
// delegation-manager.ts
import { createHash, randomUUID } from 'crypto';
/**
 * The permissions recorded for one delegation from a parent to a child.
 */
export interface DelegationClaim {
  /** Identifier of the delegating party. */
  parentId: string;
  /** Identifier of the party receiving the delegation. */
  childId: string;
  /** Operation names the child is allowed to perform. */
  allowedOperations: Array<string>;
  /** Resources the delegation applies to. */
  resourceScope: Array<string>;
  /** Expiry instant as epoch milliseconds (the same clock as `Date.now()`). */
  expiresAt: number;
}
/**
 * Issues and validates short-lived delegation tokens held in an in-memory vault.
 *
 * A token is an opaque random UUID; the claim it stands for never leaves the
 * vault by reference, so neither the issuer nor a token holder can widen a
 * delegation after it has been granted.
 */
export class DelegationManager {
  private readonly tokenVault: Map<string, DelegationClaim>;

  /**
   * @param ttlMs Lifetime of every issued token in milliseconds.
   *   Defaults to 300_000 (5 minutes).
   */
  constructor(private readonly ttlMs: number = 300_000) {
    this.tokenVault = new Map();
  }

  /**
   * Creates a delegation from `parentId` to `childId` and returns its token ID.
   *
   * Only `allowedOperations` and `resourceScope` are read from `scope`; both
   * default to an empty list. Expiry is always set by this manager, so an
   * `expiresAt` supplied in `scope` is ignored.
   *
   * @param parentId Identifier of the delegating party.
   * @param childId Identifier of the party receiving the delegation.
   * @param scope Operations and resources to grant.
   * @returns The opaque token ID to hand to the child.
   */
  issueToken(parentId: string, childId: string, scope: Partial<DelegationClaim>): string {
    const claim: DelegationClaim = {
      parentId,
      childId,
      // Copy the arrays so later mutation by the caller cannot widen the grant.
      allowedOperations: [...(scope.allowedOperations ?? [])],
      resourceScope: [...(scope.resourceScope ?? [])],
      expiresAt: Date.now() + this.ttlMs,
    };
    const tokenId = randomUUID();
    this.tokenVault.set(tokenId, claim);
    return tokenId;
  }

  /**
   * Looks up a token and returns a copy of its claim while it is still valid.
   *
   * A token is expired once the current time reaches `expiresAt`. Expired
   * tokens are removed from the vault as a side effect.
   *
   * @param tokenId Token ID previously returned by `issueToken`.
   * @returns A detached copy of the claim, or `null` when the token is unknown
   *   or expired.
   */
  validateToken(tokenId: string): DelegationClaim | null {
    const claim = this.tokenVault.get(tokenId);
    if (!claim || claim.expiresAt <= Date.now()) {
      this.tokenVault.delete(tokenId);
      return null;
    }
    // Return a detached copy so callers cannot edit the stored claim.
    return {
      ...claim,
      allowedOperations: [...claim.allowedOperations],
      resourceScope: [...claim.resourceScope],
    };
  }
}
```
**Rationale:** Scoped tokens enforce the principle of least privilege across runtime hierarchies. If a sub-agent is compromised or behaves unexpectedly, the blast radius is contained to the explicitly granted operations and resources. Token expiration prevents stale permissions from lingering after task completion.
### Step 3: Capture Reasoning Telemetry Alongside Actions
Audit logs must record the decision path, not just the final tool invocation. Structured telemetry should capture the LLM's reasoning state, tool selection rationale, and context windows at the time of execution.
// audit-logger.ts
/**
 * One audit record: a tool invocation together with the decision context
 * captured alongside it.
 */
export interface ExecutionTrace {
  /** Identifier of this trace record. */
  traceId: string;
  /** Identifier of the prompt the trace is attributed to. */
  promptId: string;
  /** Actor identifiers involved, as an ordered list. */
  actorChain: Array<string>;
  /** Free-text snapshot of the reasoning recorded for this action. */
  reasoningSnapshot: string;
  /** The tool call being recorded. */
  toolInvocation: {
    name: string;
    parameters: { [key: string]: unknown };
    /** Time of the invocation, as a string. */
    timestamp: string;
  };
  /** Outcome of the policy check for this action. */
  policyDecision: 'deny' | 'permit';
}
/**
 * Sends execution traces to the audit stream endpoint over HTTP.
 */
export class ExecutionAuditor {
  /**
   * Posts one trace, with schema metadata attached, to the audit stream.
   *
   * The payload is the trace plus a `metadata` object
   * (`schemaVersion: '1.0'`, `retentionClass: 'compliance'`).
   *
   * @param trace The execution trace to persist.
   * @throws Error when the endpoint answers with a non-2xx status. `fetch`
   *   resolves normally on HTTP errors, so without this check a rejected
   *   audit write would look like a success. Network failures reject as
   *   `fetch` itself rejects.
   */
  async record(trace: ExecutionTrace): Promise<void> {
    const payload = {
      ...trace,
      metadata: {
        schemaVersion: '1.0',
        retentionClass: 'compliance',
      },
    };
    // NOTE(review): the endpoint is presumably backed by immutable storage
    // (e.g. S3 + Glacier, or an append-only DB); this client cannot verify that.
    const response = await fetch('https://audit.internal/v1/streams/agentic-traces', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      throw new Error(`Audit write failed: HTTP ${response.status} ${response.statusText}`);
    }
  }
}
**Rationale:** Incident response requires understanding why a system chose a specific action. Capturing reasoning snapshots alongside tool calls enables forensic analysis, compliance auditing, and model behavior tuning without relying on opaque black-box executions.
### Step 4: Enforce Orchestrator-Level Budgets
Per-tool rate limits are insufficient for agentic systems. A single prompt can spawn dozens of sub-tasks, each technically under its individual limit but collectively exceeding approved thresholds. Budgets must be tracked at the prompt level and enforced by the orchestrator.
// quota-controller.ts
/**
 * Tracks a per-prompt action budget and answers whether another action may run.
 *
 * Budgets live in memory, keyed by prompt ID. A prompt with no allocated
 * budget is treated as having none: `consume` refuses and `getRemaining`
 * reports 0.
 */
export class QuotaController {
  private readonly budgets: Map<string, { remaining: number; max: number }>;

  /**
   * @param defaultMax Budget used by `allocate` when no explicit `maxActions`
   *   is given.
   */
  constructor(private readonly defaultMax: number) {
    this.budgets = new Map();
  }

  /**
   * Sets (or resets) the budget for a prompt.
   *
   * @param promptId Prompt the budget belongs to.
   * @param maxActions Number of actions allowed; defaults to `defaultMax`.
   * @throws RangeError when `maxActions` is `NaN`. A `NaN` budget never
   *   compares as exhausted and would grant unlimited actions.
   */
  allocate(promptId: string, maxActions: number = this.defaultMax): void {
    if (Number.isNaN(maxActions)) {
      throw new RangeError(`maxActions for prompt "${promptId}" must be a number, got NaN`);
    }
    this.budgets.set(promptId, { remaining: maxActions, max: maxActions });
  }

  /**
   * Spends one action from the prompt's budget if any is left.
   *
   * @param promptId Prompt whose budget is charged.
   * @returns `true` when an action was deducted; `false` when the prompt has
   *   no budget or the budget is exhausted.
   */
  consume(promptId: string): boolean {
    const budget = this.budgets.get(promptId);
    if (!budget || budget.remaining <= 0) return false;
    budget.remaining--;
    return true;
  }

  /**
   * @param promptId Prompt to inspect.
   * @returns Actions left for the prompt, or 0 when it has no budget.
   */
  getRemaining(promptId: string): number {
    return this.budgets.get(promptId)?.remaining ?? 0;
  }
}
**Rationale:** Prompt-level quotas prevent quota exhaustion and cost overruns caused by dynamic fan-out. The orchestrator checks the budget before spawning new sub-tasks, ensuring that runtime autonomy operates within predefined financial and operational boundaries.
Pitfall Guide
1. Static Service Accounts for Sub-Agents
Explanation: Assigning a single shared service account to all dynamically spawned agents eliminates traceability and violates least-privilege principles. If a sub-agent performs an unauthorized action, the audit trail cannot distinguish between parent and child execution.
Fix: Issue unique, short-lived identities for each sub-agent via OIDC delegation. Scope permissions explicitly and tie every action to a verifiable delegation chain.
2. Relying on Per-Tool Rate Limits Alone
Explanation: Applying rate limits only to individual tool calls ignores the combinatorial explosion of agentic fan-out. A prompt can trigger 50 sub-agents, each making 10 calls, bypassing per-tool thresholds while exhausting system capacity.
Fix: Implement orchestrator-level budget tracking. Enforce caps at the prompt level and pause execution when thresholds are approached, triggering human review or graceful degradation.
3. Missing Reasoning Chains in Logs
Explanation: Recording only tool invocations and outputs leaves incident responders blind to the decision logic. When an agentic system takes an unexpected path, the absence of reasoning telemetry makes root-cause analysis impossible.
Fix: Structure audit logs to include reasoning snapshots, tool selection rationale, and context windows. Store these alongside execution traces in an append-only, queryable format.
4. Unbounded Sub-Agent Spawning
Explanation: Without depth limits or resource budgets, recursive delegation can trigger infinite loops or resource exhaustion. The system may spawn sub-agents to investigate failures, which in turn spawn more agents to investigate the investigation.
Fix: Enforce maximum delegation depth and track cumulative compute spend per prompt. Implement circuit breakers that halt spawning when thresholds are breached and fall back to a deterministic recovery path.
5. Assuming Retry Equals Replanning
Explanation: Traditional automation relies on retry logic for transient failures. Agentic systems require state-aware replanning that modifies the action graph based on new information. Treating replanning as a simple retry wastes compute and fails to resolve underlying state mismatches.
Fix: Design recovery handlers that evaluate failure context, adjust tool selection, and rewrite the execution plan. Log the replanning decision separately from the original action to maintain audit clarity.
6. Embedding Policy in Agent Prompts
Explanation: Instructing the LLM to "check permissions before acting" or "never access restricted data" relies on model compliance rather than enforced boundaries. Prompt-based policy is fragile and easily bypassed by adversarial inputs or context drift.
Fix: Externalize authorization to a zero-trust policy engine evaluated per hop. The agent should request actions; the platform should grant or deny them based on immutable rules.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Fixed SOPs with known inputs/outputs | Deterministic Workflow | Predictable execution, minimal compute, straightforward audit | Low |
| Single-task automation with predefined tools | Single-Task AI Agent | LLM adds flexibility within bounded scope, per-call auth sufficient | Medium |
| Dynamic multi-source research requiring parallel investigation | Runtime Agentic System | Task graph unknown at design time, requires delegation and replanning | High |
| Adaptive customer support with policy exceptions | Runtime Agentic System | Requires runtime decision-making, sub-agent delegation for escalation, reasoning audit | High |
| High-volume transaction processing | Deterministic Workflow | Throughput and compliance demand fixed paths, agentic overhead unjustified | Low |
Configuration Template
# agentic-governance.yaml
# Governance settings grouped by concern. Each setting is nested under its
# section key; without the indentation every setting parses as a top-level key.

# Authorization: evaluate every action, deny when no rule matches.
policy_engine:
  mode: zero_trust
  evaluation: per_action
  fallback: deny

# Agent-to-agent delegation limits.
delegation:
  token_ttl_seconds: 300
  max_depth: 3
  scope_inheritance: narrow_only

# Audit trail storage and content.
audit:
  storage: append_only
  schema_version: "1.0"
  capture_reasoning: true
  retention_days: 2555 # ~7 years for compliance

# Budgets enforced per prompt.
quotas:
  enforcement_level: prompt
  max_actions_per_prompt: 200
  max_compute_tokens: 500000
  circuit_breaker: pause_and_notify

# Agent identity provisioning.
identity:
  provider: oidc
  agent_registration: dynamic
  impersonation: supported
  shared_keys: disabled
Quick Start Guide
- Initialize the Policy Gateway: Deploy the external policy engine and configure initial RBAC rules. Ensure all tool requests are routed through it before execution.
- Configure OIDC Delegation: Set up your identity provider to issue scoped tokens for agent-to-agent delegation. Define maximum depth and TTL constraints.
- Wire the Audit Stream: Connect the execution auditor to an append-only storage backend. Verify that reasoning snapshots and delegation chains are captured alongside tool calls.
- Enforce Prompt Quotas: Implement budget tracking at the orchestrator level. Test with synthetic fan-out scenarios to confirm that caps pause execution and trigger notifications.
- Validate Failure Paths: Simulate tool failures and unexpected data shapes. Confirm that the system replans rather than retries, and that audit logs reflect the decision shift.