nternal' | 'restricted' | 'confidential';
requiresApproval: boolean;
}
/**
 * Lookup table mapping a tool identifier to its declared capability.
 *
 * `PolicyEngine.validateExecution` indexes this table with each requested
 * tool name, so the key is the identifier an agent must use to request a tool.
 */
interface ToolRegistry {
// The index signature accepts any string key; the type itself does not
// restrict which tool names exist or require that key and `name` match.
[key: string]: ToolCapability;
}
/**
 * Tool registry for the infrastructure-review agent.
 *
 * Three read-only tools (Jira, GitHub, AWS metadata) that are not flagged for
 * approval, plus one write-capable Slack notifier that is. Each capability is
 * registered under its own `name`, so the lookup key and the `name` field
 * always agree.
 */
const INFRA_REVIEW_REGISTRY: ToolRegistry = (() => {
  // Declaration order is preserved as the registry's key order.
  const capabilities: ToolCapability[] = [
    {
      name: 'jira_ticket_reader',
      allowedActions: ['read'],
      dataClassification: 'internal',
      requiresApproval: false,
    },
    {
      name: 'github_pr_inspector',
      allowedActions: ['read'],
      dataClassification: 'internal',
      requiresApproval: false,
    },
    {
      name: 'aws_metadata_query',
      allowedActions: ['read'],
      dataClassification: 'restricted',
      requiresApproval: false,
    },
    {
      name: 'slack_risk_notifier',
      allowedActions: ['write'],
      dataClassification: 'internal',
      requiresApproval: true,
    },
  ];

  const byName: ToolRegistry = {};
  for (const capability of capabilities) {
    byName[capability.name] = capability;
  }
  return byName;
})();
**Rationale:** Capability scoping prevents privilege creep. By declaring `dataClassification` and `requiresApproval` at the tool level, the runtime can enforce policy before execution. This replaces implicit permissions with explicit, auditable contracts.
### Step 2: Implement a Policy Engine for Execution Context Validation
Before any tool executes, the runtime must validate the execution context against organizational policy. This includes verifying identity binding, checking data classification boundaries, and enforcing approval requirements.
```typescript
/**
 * Describes a single agent execution request that the policy engine validates
 * before any tool runs.
 */
interface ExecutionContext {
// Identifier of the agent making the request; copied into every audit event.
agentId: string;
// Role of the requester; the policy engine compares it against 'security-admin'
// when a requested tool is classified 'confidential'.
requesterRole: string;
// Target environment; the policy engine only requests human approval for 'prod'.
targetEnvironment: 'dev' | 'staging' | 'prod';
// Names of the tools to run; each one is looked up in the tool registry.
requestedTools: string[];
}
/**
 * Gatekeeper that decides whether the tools named in an execution context may
 * run, based on the tool registry, the requester's role and the target
 * environment.
 */
class PolicyEngine {
  /**
   * Validates every requested tool, in order, stopping at the first failure.
   *
   * @param context - The execution request (requester role, environment, tool names).
   * @param registry - The registry whose own entries define the permitted tools.
   * @returns `true` when every tool passes; `false` as soon as a required
   *   human approval is declined.
   * @throws Error when a tool name is not an own entry of `registry`, or when a
   *   `confidential` tool is requested by a role other than `security-admin`.
   */
  async validateExecution(context: ExecutionContext, registry: ToolRegistry): Promise<boolean> {
    for (const toolName of context.requestedTools) {
      // Own-property lookup: a plain-object registry inherits keys such as
      // `constructor`, `toString` and `__proto__` from Object.prototype, and a
      // bare truthiness test on `registry[toolName]` would treat those as
      // registered tools and let them through every later check.
      const capability = Object.prototype.hasOwnProperty.call(registry, toolName)
        ? registry[toolName]
        : undefined;
      if (!capability) {
        throw new Error(`Unauthorized tool access: ${toolName}`);
      }

      if (capability.dataClassification === 'confidential' && context.requesterRole !== 'security-admin') {
        throw new Error(`Role ${context.requesterRole} lacks clearance for ${toolName}`);
      }

      // Approval is only requested for production targets.
      if (capability.requiresApproval && context.targetEnvironment === 'prod') {
        const approved = await this.requestHumanApproval(toolName, context);
        if (!approved) return false;
      }
    }
    return true;
  }

  /**
   * Placeholder approval gateway: logs the request and always approves.
   *
   * @param tool - Name of the tool awaiting approval.
   * @param context - The execution request the approval belongs to.
   * @returns Always `true` in this simulated implementation.
   */
  private async requestHumanApproval(tool: string, context: ExecutionContext): Promise<boolean> {
    // Integration with Jira/Slack approval workflow
    console.log(`[POLICY] Approval requested for ${tool} in ${context.targetEnvironment}`);
    return true; // Simulated approval gateway
  }
}
```

**Rationale:** Policy-as-code ensures security controls are enforced at runtime, not just in documentation. The engine acts as a gatekeeper, validating role-based access, environment boundaries, and approval requirements before tool invocation. This prevents agents from bypassing change management or accessing restricted data classes.
### Step 3: Structure Audit Logging with Execution Traces
Production agents must generate immutable, structured audit trails. Raw console logs are insufficient for compliance or incident response. Each execution should capture the decision chain, tool calls, data classifications accessed, and human overrides.
/**
 * One structured audit record describing a single runtime decision or tool
 * invocation.
 */
interface AuditEvent {
// Correlates all events of one execution; AuditStream.getTrace filters on it.
traceId: string;
// When the event was recorded; the runtime fills it with Date#toISOString().
timestamp: string;
// Agent that issued the request.
agentId: string;
// What happened; the runtime emits 'execution_blocked' and 'tool_invocation'.
action: string;
// Tool name, or 'N/A' when the runtime blocks a whole execution.
tool: string;
// Data classification of the tool, or 'N/A' when the runtime blocks a whole execution.
dataClass: string;
// Approval state attached to the decision.
approvalStatus: 'auto' | 'pending' | 'approved' | 'denied';
// Result of the decision or invocation.
outcome: 'success' | 'blocked' | 'error';
}
/**
 * In-memory, append-only collector of audit events that also echoes each
 * event to the console as a JSON line.
 */
class AuditStream {
  private readonly events: AuditEvent[] = [];

  /**
   * Records an audit event.
   *
   * A frozen shallow copy is stored rather than the caller's object, so
   * changing the original after logging, or changing an entry returned by
   * `getTrace`, cannot rewrite what was recorded.
   *
   * @param event - The event to record.
   */
  log(event: AuditEvent): void {
    const snapshot = Object.freeze({ ...event });
    this.events.push(snapshot);
    // Forward to SIEM, CloudWatch, or centralized log aggregator
    console.log(JSON.stringify(snapshot));
  }

  /**
   * Returns the recorded events for one execution, in the order they were logged.
   *
   * @param traceId - Trace identifier to match exactly.
   * @returns A new array of the matching events; empty when none match.
   */
  getTrace(traceId: string): AuditEvent[] {
    return this.events.filter((recorded) => recorded.traceId === traceId);
  }
}
Rationale: Structured audit streams enable forensic analysis, compliance reporting, and automated anomaly detection. By tagging every action with a traceId, dataClass, and approvalStatus, security teams can reconstruct agent behavior, verify policy adherence, and detect unauthorized chaining of operations.
### Step 4: Orchestrate the Secure Runtime
The final layer ties tool execution, policy validation, and audit logging into a single execution loop. The runtime never executes tools directly; it routes them through the policy engine and logs every decision.
/**
 * Execution loop that validates a request with the policy engine and writes an
 * audit event for every decision, including requests the policy rejects.
 */
class SecureAgentRuntime {
  constructor(
    private readonly policy: PolicyEngine,
    private readonly audit: AuditStream,
    private readonly registry: ToolRegistry
  ) {}

  /**
   * Validates the request and, when permitted, records one `tool_invocation`
   * audit event per requested tool under a shared trace id.
   *
   * @param context - The execution request to validate and run.
   * @throws Error 'Policy validation failed. Execution halted.' when the policy
   *   engine returns `false`.
   * @throws Whatever the policy engine throws, rethrown unchanged after a
   *   blocked event has been audited.
   * @throws Error 'Unauthorized tool access: <tool>' when a requested tool is
   *   not an own entry of the registry.
   */
  async execute(context: ExecutionContext): Promise<void> {
    // NOTE(review): `crypto` is the global Web Crypto object here; confirm the
    // target runtime exposes it, otherwise import it explicitly.
    const traceId = crypto.randomUUID();

    let isValid: boolean;
    try {
      isValid = await this.policy.validateExecution(context, this.registry);
    } catch (error: unknown) {
      // A policy rejection raised as an exception is still a decision: audit
      // it before propagating, otherwise denied requests leave no trace.
      this.logBlocked(traceId, context, 'N/A');
      throw error;
    }
    if (!isValid) {
      this.logBlocked(traceId, context, 'N/A');
      throw new Error('Policy validation failed. Execution halted.');
    }

    for (const tool of context.requestedTools) {
      // Own-property lookup so keys inherited from Object.prototype
      // (`constructor`, `toString`, ...) are never treated as capabilities,
      // even if the injected policy engine let them through.
      const cap = Object.prototype.hasOwnProperty.call(this.registry, tool)
        ? this.registry[tool]
        : undefined;
      if (!cap) {
        this.logBlocked(traceId, context, tool);
        throw new Error(`Unauthorized tool access: ${tool}`);
      }

      this.audit.log({
        traceId,
        timestamp: new Date().toISOString(),
        agentId: context.agentId,
        action: 'tool_invocation',
        tool,
        dataClass: cap.dataClassification,
        // NOTE(review): reports 'approved' for every approval-flagged tool,
        // whether or not the policy engine actually asked for approval in
        // this environment — confirm this is the intended audit semantics.
        approvalStatus: cap.requiresApproval ? 'approved' : 'auto',
        outcome: 'success',
      });
      // Actual tool execution would occur here with ephemeral credentials
    }
  }

  /** Writes the `execution_blocked` audit event for a denied request. */
  private logBlocked(traceId: string, context: ExecutionContext, tool: string): void {
    this.audit.log({
      traceId,
      timestamp: new Date().toISOString(),
      agentId: context.agentId,
      action: 'execution_blocked',
      tool,
      dataClass: 'N/A',
      approvalStatus: 'denied',
      outcome: 'blocked',
    });
  }
}
Rationale: This architecture enforces security by design. The runtime never trusts implicit permissions. Every tool call is validated, logged, and bound to a traceable execution context. This pattern scales across infrastructure review, security automation, and operational orchestration while maintaining compliance boundaries.
Pitfall Guide
1. The "Read-All" Fallacy
Explanation: Granting agents broad read access to repositories, ticketing systems, or cloud metadata under the assumption that read-only operations are harmless. In practice, aggregated read access enables data reconstruction, privilege mapping, and lateral movement.
Fix: Implement capability-scoped tool registries. Restrict reads to specific projects, environments, or data classes. Use attribute-based access control (ABAC) to limit scope dynamically.
2. Silent State Changes
Explanation: Allowing agents to modify configurations, create tickets, or trigger deployments without explicit approval gates. Silent automation bypasses change management and creates compliance violations.
Fix: Enforce human-in-the-loop approval for any write or execute action. Route approvals through existing ticketing systems (Jira, ServiceNow) with mandatory reviewer assignment and SLA tracking.
3. Prompt Injection Blind Spots
Explanation: Treating agent inputs as trusted data. Malicious or malformed prompts can override system instructions, exfiltrate context, or trigger unintended tool chains.
Fix: Implement input sanitization, system prompt isolation, and output validation layers. Use structured data formats (JSON schemas) instead of free-text prompts for tool parameters. Deploy prompt injection detection middleware where available.
4. Identity Drift
Explanation: Agent credentials outliving human sessions or remaining static across deployments. Long-lived API keys or service accounts become high-value targets if compromised.
Fix: Use ephemeral credentials with short TTLs. Bind agent execution to human requester identity via SSO/SCIM. Rotate credentials automatically and enforce just-in-time access provisioning.
5. Log Amnesia
Explanation: Relying on console output or unstructured logs for audit trails. Missing timestamps, trace IDs, or approval status makes incident response and compliance reporting impossible.
Fix: Mandate structured audit schemas. Forward logs to centralized SIEM or log aggregators. Enforce retention policies aligned with regulatory requirements (e.g., 7 years for financial data).
6. SDLC Bypass
Explanation: Deploying AI-generated infrastructure-as-code or configuration changes without passing through standard CI/CD pipelines, policy checks, or peer review.
Fix: Integrate agent outputs into existing SDLC gates. Require PR creation, SAST/SCA scanning, IaC policy validation (OPA, Checkov), and code owner approval before any state change reaches production.
7. Context Overload
Explanation: Feeding agents raw secrets, full database dumps, or unredacted configuration files. This increases exposure risk and violates data classification policies.
Fix: Use reference-based context injection. Pass resource IDs, ticket numbers, or environment tags instead of raw data. Let the agent fetch only what it needs through scoped tool calls.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Internal knowledge retrieval (Confluence, Drive) | Workspace DLP + SSO/SCIM + connector restrictions | Low risk, read-only, governed by existing identity controls | Low (admin configuration only) |
| Cross-system orchestration (Jira, GitHub, AWS metadata) | Secure agent harness with capability scoping + audit logging | Medium risk, requires least-privilege tool access and traceability | Medium (runtime development + SIEM integration) |
| Automated remediation or deployment | Policy-gated harness + human approval + SDLC integration | High risk, state changes require compliance validation and rollback capability | High (approval workflow engineering + compliance monitoring) |
| HR or financial data processing | Isolated agent runtime + strict data classification + no external connectors | Regulatory risk, requires air-gapped execution and audit retention | High (compliance validation + dedicated infrastructure) |
Configuration Template
# agent-harness-config.yaml
runtime:
version: "1.0"
identity_binding: "sso-scim"
credential_rotation: "ephemeral-15m"
policy_engine:
approval_required_for: ["write", "execute"]
restricted_data_classes: ["confidential", "pii", "financial"]
environment_gates:
prod:
requires_human_approval: true
allowed_tools: ["read_only_scanners", "audit_loggers"]
dev:
requires_human_approval: false
allowed_tools: ["read_only_scanners", "audit_loggers", "sandbox_writers"]
audit:
format: "structured-json"
fields: ["traceId", "timestamp", "agentId", "tool", "dataClass", "approvalStatus", "outcome"]
sink: "siem-cloudwatch"
retention_days: 2555
tools:
jira_reader:
actions: ["read"]
classification: "internal"
scope: "project-key:SEC-*"
github_inspector:
actions: ["read"]
classification: "internal"
scope: "repo:org/infra-*"
aws_metadata:
actions: ["read"]
classification: "restricted"
scope: "account:dev-env,region:us-east-1"
slack_notifier:
actions: ["write"]
classification: "internal"
requires_approval: true
Quick Start Guide
- Initialize the Runtime: Clone the secure agent harness repository and configure `agent-harness-config.yaml` with your organization's identity provider, tool scopes, and audit sink.
- Register Tools: Define each external system integration (Jira, GitHub, AWS, Slack) in the tool registry with explicit `actions`, `classification`, and `scope` constraints.
- Deploy Policy Engine: Run the policy validation service in your staging environment. Verify that approval gates trigger for `prod` environments and that restricted data classes block unauthorized roles.
- Connect Audit Stream: Configure the structured logger to forward events to your SIEM or log aggregator. Validate trace ID propagation across tool calls and approval workflows.
- Execute First Run: Trigger a read-only infrastructure review workflow. Confirm that audit events capture tool invocations, data classifications, and approval status. Iterate on policy rules before enabling write capabilities.