y: string;
branchPrefix: string;
}
/**
 * One file-level edit inside a change manifest.
 */
interface FileChange {
  /** Path of the affected file; the pipeline joins it onto the workspace directory. */
  path: string;

  /** Kind of edit to perform on the file. */
  operation: 'create' | 'update' | 'delete';

  /**
   * Optional content carried with the change.
   * NOTE(review): presumably the planned file body — confirm against the planner's output.
   */
  content?: string;

  /** Reason given for making this change. */
  rationale: string;
}
/**
 * The plan produced for a task: the set of file changes to apply plus the
 * steps listed for verifying them.
 */
interface ChangeManifest {
  /** Identifier of the task this manifest belongs to. */
  taskId: string;

  /** Name of the branch the changes are intended for. */
  targetBranch: string;

  /** File edits to apply, in the order listed. */
  changes: Array<FileChange>;

  /** Verification steps recorded alongside the plan. */
  verificationSteps: Array<string>;
}
**The Orchestrator Class**
The `CodingPipeline` class implements the state machine. It enforces retry limits, manages the feedback loop, and handles GitHub integration via the Octokit REST client.
```typescript
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { Octokit } from '@octokit/rest';
import { execa } from 'execa';
/**
 * Orchestrates one autonomous coding run as a small state machine:
 * plan → apply → verify (bounded retry loop with error feedback) → package
 * the result as a draft pull request.
 *
 * NOTE(review): `buildPlanPrompt`, `buildExecutionPrompt`, `refinePlan` and
 * `commitAndPush` are called below but are not part of this listing; they must
 * be provided on the class for it to compile.
 */
export class CodingPipeline {
  /** Maximum number of apply/verify attempts per run. */
  private readonly maxRetries: number = 3;

  /**
   * Spend ceiling in USD per run.
   * NOTE(review): nothing in this class reads this value yet, so the ceiling
   * is not enforced — wiring it up requires cost data from the LLM client.
   */
  private readonly costCeiling: number = 5.00; // USD per run

  constructor(
    private readonly octokit: Octokit,
    private readonly llmClient: LLMInterface,
    private readonly workspaceManager: WorkspaceManager
  ) {}

  /**
   * Runs the full pipeline for a task.
   *
   * @param task - The task to work on; its repository is cloned into a workspace.
   * @returns The result of the draft PR creation. `success` is `false` when
   *   verification never passed, even though a draft PR is still opened.
   * @throws Propagates any error from planning, applying changes, pushing or
   *   the GitHub API. The workspace is cleaned up in every case.
   */
  async execute(task: TaskContext): Promise<PipelineResult> {
    const workspace = await this.workspaceManager.clone(task.repository);
    const targetBranch = `${task.branchPrefix}/${task.issueId}-auto-fix`;

    try {
      // Stage 1: Plan
      const manifest = await this.generatePlan(task, workspace);

      // Stage 2 & 3: Execute with Verification Loop
      let attempts = 0;
      let isVerified = false;
      let lastError = '';

      while (attempts < this.maxRetries && !isVerified) {
        await this.applyChanges(manifest, workspace, lastError);
        const verificationResult = await this.runVerification(workspace);
        isVerified = verificationResult.passed;

        if (!isVerified) {
          lastError = verificationResult.output;
          attempts++;

          // Feedback loop: refine manifest based on error
          if (attempts < this.maxRetries) {
            await this.refinePlan(manifest, lastError);
          }
        }
      }

      // Stage 4: Package
      // Push in both outcomes: a pull request can only be opened against a
      // head branch that exists on the remote, and pushing is what preserves
      // the failed attempt for human inspection once the workspace is removed.
      await this.commitAndPush(workspace, targetBranch);

      if (!isVerified) {
        const failed = await this.createDraftPR(
          task,
          workspace,
          targetBranch,
          'Verification failed after max retries'
        );
        // A PR was opened, but the run itself did not succeed.
        return { ...failed, success: false };
      }

      // `return await` so the PR call settles before the `finally` cleanup runs.
      return await this.createDraftPR(task, workspace, targetBranch, 'All checks passed');
    } finally {
      await this.workspaceManager.cleanup(workspace);
    }
  }

  /**
   * Asks the LLM for a change manifest and validates the parts of it this
   * class relies on.
   *
   * @throws Error when the response is not valid JSON or lacks a usable
   *   `changes` list.
   */
  private async generatePlan(task: TaskContext, workspace: string): Promise<ChangeManifest> {
    const prompt = this.buildPlanPrompt(task, workspace);
    const response = await this.llmClient.generate(prompt);

    let parsed: unknown;
    try {
      parsed = JSON.parse(response);
    } catch (cause: unknown) {
      const detail = cause instanceof Error ? cause.message : String(cause);
      throw new Error(`Plan response is not valid JSON: ${detail}`);
    }

    return CodingPipeline.validateManifest(parsed);
  }

  /**
   * Checks that a parsed plan has a `changes` array whose entries each carry a
   * non-empty string `path` and a known `operation`. Only the fields this
   * class reads are checked; the remaining manifest fields are passed through.
   *
   * @throws Error describing the first problem found.
   */
  private static validateManifest(parsed: unknown): ChangeManifest {
    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('Plan response must be a JSON object');
    }

    const changes = (parsed as { changes?: unknown }).changes;
    if (!Array.isArray(changes)) {
      throw new Error('Plan response is missing a "changes" array');
    }

    changes.forEach((entry: unknown, index: number) => {
      if (typeof entry !== 'object' || entry === null) {
        throw new Error(`Plan change #${index} is not an object`);
      }
      const { path: changePath, operation } = entry as { path?: unknown; operation?: unknown };
      if (typeof changePath !== 'string' || changePath.length === 0) {
        throw new Error(`Plan change #${index} has no "path"`);
      }
      if (operation !== 'create' && operation !== 'update' && operation !== 'delete') {
        throw new Error(`Plan change #${index} has an unknown "operation"`);
      }
    });

    // Safe after the runtime checks above.
    return parsed as ChangeManifest;
  }

  /**
   * Applies every change in the manifest to the workspace. Deletes remove the
   * file; creates and updates write content generated by the LLM.
   *
   * @param contextError - Output of the previous failed verification, passed
   *   into the execution prompt to guide the model.
   * @throws Error when a change path resolves outside the workspace.
   */
  private async applyChanges(manifest: ChangeManifest, workspace: string, contextError?: string): Promise<void> {
    const root = path.resolve(workspace);

    for (const change of manifest.changes) {
      const filePath = path.resolve(path.join(root, change.path));

      // Paths come from model output: never let `..` segments reach files
      // outside the workspace (or the workspace directory itself).
      const relative = path.relative(root, filePath);
      const escapesRoot =
        relative === '' ||
        relative === '..' ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
      if (escapesRoot) {
        throw new Error(`Refusing to modify a path outside the workspace: ${change.path}`);
      }

      if (change.operation === 'delete') {
        // `force` makes the delete idempotent: this method is re-run on every
        // retry, when the file removed by an earlier attempt is already gone.
        await fs.rm(filePath, { force: true });
        continue;
      }

      // For updates, inject error context if available to guide the model
      const prompt = this.buildExecutionPrompt(change, contextError);
      const newContent = await this.llmClient.generate(prompt);

      // A created file may live in a directory that does not exist yet.
      await fs.mkdir(path.dirname(filePath), { recursive: true });
      await fs.writeFile(filePath, newContent);
    }
  }

  /**
   * Runs the type checker and then the test suite inside the workspace.
   *
   * @returns `passed: true` with empty output when both commands succeed;
   *   otherwise `passed: false` with the failing command's output.
   */
  private async runVerification(workspace: string): Promise<VerificationResult> {
    try {
      await execa('npm', ['run', 'typecheck'], { cwd: workspace });
      await execa('npm', ['test'], { cwd: workspace });
      return { passed: true, output: '' };
    } catch (error: unknown) {
      return { passed: false, output: CodingPipeline.describeFailure(error) };
    }
  }

  /**
   * Builds feedback text from a failed command: stdout and stderr when either
   * is present (both are kept, since tools differ in which stream carries the
   * diagnostics), otherwise the error message, so the output is never
   * `undefined` when the command could not be started at all.
   */
  private static describeFailure(error: unknown): string {
    const streams: string[] = [];

    if (typeof error === 'object' && error !== null) {
      const { stdout, stderr } = error as { stdout?: unknown; stderr?: unknown };
      for (const stream of [stdout, stderr]) {
        if (typeof stream === 'string' && stream.trim() !== '') {
          streams.push(stream);
        }
      }
    }

    if (streams.length > 0) {
      return streams.join('\n');
    }
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Opens a draft pull request from `branch` into `main` and labels it as
   * agent-generated.
   *
   * @param status - Human-readable outcome, included in the PR body and result.
   * @throws Error when `task.repository` does not start with `owner/repo`.
   */
  private async createDraftPR(
    task: TaskContext,
    workspace: string,
    branch: string,
    status: string
  ): Promise<PipelineResult> {
    const [owner, repo] = task.repository.split('/');
    if (!owner || !repo) {
      throw new Error(`Expected repository in "owner/repo" form, got "${task.repository}"`);
    }

    const { data: pr } = await this.octokit.pulls.create({
      owner,
      repo,
      title: `[Auto] ${task.description}`,
      head: branch,
      base: 'main',
      draft: true,
      body: `## Automated Change\n\n**Status:** ${status}\n\n**Task:** ${task.issueId}\n\nThis PR was generated by the autonomous coding pipeline. Please review carefully.`
    });

    // Add provenance label
    await this.octokit.issues.addLabels({
      owner,
      repo,
      issue_number: pr.number,
      labels: ['agent-generated', 'needs-review']
    });

    return { success: true, prUrl: pr.html_url, status };
  }
}
```

3. Architecture Rationale
- Separation of Plan and Execute: Collapsing these stages leads to context overflow and incoherent diffs. The `ChangeManifest` acts as a contract. The executor only implements the plan, reducing the cognitive load on the model and making changes predictable.
- Verification Feedback Loop: The `runVerification` step is the safety net. By capturing stdout/stderr and feeding it back into the next execution attempt, the agent can self-correct compilation errors and test failures. This iterative refinement is what distinguishes an agent from a generator.
- Bounded Retries: The `maxRetries` constant prevents infinite loops and runaway costs. If the agent cannot resolve errors within the limit, it halts and creates a draft PR with the failure status, preserving the work done so far for human inspection.
- Isolated Workspace: All operations occur in a temporary directory managed by `WorkspaceManager`. This prevents side effects on the host system and ensures a clean environment for verification.
- Draft PRs and Labels: Every PR is created as a draft with an `agent-generated` label. This enforces a human-in-the-loop review process and provides provenance tracking for compliance and auditing.
Pitfall Guide
Production deployments of autonomous coding agents frequently fail due to architectural oversights. The following pitfalls and fixes are derived from real-world implementation experience.
| Pitfall | Explanation | Fix |
|---|---|---|
| The "God Prompt" Trap | Attempting to generate code, tests, and plans in a single prompt. This overwhelms the context window and produces low-quality output for anything beyond trivial fixes. | Decompose the pipeline into distinct stages. Use the ChangeManifest to separate planning from execution. |
| Unbounded Retry Loops | Failing to cap retries causes the agent to loop indefinitely on unfixable errors, burning tokens and incurring high costs. | Implement a hard maxRetries limit (e.g., 3). If the limit is reached, stop execution and open a draft PR with the error log. |
| Verification Blindness | Relying on the model's confidence or skipping external validation. LLMs often report success even when code is broken. | Always run external tools (type checker, linter, test suite) in the verification stage. Use the tool output as the ground truth for success. |
| Credential Over-Privilege | Using organization-wide tokens or tokens with excessive scopes. A compromised agent or buggy script can cause widespread damage. | Use fine-grained Personal Access Tokens (PATs) scoped to a single repository. Grant only contents:write and pull_requests:write. |
| Idempotency Failures | Running the pipeline multiple times creates duplicate branches or PRs, cluttering the repository. | Use predictable branch naming conventions (e.g., auto/<issue>-<slug>). Check for existing branches before creation; update if present. |
| The "Thin Test" Trap | Enabling the agent on repositories with insufficient test coverage. Without a verification gate, the agent cannot validate changes. | Assess test coverage before deployment. Only enable the agent for tasks with objective success criteria (e.g., passing tests, clean type checks). |
| Rubber-Stamping Reviews | Developers approving bot-generated PRs without review due to trust or fatigue. This introduces unvetted code into the codebase. | Enforce draft status for all bot PRs. Require explicit human review and CI validation before merging. Treat all agent output as untrusted until verified. |
Production Bundle
Action Checklist
Decision Matrix
Use this matrix to determine when to deploy the autonomous pipeline versus traditional methods.
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Dependency Migration | Autonomous Pipeline | Repetitive, objective success criteria, high volume. | Low; bounded retries, high ROI. |
| Feature Development | Human + AI Assistant | Ambiguous requirements, product judgment, cross-cutting changes. | N/A; not suitable for automation. |
| Legacy API Refactor | Autonomous Pipeline | Codemod-able, testable, mechanical transformation. | Medium; depends on complexity and retries. |
| UI/UX Polish | Human | Subjective evaluation, requires visual feedback. | N/A; verification gate ineffective. |
| Documentation Update | Autonomous Pipeline | Text-based, low risk, objective structure. | Low; minimal token usage. |
Configuration Template
Use this JSON configuration to parameterize the pipeline for different repositories and tasks.
{
"pipeline": {
"maxRetries": 3,
"costCeilingUSD": 5.00,
"branchPrefix": "auto",
"labels": ["agent-generated", "needs-review"],
"verification": {
"typeCheck": "npm run typecheck",
"testSuite": "npm test",
"linter": "npm run lint"
},
"security": {
"scope": "repository",
"permissions": ["contents:write", "pull_requests:write"],
"runInContainer": true
},
"triggers": {
"webhookLabel": "agent-ready",
"cronSchedule": "0 2 * * *"
}
}
}
Quick Start Guide
- Provision Credentials: Generate a fine-grained PAT with repository-scoped permissions. Add it to your pipeline's environment configuration.
- Deploy Pipeline: Clone the pipeline repository and configure the `config.json` file with your repository details and verification commands.
- Set Up Trigger: Configure a GitHub webhook to listen for the `agent-ready` label on issues. Alternatively, set up a cron job to drain a task queue.
- Run First Task: Label an issue with `agent-ready` that describes a mechanical task (e.g., "Migrate date utils to date-fns"). Monitor the pipeline logs for execution.
- Review Output: Check the repository for a new draft PR. Verify the diff, ensure CI passes, and review the changes before merging.