tasks requiring occasional host session reuse without cloud dependency.
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
// Name and version this MCP server instance is constructed with.
const serverInfo = { name: "local-browser-bridge", version: "1.0.0" };

// Server instance that the tool handlers in this snippet are registered on.
const server = new McpServer(serverInfo);
// Tool: extract_page_data
// Loads `url` in a freshly launched headless Chromium, reads the innerHTML of the
// first element matching `selector`, and passes that markup to llmExtractToJson.
// Returns `{ success: true, payload }` where payload is the parsed extraction.
// Any failure (navigation, selector lookup, extraction) propagates to the caller.
server.tool(
  "extract_page_data",
  "Parse DOM structure and return structured JSON from a target URL",
  { url: z.string().url(), selector: z.string() },
  async ({ url, selector }) => {
    // Launches headless or attaches via --extension flag
    const browser = await playwright.chromium.launch({
      headless: true,
      args: ["--disable-blink-features=AutomationControlled"]
    });
    try {
      const context = await browser.newContext();
      const page = await context.newPage();
      // Only wait for the DOM to be parsed; subresources are not needed to read innerHTML.
      await page.goto(url, { waitUntil: "domcontentloaded" });
      const rawHtml = await page.$eval(selector, el => el.innerHTML);
      const parsed = await llmExtractToJson(rawHtml);
      return { success: true, payload: parsed };
    } finally {
      // Always shut the browser down: without this, a failed goto/$eval/extraction
      // would leave the launched Chromium process running.
      await browser.close();
    }
  }
);
/**
 * Sends raw HTML to the chat-completions endpoint in JSON mode and returns the
 * model's reply parsed as JSON.
 *
 * Runtime extraction: ~9,600 tokens per invocation.
 *
 * @param html - Markup to extract from; interpolated verbatim into the user prompt.
 * @returns The value produced by `JSON.parse` on the first choice's content.
 * @throws Error when the response has no first choice or its content is missing/empty.
 * @throws SyntaxError when the returned content is not valid JSON.
 */
async function llmExtractToJson(html: string) {
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [
      // JSON mode requires an explicit instruction to produce JSON somewhere in the
      // conversation; the user prompt alone only contains that word by accident of the HTML.
      { role: "system", content: "You are a data extraction service. Respond with a single valid JSON object." },
      { role: "user", content: `Extract structured data from: ${html}` }
    ],
    response_format: { type: "json_object" }
  });

  // `content` may be null or the choices array empty; JSON.parse(null) would
  // silently return null, so fail loudly instead.
  const content = response.choices[0]?.message?.content;
  if (typeof content !== "string" || content.length === 0) {
    throw new Error("LLM response contained no JSON content to parse.");
  }
  return JSON.parse(content);
}
// Connect the server over a stdio transport (top-level await, so this must run as an ES module).
await server.connect(new StdioServerTransport());
Architecture Rationale: The --extension flag bridges local Chromium to the host browser profile, solving the headless authentication gap without external dependencies. Runtime extraction keeps the tool stateless, making it ideal for ephemeral research. The trade-off is linear token scaling and no native drift compensation.
Pattern 2: Cloud-Isolated Browser Cluster (Browserbase + Stagehand)
Best for: Teams requiring credential isolation, multi-tenant SaaS environments, or centralized session management.
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
// Required by the `new StdioServerTransport()` call at the end of this snippet.
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";

// Server instance for the cloud-isolated pattern; the tool below is registered on it.
const server = new McpServer({
  name: "cloud-browser-cluster",
  version: "1.0.0"
});
// Tool: run_cloud_extraction
// Creates a cloud browser session seeded with the caller's credential bundle,
// navigates to `targetUrl`, waits for "main.content", and extracts data according
// to `extractionSchema`. Returns `{ success: true, data }`.
// Any failure propagates to the caller after the session has been terminated.
server.tool(
  "run_cloud_extraction",
  "Execute browser task in isolated cloud environment with uploaded credentials",
  {
    targetUrl: z.string().url(),
    credentialBundle: z.record(z.string()),
    extractionSchema: z.record(z.any())
  },
  async ({ targetUrl, credentialBundle, extractionSchema }) => {
    // Credentials are explicitly provisioned to the cloud runtime
    const session = await browserbase.createSession({
      region: "us-east-1",
      cookies: credentialBundle,
      viewport: { width: 1280, height: 720 }
    });
    try {
      const stagehand = await Stagehand.attach(session.id);
      await stagehand.navigate(targetUrl);
      // NOTE(review): the readiness selector is hard-coded; pages without
      // "main.content" will presumably fail here — confirm against target sites.
      await stagehand.waitForSelector("main.content");
      const result = await stagehand.extract(extractionSchema);
      return { success: true, data: result };
    } finally {
      // Always terminate: otherwise a failed attach/navigate/extract would leave
      // the remote session, which holds the uploaded cookies, running.
      await session.terminate();
    }
  }
);
// Expose the cloud-cluster server over a stdio transport.
await server.connect(new StdioServerTransport());
Architecture Rationale: Cloud isolation decouples browser execution from developer machines, enabling consistent environments and centralized credential rotation. The explicit credential upload model can help meet SOC 2 and enterprise isolation requirements, but it moves credentials across a trust boundary to a third party, and that data flow must be audited. Runtime extraction remains the default, preserving flexibility at the cost of per-call inference.
Pattern 3: Local Session Replay Engine (Tap)
Best for: High-frequency repeated workflows where deterministic execution and zero per-call inference are mandatory.
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
// Required by the `new StdioServerTransport()` call at the end of this snippet.
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";

// Server instance for the deterministic replay pattern; the tool below is registered on it.
const server = new McpServer({
  name: "deterministic-replay-engine",
  version: "1.0.0"
});
// Selector for the story title links; the wait and extract steps target the same nodes.
const STORY_LINK_SELECTOR = "td.title > a";

// Execution plan produced once by AI compilation and replayed as-is afterwards.
// Steps run in order: navigate, wait for the links, extract fields, cap the result count.
const COMPILED_PLAN = {
  id: "plan_hn_top_stories_v2",
  operations: [
    { type: "navigate", url: "https://news.ycombinator.com" },
    { type: "wait", selector: STORY_LINK_SELECTOR },
    { type: "extract", selector: STORY_LINK_SELECTOR, fields: ["title", "link"] },
    { type: "limit", count: 30 }
  ]
};
// Tool: execute_replay_plan
// Replays the pre-compiled plan when `planId` matches it; any other id is rejected.
server.tool(
  "execute_replay_plan",
  "Run pre-compiled deterministic browser plan against live local session",
  { planId: z.string() },
  async ({ planId }) => {
    // Only the single compiled plan is known to this server.
    const isKnownPlan = planId === COMPILED_PLAN.id;
    if (!isKnownPlan) {
      throw new Error("Unknown plan ID. Compile new plan first.");
    }

    // Replay runs against the live Chrome extension session.
    const payload = await localExtensionOrchestrator.run(COMPILED_PLAN);

    // No LLM call is made during replay, hence the constant zero token count.
    const outcome = { success: true, payload, tokensConsumed: 0 };
    return outcome;
  }
);
// Expose the replay engine over a stdio transport.
await server.connect(new StdioServerTransport());
Architecture Rationale: Deterministic replay shifts inference cost from runtime to compilation. The AI compiles a 10-operation execution graph once, then replays it against the live host session. This eliminates per-call token consumption entirely, delivering ~849× cost reduction across 100 repeated queries. The constraint is upfront authoring time and sensitivity to DOM drift.
Pitfall Guide
1. Assuming Headless Browsers Inherit Host Credentials
Explanation: Headless Chromium launches in a clean profile by default. It does not inherit cookies, local storage, or session tokens from the host browser.
Fix: Use the --extension bridge pattern for local session reuse, or explicitly provision credentials through secure vaults. Never assume headless equals authenticated.
2. Ignoring DOM Drift in Deterministic Replay
Explanation: Compiled execution plans rely on stable selectors and DOM structure. Target site updates break replay graphs silently, returning empty payloads or stale data.
Fix: Implement drift detection hooks that validate selector existence before execution. Schedule periodic plan recompilation when structural changes exceed a threshold (e.g., 15% selector failure rate).
3. Underestimating Token Costs of Runtime Extraction
Explanation: Runtime extraction tools consume ~9,600 tokens per call. At 50 calls/day, this equals ~480,000 tokens daily, translating to significant LLM API costs and latency.
Fix: Classify workloads by repetition frequency. Migrate repeated tasks to deterministic replay architectures. Reserve runtime extraction for exploratory or one-off research.
4. Credential Exfiltration Compliance Gaps
Explanation: Uploading session cookies to third-party cloud clusters can violate SOC 2 commitments, GDPR obligations, and internal data residency policies at many organizations.
Fix: Audit credential lifecycle before selecting a cloud browser provider. Use local extension bridges or self-hosted isolated clusters when compliance mandates zero exfiltration.
5. Over-Investing in Replay for Ephemeral Research
Explanation: Deterministic replay requires upfront plan compilation, testing, and versioning. For single-use tasks, authoring overhead exceeds runtime extraction costs.
Fix: Apply a repetition threshold rule: only compile replay plans when expected executions exceed 10-15 runs. Use runtime extraction for prototyping and validation.
6. Misconfiguring MCP Transport for Long-Running Sessions
Explanation: Standard stdio transport ties the server to the lifetime of a single client process and is typically used for short-lived tool calls. Browser sessions requiring extended interaction or stateful navigation can time out or drop context.
Fix: Use an HTTP-based transport — Streamable HTTP, or the older SSE (Server-Sent Events) transport it replaces — or a WebSocket transport for long-running browser sessions. Implement explicit session lifecycle management with heartbeat checks.
7. Failing to Version Control Compiled Execution Plans
Explanation: Deterministic plans are infrastructure-as-code. Without versioning, teams lose reproducibility, cannot rollback broken updates, and struggle with team collaboration.
Fix: Store compiled plans in Git with semantic versioning. Implement plan diffing to track selector changes and execution graph modifications across releases.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| One-off research on unauthenticated sites | Playwright MCP (headless) | Zero setup overhead, stateless extraction | Baseline LLM token cost per call |
| Repeated extraction on stable targets | Tap (deterministic replay) | Eliminates per-call inference, amortizes compilation | ~849× reduction across 100 runs |
| Multi-tenant SaaS with isolated credentials | Browserbase + Stagehand | Centralized credential management, compliance isolation | Cloud infrastructure + runtime tokens |
| High-frequency internal tooling | Tap (local extension) | Zero exfiltration, live session reuse, deterministic execution | Near-zero marginal cost after compilation |
| Exploratory data gathering with unknown structure | Playwright MCP (--extension) | Flexible runtime extraction, adapts to novel DOM layouts | Linear token scaling, acceptable for low volume |
Configuration Template
{
"mcpServers": {
"browser-automation-suite": {
"command": "node",
"args": ["./dist/mcp-browser-server.js"],
"env": {
"MCP_TRANSPORT": "stdio",
"DRIFT_THRESHOLD": "0.15",
"TOKEN_BUDGET_DAILY": "500000",
"COMPLIANCE_MODE": "local_only"
},
"tools": {
"runtime_extraction": {
"enabled": true,
"maxTokensPerCall": 12000,
"fallbackStrategy": "retry_with_wider_selector"
},
"deterministic_replay": {
"enabled": true,
"planDirectory": "./compiled-plans",
"autoRecompile": true,
"driftDetection": "selector_existence_check"
},
"cloud_isolation": {
"enabled": false,
"provider": "browserbase",
"credentialVault": "aws_secrets_manager"
}
}
}
}
}
Quick Start Guide
- Initialize MCP Server: Scaffold a TypeScript project with `@modelcontextprotocol/sdk`, install Playwright or Tap CLI, and configure `tsconfig.json` for ESM output.
- Register Tool Handlers: Implement three distinct tool handlers matching your workload classification: runtime extraction, deterministic replay, and cloud isolation. Wire them to the MCP server instance.
- Configure Transport & Environment: Set `MCP_TRANSPORT` to `stdio` for CLI usage or `sse` for web dashboards. Define compliance mode and drift thresholds in environment variables.
- Compile First Replay Plan: Use the AI compilation endpoint to generate a deterministic execution graph for your most frequent task. Store it in `./compiled-plans` with semantic versioning.
- Validate & Monitor: Run a dry execution against a staging target. Monitor token consumption, selector success rates, and session lifecycle. Adjust drift thresholds and budget limits before production rollout.