lenge resolver.
import { FastifyInstance, FastifyReply, FastifyRequest } from 'fastify';
import { ChallengeEngine } from './challenge-engine';
import { TokenValidator } from './token-validator';
/**
 * Converts an exhausted rate-limit bucket into an HTTP 402 proof-of-work
 * challenge instead of rejecting the request outright.
 *
 * NOTE(review): `getRateBucket` and `continue` are invoked on `this` but are
 * not defined in this class body as shown — confirm they are supplied
 * elsewhere (base class, mixin, or an omitted part of the file).
 */
export class RateLimitRecoveryMiddleware {
  constructor(
    private readonly engine: ChallengeEngine,
    private readonly validator: TokenValidator
  ) {}

  /**
   * Handles one request.
   *
   * - Bucket not exhausted: forwards the request via `continue`.
   * - Bucket exhausted but the request carries a valid `X-Access-Token`:
   *   forwards the request (the token is the bypass credential issued after a
   *   solved challenge).
   * - Otherwise: replies with 402 and a freshly generated challenge.
   *
   * @param request Incoming Fastify request.
   * @param reply Fastify reply used to send the 402 challenge.
   */
  async handle(request: FastifyRequest, reply: FastifyReply): Promise<void> {
    const bucket = await this.getRateBucket(request);
    if (bucket.isExhausted() && !this.hasValidAccessToken(request)) {
      const challenge = await this.engine.generate('pow', {
        difficulty: 14,
        ttlMinutes: 5
      });
      // Advertise the difficulty of the challenge that was actually generated,
      // so header, body and engine configuration can never drift apart.
      reply
        .status(402)
        .header(
          'WWW-Authenticate',
          `PowChallenge realm="mcp-api", id="${challenge.id}", difficulty=${challenge.difficulty}`
        )
        .send({
          type: 'pow',
          id: challenge.id,
          salt: challenge.salt,
          difficulty: challenge.difficulty,
          expires_at: challenge.expiresAt
        });
      return;
    }
    await this.continue(request, reply);
  }

  /**
   * Reports whether the request carries an `X-Access-Token` header that the
   * injected validator accepts.
   */
  private hasValidAccessToken(request: FastifyRequest): boolean {
    const header = request.headers['x-access-token'];
    const token = Array.isArray(header) ? header[0] : header;
    if (typeof token !== 'string' || token.length === 0) {
      return false;
    }
    try {
      return this.validator.validate(token).valid;
    } catch {
      // A validator that throws on malformed input must not turn a bad token
      // into a server error; treat it as "no valid token".
      return false;
    }
  }
}
Step 2: Generate Deterministic Challenges
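The challenge engine produces SHA-256 puzzles with a configurable leading-zero requirement. In the implementation below, difficulty is the number of leading `0` characters required in the hex digest, so each additional unit of difficulty multiplies the expected number of hash attempts by 16. Production systems should scale difficulty dynamically based on queue depth rather than using static values.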
import { createHash, randomBytes } from 'crypto';
/** A proof-of-work challenge as produced by `ChallengeEngine.generate`. */
export interface ChallengePayload {
  /** Challenge identifier: 16 random bytes, hex-encoded. */
  id: string;
  /** Per-challenge salt: 32 random bytes, hex-encoded. */
  salt: string;
  /** Number of leading `0` hex characters a solution digest must have. */
  difficulty: number;
  /** Expiry as epoch milliseconds (`Date.now()` at generation plus the TTL). */
  expiresAt: number;
}

/** Generates SHA-256 proof-of-work challenges and checks proposed solutions. */
export class ChallengeEngine {
  /**
   * Creates a new challenge with a random id and salt.
   *
   * @param type Challenge type; only `'pow'` is accepted. It does not
   *   influence the generated payload.
   * @param config `difficulty` is copied into the payload; `ttlMinutes` sets
   *   how far in the future `expiresAt` lies.
   * @returns The generated challenge payload.
   */
  async generate(
    type: 'pow',
    config: { difficulty: number; ttlMinutes: number }
  ): Promise<ChallengePayload> {
    const id = randomBytes(16).toString('hex');
    const salt = randomBytes(32).toString('hex');
    const expiresAt = Date.now() + config.ttlMinutes * 60 * 1000;
    return { id, salt, difficulty: config.difficulty, expiresAt };
  }

  /**
   * Checks whether `nonce` solves the challenge, i.e. whether the hex SHA-256
   * digest of `challengeId:salt:nonce` starts with `difficulty` zero
   * characters.
   *
   * This method does not check expiry; callers must do that themselves.
   *
   * @returns `true` for a valid solution. `false` for an invalid solution or a
   *   `difficulty` that is not an integer in `[0, 64]`.
   */
  async verifySolution(
    challengeId: string,
    nonce: string,
    salt: string,
    difficulty: number
  ): Promise<boolean> {
    // A SHA-256 hex digest has 64 characters. Rejecting anything outside
    // [0, 64] up front matters because `'0'.repeat(NaN)` is the empty string,
    // which would make every nonce "valid", and negative or huge values would
    // throw a RangeError instead of returning a verdict.
    if (!Number.isInteger(difficulty) || difficulty < 0 || difficulty > 64) {
      return false;
    }
    const hash = createHash('sha256')
      .update(`${challengeId}:${salt}:${nonce}`)
      .digest('hex');
    return hash.startsWith('0'.repeat(difficulty));
  }
}
Step 3: Issue and Validate Access Tokens
Upon successful challenge resolution, the system issues an HMAC-signed token with a strict time-to-live. The token acts as a temporary bypass credential. Validation occurs on the retry request before forwarding to the MCP handler.
import { createHmac, timingSafeEqual } from 'crypto';
/**
 * Issues and validates HMAC-SHA256-signed access tokens of the form
 * `<challengeId>:<expiresAtMs>:<hexSignature>`.
 */
export class TokenValidator {
  constructor(private readonly secret: string) {}

  /**
   * Creates a signed token for `challengeId` that expires `ttlMs` milliseconds
   * from now.
   *
   * @param challengeId Identifier of the solved challenge.
   * @param ttlMs Token lifetime in milliseconds.
   * @returns The token string `challengeId:expiresAt:signature`.
   */
  issue(challengeId: string, ttlMs: number): string {
    const payload = `${challengeId}:${Date.now() + ttlMs}`;
    const signature = createHmac('sha256', this.secret).update(payload).digest('hex');
    return `${payload}:${signature}`;
  }

  /**
   * Verifies a token's signature and expiry. Never throws on malformed input.
   *
   * @param token Token string as returned by `issue`.
   * @returns `valid: true` with the decoded fields when the signature matches
   *   and the token has not expired. For an expired but authentic token the
   *   decoded fields are returned with `valid: false`. For anything malformed
   *   or forged, `{ valid: false, challengeId: '', expiresAt: 0 }`.
   */
  validate(token: string): { valid: boolean; challengeId: string; expiresAt: number } {
    const rejected = () => ({ valid: false, challengeId: '', expiresAt: 0 });
    if (typeof token !== 'string') {
      return rejected();
    }

    // The token has three colon-separated parts, so the signature is whatever
    // follows the LAST colon; everything before it is the signed payload.
    const signatureSep = token.lastIndexOf(':');
    if (signatureSep <= 0) {
      return rejected();
    }
    const payload = token.slice(0, signatureSep);
    const signature = token.slice(signatureSep + 1);

    const expected = createHmac('sha256', this.secret).update(payload).digest('hex');
    const given = Buffer.from(signature, 'utf8');
    const wanted = Buffer.from(expected, 'utf8');
    // timingSafeEqual throws when the lengths differ, so check that first.
    if (given.length !== wanted.length || !timingSafeEqual(given, wanted)) {
      return rejected();
    }

    // Split the payload on its last colon too, so a challenge id that itself
    // contains ':' still round-trips.
    const expirySep = payload.lastIndexOf(':');
    if (expirySep < 0) {
      return rejected();
    }
    const challengeId = payload.slice(0, expirySep);
    const expiresAt = parseInt(payload.slice(expirySep + 1), 10);
    // `Date.now() > NaN` is false, so an unparsable expiry must be rejected
    // explicitly or it would be treated as never expiring.
    if (!Number.isFinite(expiresAt)) {
      return rejected();
    }

    if (Date.now() > expiresAt) {
      return { valid: false, challengeId, expiresAt };
    }
    return { valid: true, challengeId, expiresAt };
  }
}
Step 4: Agent-Side Resolution Loop
The client agent detects the 402 status, extracts the challenge parameters, performs the computational work, and retries with the issued token. This loop must be non-blocking and respect exponential backoff if the challenge server is temporarily unavailable.
/**
 * Client-side helper that reacts to an HTTP 402 proof-of-work challenge:
 * solves it, exchanges the solution for an access token, and retries the
 * original request with that token attached.
 */
export class AgentChallengeResolver {
  /** Number of hash attempts between yields back to the event loop. */
  private static readonly YIELD_EVERY = 16384;

  /**
   * Returns `response` unchanged unless its status is 402; otherwise solves
   * the challenge in the response body and retries the original request.
   *
   * @param originalRequest Options of the original request. An optional `url`
   *   property names the retry target; when absent, `response.url` is used.
   * @param response Response received for the original request.
   * @returns The original response (non-402) or the response to the retry.
   * @throws Error when the challenge body is malformed or the verify endpoint
   *   does not return a token.
   */
  async resolveAndRetry(
    originalRequest: RequestInit & { url?: string },
    response: Response
  ): Promise<Response> {
    if (response.status !== 402) return response;

    const challenge = AgentChallengeResolver.parseChallenge(await response.json());
    const nonce = await this.solvePoW(challenge);

    const verifyRes = await fetch('/api/challenge/verify', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ challenge_id: challenge.id, nonce })
    });
    if (!verifyRes.ok) {
      throw new Error(`Challenge verification failed with status ${verifyRes.status}`);
    }
    const verified: unknown = await verifyRes.json();
    const token =
      typeof verified === 'object' && verified !== null
        ? (verified as { token?: unknown }).token
        : undefined;
    if (typeof token !== 'string' || token.length === 0) {
      throw new Error('Challenge verification response did not include a token');
    }

    // `headers` may be a Headers instance, an array of pairs, or a plain
    // object. Object spread only copies the last form, so normalise through
    // the Headers constructor to keep the caller's headers in every case.
    const { url, ...init } = originalRequest;
    const headers = new Headers(init.headers);
    headers.set('X-Access-Token', token);

    return fetch(url ?? response.url, { ...init, headers });
  }

  /**
   * Searches for a nonce such that the hex SHA-256 digest of
   * `id:salt:nonce` starts with `difficulty` zero characters.
   *
   * The search yields to the event loop periodically so timers and I/O keep
   * running while it works.
   */
  private async solvePoW(challenge: {
    id: string;
    salt: string;
    difficulty: number;
  }): Promise<string> {
    const target = '0'.repeat(challenge.difficulty);
    let nonce = 0;
    while (true) {
      const hash = createHash('sha256')
        .update(`${challenge.id}:${challenge.salt}:${nonce}`)
        .digest('hex');
      if (hash.startsWith(target)) return nonce.toString();
      nonce++;
      if (nonce % AgentChallengeResolver.YIELD_EVERY === 0) {
        await new Promise<void>((resolve) => setTimeout(resolve, 0));
      }
    }
  }

  /**
   * Narrows an untrusted 402 response body to the fields the solver needs.
   * A difficulty outside `[0, 64]` can never be met by a 64-character digest,
   * so it is rejected instead of starting an endless search.
   */
  private static parseChallenge(raw: unknown): {
    id: string;
    salt: string;
    difficulty: number;
  } {
    if (typeof raw === 'object' && raw !== null) {
      const { id, salt, difficulty } = raw as Record<string, unknown>;
      if (
        typeof id === 'string' &&
        typeof salt === 'string' &&
        typeof difficulty === 'number' &&
        Number.isInteger(difficulty) &&
        difficulty >= 0 &&
        difficulty <= 64
      ) {
        return { id, salt, difficulty };
      }
    }
    throw new Error('Malformed proof-of-work challenge in 402 response');
  }
}
Architecture Decisions and Rationale
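- Stateless Challenge Generation: Challenges are generated without server-side state, relying on cryptographic salts and IDs. This enables horizontal scaling and eliminates session affinity requirements. Verification, however, still needs the salt and difficulty that were issued for a given ID, so the verifier must either look them up in a shared store or receive them back from the client in a form the server has signed.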
- HMAC Token Validation: Tokens are self-contained and cryptographically signed. Validation requires no database lookup, reducing latency to sub-millisecond levels.
- Decoupled Middleware: The recovery logic sits outside the MCP router. This preserves the core protocol implementation while allowing independent scaling of the challenge service.
- Dual-Mode Support: The architecture supports both PoW and L402 by abstracting the challenge type. Agents negotiate capability via `Accept-Challenge` headers, enabling fallback chains.
Pitfall Guide
1. Static Difficulty Configuration
Explanation: Hardcoding PoW difficulty ignores fluctuating server load. During traffic spikes, fixed difficulty either fails to throttle effectively or causes excessive agent latency.
Fix: Implement dynamic difficulty scaling based on real-time queue depth or CPU utilization. Adjust the leading-zero requirement between 12-18 based on current throughput.
2. Token Replay Attacks
Explanation: Agents or malicious clients may reuse valid tokens across multiple requests, bypassing rate limits entirely.
Fix: Bind tokens to specific request fingerprints (IP, user-agent hash, or endpoint signature). Invalidate tokens immediately after first use or implement a sliding window with request counting.
3. Blocking the Event Loop with PoW
Explanation: Synchronous nonce searching in a single-threaded runtime stalls all other requests, causing cascading timeouts.
Fix: Offload PoW solving to worker threads or a dedicated challenge service. Use async iteration with setImmediate or setTimeout(0) to yield control back to the event loop periodically.
4. Ignoring Token Expiration Windows
Explanation: Agents caching tokens beyond their TTL cause validation failures and unnecessary challenge regeneration.
Fix: Implement client-side token lifecycle management. Cache tokens with explicit expiration tracking and trigger proactive renewal at 80% of TTL.
5. Mixing 429 and 402 Responses
Explanation: Returning both status codes from the same endpoint creates ambiguous agent behavior. Some clients may retry immediately on 429 while others wait for 402 challenges.
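Fix: Enforce a unified rate-limit policy. For clients that support challenges, all congestion events must route through the 402 challenge pipeline; never return 429 and 402 interchangeably from the same endpoint for the same client. Reserve 429 for clients that have not declared challenge support (see pitfall 7).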
6. Missing WWW-Authenticate Compliance
Explanation: Malformed challenge headers cause agent parsers to reject the response, treating it as a generic server error.
Fix: Strictly adhere to RFC 7235 header formatting. Include realm, id, and difficulty parameters. Validate headers against standard parsers before deployment.
7. Assuming Universal Agent Capability
Explanation: Not all MCP clients support HTTP 402 or challenge resolution. Forcing 402 on legacy agents breaks compatibility.
Fix: Implement capability negotiation via X-Agent-Capabilities headers. Return 402 only when the client declares support. Provide a degraded 429 fallback with explicit documentation for unsupported clients.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Sporadic exploration agents | HTTP 402 (PoW, difficulty 14) | Low frequency justifies CPU cost; no payment infrastructure needed | Zero monetary cost; ~5-10s latency per resolution |
| High-throughput CI/CD pipelines | HTTP 402 (L402 Lightning) | Predictable throughput requires economic guarantee; CPU overhead unacceptable | ~3 sats per call; scales linearly with volume |
| Cost-sensitive internal tools | HTTP 402 (PoW, difficulty 12) | Reduced difficulty lowers CPU burn while maintaining throttling | Minimal compute cost; acceptable latency for internal SLAs |
| Multi-tenant public API | HTTP 402 (Dual-mode negotiation) | Allows callers to choose between compute or payment based on their constraints | Flexible cost model; infrastructure scales with demand |
Configuration Template
// fastify-plugin.ts
import fp from 'fastify-plugin';
import { ChallengeEngine } from './challenge-engine';
import { TokenValidator } from './token-validator';
import { RateLimitRecoveryMiddleware } from './rate-limit-recovery';
/**
 * Fastify plugin that wires the 402 challenge flow together:
 *
 * - an `onRequest` hook that challenges rate-limited requests unless they
 *   carry a valid `X-Access-Token`;
 * - `POST /api/challenge/verify`, which exchanges a solved challenge for a
 *   token.
 *
 * Issued challenges are remembered in process memory so a solution can be
 * checked against the salt and difficulty that were actually handed out.
 * NOTE(review): with more than one server instance this map must move to a
 * shared store, or the challenge must be signed and echoed back by the client.
 *
 * @throws Error at registration time when `TOKEN_SECRET` is not set.
 */
export default fp(async (fastify) => {
  const VERIFY_PATH = '/api/challenge/verify';
  const TOKEN_TTL_MS = 5 * 60 * 1000;

  // A well-known fallback secret would let anyone forge access tokens, so
  // refuse to start without a configured one.
  const secret = process.env.TOKEN_SECRET;
  if (!secret) {
    throw new Error('TOKEN_SECRET environment variable must be set');
  }

  type IssuedChallenge = Awaited<ReturnType<ChallengeEngine['generate']>>;
  const pending = new Map<string, IssuedChallenge>();

  /** Engine that records every challenge it hands out until it expires. */
  class TrackingChallengeEngine extends ChallengeEngine {
    async generate(
      type: 'pow',
      config: { difficulty: number; ttlMinutes: number }
    ): Promise<IssuedChallenge> {
      const challenge = await super.generate(type, config);
      const now = Date.now();
      // Drop expired entries so unsolved challenges cannot grow the map forever.
      for (const [id, issued] of pending) {
        if (issued.expiresAt <= now) pending.delete(id);
      }
      pending.set(challenge.id, challenge);
      return challenge;
    }
  }

  const engine = new TrackingChallengeEngine();
  const validator = new TokenValidator(secret);
  const middleware = new RateLimitRecoveryMiddleware(engine, validator);
  fastify.decorate('recoveryMiddleware', middleware);

  /** True when the header holds a token the validator accepts. */
  const hasValidToken = (header: string | string[] | undefined): boolean => {
    const token = Array.isArray(header) ? header[0] : header;
    if (typeof token !== 'string' || token.length === 0) return false;
    try {
      return validator.validate(token).valid;
    } catch {
      return false;
    }
  };

  fastify.addHook('onRequest', async (request, reply) => {
    // The verify endpoint must stay reachable for rate-limited clients;
    // otherwise they could never redeem a solved challenge.
    if (request.url.split('?')[0] === VERIFY_PATH) return;

    const bucket = await fastify.rateLimiter.check(request);
    if (!bucket.exhausted) return;

    // A valid token from a previously solved challenge bypasses the limit.
    if (hasValidToken(request.headers['x-access-token'])) return;

    await middleware.handle(request, reply);
    // `handle` normally sends the 402 itself; sending again would raise a
    // "reply already sent" error. Only send when nothing has gone out yet.
    if (!reply.sent) {
      reply.send();
    }
    // Returning the reply tells Fastify the hook has answered the request.
    return reply;
  });

  fastify.post(VERIFY_PATH, async (request, reply) => {
    const body = (request.body ?? {}) as { challenge_id?: unknown; nonce?: unknown };
    const challengeId = body.challenge_id;
    const nonce = typeof body.nonce === 'number' ? String(body.nonce) : body.nonce;
    if (typeof challengeId !== 'string' || typeof nonce !== 'string') {
      return reply.status(400).send({ error: 'Invalid solution' });
    }

    const issued = pending.get(challengeId);
    if (!issued || issued.expiresAt <= Date.now()) {
      pending.delete(challengeId);
      return reply.status(400).send({ error: 'Unknown or expired challenge' });
    }

    // Verify against the salt and difficulty that were issued, never against
    // client-supplied or placeholder values.
    const isValid = await engine.verifySolution(
      challengeId,
      nonce,
      issued.salt,
      issued.difficulty
    );
    if (!isValid) {
      return reply.status(400).send({ error: 'Invalid solution' });
    }

    // Each challenge can be redeemed once.
    pending.delete(challengeId);
    const token = validator.issue(challengeId, TOKEN_TTL_MS);
    return reply.send({ token });
  });
});
Quick Start Guide
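- Install Dependencies: Add `fastify` and `fastify-plugin` to your MCP gateway project. `crypto` is a built-in Node.js module and does not need to be installed. Use Node.js 18+ so that the global `fetch` API used by the agent-side resolver is available.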
- Deploy Challenge Service: Run the provided middleware as a standalone plugin. Configure `TOKEN_SECRET` via environment variables. Set initial PoW difficulty to 14.
- Update Rate Limit Logic: Replace existing 429 returns with the 402 challenge pipeline. Ensure all congestion events route through the middleware.
- Test Agent Resolution: Use a sample client script to trigger rate limits. Verify the agent receives 402, solves the challenge, obtains a token, and successfully retries the original request.
- Monitor & Scale: Deploy metrics collection for challenge resolution time, token validation success rate, and queue depth. Adjust difficulty dynamically based on observed load patterns.