njection detection, rate limiting, and authentication before proxying to the AI provider. This ensures malicious traffic is dropped early, reducing cost and latency.
2. Token-Aware Rate Limiting: Standard request-based rate limiting is insufficient. A token bucket algorithm is used to limit both request frequency and token consumption, preventing financial DoS.
3. Hashed Key Storage: API keys are never stored in plaintext. SHA-256 hashes are used for verification, ensuring that a database breach does not expose active credentials.
4. Tenant-Enforced Context: All conversation history is tagged with a tenant ID. The context manager enforces strict isolation, preventing cross-tenant data leakage.
5. Output Sanitization: Security controls extend to the response. Outputs are scanned for sensitive data patterns before being returned to the client.
Implementation
The following code defines the core security components in TypeScript.
1. Input Sanitization and Normalization
import { createHash, randomBytes } from 'crypto';
/** Outcome of a single sanitization pass over one user-supplied input. */
interface SanitizationResult {
  /** True when the input passed every check. */
  isValid: boolean;
  /** Short rejection cause; only set when `isValid` is false. */
  reason?: string;
  /**
   * Normalized text. Present on success, and (truncated to the length limit)
   * on an over-length rejection; absent for every other rejection.
   */
  sanitizedInput?: string;
}

/**
 * Normalizes untrusted prompt text and rejects inputs that are too long or
 * contain known prompt-injection phrases.
 */
export class AISanitizer {
  private static readonly MAX_CHARS = 8000;

  // C0 control characters and DEL, EXCEPT tab, line feed and carriage return.
  // Whitespace controls are kept: deleting them would glue neighbouring words
  // together, which both corrupts multi-line prompts and lets
  // "ignore\nprevious\ninstructions" slip past the `\s+` patterns below.
  private static readonly UNSAFE_CONTROL_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g;

  // Unicode "Format" characters (zero-width space/joiner, soft hyphen, BOM,
  // bidi controls, ...). They are invisible and can be used to split a
  // blocked phrase, so they are removed from the copy used for matching.
  private static readonly INVISIBLE_CHARS = /\p{Cf}/gu;

  private static readonly BLOCKED_PATTERNS: RegExp[] = [
    /ignore\s+previous\s+instructions/i,
    /system\s*:/i,
    /you\s+are\s+now/i,
    /disregard\s+your/i,
    /new\s+instructions/i,
    /forget\s+everything/i,
    /roleplay\s+as/i,
  ];

  /**
   * Validates and normalizes one input string.
   *
   * @param input - Raw, untrusted text.
   * @returns `isValid: true` with the normalized text, or `isValid: false`
   *   with a `reason`. Empty and non-string inputs are rejected.
   */
  public sanitize(input: string): SanitizationResult {
    if (!input || typeof input !== 'string') {
      return { isValid: false, reason: 'Invalid input type' };
    }

    // NFKC folds compatibility forms (e.g. full-width letters) onto their
    // plain equivalents before any pattern is applied.
    const normalized = input
      .normalize('NFKC')
      .replace(AISanitizer.UNSAFE_CONTROL_CHARS, '');

    if (normalized.length > AISanitizer.MAX_CHARS) {
      return {
        isValid: false,
        reason: `Input exceeds ${AISanitizer.MAX_CHARS} characters`,
        sanitizedInput: normalized.slice(0, AISanitizer.MAX_CHARS),
      };
    }

    // Match against a copy without invisible characters; the returned text
    // keeps them because some (ZWJ, ZWNJ) are meaningful in real scripts
    // and emoji sequences.
    const probe = normalized.replace(AISanitizer.INVISIBLE_CHARS, '');
    const blocked = AISanitizer.BLOCKED_PATTERNS.some((pattern) => pattern.test(probe));
    if (blocked) {
      return { isValid: false, reason: 'Blocked pattern detected' };
    }

    return { isValid: true, sanitizedInput: normalized };
  }
}
2. Token-Aware Rate Limiting
/** Per-client limits enforced by {@link TokenBucketLimiter}. */
interface RateLimitConfig {
  /** Maximum number of accepted requests in any trailing 60-second window. */
  maxRequestsPerMinute: number;
  /** Token bucket capacity, which is also the number of tokens refilled per minute. */
  maxTokensPerMinute: number;
}

/**
 * In-memory limiter that caps both request count (sliding 60 s window) and
 * token consumption (continuously refilled bucket) per client.
 *
 * State lives in this instance only; entries are never evicted.
 */
export class TokenBucketLimiter {
  private buckets: Map<string, { tokens: number; lastRefill: number; requests: number[] }>;
  private config: RateLimitConfig;

  constructor(config: RateLimitConfig) {
    this.config = config;
    this.buckets = new Map();
  }

  /**
   * Decides whether a request may proceed and, if so, charges it.
   *
   * @param clientId - Key identifying the bucket to charge.
   * @param estimatedTokens - Tokens the request is expected to consume.
   *   Must be a finite, non-negative number.
   * @returns `true` when the request is within both limits (resources are
   *   consumed); `false` otherwise, including for an invalid estimate.
   */
  public isAllowed(clientId: string, estimatedTokens: number): boolean {
    // Fail closed on a bad estimate. NaN would pass the `<` comparison below
    // and then turn `bucket.tokens` into NaN, disabling the token limit for
    // this client permanently; a negative value would credit tokens.
    if (!Number.isFinite(estimatedTokens) || estimatedTokens < 0) {
      return false;
    }

    const now = Date.now();
    let bucket = this.buckets.get(clientId);
    if (!bucket) {
      bucket = { tokens: this.config.maxTokensPerMinute, lastRefill: now, requests: [] };
      this.buckets.set(clientId, bucket);
    }

    // Refill proportionally to elapsed time, capped at capacity. Date.now()
    // is wall-clock time and may step backwards, so never refill negatively.
    const elapsed = Math.max(0, now - bucket.lastRefill);
    const refill = (elapsed / 60000) * this.config.maxTokensPerMinute;
    bucket.tokens = Math.min(this.config.maxTokensPerMinute, bucket.tokens + refill);
    bucket.lastRefill = now;

    // Drop request timestamps that have left the 60-second window.
    bucket.requests = bucket.requests.filter((t) => now - t < 60000);

    if (bucket.requests.length >= this.config.maxRequestsPerMinute) {
      return false;
    }
    if (bucket.tokens < estimatedTokens) {
      return false;
    }

    // Only accepted requests are charged.
    bucket.tokens -= estimatedTokens;
    bucket.requests.push(now);
    return true;
  }
}
3. Secure Key Management
/** Data stored alongside the hash of an issued API key. */
interface KeyMetadata {
  /** Client the key was issued to. */
  clientId: string;
  /** Scopes granted to the key at creation time. */
  scopes: string[];
  /** Creation time as a `Date.now()` millisecond timestamp. */
  createdAt: number;
}

/**
 * In-memory API key store that keeps only SHA-256 hashes of issued keys.
 * The raw key is returned once from {@link KeyVault.createKey} and is not
 * retained.
 */
export class KeyVault {
  private store: Map<string, KeyMetadata>;

  constructor() {
    this.store = new Map();
  }

  /**
   * Issues a new key of the form `sk-` followed by 48 hex characters.
   *
   * @param clientId - Client the key belongs to.
   * @param scopes - Scopes to grant; the array is copied, so later changes
   *   to the caller's array do not affect the stored grant.
   * @returns The raw key. It cannot be recovered from the vault afterwards.
   */
  public async createKey(clientId: string, scopes: string[]): Promise<string> {
    const rawKey = `sk-${randomBytes(24).toString('hex')}`;
    const keyHash = createHash('sha256').update(rawKey).digest('hex');
    this.store.set(keyHash, {
      clientId,
      // Copy so the caller cannot widen the grant by mutating its array later.
      scopes: [...scopes],
      createdAt: Date.now(),
    });
    return rawKey;
  }

  /**
   * Looks up the metadata for a raw key.
   *
   * @param rawKey - Key presented by the client.
   * @returns A copy of the stored metadata, or `null` when the key is
   *   unknown, empty, or not a string.
   */
  public async validateKey(rawKey: string): Promise<KeyMetadata | null> {
    // A missing credential can arrive as undefined at runtime; hashing it
    // would throw instead of producing a clean "not valid" answer.
    if (typeof rawKey !== 'string' || rawKey.length === 0) {
      return null;
    }
    const keyHash = createHash('sha256').update(rawKey).digest('hex');
    const entry = this.store.get(keyHash);
    if (entry === undefined) {
      return null;
    }
    // Hand out a copy so callers cannot alter the stored scopes.
    return { ...entry, scopes: [...entry.scopes] };
  }
}
4. Injection Detection and Context Isolation
/** One conversation entry as stored in a TenantContext history. */
interface Message {
/** Speaker of the entry; an unconstrained string here (presumably values such as 'user' or 'assistant' — confirm against callers). */
role: string;
/** Text of the entry. */
content: string;
/** Tenant the entry belongs to; TenantContext stamps and filters on this value. */
tenantId: string;
}
/**
 * Lightweight phrase-based detector for prompt-injection attempts.
 */
export class InjectionGuard {
  // Lower-case phrases, written with single spaces; input is canonicalised
  // to the same form before comparison.
  private static readonly SIGNALS: string[] = [
    'ignore previous',
    'system prompt:',
    'you are now',
    'pretend to be',
    'disregard instructions',
    'new rules:',
    'output raw data',
  ];

  /**
   * Reports whether the input contains any known injection phrase.
   *
   * Matching is case-insensitive and tolerant of whitespace variation:
   * the input is NFKC-normalised and every run of whitespace (spaces, tabs,
   * newlines) is collapsed to a single space first, so "IGNORE   previous"
   * and "ignore\nprevious" are both detected.
   *
   * @param input - Text to inspect.
   * @returns `true` when at least one signal phrase is present.
   */
  public static detect(input: string): boolean {
    const canonical = input
      .normalize('NFKC')
      .toLowerCase()
      .replace(/\s+/g, ' ');
    return InjectionGuard.SIGNALS.some((signal) => canonical.includes(signal));
  }
}
/**
 * Conversation history bound to one tenant. Every stored message is stamped
 * with the tenant ID given at construction, and reads only return messages
 * carrying that ID.
 */
export class TenantContext {
  private history: Message[] = [];

  constructor(private tenantId: string) {}

  /**
   * Appends a message to the history, tagged with this context's tenant ID.
   *
   * @param role - Speaker label for the message.
   * @param content - Message text.
   */
  public addMessage(role: string, content: string): void {
    const entry: Message = { role, content, tenantId: this.tenantId };
    this.history.push(entry);
  }

  /**
   * Returns the messages owned by this tenant, in insertion order.
   *
   * @returns A new array on each call; the message objects themselves are
   *   the stored instances.
   */
  public getHistory(): Message[] {
    const owned: Message[] = [];
    for (const entry of this.history) {
      if (entry.tenantId === this.tenantId) {
        owned.push(entry);
      }
    }
    return owned;
  }

  /** Removes every message owned by this tenant from the history. */
  public clearHistory(): void {
    const retained: Message[] = [];
    for (const entry of this.history) {
      if (entry.tenantId !== this.tenantId) {
        retained.push(entry);
      }
    }
    this.history = retained;
  }
}
Pitfall Guide
| Pitfall | Explanation | Fix |
|---|---|---|
| Regex Evasion via Encoding | Attackers use Unicode normalization, zero-width characters, or base64 encoding to bypass pattern matching. | Normalize input using NFKC and strip control characters before applying regex. Decode known encodings if applicable. |
| In-Memory Rate Limiting in Clusters | In-memory buckets fail in multi-instance deployments, allowing attackers to bypass limits by distributing requests across nodes. | Use a distributed store like Redis for rate limiting state. Implement atomic operations to prevent race conditions. |
| False Positives in Injection Detection | Overly aggressive keyword matching blocks legitimate user inputs, degrading UX. | Implement confidence scoring, allow-list known safe patterns, and provide a feedback loop for users to report false blocks. |
| Raw Key Storage | Storing API keys in plaintext in databases or logs exposes credentials during a breach. | Always hash keys using SHA-256. Store only the hash. Return the raw key to the user only once upon creation. |
| Output Data Leakage | The model may echo sensitive context or PII in its response, even if input was sanitized. | Implement output sanitization to scan responses for sensitive patterns. Use PII redaction services before returning data to clients. |
| Token Budget Blowout | Failing to enforce max_tokens allows the model to generate excessive output, increasing cost and latency. | Always set max_tokens in API requests. Monitor token usage per request and enforce quotas at the gateway. |
| Context Window Overflow | Long conversations can exceed the model's context window, causing truncation or errors. | Implement context window management. Summarize or prune older messages when the limit is approached. |
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-Volume Production | Redis-based Rate Limiting + ML Injection Detection | Scalability and accuracy. Handles distributed traffic and complex attacks. | Higher infra cost; improved security ROI. |
| Low-Volume/Internal Tools | In-Memory Rate Limiting + Regex Detection | Simplicity and low latency. Sufficient for trusted environments. | Low cost; acceptable risk for internal use. |
| Strict Compliance (HIPAA/GDPR) | Output Sanitization + PII Redaction + Audit Logging | Ensures sensitive data is never exposed. Meets regulatory requirements. | Moderate cost for redaction services; high compliance value. |
| Cost-Sensitive Deployment | Token Budget Enforcement + Quota Alerts | Prevents financial DoS and unexpected bills. | Low cost; protects against cost spikes. |
Configuration Template
// security.config.ts
/**
 * Shape of the security configuration object, grouped by component.
 * NOTE(review): the classes shown in this document hard-code their own limits
 * and patterns; wiring these settings into them is left to the integrator.
 */
export interface SecurityConfig {
/** Input sanitization settings. */
sanitizer: {
/** Maximum accepted input length, in characters. */
maxChars: number;
/** Phrases to reject, given as plain strings (presumably compiled to patterns by the consumer — confirm). */
blockedPatterns: string[];
};
/** Rate limiting settings. */
rateLimit: {
/** Request budget per minute. */
maxRequestsPerMinute: number;
/** Token budget per minute. */
maxTokensPerMinute: number;
/** Optional Redis connection URL; presumably selects a distributed store when set — confirm. */
redisUrl?: string;
};
/** API key storage settings. */
keyVault: {
/** Hash algorithm for stored keys; the type admits only 'sha256'. */
hashAlgorithm: 'sha256';
/** Prefix placed in front of generated keys. */
keyPrefix: string;
};
/** Injection detection settings. */
injectionGuard: {
/** Phrases treated as injection signals. */
signals: string[];
/** Whether detected attempts should be logged. */
logAttempts: boolean;
};
/** Tenant isolation settings. */
tenantIsolation: {
/** Whether tenant ID enforcement is enabled. */
enforceTenantId: boolean;
/** Upper bound on retained history length (presumably counted in messages — confirm). */
maxHistoryLength: number;
};
}
/** Baseline SecurityConfig values intended as a starting point. */
export const defaultConfig: SecurityConfig = {
sanitizer: {
maxChars: 8000,
blockedPatterns: [
'ignore previous instructions',
'system:',
'you are now',
],
},
rateLimit: {
maxRequestsPerMinute: 60,
maxTokensPerMinute: 10000,
// Read from the REDIS_URL environment variable; undefined when it is not set.
redisUrl: process.env.REDIS_URL,
},
keyVault: {
hashAlgorithm: 'sha256',
keyPrefix: 'sk-',
},
injectionGuard: {
signals: [
'ignore previous',
'system prompt:',
'disregard instructions',
],
logAttempts: true,
},
tenantIsolation: {
enforceTenantId: true,
maxHistoryLength: 50,
},
};
Quick Start Guide
- Install Dependencies: Add
redis and dotenv to your project; crypto is built into Node.js and must not be installed separately. Configure environment variables for secrets and the Redis URL.
- Initialize Security Components: Create instances of
AISanitizer, TokenBucketLimiter, and KeyVault using the configuration template. InjectionGuard exposes a static detect method and needs no instance.
- Add Middleware: Integrate the security components into your API gateway. Apply sanitization, injection detection, and rate limiting before proxying requests to the AI provider.
- Test and Monitor: Run prompt injection tests. Verify rate limiting behavior. Monitor token usage and error logs. Adjust thresholds based on traffic patterns.