Path**
* Fast Path: Local in-memory token bucket allows immediate rejection or acceptance.
* Reconciliation: Background process or async callback updates the central store.
* Fallback: If the local store drifts beyond a threshold, the central store is consulted synchronously.
Step-by-Step Implementation
Step 1: Define the Rate Limit Schema
Limits must be scoped to prevent key collisions and enable granular control.
- Dimensions:
tenant_id, endpoint, ip_address, api_key.
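- Structure: Composite keys ensure isolation. Example: `rl:{tenant}:{endpoint}:{identifier}`. Note that the `generateKey` helper in Step 3 orders the segments by dimension name, not by insertion order, so the dimensions `tenant`, `endpoint`, `key` actually produce `rl:{endpoint}:{key}:{tenant}`.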
Step 2: Atomic Token Bucket in Redis
Implement the token bucket logic in Lua. This script atomically checks availability, deducts tokens, and returns metadata.
-- redis/token_bucket.lua
-- Atomic token bucket: refills the bucket for the elapsed time, tries to take
-- `requested` tokens, persists the new state and refreshes the key's TTL.
--
-- KEYS[1]  bucket key (hash with fields `tokens` and `last_refill`)
-- ARGV[1]  capacity     maximum number of tokens, > 0
-- ARGV[2]  refill_rate  tokens added per second, > 0
-- ARGV[3]  now          caller's current time in seconds (may be fractional)
-- ARGV[4]  requested    tokens to take, >= 0 (0 only reads the bucket)
--
-- Returns {allowed (1|0), remaining tokens (floored), capacity}.
local key = KEYS[1]
local capacity = tonumber(ARGV[1])
local refill_rate = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local requested = tonumber(ARGV[4])

-- Reject bad arguments before any state is written: a zero refill rate would
-- produce an infinite TTL (EXPIRE then fails after the bucket was already
-- updated) and a negative request would add tokens instead of taking them.
if not capacity or not refill_rate or not now or not requested
  or capacity <= 0 or refill_rate <= 0 or requested < 0 then
  return redis.error_reply('ERR invalid token bucket arguments')
end

-- A missing key (or field) is treated as a full bucket last refilled "now".
local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
local tokens = tonumber(bucket[1]) or capacity
local last_refill = tonumber(bucket[2]) or now

-- Calculate tokens to add based on elapsed time; a caller whose clock is
-- behind the stored timestamp simply gets no refill.
local elapsed = math.max(0, now - last_refill)
local new_tokens = math.min(capacity, tokens + (elapsed * refill_rate))

local allowed = 0
if new_tokens >= requested then
  new_tokens = new_tokens - requested
  allowed = 1
end

-- Update state. The timestamp never moves backwards: otherwise a caller with
-- a lagging clock would rewind it and the next caller would be credited the
-- same interval a second time.
redis.call('HMSET', key, 'tokens', new_tokens, 'last_refill', math.max(now, last_refill))

-- Set expiry to clean up unused keys (TTL = capacity / refill_rate * 2).
-- After that long the bucket would be full anyway, which is exactly how a
-- missing key is interpreted above.
local ttl = math.ceil((capacity / refill_rate) * 2)
redis.call('EXPIRE', key, ttl)

return {allowed, math.floor(new_tokens), capacity}
Step 3: TypeScript Hybrid Implementation
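The service layer wraps the Redis script in a small TypeScript class. In the full hybrid design a local token bucket mirrors the Redis state, so local checks are instant and Redis updates occur asynchronously or on cache misses. The class shown here implements only the authoritative Redis-backed check (`checkLimit`), i.e. the path used for critical enforcement points or when local drift is detected; the local fast path is not shown.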
import { createClient, RedisClientType } from 'redis';
/**
 * Token-bucket parameters for a single rate-limit check.
 *
 * `capacity` and `refillRate` are forwarded as-is to the Redis Lua script by
 * `HybridRateLimiter.checkLimit`.
 */
interface RateLimitConfig {
// Maximum number of tokens the bucket can hold (the burst size).
capacity: number;
refillRate: number; // tokens per second
// NOTE(review): not read by any code visible in this file; presumably consumed
// by the local fast-path bucket — confirm before relying on it.
localSyncThreshold: number; // max drift allowed before sync
}
/**
 * Redis-backed token-bucket rate limiter.
 *
 * Every check runs the supplied Lua script through EVALSHA so that the
 * read-modify-write of the bucket is atomic on the Redis side. The script is
 * expected to reply with `[allowed (1|0), remaining, capacity]`.
 */
export class HybridRateLimiter {
  private readonly redis: RedisClientType;
  private readonly luaScript: string;
  /** SHA1 returned by SCRIPT LOAD; `undefined` until the script has been loaded. */
  private scriptHash: string | undefined;

  /**
   * @param redis - Connected Redis client used for SCRIPT LOAD / EVALSHA.
   * @param luaScript - Source of the token-bucket Lua script.
   */
  constructor(redis: RedisClientType, luaScript: string) {
    this.redis = redis;
    this.luaScript = luaScript;
  }

  /**
   * Loads the Lua script into Redis and remembers its hash.
   * Calling it up front is optional: `checkLimit` loads the script on demand.
   *
   * @throws Whatever the Redis client throws when SCRIPT LOAD fails.
   */
  async init(): Promise<void> {
    await this.loadScript();
  }

  /**
   * Checks rate limit using Redis for global consistency.
   * Use this for critical enforcement points or when local drift is detected.
   *
   * Never rejects: on any Redis or script error the limiter fails open and
   * reports `remaining` and `limit` as `Infinity`.
   *
   * @param key - Bucket key, e.g. produced by {@link HybridRateLimiter.generateKey}.
   * @param config - Bucket capacity and refill rate forwarded to the script.
   * @param requested - Number of tokens to take (defaults to 1).
   * @returns Whether the request is allowed, plus the remaining tokens and the limit.
   */
  async checkLimit(
    key: string,
    config: RateLimitConfig,
    requested: number = 1
  ): Promise<{ allowed: boolean; remaining: number; limit: number }> {
    // The script works in (fractional) seconds.
    const now = Date.now() / 1000;
    const args = [
      config.capacity.toString(),
      config.refillRate.toString(),
      now.toString(),
      requested.toString(),
    ];

    try {
      const reply = await this.runScript(key, args);
      const [allowedFlag, remaining, limit] = HybridRateLimiter.parseReply(reply);
      return { allowed: allowedFlag === 1, remaining, limit };
    } catch (err: unknown) {
      // Fallback: Fail open or closed based on policy
      // Fail-open prevents outage but allows abuse
      // Fail-closed protects resources but risks availability
      console.error('Rate limit Redis error, failing open', err);
      return { allowed: true, remaining: Infinity, limit: Infinity };
    }
  }

  /**
   * Generates the composite key for a request.
   *
   * Segments are the dimension values ordered by dimension name, joined with
   * `:`. A `:` or `\` inside a value is backslash-escaped so that two
   * different sets of values can never produce the same key (for example
   * `key="a:b", tenant="c"` versus `key="a", tenant="b:c"`). Values without
   * those characters are emitted unchanged.
   *
   * NOTE: ordering uses `localeCompare` without an explicit locale, so keep
   * dimension names to plain lowercase ASCII to get the same order everywhere.
   *
   * @param dimensions - Dimension name to value, e.g. `{ tenant, endpoint, key }`.
   * @param prefix - Key namespace (defaults to `rl`).
   */
  static generateKey(
    dimensions: Record<string, string>,
    prefix: string = 'rl'
  ): string {
    const parts = Object.entries(dimensions)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([, value]) => value.replace(/[\\:]/g, '\\$&'));
    return `${prefix}:${parts.join(':')}`;
  }

  /** Runs SCRIPT LOAD, caches the resulting hash and returns it. */
  private async loadScript(): Promise<string> {
    const hash = await this.redis.scriptLoad(this.luaScript);
    this.scriptHash = hash;
    return hash;
  }

  /**
   * Executes the script via EVALSHA, loading it first when no hash is cached.
   * A NOSCRIPT error (script cache flushed, Redis restarted) triggers exactly
   * one reload and one retry; any further failure propagates to the caller.
   */
  private async runScript(key: string, args: string[]): Promise<unknown> {
    const options = { keys: [key], arguments: args };
    const hash = this.scriptHash ?? (await this.loadScript());
    try {
      return await this.redis.evalSha(hash, options);
    } catch (err: unknown) {
      if (!HybridRateLimiter.isNoScriptError(err)) {
        throw err;
      }
      return this.redis.evalSha(await this.loadScript(), options);
    }
  }

  /** True when `err` is the Redis "NOSCRIPT" error reply. */
  private static isNoScriptError(err: unknown): boolean {
    return err instanceof Error && err.message.includes('NOSCRIPT');
  }

  /**
   * Validates the script reply and converts it to three finite numbers.
   *
   * @throws Error when the reply is not an array of at least three numeric values.
   */
  private static parseReply(reply: unknown): [number, number, number] {
    if (!Array.isArray(reply) || reply.length < 3) {
      throw new Error(`Unexpected rate limit script reply: ${String(reply)}`);
    }
    const allowedFlag = Number(reply[0]);
    const remaining = Number(reply[1]);
    const limit = Number(reply[2]);
    if (![allowedFlag, remaining, limit].every((n) => Number.isFinite(n))) {
      throw new Error(`Unexpected rate limit script reply: ${String(reply)}`);
    }
    return [allowedFlag, remaining, limit];
  }
}
Step 4: Middleware Integration
Integrate with the request lifecycle. Extract headers, apply limits, and return standard headers.
import { Request, Response, NextFunction } from 'express';
/**
 * Builds an Express middleware that enforces a per-tenant, per-endpoint,
 * per-API-key token bucket and reports the bucket state in response headers.
 *
 * Requests without `x-tenant-id` or `x-api-key` are answered with 401.
 * Rejected requests get 429 plus a `Retry-After` header. Any error thrown
 * while checking the limit is forwarded to Express via `next(err)`.
 *
 * @param limiter - Limiter used to check and consume tokens.
 * @param config - Bucket parameters; defaults to a burst of 100 and
 *   10 requests/second sustained.
 */
export function rateLimitMiddleware(
  limiter: HybridRateLimiter,
  config: RateLimitConfig = {
    capacity: 100, // Burst capacity
    refillRate: 10, // 10 req/sec sustained
    localSyncThreshold: 5,
  }
) {
  // Replaces NaN/Infinity (e.g. from a fail-open result or a zero refill
  // rate) so that headers always carry a plain number.
  const finiteOr = (value: number, fallback: number): number =>
    Number.isFinite(value) ? value : fallback;

  return async (req: Request, res: Response, next: NextFunction): Promise<void> => {
    const tenantId = req.headers['x-tenant-id'];
    const apiKey = req.headers['x-api-key'];
    if (
      typeof tenantId !== 'string' ||
      tenantId === '' ||
      typeof apiKey !== 'string' ||
      apiKey === ''
    ) {
      res.status(401).json({ error: 'Missing auth headers' });
      return;
    }

    let result: { allowed: boolean; remaining: number; limit: number };
    try {
      const key = HybridRateLimiter.generateKey({
        tenant: tenantId,
        endpoint: req.path,
        key: apiKey,
      });
      result = await limiter.checkLimit(key, config);
    } catch (err: unknown) {
      // An async handler's rejection is not picked up by Express on its own.
      next(err);
      return;
    }

    // The limiter may report Infinity when it fails open; advertise the
    // configured capacity instead of emitting "Infinity"/"NaN" headers.
    const limit = finiteOr(result.limit, config.capacity);
    const remaining = finiteOr(result.remaining, limit);
    // Seconds until the bucket is full again at the configured refill rate.
    const secondsUntilFull = finiteOr((limit - remaining) / config.refillRate, 0);

    res.set({
      'X-RateLimit-Limit': limit.toString(),
      'X-RateLimit-Remaining': remaining.toString(),
      'X-RateLimit-Reset': Math.ceil(Date.now() / 1000 + secondsUntilFull).toString(),
    });

    if (!result.allowed) {
      // Time needed for one token to be refilled, rounded up to whole seconds.
      const retryAfter = finiteOr(Math.ceil(1 / config.refillRate), 1);
      res.set('Retry-After', retryAfter.toString());
      res.status(429).json({
        error: 'Rate limit exceeded',
        retryAfter,
      });
      return;
    }

    next();
  };
}
Step 5: Key Expiration and Memory Management
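Redis keys must expire to prevent memory leaks. The Lua script includes an EXPIRE command. The TTL should be calculated from the bucket capacity and refill rate so that keys persist long enough for a refill but are cleaned up when inactive. A TTL of twice the refill time (`capacity / refill_rate`, the time an empty bucket needs to fill up again) is recommended: by then the bucket would be full anyway, and a missing key is treated as a full bucket, so expiry never hands out extra tokens. The script in the Configuration Template adds a further 60-second margin on top of this.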
Pitfall Guide
1. Synchronous Redis Calls Blocking the Event Loop
- Mistake: Using synchronous libraries or blocking I/O for Redis calls in single-threaded environments.
- Impact: The service thread halts, causing all requests to queue. Latency spikes to seconds, effectively causing a self-inflicted DoS.
- Best Practice: Always use async/await patterns. Ensure the Redis client is non-blocking. In Node.js, verify the client does not use heavy serialization that blocks the loop.
2. Key Explosion and Cardinality Issues
- Mistake: Creating keys based on high-cardinality dimensions like `user_id` combined with `request_id` or `session_id`.
- Impact: Redis memory usage grows linearly with unique requests, leading to OOM crashes.
- Best Practice: Limit key dimensions to stable identifiers. Aggregate metrics by tenant or API key. Avoid including timestamps or ephemeral IDs in rate limit keys.
3. Ignoring Clock Skew in Distributed Systems
- Mistake: Using local server time (`Date.now()`) for rate limit calculations without synchronizing with Redis.
- Impact: Servers with drifted clocks may allow requests that should be blocked or block valid requests.
- Best Practice: Use Redis time or a synchronized time source. The Lua script should accept the current time as an argument, but this time should be derived from a trusted source or from the Redis `TIME` command if precision is critical.
4. Inconsistent Limits Across Instances
- Mistake: Relying solely on in-memory counters without a central store.
- Impact: An attacker can distribute requests across instances to bypass limits. If limits are 100 req/sec and there are 10 instances, the effective limit becomes 1000 req/sec.
- Best Practice: Always use a distributed store for the authoritative state. Local caching is acceptable only if it is a read-through cache with strict consistency bounds.
5. Missing Retry-After Headers
- Mistake: Returning `429` without indicating when the client can retry.
- Impact: Clients have to guess: many retry immediately or in tight loops, increasing load during the incident.
- Best Practice: Always include `Retry-After` and `X-RateLimit-Reset` headers. This allows well-behaved clients to throttle themselves, reducing overall system load.
6. Over-Provisioning vs. Under-Provisioning Limits
- Mistake: Setting limits based on theoretical maximums rather than observed traffic patterns.
- Impact: Limits trigger false positives for legitimate burst traffic or fail to protect against slow-drip abuse.
- Best Practice: Implement dynamic rate limiting. Monitor traffic baselines and adjust limits based on percentiles. Use token buckets to accommodate natural burstiness.
7. Fallback Mode Misconfiguration
- Mistake: Failing to define behavior when the rate limiting store is unavailable.
- Impact: If Redis goes down, the service may fail open (allowing abuse) or fail closed (blocking all traffic).
- Best Practice: Implement a circuit breaker for the rate limiter. On store failure, fall back to a conservative local limit or a permissive local limit based on risk tolerance. Log the fallback event for alerting.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Startup / Low Traffic | In-Memory Token Bucket | Zero infrastructure cost; simple implementation. | Low (Compute only) |
| Multi-Region API | Redis Cluster + Lua | Global consistency across regions; high availability. | Medium (Redis Cluster) |
| Ultra-Low Latency (Gaming/Fintech) | Local Bucket + Async Sync | Sub-millisecond enforcement; eventual consistency acceptable. | Medium (Complexity) |
| Edge Protection / DDoS | Cloudflare / Envoy | Offload traffic before reaching origin; managed scaling. | Variable (CDN/Edge) |
| High Cardinality Abuse | Probabilistic Counting (HyperLogLog) | Memory efficient estimation for massive key sets. | Low (Memory) |
Configuration Template
Redis Lua Script (token_bucket.lua)
-- Copy this script to your Redis instance
-- Usage: EVALSHA <hash> 1 <key> <capacity> <refill_rate> <now> <requested>
--
-- Atomic token bucket stored in a hash with fields `tokens` and `last_refill`.
--   capacity     maximum number of tokens, > 0
--   refill_rate  tokens added per second, > 0
--   now          caller's current time in seconds (may be fractional)
--   requested    tokens to take, >= 0 (0 only reads the bucket)
-- Returns {allowed (1|0), remaining tokens (floored), capacity}.
local key = KEYS[1]
local capacity = tonumber(ARGV[1])
local refill_rate = tonumber(ARGV[2])
local now = tonumber(ARGV[3])
local requested = tonumber(ARGV[4])

-- Validate before writing anything: a zero refill rate makes the TTL below
-- infinite (EXPIRE would fail after the bucket was already updated) and a
-- negative request would add tokens instead of taking them.
if not capacity or not refill_rate or not now or not requested
  or capacity <= 0 or refill_rate <= 0 or requested < 0 then
  return redis.error_reply('ERR invalid token bucket arguments')
end

-- A missing key (or field) is treated as a full bucket last refilled "now".
local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')
local tokens = tonumber(bucket[1]) or capacity
local last_refill = tonumber(bucket[2]) or now

-- Refill for the elapsed time; a caller whose clock is behind gets no refill.
local elapsed = math.max(0, now - last_refill)
local new_tokens = math.min(capacity, tokens + (elapsed * refill_rate))

local allowed = 0
if new_tokens >= requested then
  new_tokens = new_tokens - requested
  allowed = 1
end

-- Keep the stored timestamp monotonic so a caller with a lagging clock cannot
-- rewind it and cause the same interval to be refilled twice.
redis.call('HMSET', key, 'tokens', new_tokens, 'last_refill', math.max(now, last_refill))

-- Twice the full-refill time plus a 60 s margin; an expired key reads as a
-- full bucket, which is what the bucket would hold by then anyway.
local ttl = math.ceil((capacity / refill_rate) * 2) + 60
redis.call('EXPIRE', key, ttl)

return {allowed, math.floor(new_tokens), capacity}
TypeScript Configuration Interface
/**
 * Declarative description of one rate-limit policy.
 *
 * NOTE(review): no code visible in this file consumes these fields; the
 * per-field notes below describe intent as suggested by names and existing
 * comments — confirm against the code that applies policies.
 */
export interface RateLimitPolicy {
// Policy identifier; in `defaultPolicies` it equals the map key.
id: string;
dimensions: string[]; // e.g., ['tenant', 'endpoint']
// Bucket size; presumably the same meaning as RateLimitConfig.capacity.
capacity: number;
// Presumably tokens per second, as in RateLimitConfig.refillRate — confirm.
refillRate: number;
burstMultiplier?: number; // Allow temporary burst
// What to do when the rate-limit store is unavailable.
fallbackMode: 'fail-open' | 'fail-closed';
// Presumably toggles emission of the X-RateLimit-* response headers — confirm.
headers: boolean;
}
/**
 * Built-in policies, keyed by policy id.
 *
 * - `api_standard`: 100-token bucket refilled at 10/s, scoped by tenant,
 *   endpoint and key; fails open.
 * - `api_write`: 20-token bucket refilled at 2/s, scoped by tenant and key;
 *   fails closed to protect writes.
 */
export const defaultPolicies: Record<string, RateLimitPolicy> = (() => {
  const standard: RateLimitPolicy = {
    id: 'api_standard',
    dimensions: ['tenant', 'endpoint', 'key'],
    capacity: 100,
    refillRate: 10,
    fallbackMode: 'fail-open',
    headers: true,
  };

  const write: RateLimitPolicy = {
    id: 'api_write',
    dimensions: ['tenant', 'key'],
    capacity: 20,
    refillRate: 2,
    fallbackMode: 'fail-closed', // Protect writes
    headers: true,
  };

  // Keys are taken from the policy ids so the two can never disagree.
  return { [standard.id]: standard, [write.id]: write };
})();
Quick Start Guide
-
Initialize Redis Client:
npm install redis
Configure a persistent Redis client with connection pooling.
-
Load Lua Script:
// Create a client for a local Redis instance and open the connection.
const redis = createClient({ url: 'redis://localhost:6379' });
await redis.connect();
// Read the Lua source from disk. The path is relative to the process's working
// directory. NOTE(review): `fs` is not imported in this snippet — add
// `import fs from 'node:fs'` (or equivalent) before using it.
const script = fs.readFileSync('./token_bucket.lua', 'utf8');
// SCRIPT LOAD caches the script in Redis and returns the hash used by EVALSHA.
// NOTE(review): HybridRateLimiter takes the script source and loads it itself
// in init(), so `hash` is only needed when calling EVALSHA directly.
const hash = await redis.scriptLoad(script);
-
Create Middleware:
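Instantiate the HybridRateLimiter with the Redis client and the Lua script source, call `await limiter.init()` to load the script into Redis, and attach `rateLimitMiddleware(limiter)` to your Express/Fastify app. Define policies for critical endpoints.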
-
Verify Headers:
Send a test request and inspect headers:
# The middleware answers 401 unless both auth headers are present, so send
# them; otherwise no rate-limit headers are returned.
curl -I http://localhost:3000/api/resource \
  -H 'X-Tenant-Id: demo-tenant' \
  -H 'X-API-Key: demo-key'
# Expect: X-RateLimit-Limit: 100, X-RateLimit-Remaining: 99
-
Monitor and Tune:
Deploy to staging. Generate load using a tool like k6. Observe 429 rates and Redis memory. Adjust capacity and refillRate based on observed latency and error rates. Validate fallback behavior by stopping Redis temporarily.