private readonly ttlSeconds: number;
/**
 * Binds this instance to a Redis client and records the session lifetime.
 *
 * @param client - Redis client kept as the backing store.
 * @param ttl - Session lifetime in seconds; defaults to 1800.
 */
constructor(client: Redis, ttl: number = 1800) {
  this.ttlSeconds = ttl;
  this.store = client;
}
/**
 * Stores a session payload under the token's key with the configured TTL.
 *
 * @param token - Session token; used as the suffix of the `auth:ctx:` key.
 * @param payload - Auth context to persist; serialized with `JSON.stringify`.
 */
async provisionSession(token: string, payload: AuthContext): Promise<void> {
  const key = `auth:ctx:${token}`;
  const serialized = JSON.stringify(payload);
  // SETEX writes the value and its expiry in a single command.
  await this.store.setex(key, this.ttlSeconds, serialized);
}
/**
 * Looks up the session stored for a token and extends its lifetime.
 *
 * @param token - Session token; used as the suffix of the `auth:ctx:` key.
 * @returns The parsed auth context, or `null` when no value is stored.
 */
async resolveSession(token: string): Promise<AuthContext | null> {
  const key = `auth:ctx:${token}`;
  const raw = await this.store.get(key);
  if (!raw) return null;
  // Sliding expiration: reset TTL on successful access.
  // GET and EXPIRE are separate commands, so the key may expire in between;
  // the payload already read is still returned in that case.
  await this.store.expire(key, this.ttlSeconds);
  return JSON.parse(raw) as AuthContext;
}
/**
 * Deletes the session stored for a token.
 *
 * @param token - Session token; used as the suffix of the `auth:ctx:` key.
 */
async revokeSession(token: string): Promise<void> {
  await this.store.del(`auth:ctx:${token}`);
}
}
**Rationale:** `SETEX` combines `SET` and `EXPIRE` into a single operation, eliminating race conditions during session creation. The sliding `EXPIRE` call on every successful lookup ensures active users maintain their session without client-side token refresh logic.
### Step 2: Distributed Throttling with Atomic Counters
Rate limiting must be enforced fleet-wide. Per-node counters allow users to bypass limits by rotating through different replicas. Redis atomic commands guarantee that increments and expiration checks happen in a single, non-interruptible step.
**Architecture Decision:** Implement a fixed-window counter for baseline protection, but wrap the increment and expiration check in a Lua script to guarantee atomicity under burst traffic. Lua scripts execute atomically within the Redis event loop, preventing race conditions where two requests increment the counter before the TTL is set.
**Implementation (TypeScript):**
```typescript
/**
 * Lua source for a fixed-window counter check.
 *
 * Inputs: `KEYS[1]` is the counter key, `ARGV[1]` the request limit and
 * `ARGV[2]` the window length passed to `EXPIRE`.
 *
 * Returns `0` without touching the key when the stored count has already
 * reached the limit. Otherwise it increments the counter and returns `1`;
 * the expiry is set only when the counter was previously absent or zero.
 */
const THROTTLE_SCRIPT = `
local key = KEYS[1]
local limit = tonumber(ARGV[1])
local window = tonumber(ARGV[2])
local current = tonumber(redis.call('GET', key) or '0')
if current >= limit then
return 0
end
redis.call('INCR', key)
if current == 0 then
redis.call('EXPIRE', key, window)
end
return 1
`;
/**
 * Fixed-window rate limiter that keeps one Redis counter per client IP and
 * time window, evaluated through `THROTTLE_SCRIPT`.
 */
export class ThrottleController {
  private readonly store: Redis;

  /**
   * @param client - Redis client used to run the throttle script.
   */
  constructor(client: Redis) {
    this.store = client;
  }

  /**
   * Counts one request against the client's current window.
   *
   * @param clientIp - Client address; part of the counter key.
   * @param maxRequests - Maximum requests allowed per window.
   * @param windowSec - Window length in seconds. It is also passed to the
   *   script as the key expiry, so it should be a positive whole number.
   * @returns `true` when the script reports the request as allowed (`1`).
   */
  async evaluateRequest(clientIp: string, maxRequests: number, windowSec: number): Promise<boolean> {
    // Bucket by the requested window rather than a fixed minute. With a
    // hard-coded 60 s bucket the key rotated every minute, so any window
    // longer than a minute had its counter reset early.
    const windowBucket = Math.floor(Date.now() / (windowSec * 1000));
    const key = `throttle:ip:${clientIp}:${windowBucket}`;
    const allowed = await this.store.eval(THROTTLE_SCRIPT, 1, key, maxRequests, windowSec);
    return allowed === 1;
  }
}
```
Rationale: The Lua script reads, compares, increments, and sets expiration in one network round-trip. This eliminates the classic GET → INCR → EXPIRE race condition where concurrent requests could both see a count below the limit before either increments it.
### Step 3: Predictable Caching with Explicit Invalidation
Caching is only valuable when freshness is guaranteed. The cache-aside pattern remains the most reliable approach for read-heavy workloads, but it requires disciplined write-side invalidation.
Architecture Decision: Separate read and write paths. Reads check Redis first, falling back to the primary database on miss. Writes update the database first, then explicitly delete the corresponding cache key. This prevents stale data from persisting beyond the TTL.
Implementation (TypeScript):
/**
 * Cache-aside wrapper: reads go to Redis first and fall back to the database;
 * writes update the database and then delete the cached copy.
 */
export class DataCache {
  private readonly store: Redis;
  private readonly db: any; // Placeholder for your ORM/DB client
  private readonly ttlSeconds: number;

  /**
   * @param client - Redis client used as the cache.
   * @param database - Database client exposing `records.findById` and
   *   `records.update`.
   * @param ttlSeconds - Lifetime of cached records in seconds; defaults to
   *   3600.
   */
  constructor(client: Redis, database: any, ttlSeconds: number = 3600) {
    this.store = client;
    this.db = database;
    this.ttlSeconds = ttlSeconds;
  }

  /**
   * Returns a record, serving it from the cache when present.
   *
   * @param identifier - Record id; used as the suffix of the `record:` key.
   * @returns The cached or freshly loaded record. A database miss is passed
   *   through to the caller unchanged and is not cached.
   */
  async fetchRecord(identifier: string): Promise<Record<string, unknown>> {
    const cacheKey = `record:${identifier}`;
    const hit = await this.store.get(cacheKey);
    if (hit) return JSON.parse(hit) as Record<string, unknown>;

    const source = await this.db.records.findById(identifier);
    // Only cache real records. JSON.stringify(undefined) is not a string,
    // and a cached "null" would be served until the TTL expires.
    if (source != null) {
      await this.store.setex(cacheKey, this.ttlSeconds, JSON.stringify(source));
    }
    return source;
  }

  /**
   * Applies an update to the database, then evicts the cached copy so the
   * next read reloads it.
   *
   * @param identifier - Record id; used as the suffix of the `record:` key.
   * @param updates - Fields to pass to `records.update`.
   */
  async mutateRecord(identifier: string, updates: Record<string, unknown>): Promise<void> {
    await this.db.records.update(identifier, updates);
    await this.store.del(`record:${identifier}`);
  }
}
Rationale: Explicit DEL on mutation guarantees that the next read fetches fresh data. For high-write environments, consider write-through caching or event-driven invalidation via Redis Pub/Sub to propagate eviction signals across multiple service instances simultaneously.
Architecture Decisions & Rationale
- Redis over Memcached: Redis provides native data structures (hashes, sorted sets, streams), Lua scripting, and persistence options. Memcached lacks atomic multi-key operations and scripting, making it unsuitable for rate limiting or complex session payloads.
- Cluster Mode over Sentinel: Cluster mode partitions data across multiple primaries, enabling horizontal scaling of both reads and writes. Sentinel provides failover but does not shard data, creating a write bottleneck.
- Pipeline Batching: During traffic spikes, network round-trips become the bottleneck. Grouping multiple `GET`/`SET` operations into a single pipeline reduces latency by 60-80% under high concurrency.
- Hot/Cold Hybrid for AI: AI inference requires immediate context retrieval but long-term durability. Redis serves the hot context window (last N turns, embeddings), while PostgreSQL asynchronously persists completed interactions. This balances sub-millisecond latency with compliance-grade storage.
Pitfall Guide
1. Unbounded Cache Growth
Explanation: Developers frequently omit TTLs on cache keys, assuming memory will handle itself. Over time, this causes memory exhaustion, triggering OOM kills or aggressive eviction of active sessions.
Fix: Enforce TTLs at the application layer. Use SETEX instead of SET, and implement a middleware that rejects cache writes without an explicit expiration parameter.
2. Blocking Key Scans in Production
Explanation: Using KEYS * or KEYS pattern scans the entire keyspace synchronously, blocking the Redis event loop and causing cascading timeouts across all connected services.
Fix: Replace with SCAN or SSCAN/HSCAN for iterative, non-blocking traversal. Implement cursor-based pagination in your application code.
3. Non-Atomic Rate Limit Checks
Explanation: Separating GET, INCR, and EXPIRE into distinct network calls creates a race window. Under burst traffic, multiple requests can pass the limit check before the counter increments.
Fix: Wrap the entire check-increment-expire sequence in a Lua script. Redis executes Lua atomically, eliminating the race condition entirely.
4. Memory Fragmentation Blind Spots
Explanation: Frequent creation and deletion of keys with varying sizes causes allocator fragmentation. The used_memory metric may appear healthy while used_memory_rss grows significantly, leading to swap usage and latency spikes.
Fix: Monitor mem_fragmentation_ratio. Keep it below 1.5. Use maxmemory-policy allkeys-lru and consider jemalloc-compiled Redis builds. Restart nodes during maintenance windows if fragmentation exceeds 2.0.
5. Connection Exhaustion Under Load
Explanation: Creating a new Redis connection per request or failing to reuse connections causes TCP handshake overhead and file descriptor exhaustion.
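Fix: Reuse long-lived connections instead of opening one per request. ioredis multiplexes commands over a single connection per client, so share one client per process and tune maxRetriesPerRequest and retryStrategy; with redis-py, use a shared ConnectionPool sized via max_connections. Monitor connected_clients and rejected_connections.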
6. Stale Data Propagation
Explanation: Updating a database record without invalidating the corresponding cache key leaves stale data in memory until TTL expiration. This breaks consistency guarantees for user-facing features.
Fix: Adopt a strict write-through or cache-aside invalidation policy. For multi-service architectures, publish an invalidation event via Redis Pub/Sub or a message queue to ensure all replicas evict the key simultaneously.
7. Single-Node Deployment in Production
Explanation: Running a standalone Redis instance creates a single point of failure. Node crashes, network partitions, or memory pressure immediately drop all sessions and cache hits.
Fix: Deploy Redis Cluster (minimum 3 primaries, 3 replicas) or Redis Sentinel with automatic failover. Never run stateful infrastructure without redundancy.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-read API with frequent updates | Cache-Aside + Event Invalidation | Balances read speed with write consistency; avoids write amplification | Low (standard Redis instance) |
| Bursty AI inference workloads | Hot/Cold Hybrid (Redis + Postgres) | Sub-ms context retrieval with durable long-term storage; reduces DB load | Medium (dual infrastructure) |
| Strict compliance session store | Redis Cluster + AOF Persistence | Minimizes session loss during failover (replication is asynchronous and `appendfsync everysec` can lose about one second of writes); supports audit requirements | Medium-High (HA cluster) |
| Global multi-region deployment | Redis Enterprise Active-Active | Low-latency reads in each region; automatic conflict resolution | High (enterprise licensing) |
Configuration Template
redis.conf (Production Baseline):
# Memory cap for the dataset; the policy below decides what is evicted at the cap.
maxmemory 8gb
# NOTE(review): allkeys-lru can evict any key, including session keys stored
# on the same instance - confirm that is acceptable.
maxmemory-policy allkeys-lru
# RDB snapshot triggers, written as "save <seconds> <changes>".
save 900 1
save 300 10
save 60 10000
# Append-only file enabled, fsynced once per second.
appendonly yes
appendfsync everysec
# Frequency of background tasks; dynamic-hz lets Redis adjust it under load.
hz 10
dynamic-hz yes
# TCP listen backlog.
tcp-backlog 511
# Close client connections that stay idle for 300 seconds.
timeout 300
TypeScript Client Setup (ioredis):
import { Redis, Cluster } from 'ioredis';
/**
 * Builds the shared Redis client from environment variables.
 *
 * When `REDIS_CLUSTER` is `'true'` a `Cluster` client is returned for the
 * three fixed seed nodes; otherwise a standalone `Redis` client is returned
 * for `REDIS_HOST` / `REDIS_PORT` (defaults `localhost:6379`). Both modes
 * authenticate with `REDIS_PASSWORD` when it is set.
 *
 * @returns A `Cluster` or standalone `Redis` client.
 */
export function createRedisClient(): Redis | Cluster {
  const isCluster = process.env.REDIS_CLUSTER === 'true';

  if (isCluster) {
    return new Cluster(
      [
        { host: 'redis-node-1', port: 6379 },
        { host: 'redis-node-2', port: 6379 },
        { host: 'redis-node-3', port: 6379 },
      ],
      {
        scaleReads: 'slave',
        retryDelayOnFailover: 100,
        redisOptions: {
          // Same credential as the standalone branch.
          password: process.env.REDIS_PASSWORD,
          tls: {},
          // Per-node option: it belongs here, not on the Cluster options.
          maxRetriesPerRequest: 3,
          // Reconnect only when a node reports READONLY (it was demoted to a
          // replica). Returning true for every error would drop the
          // connection on ordinary command errors as well.
          reconnectOnError: (err) => err.message.includes('READONLY'),
        },
      }
    );
  }

  return new Redis({
    host: process.env.REDIS_HOST || 'localhost',
    port: Number(process.env.REDIS_PORT) || 6379,
    password: process.env.REDIS_PASSWORD,
    // Linear backoff of 50 ms per attempt, capped at 2 s.
    retryStrategy: (times) => Math.min(times * 50, 2000),
    maxRetriesPerRequest: 3,
    enableReadyCheck: true,
    lazyConnect: true,
  });
}
Quick Start Guide
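- Provision the Cluster: Deploy a Redis Cluster with at least 3 primaries and 3 replicas (or a Sentinel setup). Give each node more RAM than its configured maxmemory (the baseline sets 8gb; lower it for smaller nodes). Apply the `redis.conf` baseline and enable AOF persistence.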
- Initialize the Client: Import the connection template, configure environment variables for host/port/password, and verify connectivity using `PING`.
- Implement the Orchestrator: Instantiate `SessionOrchestrator`, `ThrottleController`, and `DataCache` using the shared client. Attach middleware to your HTTP framework for automatic session resolution and rate limit evaluation.
- Validate Atomicity: Run a load test simulating 500 concurrent requests per second. Verify that rate limit counters never exceed the threshold and that session TTLs slide correctly under sustained traffic.
- Monitor & Tune: Track the cache hit ratio (`keyspace_hits` / (`keyspace_hits` + `keyspace_misses`)), `mem_fragmentation_ratio`, and `rejected_connections`. Adjust maxmemory and eviction policies based on observed cache churn and session lifecycle patterns.