edisClientType } from 'redis';
/**
 * Leaderboard for a single tournament, backed by one Redis sorted set.
 *
 * Members of the set are participant ids and scores are their accumulated
 * points. The set lives under the key `tournament:<tournamentId>:scores`.
 * Call {@link TournamentRanking.initialize} once before any other method.
 */
class TournamentRanking {
  private client: RedisClientType;
  private key: string;

  /**
   * @param redisUrl - Connection URL handed to `createClient`.
   * @param tournamentId - Identifier embedded in the sorted-set key.
   */
  constructor(redisUrl: string, tournamentId: string) {
    this.client = createClient({ url: redisUrl });
    // Template literal: the key must be a string, e.g. "tournament:42:scores".
    this.key = `tournament:${tournamentId}:scores`;
  }

  /** Opens the connection to Redis. */
  async initialize(): Promise<void> {
    await this.client.connect();
  }

  /**
   * Adds `delta` (which may be negative) to a participant's score.
   *
   * @returns The participant's score after the increment.
   */
  async updateScore(participantId: string, delta: number): Promise<number> {
    return this.client.zIncrBy(this.key, delta, participantId);
  }

  /**
   * Looks up a participant's position, highest score first.
   *
   * @returns The 1-based rank, or `null` when the participant has no score.
   */
  async getRank(participantId: string): Promise<number | null> {
    const zeroBasedRank = await this.client.zRevRank(this.key, participantId);
    // Redis ranks are 0-based; callers expect 1-based positions.
    return zeroBasedRank === null ? null : zeroBasedRank + 1;
  }

  /**
   * Returns up to `limit` participants ordered from highest to lowest score.
   *
   * @param limit - Maximum number of entries; fractional values are floored.
   * @returns An empty array when `limit` is zero, negative or NaN.
   */
  async getTopContenders(limit: number): Promise<Array<{ id: string; score: number }>> {
    const count = Math.floor(limit);
    // A stop index of -1 means "through the last element" to Redis, so a
    // limit of 0 would otherwise return the whole leaderboard.
    if (!(count > 0)) {
      return [];
    }
    const entries = await this.client.zRangeWithScores(this.key, 0, count - 1, { REV: true });
    return entries.map((entry: { value: string; score: number }) => ({
      id: entry.value,
      score: entry.score,
    }));
  }
}
**Architecture Rationale:**
- Skip list internals provide logarithmic updates without table scans.
- Single-threaded execution guarantees that `ZINCRBY` and `ZREVRANK` never encounter stale reads or lock contention.
- Client-side 1-based rank conversion keeps the database layer simple while matching business logic expectations.
### 2. Event Sourcing with Streams
Redis Streams provide an append-only log with built-in consumer group semantics. Unlike Kafka, which requires partition management and JVM-based brokers, Streams handle message routing, acknowledgment tracking, and pending entry management natively.
**Implementation:**
```typescript
/**
 * Append-only inventory audit trail stored in a Redis Stream and consumed
 * through the `sync-workers` consumer group.
 *
 * Call {@link InventoryAuditLog.initialize} once before appending or consuming.
 */
class InventoryAuditLog {
  private client: RedisClientType;
  private streamKey: string;
  private groupName: string;
  private maxStreamLength: number | null;

  /**
   * @param redisUrl - Connection URL handed to `createClient`.
   * @param maxStreamLength - Optional cap on the stream length. When set,
   *   every append trims the stream approximately (`MAXLEN ~`) to this many
   *   entries. `null` (the default) keeps the original unbounded behaviour.
   * @throws RangeError when `maxStreamLength` is not a positive integer.
   */
  constructor(redisUrl: string, maxStreamLength: number | null = null) {
    if (maxStreamLength !== null && (!Number.isInteger(maxStreamLength) || maxStreamLength <= 0)) {
      throw new RangeError('maxStreamLength must be a positive integer or null');
    }
    this.client = createClient({ url: redisUrl });
    this.streamKey = 'warehouse:inventory:audit';
    this.groupName = 'sync-workers';
    this.maxStreamLength = maxStreamLength;
  }

  /**
   * Connects and makes sure the consumer group exists.
   *
   * The group is created at `$`, so it only receives entries appended after
   * its creation; `MKSTREAM` creates the stream when it is missing.
   *
   * @throws Whatever the client throws, except the BUSYGROUP reply that
   *   signals the group already exists.
   */
  async initialize(): Promise<void> {
    await this.client.connect();
    try {
      await this.client.xGroupCreate(this.streamKey, this.groupName, '$', { MKSTREAM: true });
    } catch (err: unknown) {
      // Narrow before reading `.message`: a non-Error rejection must be
      // rethrown as-is rather than replaced by a TypeError.
      const message = err instanceof Error ? err.message : '';
      if (!message.includes('BUSYGROUP')) throw err;
    }
  }

  /**
   * Appends one audit event with a server-generated entry id.
   *
   * @returns The id Redis assigned to the new entry.
   */
  async appendEvent(action: string, itemId: string, quantity: number): Promise<string> {
    const fields = {
      action,
      item_id: itemId,
      quantity: quantity.toString(),
    };
    if (this.maxStreamLength === null) {
      return this.client.xAdd(this.streamKey, '*', fields);
    }
    // Approximate trimming lets Redis drop whole internal nodes, which is
    // far cheaper than exact trimming on every append.
    return this.client.xAdd(this.streamKey, '*', fields, {
      TRIM: { strategy: 'MAXLEN', strategyModifier: '~', threshold: this.maxStreamLength },
    });
  }

  /**
   * Reads up to `batchSize` never-delivered entries for `workerId`,
   * blocking for at most 2000 ms.
   *
   * @returns The entries of the stream, or an empty array on timeout.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- return type kept for existing callers
  async consumeBatch(workerId: string, batchSize: number): Promise<any[]> {
    const reply = await this.client.xReadGroup(
      this.groupName,
      workerId,
      { key: this.streamKey, id: '>' }, // '>' = entries not yet delivered to this group
      { COUNT: batchSize, BLOCK: 2000 }
    );
    return reply?.[0]?.messages ?? [];
  }

  /**
   * Acknowledges one entry for the consumer group.
   *
   * @param workerId - Not used: the XACK call below is addressed by stream,
   *   group and entry id only. Kept so existing call sites stay valid.
   * @param entryId - Id of the entry to acknowledge.
   */
  async acknowledge(workerId: string, entryId: string): Promise<void> {
    await this.client.xAck(this.streamKey, this.groupName, entryId);
  }
}
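```
**Architecture Rationale:**
- The `$` marker in group creation makes the consumer group start at the end of the stream, so it only receives entries appended after the group was created; historical entries are not replayed unless the group is explicitly created (or reset) at an earlier ID.
- `BLOCK: 2000` implements long-polling semantics, reducing CPU spin while maintaining near-real-time delivery.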
- Consumer groups decouple producers from processing speed, enabling horizontal scaling of workers without partition rebalancing logic.
### 3. Atomic Execution via Lua
Lua scripts execute atomically within the Redis event loop. No other command can interleave between script operations, eliminating time-of-check-to-time-of-use (TOCTOU) races that typically require distributed locks or saga orchestrators.
**Implementation:**
/**
 * Sliding-window rate limiter. Each resource gets a sorted set
 * (`limiter:<resourceId>`) whose members are request ids scored by their
 * timestamp in milliseconds. Eviction of old entries, counting and insertion
 * run inside one Lua script so the check-and-record step is atomic.
 *
 * Call {@link SlidingWindowLimiter.initialize} once before `isAllowed`.
 */
class SlidingWindowLimiter {
  /**
   * KEYS[1] = sorted-set key.
   * ARGV    = window length (ms), max requests, current time (ms), request id.
   * Returns 1 when the request was recorded, 0 when the window is full.
   */
  private static readonly WINDOW_SCRIPT = `
local key = KEYS[1]
local window_ms = tonumber(ARGV[1])
local max_allowed = tonumber(ARGV[2])
local now_ms = tonumber(ARGV[3])
local request_id = ARGV[4]
redis.call('ZREMRANGEBYSCORE', key, 0, now_ms - window_ms)
local current_count = redis.call('ZCARD', key)
if current_count < max_allowed then
redis.call('ZADD', key, now_ms, request_id)
redis.call('PEXPIRE', key, window_ms)
return 1
end
return 0
`;

  private client: RedisClientType;
  private scriptHash: string | null = null;

  /** @param redisUrl - Connection URL handed to `createClient`. */
  constructor(redisUrl: string) {
    this.client = createClient({ url: redisUrl });
  }

  /**
   * Connects and preloads the script so later calls only send its hash.
   */
  async initialize(): Promise<void> {
    await this.client.connect();
    this.scriptHash = await this.client.scriptLoad(SlidingWindowLimiter.WINDOW_SCRIPT);
  }

  /**
   * Records a request for `resourceId` if fewer than `maxRequests` were
   * recorded during the last `windowMs` milliseconds.
   *
   * The timestamp comes from this process's clock (`Date.now()`), so callers
   * on different hosts should have reasonably synchronised clocks.
   *
   * @returns `true` when the request was accepted, `false` when rejected.
   * @throws Error when called before `initialize()`.
   */
  async isAllowed(resourceId: string, windowMs: number, maxRequests: number): Promise<boolean> {
    if (this.scriptHash === null) {
      throw new Error('SlidingWindowLimiter: initialize() must be called before isAllowed()');
    }

    const now = Date.now();
    // The random suffix keeps members unique when several requests share a millisecond.
    const requestId = `${resourceId}:${now}:${Math.random().toString(36).slice(2, 8)}`;
    const options = {
      keys: [`limiter:${resourceId}`],
      arguments: [windowMs.toString(), maxRequests.toString(), now.toString(), requestId],
    };

    let result: unknown;
    try {
      result = await this.client.evalSha(this.scriptHash, options);
    } catch (err: unknown) {
      // The script cache is emptied by a server restart or SCRIPT FLUSH.
      // EVAL runs the script and re-caches it under the same hash, so the
      // next EVALSHA succeeds again.
      if (!(err instanceof Error) || !err.message.includes('NOSCRIPT')) throw err;
      result = await this.client.eval(SlidingWindowLimiter.WINDOW_SCRIPT, options);
    }
    return result === 1;
  }
}
**Architecture Rationale:**
- `SCRIPT LOAD` + `EVALSHA` reduces network payload and leverages Redis's script cache.
- The random suffix in `request_id` prevents member collisions when multiple requests arrive in the same millisecond. A sorted set stores each member only once, so two requests with an identical member would be counted as one.
- `PEXPIRE` on the key ensures automatic cleanup of expired windows, preventing unbounded memory growth.
Pitfall Guide
1. Unbounded Stream Growth
Explanation: Streams append indefinitely by default. Without explicit trimming, memory consumption grows linearly with event volume, eventually triggering OOM kills or eviction policies that corrupt data.
Fix: Always pair `XADD` with a trimming strategy (`MAXLEN` or `MINID`). Use `XADD stream_key MAXLEN ~ 10000 * field value` for approximate trimming, or implement a background job that calls `XTRIM` during low-traffic windows.
2. Blocking the Event Loop with Long Lua Scripts
Explanation: Redis executes Lua scripts synchronously on the main thread. A script exceeding 10ms stalls all other clients, causing cascading timeouts across the entire application.
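Fix: Keep scripts deterministic and under 5ms execution time. During development, step through scripts with the Lua debugger (`redis-cli --ldb`, controlled by `SCRIPT DEBUG`) and measure their duration with `SLOWLOG GET`. For complex logic, split operations into multiple smaller atomic calls. Redis 7+ Functions make scripts easier to manage and persist, but they still run on the main thread and block other clients while executing.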
3. Memory Bloat from Long ZSET Members
Explanation: Sorted set memory consumption scales with both score precision and member string length. Storing full JSON payloads or verbose identifiers as members multiplies RAM usage by 3-5x compared to compact IDs.
Fix: Use short, fixed-length identifiers (UUIDs, hashed strings, or numeric IDs) as ZSET members. Store full payloads in separate hash keys (HSET) and reference them by ID. Monitor MEMORY USAGE per key during load testing.
4. Assuming Streams Survive Restarts Without Persistence
Explanation: Redis defaults to in-memory storage. If used as a primary event log or ranking source, an unconfigured restart results in complete data loss, not a cache miss.
Fix: Enable AOF persistence with appendonly yes and appendfsync everysec. For critical workloads, configure aof-use-rdb-preamble yes to combine RDB startup speed with AOF durability. Test restore procedures regularly.
5. Misapplying Consumer Groups to Single-Consumer Workloads
Explanation: Consumer groups introduce state tracking overhead (pending entry lists, group metadata). Using them for a single worker adds unnecessary complexity and memory usage.
Fix: Use direct XREAD with a blocking timeout for single-consumer scenarios. Reserve XREADGROUP only when multiple workers must share load or when message acknowledgment tracking is required.
6. Ignoring Cluster Mode Limitations for Multi-Key Operations
Explanation: Redis Cluster hashes keys to different nodes. Lua scripts and multi-key commands fail if keys do not share the same hash slot, throwing CROSSSLOT errors.
Fix: Use hash tags {resource_id} in key names to force co-location. Example: limiter:{user:123}:window and limiter:{user:123}:metadata will always route to the same node. Validate key distribution before deploying to cluster mode.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| <200K events/sec, single datacenter | Redis Streams | Eliminates JVM overhead, simplifies consumer group management, reduces infrastructure footprint | Lowers compute costs by 60-80% vs Kafka brokers |
| >500K events/sec, multi-region replication | Kafka + KRaft | Redis lacks native cross-datacenter replication and partition scaling at this volume | Higher infrastructure cost, but necessary for scale |
| Real-time ranking with <10M entities | Redis ZSET | O(log N) updates, sub-ms latency, no row locks or materialized view maintenance | Reduces database load by 40-70% |
| Complex analytical ranking with historical snapshots | PostgreSQL + Window Functions | Relational engines handle time-travel queries and complex joins more efficiently | Higher query latency, but better for analytical workloads |
| Rate limiting across distributed services | Redis Lua + EVALSHA | Atomic execution prevents TOCTOU races without distributed lock overhead | Eliminates dedicated rate-limiting service and its deployment pipeline |
Configuration Template
# redis-production.conf
# Template for running the ranking, stream and rate-limiter patterns in
# production: AOF persistence, a memory cap, script limits, basic hardening
# and slow-log capture. Review every NOTE(review) before deploying.
# Persistence
# Append-only file, fsynced once per second.
appendonly yes
appendfsync everysec
# Start rewritten AOF files with an RDB-format preamble for faster loading.
aof-use-rdb-preamble yes
# Rewrite the AOF once it has grown by 100% since the last rewrite and is
# at least 64mb.
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
# Memory Management
maxmemory 8gb
# NOTE(review): allkeys-lru allows eviction of any key once maxmemory is
# reached, including streams and sorted sets that act as a primary data
# store. If losing those keys is unacceptable, confirm whether noeviction
# (or a volatile-* policy) is the intended setting.
maxmemory-policy allkeys-lru
# Size limits (bytes / entry count) for a single internal stream node.
stream-node-max-bytes 4096
stream-node-max-entries 100
# Lua Safety
# Value is in milliseconds. NOTE(review): 5000 ms is far above the 5 ms
# script budget recommended in the pitfall guide — confirm this is intended.
lua-time-limit 5000
# NOTE(review): believed to be deprecated and ignored on Redis 7.0+ —
# verify against the target server version.
lua-replicate-commands yes
# Network & Security
# NOTE(review): binding to 127.0.0.1 accepts local connections only;
# adjust for clients running on other hosts.
bind 127.0.0.1
protected-mode yes
# NOTE(review): redis.conf is not known to expand environment variables, so
# ${REDIS_PASSWORD} presumably has to be substituted by a templating step
# before the server starts — confirm with the deployment tooling.
requirepass ${REDIS_PASSWORD}
# Renaming a command to the empty string disables it.
rename-command FLUSHDB ""
rename-command FLUSHALL ""
rename-command DEBUG ""
# Slow Log & Monitoring
# Threshold is in microseconds (1000 = 1 ms); keep the latest 128 entries.
slowlog-log-slower-than 1000
slowlog-max-len 128
Quick Start Guide
- Initialize the instance: Run `redis-server redis-production.conf` or deploy via container with the configuration mounted. Verify persistence is active with `redis-cli INFO persistence`.
- Preload atomic scripts: Execute `SCRIPT LOAD` for your Lua rate limiter and ranking utilities. Store the returned SHA1 hashes in your application configuration.
- Create stream consumer groups: Run `XGROUP CREATE <stream_key> <group_name> $ MKSTREAM` for each processing pipeline. Verify with `XINFO GROUPS <stream_key>`.
- Deploy client wrappers: Integrate the TypeScript classes into your service layer. Replace existing Kafka producers or PostgreSQL rank queries with the Redis-native equivalents.
- Validate under load: Run a synthetic traffic generator targeting 50K concurrent operations. Monitor `INFO stats`, `SLOWLOG GET`, and memory usage. Adjust `maxmemory` and `lua-time-limit` based on observed p99 latencies.