const after: BenchmarkResult = JSON.parse(fs.readFileSync(afterPath, 'utf-8'));
const metrics: (keyof BenchmarkResult)[] = ['requests', 'latency', 'throughput'];
for (const metric of metrics) {
const b = before[metric].average;
const a = after[metric].average;
const delta = ((a - b) / b) * 100;
console.log(${metric}.average: ${b.toFixed(1)} β ${a.toFixed(1)} (${delta > 0 ? '+' : ''}${delta.toFixed(1)}%));
}
}
Run the baseline with:
```bash
autocannon -c 50 -d 10 -j http://localhost:3000/api/financial/summary > baseline.json
```
Step 2: Eliminate Sequential I/O (N+1 to Single Aggregation)
The original implementation iterates over parent records and fires a separate query for each one to fetch its child records. For N parents this costs N+1 round-trips, and because the queries run one after another, their database latencies add up.
Rationale: PostgreSQL can aggregate nested data in a single pass using json_agg and GROUP BY. This reduces network latency, connection pool contention, and application-side loop overhead.
// src/services/ledger.service.ts
import { Pool, PoolClient } from 'pg';
/**
 * Read-side service for ledger data, backed by an injected `pg` pool.
 */
export class LedgerService {
  constructor(private pool: Pool) {}

  /**
   * Loads one row per transaction for an organisation, with that
   * transaction's ledger entries nested as a JSON array, using a single query.
   *
   * Transactions without any ledger entry are not returned (INNER JOIN).
   *
   * @param orgId - Bound to `$1`; matched against `t.org_id`.
   * @param startDate - Bound to `$2`; lower bound of the `BETWEEN` filter on `t.posted_at`.
   * @param endDate - Bound to `$3`; upper bound of the `BETWEEN` filter on `t.posted_at`.
   * @returns The `rows` of the query result, ordered by `net_total` descending.
   */
  async getSummary(orgId: string, startDate: string, endDate: string) {
    // The statement text is kept flush-left so the SQL sent to the server
    // is character-for-character what it has always been.
    const sql = `
SELECT
t.tx_id,
t.org_id,
t.posted_at,
json_agg(
json_build_object(
'entry_id', e.entry_id,
'amount', e.amount,
'currency', e.currency,
'category', e.category
) ORDER BY e.entry_id
) AS entries,
SUM(e.amount) AS net_total
FROM transactions t
INNER JOIN ledger_entries e ON e.tx_id = t.tx_id
WHERE t.org_id = $1
AND t.posted_at BETWEEN $2 AND $3
GROUP BY t.tx_id
ORDER BY net_total DESC
`;
    // Values are passed as bind parameters, never interpolated into the SQL.
    const params = [orgId, startDate, endDate];
    const { rows } = await this.pool.query(sql, params);
    return rows;
  }
}
Why this works: The database engine handles sorting, aggregation, and nesting. The application receives a fully shaped payload in one round-trip. Connection pool utilization drops dramatically, and latency variance stabilizes.
Step 3: Remove CPU-Bound Cryptographic Overhead
The original code computed a SHA-256 hash of entire transaction objects for cache invalidation. Serializing and hashing a whole object on every request is pure CPU overhead, and the collision resistance a cryptographic hash provides is unnecessary for cache keys.
Rationale: Cache validity only requires a deterministic, versioned identifier. Concatenating the primary key with a timestamp or update counter provides uniqueness at a fraction of the CPU cost.
// src/utils/cache-key.generator.ts
/**
 * Builds a cache key of the form `<tx_id>:<posted_at in epoch ms, base 36>`.
 *
 * No hashing is involved: the key is plain string composition.
 *
 * @param record - Source of the key parts; `posted_at` is re-wrapped in
 *   `new Date(...)` before its epoch-millisecond value is read.
 * @returns The composed key. An invalid date produces the suffix `NaN`.
 */
export function generateCacheKey(record: { tx_id: number; posted_at: Date }): string {
  const { tx_id: id, posted_at: postedAt } = record;
  const epochMs = new Date(postedAt).getTime();
  const version = epochMs.toString(36);
  return `${id}:${version}`;
}
Why this works: String concatenation and base-36 conversion execute in microseconds. Removing crypto.createHash and JSON.stringify from the hot path frees CPU cycles for request handling and reduces V8 allocation pressure.
Step 4: Reduce Heap Allocation & GC Pressure
Object spreading ({...record}) in tight loops creates shallow copies, triggering frequent minor garbage collection cycles. Under high concurrency, GC pauses manifest as p99 latency spikes.
Rationale: Explicit field mapping avoids unnecessary property enumeration and prevents accidental exposure of internal fields. It also gives the V8 engine predictable object shapes, improving hidden class optimization.
// src/mappers/transaction.mapper.ts
import { generateCacheKey } from '../utils/cache-key.generator';
/** One ledger entry as nested under a mapped transaction. */
type MappedTransactionEntry = {
  entry_id: number;
  amount: number;
  currency: string;
  category: string;
};

/**
 * Output contract of `mapTransactionRow`: one transaction together with its
 * nested ledger entries.
 */
export interface MappedTransaction {
  /** Transaction identifier. */
  tx_id: number;
  /** Identifier of the owning organisation. */
  org_id: string;
  /** Posting timestamp, declared as a string. */
  posted_at: string;
  /** Ledger entries belonging to this transaction. */
  entries: MappedTransactionEntry[];
  /** Net total for the transaction as a number. */
  net_total: number;
  /** Cache key associated with this transaction. */
  cache_key: string;
}
/**
 * Converts one raw aggregation row into the `MappedTransaction` output shape
 * by copying each field explicitly (no object spread).
 *
 * `posted_at` is normalised to match the declared `string` type:
 * `generateCacheKey` receives the same row and declares `posted_at` as a
 * `Date`, so a valid `Date` is converted with `toISOString()`. Any other value
 * (already a string, or an invalid `Date`) is passed through unchanged.
 *
 * @param row - Raw row; read fields are `tx_id`, `org_id`, `posted_at`,
 *   `entries` and `net_total`. The whole row is also handed to
 *   `generateCacheKey`.
 * @returns The mapped transaction, including a freshly generated `cache_key`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- signature kept for existing callers
export function mapTransactionRow(row: any): MappedTransaction {
  const postedAt = row.posted_at;
  // Guard against invalid dates: toISOString() throws a RangeError on them.
  const isValidDate = postedAt instanceof Date && !Number.isNaN(postedAt.getTime());

  return {
    tx_id: row.tx_id,
    org_id: row.org_id,
    posted_at: isValidDate ? postedAt.toISOString() : postedAt,
    entries: row.entries,
    // net_total is parsed rather than copied, so a numeric string becomes a number.
    net_total: parseFloat(row.net_total),
    // The untouched row is passed on so the key is derived from the original posted_at.
    cache_key: generateCacheKey(row),
  };
}
Why this works: Explicit mapping eliminates spread operators, reduces temporary object creation, and enforces a strict output contract. The V8 garbage collector processes fewer short-lived allocations, shrinking max pause times from ~23ms to ~4ms.
Step 5: Unblock the Event Loop During Serialization
res.json() invokes JSON.stringify() synchronously. For payloads exceeding 100KB, this blocks the event loop, causing request queuing and latency tail inflation.
Rationale: Schema-compiled serializers like fast-json-stringify generate optimized serialization functions at startup. They bypass runtime type checking and produce faster, more predictable output.
// src/serializers/response.serializer.ts
import fastJson from 'fast-json-stringify';
// Schema for a single ledger entry nested inside a transaction.
const entrySchema = {
  type: 'object',
  properties: {
    entry_id: { type: 'integer' },
    amount: { type: 'number' },
    currency: { type: 'string' },
    category: { type: 'string' },
  },
};

// Schema for one transaction, including its array of entries.
const transactionSchema = {
  type: 'object',
  properties: {
    tx_id: { type: 'integer' },
    org_id: { type: 'string' },
    posted_at: { type: 'string' },
    net_total: { type: 'number' },
    cache_key: { type: 'string' },
    entries: { type: 'array', items: entrySchema },
  },
};

// Top-level response envelope: a count plus the list of transactions.
const responseSchema = {
  title: 'FinancialSummaryResponse',
  type: 'object',
  properties: {
    count: { type: 'integer' },
    data: { type: 'array', items: transactionSchema },
  },
};

/**
 * Serializer for the financial summary response, built once at module load
 * by passing `responseSchema` to `fastJson`.
 */
export const stringifyResponse = fastJson(responseSchema);
Route implementation:
// src/routes/financial.routes.ts
import { Router, Request, Response } from 'express';
import { LedgerService } from '../services/ledger.service';
import { mapTransactionRow } from '../mappers/transaction.mapper';
import { stringifyResponse } from '../serializers/response.serializer';
const router = Router();
const ledger = new LedgerService(/* pool instance */);

/**
 * GET /api/financial/summary
 *
 * Reads `org_id`, `start` and `end` from the query string, loads the summary
 * rows through `LedgerService.getSummary`, maps each row with
 * `mapTransactionRow`, and writes `{ data, count }` serialized by
 * `stringifyResponse` with a JSON content type.
 *
 * Any failure (query, mapping or serialization) is forwarded to `next(err)`
 * so the application's error-handling middleware can answer the request.
 * Without this, a rejected promise from the async handler is never reported
 * to Express 4 and the request is left hanging.
 *
 * NOTE(review): the query parameters are only cast to `string`, not
 * validated; a missing or repeated parameter reaches the query as-is.
 */
router.get(
  '/api/financial/summary',
  async (req: Request, res: Response, next: (err?: unknown) => void) => {
    try {
      const { org_id, start, end } = req.query;
      const rows = await ledger.getSummary(org_id as string, start as string, end as string);
      const data = rows.map(mapTransactionRow);
      res.setHeader('Content-Type', 'application/json');
      res.end(stringifyResponse({ data, count: data.length }));
    } catch (err: unknown) {
      // Nothing has been written yet at this point: res.end() is the last call above.
      next(err);
    }
  },
);

export default router;
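Why this works: Pre-compiled serialization removes runtime reflection and reduces CPU overhead. Serialization is still synchronous, but it finishes sooner, so the event loop is blocked for less time per response. The serialized string is passed straight to res.end() and written to the socket stream, which helps maintain concurrency under load.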
Pitfall Guide
| Pitfall | Explanation | Fix |
|---|---|---|
| Optimizing Without a Baseline | Teams apply fixes based on intuition rather than measurement, often degrading performance or masking real bottlenecks. | Always capture autocannon or k6 metrics before and after every change. Store JSON outputs for regression tracking. |
| Using Cryptographic Hashes for Cache Keys | SHA-256/SHA-512 are designed for security, not speed. They consume CPU cycles and increase allocation pressure in hot paths. | Replace with deterministic string composition or non-cryptographic hashes (e.g., xxhash, murmurhash) when uniqueness, not security, is required. |
| Object Spreading in Hot Loops | {...obj} creates shallow copies, triggers hidden class deoptimization, and floods the V8 new-space heap. | Use explicit field mapping or Object.assign with predefined shapes. Define strict TypeScript interfaces to enforce structure. |
| Ignoring Connection Pool Exhaustion | Fixing N+1 queries without adjusting pool size can cause connection starvation under high concurrency. | Configure pg.Pool with max aligned to expected concurrency. Monitor pool.waitingCount and pool.totalCount in production. |
| Synchronous JSON.stringify on Large Payloads | Serializing >100KB objects blocks the event loop, causing request queuing and p99 inflation. | Use schema-compiled serializers (fast-json-stringify, runtypes + fast-json) or stream responses with JSONStream. |
| Profiling in Development Mode | Running --prof or Clinic.js without production flags (--optimize_for_size, --max_old_space_size) yields inaccurate CPU/memory profiles. | Profile with NODE_ENV=production and match heap limits to deployment configuration. Use clinic flame and heapprofiler in staging. |
| Chasing p95 Instead of p99 | p95 masks tail latency caused by GC pauses, serialization blocking, or connection pool waits. | Optimize for p99 and p99.9. These metrics reflect actual user experience degradation and reveal systemic bottlenecks. |
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Low concurrency (<20 req/s), simple payloads | Native JSON.stringify + standard Express routing | Overhead of compiled serializers outweighs benefits at low scale | Minimal infrastructure cost |
| Medium concurrency (20-100 req/s), nested data | Single SQL aggregation + explicit mapping | Reduces I/O round-trips and heap allocation without external dependencies | Moderate DB compute cost |
| High concurrency (>100 req/s), large payloads | Schema-compiled JSON + connection pool tuning + deterministic cache keys | Prevents event loop blocking and GC pauses under sustained load | Higher initial dev time, lower infra scaling cost |
| Multi-tenant SaaS with variable query complexity | Query plan analysis (EXPLAIN ANALYZE) + read replicas + caching layer | Isolates tenant-specific bottlenecks and prevents cross-tenant latency spikes | Increased architecture complexity, predictable p99 |
Configuration Template
// src/config/database.ts
import { Pool } from 'pg';
// Connection settings are read from the environment once, at module load.
const { DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASS } = process.env;

/**
 * Shared `pg` pool for the ledger database.
 *
 * Host, port, database and user fall back to local defaults when the
 * corresponding environment variable is unset or empty; the password has no
 * fallback.
 */
export const ledgerPool = new Pool({
  host: DB_HOST || 'localhost',
  port: Number.parseInt(DB_PORT || '5432', 10),
  database: DB_NAME || 'ledger',
  user: DB_USER || 'app_user',
  password: DB_PASS,
  max: 20, // Align with expected concurrent connections
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});
// src/config/telemetry.ts
// Endpoint exercised by the benchmark run.
const BENCHMARK_TARGET_URL = 'http://localhost:3000/api/financial/summary';

/**
 * Settings for the load-test run: 50 connections for a duration of 10,
 * issuing GET requests with a JSON content-type header against the
 * financial summary endpoint.
 */
export const benchmarkConfig = {
  connections: 50,
  duration: 10,
  method: 'GET',
  url: BENCHMARK_TARGET_URL,
  headers: { 'Content-Type': 'application/json' },
};
Quick Start Guide
- Initialize the harness: Install `autocannon` and `clinic` globally. Run `autocannon -c 50 -d 10 -j http://localhost:3000/api/financial/summary > baseline.json` to capture initial metrics.
- Apply I/O optimization: Replace sequential queries with a single `JOIN` + `json_agg` statement. Verify query execution time with `EXPLAIN ANALYZE`.
- Refactor CPU & memory paths: Swap cryptographic hashing for deterministic string keys. Replace object spreading with explicit TypeScript interfaces and field mapping.
- Compile serialization: Install `fast-json-stringify`, define the response schema, and replace `res.json()` with `res.end(stringifyResponse(payload))`.
- Validate deltas: Run the benchmark again, diff the JSON outputs, and confirm p99 latency reduction and throughput increase. Commit changes only when metrics improve across all three dimensions.