# Runtime stage: copies the build output from the `builder` stage and starts
# the Next.js standalone server on port 3000.
FROM node:20-alpine AS runner
WORKDIR /app
COPY --from=builder /build/.next ./.next
COPY --from=builder /build/node_modules ./node_modules
COPY --from=builder /build/package.json ./
EXPOSE 3000
# The standalone server is part of the copied .next directory (/app/.next);
# it does not live under node_modules.
# NOTE(review): assumes next.config sets `output: 'standalone'`; the standalone
# server may also need .next/static and public/ copied next to it — confirm.
CMD ["node", ".next/standalone/server.js"]
The multi-stage build reduces image size and ensures only production artifacts reach the runtime. The health endpoint is implemented as a lightweight API route:
```typescript
// src/app/api/system/status/route.ts
import { NextResponse } from 'next/server';

// Opt out of static caching so every probe executes the handler and the
// timestamp reflects the time of the request rather than the time of the build.
export const dynamic = 'force-dynamic';

/**
 * Health endpoint polled by the deployment script.
 *
 * Responds with HTTP 200 and a small JSON document containing a fixed
 * `status`, the current time in ISO 8601 form, and the application version
 * taken from the APP_VERSION environment variable ('unknown' when unset).
 */
export async function GET() {
  return NextResponse.json(
    {
      status: 'operational',
      timestamp: new Date().toISOString(),
      version: process.env.APP_VERSION || 'unknown',
    },
    { status: 200 },
  );
}
```
Step 2: Define Dual Runtime Environments
Docker Compose manages both environments simultaneously. Each environment runs on a distinct host port to prevent binding conflicts.
# docker-compose.release.yml
# Two identical application environments ("slots") plus the Nginx proxy that
# decides which slot receives live traffic.
services:
  # Primary slot: container port 3000 published on host port 8001.
  runtime_primary:
    build:
      context: .
      dockerfile: Dockerfile.runtime
    container_name: env-primary
    environment:
      - NODE_ENV=production
      - PORT=3000
    ports:
      - "8001:3000"
    restart: unless-stopped

  # Secondary slot: same image definition, published on host port 8002.
  runtime_secondary:
    build:
      context: .
      dockerfile: Dockerfile.runtime
    container_name: env-secondary
    environment:
      - NODE_ENV=production
      - PORT=3000
    ports:
      - "8002:3000"
    restart: unless-stopped

  # Traffic director: forwards port 80 to whichever slot the upstream file names.
  traffic_proxy:
    image: nginx:1.25-alpine
    container_name: proxy-director
    ports:
      - "80:80"
    # The upstream file addresses the slots through host.docker.internal.
    # That name is not defined by default on Linux Docker Engine, so map it to
    # the host gateway explicitly; otherwise Nginx fails to resolve the upstream.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ./nginx/proxy.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/upstream_target.conf:/etc/nginx/conf.d/upstream_target.conf:ro
    depends_on:
      - runtime_primary
      - runtime_secondary
The restart: unless-stopped policy ensures environments recover from unexpected crashes without manual intervention. Port mapping isolates each container while exposing them to the host network for proxy routing.
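Step 3: Configure the Traffic Proxy
Nginx acts as the traffic director. Instead of hardcoding backend addresses in the main configuration, it reads the upstream definition from a separate file. This separation lets the deployment script switch backends by rewriting one small file and triggering a graceful reload, without editing the main proxy configuration.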
# nginx/proxy.conf
# Main Nginx configuration for the traffic director. The backend address is
# not defined here; it comes from the included upstream_target.conf.
events {
    worker_connections 1024;
}

http {
    # Only ask the backend to upgrade the connection when the client sent an
    # Upgrade header (e.g. WebSockets). For ordinary requests the Connection
    # header is left empty, which means it is not forwarded at all and does not
    # interfere with upstream keepalive connections.
    map $http_upgrade $connection_upgrade {
        default upgrade;
        ''      '';
    }

    # Defines the `live_backend` upstream used by proxy_pass below.
    include /etc/nginx/conf.d/upstream_target.conf;

    server {
        listen 80;
        server_name _;

        location / {
            proxy_pass http://live_backend;
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection $connection_upgrade;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_connect_timeout 5s;
            proxy_read_timeout 30s;
        }
    }
}
The upstream target file defines which environment receives traffic:
# nginx/upstream_target.conf
# Defines the `live_backend` upstream that proxy.conf's proxy_pass targets.
# The deployment script overwrites this file with the target slot's port and
# then reloads Nginx, so the port below is only the initial value.
upstream live_backend {
# 8001 is the host port published by the primary environment.
server host.docker.internal:8001;
}
Initially, traffic routes to port 8001 (primary). The secondary environment remains idle until the deployment script promotes it.
Step 4: Orchestrate the Release Cycle
The deployment script handles environment selection, artifact building, health validation, and traffic switching. It maintains state in a lightweight environment file.
#!/usr/bin/env bash
# deploy.sh — blue-green release for the primary/secondary runtime slots.
#
# Builds and starts the idle slot, waits for its health endpoint to return
# HTTP 200, repoints the Nginx upstream at it, and records the new active slot
# in the state file. Exits non-zero without switching traffic when the target
# never becomes healthy or when Nginx rejects the new configuration.
set -euo pipefail

COMPOSE_FILE="docker-compose.release.yml"
STATE_FILE="./deployment_state.env"
UPSTREAM_FILE="nginx/upstream_target.conf"
PROXY_CONTAINER="proxy-director"
MAX_RETRIES=15
RETRY_INTERVAL=3
STATE_TMP=""

# Remove a leftover temporary state file on any exit path.
trap '[[ -z "$STATE_TMP" ]] || rm -f -- "$STATE_TMP"' EXIT

# Load current state. A missing file means "first deployment" (primary is
# live). A file that exists but does not define ACTIVE_SLOT is handled by the
# validation below instead of tripping `set -u`.
if [[ -f "$STATE_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$STATE_FILE"
  ACTIVE_SLOT="${ACTIVE_SLOT:-}"
else
  ACTIVE_SLOT="primary"
fi

# Determine target environment. Unknown values abort: guessing could rebuild
# the slot that is currently serving traffic.
case "$ACTIVE_SLOT" in
  primary)
    TARGET_SLOT="secondary"
    TARGET_PORT=8002
    TARGET_SERVICE="runtime_secondary"
    ;;
  secondary)
    TARGET_SLOT="primary"
    TARGET_PORT=8001
    TARGET_SERVICE="runtime_primary"
    ;;
  *)
    echo "✗ Invalid ACTIVE_SLOT '$ACTIVE_SLOT' in $STATE_FILE. Aborting." >&2
    exit 1
    ;;
esac

echo "▶ Active slot: $ACTIVE_SLOT | Target slot: $TARGET_SLOT"

# Build and start target environment
echo "▶ Building $TARGET_SERVICE..."
docker compose -f "$COMPOSE_FILE" build "$TARGET_SERVICE"
docker compose -f "$COMPOSE_FILE" up -d --no-deps "$TARGET_SERVICE"

# Health verification loop
echo "▶ Verifying health on port $TARGET_PORT..."
HEALTHY=false
for ((attempt = 1; attempt <= MAX_RETRIES; attempt++)); do
  # curl prints 000 and exits non-zero when it cannot connect; `|| true` keeps
  # `set -e` from aborting the retry loop. The timeouts stop a hung backend
  # from stalling the deployment indefinitely.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 2 --max-time 5 \
    "http://localhost:${TARGET_PORT}/api/system/status" || true)
  if [[ "$HTTP_CODE" == "200" ]]; then
    HEALTHY=true
    echo "▶ Health check passed (attempt $attempt)"
    break
  fi
  echo "▶ Retry $attempt/$MAX_RETRIES (status: $HTTP_CODE)"
  # No point in waiting after the final attempt.
  if ((attempt < MAX_RETRIES)); then
    sleep "$RETRY_INTERVAL"
  fi
done

if [[ "$HEALTHY" != "true" ]]; then
  echo "✗ Health verification failed. Aborting switch." >&2
  exit 1
fi

# Traffic switch
echo "▶ Routing traffic to port $TARGET_PORT..."

# Keep the current upstream definition so it can be restored if Nginx rejects
# the new one.
PREVIOUS_UPSTREAM=""
if [[ -f "$UPSTREAM_FILE" ]]; then
  PREVIOUS_UPSTREAM=$(<"$UPSTREAM_FILE")
fi

# The file is a single-file bind mount inside the proxy container, so it is
# rewritten in place; replacing it via rename would leave the container
# looking at the old inode.
cat > "$UPSTREAM_FILE" <<EOF
upstream live_backend {
    server host.docker.internal:${TARGET_PORT};
}
EOF

# Validate before reloading so a broken configuration never reaches the
# running workers.
if ! docker exec "$PROXY_CONTAINER" nginx -t; then
  echo "✗ Nginx rejected the new configuration. Restoring previous upstream." >&2
  if [[ -n "$PREVIOUS_UPSTREAM" ]]; then
    printf '%s\n' "$PREVIOUS_UPSTREAM" > "$UPSTREAM_FILE"
  fi
  exit 1
fi
docker exec "$PROXY_CONTAINER" nginx -s reload

# Persist state atomically: write a temporary file next to the state file and
# rename it into place, so a crash never leaves a half-written state file.
STATE_TMP=$(mktemp "${STATE_FILE}.XXXXXX")
printf 'ACTIVE_SLOT=%s\n' "$TARGET_SLOT" > "$STATE_TMP"
mv -f -- "$STATE_TMP" "$STATE_FILE"
STATE_TMP=""

echo "✓ Deployment complete. Active slot: $TARGET_SLOT"
Key architectural decisions:
- --no-deps tells Docker Compose not to start or recreate any linked services, so only the target environment is touched while the active environment keeps serving traffic.
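- The health check loop retries at a fixed interval (up to MAX_RETRIES attempts, RETRY_INTERVAL seconds apart) rather than using exponential backoff; traffic is switched only after the endpoint returns HTTP 200, which prevents premature switches.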
- State persistence uses a simple environment file, avoiding external dependencies while maintaining idempotency.
- Nginx reload is graceful; existing connections drain while new requests route to the updated upstream.
Pitfall Guide
1. Skipping Pre-Switch Health Validation
Explanation: Switching traffic before the new environment finishes initialization causes immediate 502 errors. Containers may report "running" while the application is still compiling assets or establishing database connections.
Fix: Always verify the health endpoint returns 200 with expected payload structure. Add a minimum startup delay if the framework requires warm-up time.
2. Ignoring Connection Draining
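Explanation: Restarting or stopping Nginx terminates active connections abruptly, and even a graceful reload can cut off long-lived connections if old worker processes are forced to exit before those connections finish. Users mid-transaction experience dropped sessions.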
Fix: Configure proxy_read_timeout and keepalive_timeout appropriately. Use nginx -s reload instead of restart to allow graceful connection handoff.
3. State File Race Conditions
Explanation: Concurrent deployment triggers can corrupt the state file, causing both environments to receive traffic or neither to activate.
Fix: Wrap state updates in atomic file operations. Use mv instead of direct writes, or implement file locking with flock.
4. Database Schema Incompatibility
Explanation: Blue-green assumes identical environments, but database migrations often break this assumption. New code may reference columns that don't exist in the old environment, or vice versa.
Fix: Implement backward-compatible migrations. Deploy schema changes first, then application code. Never couple destructive DDL with feature releases.
5. Resource Starvation on Dual Instances
Explanation: Running two full environments doubles memory and CPU consumption. On constrained hosts, the OOM killer may terminate the active environment during the build phase.
Fix: Set explicit resource limits in Docker Compose (mem_limit, cpus). Monitor host capacity before enabling blue-green. Consider swapping to rolling deployments if infrastructure is tight.
6. Nginx Cache Staleness
Explanation: If Nginx caches responses, switching backends may serve stale content from the previous environment until cache TTL expires.
Fix: Disable caching for dynamic routes or implement cache purging as part of the deployment script. Use the proxy_cache_bypass directive for critical endpoints.
7. Missing Rollback Automation
Explanation: Operators assume they can manually revert, but panic-driven manual switches introduce human error and delay.
Fix: Build a rollback.sh script that mirrors the deployment logic but targets the previous slot. Integrate it into CI/CD pipelines with one-click execution.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-traffic SaaS platform | Blue-Green | Zero downtime, instant rollback, predictable user experience | +100% compute during transition |
| Internal tooling / low traffic | Rolling Update | Simpler infrastructure, lower resource overhead | Baseline compute |
| ML model serving / canary testing | Canary Release | Gradual traffic shifting enables performance validation | +10-30% compute |
| Stateful workloads with local storage | Blue-Green + Volume Sync | Prevents data loss during environment swap | +100% compute + storage sync overhead |
Configuration Template
# docker-compose.release.yml
# Compact template: two resource-limited application slots and the Nginx proxy.
services:
  # Primary slot, published on host port 8001.
  runtime_primary:
    build: .
    container_name: env-primary
    ports: ["8001:3000"]
    restart: unless-stopped
    deploy:
      resources:
        limits: { cpus: '1.0', memory: 1G }

  # Secondary slot, published on host port 8002.
  runtime_secondary:
    build: .
    container_name: env-secondary
    ports: ["8002:3000"]
    restart: unless-stopped
    deploy:
      resources:
        limits: { cpus: '1.0', memory: 1G }

  # Traffic director on port 80.
  traffic_proxy:
    image: nginx:1.25-alpine
    container_name: proxy-director
    ports: ["80:80"]
    # host.docker.internal is not defined by default on Linux Docker Engine;
    # map it to the host gateway so Nginx can resolve the upstream address.
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes:
      - ./nginx/proxy.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/upstream_target.conf:/etc/nginx/conf.d/upstream_target.conf:ro
# nginx/upstream_target.conf
# Initial upstream definition: live traffic goes to host port 8001.
# NOTE: the deployment and rollback scripts regenerate this file with only the
# `server` line, so the `keepalive` setting below is lost after the first
# switch unless the scripts are extended to emit it as well.
upstream live_backend {
server host.docker.internal:8001;
# Cache idle connections to the upstream for reuse (limit: 32).
# NOTE(review): upstream keepalive also needs HTTP/1.1 and a cleared
# Connection header on the proxy side — confirm against proxy.conf.
keepalive 32;
}
#!/usr/bin/env bash
# rollback.sh — route traffic back to the slot that is NOT currently active.
#
# Reads the active slot from the state file, verifies that the other slot
# still answers its health endpoint, repoints the Nginx upstream at it, and
# records that slot as the new active one so the state file keeps matching
# what is actually serving traffic.
set -euo pipefail

STATE_FILE="./deployment_state.env"
UPSTREAM_FILE="nginx/upstream_target.conf"
PROXY_CONTAINER="proxy-director"

if [[ ! -f "$STATE_FILE" ]]; then
  echo "✗ State file $STATE_FILE not found; nothing to roll back." >&2
  exit 1
fi
# shellcheck source=/dev/null
source "$STATE_FILE"
# Tolerate a state file that does not define ACTIVE_SLOT; the validation
# below reports it instead of `set -u` aborting with an opaque error.
ACTIVE_SLOT="${ACTIVE_SLOT:-}"

case "$ACTIVE_SLOT" in
  primary)
    TARGET_SLOT="secondary"
    TARGET_PORT=8002
    ;;
  secondary)
    TARGET_SLOT="primary"
    TARGET_PORT=8001
    ;;
  *)
    echo "✗ Invalid ACTIVE_SLOT '$ACTIVE_SLOT' in $STATE_FILE. Aborting." >&2
    exit 1
    ;;
esac

# Make sure the previous environment is still alive before sending users to
# it. curl prints 000 when it cannot connect; `|| true` keeps `set -e` quiet.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
  --connect-timeout 2 --max-time 5 \
  "http://localhost:${TARGET_PORT}/api/system/status" || true)
if [[ "$HTTP_CODE" != "200" ]]; then
  echo "✗ Slot '$TARGET_SLOT' on port $TARGET_PORT is not healthy (status: $HTTP_CODE). Aborting rollback." >&2
  exit 1
fi

# Keep the current definition so it can be restored if Nginx rejects the new one.
PREVIOUS_UPSTREAM=""
if [[ -f "$UPSTREAM_FILE" ]]; then
  PREVIOUS_UPSTREAM=$(<"$UPSTREAM_FILE")
fi

# Rewritten in place because the file is a single-file bind mount inside the
# proxy container.
cat > "$UPSTREAM_FILE" <<EOF
upstream live_backend {
    server host.docker.internal:${TARGET_PORT};
}
EOF

if ! docker exec "$PROXY_CONTAINER" nginx -t; then
  echo "✗ Nginx rejected the new configuration. Restoring previous upstream." >&2
  if [[ -n "$PREVIOUS_UPSTREAM" ]]; then
    printf '%s\n' "$PREVIOUS_UPSTREAM" > "$UPSTREAM_FILE"
  fi
  exit 1
fi
docker exec "$PROXY_CONTAINER" nginx -s reload

# Record the slot that now receives traffic. Writing the old ACTIVE_SLOT back
# would make the next deployment rebuild the live environment.
echo "ACTIVE_SLOT=$TARGET_SLOT" > "$STATE_FILE"
echo "✓ Rollback complete. Traffic routed to port $TARGET_PORT"
Quick Start Guide
- Initialize the repository structure: Create `nginx/` and `src/app/api/system/status/`, and place the Dockerfile and compose file at the root.
- Start the baseline environment: Run `docker compose -f docker-compose.release.yml up -d` to spin up both runtimes and the proxy.
- Verify initial routing: Access `http://localhost` and confirm traffic reaches port 8001. Test the health endpoint at `/api/system/status`.
- Execute first deployment: Run `./deploy.sh`. Observe the script build the secondary environment, validate health, update the upstream config, and reload Nginx. Refresh the browser to confirm zero interruption.
- Validate rollback: Run `./rollback.sh` and verify traffic instantly reverts to the previous environment without service degradation.