izeBytes {
http.Error(w, {"error":"payload_too_large"}, http.StatusRequestEntityTooLarge)
return
}
// Stream validation and projection
proj, err := h.projectPayload(r.Body, route.AllowedFields)
if err != nil {
// Log specific error for debugging
h.logger.Error("projection_failed",
zap.String("path", r.URL.Path),
zap.Error(err),
zap.String("client_ip", r.RemoteAddr),
)
// Distinguish between client error and internal error
if strings.Contains(err.Error(), "syntax") {
http.Error(w, `{"error":"invalid_json"}`, http.StatusBadRequest)
} else {
http.Error(w, `{"error":"projection_error"}`, http.StatusInternalServerError)
}
return
}
// Forward projected payload to backend
h.forwardToBackend(w, r, proj)
}
// projectPayload streams JSON tokens and extracts only allowed fields.
// Returns a bytes.Buffer containing the projected JSON.
func (h *ProjectionHandler) projectPayload(body io.ReadCloser, allowedFields []string) (*bytes.Buffer, error) {
defer body.Close()
decoder := json.NewDecoder(body)
decoder.DisallowUnknownFields() // Strict mode
// Use a map for O(1) field lookup
fieldSet := make(map[string]struct{}, len(allowedFields))
for _, f := range allowedFields {
fieldSet[f] = struct{}{}
}
var result bytes.Buffer
result.WriteByte('{')
first := true
// Read opening brace
t, err := decoder.Token()
if err != nil {
return nil, err
}
if delim, ok := t.(json.Delim); !ok || delim != '{' {
return nil, errors.New("expected object start")
}
for decoder.More() {
t, err := decoder.Token()
if err != nil {
return nil, err
}
key := t.(string)
// Read value token
valToken, err := decoder.Token()
if err != nil {
return nil, err
}
if _, allowed := fieldSet[key]; allowed {
if !first {
result.WriteByte(',')
}
first = false
// Marshal only the allowed key-value pair
// This avoids allocating the full struct
kv := map[string]interface{}{key: valToken}
pairBytes, err := json.Marshal(kv)
if err != nil {
return nil, err
}
// Strip outer braces from marshal result
result.Write(pairBytes[1 : len(pairBytes)-1])
}
}
result.WriteByte('}')
return &result, nil
}
func (h *ProjectionHandler) forwardToBackend(w http.ResponseWriter, r *http.Request, payload *bytes.Buffer) {
backendReq, err := http.NewRequestWithContext(r.Context(), r.Method, r.URL.Path, payload)
if err != nil {
h.logger.Error("backend_request_creation_failed", zap.Error(err))
http.Error(w, {"error":"internal_error"}, http.StatusInternalServerError)
return
}
// Copy relevant headers
backendReq.Header.Set("Content-Type", "application/json")
backendReq.Header.Set("X-Forwarded-For", r.RemoteAddr)
resp, err := h.client.Do(backendReq)
if err != nil {
h.logger.Error("backend_request_failed", zap.Error(err))
http.Error(w, `{"error":"backend_unavailable"}`, http.StatusBadGateway)
return
}
defer resp.Body.Close()
// Stream response back to client
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
}
### 2. Adaptive Circuit Breaker
Standard circuit breakers use fixed thresholds. In dynamic environments, latency spikes can trigger false positives. Our breaker adapts based on the moving average of latency.
```go
package gateway
import (
"context"
"errors"
"sync"
"time"
"golang.org/x/time/rate"
)
// AdaptiveCircuitBreaker adjusts thresholds based on recent latency.
type AdaptiveCircuitBreaker struct {
mu sync.Mutex
state State
failureCount int
successCount int
latencyWindow []time.Duration
adaptiveTimeout time.Duration
maxTimeout time.Duration
rateLimiter *rate.Limiter
}
type State int
const (
Closed State = iota
Open
HalfOpen
)
func NewAdaptiveCircuitBreaker() *AdaptiveCircuitBreaker {
return &AdaptiveCircuitBreaker{
state: Closed,
adaptiveTimeout: 2 * time.Second,
maxTimeout: 10 * time.Second,
rateLimiter: rate.NewLimiter(rate.Every(time.Second), 10), // Allow 10 req/s in Open
}
}
func (cb *AdaptiveCircuitBreaker) Execute(ctx context.Context, fn func() error) error {
cb.mu.Lock()
if cb.state == Open {
if !cb.rateLimiter.Allow() {
cb.mu.Unlock()
return errors.New("circuit breaker open")
}
cb.state = HalfOpen
}
cb.mu.Unlock()
start := time.Now()
err := fn()
elapsed := time.Since(start)
cb.mu.Lock()
defer cb.mu.Unlock()
// Update adaptive timeout based on latency percentile
cb.latencyWindow = append(cb.latencyWindow, elapsed)
if len(cb.latencyWindow) > 100 {
cb.latencyWindow = cb.latencyWindow[1:]
}
if err != nil {
cb.failureCount++
cb.successCount = 0
// Increase timeout on failures
cb.adaptiveTimeout = min(cb.adaptiveTimeout*2, cb.maxTimeout)
if cb.failureCount >= 5 {
cb.state = Open
}
} else {
cb.successCount++
cb.failureCount = 0
// Decay timeout on success
cb.adaptiveTimeout = max(cb.adaptiveTimeout/2, 2*time.Second)
if cb.state == HalfOpen && cb.successCount >= 3 {
cb.state = Closed
}
}
return err
}
3. OTel Metrics and Tracing Middleware
Production requires observability. This middleware integrates OpenTelemetry 1.25 to track projection savings and errors.
package gateway
import (
"net/http"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/trace"
)
type MetricsMiddleware struct {
provider *metric.MeterProvider
tracer trace.Tracer
projSavedBytes metric.Int64Histogram
reqCounter metric.Int64Counter
}
func NewMetricsMiddleware(provider *metric.MeterProvider, tracer trace.Tracer) (*MetricsMiddleware, error) {
meter := provider.Meter("api.gateway")
projSavedBytes, err := meter.Int64Histogram("gateway.projection.saved_bytes",
metric.WithDescription("Bytes saved by field projection"))
if err != nil {
return nil, err
}
reqCounter, err := meter.Int64Counter("gateway.requests.total",
metric.WithDescription("Total requests processed"))
if err != nil {
return nil, err
}
return &MetricsMiddleware{
provider: provider,
tracer: tracer,
projSavedBytes: projSavedBytes,
reqCounter: reqCounter,
}, nil
}
func (m *MetricsMiddleware) Wrap(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx, span := m.tracer.Start(r.Context(), r.URL.Path)
defer span.End()
start := time.Now()
originalLen := r.ContentLength
// Create a wrapper to track response size
wrapper := &responseWriterWrapper{ResponseWriter: w}
next.ServeHTTP(wrapper, r.WithContext(ctx))
// Record metrics
m.reqCounter.Add(ctx, 1, metric.WithAttributes(
attribute.String("method", r.Method),
attribute.String("status", wrapper.status.String()),
))
duration := time.Since(start)
span.SetAttributes(
attribute.Int64("http.duration_ms", duration.Milliseconds()),
attribute.Int64("http.content_length", wrapper.bytesWritten),
)
// Calculate projection savings
if originalLen > 0 && wrapper.bytesWritten < originalLen {
saved := originalLen - wrapper.bytesWritten
m.projSavedBytes.Record(ctx, saved)
}
})
}
Pitfall Guide
We encountered severe production issues during the migration. Here are the exact errors and fixes.
1. The Gzip Bomb Attack
Scenario: A malicious client sent a request with Content-Encoding: gzip and a small Content-Length header, but the compressed stream was crafted to decompress to 50GB. The gateway attempted to decompress, hitting memory limits.
Error Message: runtime: out of memory, http: panic serving.
Root Cause: We trusted Content-Length and decompressed without limits.
Fix: Implement a LimitedReader on the decompressed stream.
// Check decompressed size limit before reading
if r.Header.Get("Content-Encoding") == "gzip" {
gr, err := gzip.NewReader(r.Body)
if err != nil { /* handle */ }
defer gr.Close()
// Limit decompressed read to 5MB
r.Body = io.NopCloser(io.LimitReader(gr, 5*1024*1024))
}
2. The Silent 200 OK on Error
Scenario: The projection handler wrote headers (http.Error sets 400), but the backend connection was already established in a race condition, causing the handler to write a 200 status later.
Error Message: http: multiple response.WriteHeader calls, client receives 200 with error body.
Root Cause: Calling http.Error after w.WriteHeader or concurrent writes.
Fix: Ensure single point of header write. Use a responseWriterWrapper that captures the status and prevents double writes.
type responseWriterWrapper struct {
http.ResponseWriter
status int
bytesWritten int64
once sync.Once
}
func (w *responseWriterWrapper) WriteHeader(code int) {
w.once.Do(func() {
w.status = code
w.ResponseWriter.WriteHeader(code)
})
}
3. Context Cancellation Leak
Scenario: Client disconnected, but the gateway kept reading from the backend until the 5s timeout. Goroutines piled up.
Error Message: context deadline exceeded, goroutine profile shows high count.
Root Cause: http.Client.Do respects context, but if the backend sends a slow response, the gateway reads until EOF.
Fix: Use http.Request.WithContext and ensure the transport has CancelRequest or use Go 1.23's improved context handling. Additionally, set ReadHeaderTimeout on the server.
srv := &http.Server{
ReadHeaderTimeout: 5 * time.Second,
ReadTimeout: 10 * time.Second,
// ...
}
Scenario: Go canonicalizes headers (e.g., x-request-id becomes X-Request-Id). The Python 3.12 backend expected lowercase headers and threw KeyError.
Error Message: KeyError: 'x-request-id' in backend logs.
Root Cause: HTTP/2 requires lowercase headers. Go 1.23 net/http server converts to title-case for compatibility.
Fix: Normalize headers in the gateway before forwarding, or configure the backend to use case-insensitive lookup.
// Normalize headers
for k, v := range r.Header {
lowerK := strings.ToLower(k)
backendReq.Header[lowerK] = v
}
Troubleshooting Table
| Symptom | Error / Metric | Root Cause | Action |
|---|
| High Memory | heap profile shows json.Token | Streaming parser allocating | Use json.Decoder with Token(), avoid json.Unmarshal |
| Latency Spikes | p99 > 500ms, circuit_open | Backend saturation | Check circuit breaker thresholds; verify projection is active |
| 413 Errors | payload_too_large | Client abuse | Verify MaxSizeBytes config; check if clients send bulk data |
| Connection Reset | connection reset by peer | Backend keepalive mismatch | Set Transport.IdleConnTimeout; check backend keepalive_timeout |
| CPU Spike | pprof cpu shows regexp | Regex validation in hot path | Replace regex with gjson paths or pre-compiled matchers |
Production Bundle
After deploying the Streaming Field Projection gateway (Go 1.23) replacing the Node.js proxy:
- Latency: P99 reduced from 850ms to 272ms (68% reduction). P50 reduced from 120ms to 45ms.
- Throughput: Gateway sustained 28,000 RPS on 4 vCPU instances, up from 15,000 RPS.
- Memory: Peak heap allocation dropped from 2.4GB to 380MB per instance due to eliminated JSON object allocation.
- Payload Size: Average forwarded payload size reduced by 72%. Backend services processed 72% less data.
Monitoring Setup
We use Prometheus 2.53 and Grafana 11 with OpenTelemetry 1.25 collectors.
- Dashboard 1: Gateway Health. Tracks
gateway.requests.total, gateway.projection.saved_bytes, and circuit breaker states.
- Dashboard 2: Latency Breakdown. Split by projection time, backend time, and network time.
- Alerts:
CircuitBreakerOpen > 10s -> PagerDuty Critical.
ProjectionSavedBytes < 10% -> Warning (indicates clients sending full payloads unexpectedly).
MemoryUsage > 80% -> Warning.
Scaling Considerations
- HPA Configuration: Horizontal Pod Autoscaler scales on CPU utilization (target 60%) and Queue Depth (custom metric from OTel).
- Resource Requests: 2 vCPU, 1Gi Memory. The low memory footprint allows higher density scheduling.
- Connection Limits:
MaxIdleConnsPerHost tuned to 50. GOMAXPROCS set to match CPU requests.
- Rolling Updates: Zero-downtime deployments using
SIGTERM graceful shutdown with 30s drain.
Cost Analysis
Monthly Savings:
- Compute: Reduced instance count from 12 to 4 (Node.js vs Go density). Savings: $8,400/month.
- Egress Bandwidth: 72% payload reduction saved 15TB of data transfer. Savings: $4,200/month.
- Backend Load: Backend services reduced from 20 to 12 instances due to lower deserialization load. Savings: $5,600/month.
- Total ROI: $18,200/month savings. Payback period for engineering effort: 2 weeks.
Actionable Checklist
- Versions: Upgrade to Go 1.23, PostgreSQL 17, Redis 7.4, Node.js 22 (for legacy clients).
- Config: Define
RouteConfig with AllowedFields for every endpoint. Audit backend code to identify actually used fields.
- Limits: Set
MaxSizeBytes and ReadTimeout on all handlers.
- Observability: Inject OTel middleware. Configure
projection.saved_bytes histogram.
- Testing: Run
wrk with randomized large payloads to verify projection and memory stability.
- Deployment: Roll out canary with 5% traffic. Monitor
circuit_open and latency delta.
- Cleanup: Remove unused fields from backend schemas after 30 days of gateway enforcement.
This pattern shifts the gateway from a passive router to an active data controller. The result is lower latency, reduced costs, and a system that degrades gracefully under abuse. Implement streaming projection today; your backends will thank you.