earch' | 'paid_social' | 'referral' | 'email' | 'direct';
/**
 * UTM campaign parameters carried by an acquisition touchpoint.
 *
 * `source`, `medium` and `campaign` are always required; `term` and
 * `content` may be omitted.
 */
export interface UTMParams {
  /** Traffic source, e.g. the `utm_source` value (`google`). */
  source: string;

  /** Marketing medium, e.g. `cpc`, `ppc` or `organic`. */
  medium: string;

  /** Campaign identifier. */
  campaign: string;

  /** Optional keyword/term associated with the click. */
  term?: string;

  /** Optional creative/content discriminator. */
  content?: string;
}
/**
 * Version 1 of the canonical acquisition event.
 *
 * The fields fall into four groups: user context, acquisition context,
 * consent/compliance, and an optional conversion signal used for funnel
 * tracking.
 */
export interface AcquisitionEventV1 {
  /** Schema discriminator; fixed to `'1.0.0'` for this version. */
  schema_version: '1.0.0';

  /** UUID that doubles as the idempotency key for the event. */
  event_id: string;

  /** Time at which the event occurred. */
  timestamp: ISO8601;

  // ---- User context ----

  /** Authenticated user, or `null` when the session is anonymous. */
  user_id: string | null;

  session_id: string;

  device_id: string;

  // ---- Acquisition context ----

  channel: AcquisitionChannel;

  /** UTM parameters, or `null` when none are available. */
  utm: UTMParams | null;

  /** Ad-platform click identifier (e.g., gclid, fbclid), stored hashed. */
  click_id: string | null;

  referrer: string | null;

  // ---- Consent & compliance ----

  consent_status: 'granted' | 'denied' | 'unknown';

  is_first_party: boolean;

  // ---- Conversion signal (optional, for funnel tracking) ----

  conversion_type?: 'signup' | 'purchase' | 'trial_start';

  revenue?: number;

  currency?: string;
}
#### 2. Ingestion Architecture
Implement a server-side ingestion endpoint that normalizes raw requests into the schema. This prevents schema drift at the source and ensures consent checks occur before data persistence.
```typescript
// services/acquisition-ingestion.ts
import * as crypto from 'node:crypto';

import { AcquisitionEventV1 } from './schemas/acquisition-event';
/**
 * Server-side ingestion of acquisition events.
 *
 * Raw payloads are normalized into `AcquisitionEventV1`, gated on consent,
 * have their click identifier hashed, and are then upserted into the event
 * store keyed by `event_id`.
 */
export class AcquisitionIngestionService {
  constructor(
    private eventStore: EventStore,
    // NOTE(review): injected but not consulted by `ingest` as written; the
    // consent decision below relies solely on fields carried by the event.
    private consentManager: ConsentManager
  ) {}

  /**
   * Normalizes, consent-checks, hashes and persists one raw event.
   *
   * Events that are not first-party are persisted only when marketing
   * consent is explicitly `'granted'`; both `'denied'` and `'unknown'` are
   * dropped without being stored. First-party events are stored regardless
   * of consent status.
   *
   * @param rawEvent Untrusted request payload; its shape is established by
   *   `normalize`.
   * @returns Resolves once the event has been upserted, or immediately when
   *   the event is dropped by the consent gate.
   */
  async ingest(rawEvent: unknown): Promise<void> {
    // 1. Validate against schema
    const event = this.normalize(rawEvent);

    // 2. Enforce consent. Absence of a decision ('unknown') is not consent,
    //    so anything other than an explicit grant drops third-party data.
    if (event.consent_status !== 'granted' && !event.is_first_party) {
      return; // Drop non-essential acquisition data
    }

    // 3. Hash sensitive identifiers before anything is persisted
    event.click_id = this.hashIdentifier(event.click_id);

    // 4. Store with idempotency key so retried deliveries overwrite
    //    the same record instead of duplicating it
    await this.eventStore.upsert(event.event_id, event);
  }

  /**
   * Maps a raw payload onto `AcquisitionEventV1`.
   *
   * The implementation is elided in this snippet.
   */
  private normalize(raw: unknown): AcquisitionEventV1 {
    // Mapping logic, type coercion, default values
    // ...
  }

  /**
   * Returns the hex SHA-256 digest of `id` concatenated with `SALT`.
   *
   * @param id Raw identifier; `null` and the empty string both yield `null`.
   */
  private hashIdentifier(id: string | null): string | null {
    if (!id) return null;
    // SHA-256 hash with salt to protect PII while allowing matching
    return crypto.createHash('sha256').update(id + SALT).digest('hex');
  }
}
```
#### 3. Attribution Engine
Build a configurable attribution engine that processes events into user profiles. Support multiple attribution models (Last-Click, First-Click, Linear, Time-Decay) via configuration.
```typescript
// services/attribution-engine.ts
/** Names of the attribution strategies the engine can be configured with. */
export type AttributionModel =
  | 'last_click'
  | 'first_click'
  | 'linear'
  | 'time_decay';
/** Runtime settings that drive an attribution run. */
export interface AttributionConfig {
  /** Strategy used to pick which touchpoints receive credit. */
  model: AttributionModel;

  /** How many days before the conversion a touchpoint may lie and still count. */
  lookback_window_days: number;

  /**
   * Whether touchpoints should be stitched across devices.
   * NOTE(review): how this flag is consumed is not visible here — confirm.
   */
  cross_device_stitching: boolean;
}
/** Outcome of attributing a single conversion. */
export interface AttributionResult {
  /** User the conversion belongs to. */
  user_id: string;

  /** `event_id` of the conversion that was attributed. */
  conversion_event_id: string;

  /** `event_id`s of the touchpoints that received credit. */
  credited_touchpoints: string[];

  /** Attribution model that produced this result. */
  model_used: AttributionModel;
}
/**
 * Decides which acquisition touchpoints receive credit for a conversion,
 * according to the configured attribution model and lookback window.
 */
export class AttributionEngine {
  constructor(private config: AttributionConfig) {}

  /**
   * Attributes one conversion to the touchpoints that preceded it.
   *
   * A touchpoint is eligible when it happened at or before the conversion
   * and no more than `lookback_window_days` earlier. Touchpoints whose
   * timestamp cannot be interpreted are ignored. Eligible touchpoints are
   * ordered by their own timestamps, so the result does not depend on the
   * order of `touchpoints`, and the input array is never mutated.
   *
   * Credited ids per model:
   * - `last_click`: the most recent eligible touchpoint (or none).
   * - `first_click`: the earliest eligible touchpoint (or none).
   * - `linear`: every eligible touchpoint, oldest first.
   * - `time_decay`: every eligible touchpoint, most recent first. Only the
   *   ordering is returned; no numeric weights are part of the result.
   *
   * @param touchpoints Candidate touchpoints, in any order.
   * @param conversion The conversion event being attributed.
   * @returns The credited touchpoint ids together with the model used.
   * @throws Error if the configured model is not a known `AttributionModel`.
   */
  calculateAttribution(
    touchpoints: AcquisitionEventV1[],
    conversion: AcquisitionEventV1
  ): AttributionResult {
    const windowMs = this.config.lookback_window_days * 24 * 60 * 60 * 1000;
    const conversionMs = AttributionEngine.toEpochMs(conversion.timestamp);

    // Oldest -> newest ids of the touchpoints inside the lookback window.
    const chronologicalIds = touchpoints
      .map(tp => ({ id: tp.event_id, at: AttributionEngine.toEpochMs(tp.timestamp) }))
      .filter(({ at }) => {
        const ageMs = conversionMs - at;
        // ageMs < 0 means the touchpoint happened after the conversion and
        // cannot have caused it; NaN (unparseable timestamp) fails both tests.
        return ageMs >= 0 && ageMs <= windowMs;
      })
      .sort((a, b) => a.at - b.at)
      .map(tp => tp.id);

    let creditedIds: string[];
    switch (this.config.model) {
      case 'last_click':
        creditedIds = chronologicalIds.slice(-1);
        break;
      case 'first_click':
        creditedIds = chronologicalIds.slice(0, 1);
        break;
      case 'linear':
        creditedIds = chronologicalIds;
        break;
      case 'time_decay':
        // Closest to the conversion first.
        creditedIds = [...chronologicalIds].reverse();
        break;
      default: {
        // Compile-time exhaustiveness check; at runtime this catches a model
        // name coming from external configuration that the engine does not know.
        const unsupported: never = this.config.model;
        throw new Error(`Unsupported attribution model: ${String(unsupported)}`);
      }
    }

    return {
      // `||` on purpose: an empty user id is reported as anonymous as well.
      user_id: conversion.user_id || 'anonymous',
      conversion_event_id: conversion.event_id,
      credited_touchpoints: creditedIds,
      model_used: this.config.model
    };
  }

  /**
   * Converts a timestamp to epoch milliseconds.
   *
   * Accepts a `Date`, a date string understood by `Date.parse` (such as an
   * ISO 8601 string), or a number that is already in milliseconds. Anything
   * else, including an unparseable string, yields `NaN`.
   */
  private static toEpochMs(timestamp: unknown): number {
    if (timestamp instanceof Date) return timestamp.getTime();
    if (typeof timestamp === 'number') return timestamp;
    return typeof timestamp === 'string' ? Date.parse(timestamp) : Number.NaN;
  }
}
```
#### 4. Metric Calculation Pipeline
Aggregate attributed data to calculate CAC, LTV, and Conversion Rates. Use an OLAP database (e.g., ClickHouse, BigQuery) for efficient aggregation.
```sql
-- BigQuery SQL Example: Daily CAC Calculation
--
-- Output: one row per (date, channel, attribution_model) with the spend for
-- that day/channel, the number of distinct conversions credited to the
-- channel, and CAC = spend / conversions.
--
-- Conversions and spend are aggregated separately and joined afterwards.
-- Joining spend rows directly onto conversion rows repeats each spend row
-- once per conversion, which multiplies SUM(spend_amount) and distorts CAC.
--
-- Assumptions to confirm against your warehouse:
--   * attributions.credited_touchpoints is an ARRAY<STRING> of event ids.
--   * `project.dataset.touchpoints` holds the touchpoint events (event_id,
--     channel); replace it with the actual table name.
--   * A conversion credited to touchpoints in several channels is counted
--     once in each of those channels.
WITH attributed_conversions AS (
  SELECT
    DATE(conversion.timestamp) AS date,
    tp.channel AS channel,
    attr.model_used AS attribution_model,
    COUNT(DISTINCT conversion.event_id) AS acquired_users
  FROM `project.dataset.attributions` attr
  JOIN `project.dataset.conversions` conversion
    ON attr.conversion_event_id = conversion.event_id
  CROSS JOIN UNNEST(attr.credited_touchpoints) AS credited_event_id
  JOIN `project.dataset.touchpoints` tp
    ON tp.event_id = credited_event_id
  WHERE attr.model_used = @CURRENT_MODEL
  GROUP BY date, channel, attribution_model
),
daily_spend AS (
  -- Collapse spend to the same grain as the conversions before joining.
  SELECT
    DATE(spend.timestamp) AS date,
    spend.channel AS channel,
    SUM(spend.spend_amount) AS total_spend
  FROM `project.dataset.spend_data` spend
  GROUP BY date, channel
)
SELECT
  ac.date AS date,
  ac.channel AS channel,
  ac.attribution_model AS attribution_model,
  ds.total_spend AS total_spend,
  ac.acquired_users AS acquired_users,
  SAFE_DIVIDE(ds.total_spend, ac.acquired_users) AS cac
FROM attributed_conversions ac
JOIN daily_spend ds
  ON ds.date = ac.date
  AND ds.channel = ac.channel -- Join logic depends on spend granularity
```
Architecture Decisions and Rationale
- Server-Side Ingestion: Rationale: Eliminates ad-blocker impact and ensures consent compliance is enforced at the point of entry. Client-side events should only be used for immediate UI feedback, not metric calculation.
- Event-Driven Attribution: Rationale: Attribution should be calculated asynchronously in a stream processor (e.g., Flink, Kafka Streams) rather than synchronously during user interaction. This decouples latency from attribution complexity.
- Configurable Attribution Windows: Rationale: Hardcoding attribution windows in code requires deployments to adjust business logic. Store windows and models in a configuration service (e.g., Feature Flags, Database) to allow non-engineers to adjust parameters.
- Hashed Click IDs: Rationale: Raw click IDs can contain user-identifiable information. Hashing with a salt preserves the ability to match touchpoints to conversions while mitigating privacy risks.
Pitfall Guide
1. Client-Side Attribution Reliance
Mistake: Relying on browser cookies or pixels for attribution.
Explanation: Ad-blockers, ITP (Intelligent Tracking Prevention), and user consent banners strip or block client-side data. This leads to systematic under-reporting of acquisition efficiency, particularly in high-value tech demographics.
Best Practice: Implement server-side event forwarding. Use the backend to capture utm parameters and click_ids from redirects or API calls.
2. Ignoring Consent State in Acquisition Data
Mistake: Storing acquisition touchpoints for users who have denied marketing tracking.
Explanation: This violates GDPR/CCPA requirements. Even if the data is anonymized, linking a touchpoint to a conversion without consent can be considered processing personal data for marketing purposes.
Best Practice: Tag every event with consent_status. Filter acquisition calculations to only include events where consent is granted or where the processing is based on legitimate interest (and documented).
3. Hardcoding Attribution Models
Mistake: Embedding attribution logic (e.g., Last-Click) directly in application code.
Explanation: Marketing strategies evolve. Changing the attribution model requires a code deployment, slowing down optimization cycles and increasing risk.
Best Practice: Externalize attribution configuration. Use a configuration file or database table to define the active model and lookback windows. The attribution engine should read this config at runtime.
4. Cross-Device Stitching Failures
Mistake: Treating anonymous sessions and authenticated users as separate entities without a stitching mechanism.
Explanation: A user may click an ad on mobile (anonymous) and convert on desktop (authenticated). Without stitching, the acquisition channel is lost, and CAC is inflated as the conversion is attributed to "Direct."
Best Practice: Implement a user resolution service that links device_id to user_id upon login. Backfill attribution touchpoints to the authenticated user profile based on the linked device history.
5. Schema Drift in Event Properties
Mistake: Adding new properties to acquisition events without versioning or backward compatibility.
Explanation: Downstream dashboards and attribution engines may crash or produce incorrect metrics when encountering unexpected fields or type changes.
Best Practice: Enforce schema versioning (AcquisitionEventV1). Use a schema registry to validate incoming events. Maintain backward compatibility by making new fields optional and providing default values.
6. Calculating LTV Without Churn Adjustment
Mistake: Computing LTV as ARPU * Lifetime without accounting for churn probability.
Explanation: This overestimates value for cohorts with high early churn. Acquisition metrics become misleading, encouraging spend on channels that bring low-retention users.
Best Practice: Use cohort-based LTV calculation. Track revenue over time for each acquisition cohort and apply a discount rate. Implement a survival analysis model to predict remaining lifetime based on user behavior signals.
7. Missing Idempotency in Event Ingestion
Mistake: Processing duplicate events due to network retries or SDK buffering.
Explanation: Duplicate acquisition events inflate conversion counts and skew attribution, leading to artificially low CAC.
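Best Practice: Require a unique event_id in every acquisition event. Use upsert operations with the event_id as the key in the event store so that retried or duplicated deliveries overwrite the same record instead of creating a new one. This makes ingestion idempotent (effectively-once storage); it does not by itself make downstream processing exactly-once.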
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Early-Stage Startup | Client-Side SDK + Last-Click | Speed to market; low engineering overhead; sufficient for initial validation. | Low |
| Scale-Up / Privacy-First | Server-Side Ingestion + Configurable Attribution | High data fidelity; compliance-ready; supports multi-touch analysis. | Medium |
| Enterprise / Multi-Channel | Unified Event Stream + Real-Time Attribution | Handles volume; enables dynamic budget pacing; cross-device accuracy. | High |
| Strict GDPR/CCPA Region | Consent-Linked Server-Side + Hashed IDs | Minimizes legal risk; relies on first-party data; avoids third-party cookies. | Medium |
| Real-Time Optimization Required | Stream Processing (Flink/Kafka) | Low latency attribution allows immediate bid adjustments and fraud detection. | High |
Configuration Template
// config/attribution-config.json
{
"version": "1.0.0",
"active_model": "time_decay",
"lookback_window_days": 30,
"cross_device_stitching": true,
"channels": [
{
"id": "paid_search",
"utm_mediums": ["cpc", "ppc"],
"weight_override": 1.0
},
{
"id": "organic",
"utm_mediums": ["organic"],
"weight_override": 0.0
}
],
"consent_rules": {
"require_marketing_consent": true,
"fallback_to_first_party": true
},
"spend_integration": {
"enabled": true,
"sync_frequency_hours": 6,
"source": "google_ads, meta_ads"
}
}
Quick Start Guide
- Initialize Schema: Copy the `AcquisitionEventV1` interface into your shared types package. Run `npm run generate:types` to create validation schemas.
- Deploy Ingestion Endpoint: Add the `AcquisitionIngestionService` to your backend. Expose the `/api/v1/events/acquisition` endpoint. Ensure it validates against the schema and checks consent.
- Configure Attribution: Create `attribution-config.json` in your config repository. Set `active_model` to `last_click` for initial testing. Deploy the `AttributionEngine`.
- Validate Events: Send a test event using `curl` or Postman with `utm_source=google`. Verify the event is stored in the database with a hashed `click_id` and the correct consent status.
- Query Metrics: Run the CAC SQL query against your data warehouse. Confirm that the test conversion is attributed to `google` and that CAC is calculated correctly based on the injected spend data.