mimeType: string;
size: number;
checksum: string;
};
products: Map<ProductId, ProductAssetConfig>;
metadata: Record<string, unknown>; // Searchable tags
createdAt: Date;
updatedAt: Date;
}
/**
 * Per-product settings stored alongside an asset in the matrix; one of these
 * is kept for each product id in the entry's `products` map.
 */
interface ProductAssetConfig {
  /** Transform rules configured for this product on the asset. */
  allowedTransforms: Array<TransformRule>;

  /** Policy document handed to the policy engine when the product requests the asset. */
  accessPolicy: PolicyDocument;

  /** Retention settings for the asset under this product (ingestion seeds `{ days: 365 }`). */
  retention: RetentionPolicy;
}
/**
 * Describes one derivative variant that may be produced from an original asset.
 */
interface TransformRule {
  /** Identifier of the rule. */
  id: string;

  /** Output format of the derivative. */
  format:
    | 'JPEG'
    | 'WEBP'
    | 'PNG'
    | 'MP4';

  /** Target size; optional. Units are not stated in this file (presumably pixels). */
  dimensions?: {
    width: number;
    height: number;
  };

  /** Quality setting; the scale/range is not defined in this file. */
  quality: number;

  /** Optional watermark flag. */
  watermark?: boolean;
}
**2. Implement the Ingestion Gateway**
The gateway handles uploads, generates the initial metadata entry, and triggers the transformation graph.
```typescript
/**
 * Upload entry point: stores the original binary, records the matrix entry and
 * announces the new asset on the transform queue.
 */
class AssetIngestionService {
  constructor(
    private readonly storage: ObjectStorage,
    private readonly metadataRepo: MetadataRepository,
    private readonly transformQueue: TransformQueue
  ) {}

  /**
   * Ingests a new asset on behalf of a product.
   *
   * Steps run strictly in order: write the original, persist the matrix entry,
   * publish `ASSET_INGESTED`. There is no compensation: if a later step throws,
   * the effects of the earlier steps remain and the error propagates to the caller.
   *
   * @param productId - Product that owns the upload; becomes the only key in `products`.
   * @param file - Raw bytes of the original asset.
   * @param metadata - Caller-supplied metadata, stored on the entry as-is.
   * @returns The matrix entry that was persisted.
   */
  async ingestAsset(
    productId: string,
    file: Buffer,
    metadata: Record<string, unknown>
  ): Promise<AssetMatrixEntry> {
    const assetId = generateId();
    const version = 1;
    const originalKey = `originals/${assetId}/v${version}`;

    // Derive these once: hashing/sniffing the whole buffer twice is wasted work,
    // and a single source guarantees storage and metadata record the same values.
    const mimeType = detectMimeType(file);
    const checksum = sha256(file);
    // One instant for both timestamps so a fresh entry has createdAt === updatedAt.
    const now = new Date();

    // 1. Write to immutable storage
    await this.storage.put(originalKey, file, {
      contentType: mimeType,
      checksum
    });

    // 2. Create matrix entry
    const entry: AssetMatrixEntry = {
      id: assetId,
      version,
      status: 'ACTIVE',
      original: {
        key: originalKey,
        mimeType,
        size: file.length,
        checksum
      },
      products: new Map([[productId, {
        allowedTransforms: getDefaultRules(productId),
        accessPolicy: { allow: ['read'] },
        retention: { days: 365 }
      }]]),
      metadata,
      createdAt: now,
      // Separate Date instance so mutating one timestamp cannot alter the other.
      updatedAt: new Date(now.getTime())
    };

    // 3. Persist metadata transactionally
    await this.metadataRepo.put(entry);

    // 4. Trigger transformation graph
    await this.transformQueue.publish({
      type: 'ASSET_INGESTED',
      payload: { assetId, version, productId }
    });

    return entry;
  }
}
```

**3. Build the Transformation Graph**
Transformations must be idempotent and shared. The graph collects the transforms required across all products and deduplicates them, so each derivative is computed only once.
```typescript
/** Transform parameters the graph reads from a rule. */
interface GraphTransformSpec {
  format: string;
  dimensions?: { width: number; height: number };
}

/** Narrow view of the metadata repository: only what the graph reads. */
interface GraphMetadataReader {
  get(assetId: string): Promise<
    | {
        original: { key: string };
        products: Iterable<readonly [unknown, { allowedTransforms: Iterable<GraphTransformSpec> }]>;
      }
    | null
    | undefined
  >;
}

/** Narrow view of the derivative cache used for the idempotency check. */
interface GraphDerivativeCache {
  check(taskKey: string): Promise<unknown>;
  set(taskKey: string, record: { key: string; size: number }): Promise<unknown>;
}

/** Narrow view of the engine that renders a derivative from an original. */
interface GraphTransformEngine {
  process(
    originalKey: string,
    options: { format: string; width?: number; height?: number }
  ): Promise<{ buffer: Uint8Array; size: number }>;
}

/** Narrow view of object storage: the graph only writes derivatives. */
interface GraphDerivativeStore {
  put(key: string, body: Uint8Array): Promise<unknown>;
}

/**
 * Computes the derivatives an asset needs across all of its products,
 * running each distinct transform at most once.
 */
class TransformationGraph {
  constructor(
    private readonly metadataRepo: GraphMetadataReader,
    private readonly transformCache: GraphDerivativeCache,
    private readonly transformEngine: GraphTransformEngine,
    private readonly storage: GraphDerivativeStore
  ) {}

  /**
   * Runs every distinct transform configured for the asset named by the event.
   *
   * Rules are deduplicated by `assetId:format:width:height`, so two products
   * asking for the same variant share one derivative. Distinct transforms run
   * concurrently; the returned promise rejects as soon as any of them fails.
   *
   * NOTE(review): `quality` and `watermark` are not part of the key and are not
   * passed to the engine (same as before this fix), so rules differing only in
   * those fields still collapse into one derivative. The key also omits the
   * asset version. Confirm both are intended.
   *
   * @param event - Ingestion event; only `assetId` is read.
   * @throws Error when no metadata entry exists for `event.assetId`.
   */
  async executeTransformGraph(event: AssetIngestedEvent): Promise<void> {
    const asset = await this.metadataRepo.get(event.assetId);
    if (!asset) {
      throw new Error(`Asset not found: ${event.assetId}`);
    }

    // Keep the structured rule next to its key: re-parsing the key with
    // split(':') loses type information and breaks if an id contains ':'.
    const tasks = new Map<string, GraphTransformSpec>();
    for (const [, config] of asset.products) {
      for (const rule of config.allowedTransforms) {
        const taskKey = this.buildTaskKey(event.assetId, rule);
        if (!tasks.has(taskKey)) {
          tasks.set(taskKey, rule);
        }
      }
    }

    await Promise.all(
      Array.from(tasks, ([taskKey, rule]) =>
        this.runTask(asset.original.key, taskKey, rule)
      )
    );
  }

  /** Builds the sharing key; rules without dimensions use the `orig` placeholder. */
  private buildTaskKey(assetId: string, rule: GraphTransformSpec): string {
    const width = rule.dimensions?.width ?? 'orig';
    const height = rule.dimensions?.height ?? 'orig';
    return `${assetId}:${rule.format}:${width}:${height}`;
  }

  /** Renders and stores one derivative unless the cache already lists it. */
  private async runTask(
    originalKey: string,
    taskKey: string,
    rule: GraphTransformSpec
  ): Promise<void> {
    // Idempotency check. It is check-then-act, so two workers racing on the
    // same key may both render; the writes below target the same key.
    if (await this.transformCache.check(taskKey)) {
      return;
    }

    const result = await this.transformEngine.process(originalKey, {
      format: rule.format,
      // Left undefined (not NaN) when the rule has no target size.
      width: rule.dimensions?.width,
      height: rule.dimensions?.height
    });

    const derivativeKey = `derivatives/${taskKey}`;
    await this.storage.put(derivativeKey, result.buffer);
    await this.transformCache.set(taskKey, {
      key: derivativeKey,
      size: result.size
    });
  }
}
```

**4. Resolution and Access Control**
Products request assets via the resolution API. The API evaluates the matrix entry to select the correct derivative and enforces the product's access policy before returning a URL.
```typescript
/** Per-product fields the resolver reads from a matrix entry. */
interface ResolverProductConfig {
  allowedTransforms: Iterable<TransformRule>;
  accessPolicy: unknown;
}

/** Narrow view of the metadata repository: only what the resolver reads. */
interface ResolverMetadataReader {
  get(assetId: string): Promise<
    | {
        products: { get(productId: string): ResolverProductConfig | undefined };
        metadata: Record<string, unknown>;
      }
    | null
    | undefined
  >;
}

/** Narrow view of the policy engine. */
interface ResolverPolicyEngine {
  evaluate(policy: unknown, productId: string): boolean;
}

/** Narrow view of the derivative cache. */
interface ResolverDerivativeCache {
  get(transformKey: string): Promise<{ key: string } | null | undefined>;
}

/** Narrow view of object storage: the resolver only signs URLs. */
interface ResolverUrlSigner {
  presignUrl(
    key: string,
    options: { expiresIn: number; headers: Record<string, string> }
  ): Promise<string>;
}

/**
 * Resolves a product's request for an asset variant into a presigned URL,
 * enforcing product membership, the access policy and the transform whitelist.
 */
class AssetResolutionService {
  constructor(
    private readonly metadataRepo: ResolverMetadataReader,
    private readonly policyEngine: ResolverPolicyEngine,
    private readonly transformCache: ResolverDerivativeCache,
    private readonly storage: ResolverUrlSigner
  ) {}

  /**
   * Returns a presigned URL for the derivative matching `requestedTransform`.
   *
   * @param requesterProductId - Product making the request.
   * @param assetId - Asset to resolve.
   * @param requestedTransform - Criteria matched against the product's allowed
   *   rules; every field that is provided must equal the rule's value. An empty
   *   object therefore selects the product's first allowed rule.
   * @returns URL, transform key, cache header value and the asset's metadata.
   * @throws AccessDeniedError when the asset is unknown or not shared with the
   *   product, when the policy engine rejects the request, or when the
   *   requested transform is not in the product's allowed rules.
   * @throws TransformNotFoundError when the derivative is not in the cache yet.
   */
  async resolveAsset(
    requesterProductId: string,
    assetId: string,
    requestedTransform: Partial<TransformRule>
  ): Promise<ResolvedAsset> {
    const asset = await this.metadataRepo.get(assetId);

    // 1. Validate product membership. An unknown asset gets the same error as
    //    an unauthorized one, so the API does not reveal which asset ids exist.
    const productConfig = asset?.products.get(requesterProductId);
    if (!asset || !productConfig) {
      throw new AccessDeniedError('Product not authorized for asset');
    }

    // 2. Evaluate Policy
    if (!this.policyEngine.evaluate(productConfig.accessPolicy, requesterProductId)) {
      throw new AccessDeniedError('Policy violation');
    }

    // 3. Resolve Transform: only whitelisted rules may be served. The cache is
    //    shared across products, so skipping this check would let a product
    //    fetch a variant that was generated for another product's rules.
    const rule = this.findAllowedRule(productConfig.allowedTransforms, requestedTransform);
    if (!rule) {
      throw new AccessDeniedError('Transform not allowed for product');
    }
    const transformKey = this.computeTransformKey(assetId, rule);
    const derivative = await this.transformCache.get(transformKey);
    if (!derivative) {
      // Fallback to on-demand transform or error
      throw new TransformNotFoundError('Derivative not ready');
    }

    // 4. Generate Presigned URL with TTL
    const url = await this.storage.presignUrl(derivative.key, {
      expiresIn: 3600,
      headers: {
        'x-product-id': requesterProductId,
        'x-asset-id': assetId
      }
    });

    return {
      url,
      transformKey,
      // NOTE(review): one day of public caching outlives the one-hour URL above.
      // If this header applies to the resolve response (not the derivative
      // bytes), cached responses will carry expired URLs. Confirm the intent.
      cacheControl: 'public, max-age=86400',
      metadata: asset.metadata
    };
  }

  /** Returns the first allowed rule that agrees with every field present in the request. */
  private findAllowedRule(
    allowed: Iterable<TransformRule>,
    requested: Partial<TransformRule>
  ): TransformRule | undefined {
    for (const rule of allowed) {
      if (requested.id !== undefined && requested.id !== rule.id) continue;
      if (requested.format !== undefined && requested.format !== rule.format) continue;
      if (requested.quality !== undefined && requested.quality !== rule.quality) continue;
      // An absent watermark flag on the rule is treated as false.
      if (requested.watermark !== undefined && requested.watermark !== Boolean(rule.watermark)) continue;
      if (
        requested.dimensions !== undefined &&
        (requested.dimensions.width !== rule.dimensions?.width ||
          requested.dimensions.height !== rule.dimensions?.height)
      ) {
        continue;
      }
      return rule;
    }
    return undefined;
  }

  /**
   * Builds the asset-scoped cache key `assetId:format:width:height`; rules
   * without dimensions use the `orig` placeholder. Must stay in step with the
   * key written by the transformation pipeline.
   */
  private computeTransformKey(assetId: string, rule: TransformRule): string {
    const width = rule.dimensions?.width ?? 'orig';
    const height = rule.dimensions?.height ?? 'orig';
    return `${assetId}:${rule.format}:${width}:${height}`;
  }
}
```

Architecture Decisions
- Immutable Originals: Original assets are never overwritten. Updates create new versions. This enables rollback and audit trails across products.
- Shared Transform Cache: Transforms are keyed by asset ID and transform parameters (format and dimensions). If Product A requests a 300x300 JPEG of an asset and Product B requests the same variant of that asset, the system computes it once. This is the primary driver of cost reduction.
- Policy-as-Code: Access control is defined within the matrix entry. This allows granular permissions (e.g., "Product B can read but not transform") without code changes.
- Event-Driven Pipeline: Separation of ingestion and transformation ensures the upload API remains fast. Transforms happen asynchronously, with a fallback mechanism for critical paths.
Pitfall Guide
1. Storing Binaries in Metadata Database
Mistake: Embedding asset blobs or large metadata payloads in the metadata store.
Impact: Blows up database costs, degrades query latency, and limits scalability of the resolution API.
Best Practice: Metadata store holds only keys, pointers, and small scalars. Binaries reside exclusively in object storage.
2. Hardcoding Product IDs in Asset Paths
Mistake: Using paths like assets/prod-a/avatar.jpg.
Impact: Makes cross-product sharing impossible without complex routing or duplication. Breaks the matrix abstraction.
Best Practice: Use canonical paths (assets/{id}/v{version}/original) and let the resolution layer map product requests to these paths.
3. Ignoring Transform Idempotency
Mistake: Assuming transforms always need to run.
Impact: Wasted compute cycles, increased latency, and potential race conditions where multiple workers generate the same derivative.
Best Practice: Implement a deterministic cache check based on the transform key before execution.
4. Metadata Drift Between Products
Mistake: Allowing Product A to update asset metadata without propagating to the matrix.
Impact: Product B sees stale tags or permissions. Breaks search and consistency.
Best Practice: Metadata updates must go through the central gateway. Use optimistic concurrency control (version vectors) to prevent conflicts.
5. Single Point of Failure in Resolution API
Mistake: Making the resolution API a synchronous bottleneck that calls storage directly for every request.
Impact: High latency, cascading failures during traffic spikes.
Best Practice: Cache resolution results aggressively. Use CDN edge logic to serve presigned URLs. Ensure the API is stateless and horizontally scalable.
6. Unbounded Transformation Costs
Mistake: Allowing arbitrary transform requests without limits.
Impact: A malicious or buggy client can trigger expensive transforms (e.g., 4K video conversion) repeatedly, spiking costs.
Best Practice: Enforce transform quotas per product. Pre-define allowed transform rules in the matrix configuration. Reject requests for non-whitelisted transforms.
7. Neglecting Cold Start Latency
Mistake: Not handling the case where a product requests a transform that hasn't been generated yet.
Impact: User sees broken images or 404s immediately after upload.
Best Practice: Implement a "transform-on-demand" fallback with a placeholder response, or ensure critical transforms are part of the synchronous upload path for latency-sensitive assets.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Low Volume, High Variance | On-Demand Transforms | Reduces storage of unused derivatives; compute is pay-per-use. | Higher compute cost, lower storage cost. |
| High Volume, Standard Formats | Pre-computed Matrix | Maximizes cache hits; minimizes latency; amortizes compute cost. | Lower compute cost, predictable storage cost. |
| Strict Compliance Requirements | Immutable Versioning + Audit | Ensures traceability; prevents accidental overwrites; supports legal holds. | Moderate storage overhead; high operational complexity. |
| Multi-Tenant SaaS | Tenant-Isolated Buckets + Matrix | Prevents data leakage; simplifies tenant deletion; meets data residency. | Higher infrastructure complexity; linear storage scaling. |
Configuration Template
Terraform configuration for the core infrastructure of the Asset Matrix.
```hcl
# main.tf
# Bucket for original uploads. Object versioning is enabled so an overwritten
# or deleted key keeps its previous versions.
resource "aws_s3_bucket" "asset_originals" {
  bucket = "${var.environment}-asset-matrix-originals"

  versioning {
    enabled = true
  }

  lifecycle_rule {
    id      = "archive-old-versions"
    enabled = true

    # Archive, do not delete: the rule previously used
    # noncurrent_version_expiration, which permanently removes superseded
    # versions after 90 days and defeats rollback/audit on originals.
    noncurrent_version_transition {
      days          = 90
      storage_class = "GLACIER"
    }
  }
}
# Bucket for generated derivatives.
resource "aws_s3_bucket" "asset_derivatives" {
  bucket = "${var.environment}-asset-matrix-derivatives"

  # No prefix or filter is set, so this rule covers every object in the bucket.
  # NOTE(review): despite the rule id, that means all derivatives expire after
  # 365 days, not only orphaned ones. Confirm the transform cache is
  # invalidated (or derivatives regenerated) when objects expire.
  lifecycle_rule {
    enabled = true
    id      = "cleanup-orphan-derivatives"

    expiration {
      days = 365
    }
  }
}
# Metadata table for matrix entries, keyed by asset id (hash) and version (range).
resource "aws_dynamodb_table" "asset_metadata" {
  name         = "${var.environment}-asset-matrix-metadata"
  billing_mode = "PAY_PER_REQUEST"
  hash_key     = "id"
  range_key    = "version"

  attribute {
    name = "id"
    type = "S"
  }

  attribute {
    name = "version"
    type = "N"
  }

  # Every attribute used as an index key must be declared, otherwise table
  # creation fails. These two back ProductIndex below.
  attribute {
    name = "productId"
    type = "S"
  }

  # Assumes updatedAt is stored as an ISO-8601 string; switch to "N" if it is
  # written as an epoch number. TODO confirm against the repository code.
  attribute {
    name = "updatedAt"
    type = "S"
  }

  # Lists a product's assets ordered by last update. Only items that carry a
  # top-level productId attribute appear in this index.
  global_secondary_index {
    name               = "ProductIndex"
    hash_key           = "productId"
    range_key          = "updatedAt"
    projection_type    = "INCLUDE"
    non_key_attributes = ["status", "metadata"]
  }
}
# Queue that carries ingestion events to the transform worker.
resource "aws_sqs_queue" "transform_queue" {
  name = "${var.environment}-transform-queue"

  # AWS recommends a visibility timeout of at least six times the consuming
  # Lambda's timeout (300 s here), so a batch that is still being processed or
  # retried is not redelivered to a second worker.
  visibility_timeout_seconds = 1800

  # 14 days, the SQS maximum.
  message_retention_seconds = 1209600
}
# Worker that consumes the transform queue and writes derivatives.
#
# NOTE(review): aws_lambda_function also requires `role` and a code source
# (`filename`, `s3_bucket`/`s3_key`, or `image_uri`). Neither is defined in this
# template; wire them in before applying.
resource "aws_lambda_function" "transform_worker" {
  function_name = "${var.environment}-transform-worker"
  handler       = "dist/handler.main"
  runtime       = "nodejs18.x"
  memory_size   = 1024
  timeout       = 300

  environment {
    variables = {
      ORIGINAL_BUCKET   = aws_s3_bucket.asset_originals.id
      DERIVATIVE_BUCKET = aws_s3_bucket.asset_derivatives.id
      METADATA_TABLE    = aws_dynamodb_table.asset_metadata.name
    }
  }
}

# The SQS trigger is its own resource: aws_lambda_function has no nested
# event_source_mapping block.
resource "aws_lambda_event_source_mapping" "transform_worker_queue" {
  event_source_arn = aws_sqs_queue.transform_queue.arn
  function_name    = aws_lambda_function.transform_worker.function_name
  batch_size       = 10
}
```

Quick Start Guide
1. **Initialize Infrastructure:**
   ```bash
   terraform init
   terraform apply -var="environment=dev"
   ```
   This provisions the S3 buckets, the DynamoDB table, the SQS queue, and the Lambda worker.
2. **Deploy Services:**
   ```bash
   npm install
   npm run build
   npm run deploy:gateway
   npm run deploy:resolver
   ```
   This deploys the Ingestion Gateway and the Resolution API to your compute environment.
3. **Configure Product Matrix:**
   Use the CLI to register products and define their allowed transforms.
   ```bash
   asset-matrix config product add \
     --id "prod-web" \
     --allowed-transforms "jpeg:400x400,webp:800x600" \
     --policy "allow:read"
   ```
4. **Ingest Test Asset:**
   ```bash
   curl -X POST http://localhost:3000/api/v1/ingest \
     -H "x-product-id: prod-web" \
     -F "file=@test-image.png" \
     -F "metadata={\"category\":\"avatar\"}"
   ```
   The response contains the `assetId`; the request also triggers the transform pipeline.
5. **Resolve Asset:**
   ```bash
   curl -X GET "http://localhost:3000/api/v1/resolve?assetId=abc123&transform=jpeg:400x400" \
     -H "x-product-id: prod-web"
   ```
   The response contains a presigned URL for the derivative. Verify that the URL serves the transformed image.
Cross-product integration requires a shift from file-centric thinking to matrix-centric thinking. By implementing this architecture, you eliminate redundancy, enforce consistency, and provide a robust foundation for multi-product scalability. The initial investment in the matrix abstraction pays immediate dividends in reduced engineering overhead and improved system reliability.