and 'Environment' tags on all resources
import * as cdk from 'aws-cdk-lib';
import { TagManager } from 'aws-cdk-lib';
/**
 * Stack that registers a fixed set of cost-allocation tags on itself.
 *
 * The tags are added at stack scope via `cdk.Tags.of(this)`, which is how the
 * stack tags all of its resources automatically.
 */
export class CostAwareStack extends cdk.Stack {
  constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // Key/value pairs registered on the stack, in declaration order.
    const stackTags: ReadonlyArray<readonly [string, string]> = [
      ['CostCenter', 'engineering-dev'],
      ['Environment', 'production'],
      ['Optimization-Enabled', 'true'],
    ];

    const tagger = cdk.Tags.of(this);
    for (const [key, value] of stackTags) {
      tagger.add(key, value);
    }
  }
}
#### 2. Compute Efficiency: Graviton and Spot Integration
AWS Graviton processors offer up to 40% better price-performance for supported workloads. Combine this with Spot Instances for fault-tolerant services.
**Architecture Decision:** Use ECS Fargate Spot for batch processing and stateless APIs that can handle interruptions. Use On-Demand with Graviton for latency-sensitive stateful services.
```typescript
// ecs-spot-graviton-service.ts
import * as ecs from 'aws-cdk-lib/aws-ecs';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
/**
 * Creates an ECS cluster in the given VPC and a Fargate service
 * ('BatchProcessor') whose tasks run on ARM64 (Graviton) Linux and are placed
 * on Fargate Spot capacity.
 *
 * @param scope - Construct under which the cluster, task definition and
 *   service are created.
 * @param id - Currently unused. Construct IDs are fixed ('OptimizedCluster',
 *   'TaskDef', 'BatchProcessor'), so calling this twice with the same scope
 *   will produce duplicate construct IDs.
 * @param vpc - VPC the cluster is created in.
 * @returns The created Fargate service.
 *
 * NOTE(review): `cdk.Construct` is the CDK v1 location of this type; with
 * `aws-cdk-lib` (v2) the equivalent is `Construct` from the `constructs`
 * package. Confirm which CDK major version this snippet targets.
 *
 * NOTE(review): no container is added to the task definition here;
 * presumably the caller adds one through `service.taskDefinition` before
 * synthesis. Confirm against callers.
 */
export function createCostOptimizedService(scope: cdk.Construct, id: string, vpc: ec2.Vpc) {
  const cluster = new ecs.Cluster(scope, 'OptimizedCluster', {
    vpc,
    // Associate FARGATE / FARGATE_SPOT with the cluster so the service's
    // capacity provider strategy below can reference them.
    enableFargateCapacityProviders: true,
  });

  const taskDefinition = new ecs.FargateTaskDefinition(scope, 'TaskDef', {
    // Fargate CPU is expressed in CPU units (1024 = 1 vCPU), not vCPUs:
    // 4096 units = 4 vCPU, a size that supports 8192 MiB of memory.
    cpu: 4096,
    memoryLimitMiB: 8192,
    runtimePlatform: {
      cpuArchitecture: ecs.CpuArchitecture.ARM64, // Graviton2/3
      operatingSystemFamily: ecs.OperatingSystemFamily.LINUX,
    },
  });

  // Fargate Service with Spot capacity and Graviton architecture
  const service = new ecs.FargateService(scope, 'BatchProcessor', {
    cluster,
    taskDefinition,
    capacityProviderStrategies: [
      { capacityProvider: 'FARGATE_SPOT', weight: 1 },
      { capacityProvider: 'FARGATE', weight: 0 }, // No fallback to On-Demand
    ],
    // Deployment circuit breaker: roll back automatically when a deployment
    // fails to stabilise. It does not by itself handle Spot interruptions;
    // tasks still need to shut down gracefully when reclaimed.
    circuitBreaker: { rollback: true },
  });

  return service;
}
```

#### 3. Storage Lifecycle and Intelligent Tiering
S3 costs are often inflated by storing infrequently accessed data in Standard tier. S3 Intelligent-Tiering automatically moves objects based on access patterns with no retrieval fees and minimal monitoring overhead.
// s3-intelligent-tiering.ts
import * as s3 from 'aws-cdk-lib/aws-s3';
/**
 * Builds the versioned 'DataLakeBucket' S3 bucket with a single lifecycle rule
 * that tiers current and noncurrent object versions into cheaper storage
 * classes.
 *
 * @param scope - Construct under which the bucket is created.
 * @param id - Currently unused; the bucket's construct ID is fixed to
 *   'DataLakeBucket'.
 * @returns The created bucket.
 *
 * NOTE(review): `autoDeleteObjects` together with `RemovalPolicy.DESTROY`
 * means the bucket and its contents are removed with the stack. Confirm this
 * is intended for a bucket named as a data lake.
 *
 * NOTE(review): versioning is enabled but no expiration is configured for
 * noncurrent versions, so they are only transitioned, never deleted. Verify
 * this matches the retention requirements.
 */
export function createOptimizedBucket(scope: cdk.Construct, id: string) {
  // Current versions: Intelligent-Tiering immediately, Glacier Instant
  // Retrieval after 90 days.
  const currentVersionTransitions = [
    {
      storageClass: s3.StorageClass.INTELLIGENT_TIERING,
      transitionAfter: cdk.Duration.days(0),
    },
    {
      storageClass: s3.StorageClass.GLACIER_IR,
      transitionAfter: cdk.Duration.days(90),
    },
  ];

  // Noncurrent versions: Glacier Instant Retrieval after 30 days.
  const previousVersionTransitions = [
    {
      storageClass: s3.StorageClass.GLACIER_IR,
      transitionAfter: cdk.Duration.days(30),
    },
  ];

  const tieringRule = {
    enabled: true,
    transitions: currentVersionTransitions,
    noncurrentVersionTransitions: previousVersionTransitions,
  };

  return new s3.Bucket(scope, 'DataLakeBucket', {
    versioned: true,
    lifecycleRules: [tieringRule],
    autoDeleteObjects: true,
    removalPolicy: cdk.RemovalPolicy.DESTROY,
  });
}
#### 4. Data Transfer and NAT Gateway Optimization
NAT Gateways charge per GB processed and are a common cost trap for VPC architectures. Minimize NAT traffic by using VPC Endpoints for AWS services and optimizing egress patterns.
- Strategy: Route S3 and DynamoDB traffic via Gateway VPC Endpoints to avoid NAT processing fees.
- Strategy: Use CloudFront for public content distribution to reduce direct EC2 egress costs.
Pitfall Guide
- **Over-Provisioning for "Just-in-Case" Scenarios:**
- Mistake: Setting auto-scaling thresholds too low or using fixed large instances to avoid latency spikes.
- Impact: Idle resources incur full costs.
- Best Practice: Implement predictive auto-scaling based on historical metrics and use smaller instance types with higher density. Validate performance under load testing before rightsizing.
- **Ignoring NAT Gateway Data Processing Fees:**
- Mistake: Architecting VPCs where all internet-bound traffic flows through NAT Gateways, including traffic to AWS services like S3.
- Impact: Unnecessary costs for traffic that should be free via VPC Endpoints.
- Best Practice: Audit NAT Gateway metrics. Implement Gateway VPC Endpoints for S3 and DynamoDB. Use Interface Endpoints for other AWS services where applicable.
- **Misapplication of Savings Plans:**
- Mistake: Purchasing Compute Savings Plans before establishing a stable baseline or committing to instances that are later terminated.
- Impact: Financial lock-in on unused capacity.
- Best Practice: Use AWS Cost Explorer recommendations based on 30-day usage. Start with a conservative commitment (e.g., 30-50% of baseline) and increase as usage stabilizes. Prefer Compute Savings Plans over Instance Savings Plans for flexibility.
- **Storage Sprawl and Zombie Resources:**
- Mistake: Failing to delete unattached EBS volumes, old AMIs, and snapshots after instance termination.
- Impact: Accumulation of storage costs for data no longer in use.
- Best Practice: Enable `DeleteOnTermination` on EBS volumes by default. Implement Lambda functions to audit and clean up unattached volumes and old snapshots weekly.
- **Optimizing Cost at the Expense of Reliability:**
- Mistake: Aggressively using Spot Instances for stateful databases or critical latency-sensitive services without proper interruption handling.
- Impact: Service outages and data loss.
- Best Practice: Use Spot only for fault-tolerant, stateless workloads. Implement checkpointing and graceful shutdown hooks. For databases, use On-Demand or Reserved Instances with Multi-AZ deployments.
- **Neglecting API Request Costs:**
- Mistake: High-frequency polling or inefficient SDK usage generating millions of API calls to S3, DynamoDB, or CloudWatch.
- Impact: Unexpected charges for request volume.
- Best Practice: Implement caching layers. Use batch operations where available. Review CloudWatch custom metric usage and reduce resolution where high granularity is unnecessary.
- **Lack of Unit Economics Tracking:**
- Mistake: Monitoring total bill without correlating cost to business metrics (e.g., cost per order, cost per active user).
- Impact: Inability to determine if cost increases are justified by growth.
- Best Practice: Integrate cost data with business metrics using AWS Cost and Usage Report (CUR) and analytics tools. Set targets for cost per unit and monitor trends.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Stable Baseline Load (e.g., Core API) | Compute Savings Plans (1-year, All Upfront) | Predictable usage allows maximum discount commitment. | 30%–40% savings vs On-Demand |
| Fault-Tolerant Batch Processing | Spot Instances + Graviton | Workloads can handle interruptions; Graviton reduces compute cost per task. | 70%–90% savings vs On-Demand |
| Variable Web Traffic | Auto-scaling with Target Tracking | Matches capacity to demand, eliminating idle resources during low traffic. | 20%–30% savings vs fixed capacity |
| Data Analytics / Cold Storage | S3 Glacier Deep Archive | Infrequent access with retrieval tolerance; lowest storage tier. | 80%+ savings vs Standard S3 |
| High Data Egress to Internet | CloudFront + S3 Transfer Acceleration | Caches content at edge, reducing direct origin egress fees. | Variable; often reduces egress costs significantly |
Configuration Template
This CDK template provisions a cost-optimized environment with tagging, S3 lifecycle rules, and ECS Spot capacity.
// cost-optimized-stack.ts
import * as cdk from 'aws-cdk-lib';
import * as ecs from 'aws-cdk-lib/aws-ecs';
import * as ec2 from 'aws-cdk-lib/aws-ec2';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as targets from 'aws-cdk-lib/aws-route53-targets';
import * as route53 from 'aws-cdk-lib/aws-route53';
/**
 * Demo stack that provisions a tagged, cost-conscious environment:
 *
 * 1. 'Project' and 'ManagedBy' tags registered at stack scope.
 * 2. A two-AZ VPC with a single NAT gateway and gateway endpoints for S3 and
 *    DynamoDB.
 * 3. An S3 bucket whose objects transition to Intelligent-Tiering immediately.
 * 4. An ECS cluster running an ARM64 Fargate service on Fargate Spot capacity.
 * 5. A stack output exposing the bucket name.
 *
 * NOTE(review): the bucket uses `RemovalPolicy.DESTROY` with
 * `autoDeleteObjects`, so its contents are deleted with the stack. That is
 * suitable for a demo; confirm before reusing for real data.
 */
export class CostOptimizedStack extends cdk.Stack {
  constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {
    super(scope, id, props);

    // 1. Mandatory Tagging
    cdk.Tags.of(this).add('Project', 'CostOptDemo');
    cdk.Tags.of(this).add('ManagedBy', 'CDK');

    // 2. VPC with NAT Gateway optimization considerations
    const vpc = new ec2.Vpc(this, 'OptimizedVpc', {
      maxAzs: 2,
      natGateways: 1, // Minimize NAT GW count
      subnetConfiguration: [
        { cidrMask: 24, name: 'Public', subnetType: ec2.SubnetType.PUBLIC },
        { cidrMask: 24, name: 'Private', subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
      ],
      // Route S3 and DynamoDB traffic through gateway endpoints so it does not
      // incur NAT gateway data-processing charges.
      gatewayEndpoints: {
        S3: { service: ec2.GatewayVpcEndpointAwsService.S3 },
        DynamoDB: { service: ec2.GatewayVpcEndpointAwsService.DYNAMODB },
      },
    });

    // 3. S3 Bucket with Intelligent Tiering
    const dataBucket = new s3.Bucket(this, 'OptimizedBucket', {
      lifecycleRules: [
        {
          enabled: true,
          transitions: [
            {
              storageClass: s3.StorageClass.INTELLIGENT_TIERING,
              transitionAfter: cdk.Duration.days(0),
            },
          ],
        },
      ],
      removalPolicy: cdk.RemovalPolicy.DESTROY,
      autoDeleteObjects: true,
    });

    // 4. ECS Cluster with Spot Capacity
    const cluster = new ecs.Cluster(this, 'EcsCluster', {
      vpc,
      // Associate FARGATE / FARGATE_SPOT with the cluster so the service's
      // capacity provider strategy can reference FARGATE_SPOT.
      enableFargateCapacityProviders: true,
    });

    const taskDef = new ecs.FargateTaskDefinition(this, 'TaskDef', {
      // Fargate CPU is expressed in CPU units (1024 = 1 vCPU), not vCPUs:
      // 2048 units = 2 vCPU, a size that supports 4096 MiB of memory.
      cpu: 2048,
      memoryLimitMiB: 4096,
      runtimePlatform: {
        cpuArchitecture: ecs.CpuArchitecture.ARM64,
      },
    });

    taskDef.addContainer('AppContainer', {
      image: ecs.ContainerImage.fromRegistry('nginx'),
      memoryLimitMiB: 2048,
    });

    new ecs.FargateService(this, 'SpotService', {
      cluster,
      taskDefinition: taskDef,
      capacityProviderStrategies: [
        { capacityProvider: 'FARGATE_SPOT', weight: 1 },
      ],
      desiredCount: 3,
    });

    // 5. Output for verification
    new cdk.CfnOutput(this, 'BucketName', { value: dataBucket.bucketName });
  }
}
Quick Start Guide
- Initialize Project: Run `cdk init app --language typescript` in your project directory.
- Install Dependencies: Execute `npm install aws-cdk-lib constructs`.
- Apply Template: Replace `lib/cost-optimized-stack.ts` with the Configuration Template code and update `bin/app.ts` to instantiate the stack.
- Bootstrap Environment: Run `cdk bootstrap` if this is your first deployment in the account/region.
- Deploy: Execute `cdk deploy`. Verify resources in the AWS Console, check Cost Explorer for the new tags, and confirm S3 lifecycle rules are active.