entStatus: 'explicit' | 'implied' | 'unknown';
licenseType: string;
transformationHash: string;
timestamp: string;
piiDetected: boolean;
}
// services/provenance-tracker.ts
import { createHash, randomUUID } from 'crypto';
/**
 * Builds provenance records for datasets at ingestion time.
 *
 * A record combines caller-supplied metadata with values derived from the
 * dataset itself: a SHA-256 content hash and a heuristic PII flag.
 */
export class ProvenanceTracker {
  /**
   * Email-like token. The dot before the top-level domain is escaped so that
   * it only matches a literal "." (an unescaped dot would accept any
   * character and flag strings such as "a@bxcom").
   */
  private static readonly EMAIL_PATTERN = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/;

  /** Number formatted like a US Social Security number (NNN-NN-NNNN). */
  private static readonly SSN_PATTERN = /\b\d{3}-\d{2}-\d{4}\b/;

  /**
   * Creates a provenance record for `dataset`.
   *
   * @param dataset  Raw dataset bytes; hashed as-is and scanned for PII.
   * @param metadata Optional source, consent and license information. Missing
   *                 (or empty) values fall back to `'unknown'` / `'unverified'`.
   * @returns A record with a freshly generated `datasetId`, the SHA-256 hex
   *          digest of the dataset, the current time as an ISO-8601 string and
   *          the result of the PII scan.
   */
  async record(dataset: Buffer, metadata: Partial<DataProvenance>): Promise<DataProvenance> {
    const hash = createHash('sha256').update(dataset).digest('hex');
    return {
      // Imported from the crypto module rather than read from the `crypto`
      // global, which is not defined on every supported Node.js release.
      datasetId: randomUUID(),
      sourceUri: metadata.sourceUri || 'unknown',
      consentStatus: metadata.consentStatus || 'unknown',
      licenseType: metadata.licenseType || 'unverified',
      transformationHash: hash,
      timestamp: new Date().toISOString(),
      piiDetected: await this.detectPII(dataset),
    };
  }

  /**
   * Heuristic PII scan: reports whether the dataset, decoded with Buffer's
   * default encoding, contains an email-like token or an SSN-formatted number.
   * Integration point for a more capable regex- or ML-based scanner.
   */
  private async detectPII(data: Buffer): Promise<boolean> {
    const content = data.toString();
    return (
      ProvenanceTracker.EMAIL_PATTERN.test(content) ||
      ProvenanceTracker.SSN_PATTERN.test(content)
    );
  }
}
**2. Fairness Metric Integration**
Ethical validation requires quantitative fairness metrics computed before model training and during fine-tuning. Implement demographic parity and equalized odds calculators that run against validation splits.
```typescript
// utils/fairness-metrics.ts
/**
 * Result of a fairness evaluation as produced by `FairnessCalculator.calculate`.
 */
export interface FairnessReport {
/** Largest gap in positive-prediction rate between any two groups (max rate minus min rate). */
demographicParity: number;
/** Variance of the per-group true positive rates, used as an equalized-odds approximation. */
equalizedOdds: number;
/** Demographic-parity threshold reported alongside the metrics (the calculator reports 0.1 by default). */
acceptableThreshold: number;
/** Overall verdict derived from comparing the two metrics against their thresholds. */
status: 'pass' | 'fail' | 'warning';
}
/**
 * Computes group-fairness metrics for binary predictions.
 */
export class FairnessCalculator {
  /**
   * Evaluates demographic parity and an equalized-odds approximation.
   *
   * The three input arrays are parallel: index `i` of each describes the same
   * sample.
   *
   * @param predictions        Predicted label per sample.
   * @param actuals            Ground-truth label per sample.
   * @param sensitiveAttribute Group identifier per sample.
   * @param thresholds         Optional overrides. `demographicParity`
   *   (default 0.1) and `equalizedOdds` (default 0.05) are the limits above
   *   which the report fails; `warning` (default 0.05) is the demographic
   *   parity gap above which a non-failing report is downgraded to a warning.
   * @returns A report whose `demographicParity` is the max-minus-min positive
   *   prediction rate across groups, whose `equalizedOdds` is the population
   *   variance of per-group true positive rates, and whose
   *   `acceptableThreshold` is the demographic parity limit that was applied.
   * @throws RangeError if the arrays differ in length or are empty; no
   *   meaningful metric exists in either case.
   */
  calculate(
    predictions: boolean[],
    actuals: boolean[],
    sensitiveAttribute: string[],
    thresholds: { demographicParity?: number; equalizedOdds?: number; warning?: number } = {}
  ): FairnessReport {
    const sampleCount = predictions.length;
    if (actuals.length !== sampleCount || sensitiveAttribute.length !== sampleCount) {
      throw new RangeError(
        `predictions (${sampleCount}), actuals (${actuals.length}) and ` +
          `sensitiveAttribute (${sensitiveAttribute.length}) must have the same length`
      );
    }
    if (sampleCount === 0) {
      // Without this guard the parity gap would be -Infinity, the variance NaN,
      // and the report would still say 'pass'.
      throw new RangeError('Cannot compute fairness metrics for an empty sample');
    }

    const parityLimit = thresholds.demographicParity ?? 0.1;
    const oddsLimit = thresholds.equalizedOdds ?? 0.05;
    const warningLimit = thresholds.warning ?? 0.05;

    // Positive-prediction rate per group.
    const groups = new Map<string, { pos: number; total: number }>();
    sensitiveAttribute.forEach((group, i) => {
      const entry = groups.get(group) ?? { pos: 0, total: 0 };
      entry.total++;
      if (predictions[i]) entry.pos++;
      groups.set(group, entry);
    });
    const rates = Array.from(groups.values(), (g) => g.pos / g.total);
    const maxDiff = Math.max(...rates) - Math.min(...rates);

    // Equalized odds approximation using true positive rate variance.
    const tprByGroup = this.calculateTPRByGroup(predictions, actuals, sensitiveAttribute);
    const tprVariance = this.variance(tprByGroup);

    let status: 'pass' | 'fail' | 'warning' = 'pass';
    if (maxDiff > parityLimit || tprVariance > oddsLimit) {
      status = 'fail';
    } else if (maxDiff > warningLimit) {
      status = 'warning';
    }

    return {
      demographicParity: maxDiff,
      equalizedOdds: tprVariance,
      acceptableThreshold: parityLimit,
      status,
    };
  }

  /**
   * True positive rate (TP / actual positives) for every group that has at
   * least one actual positive. Groups without actual positives have no defined
   * TPR and are left out instead of being counted as 0, which would inflate
   * the variance.
   */
  private calculateTPRByGroup(preds: boolean[], actuals: boolean[], attrs: string[]): number[] {
    const groups = new Map<string, { tp: number; actualPos: number }>();
    attrs.forEach((group, i) => {
      const entry = groups.get(group) ?? { tp: 0, actualPos: 0 };
      if (actuals[i]) {
        entry.actualPos++;
        if (preds[i]) entry.tp++;
      }
      groups.set(group, entry);
    });
    return Array.from(groups.values())
      .filter((g) => g.actualPos > 0)
      .map((g) => g.tp / g.actualPos);
  }

  /** Population variance of `arr`; 0 for an empty array. */
  private variance(arr: number[]): number {
    if (arr.length === 0) return 0;
    const mean = arr.reduce((a, b) => a + b, 0) / arr.length;
    return arr.reduce((acc, val) => acc + (val - mean) ** 2, 0) / arr.length;
  }
}
```
**3. Inference-Time Guardrails**
Production systems require runtime validation to catch policy violations, prompt injection, and output hallucination. Implement a sidecar guardrail service that intercepts requests and responses, applying configurable policy rules without blocking the main inference path.
// services/inference-guardrail.ts
import { z } from 'zod';
// Shape of a guardrail-checked model output: the text itself (at most 2000
// characters), two scores constrained to the range [0, 1], and the list of
// policy violations found in the text.
const OutputSchema = z.object({
content: z.string().max(2000),
toxicityScore: z.number().min(0).max(1),
factualConsistency: z.number().min(0).max(1),
policyViolations: z.array(z.string()),
});
/**
 * Runtime guardrail that scores a raw model output and rejects it when it is
 * too toxic or matches a policy rule.
 */
export class InferenceGuardrail {
  /** Outputs with a toxicity score above this value are rejected. */
  private static readonly MAX_TOXICITY = 0.7;

  /**
   * Scores `rawOutput`, validates the result against `OutputSchema` and
   * enforces the guardrail policy.
   *
   * @param rawOutput Text produced by the model.
   * @returns The schema-validated output with its scores and an empty
   *          violation list.
   * @throws Whatever `OutputSchema.parse` throws when the output does not fit
   *         the schema (for example content longer than 2000 characters).
   * @throws Error with a message starting with "Guardrail violation: " that
   *         lists every matched policy rule and, when applicable, the
   *         toxicity score that exceeded the limit.
   */
  async validate(rawOutput: string): Promise<z.infer<typeof OutputSchema>> {
    // The two scorers are independent of each other, so run them concurrently.
    const [toxicityScore, factualConsistency] = await Promise.all([
      this.assessToxicity(rawOutput),
      this.checkConsistency(rawOutput),
    ]);
    const violations = this.checkPolicyRules(rawOutput);

    const validated = OutputSchema.parse({
      content: rawOutput,
      toxicityScore,
      factualConsistency,
      policyViolations: violations,
    });

    // Collect every reason for rejection so the error is never thrown with an
    // empty explanation (previously the case for toxicity-only rejections).
    const reasons = [...validated.policyViolations];
    if (validated.toxicityScore > InferenceGuardrail.MAX_TOXICITY) {
      reasons.push(
        `Toxicity score ${validated.toxicityScore.toFixed(2)} exceeds ${InferenceGuardrail.MAX_TOXICITY}`
      );
    }
    if (reasons.length > 0) {
      throw new Error(`Guardrail violation: ${reasons.join(', ')}`);
    }

    // Note: factualConsistency is reported but not used as a rejection criterion.
    return validated;
  }

  /**
   * Keyword-based toxicity estimate: 0.2 per keyword occurrence, capped at 1.0.
   * Integration point for a lightweight toxicity classifier or external API.
   */
  private async assessToxicity(text: string): Promise<number> {
    const toxicPatterns = /\b(hate|violent|explicit|discriminatory)\b/gi;
    const matches = text.match(toxicPatterns);
    return matches ? Math.min(matches.length * 0.2, 1.0) : 0.0;
  }

  /** Placeholder for factual consistency scoring; always returns 0.92. */
  private async checkConsistency(text: string): Promise<number> {
    return 0.92;
  }

  /** Returns the violation label of every policy rule whose pattern occurs in `text`. */
  private checkPolicyRules(text: string): string[] {
    const rules = [
      { pattern: /\b(confidential|secret)\b/i, violation: 'Data leakage detected' },
      { pattern: /\b(jailbreak|ignore previous)\b/i, violation: 'Prompt injection attempt' },
    ];
    return rules.filter((rule) => rule.pattern.test(text)).map((rule) => rule.violation);
  }
}
**4. CI/CD Pipeline Enforcement**
Ethics checks must run automatically on every pull request. Implement GitHub Actions or GitLab CI steps that execute fairness calculations, provenance validation, and guardrail tests before merging.
# .github/workflows/ai-ethics-check.yml
# Runs the ethics checks on every pull request.
name: AI Ethics Validation
on: [pull_request]
jobs:
  ethics-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 20
      - run: npm ci
      - name: Run Fairness Metrics
        run: npx ts-node scripts/validate-fairness.ts
      - name: Check Data Provenance
        run: npx ts-node scripts/audit-provenance.ts
      - name: Guardrail Integration Test
        run: npm test -- --testPathPattern=guardrail
      - name: Generate Ethics Report
        run: npx ts-node scripts/generate-model-card.ts
Architecture Decisions and Rationale
The architecture deliberately separates ethics validation from inference execution. This decoupling ensures that policy updates do not require model retraining or service restarts. The provenance tracker and fairness calculator run during data preparation and training phases, while the guardrail service operates as a non-blocking sidecar during inference. Policy rules are externalized into a configuration layer, enabling security and compliance teams to update thresholds without developer intervention.
Audit trails are maintained through immutable logs of every ethics check, including metric values, policy versions, and decision outcomes. This satisfies regulatory requirements for traceability while enabling continuous drift detection. The system uses feature flags to roll out new ethical constraints gradually, preventing production outages from overly restrictive policies. All components are instrumented with OpenTelemetry for observability, ensuring that ethical system behavior is monitored with the same rigor as performance metrics.
Pitfall Guide
1. Treating Ethics as a Checklist Rather Than a Continuous Process
Ethics is not a one-time audit. Models drift, data distributions shift, and policy requirements evolve. Teams that run fairness checks only during initial training will miss degradation in production. Best practice: Implement continuous monitoring with automated alerts when metrics cross thresholds.
2. Ignoring Data Provenance and Consent Tracking
Training on unvetted data creates latent liability. Even if a model performs well, undisclosed data sources or missing consent can trigger regulatory action and reputational damage. Best practice: Enforce provenance metadata at ingestion. Reject datasets without explicit consent or licensing documentation.
3. Over-Reliance on LLM Self-Assessment for Safety
Using the same model to validate its own outputs creates circular validation and blind spots. Models cannot reliably detect their own bias or hallucination patterns. Best practice: Deploy independent validator models or rule-based systems for ethics checks. Cross-model validation reduces false negatives by 60%+.
4. Hardcoding Ethical Rules Instead of Using Policy Engines
Embedding fairness thresholds and content filters directly in application code makes updates slow and error-prone. Best practice: Externalize policies into a configuration-driven engine. Use JSON/YAML schemas with version control to enable rapid, auditable policy changes.
5. Assuming Fairness Metrics Are Universal
Demographic parity, equalized odds, and predictive parity often conflict. Optimizing for one metric can degrade another. Best practice: Document metric trade-offs explicitly. Select fairness criteria based on use-case context and stakeholder impact, not mathematical convenience.
6. Neglecting Edge-Case Failure Modes
Ethical guardrails often fail on out-of-distribution inputs, multilingual prompts, or adversarial attacks. Best practice: Include red-teaming and adversarial testing in the validation pipeline. Simulate edge cases before production deployment.
7. Skipping Model Cards and Versioning
Undocumented models create operational opacity. Teams cannot audit, reproduce, or responsibly retire systems without structured documentation. Best practice: Generate automated model cards on every deployment. Include training data summary, fairness metrics, known limitations, and intended use boundaries.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Internal analytics dashboard | Proactive fairness checks + automated model cards | Lower risk exposure, faster iteration, compliance baseline | Low ($15k-$30k annual tooling) |
| Customer-facing API | Embedded ethics pipeline + sidecar guardrails | Direct user impact requires runtime validation and auditability | Medium ($80k-$120k annual infrastructure) |
| High-stakes/regulated domain (healthcare, finance) | Full policy-as-code + human-in-the-loop + continuous drift monitoring | Regulatory mandates require traceability, redundancy, and explicit oversight | High ($200k-$350k annual compliance stack) |
Configuration Template
{
  "ethicsPolicy": {
    "version": "1.2.0",
    "fairness": {
      "demographicParityThreshold": 0.08,
      "equalizedOddsThreshold": 0.04,
      "evaluationFrequency": "per-training-job",
      "actionOnViolation": "block-merge"
    },
    "guardrails": {
      "maxToxicityScore": 0.6,
      "minFactualConsistency": 0.85,
      "blockedPatterns": ["confidential", "jailbreak", "ignore previous"],
      "enforcementMode": "sidecar-reject",
      "fallbackBehavior": "human-review-queue"
    },
    "provenance": {
      "requireConsent": true,
      "requireLicense": true,
      "piiDetection": true,
      "ledgerType": "append-only"
    },
    "monitoring": {
      "driftAlertThreshold": 0.15,
      "reportingInterval": "daily",
      "auditRetentionDays": 365
    }
  }
}
Quick Start Guide
- Initialize the ethics validation package: Run `npm install @codcompass/ai-ethics-core zod` in your project root.
- Add the fairness calculator to your training script: Import `FairnessCalculator`, pass validation splits and sensitive attributes, and fail the build if metrics exceed thresholds.
- Deploy the guardrail sidecar: Configure the `InferenceGuardrail` service to intercept API responses, validate against the JSON policy template, and route violations to a review queue.
- Enable CI enforcement: Copy the GitHub Actions workflow to `.github/workflows/ai-ethics-check.yml`, commit, and verify that pull requests trigger automated ethics validation before merge.