ls.
Registry APIs reject malformed identifiers immediately. Validating tax IDs before network calls preserves quota and reduces latency.
import { validateVat } from 'vat-validator';
/** A tax identifier paired with the jurisdiction it should be validated against. */
interface EntityInput {
  identifier: string;
  jurisdiction: string;
}

/**
 * Canonicalizes a tax identifier and rejects it if it fails format validation.
 *
 * All whitespace is removed and the identifier is upper-cased before it is
 * handed to `validateVat` together with the jurisdiction.
 *
 * @param input - Identifier and jurisdiction as supplied by the caller.
 * @returns A new object holding the canonical identifier and the unchanged jurisdiction.
 * @throws Error when `validateVat` reports the canonical identifier as invalid.
 */
export function sanitizeEntityInput(input: EntityInput): EntityInput {
  const { identifier, jurisdiction } = input;
  const canonicalId = identifier.replace(/\s+/g, '').toUpperCase();

  const isWellFormed = validateVat(canonicalId, jurisdiction);
  if (isWellFormed) {
    return { identifier: canonicalId, jurisdiction };
  }

  throw new Error(`Invalid tax identifier format for ${jurisdiction}: ${canonicalId}`);
}
2. Rate-Controlled API Client
Unbounded concurrency triggers 429 Too Many Requests responses and temporary IP blocks. A queue-based client with exponential backoff ensures stable throughput.
import pLimit from 'p-limit';
import { RegistryResponse } from './types';
// Base URL of the registry scoring API; request paths are appended to it.
const API_BASE = 'https://api.get-scala.com/score';
// Maximum number of lookups allowed in flight at once (handed to p-limit).
const CONCURRENCY_LIMIT = 5;
// Upper bound on attempts per lookup, counting the initial request.
const MAX_RETRIES = 3;
/**
 * Registry API client that caps concurrent lookups and retries failed ones.
 *
 * At most `CONCURRENCY_LIMIT` requests run at once. Each lookup makes up to
 * `MAX_RETRIES` attempts: a 429 response waits for the period given by
 * `Retry-After`, any other failure waits with exponential backoff.
 */
export class EntityRegistryClient {
  /** Wait applied after a 429 when `Retry-After` is absent or unparseable. */
  private static readonly DEFAULT_RETRY_AFTER_MS = 2000;

  private readonly limit = pLimit(CONCURRENCY_LIMIT);
  private readonly apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
  }

  /**
   * Fetches the registry record for one identifier.
   *
   * @param identifier - Tax identifier to look up; it is URL-encoded before use.
   * @returns The parsed JSON body of the first successful response.
   * @throws Error `Registry API error: <status>` (or the underlying network
   *   error) when the final attempt fails, or `Max retries exceeded` when
   *   every attempt was answered with 429.
   */
  async fetchEntity(identifier: string): Promise<RegistryResponse> {
    // Encode the identifier so characters such as `&`, `#` or spaces cannot
    // alter the query string or inject extra parameters.
    const url = `${API_BASE}/company?vat=${encodeURIComponent(identifier)}`;

    return this.limit(async () => {
      for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
        const isLastAttempt = attempt === MAX_RETRIES;
        try {
          const res = await fetch(url, {
            headers: { Authorization: `Bearer ${this.apiKey}` }
          });
          if (res.status === 429) {
            // No further attempt follows, so waiting would only delay the failure.
            if (isLastAttempt) break;
            await EntityRegistryClient.sleep(
              EntityRegistryClient.retryAfterMs(res.headers.get('Retry-After'))
            );
            continue;
          }
          // NOTE(review): non-2xx responses are thrown inside the try block, so
          // client errors such as 401/404 are retried as well — confirm this is intended.
          if (!res.ok) throw new Error(`Registry API error: ${res.status}`);
          return res.json();
        } catch (err) {
          if (isLastAttempt) throw err;
          // Exponential backoff: 2s after the first failure, 4s after the second, ...
          await EntityRegistryClient.sleep(Math.pow(2, attempt) * 1000);
        }
      }
      throw new Error('Max retries exceeded');
    });
  }

  /**
   * Converts a `Retry-After` header into a delay in milliseconds.
   * Accepts both forms the header may take (delay in seconds or an HTTP date)
   * and falls back to the default when the value is missing or unparseable.
   */
  private static retryAfterMs(header: string | null): number {
    const value = header?.trim() ?? '';
    if (/^\d+$/.test(value)) {
      return Number(value) * 1000;
    }
    const resumeAt = Date.parse(value);
    if (!Number.isNaN(resumeAt)) {
      return Math.max(0, resumeAt - Date.now());
    }
    return EntityRegistryClient.DEFAULT_RETRY_AFTER_MS;
  }

  /** Resolves after `ms` milliseconds. */
  private static sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
3. Schema Normalization & Risk Routing
Registry responses vary by jurisdiction. A normalization layer maps disparate fields into a unified domain model, while a scoring router determines whether raw financials or aggregated metrics should drive downstream logic.
/** Jurisdiction-independent view of a registry record. */
interface NormalizedEntity {
  legalName: string;
  status: 'active' | 'dissolved' | 'liquidation' | 'bankrupt';
  /** Provider score, or 0 when the provider returned none. */
  financialHealth: number;
  /** Score-derived tier; `insufficient_data` when financials or the score are missing. */
  riskTier: 'low' | 'medium' | 'high' | 'insufficient_data';
  lastFilingYear: number | null;
  rawMetrics: Record<string, unknown>;
}

/**
 * Maps a raw registry payload onto the unified `NormalizedEntity` model.
 *
 * A risk tier is assigned only when the payload carries both financial data
 * (`revenue` or `assets`) and a `score`: 70 and above is `low`, 40 and above
 * is `medium`, anything lower is `high`. Otherwise the tier is
 * `insufficient_data`, so a missing score is never reported as high risk.
 *
 * @param raw - Registry response body; fields are read as-is without validation.
 * @returns The normalized entity, including the raw metrics kept for audit.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- payload shape varies by jurisdiction
export function normalizeRegistryData(raw: any): NormalizedEntity {
  const hasFinancials = raw.revenue != null || raw.assets != null;
  const hasScore = raw.score != null;

  let riskTier: NormalizedEntity['riskTier'] = 'insufficient_data';
  if (hasFinancials && hasScore) {
    riskTier = raw.score >= 70 ? 'low' : raw.score >= 40 ? 'medium' : 'high';
  }

  return {
    legalName: raw.company_name,
    status: raw.status,
    financialHealth: raw.score ?? 0,
    riskTier,
    // Coerce a missing field to null so the value matches the declared type.
    lastFilingYear: raw.last_filing_year ?? null,
    rawMetrics: {
      revenue: raw.revenue,
      employees: raw.employees_range,
      legalForm: raw.legal_form,
      industryCode: raw.ateco_code,
      region: raw.region
    }
  };
}
4. Batch Screening Pipeline
Production screening requires deterministic ordering, error isolation, and threshold-based routing.
/** Outcome of screening a single identifier. */
interface ScreeningResult {
  identifier: string;
  /** Normalized registry record, or `null` when the lookup or normalization failed. */
  entity: NormalizedEntity | null;
  flagged: boolean;
  /** Failure reason; set only when `entity` is `null`. */
  error?: string;
}

/**
 * Screens a batch of identifiers and returns one result per identifier.
 *
 * Each lookup is isolated: a failure becomes a flagged result with
 * `entity: null` and an `error` message instead of aborting the batch.
 * An entity is flagged when its financial health is below `riskThreshold`
 * or its status is anything other than `active`.
 *
 * @param client - Registry client used to fetch each record.
 * @param identifiers - Identifiers to screen.
 * @param riskThreshold - Financial-health score below which an entity is flagged.
 * @returns Results sorted by ascending financial health; failed lookups count
 *   as 0 and therefore sort first.
 */
export async function runScreeningBatch(
  client: EntityRegistryClient,
  identifiers: string[],
  riskThreshold: number = 50
): Promise<ScreeningResult[]> {
  const tasks = identifiers.map(async (id): Promise<ScreeningResult> => {
    try {
      const raw = await client.fetchEntity(id);
      const normalized = normalizeRegistryData(raw);
      const flagged = normalized.financialHealth < riskThreshold || normalized.status !== 'active';
      return { identifier: id, entity: normalized, flagged };
    } catch (err: unknown) {
      // Anything can be thrown, so do not assume an Error instance.
      const message = err instanceof Error ? err.message : String(err);
      return { identifier: id, entity: null, flagged: true, error: message };
    }
  });

  // Every task handles its own failure, so all entries are expected to be fulfilled.
  const settled = await Promise.allSettled(tasks);
  return settled
    .filter((r): r is PromiseFulfilledResult<ScreeningResult> => r.status === 'fulfilled')
    .map(r => r.value)
    .sort((a, b) => (a.entity?.financialHealth ?? 0) - (b.entity?.financialHealth ?? 0));
}
Architecture Rationale
- Validation First: Prevents quota waste and reduces API error noise.
- Concurrency Control:
p-limit ensures predictable throughput without triggering provider throttling.
- Normalization Layer: Decouples downstream business logic from jurisdictional schema drift.
- Error Isolation:
Promise.allSettled guarantees that one failed lookup does not abort the entire batch.
- Deterministic Sorting: Results are ordered by financial health to prioritize high-risk entities for manual review.
Pitfall Guide
Explanation: VAT and company registration numbers follow country-specific checksum algorithms and length constraints. Blindly passing raw strings to APIs results in immediate rejections and wasted quota.
Fix: Implement ISO 19600/VIES validation before network calls. Strip whitespace, normalize case, and reject malformed identifiers at the application boundary.
2. Treating Null Financials as Zero
Explanation: Missing revenue or asset fields rarely indicate bankruptcy. They often signal late filings, dormant status, or jurisdictional disclosure restrictions. Mapping nulls to zero artificially inflates risk scores.
Fix: Explicitly model missing data as insufficient_data or DATA_GAP. Adjust risk weighting to penalize absence of transparency rather than assuming financial distress.
3. Blind Score Reliance
Explanation: Aggregated risk scores compress complex financial histories into single integers. They mask volatility, recent court proceedings, or sector-specific downturns.
Fix: Always persist raw metrics alongside scores. Use scores for initial triage, but require raw data for audit trails, compliance reporting, and threshold adjustments.
4. Unbounded Batch Concurrency
Explanation: Using Promise.all on hundreds of identifiers triggers rate limits, returns 429 responses, and may result in temporary IP blocks.
Fix: Implement queue-based concurrency control with exponential backoff. Respect Retry-After headers and throttle throughput to 60–70% of the provider's stated limit.
5. Static Cache TTLs
Explanation: Applying a uniform 24-hour cache to all entities ignores risk profiles. Stable, low-risk companies rarely change status, while high-risk entities may deteriorate rapidly.
Fix: Implement dynamic TTLs. Cache low-risk entities for 24–48 hours, medium-risk for 6–12 hours, and high-risk for 1–2 hours. Invalidate cache immediately upon status changes or threshold breaches.
6. Ignoring Jurisdictional Disclosure Laws
Explanation: Some regions restrict financial data access or require explicit consent. APIs may return partial payloads or HTTP 403 responses for premium fields.
Fix: Design graceful degradation. Fall back to public registry metadata when financial endpoints fail. Log disclosure restrictions separately from network errors for compliance auditing.
7. Overlooking Historical Lag in Risk Models
Explanation: Financial filings are 12–18 months old by the time they reach production APIs. Using them as real-time solvency indicators creates dangerous blind spots.
Fix: Combine registry data with leading indicators: payment behavior, credit bureau updates, or transactional velocity. Treat registry scores as baseline filters, not final judgments.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Startup MVP / Prototyping | Standardized Aggregator API (Free Tier) | Rapid validation with minimal infra overhead | $0–$19/mo |
| Mid-Market B2B Onboarding | Hybrid Pipeline + 24h Static Cache | Balances freshness with quota efficiency | $19–$49/mo |
| Enterprise Risk Engine | Hybrid Pipeline + Dynamic TTL + Raw Data Persistence | Audit-grade accuracy, scalable batch processing, compliance-ready | $49–$200+/mo |
| High-Frequency Transaction Monitoring | Registry API + Real-Time Behavioral Signals | Registry data lags; behavioral signals provide leading indicators | Higher infra cost, lower false positives |
Configuration Template
// registry.config.ts
/**
 * Configuration template for the registry screening pipeline, grouped into
 * API access, cache policy, risk thresholds, and logging.
 *
 * NOTE(review): the client shown earlier reads its own module constants rather
 * than this object — confirm which one is authoritative before changing values.
 */
export const REGISTRY_CONFIG = {
api: {
baseUrl: 'https://api.get-scala.com/score',
// Read from the environment; undefined when REGISTRY_API_KEY is not set.
apiKey: process.env.REGISTRY_API_KEY,
concurrency: 5,
maxRetries: 3,
// Presumably the base delay for retry backoff, in milliseconds — confirm against the consumer.
retryBaseMs: 1000
},
cache: {
// Cache lifetime per risk tier, in seconds.
ttl: {
low: 86400, // 24h
medium: 21600, // 6h
high: 3600, // 1h
insufficient: 43200 // 12h
},
driver: 'redis' // or 'memory' for dev
},
risk: {
threshold: 50,
alertDelta: 10, // Trigger alert if score drops by 10+ points
requireFinancials: false // Allow onboarding without full financials
},
logging: {
separateErrorStreams: true,
logRawMetrics: true,
auditRetentionDays: 365
}
};
Quick Start Guide
- Install Dependencies:
npm install vat-validator p-limit (the client above uses the native fetch API with its own retry loop; install axios only if you prefer it as the HTTP layer)
- Configure Environment: Set
REGISTRY_API_KEY and point the client to your preferred cache driver (Redis recommended for production)
- Initialize Client: Instantiate
EntityRegistryClient with your API key (the concurrency limit is set by the CONCURRENCY_LIMIT constant)
- Run Validation & Fetch: Pass sanitized identifiers through
sanitizeEntityInput, then execute runScreeningBatch with your risk threshold
- Route Results: Flag entities below threshold, persist raw metrics for audit, and trigger downstream workflows (onboarding hold, manual review, or alerting)
Registry data is a foundational layer for entity verification, not a replacement for comprehensive due diligence. Architect for jurisdictional variance, respect temporal lag, and maintain audit trails. When implemented correctly, a hybrid pipeline delivers scalable, compliance-ready verification without burning through quota or compromising risk accuracy.