CIENT_CLOUD = 'haiku',
PREMIUM_CLOUD = 'claude'
}
/**
 * Configuration passed to the router and its hooks on every call.
 */
interface RouterConfig {
// Per-tier settings keyed by tier id. `cap` is the value EconomicGate compares
// the tier's current spend against (units not visible here — TODO confirm);
// `model` is the model tier assigned to that tier id.
tiers: Record<string, { cap: number; model: ModelTier }>;
// Numeric thresholds read by the individual hooks.
thresholds: {
// Sigma multiplier handed to AnomalySensor's price-spike check.
anomalySigma: number;
// Minimum information-gain / cost ratio the router passes to
// CostBenefitAnalyzer.shouldEscalate.
escalationEIGRatio: number;
// Sigma multiplier used by BurnRateMonitor.checkPanicMode.
panicBurnRateSigma: number;
// Lambda in TemporalDecayCalculator's exp(-lambda * timeToResolution).
timeDecayLambda: number;
};
}
/**
 * A single request handed to `InferenceRouter.route`.
 *
 * `timeToResolution` and `persona` are declared here because the router reads
 * both (`request.timeToResolution`, `request.persona`); without them the
 * router does not type-check and silently computes with `undefined`.
 */
interface InferenceRequest {
  /** Identifier of the market the request refers to. */
  marketId: string;
  /** Query text forwarded with the request. */
  query: string;
  /** Request timestamp (epoch/unit not visible in this file — TODO confirm). */
  timestamp: number;
  /** Price series; the hooks treat the last element as the current price. */
  priceData: number[];
  /** Sentiment score for the request; compared against 0.8 and a historical baseline. */
  sentimentScore: number;
  /**
   * Time remaining until the market resolves, multiplied by
   * `thresholds.timeDecayLambda` in the decay hook
   * (unit must match that lambda — TODO confirm).
   */
  timeToResolution: number;
  /** Persona key looked up by `PersonaBiasAdjuster.adjustScore`; unknown keys add no bias. */
  persona: string;
}
// Hook 1: Market Regime Classifier
class RegimeClassifier {
  /**
   * Assigns a market regime to the request using fixed threshold rules.
   *
   * Rules are checked in order and the first match wins:
   *   1. volume > 10000 and volatility < 0.05  -> HIGH_VOLUME_CONCENTRATION
   *   2. volatility > 0.5 and sentiment > 0.8  -> SENTIMENT_DRIVEN_VOLATILITY
   *   3. otherwise                             -> FUNDAMENTAL_ANCHOR
   *
   * @param request Request whose `priceData`, `marketId` and `sentimentScore` are inspected.
   * @returns The first regime whose rule matches.
   */
  classify(request: InferenceRequest): MarketRegime {
    const priceVolatility = this.calculateVolatility(request.priceData);
    const marketVolume = this.getVolume(request.marketId);

    const isConcentrated = marketVolume > 10000 && priceVolatility < 0.05;
    if (isConcentrated) {
      return MarketRegime.HIGH_VOLUME_CONCENTRATION;
    }

    const isSentimentDriven = priceVolatility > 0.5 && request.sentimentScore > 0.8;
    if (isSentimentDriven) {
      return MarketRegime.SENTIMENT_DRIVEN_VOLATILITY;
    }

    // ... additional regime logic
    return MarketRegime.FUNDAMENTAL_ANCHOR;
  }

  /**
   * Population standard deviation (divides by N) of the raw values.
   * An empty array yields NaN, which fails every comparison in `classify`.
   */
  private calculateVolatility(data: number[]): number {
    const sampleCount = data.length;
    const average = data.reduce((total, value) => total + value, 0) / sampleCount;
    const squaredDeviationSum = data.reduce(
      (total, value) => total + (value - average) ** 2,
      0,
    );
    return Math.sqrt(squaredDeviationSum / sampleCount);
  }
}
// Hook 2: Anomaly Detector
class AnomalySensor {
  /**
   * Flags a request as anomalous when BOTH conditions hold:
   *   - the latest price is a spike relative to the series, and
   *   - sentiment differs from the market's historical sentiment by more than 0.3.
   *
   * Both signals are always computed, even when the price check already failed.
   *
   * @param request Request supplying `priceData`, `sentimentScore` and `marketId`.
   * @param config  Supplies `thresholds.anomalySigma` for the spike check.
   */
  isAnomalous(request: InferenceRequest, config: RouterConfig): boolean {
    const hasPriceSpike = this.detectPriceSpike(
      request.priceData,
      config.thresholds.anomalySigma,
    );
    const baselineSentiment = this.getHistoricalSentiment(request.marketId);
    const divergence = Math.abs(request.sentimentScore - baselineSentiment);
    return hasPriceSpike && divergence > 0.3;
  }

  /**
   * True when the last element of `data` lies more than `sigma` standard
   * deviations from the mean of `data` (the last element is part of that mean).
   */
  private detectPriceSpike(data: number[], sigma: number): boolean {
    const latest = data[data.length - 1];
    const average = data.reduce((total, price) => total + price, 0) / data.length;
    const allowedDeviation = sigma * this.calculateStdDev(data);
    return Math.abs(latest - average) > allowedDeviation;
  }
}
// Hook 3: Time-to-Resolution Decay
class TemporalDecayCalculator {
  /**
   * Exponential weighting factor `exp(-lambda * timeToResolution)`.
   *
   * NOTE(review): for a positive lambda the factor is exactly 1 when
   * `timeToResolution` is 0 and shrinks toward 0 as `timeToResolution` grows,
   * i.e. the weight is highest when resolution is closest. Confirm this is the
   * intended direction.
   *
   * @param timeToResolution Time remaining until resolution (same unit as lambda's inverse).
   * @param config           Supplies `thresholds.timeDecayLambda`.
   * @returns The multiplicative decay factor.
   */
  calculateDecayFactor(timeToResolution: number, config: RouterConfig): number {
    const { timeDecayLambda } = config.thresholds;
    const exponent = -timeDecayLambda * timeToResolution;
    return Math.exp(exponent);
  }
}
// Hook 4: Persona Overlay
class PersonaBiasAdjuster {
  /**
   * Additive bias per known persona.
   *
   * Kept in a Map rather than an object literal so that persona strings such
   * as 'constructor', 'toString' or '__proto__' cannot resolve to inherited
   * Object.prototype members. Built once instead of on every call.
   */
  private static readonly biasByPersona: ReadonlyMap<string, number> = new Map<string, number>([
    ['calibrated_researcher', 0.0],
    ['whale_mimic', 0.15],
    ['panic_seller', -0.2],
    ['momentum_trader', 0.1],
    ['contrarian', -0.1],
  ]);

  /**
   * Applies the persona's additive bias to a score.
   *
   * @param baseScore Score before the persona overlay.
   * @param persona   Persona key; unknown keys leave the score unchanged.
   * @returns `baseScore` plus the persona's bias (always a number).
   */
  adjustScore(baseScore: number, persona: string): number {
    // `??` so only a missing entry falls back to 0.
    const bias = PersonaBiasAdjuster.biasByPersona.get(persona) ?? 0;
    return baseScore + bias;
  }
}
// Hook 5: Panic Mode Circuit Breaker
class BurnRateMonitor {
  // Most recent burn-rate samples, oldest first.
  private window: number[] = [];

  /**
   * Records the current burn rate and reports whether it exceeds
   * `mean + panicBurnRateSigma * stdDev` of the retained samples
   * (the new sample is included in both statistics).
   *
   * At most 60 samples are retained; this is a 60-second window only if the
   * method is called once per second — TODO confirm the call cadence.
   *
   * @param config Supplies `thresholds.panicBurnRateSigma`.
   * @returns True when the latest sample is above the panic level.
   */
  checkPanicMode(config: RouterConfig): boolean {
    const latestBurn = this.getCurrentBurnRate();

    this.window.push(latestBurn);
    if (this.window.length > 60) {
      // Drop the oldest sample to keep the window bounded.
      this.window.shift();
    }

    const samples = this.window;
    const average = samples.reduce((total, sample) => total + sample, 0) / samples.length;
    const spread = this.calculateStdDev(samples);
    const panicLevel = average + config.thresholds.panicBurnRateSigma * spread;
    return latestBurn > panicLevel;
  }
}
// Hook 6: Economic Viability Filter
class EconomicGate {
  /**
   * Checks whether the tier still has budget left.
   *
   * @param request Request being routed (not consulted by the current rule).
   * @param tierId  Key into `config.tiers`.
   * @param config  Supplies the tier's spend cap.
   * @returns True while the tier's current spend is strictly below its cap.
   * @throws TypeError when `tierId` has no own entry in `config.tiers`.
   */
  validate(request: InferenceRequest, tierId: string, config: RouterConfig): boolean {
    // Own-property lookup: an unknown id (or an inherited key such as
    // 'constructor') must not be mistaken for a configured tier.
    const tier = Object.prototype.hasOwnProperty.call(config.tiers, tierId)
      ? config.tiers[tierId]
      : undefined;
    if (tier === undefined) {
      // Fail with an explicit message rather than an opaque
      // "cannot read properties of undefined" TypeError.
      throw new TypeError(`Unknown tier "${tierId}": no entry in config.tiers`);
    }

    const currentSpend = this.getTierSpend(tierId);
    return currentSpend < tier.cap;
  }
}
// Cost-Aware Cognition Engine
class CostBenefitAnalyzer {
  /**
   * Decides whether the expected information gain justifies the cost.
   *
   * @param informationGain Estimated information gain of escalating.
   * @param estimatedCost   Estimated cost of the call; the gain is divided by it.
   * @param threshold       Minimum gain-per-cost ratio required to escalate.
   * @returns True when `informationGain / estimatedCost` is at least `threshold`.
   *          A zero cost yields Infinity, -Infinity or NaN from the division;
   *          NaN never satisfies the comparison.
   */
  shouldEscalate(informationGain: number, estimatedCost: number, threshold: number): boolean {
    return informationGain / estimatedCost >= threshold;
  }
}
// Main Router Orchestrator
// Runs the hooks in a fixed priority order and returns as soon as one of them
// decides the outcome. The order of the steps in `route` is significant.
class InferenceRouter {
// One instance of each hook, created once per router.
private classifier = new RegimeClassifier();
private anomalySensor = new AnomalySensor();
private decayCalc = new TemporalDecayCalculator();
private personaAdj = new PersonaBiasAdjuster();
private burnMonitor = new BurnRateMonitor();
private econGate = new EconomicGate();
private costAnalyzer = new CostBenefitAnalyzer();
/**
 * Routes a request to a deterministic answer or to one of the model tiers.
 *
 * Decision order (first match returns):
 *   1. Regime resolved deterministically -> `{ source: 'deterministic', ... }`
 *   2. Anomaly detected                  -> PREMIUM_CLOUD
 *   3. Panic mode (burn rate)            -> LOCAL_INFERENCE
 *   5. Escalation not justified          -> EFFICIENT_CLOUD
 *   6. Tier budget exhausted             -> throws
 *   otherwise                            -> PREMIUM_CLOUD
 *
 * @param request Request to route.
 * @param tierId  Tier id passed to the economic gate.
 * @param config  Router configuration (tiers and thresholds).
 * @returns The deterministic result or whatever `dispatchToModel` returns.
 * @throws Error with message 'QUOTA_EXCEEDED' when the economic gate rejects the request.
 */
async route(request: InferenceRequest, tierId: string, config: RouterConfig): Promise<InferenceResult> {
// 1. Regime Classification
// Regimes accepted by isRegimeResolved are answered without calling any model.
const regime = this.classifier.classify(request);
if (this.isRegimeResolved(regime)) {
return { source: 'deterministic', regime, data: this.resolveRegime(regime) };
}
// 2. Anomaly Detection
if (this.anomalySensor.isAnomalous(request, config)) {
// Force premium model, bypass cost cap for critical anomalies
// (this return happens before the economic gate in step 6 is consulted).
return this.dispatchToModel(ModelTier.PREMIUM_CLOUD, request);
}
// 3. Panic Mode Check
// Note: checkPanicMode also records a new burn-rate sample on every call.
if (this.burnMonitor.checkPanicMode(config)) {
// Force local inference to reduce burn
return this.dispatchToModel(ModelTier.LOCAL_INFERENCE, request);
}
// 4. Temporal Decay & Persona Adjustment
// NOTE(review): `confidence` is computed here but not read by any later
// statement in this method — confirm whether it should feed step 5.
const decayFactor = this.decayCalc.calculateDecayFactor(request.timeToResolution, config);
let confidence = this.calculateBaseConfidence(request) * decayFactor;
confidence = this.personaAdj.adjustScore(confidence, request.persona);
// 5. Cost-Benefit Analysis
// NOTE(review): the cost estimate is for EFFICIENT_CLOUD although the
// escalation target below is PREMIUM_CLOUD — confirm which cost is intended.
const estimatedCost = this.estimateCost(ModelTier.EFFICIENT_CLOUD);
const eig = this.estimateInformationGain(request);
if (!this.costAnalyzer.shouldEscalate(eig, estimatedCost, config.thresholds.escalationEIGRatio)) {
// Collapse to efficient tier
return this.dispatchToModel(ModelTier.EFFICIENT_CLOUD, request);
}
// 6. Economic Viability
// NOTE(review): the gate is only reached on the premium escalation path; the
// local and efficient dispatches above return without a cap check.
if (!this.econGate.validate(request, tierId, config)) {
throw new Error('QUOTA_EXCEEDED');
}
// Default escalation
return this.dispatchToModel(ModelTier.PREMIUM_CLOUD, request);
}
// True for the regimes that `route` answers deterministically:
// HIGH_VOLUME_CONCENTRATION and STALE_MATE.
private isRegimeResolved(regime: MarketRegime): boolean {
return regime === MarketRegime.HIGH_VOLUME_CONCENTRATION ||
regime === MarketRegime.STALE_MATE;
}
}
#### Architecture Decisions
* **Priority Chain:** Hooks are ordered by cost and latency impact. Deterministic regime classification is evaluated first to short-circuit expensive processing. Anomaly detection follows to ensure critical events are never blocked by cost filters.
* **Modular Hooks:** Each hook is encapsulated in a dedicated class. This allows independent testing, versioning, and replacement. For example, the `RegimeClassifier` can be swapped for a lightweight ML model without affecting the `EconomicGate`.
* **Cost-Aware Escalation:** The `CostBenefitAnalyzer` introduces a quantitative gate for model selection. By comparing estimated information gain against cost, the router ensures that premium models are only used when the value justifies the expense.
* **Hard Caps:** Economic filters enforce absolute spending limits per tier. This prevents bill shock and provides predictable budgeting for enterprise deployments.
### Pitfall Guide
Implementing a deterministic-first router introduces specific challenges. The following pitfalls are derived from production experience and should be addressed during design.
1. **Deterministic Drift**
* *Explanation:* Market regimes and anomaly thresholds can become stale as market dynamics evolve. A classifier trained on historical data may misclassify new patterns, leading to incorrect routing.
* *Fix:* Implement a feedback loop where misrouted requests are flagged for review. Periodically retrain or recalibrate deterministic models using recent data. Monitor hook hit rates for anomalies that suggest drift.
2. **Hook Ordering Errors**
* *Explanation:* Placing the economic viability filter before anomaly detection can block critical inferences during market crashes, causing the system to fail when it is needed most.
* *Fix:* Strictly enforce the priority chain. Anomaly detection must always precede cost caps. Document the hook order in the architecture and enforce it via code structure.
3. **Latency in Deterministic Hooks**
* *Explanation:* If a deterministic hook performs heavy computation or external API calls, it can introduce latency that negates the benefits of avoiding the LLM.
* *Fix:* Ensure all deterministic hooks are lightweight. Use in-memory data structures, pre-computed aggregates, and efficient algorithms. Profile hook execution times and set strict timeouts.
4. **Threshold Sensitivity**
* *Explanation:* Hardcoded thresholds (e.g., 3σ for anomalies) may be too sensitive for volatile markets or too insensitive for stable ones, leading to false positives or missed escalations.
* *Fix:* Make thresholds configurable per market or regime. Use dynamic thresholds that adapt to recent volatility. Implement A/B testing to optimize threshold values.
5. **Context Window Management**
* *Explanation:* Escalating to premium models often requires passing context. Failing to manage context size can lead to truncation or excessive token usage, increasing costs.
* *Fix:* Implement context compression and summarization strategies. Limit the context passed to the LLM based on relevance. Monitor token usage per request.
6. **Persona Conflict**
* *Explanation:* Multiple persona overlays might apply conflicting biases, resulting in unpredictable score adjustments.
* *Fix:* Define a clear precedence for personas or use a weighted aggregation model. Ensure personas are mutually exclusive or combined via a deterministic formula.
7. **Burn Rate False Positives**
* *Explanation:* A legitimate spike in traffic might trigger the panic mode circuit breaker, forcing all requests to local inference and degrading quality.
* *Fix:* Distinguish between volume spikes and cost spikes. The burn rate monitor should track cost per second, not request count. Add a cooldown period to prevent rapid toggling of panic mode.
### Production Bundle
#### Action Checklist
- [ ] **Define Tier Structure:** Establish clear cost caps and model assignments for each tier (e.g., Free, Pro, Enterprise).
- [ ] **Implement Regime Classifier:** Build the deterministic regime detection logic with comprehensive test coverage.
- [ ] **Configure Anomaly Thresholds:** Set sigma levels and sentiment divergence thresholds based on historical market data.
- [ ] **Deploy Burn Rate Monitor:** Implement the rolling window burn rate calculation and integrate with cost tracking.
- [ ] **Set Escalation Thresholds:** Calibrate the EIG/cost ratio threshold to balance quality and expense.
- [ ] **Add Observability:** Instrument the router to log hook decisions, costs, and latencies for monitoring and analysis.
- [ ] **Test Edge Cases:** Verify behavior during anomalies, quota exhaustion, and panic mode activation.
- [ ] **Review Hook Order:** Audit the priority chain to ensure critical hooks cannot be bypassed by cost filters.
#### Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
| :--- | :--- | :--- | :--- |
| **High Volume, Low Complexity** | Deterministic Regime Resolution | Most requests can be resolved without LLMs. | Minimal |
| **Market Crash / Anomaly** | Anomaly Escalation to Premium Model | Critical events require high-fidelity reasoning. | High (Bypassed Cap) |
| **Budget Constrained** | Strict Economic Caps + Haiku Fallback | Ensures spend never exceeds limits. | Predictable |
| **Volatile Traffic Spike** | Panic Mode Circuit Breaker | Prevents runaway costs during volatility. | Reduced |
| **Low Time-to-Resolution** | Temporal Decay + Efficient Model | Confidence drops as event approaches; use cheaper model. | Moderate |
#### Configuration Template
```yaml
router:
  tiers:
    free:
      cap: 0.05
      model: haiku
    pro:
      cap: 0.50
      model: haiku
    elite:
      cap: 5.00
      model: claude
    enterprise:
      cap: 100.00
      model: claude
  thresholds:
    anomaly_sigma: 3.0
    escalation_eig_ratio: 0.5
    panic_burn_rate_sigma: 2.0
    time_decay_lambda: 0.1
  hooks:
    regime_classifier:
      enabled: true
      cache_ttl: 60s
    anomaly_detector:
      enabled: true
      lookback_window: 1h
    burn_rate_monitor:
      enabled: true
      window_size: 60s
```
#### Quick Start Guide
- **Initialize Router:** Import the `InferenceRouter` class and load the configuration file.
- **Define Request:** Create an `InferenceRequest` object with market data, query, and metadata.
- **Execute Route:** Call `router.route(request, tierId, config)` to process the request.
- **Handle Response:** Process the `InferenceResult` or catch errors such as `QUOTA_EXCEEDED`.
- **Monitor:** Review logs and metrics to verify hook performance and cost compliance.