ie', 'grouped_bar', 'stacked_area']);
// Structural contract for every model-generated chart configuration.
// Parsed with Zod before anything reaches the frontend renderer, so a
// malformed model response fails fast instead of crashing a chart.
export const VisualizationSpec = z.object({
// Which renderer handles this spec (enum defined in ChartType above).
chart_type: ChartType,
title: z.string().min(1),
// Horizontal axis binding: dataset column plus how to interpret it.
x_axis: z.object({
column: z.string(),
type: z.enum(['categorical', 'temporal', 'numerical']),
format: z.string().optional()
}),
// Vertical axis binding: column, how to aggregate it, display label.
y_axis: z.object({
column: z.string(),
aggregation: z.enum(['sum', 'avg', 'count', 'max', 'min']),
label: z.string()
}),
// Optional threshold-based coloring rules (e.g. "red when value > 100").
conditional_rules: z.array(z.object({
threshold: z.number(),
color: z.string(),
operator: z.enum(['gt', 'lt', 'eq'])
})).optional(),
// Provenance: which model produced the spec, how confident it was, when.
metadata: z.object({
model_id: z.string(),
confidence_score: z.number().min(0).max(1),
generated_at: z.string().datetime()
})
});
// Same name exports both the runtime validator and the inferred TS type.
export type VisualizationSpec = z.infer<typeof VisualizationSpec>;
### Step 2: Implement Intent Routing & Column Resolution
Natural language queries require semantic mapping before chart selection. Build a lightweight resolver that extracts temporal markers, aggregation hints, and grouping dimensions.
```typescript
/** Signals extracted from a raw natural-language query before model invocation. */
interface QueryIntent {
  temporal_hint: boolean;            // query contains time/trend language
  grouping_dimension: string | null; // first word after a standalone "by", if any
  aggregation_hint: 'sum' | 'avg' | 'count' | 'max' | 'min' | null;
  raw_query: string;                 // original query, untouched
}

/**
 * Heuristic intent extraction: detects temporal markers, a "group by"
 * dimension, and an aggregation hint so the model receives structured
 * context instead of raw natural language.
 *
 * Fixes vs. the naive substring version: "by" is matched as a whole word
 * (so "nearby"/"baby" no longer yield a bogus grouping dimension and the
 * trailing-"by" case returns null instead of ''), and the
 * aggregation-marker table is scanned once instead of twice.
 *
 * @param query user query, any casing
 * @returns resolved intent; hints are null when no marker is found
 */
export function resolveQueryIntent(query: string): QueryIntent {
  const lower = query.toLowerCase();
  const temporalMarkers = ['trend', 'over time', 'monthly', 'yearly', 'daily', 'period'];
  const aggregationMarkers: Record<string, QueryIntent['aggregation_hint']> = {
    'total': 'sum', 'average': 'avg', 'count': 'count', 'highest': 'max', 'lowest': 'min'
  };

  // "... by <dimension> ..." -- \b avoids false hits on words that merely
  // contain "by"; \S+ captures the single token that follows.
  const byMatch = /\bby\s+(\S+)/.exec(lower);

  // Single scan over the marker table (the original ran .find twice).
  const aggKey = Object.keys(aggregationMarkers).find(k => lower.includes(k));

  return {
    temporal_hint: temporalMarkers.some(m => lower.includes(m)),
    grouping_dimension: byMatch ? byMatch[1] : null,
    aggregation_hint: aggKey ? aggregationMarkers[aggKey] ?? null : null,
    raw_query: query
  };
}
```

### Step 3: Build the Generation Pipeline with Fallback Logic
The pipeline routes requests to the optimal model based on latency budget and language requirements. It validates output against the schema, retries with a stricter prompt on failure, and falls back to a deterministic template if the model consistently fails.
import { VisualizationSpec } from './schemas';
import { resolveQueryIntent } from './intent-resolver';
// Per-model routing descriptor: where to call the model and the
// constraints (latency budget, supported languages) used to select it.
type ModelRouter = {
id: string;
endpoint: string;
// Expected upper bound (ms) for a response from this model.
latency_budget_ms: number;
// Language codes ('en', 'tr', 'ar', ...) the model handles reliably.
language_support: string[];
};
// Candidate models, in priority order. The first entry also serves as
// the default when no router satisfies the caller's constraints.
const ROUTERS: ModelRouter[] = [
{ id: 'llama-3.1-8b', endpoint: '/v1/models/llama-3.1-8b', latency_budget_ms: 2000, language_support: ['en'] },
{ id: 'qwen-2.5-7b', endpoint: '/v1/models/qwen-2.5-7b', latency_budget_ms: 2000, language_support: ['en', 'tr', 'ar'] },
{ id: 'gemma-4-e2b', endpoint: '/v1/models/gemma-4-e2b', latency_budget_ms: 1500, language_support: ['en'] }
];
/**
 * Generates a schema-validated VisualizationSpec for a natural-language query.
 *
 * Routing: picks the first model that supports `locale` and fits within
 * `maxLatencyMs`, defaulting to ROUTERS[0] when none match.
 * Validation: model output has markdown fences stripped, is JSON-parsed,
 * and is checked against the Zod schema. On failure the call is retried
 * once with a stricter prompt (previously only claimed in a comment but
 * never implemented), then the deterministic template takes over so the
 * caller always receives a well-formed spec.
 *
 * @param query        natural-language chart request
 * @param locale       user language code (default 'en')
 * @param maxLatencyMs latency budget used for model routing (default 2000)
 */
export async function generateVisualizationSpec(
  query: string,
  locale: string = 'en',
  maxLatencyMs: number = 2000
): Promise<VisualizationSpec> {
  const intent = resolveQueryIntent(query);
  // Route to model matching language and latency constraints
  const selectedRouter = ROUTERS.find(r =>
    r.language_support.includes(locale) && r.latency_budget_ms <= maxLatencyMs
  ) ?? ROUTERS[0];

  const basePrompt = buildStructuredPrompt(query, intent, selectedRouter.id);
  // Attempt 1: normal prompt. Attempt 2: same prompt with an explicit
  // strict-mode suffix -- the "retry with stricter prompt" pass.
  const prompts = [
    basePrompt,
    `${basePrompt}\nSTRICT: The previous response was invalid. Output raw JSON only -- no markdown, no prose.`
  ];

  for (const prompt of prompts) {
    try {
      const rawResponse = await callModelEndpoint(selectedRouter.endpoint, prompt);
      // Models frequently wrap JSON in ```json fences (Pitfall #1);
      // unwrap before parsing, falling back to the raw text.
      const jsonText = /```(?:json)?\s*([\s\S]*?)\s*```/.exec(rawResponse)?.[1] ?? rawResponse;
      const parsed = VisualizationSpec.safeParse(JSON.parse(jsonText));
      if (parsed.success) return parsed.data;
      console.warn(`Model ${selectedRouter.id} schema validation failed: ${parsed.error.message}`);
    } catch (error) {
      // Transport failure or unparseable JSON -- log with context and
      // fall through to the strict retry / deterministic fallback.
      console.warn(`Model ${selectedRouter.id} call failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
  // Guaranteed-valid template: the dashboard must never break on model failure.
  return generateDeterministicFallback(intent);
}
/**
 * Builds the structured system prompt sent to the model. The detected
 * intent is serialized into the prompt so the model works from explicit
 * signals (temporal / grouping / aggregation) rather than re-deriving
 * them from the raw query.
 *
 * NOTE: the template below is a runtime string -- every line between the
 * backticks is sent to the model verbatim, so no comments appear inside.
 */
function buildStructuredPrompt(query: string, intent: QueryIntent, modelId: string): string {
return `
You are a chart configuration engine. Output ONLY valid JSON matching the VisualizationSpec schema.
Query: "${query}"
Detected Intent: temporal=${intent.temporal_hint}, group_by=${intent.grouping_dimension ?? 'none'}, agg=${intent.aggregation_hint ?? 'sum'}
Model: ${modelId}
Rules:
1. Map temporal hints to x_axis.type: "temporal"
2. Never output conversational text
3. Handle nulls by setting conditional_rules to empty array
4. Return confidence_score based on query clarity (0.0-1.0)
`;
}
/**
 * Last-resort spec builder: derives a schema-valid chart configuration
 * purely from the resolved intent, with no model involvement. Used when
 * every model attempt fails validation, so the renderer always receives
 * something safe to draw.
 */
function generateDeterministicFallback(intent: QueryIntent): VisualizationSpec {
  // Temporal queries get a line chart over a temporal axis; everything
  // else falls back to a categorical bar chart.
  const chartType = intent.temporal_hint ? 'line' : 'bar';
  const xType = intent.temporal_hint ? 'temporal' : 'categorical';
  const xColumn = intent.grouping_dimension ?? 'timestamp';
  const aggregation = intent.aggregation_hint ?? 'sum';

  return {
    chart_type: chartType,
    title: 'Fallback Visualization',
    x_axis: { column: xColumn, type: xType },
    y_axis: { column: 'value', aggregation, label: 'Aggregated Value' },
    conditional_rules: [],
    metadata: {
      // Fixed low confidence flags the spec as template-derived.
      model_id: 'deterministic-fallback',
      confidence_score: 0.4,
      generated_at: new Date().toISOString()
    }
  };
}
Architecture Rationale
- Schema-First Validation: Zod enforces structural contracts before the frontend receives data. This prevents rendering crashes caused by missing fields or type mismatches.
- Intent Resolution Layer: Extracting temporal markers and aggregation hints before model invocation reduces hallucination. The model receives structured context instead of raw natural language.
- Dynamic Routing: Matching models to latency budgets and language requirements prevents over-provisioning. You don't need a multilingual model for English-only internal dashboards, nor a high-latency model for real-time filtering.
- Deterministic Fallback: AI models are probabilistic. Production systems require guaranteed output. The fallback template ensures the dashboard never breaks, even when the model fails schema validation.
Pitfall Guide
1. Conversational Leakage
Explanation: Models return markdown, explanations, or JSON wrapped in text blocks instead of pure configuration objects. This breaks JSON.parse() and crashes the renderer.
Fix: Enforce strict system prompts that forbid conversational output. Wrap the inference call in a regex extraction step (/```json\n([\s\S]*?)\n```/) before Zod validation. Always validate before rendering.
2. Temporal Column Misclassification
Explanation: Date/time columns are frequently mapped as categorical instead of temporal, causing line charts to render as disconnected bars or misaligned axes.
Fix: Run a lightweight type inference pass on the dataset schema before prompt generation. Pass explicit column metadata ({ column: 'created_at', inferred_type: 'temporal' }) to the model alongside the query.
3. Null/Empty Value Collapse
Explanation: Models generate chart specs that assume dense data. When the dataset contains gaps, the frontend throws rendering errors or displays misleading zero-baseline charts.
Fix: Instruct the model to output conditional_rules or interpolation: 'linear' when gaps are detected. Pre-process datasets to fill missing temporal points with null rather than 0, and let the renderer handle interpolation.
4. Session Drift
Explanation: Identical prompts yield different chart configurations across invocations due to temperature sampling or stateless context windows. This breaks user trust in interactive dashboards.
Fix: Lock temperature to 0.1 or 0 for chart generation. Seed the request with a deterministic hash of the query + dataset schema. Cache successful spec generations keyed by query hash to guarantee consistency.
5. Over-Reliance on Raw Accuracy Scores
Explanation: Teams select models based on benchmark accuracy without considering latency budgets or hardware constraints. A 90% accurate model that takes 12s to respond is unusable for interactive filtering.
Fix: Treat accuracy as one axis in a multi-dimensional routing matrix. Implement latency-aware fallbacks: if the primary model exceeds the budget, route to a faster, slightly less accurate model rather than timing out.
6. Multilingual Semantic Drift
Explanation: Direct translation of business terms fails when column names use domain-specific jargon. A Turkish prompt might map `gelir` to `revenue`, but the dataset uses `net_income`, causing column resolution failure.
Fix: Maintain a locale-aware column alias map. Translate user queries into canonical dataset column names before model invocation. Use Qwen 2.5 7B or Qwen 3 8B for non-English workloads, as they demonstrate superior cross-lingual semantic alignment.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Interactive dashboard with <2s latency requirement | Gemma 4 E2B or Mistral 7B | Sub-2s GPU latency preserves UX during filtering; slight accuracy trade-off is acceptable for speed | Lower compute cost per request; higher throughput |
| Multilingual user base (Turkish, Arabic, etc.) | Qwen 2.5 7B or Qwen 3 8B | Superior cross-lingual semantic alignment prevents column mapping failures | Moderate compute cost; requires multilingual alias mapping |
| Maximum chart correctness for financial reporting | Llama 3.1 8B | Highest accuracy across 32 scenarios; handles complex conditional formatting reliably | Higher latency (~2s GPU); acceptable for batch/static reports |
| Edge deployment with limited VRAM | Mistral 7B | Optimized architecture runs efficiently on consumer GPUs or CPU-only environments | Lowest infrastructure cost; may require fallback for complex queries |
| Balanced production workload | Qwen 3 8B | Strong accuracy + multilingual support + acceptable latency | Moderate cost; ideal default for most enterprise dashboards |
Configuration Template
// chart-pipeline.config.ts
// Central knobs for the generation pipeline. Values mirror the defaults
// discussed above: 2s latency budget, low temperature, one strict retry,
// deterministic query-hash cache keys.
export const PipelineConfig = {
// Model selection: default + faster fallback, latency cap, sampling.
routing: {
default_model: 'qwen-3-8b',
fallback_model: 'gemma-4-e2b',
max_latency_ms: 2000,
// Near-zero temperature keeps identical queries producing identical specs.
temperature: 0.1,
seed_strategy: 'query_hash'
},
// Output handling: strip markdown fences, validate against the Zod
// schema, retry once, then use the deterministic template.
validation: {
strict_schema: true,
strip_markdown: true,
retry_on_failure: 1,
fallback_to_deterministic: true
},
// Dataset-side preparation run before prompt construction.
dataset_preprocessing: {
infer_column_types: true,
normalize_nulls: 'null',
temporal_detection_threshold: 0.85,
alias_map_path: './locales/column-aliases.json'
},
// Successful specs are cached by query+schema hash for consistency.
caching: {
enabled: true,
ttl_seconds: 300,
key_strategy: 'sha256(query + schema_hash)'
}
};
Quick Start Guide
- Install dependencies:
npm install zod @anthropic-ai/sdk openai (or your preferred inference SDK)
- Define your schema: Copy the
VisualizationSpec Zod schema and adapt field names to match your frontend charting library (Recharts, Chart.js, Victory, etc.)
- Deploy the pipeline: Initialize the
generateVisualizationSpec function with your model endpoints and latency budgets. Configure routing based on your user locale and performance requirements.
- Add preprocessing: Run a lightweight type inference pass on your dataset schema. Pass column metadata (
type, aggregation_hint) alongside the user query to reduce hallucination.
- Test with fallbacks: Simulate schema validation failures and verify the deterministic fallback renders correctly. Lock temperature to
0.1, enable caching, and monitor latency vs accuracy in production.