: string;
mimeType: string;
}
/**
 * Downscales an uploaded chart image and re-encodes it as a base64 JPEG.
 *
 * The image is scaled (never enlarged) so that neither edge exceeds 1024px,
 * painted onto an opaque white canvas, and encoded as JPEG at 0.85 quality.
 *
 * @param file - Image file to decode via `createImageBitmap`.
 * @returns The base64 payload (without the `data:` URL prefix) and its MIME type.
 * @throws Error if a 2D canvas context cannot be obtained or the encoder
 *   produces no data. Decoding failures from `createImageBitmap` propagate.
 */
export async function prepareChartImage(file: File): Promise<ImagePayload> {
  const maxEdge = 1024;
  const bitmap = await createImageBitmap(file);
  try {
    const scale = Math.min(maxEdge / bitmap.width, maxEdge / bitmap.height, 1);
    const canvas = document.createElement('canvas');
    // Clamp to 1px: a very elongated image would otherwise round one edge to 0,
    // and a zero-area canvas encodes to an empty data URL.
    canvas.width = Math.max(1, Math.round(bitmap.width * scale));
    canvas.height = Math.max(1, Math.round(bitmap.height * scale));

    const ctx = canvas.getContext('2d');
    if (!ctx) throw new Error('Canvas context unavailable');

    // JPEG has no alpha channel; without an opaque backdrop, transparent
    // regions are composited onto black and dark chart text disappears.
    ctx.fillStyle = '#ffffff';
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.drawImage(bitmap, 0, 0, canvas.width, canvas.height);

    const base64 = canvas.toDataURL('image/jpeg', 0.85).split(',')[1];
    if (!base64) throw new Error('Failed to encode chart image as JPEG');
    return { base64, mimeType: 'image/jpeg' };
  } finally {
    // Release the decoded pixel data promptly instead of waiting for GC.
    bitmap.close();
  }
}
### Step 2: Forensic Prompt Architecture
The prompt must enforce a sequential analysis procedure. Instead of asking for a verdict, it mandates explicit evaluation of axis baselines, data summation, temporal framing, and linguistic framing. The model is instructed to output reasoning first, then wrap the final structured payload in a JSON code fence.
```typescript
/**
 * System prompt for the chart audit.
 *
 * It walks the model through five numbered checks, asks for plain-text
 * observations first, and then requests the final assessment inside a JSON
 * code fence that follows the schema sketched at the end of the prompt.
 * Lines are joined with a single newline and there is no trailing newline.
 */
const FORENSIC_PROTOCOL = [
  'You are a data visualization auditor. Analyze the provided chart image through the following sequence:',
  '1. BASELINE CHECK: Identify axis origins. Flag any non-zero Y-axis starts that exaggerate delta.',
  '2. SUMMATION VALIDATION: For pie/donut charts, verify slice percentages total exactly 100%.',
  '3. TEMPORAL FRAMING: Detect truncated time windows, selective date ranges, or missing baseline periods.',
  '4. LINGUISTIC ASSESSMENT: Identify promotional adjectives, absolute claims, or misleading trend descriptors.',
  '5. INTEGRITY SCORING: Assign 0-100 based on severity of detected manipulations.',
  'Output your step-by-step observations in plain text. Then, provide the final assessment inside a JSON code fence matching this schema:',
  '{',
  '"trust_score": number,',
  '"flags": Array<{ severity: "high" | "medium" | "low", description: string }>,',
  '"actual_narrative": string,',
  '"recommended_fix": string',
  '}',
].join('\n');
```
### Step 3: Inference Orchestration
Ollama's /api/chat endpoint handles multimodal input natively. Streaming is disabled to guarantee atomic response capture, which simplifies JSON extraction. Temperature is locked to 0.3 to minimize hallucination while preserving analytical flexibility.
import { z } from 'zod';
/** One detected manipulation: a severity level plus a free-text description. */
const AuditFlagSchema = z.object({
  severity: z.enum(['high', 'medium', 'low']),
  description: z.string(),
});

/**
 * Runtime schema for the structured audit payload returned by the model.
 *
 * `trust_score` must be a number in the inclusive range 0-100; `flags` is a
 * list of findings; the two narrative fields are plain strings.
 */
const ChartAuditSchema = z.object({
  trust_score: z.number().min(0).max(100),
  flags: z.array(AuditFlagSchema),
  actual_narrative: z.string(),
  recommended_fix: z.string(),
});
/**
 * Pulls the structured payload out of a free-form model reply.
 *
 * Candidates are tried in this order until one parses:
 *   1. fenced blocks tagged `json` (any letter case), last one first;
 *   2. untagged fenced blocks, last one first;
 *   3. the span from the first `{` to the last `}` in the whole reply, which
 *      covers replies whose fence was never closed or never opened.
 * Each candidate is parsed strictly first, then once more with trailing
 * commas before `}` / `]` removed.
 *
 * @param text - Complete model reply (reasoning followed by the payload).
 * @returns The parsed, still unvalidated, JSON value.
 * @throws Error when the reply contains no fence and no brace-delimited span.
 * @throws SyntaxError when candidates exist but none is parseable JSON.
 */
function extractJsonPayload(text: string): unknown {
  const tagged: string[] = [];
  const untagged: string[] = [];
  for (const match of text.matchAll(/```([\w+-]*)[^\S\r\n]*\r?\n?([\s\S]*?)```/g)) {
    const tag = (match[1] ?? '').toLowerCase();
    const body = (match[2] ?? '').trim();
    if (tag === 'json') tagged.push(body);
    else if (tag === '') untagged.push(body);
  }

  // The verdict follows the reasoning, so later fences are more likely to be it.
  const candidates = [...tagged.reverse(), ...untagged.reverse()];
  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open !== -1 && close > open) candidates.push(text.slice(open, close + 1));

  if (candidates.length === 0) throw new Error('No JSON block found in model response');

  let firstFailure: unknown;
  for (const candidate of candidates) {
    const withoutTrailingCommas = candidate.replace(/,(\s*[}\]])/g, '$1');
    for (const attempt of [candidate, withoutTrailingCommas]) {
      try {
        return JSON.parse(attempt);
      } catch (err: unknown) {
        if (firstFailure === undefined) firstFailure = err;
      }
    }
  }

  const reason = firstFailure instanceof Error ? firstFailure.message : String(firstFailure);
  throw new SyntaxError(`Model response contained malformed JSON: ${reason}`);
}

/**
 * Sends a chart image to an Ollama server's `/api/chat` endpoint and returns
 * the schema-validated audit produced by the model.
 */
export class VisionForensicsClient {
  private readonly endpoint: string;
  private readonly model: string;

  /**
   * @param endpoint - Base URL of the Ollama server; trailing slashes are dropped.
   * @param model - Model tag to request.
   */
  constructor(endpoint = 'http://localhost:11434', model = 'gemma4:e4b') {
    // Avoid producing `//api/chat` when the caller passes a URL ending in `/`.
    this.endpoint = endpoint.replace(/\/+$/, '');
    this.model = model;
  }

  /**
   * Runs the forensic prompt against one image in a single non-streamed request.
   *
   * @param imagePayload - Image whose `base64` field is sent to the model.
   * @returns The audit payload after validation against `ChartAuditSchema`.
   * @throws Error on a non-2xx HTTP response or when the reply has no JSON payload.
   * @throws SyntaxError when the payload is not parseable JSON.
   * Schema violations surface as whatever `ChartAuditSchema.parse` throws.
   */
  async auditChart(imagePayload: ImagePayload): Promise<z.infer<typeof ChartAuditSchema>> {
    const response = await fetch(`${this.endpoint}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.model,
        messages: [
          { role: 'system', content: FORENSIC_PROTOCOL },
          { role: 'user', content: 'Proceed with the forensic audit.', images: [imagePayload.base64] }
        ],
        stream: false,
        // num_ctx is set explicitly so the prompt, the image tokens and up to
        // 4096 generated tokens fit in the window together.
        options: { temperature: 0.3, num_predict: 4096, num_ctx: 8192 }
      })
    });
    if (!response.ok) {
      // statusText can be empty, so the numeric status is always included.
      throw new Error(`Ollama request failed: ${response.status} ${response.statusText}`.trimEnd());
    }

    const raw = (await response.json()) as { message?: { content?: unknown } } | null;
    const content = raw?.message?.content;
    const fullText = typeof content === 'string' ? content : '';
    return this.extractAndValidate(fullText);
  }

  /** Extracts the JSON payload from the reply text and validates its shape. */
  private extractAndValidate(text: string): z.infer<typeof ChartAuditSchema> {
    return ChartAuditSchema.parse(extractJsonPayload(text));
  }
}
Architectural Rationale
- Vanilla TypeScript over frameworks: Eliminates bundle overhead, simplifies deployment, and aligns with the privacy-first constraint (no external telemetry or analytics).
- Disabled streaming: Streaming fragments the response, making regex extraction of the JSON block unreliable. Atomic responses guarantee the reasoning text and JSON fence are captured together.
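- Low temperature (0.3): Analytical tasks benefit from consistent, repeatable pattern recognition rather than creative generation. A temperature of 0.3 reduces sampling variance without making the output fully deterministic; higher temperatures increase false positives on axis truncation and pie summation.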
- Zod validation: Raw JSON parsing is fragile. Runtime schema validation catches malformed payloads before they corrupt the UI state.
Pitfall Guide
1. Premature JSON Enforcement
Explanation: Using Ollama's format: 'json' flag forces the model to emit structured output immediately, bypassing intermediate reasoning. Small models lack the capacity to simultaneously reason spatially and format syntactically.
Fix: Remove the format flag. Instruct the model to output plain-text reasoning first, then wrap the final payload in a JSON code fence. Parse the fence post-generation.
2. Base64 Payload Bloat
Explanation: Encoding full-resolution screenshots (e.g., 4K displays) creates massive base64 strings that inflate context windows, increase latency, and degrade token attention distribution.
Fix: Implement client-side canvas resizing to a maximum dimension of 1024px. Use JPEG compression at 0.8-0.85 quality to reduce payload size without losing chart legibility.
3. Ignoring MoE Active Parameter Routing
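Explanation: Gemma 4's 26B variant uses a Mixture-of-Experts architecture, activating only ~3.8B parameters per forward pass. Developers often assume full model utilization, leading to incorrect latency and memory expectations: the active-parameter count determines per-token compute, but every expert's weights must still be resident in memory.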
Fix: Profile actual VRAM/RAM consumption during inference. Use the e4b variant for sub-8GB systems and reserve the 26B variant for workstations with 16GB+ unified memory. Adjust num_ctx accordingly.
4. Temperature-Induced Hallucination
Explanation: Setting temperature above 0.5 introduces stochasticity that manifests as fabricated data points, invented trend directions, or inconsistent severity classifications.
Fix: Lock temperature between 0.2 and 0.4 for analytical tasks. Use top_p: 0.9 if additional sampling control is required, but avoid creative generation parameters.
5. Unbounded Context Windows
Explanation: Ollama defaults to 2048 or 4096 context tokens. Complex charts with dense legends, multiple axes, and embedded annotations can exceed this limit, causing silent truncation.
Fix: Explicitly set num_ctx: 8192 in the request options. Monitor token consumption during development and adjust based on chart complexity.
6. Silent JSON Parse Failures
Explanation: Regex extraction of JSON blocks fails when the model outputs markdown formatting, trailing commas, or unescaped quotes. Silent failures break the UI without clear error states.
Fix: Wrap extraction in try/catch. Implement fallback parsing that strips markdown artifacts. Always validate against a runtime schema (Zod, Yup, or io-ts) before consuming the payload.
7. Overlooking Aspect Ratio Distortion
Explanation: Charts stretched horizontally or compressed vertically manipulate perceived velocity and magnitude. Small models often miss this unless explicitly prompted to evaluate spatial proportions.
Fix: Add a dedicated prompt step: "Evaluate visual proportions. Flag charts where aspect ratio distortion exaggerates slope or volume." Include examples of stretched line charts in your test suite.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Internal financial audits with strict data residency | Local Gemma 4 e4b via Ollama | Zero data exfiltration, runs on standard hardware | $0 infrastructure, ~$1200 laptop |
| High-volume marketing compliance screening | Cloud vision API + rule-based prefilter | Throughput requirements exceed local inference capacity | $0.01-0.05 per image, API subscription |
| Executive dashboard with real-time chart validation | Gemma 4 26b on dedicated workstation | MoE architecture balances accuracy and latency for complex layouts | ~$2500 workstation, $0 ongoing |
| Legacy system integration with strict JSON contracts | Reasoning-first prompt + Zod validation layer | Maintains analytical depth while guaranteeing schema compliance | Development overhead, zero runtime cost |
Configuration Template
// forensics.config.ts

/** Address used when the OLLAMA_URL environment variable is unset or empty. */
const LOCAL_OLLAMA_URL = 'http://localhost:11434';

/** Generation options grouped under `OLLAMA_CONFIG.options`. */
const INFERENCE_OPTIONS = {
  temperature: 0.3,
  num_predict: 4096,
  num_ctx: 8192,
  top_p: 0.9,
};

/**
 * Connection and inference settings for the Ollama-backed chart auditor.
 *
 * NOTE(review): `requestTimeout` is presumably in milliseconds, and nothing
 * in view consumes `requestTimeout` or `maxRetries` - confirm against the
 * code that reads this config.
 */
export const OLLAMA_CONFIG = {
  endpoint: process.env.OLLAMA_URL || LOCAL_OLLAMA_URL,
  // Switch to gemma4:26b for complex multi-axis charts
  model: 'gemma4:e4b',
  options: INFERENCE_OPTIONS,
  requestTimeout: 30000,
  maxRetries: 2,
};
/**
 * Individual audit instructions, keyed by the check they describe.
 * Each value is a self-contained sentence pair intended for use in a prompt.
 */
export const PROMPT_DIRECTIVES = {
  // Axis origin and truncated-baseline check.
  baselineCheck:
    'Identify axis origins. Flag any non-zero Y-axis starts that exaggerate delta.',
  // Pie/donut slices must add up to 100%.
  summationValidation:
    'For pie/donut charts, verify slice percentages total exactly 100%.',
  // Cherry-picked or truncated time ranges.
  temporalFraming:
    'Detect truncated time windows, selective date ranges, or missing baseline periods.',
  // Loaded wording in titles and captions.
  linguisticAssessment:
    'Identify promotional adjectives, absolute claims, or misleading trend descriptors.',
  // Stretched or squashed aspect ratios.
  spatialProportion:
    'Evaluate visual proportions. Flag charts where aspect ratio distortion exaggerates slope or volume.',
};
Quick Start Guide
- Install Ollama & Pull Model: Download Ollama for your OS, then run `ollama pull gemma4:e4b` (or `gemma4:26b` if you have 16GB+ RAM). Start the daemon with `ollama serve`.
- Initialize Project: Create a TypeScript project, install `zod` for schema validation, and scaffold a vanilla HTML/JS frontend with a drag-and-drop image uploader.
- Implement Client Module: Copy the `VisionForensicsClient` class and `prepareChartImage` utility. Configure the Ollama endpoint and model variant in your environment.
- Wire UI to Inference: Attach the uploader to a submit handler that calls `auditChart()`. Render the extracted JSON payload into a results panel, displaying trust score, severity flags, and the raw reasoning text in a collapsible section.
- Validate & Iterate: Test against known misleading charts (truncated axes, cherry-picked ranges, impossible totals). Adjust `num_ctx` and `temperature` if the model misses subtle manipulations or hallucinates data points.