deParserModule.loadSpec(spec)));
// 5. Preload character and line recognition models into memory
await CaptureVisionRouter.appendDLModelBuffer([
'MRZCharRecognition',
'MRZTextLineRecognition'
]);
// 6. Instantiate the routing engine
this.cvr = await CaptureVisionRouter.createInstance();
this.isReady = true;
}
}
**Architecture Rationale:**
- `CoreModule.loadWasm` is called before any router or parser creation to prevent race conditions during model compilation.
- MRZ specifications are loaded in parallel to reduce initialization time. Each spec defines character sets, checksum algorithms, and field positions for specific document types.
- Deep learning models are preloaded via `appendDLModelBuffer` to avoid first-capture latency spikes. The runtime caches these in WASM memory rather than reloading from disk.
### Phase 2: Task Chaining via JSON Configuration
The pipeline uses a declarative JSON template to chain document detection, MRZ extraction, and portrait zone identification into a single capture call. This eliminates the need for sequential API calls and ensures all outputs share the same coordinate space.
```json
{
"CaptureVisionTemplates": [
{
"Name": "UnifiedIdentityScan",
"ImageROIProcessingNameArray": ["roi_identity_doc"],
"SemanticProcessingNameArray": ["sp_identity_doc"],
"OutputOriginalImage": 1,
"MaxParallelTasks": 0,
"Timeout": 15000
}
],
"TargetROIDefOptions": [
{
"Name": "roi_identity_doc",
"TaskSettingNameArray": [
"task_mrz_extraction",
"task_document_normalization"
]
}
]
}
```
The template is loaded during pipeline setup:
/**
 * Loads the capture template from `templateUrl` and confirms that the
 * `UnifiedIdentityScan` template is usable afterwards.
 *
 * A `v=<timestamp>` query parameter is added to the URL before it is handed to
 * `initSettings`, so a previously cached copy of the template is not reused.
 *
 * @param templateUrl - Location of the JSON template. It may already contain a
 *   query string and/or a `#fragment`; both are preserved.
 * @throws Error when the router has not been created yet, when `initSettings`
 *   reports a non-zero `errorCode`, or when `checkTemplateNameValidity` rejects
 *   `UnifiedIdentityScan` (the message then lists the names that are available).
 */
async configure(templateUrl: string): Promise<void> {
  if (!this.cvr) throw new Error('Pipeline not initialized');

  // The cache-buster must go before any fragment and must use `&` when the URL
  // already has a query string; blindly appending `?v=...` would produce an
  // invalid second `?` or bury the parameter inside the fragment.
  const hashIndex = templateUrl.indexOf('#');
  const base = hashIndex === -1 ? templateUrl : templateUrl.slice(0, hashIndex);
  const fragment = hashIndex === -1 ? '' : templateUrl.slice(hashIndex);
  const separator = base.includes('?') ? '&' : '?';
  const bustedUrl = `${base}${separator}v=${Date.now()}${fragment}`;

  const result = await this.cvr.initSettings(bustedUrl);
  if (result.errorCode !== 0) {
    throw new Error(`Template load failed: ${result.errorString}`);
  }

  // A successful load does not by itself prove the expected template exists.
  const valid = await this.cvr.checkTemplateNameValidity('UnifiedIdentityScan');
  if (!valid) {
    const available = await this.cvr.getTemplateNames();
    throw new Error(`Template missing. Available: ${available.join(', ')}`);
  }
}
**Architecture Rationale:**
- `OutputOriginalImage: 1` ensures the raw frame is retained for portrait cropping without requiring a second capture.
- `MaxParallelTasks: 0` lets the runtime auto-scale based on device cores, preventing thread exhaustion on mobile hardware.
- Cache-busting the template URL prevents stale configuration from being served by aggressive browser caches during development.
Once initialized, the pipeline accepts image sources (file input, camera frame, or clipboard paste) and routes them through the unified template. The result object contains multiple item types that must be filtered and processed independently.
/** Everything collected from a single capture pass. */
interface CaptureOutput {
  /** Trimmed text of each text-line result item, in the order the items were returned. */
  rawMrz: Array<string>;
  /** Flat field-name to field-value map built from the parser output; empty when nothing was parsed. */
  parsedFields: Record<string, string>;
  /** Detected-quad result item, or `null` when the capture produced none. */
  documentQuad: { points: Array<{ x: number; y: number }> } | null;
}
/**
 * Runs one capture through the `UnifiedIdentityScan` template and sorts the
 * returned items into a `CaptureOutput`.
 *
 * Text-line items contribute their trimmed text to `rawMrz`; a detected-quad
 * item becomes `documentQuad` (if several are returned, the last one wins).
 * When at least one character of MRZ text was collected, the lines are joined
 * with no separator, parsed, and flattened into `parsedFields`.
 *
 * @param source - Image element, canvas, or string accepted by the router's `capture`.
 * @returns The collected MRZ lines, parsed fields, and document quad.
 * @throws Error when the router or the parser has not been created yet.
 */
async executeCapture(source: HTMLImageElement | HTMLCanvasElement | string): Promise<CaptureOutput> {
  if (!this.cvr || !this.parser) throw new Error('Pipeline not ready');

  // Start from a clean slate so nothing from an earlier capture leaks into this one.
  await this.cvr.clearIntermediateResults();

  const captured = await this.cvr.capture(source, 'UnifiedIdentityScan');

  const mrzLines: string[] = [];
  let quad: CaptureOutput['documentQuad'] = null;
  for (const entry of captured.items || []) {
    if (entry.type === EnumCapturedResultItemType.CRIT_TEXT_LINE) {
      mrzLines.push(entry.text.trim());
    } else if (entry.type === EnumCapturedResultItemType.CRIT_DETECTED_QUAD) {
      quad = entry;
    }
  }

  // The parser receives the MRZ as one string with no delimiter between lines.
  const mrzText = mrzLines.join('');
  const parsedFields: Record<string, string> =
    mrzText.length > 0
      ? this.flattenParseResult(await this.parser.parse(mrzText))
      : {};

  return { rawMrz: mrzLines, parsedFields, documentQuad: quad };
}
/**
 * Collapses a parser result into a flat name-to-value map.
 *
 * Each entry of `result.items` contributes `entry.name` as the key and
 * `entry.value` as the value; a falsy value is stored as the empty string.
 * When the same name occurs more than once, the last entry wins.
 *
 * @param result - Parser output; a missing result or missing `items` yields an empty map.
 * @returns A freshly created map of field names to values.
 */
private flattenParseResult(result: any): Record<string, string> {
  if (!result?.items) return {};

  return result.items.reduce((fields: Record<string, string>, entry: any) => {
    fields[entry.name] = entry.value || '';
    return fields;
  }, {});
}
**Architecture Rationale:**
- `clearIntermediateResults()` is critical. The `IdentityProcessor` relies on intermediate feature maps from the current capture cycle. Residual data causes portrait zone misalignment.
- MRZ specifications require exact character counts (e.g., TD3 passports = 88 characters). Joining lines without delimiters ensures checksum validation passes.
- The `flattenParseResult` utility normalizes the nested parser output into a predictable key-value structure for UI binding or API submission.
Document boundaries and portrait crops share the same coordinate system. The IdentityProcessor consumes the capture result to locate the precise facial region, then renders overlays and crops onto separate canvas elements.
import { IdentityProcessor } from 'dynamsoft-identity-processor';
/**
 * Renders the visual side of a capture result: the document outline on an
 * overlay canvas and the portrait crop on a separate canvas.
 */
export class IdentityRenderer {
  private readonly processor: IdentityProcessor;
  private readonly overlayCtx: CanvasRenderingContext2D;
  private readonly cropCtx: CanvasRenderingContext2D;

  /**
   * @param cvr - Router handed to the identity processor via `setCVR`.
   * @param overlayCanvas - Canvas that receives the document outline.
   * @param cropCanvas - Canvas that receives the portrait crop; it is resized to the crop.
   * @throws Error when either canvas cannot provide a 2D context.
   */
  constructor(cvr: CaptureVisionRouter, overlayCanvas: HTMLCanvasElement, cropCanvas: HTMLCanvasElement) {
    this.processor = new IdentityProcessor();
    this.processor.setCVR(cvr);

    // NOTE(review): `willReadFrequently` is a hint for contexts that are read back
    // with getImageData(); this class only draws to the overlay. Kept as-is to
    // preserve behavior; confirm whether the hint is still wanted.
    const overlayCtx = overlayCanvas.getContext('2d', { willReadFrequently: true });
    const cropCtx = cropCanvas.getContext('2d');

    // getContext() returns null when the canvas already has a different context
    // type; fail here with a clear message instead of on the first draw call.
    if (!overlayCtx) throw new Error('Overlay canvas does not provide a 2D context');
    if (!cropCtx) throw new Error('Crop canvas does not provide a 2D context');

    this.overlayCtx = overlayCtx;
    this.cropCtx = cropCtx;
  }

  /**
   * Clears both canvases, outlines the document quad when one is present, then
   * asks the processor for a portrait zone and crops it from `sourceImage`.
   *
   * @param captureOutput - Result of a capture; only `documentQuad` is read here.
   * @param sourceImage - Image the quad and the portrait zone refer to.
   */
  async renderCaptureResult(
    captureOutput: CaptureOutput,
    sourceImage: HTMLImageElement
  ): Promise<void> {
    const { overlayCtx, cropCtx } = this;

    // Wipe whatever the previous render left behind.
    overlayCtx.clearRect(0, 0, overlayCtx.canvas.width, overlayCtx.canvas.height);
    cropCtx.clearRect(0, 0, cropCtx.canvas.width, cropCtx.canvas.height);

    if (captureOutput.documentQuad) {
      this.drawQuadOverlay(captureOutput.documentQuad);
    }

    const portraitZone = await this.processor.findPortraitZone(
      sourceImage,
      captureOutput.documentQuad
    );
    if (portraitZone) {
      this.cropPortrait(sourceImage, portraitZone);
    }
  }

  /** Strokes the closed polygon described by `quad.points` on the overlay canvas. */
  private drawQuadOverlay(quad: { points: { x: number; y: number }[] }): void {
    const [first, ...rest] = quad.points;
    if (!first) return; // nothing to outline

    const ctx = this.overlayCtx;
    ctx.strokeStyle = '#00ff88';
    ctx.lineWidth = 3;
    ctx.beginPath();
    ctx.moveTo(first.x, first.y);
    for (const point of rest) {
      ctx.lineTo(point.x, point.y);
    }
    ctx.closePath();
    ctx.stroke();
  }

  /**
   * Resizes the crop canvas to `zone` and copies that region of `source` into it.
   * A zone without a positive width and height is ignored, leaving the crop
   * canvas as it was.
   */
  private cropPortrait(source: HTMLImageElement, zone: { x: number; y: number; width: number; height: number }): void {
    // Written as negated comparisons so NaN dimensions are rejected as well.
    if (!(zone.width > 0) || !(zone.height > 0)) return;

    const ctx = this.cropCtx;
    const canvas = ctx.canvas;
    // Assigning width/height also resets the canvas contents.
    canvas.width = zone.width;
    canvas.height = zone.height;
    ctx.drawImage(source, zone.x, zone.y, zone.width, zone.height, 0, 0, zone.width, zone.height);
  }
}
Architecture Rationale:
- Separating overlay and crop canvases prevents compositing artifacts and allows independent scaling for UI display vs. downstream API submission.
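- `willReadFrequently: true` on the overlay context is a read-back hint: it asks the browser for a software-backed canvas so that repeated `getImageData()` calls stay cheap. It does not speed up drawing, so keep it only if the camera preview loop actually reads pixels back from the overlay.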
- Portrait extraction uses the document quad as a spatial constraint, reducing false positives from background faces or secondary documents.
Pitfall Guide
1. WASM Module Race Conditions
Explanation: Calling capture() before loadWasm() completes causes silent failures or undefined method errors. The runtime does not throw explicit initialization errors by default.
Fix: Wrap all WASM loading in a dedicated `bootstrap()` method and gate capture execution behind an `isReady` flag. Await dependent initialization steps explicitly and in order; only independent work (such as loading the MRZ specifications) should run in parallel.
2. Template Name Validation Omission
Explanation: JSON templates may fail to load due to syntax errors or missing task definitions, but the router continues operating with fallback settings that lack MRZ or portrait capabilities.
Fix: Always call checkTemplateNameValidity() after initSettings(). Log available templates on failure to accelerate debugging. Never assume template loading succeeds silently.
3. MRZ Line Concatenation with Delimiters
Explanation: Joining MRZ lines with spaces, newlines, or commas breaks checksum validation. Specifications like ICAO 9303 require exact character positioning without separators.
Fix: Use `.join('')` on trimmed lines. Validate the total length before parsing (e.g., 90 for TD1, 72 for TD2, 88 for TD3). Reject malformed strings early to prevent parser exceptions.
4. Canvas Coordinate Scaling Mismatches
Explanation: Displaying images at CSS-scaled dimensions while drawing overlays on unscaled canvas coordinates results in misaligned borders and cropped portraits.
Fix: Synchronize canvas width/height attributes with the rendered image dimensions. Apply a consistent scaling factor to all coordinate outputs before drawing. Use getBoundingClientRect() for accurate overlay positioning.
5. Stale Intermediate Results
Explanation: The IdentityProcessor caches feature maps from previous captures. Reusing the router without clearing state causes portrait zones to align with old document positions.
Fix: Call cvr.clearIntermediateResults() immediately before each capture() invocation. This is non-negotiable for camera preview loops or rapid successive uploads.
6. License Feature Gating
Explanation: MRZ parsing and portrait extraction require specific license entitlements. Using a base-tier key results in empty results or LICENSE_INVALID errors during template loading.
Fix: Verify license capabilities during initialization. Implement graceful fallbacks (e.g., disable portrait crop button) when MRZ features are unavailable. Never hardcode trial keys in production builds.
7. EXIF Orientation Ignorance
Explanation: Mobile cameras embed rotation metadata in JPEG headers. Decoding paths that do not apply EXIF orientation (older browsers when drawing to canvas, or raw image bytes handed straight to a decoder) produce upside-down or sideways document captures.
Fix: Use createImageBitmap() with imageOrientation: 'from-image' or preprocess images with a library like exifr to normalize orientation before passing to the capture router.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-volume KYC (>10k/day) | Client-side WASM pipeline | Eliminates backend compute costs, scales with user count, reduces latency | Infrastructure cost drops ~60-80% |
| Offline-capable field app | Local template + cached DL models | No network dependency, deterministic performance, works in low-connectivity zones | Zero bandwidth cost, higher initial bundle size |
| Multi-document support (Passport + ID + Visa) | Unified JSON template with spec array | Single capture returns all outputs, reduces code complexity, ensures coordinate alignment | Development time reduced ~40% vs. chained services |
Configuration Template
{
"CaptureVisionTemplates": [
{
"Name": "UnifiedIdentityScan",
"ImageROIProcessingNameArray": ["roi_identity_doc"],
"SemanticProcessingNameArray": ["sp_identity_doc"],
"OutputOriginalImage": 1,
"MaxParallelTasks": 0,
"Timeout": 15000
}
],
"TargetROIDefOptions": [
{
"Name": "roi_identity_doc",
"TaskSettingNameArray": [
"task_mrz_extraction",
"task_document_normalization"
]
}
],
"ImageROIProcessingOptions": [
{
"Name": "roi_identity_doc",
"TaskSettingNameArray": [
"task_mrz_extraction",
"task_document_normalization"
]
}
],
"TaskSettingOptions": [
{
"Name": "task_mrz_extraction",
"ExpectedQREcosystem": "MRZ",
"Timeout": 10000
},
{
"Name": "task_document_normalization",
"DetectDocumentBorderModes": [
{
"Mode": "DM_GENERAL_WIDTH_CONCENTRATION"
}
],
"Timeout": 10000
}
]
}
Quick Start Guide
- Install Dependencies: Add `dynamsoft-capture-vision-bundle@3.4.2001` to your project via npm or CDN. Ensure your build target supports ES modules and WASM loading.
- Bootstrap the Pipeline: Create an `IdentityPipeline` instance, call `bootstrap()` with a valid license key, and await WASM compilation. Verify readiness before enabling UI controls.
- Load Configuration: Pass the JSON template URL to `configure()`. Validate template availability and cache-bust the request during development.
- Execute Capture: Bind file input, camera stream, or paste events to `executeCapture()`. Process the returned `CaptureOutput` to populate UI fields, draw overlays, and trigger portrait cropping.
- Deploy & Monitor: Serve via HTTPS (required for camera access and WASM security policies). Monitor `capture()` latency and error codes in production to detect license expiration or template drift.