uild forward and reverse lookup maps for function references. It also monitors constant value declarations and matches their offsets against data segments identified in Pass 1. This creates a reliable map of string and data references within the code.
// Core streaming parser implementation
import { Parser, Section } from 'wasmparser';
/**
 * Module-level metadata that `WasmStreamAnalyzer.scanMetadata` fills in
 * during Pass 1, one field per section kind it recognises.
 */
interface ModuleManifest {
/** Result of `extractTypes` for the Type section. */
types: Map<number, TypeSignature>;
/** Result of `extractImports` for the Import section. */
imports: Map<number, ImportDescriptor>;
/** Result of `extractExports` for the Export section. */
exports: Map<number, ExportDescriptor>;
/**
 * Result of `extractData` for the Data section. Pass 2 tests `i32.const`
 * immediates against these keys, so the keys are presumably segment
 * offsets — TODO confirm against `extractData`.
 */
dataSegments: Map<number, DataSegment>;
/** Payload of each custom section, keyed by the section's name. */
customSections: Map<string, Uint8Array>;
}
/** Value returned by `WasmStreamAnalyzer.analyze()`. */
interface AnalysisResult {
/** Section metadata. NOTE(review): assigned by `initResults`, which is not shown here — confirm it shares the analyzer's manifest object. */
manifest: ModuleManifest;
/** For each function body index, the set of immediates of its `call` instructions. */
functionCalls: Map<number, Set<number>>;
/** For each function body index, the `i32.const` immediates that are also keys of `manifest.dataSegments`. */
dataRefs: Map<number, Set<number>>;
}
/**
 * Two-pass analyzer over a Wasm binary.
 *
 * Pass 1 walks the section events and records module metadata in the
 * manifest; Pass 2 walks the function bodies and records, per function,
 * which call targets and which known data-segment keys it mentions.
 */
export class WasmStreamAnalyzer {
  private parser: Parser;
  private manifest: ModuleManifest;
  private results: AnalysisResult;

  /**
   * @param binary Raw bytes handed to the underlying parser.
   */
  constructor(binary: Uint8Array) {
    this.parser = new Parser(binary);
    // The manifest is created before the results container, in that order.
    this.manifest = this.initManifest();
    this.results = this.initResults();
  }

  /**
   * Runs both passes and returns the accumulated results object.
   *
   * @returns The analyzer's own `results` instance (not a copy).
   */
  public analyze(): AnalysisResult {
    this.scanMetadata(); // Pass 1: section metadata
    this.traverseBodies(); // Pass 2: per-function references
    return this.results;
  }

  /**
   * Pass 1: routes each section event to the matching extractor and stores
   * what it returns on the manifest. Sections of any other kind are ignored.
   */
  private scanMetadata(): void {
    for (const sectionEvent of this.parser.parseAll()) {
      if (sectionEvent.section === Section.Type) {
        this.manifest.types = this.extractTypes(sectionEvent.payload);
      } else if (sectionEvent.section === Section.Import) {
        this.manifest.imports = this.extractImports(sectionEvent.payload);
      } else if (sectionEvent.section === Section.Export) {
        this.manifest.exports = this.extractExports(sectionEvent.payload);
      } else if (sectionEvent.section === Section.Data) {
        this.manifest.dataSegments = this.extractData(sectionEvent.payload);
      } else if (sectionEvent.section === Section.Custom) {
        // Custom sections are kept as-is under their own name.
        this.manifest.customSections.set(sectionEvent.name, sectionEvent.payload);
      }
    }
  }

  /**
   * Pass 2: scans every function body and records two sets for it — the
   * immediates of its `call` instructions, and the `i32.const` immediates
   * that coincide with a key of `manifest.dataSegments`.
   */
  private traverseBodies(): void {
    for (const fn of this.parser.parseBodies()) {
      const ownerIndex = fn.index;
      const callees = new Set<number>();
      const referencedData = new Set<number>();

      for (const instr of fn.instructions) {
        switch (instr.opcode) {
          case 'call':
            callees.add(instr.immediate);
            break;
          case 'i32.const': {
            const candidate = instr.immediate;
            // Only constants that match a known data-segment key count.
            if (this.manifest.dataSegments.has(candidate)) {
              referencedData.add(candidate);
            }
            break;
          }
        }
      }

      // Every body gets an entry, even when both sets end up empty.
      this.results.functionCalls.set(ownerIndex, callees);
      this.results.dataRefs.set(ownerIndex, referencedData);
    }
  }

  // Helper methods for extraction omitted for brevity
}
2. Semantic Lifting and Control Flow Reconstruction
Wasm instructions operate on a stack, pushing and popping values. To make this readable, we implement a lifter that translates stack operations into C-like expressions. The lifter maintains a virtual stack of expression strings. When an arithmetic or logical instruction is encountered, the lifter pops the top expressions, combines them into a new expression, and pushes the result back.
Control flow reconstruction requires identifying "leaders"—instructions that start basic blocks. Leaders include function entry points and instructions immediately following branches, loops, or returns. The engine constructs a directed graph where nodes are basic blocks and edges represent control flow transitions.
// Expression lifter and CFG builder
/** One node of the control-flow graph kept by `DecompilationEngine`. */
interface BasicBlock {
/** Sequential identifier; `startBlock` takes it from a per-function counter. */
id: number;
/** Offset passed to `startBlock` — 0 for the entry block, otherwise the `nextOffset` of the instruction that closed the previous block. */
offset: number;
/** Lifted statement text belonging to this block. */
instructions: string[];
/** Outgoing edges. NOTE(review): presumably block ids; nothing in the visible code fills this in yet. */
successors: number[];
/** Incoming edges. NOTE(review): presumably block ids; nothing in the visible code fills this in yet. */
predecessors: number[];
}
/**
 * Lifts a linear stream of Wasm instructions into C-like statement text.
 *
 * Operands are tracked on a virtual stack of expression strings: value
 * producers push text, consumers pop it and either push a combined
 * expression or emit a statement into the current basic block. Blocks are
 * split after every control-flow instruction.
 *
 * Edges (`successors` / `predecessors`) are not populated here; that needs
 * branch-target resolution, which this class does not implement.
 */
export class DecompilationEngine {
  /** Text used in place of an operand when the virtual stack is empty. */
  private static readonly UNDERFLOW_PLACEHOLDER = '<stack-underflow>';

  /**
   * Opcodes after which the current basic block is closed.
   * NOTE(review): chosen from the structured-control opcodes of the Wasm
   * instruction set; extend it as more opcodes are lifted.
   */
  private static readonly CONTROL_FLOW_OPCODES: ReadonlySet<string> = new Set([
    'br',
    'br_if',
    'br_table',
    'if',
    'else',
    'loop',
    'end',
    'return',
    'unreachable',
  ]);

  private virtualStack: string[] = [];
  private locals: Map<number, string> = new Map();
  private cfg: Map<number, BasicBlock> = new Map();
  /** Id that the next block created by `startBlock` will receive. */
  private currentBlockId: number = 0;
  /** Block currently receiving statements, or `undefined` between blocks. */
  private currentBlock: BasicBlock | undefined;

  /**
   * @param typeSignatures Signatures consulted by `call` to learn how many
   *   arguments to pop, keyed by the `call` immediate.
   *   NOTE(review): in Wasm the `call` immediate is a function index, not a
   *   type index — make sure the map passed in is keyed accordingly.
   * @param functionNames Optional display names, keyed by the `call`
   *   immediate. Unnamed targets are rendered as `func_<index>`.
   */
  constructor(
    private readonly typeSignatures: ReadonlyMap<number, TypeSignature> = new Map(),
    private readonly functionNames: ReadonlyMap<number, string> = new Map(),
  ) {}

  /**
   * Lifts one function body.
   *
   * All per-function state is reset first, so an instance can be reused.
   *
   * @param body Instructions of the function, in program order.
   * @param typeSig Signature of the function being lifted. Kept for
   *   interface compatibility; it is not consulted at present.
   * @returns The emitted statements of every basic block, in block order.
   */
  public liftFunction(body: Instruction[], typeSig: TypeSignature): string[] {
    this.virtualStack = [];
    this.locals = new Map();
    this.cfg = new Map();
    this.currentBlockId = 0;
    this.currentBlock = undefined;

    // The entry block always exists, even for an empty body.
    this.startBlock(0);
    let resumeOffset = 0;

    for (const instr of body) {
      // Code that follows a block terminator (including `return`) still
      // needs a block to land in; open it lazily so no empty block trails
      // the last instruction.
      if (this.currentBlock === undefined) {
        this.startBlock(resumeOffset);
      }
      this.processInstruction(instr);
      if (this.isControlFlow(instr)) {
        this.endBlock();
        resumeOffset = instr.nextOffset;
      }
    }

    this.endBlock();
    return this.generateDecompiledOutput();
  }

  /** Applies the stack effect of one instruction and emits any statement it produces. */
  private processInstruction(instr: Instruction): void {
    switch (instr.opcode) {
      case 'i32.add': {
        // Operands come off in reverse: the right-hand side was pushed last.
        const right = this.popOperand();
        const left = this.popOperand();
        this.virtualStack.push(`(${left} + ${right})`);
        break;
      }
      case 'local.get': {
        this.virtualStack.push(this.localName(instr.immediate));
        break;
      }
      case 'local.set': {
        const value = this.popOperand();
        const varName = this.localName(instr.immediate);
        this.locals.set(instr.immediate, varName);
        this.appendToCurrentBlock(`${varName} = ${value};`);
        break;
      }
      case 'call': {
        const args = this.popArgs(instr.immediate);
        const funcName = this.resolveFunctionName(instr.immediate);
        this.virtualStack.push(`${funcName}(${args.join(', ')})`);
        break;
      }
      // Handle other opcodes...
    }
  }

  /**
   * Pops the arguments of a call in source order.
   *
   * When no signature is registered under `typeIndex` the arity is unknown,
   * so nothing is popped and the call is rendered without arguments.
   */
  private popArgs(typeIndex: number): string[] {
    const count = this.typeSignatures.get(typeIndex)?.params.length ?? 0;
    const args: string[] = [];
    for (let i = 0; i < count; i++) {
      // Last pushed is the last argument, hence unshift.
      args.unshift(this.popOperand());
    }
    return args;
  }

  /**
   * Pops one expression, substituting a visible placeholder on underflow so
   * that malformed or unreachable code cannot inject the text "undefined".
   */
  private popOperand(): string {
    return this.virtualStack.pop() ?? DecompilationEngine.UNDERFLOW_PLACEHOLDER;
  }

  /** Display name of a local: a recorded name if any, else `var_<index>`. */
  private localName(index: number): string {
    return this.locals.get(index) ?? `var_${index}`;
  }

  /** Display name of a call target: a supplied name if any, else `func_<index>`. */
  private resolveFunctionName(index: number): string {
    return this.functionNames.get(index) ?? `func_${index}`;
  }

  /** Whether `instr` closes the current basic block. */
  private isControlFlow(instr: Instruction): boolean {
    return DecompilationEngine.CONTROL_FLOW_OPCODES.has(instr.opcode);
  }

  /** Opens a new basic block at `offset` and makes it the current one. */
  private startBlock(offset: number): void {
    const block: BasicBlock = {
      id: this.currentBlockId++,
      offset,
      instructions: [],
      successors: [],
      predecessors: [],
    };
    this.cfg.set(block.id, block);
    this.currentBlock = block;
  }

  /** Closes the current block; the next statement needs a fresh one. */
  private endBlock(): void {
    this.currentBlock = undefined;
  }

  /**
   * Adds a statement to the open block.
   *
   * @throws Error if no block is open, which would mean `liftFunction`'s
   *   block bookkeeping is broken.
   */
  private appendToCurrentBlock(line: string): void {
    if (this.currentBlock === undefined) {
      throw new Error('DecompilationEngine: no open basic block to append to');
    }
    this.currentBlock.instructions.push(line);
  }

  /** Concatenates the statements of all blocks in creation order. */
  private generateDecompiledOutput(): string[] {
    // Map iteration follows insertion order, which is ascending block id.
    return [...this.cfg.values()].flatMap((block) => block.instructions);
  }
}
3. Concurrent Frontend Architecture
To prevent analysis tasks from freezing the UI, all heavy computation must be offloaded to a Web Worker. The frontend uses a singleton worker manager that lazily initializes the analysis engine. Communication occurs via a strict message protocol using transferable objects to minimize serialization overhead.
// Worker bridge for concurrent analysis
/** Envelope exchanged between `AnalysisWorkerBridge` and the analysis worker. */
interface WorkerMessage {
/** Correlation id; the bridge uses it to find the pending request a reply belongs to. */
id: string;
/** Message kind. The bridge sends 'ANALYZE' and treats an 'ERROR' reply as a failure; 'LIFT' is not used by the code shown here. */
type: 'ANALYZE' | 'LIFT' | 'ERROR';
/** Kind-specific data: the binary for 'ANALYZE', the error text for 'ERROR'. NOTE(review): typed `any`; narrowing per `type` would be safer. */
payload: any;
}
/**
 * Main-thread proxy for the analysis Web Worker.
 *
 * Each request is tagged with a random id; the worker's reply carries the
 * same id and settles the promise that was handed to the caller.
 */
export class AnalysisWorkerBridge {
  private worker: Worker;
  /** Settlement callbacks of in-flight requests, keyed by request id. */
  private pendingRequests: Map<
    string,
    { resolve: (result: any) => void; reject: (reason: Error) => void }
  > = new Map();

  constructor() {
    this.worker = new Worker(new URL('./analysis.worker.ts', import.meta.url));
    this.worker.onmessage = this.handleWorkerMessage.bind(this);
    // Without this, a crash inside the worker would leave every caller waiting forever.
    this.worker.onerror = this.handleWorkerError.bind(this);
  }

  /**
   * Sends a binary to the worker for analysis.
   *
   * The underlying `ArrayBuffer` is transferred, not copied, so `binary`
   * is detached and must not be used by the caller afterwards.
   *
   * @param binary Bytes of the Wasm module.
   * @returns Resolves with the worker's payload; rejects if the worker
   *   replies with type 'ERROR', if posting fails, or if the worker errors.
   */
  public analyzeBinary(binary: Uint8Array): Promise<AnalysisResult> {
    return new Promise((resolve, reject) => {
      const id = crypto.randomUUID();
      // Both callbacks are stored so that an ERROR reply can reject.
      this.pendingRequests.set(id, { resolve, reject });
      try {
        // Use transferable to avoid copying large binary
        this.worker.postMessage(
          { id, type: 'ANALYZE', payload: binary },
          [binary.buffer]
        );
      } catch (error: unknown) {
        // e.g. the buffer was already detached; do not leak the entry.
        this.pendingRequests.delete(id);
        reject(error instanceof Error ? error : new Error(String(error)));
      }
      // Timeout handling omitted for brevity
    });
  }

  /** Settles the pending request named by the reply's id; unknown ids are ignored. */
  private handleWorkerMessage(event: MessageEvent): void {
    const { id, type, payload } = event.data;
    const pending = this.pendingRequests.get(id);
    if (pending === undefined) {
      return;
    }
    // Remove first so a throwing callback cannot leave a stale entry behind.
    this.pendingRequests.delete(id);
    if (type === 'ERROR') {
      pending.reject(new Error(String(payload)));
    } else {
      pending.resolve(payload);
    }
  }

  /** Rejects every in-flight request when the worker itself reports an error. */
  private handleWorkerError(event: { message?: string }): void {
    const failure = new Error(event.message || 'Analysis worker failed');
    const inFlight = [...this.pendingRequests.values()];
    this.pendingRequests.clear();
    for (const pending of inFlight) {
      pending.reject(failure);
    }
  }
}
4. High-Performance Rendering
Rendering thousands of lines of disassembly requires virtualization. The UI calculates the visible viewport and only mounts DOM nodes for the lines currently in view. As the user scrolls, nodes are recycled and repopulated. For control flow graphs, we use a library like React Flow with a custom layout engine. Basic blocks are rendered as nodes containing syntax-highlighted instruction lists, allowing users to navigate the graph interactively.
Pitfall Guide
-
Memory Explosion via Monolithic Parsing
- Explanation: Loading the entire Wasm binary into a JavaScript object graph consumes excessive memory. A 10MB binary can easily require 100MB+ of heap space.
- Fix: Use a streaming parser like `wasmparser`. Process events incrementally and discard raw binary data as soon as metadata is extracted.
-
UI Thread Blocking
- Explanation: Running analysis or lifting algorithms on the main thread causes frame drops and unresponsiveness. Users perceive the app as frozen.
- Fix: Offload all analysis to a Web Worker. Use `postMessage` with transferable objects for zero-copy data transfer.
-
Incorrect Indirect Call Resolution
- Explanation: Indirect calls (`call_indirect`) select their target from a table at run time and carry a type index that the target's signature must match. Failing to map these indices to actual function signatures results in broken control flow and missing cross-references.
- Fix: In Pass 1, build a complete map of type signatures. During Pass 2, resolve indirect calls by matching the type index against the map to identify potential targets.
-
Stack Imbalance in Lifter
- Explanation: The virtual stack can underflow if the lifter assumes more operands than are available, or overflow if results are not consumed. This leads to crashes or incorrect decompilation.
- Fix: Implement strict validation in the lifter. Check stack depth before popping. Handle unreachable code paths gracefully.
-
DOM Overload from Text Rendering
- Explanation: Appending hundreds of thousands of DOM nodes for disassembly text causes severe rendering lag and memory usage.
- Fix: Implement windowing/virtualization. Only render items within the visible scroll container. Recycle DOM elements during scroll events.
-
Worker Communication Bottlenecks
- Explanation: Sending large payloads via `postMessage` without transferables forces the browser to clone data, doubling memory usage and adding latency.
- Fix: Always use the second argument of `postMessage` to specify transferable objects (e.g., `ArrayBuffer`). Ensure the sender no longer accesses the transferred buffer.
-
Ignoring Custom Sections
- Explanation: Wasm binaries often contain custom sections with debug names, DWARF info, or metadata. Ignoring these loses valuable context for reverse engineering.
- Fix: Parse custom sections in Pass 1. Store them in the manifest and expose them to the UI for display. Handle name sections to restore function and variable names.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Binary < 5MB | Monolithic AST | Simpler implementation, negligible memory overhead | Low dev cost |
| Binary > 5MB | Streaming Two-Pass | Prevents memory crashes, scales linearly | Moderate dev cost |
| Interactive UI Required | Web Worker + Virtualization | Maintains 60fps, prevents UI freeze | High dev cost |
| Automated Auditing | Headless CLI | Integrates with pipelines, no UI overhead | Low dev cost |
| Complex Indirect Calls | Type-Indexed Resolution | Accurate control flow reconstruction | Moderate dev cost |
Configuration Template
Rust Analysis Crate (Cargo.toml)
[package]
name = "wasm-analyzer-core"
version = "0.1.0"
edition = "2021"
[dependencies]
wasmparser = "0.114"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
[lib]
crate-type = ["cdylib", "rlib"]
Frontend Worker Setup (vite.config.ts)
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
// Vite configuration for the workbench frontend.
export default defineConfig({
// Registers the React plugin imported above.
plugins: [react()],
build: {
// Targets the newest syntax level, i.e. no down-levelling for older browsers.
target: 'esnext',
rollupOptions: {
output: {
manualChunks: {
// Places the analysis worker module in a chunk named "worker".
// NOTE(review): workers created with `new Worker(new URL(...))` are bundled
// through the separate `worker` options below — confirm this entry is needed.
worker: ['./src/workers/analysis.worker.ts'],
},
},
},
},
worker: {
// Emit worker bundles as ES modules.
format: 'es',
},
});
Quick Start Guide
-
Initialize Project: Create a new TypeScript project with Vite and install wasmparser and react-flow.
npm create vite@latest wasm-workbench -- --template react-ts
cd wasm-workbench
npm install wasmparser react-flow
-
Setup Worker: Create src/workers/analysis.worker.ts and implement the WasmStreamAnalyzer logic. Expose a message handler that accepts binary data and returns analysis results.
-
Build UI Shell: Create a React component with a file drop zone. Use AnalysisWorkerBridge to send the dropped binary to the worker. Display a loading indicator while analysis is in progress.
-
Implement Views: Add a virtualized list component for disassembly output. Add a React Flow canvas for the control flow graph. Connect the worker results to these views using state management.
-
Add Interactivity: Implement click handlers on function names to navigate the disassembly view. Add hover tooltips for variable context. Ensure state synchronization across views.