= box_size
self.particle_count = particle_count
self._initialize_context()
def _initialize_context(self):
    """Create the simulation state and the integrator objects for this wrapper.

    Builds a ``hoomd.Snapshot`` on ``self.device``, loads it into
    ``self.simulation``, stores an ``md.Integrator`` on ``self.integrator``
    and an HPMC object on ``self.mc``, then installs ``self.integrator`` as
    the simulation's integrator.
    """
    snapshot = hoomd.Snapshot(device=self.device)
    self.simulation.create_state_from_snapshot(snapshot)

    self.integrator = md.Integrator(dt=0.005)

    # NOTE(review): confirm that `md.methods.HPMCIntegrationStandard` exists in
    # the installed HOOMD-blue version; HPMC integrators are normally found
    # under `hoomd.hpmc.integrate` -- TODO verify the class and its arguments.
    self.mc = md.methods.HPMCIntegrationStandard(seed=12345, nselect=4)

    # NOTE(review): only `self.integrator` is attached here; `self.mc` is never
    # handed to the simulation, yet other methods read counters from it.
    # Confirm which integrator is meant to drive `simulation.run()`.
    self.simulation.operations.integrator = self.integrator
def configure_particles(self, shape_type: str, dimensions: Dict[str, float]) -> bool:
    """Apply a particle shape to the Monte Carlo integrator.

    Args:
        shape_type: One of "sphere", "cube", "disk" or "capsule".
        dimensions: Keyword arguments forwarded unchanged to
            ``self.mc.shape.update``.

    Returns:
        ``True`` when the shape was forwarded to ``self.mc.shape.update``,
        ``False`` when ``shape_type`` is not a supported shape (nothing is
        changed in that case).
    """
    supported_shapes = ("sphere", "cube", "disk", "capsule")
    if shape_type in supported_shapes:
        self.mc.shape.update(shape_type, **dimensions)
        return True
    # Unsupported shapes are rejected without touching the integrator.
    return False
def run_equilibration(self, steps: int) -> Dict[str, float]:
    """Advance the simulation and report the move counters held by ``self.mc``.

    Args:
        steps: Number of steps passed to ``self.simulation.run``.

    Returns:
        A dict with the keys ``"accepted"`` and ``"rejected"``, read from
        ``self.mc.n_accept`` and ``self.mc.n_reject`` after the run.
    """
    self.simulation.run(steps)
    # Counters are read only after the run has finished.
    sampler = self.mc
    stats = {}
    stats["accepted"] = sampler.n_accept
    stats["rejected"] = sampler.n_reject
    return stats
### Step 2: Expose Tools via MCP Server
The MCP server acts as a deterministic bridge. It validates inputs, manages simulation state, and returns structured payloads. The agent never calls HOOMD-blue directly.
```typescript
// mcp_server.ts
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
// Name and version handed to the McpServer constructor.
const serverInfo = {
  name: "monte-carlo-orchestrator",
  version: "1.0.0"
};

// MCP server instance on which the simulation tools are registered.
const server = new McpServer(serverInfo);
// Registers the `initialize_particle_system` tool: validates the arguments
// with the Zod schema below, forwards them to the Python bridge as an
// "initialize" request, and returns the bridge response as JSON text.
server.tool(
  "initialize_particle_system",
  "Creates a hard-particle simulation context with validated boundaries",
  {
    box_size: z.number().positive(),
    particle_count: z.number().int().min(10).max(50000),
    dimensionality: z.enum(["2D", "3D"])
  },
  async ({ box_size, particle_count, dimensionality }) => {
    const payload = {
      method: "initialize",
      params: { box_size, particle_count, dimensionality }
    };
    const result = await callPythonBridge(payload);
    return {
      content: [{ type: "text", text: JSON.stringify(result) }],
      // Previously hard-coded to `false`, which reported a failed
      // initialization as success. Derive the flag from the bridge response,
      // using the same `error` field that execute_phase_transition checks.
      isError: result?.error !== undefined
    };
  }
);
// Registers the `execute_phase_transition` tool: the validated arguments are
// sent to the Python bridge as a "run_transition" request and the response is
// returned as JSON text, flagged as an error when it carries an `error` field.
server.tool(
  "execute_phase_transition",
  "Runs Monte Carlo steps with temperature/density ramping",
  {
    target_density: z.number().min(0.1).max(1.2),
    max_steps: z.number().int().min(1000),
    convergence_threshold: z.number().min(0.001).max(0.1)
  },
  async (args) => {
    const { target_density, max_steps, convergence_threshold } = args;
    const bridgeResponse = await callPythonBridge({
      method: "run_transition",
      params: { target_density, max_steps, convergence_threshold }
    });
    const serialized = JSON.stringify(bridgeResponse);
    const failed = bridgeResponse.error !== undefined;
    return {
      content: [{ type: "text", text: serialized }],
      isError: failed
    };
  }
);
```

### Step 3: Define the Four-Stage Workflow Skill
The skill encodes the execution contract. It forces the agent to follow a strict progression: Setup → Plan → Execute → Analyze. Each stage has explicit success criteria and state dependencies.
// workflow_skill.ts
/**
 * One stage of the simulation workflow contract.
 */
export interface SimulationStage {
  /** Identifier of the stage, e.g. "stage_1_setup". */
  id: string;
  /** Human-readable title of the stage. */
  name: string;
  /** Names of the tools associated with this stage. */
  required_tools: Array<string>;
  /** Rule strings describing what the stage output must satisfy. */
  validation_rules: Array<string>;
  /** `id` of the stage that follows, or `null` for the final stage. */
  next_stage: null | string;
}
// Stage 1: create the particle system and configure the particle shape.
const setupStage: SimulationStage = {
  id: "stage_1_setup",
  name: "System Initialization",
  required_tools: ["initialize_particle_system", "configure_shape"],
  validation_rules: ["box_size > 0", "particle_count within bounds", "shape dimensions valid"],
  next_stage: "stage_2_planning"
};

// Stage 2: schedule the density ramp and the convergence criteria.
const planningStage: SimulationStage = {
  id: "stage_2_planning",
  name: "Parameter Scheduling",
  required_tools: ["define_density_ramp", "set_convergence_criteria"],
  validation_rules: ["density range monotonic", "steps >= 1000", "threshold < 0.1"],
  next_stage: "stage_3_execution"
};

// Stage 3: run the Monte Carlo steps while monitoring the acceptance ratio.
const executionStage: SimulationStage = {
  id: "stage_3_execution",
  name: "Monte Carlo Run",
  required_tools: ["execute_phase_transition", "monitor_acceptance_ratio"],
  validation_rules: ["acceptance_ratio between 0.3 and 0.5", "no runtime exceptions"],
  next_stage: "stage_4_analysis"
};

// Stage 4: characterize the resulting phase; this is the last stage.
const analysisStage: SimulationStage = {
  id: "stage_4_analysis",
  name: "Phase Characterization",
  required_tools: ["compute_radial_distribution", "export_order_parameters"],
  validation_rules: ["rdf converges at large r", "order parameters within physical bounds"],
  next_stage: null
};

/** The four workflow stages, listed in execution order. */
export const monteCarloSkill: SimulationStage[] = [
  setupStage,
  planningStage,
  executionStage,
  analysisStage
];
### Step 4: Agent Orchestration Logic
The agent consumes the skill contract and routes tool calls through the MCP server. It maintains stage state, validates outputs against rules, and halts on failure.
// agent_orchestrator.ts
import { MCPClient } from "@modelcontextprotocol/sdk/client/index.js";
import { monteCarloSkill } from "./workflow_skill.js";
import type { SimulationStage } from "./workflow_skill.js";
/**
 * Walks the `monteCarloSkill` stages in order. For each stage it calls the
 * stage's tools through the MCP client, checks the results against the
 * stage's validation rules, and stops with an error on the first failure.
 */
export class SimulationAgent {
  private currentStageIndex = 0;
  private context: Record<string, any> = {};

  constructor(private mcpClient: MCPClient) {}

  /**
   * Runs the current stage and every stage after it.
   *
   * @param prompt - Passed along unchanged between stages; not otherwise read.
   * @returns `{ status: "complete", context }` once no stage is left, where
   *   `context` maps each stage id to that stage's tool results.
   * @throws Error when a stage's tool results fail validation.
   */
  async executeWorkflow(prompt: string): Promise<Record<string, any>> {
    const stage = monteCarloSkill[this.currentStageIndex];
    if (!stage) return { status: "complete", context: this.context };

    const toolResults = await this.invokeStageTools(stage);
    if (!this.validateStageOutput(stage, toolResults)) {
      throw new Error(`Stage ${stage.id} validation failed. Halting workflow.`);
    }

    this.context[stage.id] = toolResults;
    this.currentStageIndex++;
    return this.executeWorkflow(prompt);
  }

  /**
   * Calls each of the stage's tools one after another and collects the results.
   * Calls stay sequential because later tools may depend on state set by
   * earlier ones.
   */
  private async invokeStageTools(stage: SimulationStage): Promise<any[]> {
    const results: any[] = [];
    for (const toolName of stage.required_tools) {
      // NOTE(review): confirm `callTool(name, args)` matches the client API in
      // use; some MCP clients expect a single `{ name, arguments }` object.
      const result = await this.mcpClient.callTool(toolName, this.context);
      results.push(result);
    }
    return results;
  }

  /**
   * Decides whether a stage's tool results satisfy its validation rules.
   *
   * A result flagged `isError: true` fails the stage outright. Otherwise every
   * rule must hold; rules that are not machine-checkable are treated as passed.
   */
  private validateStageOutput(stage: SimulationStage, outputs: any[]): boolean {
    if (outputs.some((output) => output?.isError === true)) return false;

    const records = outputs.flatMap((output) => this.collectRecords(output));
    return stage.validation_rules.every((rule) => this.checkRule(rule, records));
  }

  /**
   * Returns the objects in which rule fields may be found: the tool result
   * itself plus any JSON object carried in its `content[].text` entries, which
   * is where the MCP server tools place their payload.
   */
  private collectRecords(output: any): Record<string, any>[] {
    if (output === null || typeof output !== "object") return [];

    const records: Record<string, any>[] = [output];
    if (!Array.isArray(output.content)) return records;

    for (const item of output.content) {
      if (item?.type !== "text" || typeof item.text !== "string") continue;
      try {
        const parsed: unknown = JSON.parse(item.text);
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
          records.push(parsed as Record<string, any>);
        }
      } catch {
        // Text that is not JSON carries no fields to validate.
      }
    }
    return records;
  }

  /**
   * Evaluates one rule of the form `field > n`, `field < n`, `field >= n`,
   * `field <= n` or `field between a and b` (inclusive). A missing field makes
   * such a rule fail. Any other wording is a descriptive rule and passes.
   */
  private checkRule(rule: string, records: Record<string, any>[]): boolean {
    const [field = "", operator, ...operands] = rule.trim().split(/\s+/);
    const operand = (index: number): number => parseFloat(operands[index] ?? "");
    const actual = records.find((record) => record[field] !== undefined)?.[field];

    switch (operator) {
      case ">":
        return actual > operand(0);
      case "<":
        return actual < operand(0);
      case ">=":
        return actual >= operand(0);
      case "<=":
        return actual <= operand(0);
      case "between":
        // Only the exact "a and b" form is numeric; anything else is prose.
        if (operands[1] !== "and" || Number.isNaN(operand(0)) || Number.isNaN(operand(2))) {
          return true;
        }
        return actual >= operand(0) && actual <= operand(2);
      default:
        return true;
    }
  }
}
Architecture Rationale:
- MCP Server as Boundary: Prevents the LLM from directly instantiating simulation objects. All state mutations pass through schema-validated endpoints.
- Skill Contract as State Machine: Forces deterministic progression. The agent cannot skip equilibration or jump to analysis without passing validation gates.
- TypeScript Orchestration: Leverages strong typing for stage definitions, tool routing, and validation logic. Python handles the numerical heavy lifting where it excels.
- Why This Works: Scientific simulations fail when agents treat them as text generation tasks. This architecture treats them as stateful pipelines with explicit contracts, matching how computational physicists actually work.
Pitfall Guide
1. Unbounded Parameter Search
Explanation: LLMs will generate density ramps or temperature schedules that exceed physical stability limits, causing simulation crashes or non-convergent trajectories.
Fix: Enforce strict Zod schemas in MCP tool definitions. Add preconditions that reject out-of-range values before they reach the simulation engine.
2. Missing Convergence Validation
Explanation: Agents often assume a simulation completed successfully if no exception is thrown, ignoring acceptance ratios or energy drift.
Fix: Implement a dedicated monitor_acceptance_ratio tool that returns real-time metrics. Block stage progression if the ratio falls outside 0.3–0.5.
3. State Loss Between Stages
Explanation: Workflow stages run in isolated contexts, causing parameter drift or lost neighbor list configurations.
Fix: Maintain a persistent SimulationContext object that serializes state between stages. Pass context IDs through MCP calls instead of reconstructing parameters.
4. Over-Reliance on LLM Math
Explanation: Foundation models struggle with floating-point precision, unit conversions, and dimensional analysis.
Fix: Delegate all calculations to deterministic Python functions. The agent should only specify high-level targets (e.g., "target density 0.85"), not compute intermediate values.
5. Ignoring Boundary Conditions
Explanation: Hard-particle systems require explicit periodic boundary handling. Omitting this causes artificial clustering or particle escape.
Fix: Encode boundary conditions as mandatory tool parameters. Validate them during stage 1 setup and reject configurations that lack periodic flags.
6. Silent Tool Failures
Explanation: MCP tools may return empty payloads or malformed JSON when simulations hit numerical instability.
Fix: Implement structured error responses with retry logic. Log the exact simulation step where failure occurred and expose it to the agent for parameter adjustment.
7. Prompt Drift in Long Workflows
Explanation: As workflows extend across multiple stages, the agent's system prompt loses focus, causing it to repeat steps or skip analysis.
Fix: Use stage-specific system prompts with explicit success criteria. Inject a compact summary of previous stage outputs into each new prompt to maintain context continuity.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Rapid prototyping of particle shapes | MCP Agent with interactive mode | Allows human-in-the-loop parameter tuning without rewriting simulation code | Low compute, moderate token usage |
| Production phase-transition studies | MCP Agent with autoresearch mode | Ensures deterministic execution, reproducible results, and automated convergence checks | High compute, low token overhead |
| Educational demonstrations | Raw LLM prompting with guided templates | Lower infrastructure overhead, suitable for teaching workflow concepts | Minimal compute, high token usage |
| Multi-scale materials design | MCP Agent + traditional scripting hybrid | Combines agent flexibility for high-level planning with scripted precision for numerical kernels | Moderate compute, balanced token usage |
Configuration Template
# mcp_simulation_config.yaml
# NOTE(review): the nesting below is reconstructed from a copy that had lost
# its indentation; confirm that `tools` is meant to be a top-level section.
server:
  name: "hard-particle-monte-carlo"
  transport: "stdio"
  version: "2.1.0"

# Per-tool argument bounds and an extra validation expression.
# NOTE(review): these bounds are tighter than the Zod schemas shown for the
# MCP server tools; confirm which set is authoritative.
tools:
  - name: "initialize_particle_system"
    schema:
      box_size: { type: "number", min: 1.0, max: 100.0 }
      particle_count: { type: "integer", min: 50, max: 20000 }
      dimensionality: { type: "enum", values: ["2D", "3D"] }
    validation: "box_volume >= particle_count * min_particle_volume"
  - name: "execute_phase_transition"
    schema:
      target_density: { type: "number", min: 0.1, max: 1.1 }
      max_steps: { type: "integer", min: 5000 }
      convergence_threshold: { type: "number", min: 0.001, max: 0.05 }
    validation: "acceptance_ratio in [0.3, 0.5] during first 10% of steps"

# Stage order and the gate each stage must pass before the next one starts.
# Tool names in "execution" and "analysis" were aligned with the workflow
# skill contract and the registered MCP tool names.
workflow:
  stages:
    - id: "setup"
      tools: ["initialize_particle_system", "configure_shape"]
      gate: "all_tools_succeed"
    - id: "planning"
      tools: ["define_density_ramp", "set_convergence_criteria"]
      gate: "parameters_within_bounds"
    - id: "execution"
      tools: ["execute_phase_transition", "monitor_acceptance_ratio"]
      gate: "convergence_reached"
    - id: "analysis"
      tools: ["compute_radial_distribution", "export_order_parameters"]
      gate: "metrics_valid"

agent:
  mode: "autoresearch"
  max_retries: 3
  context_serialization: "json"
  prompt_strategy: "stage_isolated"
Quick Start Guide
- Install Dependencies: Set up `hoomd-blue`, `@modelcontextprotocol/sdk`, and `zod` in your environment. Ensure Python 3.10+ and Node.js 18+ are available.
- Launch MCP Server: Run the Python simulation wrapper alongside the TypeScript MCP server. Verify tool discovery by calling `initialize_particle_system` with valid bounds.
- Load Workflow Skill: Import the four-stage skill contract into your agent runtime. Configure stage-specific system prompts with validation rules and success criteria.
- Execute Workflow: Pass an end-to-end prompt (e.g., "Simulate 3D cube packing at density 0.75 and analyze radial distribution"). The agent will route through setup, planning, execution, and analysis stages automatically.
- Validate Output: Check convergence logs, acceptance ratios, and order parameters. If any stage fails validation, adjust parameters and rerun using the autoresearch mode for deterministic recovery.