nce", "caseStudy"]),
publishDate: z.coerce.date(),
tags: z.array(z.string()).default([]),
requiresDisclaimer: z.boolean().default(false),
sponsored: z.boolean().default(false),
}),
});
export const collections = { productDocs };
**Architecture Rationale:**
- `z.enum` prevents typos and enforces a closed set of categories. Changing a category name becomes a deliberate schema migration, not a silent drift.
- `z.coerce.date` normalizes `YYYY-MM-DD` strings into JavaScript `Date` objects, eliminating manual parsing in templates.
- `.default(false)` ensures fields are omissible in YAML while guaranteeing a predictable type at runtime. This prevents `undefined` checks throughout the rendering layer.
### Step 2: Enforce Cross-Field Dependencies with `.refine()`
When two fields are logically coupled, `.refine()` validates the relationship after the base object is constructed. This is ideal for conditional requirements.
```ts
// Collection definition whose schema ties `sponsored` to `docType`.
const productDocs = defineCollection({
  loader: glob({ /* ... */ }),
  schema: z
    .object({
      title: z.string().min(1),
      docType: z.enum(["tutorial", "reference", "caseStudy"]),
      sponsored: z.boolean().default(false),
      // other fields...
    })
    .refine(
      ({ docType, sponsored }) => {
        // Both operands are booleans, so strict equality acts as an
        // "if and only if" test between the two conditions.
        const isCaseStudy = docType === "caseStudy";
        return isCaseStudy === sponsored;
      },
      {
        message: "sponsored must be true if and only if docType is 'caseStudy'",
        // Attach the issue to the `sponsored` field rather than the object root.
        path: ["sponsored"],
      }
    ),
});
```
**Architecture Rationale:**
The check `(docType === "caseStudy") === sponsored` compares two booleans, so it is a logical XNOR: it passes only when both sides are true or both are false, which is exactly "if and only if". It reads cleanly months later and fails fast when the relationship breaks. The path array directs the error to the exact field, making CI output actionable. Use .refine() for single, atomic relationships. It keeps the schema lightweight and avoids unnecessary context overhead.
Step 3: Handle Complex Constraints with .superRefine()
When multiple independent rules apply to the same object, or when you need granular error reporting, .superRefine() provides access to the validation context.
// Collection definition that reports each broken rule as its own issue.
const productDocs = defineCollection({
  loader: glob({ /* ... */ }),
  schema: z
    .object({
      title: z.string().min(1),
      docType: z.enum(["tutorial", "reference", "caseStudy"]),
      sponsored: z.boolean().default(false),
      isDraft: z.boolean().default(false),
      lastReviewed: z.coerce.date().optional(),
    })
    .superRefine((data, ctx) => {
      // Small reporter so every rule below emits a custom issue pinned to one field.
      const report = (field: "sponsored" | "lastReviewed", message: string) =>
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message,
          path: [field],
        });

      // Rule 1: a case study has to be flagged as sponsored.
      const unsponsoredCaseStudy = data.docType === "caseStudy" && !data.sponsored;
      if (unsponsoredCaseStudy) {
        report("sponsored", "caseStudy documents must set sponsored: true");
      }

      // Rule 2: a draft must not carry a review date.
      const reviewedDraft = data.isDraft && data.lastReviewed;
      if (reviewedDraft) {
        report("lastReviewed", "draft documents should not include lastReviewed");
      }
    }),
});
Architecture Rationale:
.superRefine() allows multiple ctx.addIssue() calls, each targeting a specific field. This prevents error masking and provides precise CI feedback. Reserve it for scenarios where .refine() would require convoluted boolean logic or when you need to attach multiple independent constraints to a single schema node.
Step 4: Validate Nested Structured Data
Frontmatter is the ideal source for JSON-LD generation because Zod enforces the shape before templates render. Define nested objects for FAQ and HowTo structures.
// Optional source data for JSON-LD generation. A document may omit the whole
// object, or either list inside it.
structuredData: z
  .object({
    // FAQ entries: both sides of each pair must be non-empty.
    faq: z
      .array(
        z.object({
          question: z.string().min(1),
          answer: z.string().min(1),
        })
      )
      .optional(),
    // HowTo steps: title and instruction are required to be non-empty, matching
    // the FAQ fields, so an empty string cannot reach the generated markup.
    steps: z
      .array(
        z.object({
          title: z.string().min(1),
          instruction: z.string().min(1),
          // When present, the media reference must be a well-formed URL.
          mediaUrl: z.string().url().optional(),
        })
      )
      .optional(),
  })
  .optional(),
Architecture Rationale:
Nested z.object and z.array combinations enforce strict typing for structured data. Missing answer fields, empty strings, or malformed URLs fail the build. This eliminates runtime JSON-LD generation errors and ensures search engines receive valid markup. The .optional() wrapper allows documents to omit structured data entirely without breaking the schema.
Step 5: Bridge the Frontmatter-Body Gap
Zod only validates frontmatter. Google's structured data guidelines require that JSON-LD content has visible counterparts in the rendered body. A separate validation script closes this gap.
// scripts/validate-body-parity.ts
import { readFile } from "node:fs/promises";
import { parse as parseYaml } from "yaml";
import { glob } from "tinyglobby";
/**
 * Checks that every FAQ question declared in a document's frontmatter also
 * appears in that document's body.
 *
 * Matching is a case-insensitive substring test after collapsing runs of
 * whitespace. All problems are collected and printed together; the process
 * exits with code 1 when at least one is found.
 */
async function checkParity() {
  // Include .mdx alongside .md so MDX documents are not silently skipped.
  const files = await glob("src/content/docs/**/*.{md,mdx}");
  const violations: string[] = [];
  const collapse = (text: string) => text.replace(/\s+/g, " ").trim().toLowerCase();

  for (const file of files) {
    const raw = await readFile(file, "utf-8");
    // The body group is optional so a file that ends right after the closing
    // `---` is still checked (against an empty body) instead of being skipped.
    const match = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/.exec(raw);
    if (!match) continue;

    let frontmatter: any;
    try {
      frontmatter = parseYaml(match[1]);
    } catch (error) {
      // Report the broken file and keep going so one bad document does not
      // hide violations in the others.
      const reason = error instanceof Error ? error.message : String(error);
      violations.push(`${file}: frontmatter is not valid YAML (${reason})`);
      continue;
    }

    const body = collapse(match[2] ?? "");
    // `frontmatter` is null for an empty block and may be a scalar; optional
    // chaining keeps both cases from throwing.
    const faq = frontmatter?.structuredData?.faq;
    if (!Array.isArray(faq)) continue;

    for (const [i, item] of faq.entries()) {
      if (typeof item?.question !== "string") {
        violations.push(`${file}: faq[${i}].question is not a string`);
        continue;
      }
      if (!body.includes(collapse(item.question))) {
        violations.push(`${file}: faq[${i}].question missing from body`);
      }
    }
  }

  if (violations.length > 0) {
    console.error("Body parity violations detected:");
    for (const violation of violations) {
      console.error(` • ${violation}`);
    }
    process.exit(1);
  }
}

// Turn unexpected failures (unreadable file, glob error) into a failed check
// with a non-zero exit code rather than an unhandled rejection.
checkParity().catch((error) => {
  console.error(error);
  process.exit(1);
});
Architecture Rationale:
This script runs as a pre-commit hook or CI step. It extracts frontmatter via regex, parses YAML, and performs case-insensitive substring matching against the body. It does not validate semantic correctness (that remains a human review task), but it guarantees structural parity. The separation of concerns keeps Zod focused on type safety while the script handles content alignment.
Pitfall Guide
1. Overusing .refine() for Independent Rules
Explanation: Chaining multiple .refine() calls on the same object creates brittle validation logic and obscures error paths.
Fix: Consolidate independent constraints into a single .superRefine() block. Use .refine() only for direct two-field relationships.
2. Ignoring Astro's Content Cache
Explanation: Astro caches parsed content in .astro/. Schema changes often require a cache clear, otherwise builds succeed with stale data.
Fix: Run astro dev --force or delete .astro/content.cache.json after modifying content.config.ts. Document this in team onboarding.
3. Date Coercion Edge Cases
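Explanation: z.coerce.date() passes every input through new Date(), so it accepts more than ISO strings: null coerces to the Unix epoch, numbers are read as millisecond timestamps, and an impossible day such as "2026-02-31" can roll over into March on V8/Node instead of failing. Strings that yield Invalid Date (for example "2026-13-45") are already rejected by Zod with an invalid_date issue, so they are not the risk.
Fix: Use z.string().datetime({ offset: true }) for strict ISO 8601 validation, or keep z.coerce.date() and add a .refine() that rejects values outside the range your content can legitimately have (for example, any date before the site launched).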
4. Default Values Masking Missing Data
Explanation: .default(false) or .default([]) hides intentional omissions. A missing field becomes indistinguishable from an explicit false.
Fix: Use .optional() when omission is valid. Reserve .default() for fields that must always resolve to a predictable type at render time.
5. Body-Structure Mismatch
Explanation: Frontmatter passes validation, but the rendered body lacks corresponding headings or text. Search engines flag this as structured data mismatch.
Fix: Implement the substring parity script outlined in Step 5. Run it in CI before deployment. Accept that semantic alignment requires human review.
6. Circular Validation Logic
Explanation: Rules that reference each other (e.g., A requires B, B requires A) create infinite validation loops or ambiguous error messages.
Fix: Keep rules atomic. Validate dependencies in one direction. Use .superRefine() to check state combinations rather than chaining mutual requirements.
7. Treating Validation as Editorial Replacement
Explanation: Schema enforcement catches syntax, not meaning. A technically valid document can still contain misleading claims or poor tone.
Fix: Adopt the three-layer architecture: Schema (build) → Lint/Parity Script (CI) → Human Review (pre-publish). Never push judgment calls into Zod.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Two fields must match or oppose | .refine() | Lightweight, single-responsibility, clear error path | Low |
| Multiple independent constraints on one object | .superRefine() | Prevents error masking, allows per-field messaging | Medium |
| Structured data shape enforcement | Nested z.object + z.array | Guarantees JSON-LD validity before template rendering | Low |
| Frontmatter-body alignment | External Node.js parity script | Zod cannot access MDX body; script bridges gap | Medium (CI runtime) |
| Semantic correctness / tone / claims | Human editorial review | Validation cannot judge meaning or accuracy | High (labor) |
| Date field validation | z.coerce.date() + .refine() for a plausible-range check | Catches values that coerce successfully but are wrong (null → epoch, rolled-over days) while keeping YAML friendly | Low |
Configuration Template
// src/content.config.ts
import { defineCollection, z } from "astro:content";
import { glob } from "astro/loaders";
// Complete collection definition: field types, the sponsored/docType coupling,
// and the draft/review-date rule.
const productDocs = defineCollection({
  loader: glob({
    // The leading [^_] skips files whose name starts with an underscore.
    pattern: "**/[^_]*.{md,mdx}",
    base: "./src/content/docs",
  }),
  schema: z
    .object({
      title: z.string().min(1),
      docType: z.enum(["tutorial", "reference", "caseStudy"]),
      publishDate: z.coerce.date(),
      tags: z.array(z.string()).default([]),
      sponsored: z.boolean().default(false),
      isDraft: z.boolean().default(false),
      lastReviewed: z.coerce.date().optional(),
      // Optional source data for JSON-LD generation.
      structuredData: z
        .object({
          faq: z
            .array(
              z.object({
                question: z.string().min(1),
                answer: z.string().min(1),
              })
            )
            .optional(),
          // Step text is required to be non-empty, matching the FAQ fields, so
          // an empty string cannot reach the generated markup.
          steps: z
            .array(
              z.object({
                title: z.string().min(1),
                instruction: z.string().min(1),
                mediaUrl: z.string().url().optional(),
              })
            )
            .optional(),
        })
        .optional(),
    })
    .refine(
      // Both sides are booleans, so equality is an "if and only if" test.
      (data) => (data.docType === "caseStudy") === data.sponsored,
      {
        message: "sponsored must be true if and only if docType is 'caseStudy'",
        path: ["sponsored"],
      }
    )
    .superRefine((data, ctx) => {
      // A draft must not carry a review date; the issue is pinned to that field.
      if (data.isDraft && data.lastReviewed) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message: "draft documents should not include lastReviewed",
          path: ["lastReviewed"],
        });
      }
    }),
});
export const collections = { productDocs };
Quick Start Guide
- Install Dependencies: Run `npm install zod yaml tinyglobby` to add validation and parsing utilities, and `npm install --save-dev tsx` so the TypeScript parity script can be executed.
- Create Configuration File: Add
src/content.config.ts using the Configuration Template above. Adjust docType enum and field names to match your editorial model.
- Add Parity Script: Save the body-parity validator as
scripts/validate-body-parity.ts. Add "validate:parity": "tsx scripts/validate-body-parity.ts" to package.json.
- Hook into CI/Pre-commit: Configure your version control or CI pipeline to run
astro build && npm run validate:parity on pull requests. Fail the pipeline on exit code 1.
- Verify Enforcement: Create a test document with a missing required field or mismatched cross-field value. Run
astro build and confirm the build fails with a precise error path. Clear .astro/ cache if the error does not appear.