SearchIntent = 'informational' | 'transactional' | 'navigational' | 'commercial_investigation';
/**
 * A keyword record in the shape it has before normalization
 * (`NormalizedKeyword` extends this interface with derived fields).
 */
export interface RawKeyword {
  /** The keyword text. */
  term: string;
  /** NOTE(review): presumably search volume; period and units are not stated here — confirm. */
  volume: number;
  /** NOTE(review): presumably a ranking-difficulty score; scale is not stated here — confirm. */
  difficulty: number;
  /** Optional; absent when the record carries no value for it. NOTE(review): presumably cost-per-click — confirm. */
  cpc?: number;
  /** Which of the three supported origins this record is tagged with. */
  source:
    | 'gsc'
    | 'semrush'
    | 'ahrefs';
}
/**
 * A `RawKeyword` enriched with the fields the clustering step reads:
 * a normalized form of the term, a search intent, an entity, and a priority score.
 */
export interface NormalizedKeyword extends RawKeyword {
  /** Identifier of this keyword record. */
  id: string;
  /** Normalized form of `term`; the exact normalization rules are defined elsewhere. */
  normalizedTerm: string;
  /** Search intent assigned to the keyword. */
  intent: SearchIntent;
  /** Entity the keyword is about; `ClusterGenerator` derives cluster IDs and URL slugs from it. */
  entity: string;
  /** Ranking value; `ClusterGenerator` processes keywords in descending order of this score. */
  priorityScore: number;
}
#### Step 2: Intent Classification and Clustering
Keywords must be clustered based on semantic similarity and shared intent. This prevents cannibalization by ensuring only one URL targets a specific cluster. We use a deterministic clustering algorithm based on term similarity and intent matching.
```typescript
// src/seo/engine/cluster-generator.ts
import { NormalizedKeyword, SearchIntent } from './types';
/**
 * One group of keywords that share an intent and are meant to be served by a
 * single canonical URL.
 */
export interface KeywordCluster {
  /** Identifier of the cluster. */
  clusterId: string;
  /** Term of the keyword the cluster was created from. */
  primaryKeyword: string;
  /** Intent shared by every keyword in the cluster. */
  intent: SearchIntent;
  /** Entity of the keyword the cluster was created from. */
  entity: string;
  /** Terms assigned to the cluster. */
  memberKeywords: string[];
  /** URL path the cluster maps to. */
  canonicalUrl: string;
  /** IDs of related clusters. NOTE(review): presumably used for internal linking — confirm with the consumer. */
  relatedClusters: string[];
}
/**
 * Deterministically groups keywords into clusters by intent and token overlap.
 *
 * Keywords are visited in descending `priorityScore` order. Each keyword joins
 * the first existing cluster that has the same intent and whose primary keyword
 * is similar enough (Jaccard similarity of whitespace-separated tokens against
 * `similarityThreshold`); otherwise it founds a new cluster and becomes that
 * cluster's primary keyword.
 */
export class ClusterGenerator {
  private similarityThreshold: number;

  /**
   * Token set of each cluster's primary keyword, built from the primary's
   * `normalizedTerm` so both sides of the similarity check use the same
   * normalization. Keyed weakly so discarded clusters are not retained.
   */
  private readonly primaryTokenSets = new WeakMap<KeywordCluster, Set<string>>();

  /**
   * @param similarityThreshold Minimum Jaccard similarity (0..1) for a keyword
   *   to join an existing cluster.
   */
  constructor(similarityThreshold = 0.85) {
    this.similarityThreshold = similarityThreshold;
  }

  /**
   * Builds clusters from the given keywords. The input array is not mutated.
   *
   * Cluster IDs are unique within the returned list: the first cluster for an
   * entity is `cluster-<entity-slug>`; later clusters for the same entity get
   * the intent (and, if still needed, a numeric suffix) appended rather than
   * overwriting the earlier cluster. Two clusters may still share a
   * `canonicalUrl` (same entity, same URL prefix); that is left visible in the
   * output so a validation step can report it.
   *
   * @param keywords Keywords to cluster.
   * @returns Clusters in creation order (highest-priority primary first).
   */
  generateClusters(keywords: NormalizedKeyword[]): KeywordCluster[] {
    const clusters = new Map<string, KeywordCluster>();
    // Sort a copy by priority so high-value terms become primary keywords.
    const sortedKeywords = [...keywords].sort((a, b) => b.priorityScore - a.priorityScore);

    for (const keyword of sortedKeywords) {
      let assigned = false;
      for (const cluster of clusters.values()) {
        // Cheap intent comparison first; tokenizing only happens on intent match.
        if (keyword.intent === cluster.intent && this.isSemanticMatch(keyword, cluster)) {
          cluster.memberKeywords.push(keyword.term);
          assigned = true;
          break;
        }
      }
      if (assigned) {
        continue;
      }

      const clusterId = this.uniqueClusterId(keyword, clusters);
      const cluster: KeywordCluster = {
        clusterId,
        primaryKeyword: keyword.term,
        intent: keyword.intent,
        entity: keyword.entity,
        memberKeywords: [keyword.term],
        canonicalUrl: this.generateUrlPattern(keyword.entity, keyword.intent),
        relatedClusters: [],
      };
      this.primaryTokenSets.set(cluster, this.tokenize(keyword.normalizedTerm));
      clusters.set(clusterId, cluster);
    }

    return Array.from(clusters.values());
  }

  /**
   * Simplified similarity check; production should use embedding cosine similarity.
   *
   * Computes the Jaccard similarity between the distinct tokens of the
   * keyword's `normalizedTerm` and the distinct tokens of the cluster's primary
   * keyword. Returns false when either side has no tokens.
   */
  private isSemanticMatch(keyword: NormalizedKeyword, cluster: KeywordCluster): boolean {
    // Clusters not created by this instance have no cached normalized tokens;
    // fall back to the primary keyword text itself.
    const primaryTokens = this.primaryTokenSets.get(cluster) ?? this.tokenize(cluster.primaryKeyword);
    const keywordTokens = this.tokenize(keyword.normalizedTerm);
    if (primaryTokens.size === 0 || keywordTokens.size === 0) {
      return false;
    }

    let intersection = 0;
    for (const token of keywordTokens) {
      if (primaryTokens.has(token)) {
        intersection += 1;
      }
    }
    // |A ∪ B| = |A| + |B| − |A ∩ B|; both sides are sets, so repeated words count once.
    const union = primaryTokens.size + keywordTokens.size - intersection;
    return intersection / union >= this.similarityThreshold;
  }

  /** Maps an entity and intent to the URL path its cluster should live at. */
  private generateUrlPattern(entity: string, intent: SearchIntent): string {
    const slug = this.slugify(entity);
    return intent === 'transactional' ? `/products/${slug}` : `/learn/${slug}`;
  }

  /** Returns a cluster ID for `keyword` that is not yet a key of `taken`. */
  private uniqueClusterId(keyword: NormalizedKeyword, taken: ReadonlyMap<string, KeywordCluster>): string {
    const base = `cluster-${this.slugify(keyword.entity)}`;
    if (!taken.has(base)) {
      return base;
    }
    const withIntent = `${base}-${keyword.intent.replace(/_/g, '-')}`;
    let candidate = withIntent;
    for (let n = 2; taken.has(candidate); n += 1) {
      candidate = `${withIntent}-${n}`;
    }
    return candidate;
  }

  /** Lowercases `text` and replaces each whitespace run with a single hyphen. */
  private slugify(text: string): string {
    return text.toLowerCase().replace(/\s+/g, '-');
  }

  /** Splits `text` into its distinct lowercase whitespace-separated tokens. */
  private tokenize(text: string): Set<string> {
    return new Set(text.toLowerCase().split(/\s+/).filter((token) => token.length > 0));
  }
}
```
#### Step 3: Architecture Decision: Decoupled SEO Layer
Implement the strategy engine as a decoupled layer that generates a seo-manifest.json during the build process. This manifest is consumed by the frontend framework to render metadata dynamically. This approach allows SEO changes to be validated via CI/CD pipelines without touching UI code.
Rationale:
- Single Source of Truth: The manifest is the authoritative source for all SEO data.
- Validation: Pre-build scripts can scan the manifest for cannibalization conflicts or missing metadata.
- Performance: Metadata is injected at build time or via edge middleware, avoiding runtime overhead.
#### Step 4: Implementation via Dynamic Rendering
In a Next.js or Nuxt environment, consume the manifest to populate head tags.
// src/seo/components/SEOHead.tsx
import { useSEOManifest } from '../hooks/useSEOManifest';
import { NextSeo } from 'next-seo';
/** Props accepted by the `SEOHead` component. */
interface SEOHeadProps {
  /** Key of the cluster to look up in the SEO manifest's `clusters` map. */
  clusterId: string;
}
/**
 * Renders head metadata for one keyword cluster through `NextSeo`.
 *
 * Looks the cluster up in the manifest returned by `useSEOManifest()`. When the
 * ID is unknown it logs a warning and renders nothing.
 *
 * @param clusterId Key of the cluster in `manifest.clusters`.
 * @returns A `NextSeo` element, or `null` if the cluster is missing.
 */
export function SEOHead({ clusterId }: SEOHeadProps) {
  const manifest = useSEOManifest();
  const cluster = manifest.clusters[clusterId];
  if (!cluster) {
    console.warn(`SEO Cluster ${clusterId} not found in manifest.`);
    return null;
  }

  const primaryKeyword: string = cluster.primaryKeyword;
  // ClusterGenerator seeds memberKeywords with the primary keyword itself, so
  // drop it here; otherwise the description reads "guide on X. Covers X, ...".
  const supportingKeywords: string[] = cluster.memberKeywords
    .filter((term: string) => term !== primaryKeyword)
    .slice(0, 3);
  // With no supporting keywords, omit the "Covers" sentence instead of
  // rendering "Covers  and more."
  const description =
    supportingKeywords.length > 0
      ? `Comprehensive guide on ${primaryKeyword}. Covers ${supportingKeywords.join(', ')} and more.`
      : `Comprehensive guide on ${primaryKeyword}.`;

  return (
    <NextSeo
      title={`${primaryKeyword} | ${cluster.entity}`}
      description={description}
      canonical={cluster.canonicalUrl}
      openGraph={{
        title: primaryKeyword,
        description: `Deep dive into ${cluster.entity} with focus on ${primaryKeyword}.`,
        type: 'article',
      }}
      additionalMetaTags={[
        {
          name: 'keywords',
          content: cluster.memberKeywords.join(', '),
        },
      ]}
    />
  );
}
Pitfall Guide
- Keyword Cannibalization via URL Overlap:
  - Mistake: Creating multiple pages with similar URL slugs or overlapping metadata for the same cluster.
  - Fix: Implement a build-time validator that checks `canonicalUrl` uniqueness across clusters. If two clusters map to the same URL pattern, the build should fail.
- Ignoring Search Intent Drift:
  - Mistake: Targeting a keyword with an informational intent on a transactional page structure.
  - Fix: Enforce intent-based routing in the `ClusterGenerator`. Informational clusters must map to `/learn/` or `/blog/` paths, while transactional clusters map to `/products/`.
- Static Keyword Lists:
  - Mistake: Hardcoding keywords in components or CMS fields without version control.
  - Fix: Treat keywords as code. Store them in version-controlled JSON or TypeScript config files. Use a CI/CD pipeline to update the SEO manifest whenever keyword data changes.
- Over-Optimization and Stuffing:
  - Mistake: Injecting all cluster keywords into meta tags or content unnaturally.
  - Fix: Limit `keywords` meta tag injection to high-relevance terms. Rely on semantic clustering for content structure rather than explicit keyword repetition. Use LLM-assisted content generation to naturally incorporate variations.
- Neglecting Technical Signal Alignment:
  - Mistake: Mismatch between H1 tags, title tags, and the primary keyword of the cluster.
  - Fix: Automate H1 generation based on the `primaryKeyword` field. Ensure the `SEOHead` component and the page content component consume the same cluster data object.
- Orphaned Clusters:
  - Mistake: Creating clusters without internal links to pass equity.
  - Fix: Implement a `relatedClusters` field in the cluster schema. Generate a "Related Topics" component automatically based on entity relationships to ensure all clusters are linked.
- Focusing on Volume over Value:
  - Mistake: Prioritizing high-volume keywords that do not align with business goals or conversion paths.
  - Fix: Incorporate a `businessValue` score in the `NormalizedKeyword` interface. The clustering algorithm should weight priority by `volume * conversionProbability * businessValue`, not just volume.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-Volume Competitive Niche | Semantic Cluster Architecture with Entity Optimization | Requires building topical authority to compete; clusters signal depth to search engines. | High initial dev cost; low long-term maintenance. |
| Long-Tail Niche Site | Automated Cluster Generation with Dynamic Pages | Volume of keywords is high; manual implementation is impossible. Automation scales efficiently. | Medium dev cost; negligible maintenance. |
| E-Commerce Product Catalog | Intent-Based Routing with Transactional Clusters | Focus on conversion; clusters map to product categories and filters. | Low dev cost; high ROI on conversion. |
| Technical Documentation | Hierarchical Cluster Structure | Users need structured information; hierarchy matches search intent for technical queries. | Low dev cost; improves user engagement. |
Configuration Template
Copy this template to initialize your keyword strategy engine.
// src/seo/config/seo-strategy.config.ts
import { ClusterGeneratorConfig } from '../engine/types';
// Template configuration object for the keyword strategy engine.
// NOTE(review): ClusterGeneratorConfig is declared elsewhere; the field notes
// below are inferred from names and values and should be confirmed against it.
export const seoStrategyConfig: ClusterGeneratorConfig = {
// Presumably the minimum similarity for a keyword to join a cluster
// (ClusterGenerator's constructor defaults to 0.85) — TODO confirm.
similarityThreshold: 0.82,
minClusterSize: 3,
// NOTE(review): maxClusterSize (15) and validationRules.maxKeywordsPerCluster (20)
// look like overlapping limits with different values — confirm which one applies.
maxClusterSize: 15,
// The three weights below sum to 1.0.
priorityWeights: {
volume: 0.3,
conversionProbability: 0.5,
businessValue: 0.2,
},
// URL templates keyed by search intent.
// NOTE(review): SearchIntent also includes 'commercial_investigation', which has
// no entry here — confirm the intended route or fallback.
intentRouting: {
informational: '/learn/:entity/:slug',
transactional: '/products/:entity/:slug',
navigational: '/:entity',
},
validationRules: {
preventCannibalization: true,
requireCanonicalUrl: true,
maxKeywordsPerCluster: 20,
},
// Placeholder value; replace with real domains.
excludedDomains: ['competitor-site.com'],
// NOTE(review): duration string; accepted formats are not visible here — confirm.
refreshInterval: '24h',
};
Quick Start Guide
- Install Dependencies:
  Run `npm install @codcompass/seo-engine next-seo` to add the core library and SEO component wrapper.
- Initialize Configuration:
  Create `src/seo/config/seo-strategy.config.ts` using the template above. Adjust `similarityThreshold` and `priorityWeights` based on your business goals.
- Generate Manifest:
  Run `npx seo-engine generate --config ./src/seo/config/seo-strategy.config.ts`. This produces `public/seo-manifest.json`.
- Integrate Components:
  Import `SEOHead` into your page layouts and pass the `clusterId` derived from your URL or content context.
- Verify:
  Run `npx seo-engine validate --manifest ./public/seo-manifest.json`. Ensure zero errors before deployment. Monitor the first 24 hours for metadata rendering accuracy.