deltas, and handles multiple response shapes through an adapter pattern.
// types.ts
/**
 * Platform-neutral shape of a single job posting.
 *
 * `HiringSignalAnalyzer.fetchBoard` maps each Greenhouse job object onto this
 * interface so the rest of the pipeline does not depend on one vendor's
 * response format.
 */
export interface RawJobPosting {
/** Posting identifier as supplied by the source board (copied from `id`). */
id: string | number;
/** Job title; this is the text the taxonomy patterns are matched against. */
title: string;
/** Location label. The Greenhouse mapping substitutes "Remote/Unknown" when the source has no location name. */
location?: string;
/** Last-update value copied verbatim from the source; presumably an ISO-8601 string — TODO confirm. */
updated_at?: string;
/** Public link to the posting (Greenhouse `absolute_url`). */
url?: string;
/** Owning team, if the source provides one. Not populated by the Greenhouse mapping in this file. */
team?: string;
}
/**
 * Point-in-time summary of one job board, as built by
 * `HiringSignalAnalyzer.generateProfile`.
 */
export interface HiringProfile {
/** Board token the snapshot was generated for. */
board: string;
/** Number of postings returned by the board at snapshot time. */
totalRoles: number;
/** Posting count per taxonomy category (keys are `TaxonomyEngine.classify` results). */
categoryDistribution: Record<string, number>;
/** Posting count per location string ("Unspecified" when a posting has no location). */
geographicDistribution: Record<string, number>;
/** Category with the highest posting count, or "none" when the board has no postings. */
primarySignal: string;
/** ISO-8601 timestamp (`Date.prototype.toISOString`) of when the snapshot was built. */
snapshotTimestamp: string;
}
/**
 * Difference between two `HiringProfile` snapshots of the same board, as
 * returned by `HiringSignalAnalyzer.computeDelta`.
 */
export interface DeltaReport {
/** Per-category change in posting count (current minus previous); categories with no change are omitted. */
categoryShifts: Record<string, number>;
/** Location strings present in the current snapshot but absent from the previous one. */
newLocations: string[];
/** Leadership-hiring figure reported by `computeDelta`; NOTE(review): see that method for exactly how it is derived. */
leadershipSurge: number;
/** ISO-8601 timestamp of when the comparison was computed. */
analysisDate: string;
}
// taxonomy.ts
/**
 * Category matchers, in priority order.
 *
 * `TaxonomyEngine.classify` returns the FIRST category whose pattern matches,
 * and `Object.entries` yields string keys in insertion order, so the order of
 * the entries below is significant: a more specific category must be listed
 * before a more generic one whose vocabulary it shares.
 */
const CATEGORY_PATTERNS: Record<string, RegExp> = {
  go_to_market: /\b(account executive|ae|sales|business development|bdr|sdr|revenue|customer success|csm)\b/i,
  // Must precede `engineering`: titles such as "ML Engineer" or
  // "Machine Learning Engineer" also contain the generic word "engineer" and
  // would otherwise never reach this pattern.
  ai_research: /\b(machine learning|ml engineer|applied scientist|\bai\b|research scientist|nlp|computer vision)\b/i,
  engineering: /\b(engineer|developer|sre|devops|infrastructure|platform|backend|frontend|fullstack|software)\b/i,
  product_design: /\b(product manager|\bpm\b|product designer|ux|ui designer|design researcher)\b/i,
  talent_ops: /\b(recruiter|talent|people ops|hr business partner|sourcing specialist)\b/i,
  finance_legal: /\b(finance|accounting|controller|legal|counsel|compliance|tax)\b/i,
  support_ops: /\b(support|implementation|onboarding|technical account manager|help desk)\b/i,
};

/** Whole-word, case-insensitive seniority markers used to flag leadership-level titles. */
const SENIORITY_KEYWORDS = /\b(head|director|vp|vice president|chief|lead|staff|principal|senior)\b/i;

/**
 * Regex-based job-title classifier.
 *
 * Both patterns above are non-global, so `RegExp.prototype.test` keeps no
 * `lastIndex` state between calls and the static methods are safe to call
 * repeatedly.
 */
export class TaxonomyEngine {
  /**
   * Maps a job title to a taxonomy category.
   *
   * @param title Raw job title.
   * @returns The first category in `CATEGORY_PATTERNS` whose pattern matches,
   *          or "unclassified" when none does.
   */
  static classify(title: string): string {
    const match = Object.entries(CATEGORY_PATTERNS).find(([, pattern]) => pattern.test(title));
    return match ? match[0] : "unclassified";
  }

  /**
   * Reports whether a title contains a seniority keyword.
   *
   * @param title Raw job title.
   * @returns `true` when any keyword in `SENIORITY_KEYWORDS` appears as a whole word.
   */
  static isLeadership(title: string): boolean {
    return SENIORITY_KEYWORDS.test(title);
  }
}
// analyzer.ts
/**
 * `HiringProfile` plus an optional count of leadership-titled roles.
 *
 * The count is carried as an extra, optional property so that snapshots which
 * do not have it (hand-built or produced by older code) are still valid input
 * to `computeDelta`; a missing count is treated as 0.
 */
type LeadershipAwareProfile = HiringProfile & { leadershipRoles?: number };

/** The fields of a Greenhouse job object that `fetchBoard` reads. */
interface GreenhouseJob {
  id: string | number;
  title: string;
  location?: { name?: string } | null;
  updated_at?: string;
  absolute_url?: string;
}

/**
 * Fetches a Greenhouse job board, summarizes it into a `HiringProfile`, and
 * compares successive snapshots of the same board.
 *
 * Snapshots are kept in an in-memory map keyed by board token, so delta
 * tracking only spans the lifetime of one analyzer instance.
 */
export class HiringSignalAnalyzer {
  private readonly cache: Map<string, HiringProfile> = new Map();
  private readonly apiBase = "https://boards-api.greenhouse.io/v1/boards";

  /**
   * Downloads the postings of one board and normalizes them.
   *
   * @param boardToken Greenhouse board token (URL-encoded before use).
   * @returns Normalized postings; an empty array when the payload has no `jobs`.
   * @throws Error when the HTTP status is not OK or the payload is not an
   *         object whose `jobs` (if present) is an array.
   */
  async fetchBoard(boardToken: string): Promise<RawJobPosting[]> {
    // Encode the token so characters such as "/" or "?" cannot alter the request path.
    const endpoint = `${this.apiBase}/${encodeURIComponent(boardToken)}/jobs`;
    const response = await fetch(endpoint);
    if (!response.ok) {
      throw new Error(`Failed to fetch board "${boardToken}": HTTP ${response.status}`);
    }

    const payload: unknown = await response.json();
    if (typeof payload !== "object" || payload === null) {
      throw new Error(`Unexpected response for board "${boardToken}": payload is not an object`);
    }
    const jobs = (payload as { jobs?: unknown }).jobs ?? [];
    if (!Array.isArray(jobs)) {
      // Fail loudly: treating a malformed payload as "no jobs" would show up
      // as a false collapse of every category in the next delta.
      throw new Error(`Unexpected response for board "${boardToken}": "jobs" is not an array`);
    }

    // Normalize Greenhouse response shape
    return (jobs as GreenhouseJob[]).map((job) => ({
      id: job.id,
      title: job.title,
      location: job.location?.name || "Remote/Unknown",
      updated_at: job.updated_at,
      url: job.absolute_url,
    }));
  }

  /**
   * Builds a snapshot of a board: counts per category and per location, the
   * dominant category, and the number of leadership-titled roles.
   *
   * @param boardToken Greenhouse board token.
   * @returns The snapshot. It additionally carries `leadershipRoles`, which
   *          `computeDelta` uses to derive `leadershipSurge`.
   * @throws Error propagated from `fetchBoard`.
   */
  async generateProfile(boardToken: string): Promise<HiringProfile> {
    const jobs = await this.fetchBoard(boardToken);
    const categoryDist: Record<string, number> = {};
    const geoDist: Record<string, number> = {};
    let leadershipRoles = 0;

    for (const job of jobs) {
      const category = TaxonomyEngine.classify(job.title);
      categoryDist[category] = (categoryDist[category] ?? 0) + 1;

      const loc = job.location || "Unspecified";
      geoDist[loc] = (geoDist[loc] ?? 0) + 1;

      if (TaxonomyEngine.isLeadership(job.title)) leadershipRoles += 1;
    }

    // Stable descending sort: on a tie the category encountered first wins.
    const [topCategory] = Object.entries(categoryDist).sort(([, a], [, b]) => b - a);

    const profile: LeadershipAwareProfile = {
      board: boardToken,
      totalRoles: jobs.length,
      categoryDistribution: categoryDist,
      geographicDistribution: geoDist,
      primarySignal: topCategory?.[0] ?? "none",
      snapshotTimestamp: new Date().toISOString(),
      leadershipRoles,
    };
    return profile;
  }

  /**
   * Compares two snapshots of the same board.
   *
   * @param current  The newer snapshot.
   * @param previous The older snapshot.
   * @returns
   *  - `categoryShifts`: current minus previous count for every category that
   *    appears in EITHER snapshot (so a category that vanished is reported as
   *    a negative shift); unchanged categories are omitted.
   *  - `newLocations`: locations present only in `current`.
   *  - `leadershipSurge`: change in the number of leadership-titled roles
   *    (negative when leadership openings declined). A snapshot without a
   *    recorded leadership count contributes 0.
   */
  async computeDelta(current: HiringProfile, previous: HiringProfile): Promise<DeltaReport> {
    const shifts: Record<string, number> = {};
    const categories = new Set([
      ...Object.keys(current.categoryDistribution),
      ...Object.keys(previous.categoryDistribution),
    ]);
    for (const cat of categories) {
      const diff = (current.categoryDistribution[cat] ?? 0) - (previous.categoryDistribution[cat] ?? 0);
      if (diff !== 0) shifts[cat] = diff;
    }

    const previousLocations = new Set(Object.keys(previous.geographicDistribution));
    const newLocations = Object.keys(current.geographicDistribution).filter(
      (loc) => !previousLocations.has(loc),
    );

    return {
      categoryShifts: shifts,
      newLocations,
      leadershipSurge: this.leadershipCount(current) - this.leadershipCount(previous),
      analysisDate: new Date().toISOString(),
    };
  }

  /**
   * Generates a fresh snapshot, diffs it against the cached one (if any), and
   * then replaces the cached snapshot.
   *
   * @param boardToken Greenhouse board token.
   * @returns The new profile, plus a delta when a previous snapshot was cached.
   * @throws Error propagated from `fetchBoard`; the cache is left untouched in that case.
   */
  async runAnalysis(boardToken: string): Promise<{ profile: HiringProfile; delta?: DeltaReport }> {
    const currentProfile = await this.generateProfile(boardToken);
    const previousProfile = this.cache.get(boardToken);

    let deltaReport: DeltaReport | undefined;
    if (previousProfile) {
      deltaReport = await this.computeDelta(currentProfile, previousProfile);
    }

    this.cache.set(boardToken, currentProfile);
    return { profile: currentProfile, delta: deltaReport };
  }

  /** Reads the leadership-role count recorded on a snapshot, defaulting to 0 when absent. */
  private leadershipCount(profile: HiringProfile): number {
    return (profile as LeadershipAwareProfile).leadershipRoles ?? 0;
  }
}
Architecture Decisions & Rationale
- Modular Taxonomy Engine: Job titles are notoriously inconsistent. "Staff Software Engineer, Payments Risk Platform" and "Senior Backend Developer (Fintech)" describe the same function but use different vocabulary. Extracting classification into a dedicated `TaxonomyEngine` allows you to swap regex patterns for an LLM-based classifier later without touching the ingestion or tracking logic.
- Delta Tracking Over Snapshots: The `computeDelta` method explicitly compares the current state against a cached previous state. This enforces the core principle: intent lives in change. A category growing from 2 to 14 openings triggers a structural alert, while a stable category of 50 openings represents operational maintenance.
- Adapter-Ready Ingestion: The `fetchBoard` method currently targets Greenhouse. The `RawJobPosting` interface normalizes the response shape. When you add Lever (https://api.lever.co/v0/postings/{company}?mode=json) or Ashby, you only need to write a mapper that aligns their `text`, `categories.team`, and `categories.location` fields to the shared interface. This prevents schema drift from breaking the analysis pipeline.
- Client-Side Caching Simulation: The `Map`-based cache demonstrates the pattern. In production, you would replace this with Redis or a document store with TTL policies. Caching is non-negotiable because ATS endpoints are intended for candidate discovery, not bulk harvesting. Aggressive caching reduces request volume and respects platform rate limits.
Pitfall Guide
1. Treating Absolute Counts as Strategic Signals
Explanation: A company with 200 engineering roles and 5 sales roles isn't necessarily "engineering-focused." They might be backfilling attrition while quietly building a sales team from zero. Absolute numbers mask velocity.
Fix: Always compute week-over-week or month-over-month deltas. Flag categories where the growth rate exceeds 20% or where a category transitions from 0 to >0.
2. Ignoring Title Ambiguity and Misclassification
Explanation: Regex patterns will misfile titles like "Technical Program Manager, AI Infrastructure" or "Revenue Operations Analyst". Over-reliance on brittle pattern matching creates noisy buckets.
Fix: Implement a fallback classification layer. If a title matches multiple categories, assign it to the highest-weighted match. For production systems, route unclassified or multi-match titles through a lightweight LLM prompt that maps them to your taxonomy using few-shot examples.
3. Overlooking Geographic Expansion Signals
Explanation: Many pipelines only track role categories and ignore location strings. A sudden cluster of titles such as "Country Manager, Germany" or "Head of APAC, Singapore" is often the earliest public indicator of market entry.
Fix: Parse location fields into normalized regions. Track the emergence of new cities/countries in the geographic distribution. Weight leadership roles in new locations higher than individual contributor roles.
4. Assuming Every ATS Behaves Like Greenhouse
Explanation: Greenhouse's wildcard CORS and simple GET pagination make it developer-friendly. Workday tenants require POST-based offset pagination, lack consistent base URLs, and rarely send permissive CORS headers. Lever and Ashby have different response shapes.
Fix: Build an adapter layer. Never hardcode endpoint logic into the analyzer. Implement a PlatformAdapter interface with fetch(), normalize(), and paginate() methods. Test each platform independently before integrating into the pipeline.
5. Missing Seniority and Contextual Weighting
Explanation: Ten junior developer postings signal different intent than one VP of Engineering or Head of Platform. Raw counts treat all requisitions equally, diluting leadership signals.
Fix: Extract seniority keywords (head, director, vp, chief, staff, principal). Apply a weight multiplier to leadership roles when calculating category scores. Track leadership surges separately from IC hiring waves.
6. Accumulating Stale or Reposted Data
Explanation: ATS boards often keep old postings active or repost them without updating the updated_at field. This creates phantom demand and inflates category counts.
Fix: Filter jobs by `updated_at` within a rolling window (e.g., last 30–45 days). Implement a deduplication step using job IDs or URL hashes. Treat postings older than 60 days as archival, not active demand.
7. Ignoring Rate Limits and Platform Intent
Explanation: These endpoints exist to help candidates find roles, not to feed bulk intelligence pipelines. Aggressive polling, high concurrency, or data republishing can trigger IP blocks or legal action.
Fix: Implement exponential backoff, respect robots.txt, cache aggressively, and limit polling frequency to 1–2 times per week per board. Never store or republish personal data. Read the strategy, not the people.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Prototype / 1–10 targets | Browser console fetch + regex classifier | Zero infrastructure, instant validation, CORS allows direct client calls | $0 |
| Mid-scale / 50–500 targets | Scheduled Node.js script + Redis cache + delta tracking | Handles multiple ATS adapters, maintains historical snapshots, respects rate limits | Low ($50–$150/mo infra) |
| Enterprise / 1,000+ targets | Distributed pipeline + LLM classifier + Workday adapter | Normalizes messy titles, handles POST pagination, scales horizontally, fires alerts on spikes | Medium-High ($300–$800/mo + LLM tokens) |
| Workday-heavy portfolio | Dedicated scraper with headless browser or proxy rotation | Workday lacks wildcard CORS and uses offset POST pagination; requires specialized handling | High (proxy costs + maintenance) |
| Sales intent / GTM tracking | Focus on sales/marketing delta + funding cross-reference | Early GTM expansion signals correlate strongly with recent capital raises | Low (subset of full pipeline) |
Configuration Template
// config.ts
// Snapshot cadence and request limits.
const POLLING_SETTINGS = {
  intervalHours: 7 * 24, // one snapshot per week
  maxConcurrency: 3,
  timeoutMs: 15000,
};

// Where snapshots are kept between runs and for how long.
const CACHE_SETTINGS = {
  ttlDays: 7,
  storage: "redis", // use "memory" while prototyping
  keyPrefix: "hiring:signal:",
};

// Classification tuning.
const TAXONOMY_SETTINGS = {
  confidenceThreshold: 0.85, // applies to the LLM fallback
  leadershipWeight: 2.5,
  recencyWindowDays: 45,
};

// Minimum changes that raise a flag.
const ALERTING_SETTINGS = {
  deltaThreshold: 3, // category grew by 3 or more
  newLocationThreshold: 1, // first appearance of a new city/country
  leadershipSurgeThreshold: 2, // leadership roles jumped by 2 or more
};

// Per-ATS connection details.
const PLATFORM_SETTINGS = {
  greenhouse: {
    enabled: true,
    endpoint: "https://boards-api.greenhouse.io/v1/boards/{token}/jobs",
    cors: true,
  },
  lever: {
    enabled: true,
    endpoint: "https://api.lever.co/v0/postings/{company}?mode=json",
    cors: true,
  },
  workday: {
    enabled: false, // needs a custom adapter
    cors: false,
    pagination: "post-offset",
  },
};

/** Analysis pipeline settings, assembled from the sections defined above. */
export const ANALYSIS_CONFIG = {
  polling: POLLING_SETTINGS,
  cache: CACHE_SETTINGS,
  taxonomy: TAXONOMY_SETTINGS,
  alerting: ALERTING_SETTINGS,
  platforms: PLATFORM_SETTINGS,
};
Quick Start Guide
- Initialize the analyzer: Copy the `TaxonomyEngine` and `HiringSignalAnalyzer` classes into a TypeScript project. Install `typescript` and run `tsc --init` if starting fresh.
- Run a single board test: Execute `const analyzer = new HiringSignalAnalyzer(); analyzer.runAnalysis("stripe").then(console.log);` in a Node.js environment or browser console. Verify the output matches the expected `HiringProfile` structure.
- Schedule weekly snapshots: Set up a cron job or GitHub Action that runs the analyzer against your target board list. Store results in a JSON file or Redis instance with a 7-day TTL.
- Enable delta tracking: On the second run, the analyzer will automatically compare against the cached snapshot and return a `DeltaReport`. Monitor `categoryShifts` and `newLocations` for structural changes.
- Refine taxonomy: Review misclassified titles from the first run. Adjust regex patterns in `CATEGORY_PATTERNS` or route ambiguous cases to an LLM endpoint. Iterate until classification accuracy exceeds 85%.