// === src/agents/chunker.ts ===
import { estimateTokens } from "../utils/llm.js";

const MAX_CHUNK_TOKENS = 80_000; // Leave room for system prompt + output
const OVERLAP_CHARS = 500; // Overlap between chunks to preserve context at boundaries

export interface Chunk {
  index: number; // 0-based position of this chunk in the sequence
  total: number; // total number of chunks produced from the input
  text: string;
  tokenEstimate: number;
}

/**
 * Split text into chunks that fit within LLM context limits.
 * Splits on paragraph boundaries when possible, with overlap.
 */
export function chunkInput(text: string): Chunk[] {
  const totalTokens = estimateTokens(text);
  // Fast path: the whole input fits in a single chunk.
  if (totalTokens <= MAX_CHUNK_TOKENS) {
    return [{ index: 0, total: 1, text, tokenEstimate: totalTokens }];
  }
  const maxCharsPerChunk = MAX_CHUNK_TOKENS * 4; // reverse of token estimate
  const chunks: Chunk[] = [];
  let offset = 0;
  while (offset < text.length) {
    let end = Math.min(offset + maxCharsPerChunk, text.length);
    // Try to split on a paragraph boundary (searching the last 2000 chars of the window)
    if (end < text.length) {
      const searchRegion = text.slice(Math.max(end - 2000, offset), end);
      const lastParagraph = searchRegion.lastIndexOf("\n\n");
      if (lastParagraph > 0) {
        end = Math.max(end - 2000, offset) + lastParagraph + 2;
      } else {
        // Fall back to line boundary
        const lastLine = searchRegion.lastIndexOf("\n");
        if (lastLine > 0) {
          end = Math.max(end - 2000, offset) + lastLine + 1;
        }
      }
    }
    const chunkText = text.slice(offset, end);
    chunks.push({
      index: chunks.length,
      total: 0, // filled in below
      text: chunkText,
      tokenEstimate: estimateTokens(chunkText),
    });
    // Advance with overlap (no overlap needed after the final chunk)
    offset = end - (end < text.length ? OVERLAP_CHARS : 0);
  }
  // Set total count
  for (const chunk of chunks) {
    chunk.total = chunks.length;
  }
  return chunks;
}

// === src/agents/extraction.ts ===
import { callAgentJSON, checkInputSize } from "../utils/llm.js";
import type { PipelineInput } from "../pipeline/index.js";
import { chunkInput } from "./chunker.js";
import { getPromptForSourceType } from "./prompts.js";
import { validateExtraction } from "../schema/extraction.js";

// Shape of the JSON object the extraction LLM is instructed to return.
export interface RawExtraction {
  entities: Array<{
    name: string;
    type: string;
    description: string;
    // FIX: bare `Record` (no type arguments) is invalid TypeScript —
    // restored to a string-keyed map of arbitrary values.
    properties?: Record<string, unknown>;
    tags?: string[];
    confidence?: number;
  }>;
  relations: Array<{
    source: string;
    target: string;
    type: string;
    label: string;
    bidirectional?: boolean;
  }>;
  processes: Array<{
    name: string;
    description: string;
    trigger?: string;
    steps: Array<{
      order: number;
      action: string;
      actor?: string;
      inputs?: string[];
      outputs?: string[];
    }>;
    participants: string[];
    outcomes: string[];
  }>;
  constraints: Array<{
    name: string;
    type: string;
    description: string;
    scope: string[];
    severity: "hard" | "soft";
  }>;
  model_name: string;
  model_description: string;
  source_summary: string;
  confidence: number;
  extraction_notes: string[];
}

const SYSTEM_PROMPT = `You are a world-model extraction agent. Your job is to analyze ANY input — text, code, conversation, documentation, descriptions — and extract a complete structured world model from it. You must extract: 1. **Entities** — every distinct thing, actor, system, concept, resource, location, event, or group mentioned or implied. For each: - name: clear identifier - type: one of [actor, object, system, concept, location, event, group, resource] - description: what it is and why it matters in context - properties: any measurable/specific attributes - tags: categorization labels 2. **Relations** — every connection between entities. 
For each: - source: name of source entity (must match an entity name exactly) - target: name of target entity (must match an entity name exactly) - type: one of [has, is_a, part_of, depends_on, produces, consumes, controls, communicates_with, located_in, triggers, inherits, contains, uses, flows_to, opposes, enables, transforms] - label: human-readable description - bidirectional: true if the relation goes both ways 3. **Processes** — every dynamic sequence, workflow, or series of events. For each: - name, description, trigger - steps: ordered list with action, actor (entity name), inputs (entity names), outputs (entity names) - participants: all entity names involved - outcomes: what the process produces or changes 4. **Constraints** — every rule, invariant, limitation, boundary, or requirement. For each: - name, description - type: one of [invariant, rule, boundary, dependency, capacity, temporal, authorization] - scope: entity names this applies to - severity: hard (violation = error) or soft (violation = warning) 5. **Metadata**: - model_name: a concise name for the world being modeled - model_description: what domain/system this represents - source_summary: brief description of the input - confidence: 0-1 overall extraction confidence - extraction_notes: ambiguities, gaps, assumptions you made RULES: - Extract EVERYTHING — be thorough, not selective - Infer implicit entities and relations (e.g., if "users log in", there's a User actor, an Authentication system, and a Login process) - Entity names in relations/processes MUST exactly match entity names - Output ONLY valid JSON matching the schema — no commentary outside the JSON - If the input is code, model the architecture (modules, data flows, APIs, etc.) 
- If the input is a conversation, model the topics, participants, decisions, and action items - If the input is vague, extract what you can and note gaps in extraction_notes`;

// NOTE(review): CHUNK_SYSTEM_PROMPT is not referenced anywhere in the visible
// file — extractionAgent builds its own chunk suffix inline. Kept for now;
// confirm it is dead before removing.
const CHUNK_SYSTEM_PROMPT = `${SYSTEM_PROMPT} IMPORTANT: You are processing chunk {chunkIndex} of {chunkTotal} from a larger input. - Extract everything from THIS chunk - Use consistent entity names (the chunks will be merged later) - Note in extraction_notes that this is a partial extraction from chunk {chunkIndex}/{chunkTotal}`;

/**
 * Merge per-chunk extractions into one result.
 * Entities dedup by lowercased/trimmed name (longer description wins,
 * properties and tags are unioned); relations dedup by (source, type, target);
 * processes and constraints dedup by name.
 */
function mergeRawExtractions(extractions: RawExtraction[]): RawExtraction {
  const merged: RawExtraction = {
    entities: [],
    relations: [],
    processes: [],
    constraints: [],
    model_name: extractions[0]?.model_name ?? "Untitled",
    model_description: extractions[0]?.model_description ?? "",
    source_summary: extractions
      .map((e) => e.source_summary)
      .filter(Boolean)
      .join("; "),
    confidence: 0,
    extraction_notes: [],
  };
  // Deduplicate entities by normalized name
  const entityMap = new Map<string, RawExtraction["entities"][number]>();
  for (const ext of extractions) {
    for (const e of ext.entities) {
      const key = e.name.toLowerCase().trim();
      if (!entityMap.has(key)) {
        entityMap.set(key, e);
      } else {
        const existing = entityMap.get(key)!;
        // Keep longer description, merge props/tags
        if (e.description.length > existing.description.length) {
          existing.description = e.description;
        }
        if (e.properties) {
          existing.properties = { ...existing.properties, ...e.properties };
        }
        if (e.tags) {
          existing.tags = [...new Set([...(existing.tags ??
[]), ...e.tags])]; } } } } merged.entities = [...entityMap.values()]; // Deduplicate relations by (source, target, type) const relSet = new Set(); for (const ext of extractions) { for (const r of ext.relations) { const key = `${r.source.toLowerCase()}::${r.type}::${r.target.toLowerCase()}`; if (!relSet.has(key)) { relSet.add(key); merged.relations.push(r); } } } // Deduplicate processes by name const procSet = new Set(); for (const ext of extractions) { for (const p of ext.processes) { const key = p.name.toLowerCase().trim(); if (!procSet.has(key)) { procSet.add(key); merged.processes.push(p); } } } // Deduplicate constraints by name const cstrSet = new Set(); for (const ext of extractions) { for (const c of ext.constraints) { const key = c.name.toLowerCase().trim(); if (!cstrSet.has(key)) { cstrSet.add(key); merged.constraints.push(c); } } } // Average confidence const confidences = extractions.map((e) => e.confidence).filter((c) => c > 0); merged.confidence = confidences.length ? confidences.reduce((a, b) => a + b, 0) / confidences.length : 0.5; // Collect all notes merged.extraction_notes = extractions.flatMap( (e) => e.extraction_notes ?? 
[], ); if (extractions.length > 1) { merged.extraction_notes.push( `Merged from ${extractions.length} chunks (${merged.entities.length} unique entities after dedup)`, ); } return merged; } class EmptyExtractionError extends Error { constructor(issues: string[]) { super(`Extraction produced empty result: ${issues.join("; ")}`); this.name = "EmptyExtractionError"; } } function validateAndCoerce(raw: unknown, throwOnEmpty = false): RawExtraction { const { extraction, issues } = validateExtraction(raw); if (issues.length > 0) { process.stderr.write(` [validation] ${issues.join("; ")}\n`); } // If extraction is completely empty and we should retry, throw if ( throwOnEmpty && extraction.entities.length === 0 && extraction.relations.length === 0 ) { throw new EmptyExtractionError(issues); } // Cast validated extraction to RawExtraction (shapes are compatible) return extraction as unknown as RawExtraction; } export async function extractionAgent( input: PipelineInput, ): Promise<{ input: PipelineInput; extraction: RawExtraction }> { if (!input.raw || !input.raw.trim()) { throw new Error("Cannot extract from empty input"); } const sizeCheck = checkInputSize(input.raw); if (sizeCheck.warning) { process.stderr.write(` [warn] ${sizeCheck.warning}\n`); } const chunks = chunkInput(input.raw); const sourcePrompt = getPromptForSourceType(input.sourceType); if (chunks.length === 1) { // Single chunk — direct extraction with source-specific prompt, retry on empty const userMessage = `Analyze the following ${input.sourceType} input and extract a complete world model.\n\n---\n\n${input.raw}`; const MAX_EMPTY_RETRIES = 2; for (let attempt = 0; attempt <= MAX_EMPTY_RETRIES; attempt++) { const rawResult = await callAgentJSON( sourcePrompt, userMessage, { maxTokens: 16384, }, ); try { return { input, extraction: validateAndCoerce(rawResult, attempt < MAX_EMPTY_RETRIES), }; } catch (err) { if ( err instanceof EmptyExtractionError && attempt < MAX_EMPTY_RETRIES ) { process.stderr.write( ` 
[retry] empty extraction, attempt ${attempt + 1}/${MAX_EMPTY_RETRIES}...\n`, ); continue; } // Final attempt — accept whatever we got (coerced empty) return { input, extraction: validateAndCoerce(rawResult, false) }; } } // Shouldn't reach here, but satisfy TypeScript const rawResult = await callAgentJSON(sourcePrompt, userMessage, { maxTokens: 16384, }); return { input, extraction: validateAndCoerce(rawResult, false) }; } // Multi-chunk — extract per chunk with source-specific prompt, then merge const chunkSuffix = `\n\nIMPORTANT: You are processing chunk {chunkIndex} of {chunkTotal} from a larger input.\n- Extract everything from THIS chunk\n- Use consistent entity names (chunks will be merged later)\n- Note in extraction_notes that this is a partial extraction from chunk {chunkIndex}/{chunkTotal}`; const extractions: RawExtraction[] = []; for (const chunk of chunks) { const prompt = (sourcePrompt + chunkSuffix) .replace(/\{chunkIndex\}/g, String(chunk.index + 1)) .replace(/\{chunkTotal\}/g, String(chunk.total)); const userMessage = `Analyze chunk ${chunk.index + 1}/${chunk.total} of a ${input.sourceType} input and extract all world model elements.\n\n---\n\n${chunk.text}`; const rawResult = await callAgentJSON(prompt, userMessage, { maxTokens: 16384, }); extractions.push(validateAndCoerce(rawResult)); } return { input, extraction: mergeRawExtractions(extractions) }; } // === src/agents/index.ts === export { extractionAgent } from "./extraction.js"; export { structuringAgent } from "./structuring.js"; export { validationAgent } from "./validation.js"; export { refineWorldModel } from "./refinement.js"; export { chunkInput } from "./chunker.js"; export { getPromptForSourceType } from "./prompts.js"; export { secondPassAgent } from "./second-pass.js"; // === src/agents/prompts.ts === const BASE_SCHEMA = `Output ONLY valid JSON with this EXACT structure — no other text before or after: { "entities": [ { "name": "string", "type": 
"actor|object|system|concept|location|event|group|resource", "description": "string", "properties": {}, "tags": ["string"] } ], "relations": [ { "source": "entity name string", "target": "entity name string", "type": "has|is_a|part_of|depends_on|produces|consumes|controls|communicates_with|located_in|triggers|inherits|contains|uses|flows_to|opposes|enables|transforms", "label": "string", "bidirectional": false } ], "processes": [ { "name": "string", "description": "string", "trigger": "string", "steps": [{ "order": 1, "action": "string", "actor": "entity name", "inputs": ["entity name"], "outputs": ["entity name"] }], "participants": ["entity name"], "outcomes": ["string"] } ], "constraints": [ { "name": "string", "type": "invariant|rule|boundary|dependency|capacity|temporal|authorization", "description": "string", "scope": ["entity name"], "severity": "hard|soft" } ], "model_name": "string", "model_description": "string", "source_summary": "string", "confidence": 0.9, "extraction_notes": ["string"] } IMPORTANT: Every field shown as an array MUST be an array (even if empty: []). Every field shown as a string MUST be a string. Do not use any other types. RULES: - Entity names in relations/processes MUST exactly match entity names - Extract EVERYTHING — be thorough, not selective - Infer implicit entities and relations - Output ONLY valid JSON — no commentary, no markdown, no explanation outside the JSON`; export const PROMPTS: Record = { text: `You are a world-model extraction agent. Analyze the given text and extract a complete structured world model. Focus on: - Named entities (people, organizations, places, things, concepts) - Relationships between them (ownership, hierarchy, dependency, flow) - Any described processes, workflows, or sequences of events - Rules, constraints, limitations mentioned or implied - Implicit entities that must exist for described behaviors to work EXAMPLE — input: "A library lets members borrow books. Each book has an ISBN and a genre. 
Members can reserve books. Late returns incur a $1/day fine." Expected extraction (abbreviated): { "entities": [ {"name": "Library", "type": "system", "description": "System that manages book lending to members"}, {"name": "Member", "type": "actor", "description": "Registered user who can borrow and reserve books"}, {"name": "Book", "type": "object", "description": "Physical item available for borrowing", "properties": {"isbn": "string", "genre": "string"}}, {"name": "Reservation", "type": "object", "description": "A hold placed on a book by a member"} ], "relations": [ {"source": "Library", "target": "Book", "type": "contains", "label": "holds inventory of"}, {"source": "Member", "target": "Book", "type": "uses", "label": "borrows"}, {"source": "Member", "target": "Reservation", "type": "produces", "label": "creates reservation for a book"} ], "processes": [ {"name": "Book Borrowing", "description": "Member borrows a book from the library", "steps": [{"order": 1, "action": "Member selects book", "actor": "Member"}, {"order": 2, "action": "Library checks availability", "actor": "Library"}, {"order": 3, "action": "Book is checked out to member", "actor": "Library"}], "participants": ["Member", "Library", "Book"], "outcomes": ["Book is borrowed"]} ], "constraints": [ {"name": "Late Return Fine", "type": "rule", "description": "Late returns incur a $1/day fine", "scope": ["Member", "Book"], "severity": "hard"} ] } Note how the example extracts the implicit Reservation entity and the Library system entity even though they're not directly named as such. Apply the same thoroughness to the actual input. ${BASE_SCHEMA}`, code: `You are a world-model extraction agent specialized in SOURCE CODE analysis. Analyze the code and extract its architectural world model. 
Focus on: - Modules, classes, functions, and services as entities - Import/export dependencies as relations — TRACE IMPORT CHAINS: if module A imports from module B and calls B's functions, that's a "uses" relation - Data flow between components (who produces what, who consumes what) - API endpoints, routes, handlers as processes with steps - Type definitions and interfaces as concept entities - Database models and schemas as resource entities - Configuration and environment variables as constraints - Error handling patterns as boundary constraints - Authentication/authorization as authorization constraints - External service integrations as system entities - CLI commands / entry points as actor entities — trace which systems each command invokes by following the imports in its action handler - Utility modules that are imported by multiple files — these are shared systems, create "uses" relations from each consumer CRITICAL RULES: - Follow import chains to establish relations. If file A imports function X from file B, and function X operates on type T from file C, then A uses B and B depends_on C - Do NOT create entities for local variables, function parameters, intermediate values, or internal state. Only extract architectural components (modules, services, agents, data types, external systems) - Do NOT create orphan entities — every entity should have at least one relation - Name entities after the COMPONENT they represent, not the variable name (e.g., "Extraction Agent" not "extractionAgent", "Pipeline" not "pipeline instance") - Prefer fewer, well-connected entities over many disconnected ones Infer the ARCHITECTURE, not just list files. Model how data flows through the system. ${BASE_SCHEMA}`, conversation: `You are a world-model extraction agent specialized in CONVERSATION analysis. Analyze the conversation and extract a structured world model of its content. 
Focus on: - Participants as actor entities - Topics discussed as concept entities - Systems/products/tools mentioned as system/object entities - Decisions made as event entities with relations to what they affect - Action items as process entities with steps and assigned actors - Agreements and disagreements as relations (enables/opposes) - Deadlines and commitments as temporal constraints - Open questions as extraction_notes Model the SUBSTANCE of the conversation, not the conversation itself. ${BASE_SCHEMA}`, document: `You are a world-model extraction agent specialized in DOCUMENT and STRUCTURED DATA analysis. Analyze the input and extract a complete structured world model. If the input is JSON: - Object keys become entities or properties - Nested objects become "contains" or "part_of" relations - Arrays of objects become entity collections with shared type - API endpoints become processes with request/response steps - Schema definitions (OpenAPI, JSON Schema) become concept entities with property details If the input is YAML/TOML: - Configuration sections become system entities - Key-value pairs become properties on entities - References between sections become relations If the input is CSV/tabular: - Column headers define entity properties - Each row is an instance — extract the SCHEMA, not individual rows - Foreign key patterns become relations For all documents: - All named entities (organizations, roles, systems, concepts, regulations) - Hierarchical relationships (org charts, system architectures, taxonomies) - Described workflows and procedures as processes - Requirements, policies, and rules as constraints - Defined terms as concept entities - Dependencies between components or teams - Temporal sequences (phases, milestones, deadlines) Treat the document as a specification of a world — extract that world completely. ${BASE_SCHEMA}`, url: `You are a world-model extraction agent. The input is content fetched from a URL. 
Analyze it and extract a complete structured world model. Focus on: - The domain/topic the page covers - All entities, services, products, or concepts described - Relationships between them - Any processes, workflows, or user journeys described - Pricing, limitations, or constraints mentioned - Technical specifications as properties on entities ${BASE_SCHEMA}`, mixed: `You are a world-model extraction agent. The input contains MIXED content types (possibly text, code, data, and structured content together). Focus on: - Identify what each section represents (narrative, code, data, config) - Extract entities from ALL sections — they may reference each other - Cross-reference: code entities may implement concepts described in text - Data sections may define entity properties or constraints - Use consistent entity names across all sections ${BASE_SCHEMA}`, }; export function getPromptForSourceType(sourceType: string): string { return PROMPTS[sourceType] ?? PROMPTS.text; } // === src/agents/query.ts === import { callAgent } from "../utils/llm.js"; import { findEntity, findDependents, pathsBetween, getStats, analyzeImpact, } from "../utils/graph.js"; import type { WorldModelType } from "../schema/index.js"; export interface QueryResult { answer: string; method: "graph" | "inference"; entities_referenced: string[]; confidence: number; } // ─── Deterministic graph queries ────────────────────────────── const GRAPH_PATTERNS: Array<{ pattern: RegExp; handler: ( model: WorldModelType, match: RegExpMatchArray, ) => QueryResult | null; }> = [ { // "what depends on X" / "what uses X" / "what needs X" pattern: /what\s+(?:depends\s+on|uses|needs|requires|consumes)\s+(?:the\s+)?(.+?)(?:\?|$)/i, handler: (model, match) => { const entity = findEntity(model, match[1].trim()); if (!entity) return null; const deps = findDependents(model, entity.id); if (deps.incoming.length === 0) { return { answer: `Nothing in the model depends on ${entity.name}.`, method: "graph", entities_referenced: 
[entity.name],
          confidence: 1,
        };
      }
      const lines = deps.incoming.map(
        (d) =>
          `- ${d.entity.name} —[${d.relation.type}]→ ${entity.name}: ${d.relation.label}`,
      );
      return {
        answer: `${deps.incoming.length} entities depend on ${entity.name}:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.incoming.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
  {
    // "what does X depend on" / "what does X use" / "what does X need"
    pattern:
      /what\s+does\s+(.+?)\s+(?:depend\s+on|use|need|require|consume)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const deps = findDependents(model, entity.id);
      if (deps.outgoing.length === 0) {
        return {
          answer: `${entity.name} does not depend on anything in the model.`,
          method: "graph",
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = deps.outgoing.map(
        (d) =>
          `- ${entity.name} —[${d.relation.type}]→ ${d.entity.name}: ${d.relation.label}`,
      );
      return {
        answer: `${entity.name} depends on ${deps.outgoing.length} entities:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.outgoing.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
  {
    // "how is X connected to Y" / "path from X to Y" / "how does X relate to Y"
    // Two alternatives in one regex: groups 1/2 for the "connected to" form,
    // groups 3/4 for the "path from ... to ..." form.
    pattern:
      /(?:how\s+(?:is|does)\s+(.+?)\s+(?:connected|related?)\s+to\s+(.+?)|path\s+from\s+(.+?)\s+to\s+(.+?))(?:\?|$)/i,
    handler: (model, match) => {
      const srcName = (match[1] || match[3])?.trim();
      const tgtName = (match[2] || match[4])?.trim();
      if (!srcName || !tgtName) return null;
      const src = findEntity(model, srcName);
      const tgt = findEntity(model, tgtName);
      if (!src || !tgt) return null;
      const paths = pathsBetween(model, src.id, tgt.id);
      if (paths.length === 0) {
        return {
          answer: `No path found from ${src.name} to ${tgt.name} in the model.`,
          method: "graph",
          entities_referenced: [src.name, tgt.name],
          confidence: 1,
        };
      }
      // Render each path as "A —[rel]→ B —[rel]→ C" (first hop has no relation).
      const pathDescs = paths.map((path, i) => {
        const hops = path
          .map((step, j) => {
            if (j === 0) return step.entity.name;
            return `—[${step.relation?.type ?? "?"}]→ ${step.entity.name}`;
          })
          .join(" ");
        return ` Path ${i + 1}: ${hops}`;
      });
      return {
        answer: `${paths.length} path(s) from ${src.name} to ${tgt.name}:\n${pathDescs.join("\n")}`,
        method: "graph",
        entities_referenced: [src.name, tgt.name],
        confidence: 1,
      };
    },
  },
  {
    // "what constraints apply to X" / "rules for X"
    pattern:
      /(?:what\s+constraints?\s+(?:apply|applies)\s+to|rules?\s+for)\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      // Constraint scope holds entity IDs, so match on entity.id.
      const applicable = model.constraints.filter((c) =>
        c.scope.includes(entity.id),
      );
      if (applicable.length === 0) {
        return {
          answer: `No constraints apply to ${entity.name}.`,
          method: "graph",
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = applicable.map(
        (c) => `- [${c.severity}] ${c.name}: ${c.description}`,
      );
      return {
        answer: `${applicable.length} constraint(s) apply to ${entity.name}:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [entity.name],
        confidence: 1,
      };
    },
  },
  {
    // "what breaks if I remove X" / "impact of removing X" / "what happens without X"
    pattern:
      /(?:what\s+(?:breaks|happens)|impact\s+of\s+removing|what\s+if\s+(?:we|I)\s+remove)\s+(?:if\s+(?:we|I)\s+remove\s+)?(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const result = analyzeImpact(model, entity.id);
      if (!result) return null;
      const lines = [result.summary];
      if (result.dependents.length > 0) {
        lines.push(
          `Dependents: ${result.dependents.map((d) => d.name).join(", ")}`,
        );
      }
      if (result.affectedProcesses.length > 0) {
        lines.push(
          `Affected processes: ${result.affectedProcesses.map((a) => a.process.name).join(", ")}`,
        );
      }
      if (result.affectedConstraints.length > 0) {
        lines.push(
          `Affected constraints: ${result.affectedConstraints.map((c) => `[${c.severity}] ${c.name}`).join(", ")}`,
        );
      }
      return {
        answer: lines.join("\n"),
        method: "graph" as const,
        entities_referenced: [
          entity.name,
          ...result.dependents.map((d) => d.name),
        ],
        confidence: 1,
      };
    },
  },
  {
    // "what processes involve X" / "where does X participate" / "processes for X"
    pattern:
      /(?:what\s+processes?\s+(?:involve|include|use|have)|(?:where|which\s+processes?)\s+does\s+.+?\s+participate|processes?\s+(?:for|with|involving))\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      // Entity counts as involved if it is a listed participant OR acts in a step.
      const involved = model.processes.filter(
        (p) =>
          p.participants.includes(entity.id) ||
          p.steps.some((s) => s.actor === entity.id),
      );
      if (involved.length === 0) {
        return {
          answer: `${entity.name} does not participate in any processes.`,
          method: "graph" as const,
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = involved.map((p) => {
        const steps = p.steps
          .filter((s) => s.actor === entity.id)
          .map((s) => ` ${s.order}. ${s.action}`);
        const role =
          steps.length > 0
            ? `\n Steps as ${entity.name}:\n${steps.join("\n")}`
            : "\n (participant, no direct steps)";
        return `- **${p.name}**: ${p.description}${role}`;
      });
      return {
        answer: `${entity.name} participates in ${involved.length} process${involved.length > 1 ? "es" : ""}:\n${lines.join("\n")}`,
        method: "graph" as const,
        entities_referenced: [entity.name, ...involved.map((p) => p.name)],
        confidence: 1,
      };
    },
  },
  {
    // "list all actors" / "show all systems" / "show actors" / "what actors are there"
    pattern: /(?:list|show|what)\s+(?:all\s+)?(\w+?)s?(?:\s|$|\?)/i,
    handler: (model, match) => {
      const typeQuery = match[1].toLowerCase();
      const validTypes = [
        "actor",
        "object",
        "system",
        "concept",
        "location",
        "event",
        "group",
        "resource",
      ];
      // NOTE(review): the middle clause `t + "s" === typeQuery + "s"` is
      // always equivalent to `t === typeQuery` (appending "s" to both sides
      // changes nothing) — likely meant to match a plural form; the regex's
      // optional `s?` already strips a trailing "s", so this is harmless
      // dead logic. Confirm intent before simplifying.
      const matchedType = validTypes.find(
        (t) =>
          t === typeQuery ||
          t + "s" === typeQuery + "s" ||
          typeQuery.startsWith(t),
      );
      if (!matchedType) return null;
      const filtered = model.entities.filter((e) => e.type === matchedType);
      if (filtered.length === 0) {
        return {
          answer: `No ${matchedType} entities in this model.`,
          method: "graph" as const,
          entities_referenced: [],
          confidence: 1,
        };
      }
      const lines = filtered.map((e) => `- **${e.name}**: ${e.description}`);
      return {
        answer: `${filtered.length} ${matchedType}${filtered.length > 1 ? "s" : ""}:\n${lines.join("\n")}`,
        method: "graph" as const,
        entities_referenced: filtered.map((e) => e.name),
        confidence: 1,
      };
    },
  },
  {
    // "how many entities" / "stats" / "summary"
    pattern: /(?:how\s+many|stats|statistics|summary|overview)\b/i,
    handler: (model) => {
      const stats = getStats(model);
      const lines = [
        `Entities: ${stats.entities.total} (${Object.entries(
          stats.entities.byType,
        )
          .map(([t, c]) => `${c} ${t}`)
          .join(", ")})`,
        `Relations: ${stats.relations.total}`,
        `Processes: ${stats.processes.total} (${stats.processes.totalSteps} steps)`,
        `Constraints: ${stats.constraints.total} (${stats.constraints.hard} hard, ${stats.constraints.soft} soft)`,
        `Confidence: ${stats.confidence}`,
        "",
        "Most connected:",
        ...stats.mostConnected.map(
          (mc) => ` - ${mc.entity}: ${mc.connections} connections`,
        ),
      ];
      return {
        answer: lines.join("\n"),
        method: "graph",
        entities_referenced: stats.mostConnected.map((mc) => mc.entity),
        confidence: 1,
      };
    },
  },
  {
    // "what is X" / "describe X" / "tell me about X"
    // Broadest pattern — kept LAST so more specific questions match first.
    pattern:
      /(?:what\s+is|describe|tell\s+me\s+about|who\s+is)\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const deps = findDependents(model, entity.id);
      const constraints = model.constraints.filter((c) =>
        c.scope.includes(entity.id),
      );
      const processes = model.processes.filter((p) =>
        p.participants.includes(entity.id),
      );
      const lines = [
        `**${entity.name}** (${entity.type})`,
        entity.description,
        "",
      ];
      if (entity.properties && Object.keys(entity.properties).length > 0) {
        lines.push(`Properties: ${JSON.stringify(entity.properties)}`);
      }
      if (deps.incoming.length > 0) {
        lines.push(
          `Depended on by: ${deps.incoming.map((d) => d.entity.name).join(", ")}`,
        );
      }
      if (deps.outgoing.length > 0) {
        lines.push(
          `Depends on: ${deps.outgoing.map((d) => d.entity.name).join(", ")}`,
        );
      }
      if (processes.length > 0) {
        lines.push(
          `Participates in: ${processes.map((p) => p.name).join(", ")}`,
        );
      }
      if (constraints.length > 0) {
        lines.push(
          `Constraints: ${constraints.map((c) => `[${c.severity}] ${c.name}`).join(", ")}`,
        );
      }
      return {
        answer: lines.join("\n"),
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.incoming.map((d) => d.entity.name),
          ...deps.outgoing.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
];

// ─── LLM inference query ──────────────────────────────────────

// Serialize the whole world model into a markdown context document for the
// LLM fallback path. Relation/process/constraint IDs are resolved to names.
function modelToContext(model: WorldModelType): string {
  const entities = model.entities
    .map((e) => `- ${e.name} (${e.type}): ${e.description}`)
    .join("\n");
  const relations = model.relations
    .map((r) => {
      const src =
        model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt =
        model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return `- ${src} —[${r.type}]→ ${tgt}: ${r.label}`;
    })
    .join("\n");
  const processes = model.processes
    .map((p) => {
      const steps = p.steps
        .map((s) => {
          const actor = s.actor
            ? (model.entities.find((e) => e.id === s.actor)?.name ?? "?")
            : "?";
          return ` ${s.order}. ${actor}: ${s.action}`;
        })
        .join("\n");
      return `- ${p.name} (trigger: ${p.trigger ?? "n/a"}): ${p.description}\n${steps}\n Outcomes: ${p.outcomes.join(", ")}`;
    })
    .join("\n");
  const constraints = model.constraints
    .map((c) => {
      const scopeNames = c.scope
        .map((id) => model.entities.find((e) => e.id === id)?.name ?? id)
        .join(", ");
      return `- [${c.severity}] ${c.name} (applies to: ${scopeNames}): ${c.description}`;
    })
    .join("\n");
  return `# World Model: ${model.name}\n${model.description}\n\n## Entities (${model.entities.length})\n${entities}\n\n## Relations (${model.relations.length})\n${relations}\n\n## Processes (${model.processes.length})\n${processes}\n\n## Constraints (${model.constraints.length})\n${constraints}`;
}

const QUERY_SYSTEM_PROMPT = `You are a world-model query agent. You answer questions based STRICTLY on the world model provided. 
RULES: - Only use information present in the model — do not hallucinate or infer beyond what the model states - If the model doesn't contain enough information to answer, say so explicitly - Reference specific entities, relations, processes, and constraints by name - Be concise and direct - If the question asks about something not in the model, say "The model does not contain information about [X]"`; async function inferenceQuery( model: WorldModelType, question: string, ): Promise { const context = modelToContext(model); const userMessage = `${context}\n\n---\n\nQuestion: ${question}`; const answer = await callAgent(QUERY_SYSTEM_PROMPT, userMessage, { maxTokens: 4096, }); // Extract entity names that appear in the answer const referenced = model.entities .filter((e) => answer.toLowerCase().includes(e.name.toLowerCase())) .map((e) => e.name); return { answer, method: "inference", entities_referenced: referenced, confidence: 0.8, }; } // ─── Public API ─────────────────────────────────────────────── export async function queryWorldModel( model: WorldModelType, question: string, ): Promise { if (!question || !question.trim()) { return { answer: "No question provided.", method: "graph", entities_referenced: [], confidence: 1, }; } // Try deterministic graph queries first for (const { pattern, handler } of GRAPH_PATTERNS) { const match = question.match(pattern); if (match) { const result = handler(model, match); if (result) return result; // Pattern matched but handler returned null (entity not found) — fall through to inference } } // Fall back to LLM inference return inferenceQuery(model, question); } // === src/agents/refinement.ts === import { callAgentJSON } from "../utils/llm.js"; import type { WorldModelType } from "../schema/index.js"; import type { PipelineInput } from "../pipeline/index.js"; import type { RawExtraction } from "./extraction.js"; import { structuringAgent } from "./structuring.js"; import { validationAgent } from "./validation.js"; import { 
mergeWorldModels } from "../utils/merge.js";

// Delta-extraction prompt; {existingModelSummary} is substituted at call time.
const REFINEMENT_PROMPT = `You are a world-model refinement agent. You are given an EXISTING world model and NEW input. Your job is to extract ONLY what the new input adds, changes, or contradicts relative to the existing model. ## Existing World Model Summary: {existingModelSummary} ## Instructions: 1. Extract new entities NOT already in the existing model 2. Extract new relations between entities (new or existing) 3. Extract new processes or refinements to existing processes 4. Extract new constraints or modifications to existing ones 5. If the new input CONTRADICTS something in the existing model, extract the new version and note the contradiction in extraction_notes RULES: - DO NOT re-extract entities/relations that already exist unchanged - DO reference existing entity names exactly when creating new relations to them - Mark confidence based on how clearly the new input supports each extraction - Note in extraction_notes what was added vs what was modified - Output ONLY valid JSON matching the extraction schema`;

/**
 * Compact, token-bounded textual summary of a model for prompt injection.
 * Descriptions are truncated (100 chars for entities, 80 for processes
 * and constraints) to keep the summary small.
 */
function summarizeModel(model: WorldModelType): string {
  const entityList = model.entities
    .map((e) => ` - ${e.name} (${e.type}): ${e.description.slice(0, 100)}`)
    .join("\n");
  const relationList = model.relations
    .map((r) => {
      const src = model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt = model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return ` - ${src} —[${r.type}]→ ${tgt}`;
    })
    .join("\n");
  const processList = model.processes
    .map((p) => ` - ${p.name}: ${p.description.slice(0, 80)}`)
    .join("\n");
  const constraintList = model.constraints
    .map((c) => ` - [${c.severity}] ${c.name}: ${c.description.slice(0, 80)}`)
    .join("\n");
  return `Entities (${model.entities.length}):\n${entityList}\n\nRelations (${model.relations.length}):\n${relationList}\n\nProcesses (${model.processes.length}):\n${processList}\n\nConstraints (${model.constraints.length}):\n${constraintList}`;
}

/**
 * Incrementally refine an existing world model with new input.
 * Pipeline: delta extraction (LLM) → structuring → merge → validation.
 * Returns both the merged/validated model and the structured delta.
 * Stage callbacks report per-stage wall-clock timings.
 */
export async function refineWorldModel(
  existingModel: WorldModelType,
  newInput: PipelineInput,
  options?: {
    onStageStart?: (name: string) => void;
    onStageEnd?: (name: string, ms: number) => void;
  },
): Promise<{ worldModel: WorldModelType; delta: WorldModelType }> {
  const summary = summarizeModel(existingModel);
  const systemPrompt = REFINEMENT_PROMPT.replace(
    "{existingModelSummary}",
    summary,
  );
  // Extract delta
  options?.onStageStart?.("refinement-extraction");
  const start = Date.now();
  const userMessage = `Given the existing world model above, analyze this NEW ${newInput.sourceType} input and extract only what's new or changed.\n\n---\n\n${newInput.raw}`;
  // NOTE(review): callAgentJSON was likely parameterized (callAgentJSON<RawExtraction>) — generic appears stripped in this view
  const deltaExtraction = await callAgentJSON(
    systemPrompt,
    userMessage,
    {
      maxTokens: 16384,
    },
  );
  options?.onStageEnd?.("refinement-extraction", Date.now() - start);
  // Structure the delta into a world model
  options?.onStageStart?.("refinement-structuring");
  const structStart = Date.now();
  const { worldModel: deltaModel } = await structuringAgent({
    input: newInput,
    extraction: deltaExtraction,
  });
  options?.onStageEnd?.("refinement-structuring", Date.now() - structStart);
  // Merge existing + delta
  options?.onStageStart?.("refinement-merge");
  const mergeStart = Date.now();
  const merged = mergeWorldModels(existingModel, deltaModel, {
    name: existingModel.name,
    description: existingModel.description,
  });
  options?.onStageEnd?.("refinement-merge", Date.now() - mergeStart);
  // Validate the merged result
  options?.onStageStart?.("refinement-validation");
  const valStart = Date.now();
  const { worldModel: validatedModel } = await validationAgent({
    input: newInput,
    worldModel: merged,
  });
  options?.onStageEnd?.("refinement-validation", Date.now() - valStart);
  return { worldModel: validatedModel, delta: deltaModel };
}

// === src/agents/second-pass.ts ===
import { callAgentJSON } from "../utils/llm.js";
import type { WorldModelType } from "../schema/index.js";
import type { PipelineInput } from "../pipeline/index.js";
import type { RawExtraction } from "./extraction.js";
import { validateExtraction } from "../schema/extraction.js";

// Second-pass prompt: hunts for IMPLICIT elements the first pass missed.
const SECOND_PASS_PROMPT = `You are a world-model COMPLETENESS agent. You are given: 1. The original raw input 2. A world model that was already extracted from it Your job is to find everything the FIRST PASS MISSED. The first pass captures what's explicitly stated. You capture what's IMPLICIT. Look for: **Missing entities:** - Entities that must EXIST for the described processes to work, but were never named - Infrastructure entities (authentication, logging, error handling, networking) - Role entities implied by actions ("someone approves" → there's an Approver actor) - Data entities implied by processes ("saves to database" → there's a Record object) **Missing relations:** - Dependencies that are logically necessary but not stated - Hierarchical relations implied by context (if X contains Y, Y is part_of X) - Communication paths implied by processes (if A triggers B, A communicates_with B) **Missing processes:** - Error/failure paths (what happens when the main process fails?) - Setup/teardown processes (what must happen before/after the described flow?)
- Maintenance processes (backups, migrations, updates) **Missing constraints:** - Physical/logical impossibilities not stated (can't be in two places, can't exceed capacity) - Temporal constraints implied by ordering (step 2 can't happen before step 1) - Authorization constraints implied by roles (only admins can X) - Data integrity constraints (required fields, unique identifiers, referential integrity) RULES: - ONLY extract what's NEW — do not re-extract entities/relations already in the model - Reference existing entity names exactly when creating relations to them - Every extraction must be JUSTIFIED by the input — no hallucination - Set confidence lower (0.3-0.7) since these are inferences, not direct extractions - In extraction_notes, explain WHY each new element was inferred Output ONLY valid JSON with this EXACT structure (no other text): { "entities": [{ "name": "string", "type": "actor|object|system|concept|location|event|group|resource", "description": "string", "tags": ["string"] }], "relations": [{ "source": "entity name", "target": "entity name", "type": "has|is_a|part_of|depends_on|produces|consumes|controls|communicates_with|located_in|triggers|inherits|contains|uses|flows_to|opposes|enables|transforms", "label": "string" }], "processes": [{ "name": "string", "description": "string", "steps": [{ "order": 1, "action": "string", "actor": "entity name" }], "participants": ["entity name"], "outcomes": ["string"] }], "constraints": [{ "name": "string", "type": "invariant|rule|boundary|dependency|capacity|temporal|authorization", "description": "string", "scope": ["entity name"], "severity": "hard|soft" }], "model_name": "string", "model_description": "string", "source_summary": "string", "confidence": 0.5, "extraction_notes": ["string"] } If you find NOTHING new, return: {"entities":[],"relations":[],"processes":[],"constraints":[],"model_name":"","model_description":"","source_summary":"No new elements found","confidence":0.0,"extraction_notes":["Second pass found no implicit elements"]}`;

/**
 * Full (untruncated) textual dump of a model for the second-pass prompt.
 * Unlike refinement's summarizeModel, descriptions are NOT truncated and
 * unresolved step actors fall back to the raw actor id / "unknown".
 */
function summarizeModelForPrompt(model: WorldModelType): string {
  const entities = model.entities
    .map((e) => `- ${e.name} (${e.type}): ${e.description}`)
    .join("\n");
  const relations = model.relations
    .map((r) => {
      const src = model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt = model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return `- ${src} —[${r.type}]→ ${tgt}: ${r.label}`;
    })
    .join("\n");
  const processes = model.processes
    .map((p) => {
      const steps = p.steps
        .map((s) => {
          const actor = s.actor
            ? (model.entities.find((e) => e.id === s.actor)?.name ?? s.actor)
            : "unknown";
          return ` ${s.order}. ${actor}: ${s.action}`;
        })
        .join("\n");
      return `- ${p.name}: ${p.description}\n${steps}`;
    })
    .join("\n");
  const constraints = model.constraints
    .map((c) => `- [${c.severity}] ${c.name}: ${c.description}`)
    .join("\n");
  return `ENTITIES (${model.entities.length}):\n${entities}\n\nRELATIONS (${model.relations.length}):\n${relations}\n\nPROCESSES (${model.processes.length}):\n${processes}\n\nCONSTRAINTS (${model.constraints.length}):\n${constraints}`;
}

/**
 * Run the completeness (second) pass: show the LLM the original input plus
 * the already-extracted model and ask for implicit elements only.
 */
export async function secondPassAgent(
  originalInput: PipelineInput,
  currentModel: WorldModelType,
): Promise { // NOTE(review): return generic (e.g. Promise<RawExtraction>) appears stripped — confirm
  const modelSummary = summarizeModelForPrompt(currentModel);
  const userMessage = `## Original Input:\n${originalInput.raw}\n\n---\n\n## Already Extracted World Model:\n${modelSummary}\n\n---\n\nWhat did the first pass MISS?
Extract only NEW entities, relations, processes, and constraints that are implicit in the input but not yet in the model.`;
  const rawResult = await callAgentJSON(
    SECOND_PASS_PROMPT,
    userMessage,
    {
      maxTokens: 16384,
    },
  );
  // Schema-validate the LLM output; issues are logged but non-fatal.
  const { extraction, issues } = validateExtraction(rawResult);
  if (issues.length > 0) {
    process.stderr.write(` [second-pass validation] ${issues.join("; ")}\n`);
  }
  return extraction as unknown as RawExtraction;
}

// === src/agents/structuring.ts ===
import type { WorldModelType } from "../schema/index.js";
import { WorldModel } from "../schema/world-model.js";
import type { PipelineInput } from "../pipeline/index.js";
import type { RawExtraction } from "./extraction.js";
import { genId } from "../utils/ids.js";

export interface StructuringOutput {
  input: PipelineInput;
  worldModel: WorldModelType;
}

/**
 * Deterministic (no-LLM) stage: turn a RawExtraction into a WorldModelType.
 * - Normalizes entity/relation/constraint types into the schema enums
 *   (with alias mapping and safe fallbacks).
 * - Deduplicates entities by case-insensitive name, merging descriptions,
 *   properties, and tags.
 * - Resolves name references to generated ids, auto-creating low-confidence
 *   placeholder entities for unresolved names.
 * - Zod-validates the result; failures are logged, not thrown.
 */
export function structuringAgent(stageInput: {
  input: PipelineInput;
  extraction: RawExtraction;
}): Promise { // NOTE(review): return generic (Promise<StructuringOutput>) appears stripped — confirm
  const { input, extraction } = stageInput;
  // Normalize entity types the LLM may return outside the enum
  const VALID_ENTITY_TYPES = new Set([
    "actor", "object", "system", "concept", "location", "event", "group", "resource",
  ]);
  // NOTE(review): Record generic (e.g. Record<string, …>) appears stripped here.
  const ENTITY_TYPE_ALIASES: Record = {
    person: "actor", user: "actor", role: "actor", agent: "actor",
    organization: "group", org: "group", team: "group", company: "group",
    place: "location", area: "location", region: "location",
    service: "system", platform: "system", tool: "system", application: "system", app: "system",
    idea: "concept", principle: "concept", pattern: "concept", category: "concept",
    item: "object", thing: "object", product: "object",
    data: "resource", asset: "resource", file: "resource", document: "resource",
    incident: "event", action: "event", occurrence: "event",
  };
  // Map a raw LLM-provided entity type onto the schema enum; unknowns become "object".
  function normalizeEntityType(
    raw: string,
  ): WorldModelType["entities"][number]["type"] {
    const lower = raw.toLowerCase().trim();
    if (VALID_ENTITY_TYPES.has(lower))
      return lower as WorldModelType["entities"][number]["type"];
    return (ENTITY_TYPE_ALIASES[lower] ?? "object") as WorldModelType["entities"][number]["type"];
  }
  // Normalize relation types
  const VALID_RELATION_TYPES = new Set([
    "has", "is_a", "part_of", "depends_on", "produces", "consumes", "controls",
    "communicates_with", "located_in", "triggers", "inherits", "contains",
    "uses", "flows_to", "opposes", "enables", "transforms",
  ]);
  // Unknown relation types fall back to the generic "uses".
  function normalizeRelationType(
    raw: string,
  ): WorldModelType["relations"][number]["type"] {
    const lower = raw.toLowerCase().trim().replace(/ /g, "_");
    if (VALID_RELATION_TYPES.has(lower))
      return lower as WorldModelType["relations"][number]["type"];
    return "uses" as WorldModelType["relations"][number]["type"];
  }
  // Normalize constraint types
  const VALID_CONSTRAINT_TYPES = new Set([
    "invariant", "rule", "boundary", "dependency", "capacity", "temporal", "authorization",
  ]);
  // Unknown constraint types fall back to "rule".
  function normalizeConstraintType(
    raw: string,
  ): WorldModelType["constraints"][number]["type"] {
    const lower = raw.toLowerCase().trim().replace(/ /g, "_");
    if (VALID_CONSTRAINT_TYPES.has(lower))
      return lower as WorldModelType["constraints"][number]["type"];
    return "rule" as WorldModelType["constraints"][number]["type"];
  }
  // Build entity name → ID map (case-insensitive + trimmed for robust matching)
  const entityIdMap = new Map(); // normalized name → id
  const entityOriginalNames = new Map(); // normalized name → original name
  const normalizeForLookup = (name: string) => name.toLowerCase().trim();
  // Deduplicate entities by normalized name during initial build
  const entities: Array<{
    id: string;
    name: string;
    type: WorldModelType["entities"][number]["type"];
    description: string;
    properties?: Record; // NOTE(review): Record generic appears stripped
    tags?: string[];
    confidence?: number;
  }> = [];
  for (const e of extraction.entities) {
    const key = normalizeForLookup(e.name);
    if (entityIdMap.has(key)) {
      // Duplicate — merge into existing entity
      const existingId = entityIdMap.get(key)!;
      const existing = entities.find((ent) => ent.id === existingId);
      if (existing) {
        // Keep the longer description
        if (e.description.length > existing.description.length) {
          existing.description = e.description;
        }
        // Merge properties
        if (e.properties) {
          existing.properties = { ...existing.properties, ...e.properties };
        }
        // Merge tags
        if (e.tags) {
          existing.tags = [...new Set([...(existing.tags ?? []), ...e.tags])];
        }
      }
    } else {
      const id = genId("ent");
      entityIdMap.set(key, id);
      entityOriginalNames.set(key, e.name);
      entities.push({
        id,
        name: e.name,
        type: normalizeEntityType(e.type),
        description: e.description,
        properties: e.properties,
        tags: e.tags,
        confidence: e.confidence,
      });
    }
  }
  // Resolve a name reference to an id; unknown names get an auto-created
  // placeholder entity (tagged, confidence 0.2) so no reference dangles.
  const resolveEntityId = (name: string): string => {
    const key = normalizeForLookup(name);
    const existing = entityIdMap.get(key);
    if (existing) return existing;
    // Create a placeholder entity for unresolved references
    const id = genId("ent");
    entityIdMap.set(key, id);
    entityOriginalNames.set(key, name);
    entities.push({
      id,
      name,
      type: "object",
      description: `Auto-created entity for unresolved reference: ${name}`,
      properties: undefined,
      tags: ["auto-created"],
      confidence: 0.2,
    });
    return id;
  };
  const relations = extraction.relations.map((r) => ({
    id: genId("rel"),
    type: normalizeRelationType(r.type),
    source: resolveEntityId(r.source),
    target: resolveEntityId(r.target),
    label: r.label,
    bidirectional: r.bidirectional,
  }));
  const processes = extraction.processes.map((p) => ({
    id: genId("proc"),
    name: p.name,
    description: p.description,
    trigger: p.trigger,
    steps: p.steps.map((s, idx) => ({
      // Missing step orders default to the 1-based position in the array.
      order: s.order ?? idx + 1,
      action: s.action,
      actor: s.actor ? resolveEntityId(s.actor) : undefined,
      input: s.inputs?.map(resolveEntityId),
      output: s.outputs?.map(resolveEntityId),
    })),
    participants: p.participants.map(resolveEntityId),
    outcomes: p.outcomes,
  }));
  const constraints = extraction.constraints.map((c) => ({
    id: genId("cstr"),
    name: c.name,
    type: normalizeConstraintType(c.type),
    description: c.description,
    scope: c.scope.map(resolveEntityId),
    severity: c.severity,
  }));
  const worldModel: WorldModelType = {
    id: genId("wm"),
    name: extraction.model_name || input.name || "Untitled World Model",
    description: extraction.model_description || "Extracted world model",
    version: "0.1.0",
    created_at: new Date().toISOString(),
    entities,
    relations,
    processes,
    constraints,
    metadata: {
      source_type: input.sourceType,
      source_summary: extraction.source_summary || "No summary",
      confidence: extraction.confidence ?? 0.5,
      extraction_notes: extraction.extraction_notes,
    },
  };
  // Validate output against Zod schema — catch structuring bugs before they propagate
  const parseResult = WorldModel.safeParse(worldModel);
  if (!parseResult.success) {
    const issues = parseResult.error.issues
      .slice(0, 3)
      .map((i) => i.message)
      .join("; ");
    process.stderr.write(
      ` [structuring] Output failed schema validation: ${issues}\n`,
    );
    // Don't throw — return what we have, validation agent will catch specifics
  }
  return Promise.resolve({ input, worldModel });
}

// === src/agents/transform.ts ===
import { callAgentJSON } from "../utils/llm.js";
import type { WorldModelType } from "../schema/index.js";
import type { RawExtraction } from "./extraction.js";
import { validateExtraction } from "../schema/extraction.js";
import { structuringAgent } from "./structuring.js";
import { mergeWorldModels } from "../utils/merge.js";
import { validationAgent } from "./validation.js";

// Transformation prompt: the LLM outputs ADDITIONS plus "REMOVE:"-prefixed
// extraction_notes for deletions; {modelSummary} is substituted at call time.
const TRANSFORM_PROMPT = `You are a world-model transformation agent. You are given an existing world model and a transformation instruction.
Your job is to output the CHANGES needed to apply the transformation. Output entities, relations, processes, and constraints that should be ADDED to the model. For REMOVALS, add an extraction_note like "REMOVE: Entity Name" or "REMOVE RELATION: Source -> Target". For MODIFICATIONS, output the entity/relation with the new values — the merge will update by name. ## Existing World Model: {modelSummary} ## Rules: - Only output what CHANGES — don't re-output unchanged elements - Reference existing entity names exactly - If the transformation adds new entities, include full descriptions - If the transformation modifies entities, output them with updated fields - Note removals in extraction_notes with "REMOVE:" prefix Output ONLY valid JSON with this structure: { "entities": [{ "name": "string", "type": "actor|object|system|concept|location|event|group|resource", "description": "string", "tags": ["string"] }], "relations": [{ "source": "entity name", "target": "entity name", "type": "has|is_a|part_of|depends_on|produces|consumes|controls|communicates_with|located_in|triggers|inherits|contains|uses|flows_to|opposes|enables|transforms", "label": "string" }], "processes": [{ "name": "string", "description": "string", "steps": [{ "order": 1, "action": "string", "actor": "entity name" }], "participants": ["entity name"], "outcomes": ["string"] }], "constraints": [{ "name": "string", "type": "invariant|rule|boundary|dependency|capacity|temporal|authorization", "description": "string", "scope": ["entity name"], "severity": "hard|soft" }], "model_name": "", "model_description": "", "source_summary": "transformation applied", "confidence": 0.8, "extraction_notes": ["string"] }`;

/**
 * Compact model summary for the transform prompt (names + descriptions only,
 * no labels or step detail — smaller than the query/second-pass dumps).
 */
function summarizeModel(model: WorldModelType): string {
  const entities = model.entities
    .map((e) => `- ${e.name} (${e.type}): ${e.description}`)
    .join("\n");
  const relations = model.relations
    .map((r) => {
      const src = model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt = model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return `- ${src} —[${r.type}]→ ${tgt}`;
    })
    .join("\n");
  const processes = model.processes
    .map((p) => `- ${p.name}: ${p.description}`)
    .join("\n");
  const constraints = model.constraints
    .map((c) => `- [${c.severity}] ${c.name}: ${c.description}`)
    .join("\n");
  return `Entities:\n${entities}\n\nRelations:\n${relations}\n\nProcesses:\n${processes}\n\nConstraints:\n${constraints}`;
}

/**
 * Apply a natural-language transformation instruction to a model.
 * The LLM produces a change-set extraction; removals are encoded in
 * extraction_notes, additions are structured and merged, and the result
 * is re-validated. Returns the new model plus a human-readable change log.
 */
export async function transformWorldModel(
  model: WorldModelType,
  instruction: string,
): Promise<{ model: WorldModelType; changes: string[] }> {
  const summary = summarizeModel(model);
  const systemPrompt = TRANSFORM_PROMPT.replace("{modelSummary}", summary);
  const userMessage = `Apply this transformation to the world model:\n\n${instruction}`;
  const rawResult = await callAgentJSON(systemPrompt, userMessage, {
    maxTokens: 16384,
  });
  const { extraction, issues } = validateExtraction(rawResult);
  if (issues.length > 0) {
    process.stderr.write(` [transform validation] ${issues.join("; ")}\n`);
  }
  const changes: string[] = [];
  // Process removals from extraction_notes
  let result = model;
  const removalNotes = (extraction.extraction_notes ??
[]).filter((n) => n.startsWith("REMOVE"), ); if (removalNotes.length > 0) { const entitiesToRemove = new Set(); for (const note of removalNotes) { const entityMatch = note.match(/REMOVE:\s*(.+)/i); if (entityMatch) { entitiesToRemove.add(entityMatch[1].trim().toLowerCase()); changes.push(`Removed: ${entityMatch[1].trim()}`); } } if (entitiesToRemove.size > 0) { const filteredEntities = result.entities.filter( (e) => !entitiesToRemove.has(e.name.toLowerCase()), ); const removedIds = new Set( result.entities .filter((e) => entitiesToRemove.has(e.name.toLowerCase())) .map((e) => e.id), ); result = { ...result, entities: filteredEntities, relations: result.relations.filter( (r) => !removedIds.has(r.source) && !removedIds.has(r.target), ), processes: result.processes.map((p) => ({ ...p, participants: p.participants.filter((pid) => !removedIds.has(pid)), })), constraints: result.constraints.map((c) => ({ ...c, scope: c.scope.filter((sid) => !removedIds.has(sid)), })), }; } } // Merge additions const addedExtraction = extraction as unknown as RawExtraction; if ( addedExtraction.entities.length > 0 || addedExtraction.relations.length > 0 || addedExtraction.processes.length > 0 || addedExtraction.constraints.length > 0 ) { const { worldModel: deltaModel } = await structuringAgent({ input: { raw: instruction, sourceType: "text" }, extraction: addedExtraction, }); if (deltaModel.entities.length > 0) changes.push(`Added ${deltaModel.entities.length} entities`); if (deltaModel.relations.length > 0) changes.push(`Added ${deltaModel.relations.length} relations`); if (deltaModel.processes.length > 0) changes.push(`Added ${deltaModel.processes.length} processes`); if (deltaModel.constraints.length > 0) changes.push(`Added ${deltaModel.constraints.length} constraints`); result = mergeWorldModels(result, deltaModel, { name: result.name, description: result.description, }); } // Re-validate const { worldModel: validated } = await validationAgent({ input: { raw: instruction, 
sourceType: "text" },
  worldModel: result,
});
return { model: validated, changes };
}

// === src/agents/validation.ts ===
import type {
  WorldModelType,
  ValidationResultType,
  ValidationIssueType,
} from "../schema/index.js";
import type { PipelineInput } from "../pipeline/index.js";

export interface ValidationOutput {
  worldModel: WorldModelType;
  validation: ValidationResultType;
}

/**
 * Deterministic (no-LLM) validation stage. Accumulates typed issues
 * (error/warning/info) over referential integrity, process structure,
 * graph topology, and metadata, then derives a 0-100 quality score.
 * The model itself is returned unchanged.
 */
export function validationAgent(stageInput: {
  input: PipelineInput;
  worldModel: WorldModelType;
}): Promise { // NOTE(review): return generic (Promise<ValidationOutput>) appears stripped — confirm
  const { worldModel } = stageInput;
  const issues: ValidationIssueType[] = [];
  const entityIds = new Set(worldModel.entities.map((e) => e.id));
  // Check relations reference valid entities
  for (const rel of worldModel.relations) {
    if (!entityIds.has(rel.source)) {
      issues.push({
        type: "error",
        code: "DANGLING_REL_SOURCE",
        message: `Relation "${rel.id}" references non-existent source entity "${rel.source}"`,
        path: `relations.${rel.id}.source`,
      });
    }
    if (!entityIds.has(rel.target)) {
      issues.push({
        type: "error",
        code: "DANGLING_REL_TARGET",
        message: `Relation "${rel.id}" references non-existent target entity "${rel.target}"`,
        path: `relations.${rel.id}.target`,
      });
    }
    if (rel.source === rel.target) {
      issues.push({
        type: "warning",
        code: "SELF_RELATION",
        message: `Relation "${rel.id}" is a self-reference on entity "${rel.source}"`,
        path: `relations.${rel.id}`,
      });
    }
  }
  // Check processes reference valid entities
  for (const proc of worldModel.processes) {
    for (const participant of proc.participants) {
      if (!entityIds.has(participant)) {
        issues.push({
          type: "error",
          code: "DANGLING_PROC_PARTICIPANT",
          message: `Process "${proc.name}" references non-existent participant "${participant}"`,
          path: `processes.${proc.id}.participants`,
        });
      }
    }
    for (const step of proc.steps) {
      if (step.actor && !entityIds.has(step.actor)) {
        issues.push({
          type: "error",
          code: "DANGLING_STEP_ACTOR",
          message: `Process "${proc.name}" step ${step.order} references non-existent actor "${step.actor}"`,
          path: `processes.${proc.id}.steps.${step.order}.actor`,
        });
      }
    }
    if (proc.steps.length === 0) {
      issues.push({
        type: "warning",
        code: "EMPTY_PROCESS",
        message: `Process "${proc.name}" has no steps`,
        path: `processes.${proc.id}.steps`,
      });
    }
    if (!proc.trigger) {
      issues.push({
        type: "info",
        code: "MISSING_TRIGGER",
        message: `Process "${proc.name}" has no trigger — when does it start?`,
        path: `processes.${proc.id}.trigger`,
      });
    }
    // Check step ordering
    if (proc.steps.length > 1) {
      const orders = proc.steps.map((s) => s.order);
      const hasDuplicates = new Set(orders).size !== orders.length;
      if (hasDuplicates) {
        issues.push({
          type: "warning",
          code: "DUPLICATE_STEP_ORDER",
          message: `Process "${proc.name}" has duplicate step order numbers: [${orders.join(", ")}]`,
          path: `processes.${proc.id}.steps`,
        });
      }
      // Steps must already appear in ascending order of their order field.
      const sorted = [...orders].sort((a, b) => a - b);
      const isMonotonic = orders.every((o, i) => o === sorted[i]);
      if (!isMonotonic) {
        issues.push({
          type: "warning",
          code: "UNORDERED_STEPS",
          message: `Process "${proc.name}" steps are not in ascending order: [${orders.join(", ")}]`,
          path: `processes.${proc.id}.steps`,
        });
      }
    }
  }
  // Check constraints reference valid entities
  for (const constraint of worldModel.constraints) {
    for (const scopeId of constraint.scope) {
      if (!entityIds.has(scopeId)) {
        issues.push({
          type: "error",
          code: "DANGLING_CONSTRAINT_SCOPE",
          message: `Constraint "${constraint.name}" references non-existent entity "${scopeId}"`,
          path: `constraints.${constraint.id}.scope`,
        });
      }
    }
  }
  // Check for orphan entities (no relations, not in any process/constraint)
  const referencedEntities = new Set();
  for (const rel of worldModel.relations) {
    referencedEntities.add(rel.source);
    referencedEntities.add(rel.target);
  }
  for (const proc of worldModel.processes) {
    for (const p of proc.participants) referencedEntities.add(p);
    for (const s of proc.steps) {
      if (s.actor) referencedEntities.add(s.actor);
      for (const inp of s.input ?? []) referencedEntities.add(inp);
      for (const out of s.output ?? []) referencedEntities.add(out);
    }
  }
  for (const constraint of worldModel.constraints) {
    for (const s of constraint.scope) referencedEntities.add(s);
  }
  for (const entity of worldModel.entities) {
    if (!referencedEntities.has(entity.id)) {
      issues.push({
        type: "warning",
        code: "ORPHAN_ENTITY",
        message: `Entity "${entity.name}" (${entity.id}) is not referenced by any relation, process, or constraint`,
        path: `entities.${entity.id}`,
      });
    }
  }
  // Check for weak entity descriptions
  for (const entity of worldModel.entities) {
    if (!entity.description || entity.description.trim().length < 5) {
      issues.push({
        type: "warning",
        code: "WEAK_DESCRIPTION",
        message: `Entity "${entity.name}" has a missing or trivially short description`,
        path: `entities.${entity.id}.description`,
      });
    }
  }
  // Check for duplicate entity names
  const nameCount = new Map();
  for (const entity of worldModel.entities) {
    nameCount.set(entity.name, (nameCount.get(entity.name) ?? 0) + 1);
  }
  for (const [name, count] of nameCount) {
    if (count > 1) {
      issues.push({
        type: "warning",
        code: "DUPLICATE_ENTITY_NAME",
        message: `Entity name "${name}" appears ${count} times — may indicate extraction duplication`,
        path: `entities`,
      });
    }
  }
  // Check for circular dependencies (A depends_on B, B depends_on A)
  const depTypes = new Set(["depends_on", "part_of", "contains", "inherits"]);
  const depEdges = worldModel.relations.filter((r) => depTypes.has(r.type));
  const visited = new Set();
  const inStack = new Set();
  // DFS cycle detection over dependency-like edges. Pushes a warning per
  // detected back-edge.
  // NOTE(review): the recursive call's boolean result is discarded and the
  // function unconditionally returns false after the loop, so the return
  // value carries no signal to callers (who also ignore it); a single cycle
  // reachable from multiple roots may be reported more than once.
  function detectCycle(entityId: string, path: string[]): boolean {
    if (inStack.has(entityId)) {
      const cycleStart = path.indexOf(entityId);
      const cycle = path.slice(cycleStart);
      const cycleNames = cycle.map(
        (id) => worldModel.entities.find((e) => e.id === id)?.name ?? id,
      );
      issues.push({
        type: "warning",
        code: "CIRCULAR_DEPENDENCY",
        message: `Circular dependency detected: ${cycleNames.join(" → ")} → ${cycleNames[0]}`,
        path: `relations`,
      });
      return true;
    }
    if (visited.has(entityId)) return false;
    visited.add(entityId);
    inStack.add(entityId);
    for (const edge of depEdges) {
      if (edge.source === entityId) {
        detectCycle(edge.target, [...path, entityId]);
      }
    }
    inStack.delete(entityId);
    return false;
  }
  for (const entity of worldModel.entities) {
    if (!visited.has(entity.id)) {
      detectCycle(entity.id, []);
    }
  }
  // Completeness checks
  if (worldModel.entities.length === 0) {
    issues.push({
      type: "error",
      code: "NO_ENTITIES",
      message: "World model has no entities",
    });
  }
  if (worldModel.relations.length === 0) {
    issues.push({
      type: "warning",
      code: "NO_RELATIONS",
      message: "World model has no relations — entities are unconnected",
    });
  }
  // Check for low type diversity (one type dominates > 80% of entities)
  if (worldModel.entities.length >= 5) {
    const typeCounts = new Map();
    for (const e of worldModel.entities) {
      typeCounts.set(e.type, (typeCounts.get(e.type) ?? 0) + 1);
    }
    for (const [type, count] of typeCounts) {
      const pct = count / worldModel.entities.length;
      if (pct > 0.8) {
        issues.push({
          type: "warning",
          code: "LOW_TYPE_DIVERSITY",
          message: `${Math.round(pct * 100)}% of entities are type "${type}" (${count}/${worldModel.entities.length}) — may indicate poor type classification`,
          path: "entities",
        });
      }
    }
  }
  // Check metadata confidence
  if (
    worldModel.metadata?.confidence !== undefined &&
    worldModel.metadata.confidence < 0.3
  ) {
    issues.push({
      type: "warning",
      code: "LOW_CONFIDENCE",
      message: `Model confidence is ${Math.round(worldModel.metadata.confidence * 100)}% — extraction may be unreliable`,
      path: "metadata.confidence",
    });
  }
  // Check for missing metadata
  if (!worldModel.metadata) {
    issues.push({
      type: "warning",
      code: "MISSING_METADATA",
      message: "Model has no metadata — source type and confidence unknown",
      path: "metadata",
    });
  }
  // Check for deep dependency chains (> 3 hops)
  {
    // Block scope deliberately shadows the outer depTypes/depEdges with a
    // narrower edge set (depends_on/part_of only).
    const depTypes = new Set(["depends_on", "part_of"]);
    const depEdges = worldModel.relations.filter((r) => depTypes.has(r.type));
    const adj = new Map();
    for (const r of depEdges) {
      const arr = adj.get(r.source) ?? [];
      arr.push(r.target);
      adj.set(r.source, arr);
    }
    // Longest simple dependency path starting at id (cycle-safe via the
    // visited set, which is unwound on return).
    function longestChainFrom(id: string, visited: Set): number { // NOTE(review): Set generic appears stripped
      if (visited.has(id)) return 0;
      visited.add(id);
      let max = 0;
      for (const next of adj.get(id) ?? []) {
        max = Math.max(max, 1 + longestChainFrom(next, visited));
      }
      visited.delete(id);
      return max;
    }
    for (const entity of worldModel.entities) {
      const depth = longestChainFrom(entity.id, new Set());
      if (depth > 3) {
        issues.push({
          type: "warning",
          code: "DEEP_DEPENDENCY_CHAIN",
          message: `Entity "${entity.name}" starts a dependency chain ${depth} levels deep — may indicate fragile architecture`,
          path: `entities.${entity.id}`,
        });
      }
    }
  }
  // Check for disconnected subgraphs
  if (worldModel.entities.length >= 4 && worldModel.relations.length > 0) {
    // Undirected adjacency over all relations; BFS counts connected components.
    const adj = new Map>(); // NOTE(review): Map generic (e.g. Map<string, Set<string>>) appears stripped/mangled here
    for (const e of worldModel.entities) adj.set(e.id, new Set());
    for (const r of worldModel.relations) {
      adj.get(r.source)?.add(r.target);
      adj.get(r.target)?.add(r.source);
    }
    const componentVisited = new Set();
    let componentCount = 0;
    for (const e of worldModel.entities) {
      if (componentVisited.has(e.id)) continue;
      componentCount++;
      const queue = [e.id];
      while (queue.length > 0) {
        const id = queue.shift()!;
        if (componentVisited.has(id)) continue;
        componentVisited.add(id);
        for (const n of adj.get(id) ?? []) {
          if (!componentVisited.has(n)) queue.push(n);
        }
      }
    }
    if (componentCount > 1) {
      issues.push({
        type: "warning",
        code: "DISCONNECTED_SUBGRAPHS",
        message: `Model has ${componentCount} disconnected clusters — may indicate missing relations between components`,
        path: "relations",
      });
    }
  }
  const hasErrors = issues.some((i) => i.type === "error");
  // Compute quality score (0-100)
  let score = 100;
  // Penalize errors (-15 each) and warnings (-3 each)
  const errors = issues.filter((i) => i.type === "error").length;
  const warnings = issues.filter((i) => i.type === "warning").length;
  score -= errors * 15;
  score -= warnings * 3;
  // Reward completeness: having all four element types
  if (worldModel.entities.length === 0) score -= 20;
  if (worldModel.relations.length === 0) score -= 10;
  if (worldModel.processes.length === 0) score -= 5;
  if (worldModel.constraints.length === 0) score -= 5;
  // Reward relation density (relations / entities ratio — ideal ~1.0+)
  if (worldModel.entities.length > 0) {
    const density = worldModel.relations.length / worldModel.entities.length;
    if (density < 0.5) score -= 10;
    else if (density >= 1.0) score += 5;
  }
  // Reward confidence
  const conf = worldModel.metadata?.confidence ??
0.5;
score += Math.round((conf - 0.5) * 10); // +/-5 based on confidence
// Clamp to the documented 0-100 range.
score = Math.max(0, Math.min(100, score));
const validation: ValidationResultType = {
  valid: !hasErrors,
  issues,
  stats: {
    entities: worldModel.entities.length,
    relations: worldModel.relations.length,
    processes: worldModel.processes.length,
    constraints: worldModel.constraints.length,
  },
  score,
};
return Promise.resolve({ worldModel, validation });
}

// === src/cli.ts ===
#!/usr/bin/env node
import { program } from "commander";
import { readFileSync, writeFileSync, existsSync } from "node:fs";
import { resolve } from "node:path";
import chalk from "chalk";
import { stringify as yamlStringify } from "yaml";
import { buildWorldModel } from "./swm.js";
import { fetchUrl, isUrl } from "./utils/fetch.js";
import { refineWorldModel } from "./agents/refinement.js";
import { mergeWorldModels, diffWorldModels } from "./utils/merge.js";
import {
  findEntity,
  findDependents,
  toMermaid,
  toDot,
  getStats,
  summarize,
  subgraph,
  findClusters,
  analyzeImpact,
} from "./utils/graph.js";
import { queryWorldModel } from "./agents/query.js";
import { intersection, difference, overlay } from "./utils/algebra.js";
import { toClaudeMd } from "./export/claude-md.js";
import { toSystemPrompt } from "./export/system-prompt.js";
import { toMcpSchema } from "./export/mcp-schema.js";
import {
  createTimeline,
  addSnapshot,
  entityHistory,
  timelineSummary,
} from "./utils/timeline.js";
import { coverage as coverageFn } from "./utils/coverage.js";
import type { Timeline } from "./utils/timeline.js";
import type { PipelineInput } from "./pipeline/index.js";
import type { WorldModelType } from "./schema/index.js";

/**
 * Heuristically classify raw input as code / document / text / url /
 * conversation. File extension (when given) wins; otherwise content
 * sniffing: URL prefix, JSON parse, YAML-ish key lines, then code and
 * conversation signals. Defaults to "text".
 */
function detectSourceType(
  raw: string,
  filePath?: string,
): PipelineInput["sourceType"] {
  // Check file extension first
  if (filePath) {
    const ext = filePath.split(".").pop()?.toLowerCase();
    const codeExts = new Set([
      "ts", "tsx", "js", "jsx", "py", "rb", "go", "rs", "java", "c", "cpp", "cs", "swift", "kt",
    ]);
    if (codeExts.has(ext ?? "")) return "code";
    if (
      ext === "json" ||
      ext === "yaml" ||
      ext === "yml" ||
      ext === "xml" ||
      ext === "csv" ||
      ext === "toml"
    )
      return "document";
    if (ext === "md" || ext === "txt" || ext === "rst") return "text";
  }
  const trimmed = raw.trimStart();
  // URL
  if (/^https?:\/\//i.test(trimmed)) return "url";
  // JSON
  if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
    try {
      JSON.parse(raw);
      return "document";
    } catch {
      /* not valid JSON, continue */
    }
  }
  // YAML (multiple key: value lines, not code)
  const yamlLines = raw.split("\n").filter((l) => /^\w[\w\s]*:\s/.test(l));
  if (yamlLines.length >= 3 && !raw.includes("function ")) return "document";
  // XML/HTML-like structured data
  // NOTE(review): the following span is visibly corrupted in this copy — the
  // startsWith argument and the codeSignals array definition appear mangled
  // (likely angle-bracket characters stripped during extraction). Reproduced
  // verbatim; reconstruct from the original file before relying on it.
  if ( trimmed.startsWith("\s*\{/.test(raw), ];
  if (codeSignals.filter(Boolean).length >= 2) return "code";
  // Conversation (speaker patterns: "Name:", "Speaker 1:", "Q:", "A:")
  if (/^[A-Z][a-z]+\s*:/m.test(raw) && /\n[A-Z][a-z]+\s*:/m.test(raw))
    return "conversation";
  return "text";
}

/**
 * Synchronous input resolution: explicit -f path wins; otherwise the
 * positional argument is tried as a file path and falls back to being
 * treated as literal text. Throws when nothing was provided.
 */
function readInput(inputArg?: string, filePath?: string): string {
  if (filePath) return readFileSync(resolve(filePath), "utf-8");
  if (inputArg) {
    try {
      return readFileSync(resolve(inputArg), "utf-8");
    } catch {
      return inputArg;
    }
  }
  // NOTE(review): the message below likely ended "-f <file>." originally — the
  // placeholder appears stripped in this copy; string left untouched here.
  throw new Error(
    "No input provided. Pass text, a file path, or use -f .",
  );
}

/**
 * Read all of stdin as UTF-8; throws when stdin is a TTY (nothing piped).
 */
async function readStdin(): Promise { // NOTE(review): Promise generic appears stripped — confirm
  if (process.stdin.isTTY) {
    throw new Error(
      "No input provided. Pass text, a file path, a URL, or pipe via stdin.",
    );
  }
  const chunks: Buffer[] = [];
  for await (const chunk of process.stdin) {
    chunks.push(chunk as Buffer);
  }
  return Buffer.concat(chunks).toString("utf-8");
}

/**
 * Async input resolution: multiple files are concatenated with `// === path ===`
 * headers; a URL argument is fetched; otherwise file/literal-arg resolution
 * falls back to stdin.
 */
async function readInputAsync(
  inputArg?: string,
  filePaths?: string | string[],
): Promise<{ raw: string; detectedUrl?: string }> {
  // Multiple files — concatenate with headers
  if (Array.isArray(filePaths) && filePaths.length > 1) {
    const parts: string[] = [];
    for (const fp of filePaths) {
      const resolved = resolve(fp);
      if (!existsSync(resolved)) throw new Error(`File not found: ${resolved}`);
      const content = readFileSync(resolved, "utf-8");
      parts.push(`// === ${fp} ===\n${content}`);
    }
    return { raw: parts.join("\n\n") };
  }
  // Single file path (extract from array if needed)
  const filePath = Array.isArray(filePaths) ? filePaths[0] : filePaths;
  // Check if input is a URL — fetch it
  const candidate = filePath || inputArg || "";
  if (isUrl(candidate)) {
    process.stderr.write(chalk.gray(` Fetching ${candidate}...\n`));
    const { text } = await fetchUrl(candidate);
    return { raw: text, detectedUrl: candidate };
  }
  // Try file/arg, fall back to stdin
  try {
    return { raw: readInput(inputArg, filePath) };
  } catch {
    return { raw: await readStdin() };
  }
}

// Load a world model from a path or stdin ("-"); definition continues past
// the end of this chunk.
async function readModel(path: string): Promise { // NOTE(review): Promise generic appears stripped — confirm
  if (path === "-" || (!process.stdin.isTTY && !existsSync(resolve(path)))) {
    const raw = path === "-" ?
await readStdin() : (() => { throw new Error(`File not found: ${resolve(path)}`); })(); try { return JSON.parse(raw) as WorldModelType; } catch { throw new Error("Invalid JSON from stdin — is this a world model?"); } } const resolved = resolve(path); if (!existsSync(resolved)) { throw new Error(`File not found: ${resolved}`); } const raw = readFileSync(resolved, "utf-8"); try { return JSON.parse(raw) as WorldModelType; } catch { throw new Error(`Invalid JSON in ${path} — is this a world model file?`); } } function formatOutput( model: WorldModelType, format: string, pretty: boolean, ): string { if (format === "yaml") return yamlStringify(model); if (format === "mermaid") return toMermaid(model); if (format === "dot") return toDot(model); return pretty ? JSON.stringify(model, null, 2) : JSON.stringify(model); } function stageCallbacks(quiet?: boolean) { let timer: ReturnType | null = null; let stageStart = 0; return { onStageStart: (name: string) => { if (!quiet) { process.stderr.write(chalk.yellow(` ▸ ${name}...`)); stageStart = Date.now(); timer = setInterval(() => { const elapsed = Math.round((Date.now() - stageStart) / 1000); process.stderr.write( `\r${chalk.yellow(` ▸ ${name}... ${elapsed}s`)}`, ); }, 2000); } }, onStageEnd: (_name: string, ms: number, data?: unknown) => { if (timer) { clearInterval(timer); timer = null; } if (!quiet) { let detail = ""; if (data && typeof data === "object") { const d = data as Record; // After structuring or validation, show model stats if (d.worldModel && typeof d.worldModel === "object") { const wm = d.worldModel as Record; detail = chalk.gray( ` (${wm.entities?.length ?? "?"}e ${wm.relations?.length ?? "?"}r ${wm.processes?.length ?? "?"}p ${wm.constraints?.length ?? "?"}c)`, ); } // After extraction, show raw extraction counts if (d.extraction && typeof d.extraction === "object") { const ex = d.extraction as Record; detail = chalk.gray( ` (${ex.entities?.length ?? "?"}e ${ex.relations?.length ?? 
"?"}r)`, ); } } process.stderr.write(chalk.green(` done (${ms}ms)`) + detail + "\n"); } }, }; } program .name("swm") .description( "Structured World Model — turn anything into a structured world model", ) .version("0.1.0"); // ─── model ──────────────────────────────────────────────────── program .command("model") .description("Build a structured world model from input") .argument("[input]", "Text input or file path") .option( "-f, --file ", "Read input from one or more files (concatenated)", ) .option("-o, --output ", "Write output to file") .option( "-t, --type ", "Source type: text, code, document, url, conversation, mixed", ) .option( "--format ", "Output format: json, yaml, mermaid, dot", "json", ) .option("--pretty", "Pretty-print JSON output", true) .option("--full", "Output full result (model + validation + score + timings)") .option("--quiet", "Suppress progress output") .option( "-p, --passes ", "Number of extraction passes (1=standard, 2-3=deeper)", "1", ) .option( "-m, --model ", "Claude model to use (e.g. claude-opus-4-20250514, claude-haiku-4-5-20251001)", ) .option( "--fix", "Auto-fix validation issues before outputting (remove orphans, dangling refs, duplicates)", ) .option( "--min-score ", "Exit non-zero if quality score is below this threshold (0-100)", ) .option("--watch", "Watch input file and rebuild on change") .option( "-n, --name ", "Set the world model name (overrides LLM-generated name)", ) .option("-d, --description ", "Set the world model description") .action( async ( inputArg: string | undefined, opts: Record, ) => { try { const { raw, detectedUrl } = await readInputAsync( inputArg, opts.file as string | string[] | undefined, ); if (!raw.trim()) { console.error(chalk.red("Error: No input provided")); process.exit(1); } const sourceType = (opts.type as PipelineInput["sourceType"]) || (detectedUrl ? "url" : detectSourceType( raw, (Array.isArray(opts.file) ? opts.file[0] : (opts.file as string)) ?? 
inputArg,
              ));
        const input: PipelineInput = {
          raw,
          sourceType,
          // Short inline args double as the model name; long text does not
          name:
            detectedUrl ||
            (opts.file as string) ||
            (inputArg && inputArg.length < 100 ? inputArg : undefined),
        };
        if (!opts.quiet) {
          console.error(chalk.blue("■ Structured World Model"));
          console.error(
            chalk.gray(` Source: ${sourceType} (${raw.length} chars)\n`),
          );
        }
        const passes = parseInt((opts.passes as string) ?? "1", 10) || 1;
        let result = await buildWorldModel(input, {
          ...stageCallbacks(opts.quiet as boolean),
          passes,
          model: opts.model as string | undefined,
        });
        let finalModel = result.worldModel;
        // CLI-provided name/description override the LLM-generated ones
        if (opts.name) {
          finalModel = { ...finalModel, name: opts.name as string };
        }
        if (opts.description) {
          finalModel = {
            ...finalModel,
            description: opts.description as string,
          };
        }
        if (opts.fix) {
          const { fixWorldModel } = await import("./utils/fix.js");
          const { model: fixed, fixes } = fixWorldModel(finalModel);
          finalModel = fixed;
          if (!opts.quiet && fixes.length > 0) {
            console.error(chalk.yellow(`\n Auto-fixed: ${fixes.join(", ")}`));
          }
          // Re-validate after fix to get accurate score
          const { validationAgent: va } = await import("./agents/validation.js");
          const { validation: revalidation } = await va({
            input,
            worldModel: finalModel,
          });
          result = {
            ...result,
            worldModel: finalModel,
            validation: revalidation,
          };
        }
        let output: string;
        if (opts.full) {
          const fullResult = {
            worldModel: finalModel,
            validation: result.validation,
            totalDurationMs: result.totalDurationMs,
          };
          output = JSON.stringify(fullResult, null, 2);
        } else {
          output = formatOutput(
            finalModel,
            (opts.format as string) ?? "json",
            (opts.pretty as boolean) ?? true,
          );
        }
        if (opts.output) {
          writeFileSync(resolve(opts.output as string), output, "utf-8");
          if (!opts.quiet)
            console.error(chalk.green(`\n ✓ Written to ${opts.output}`));
        } else {
          console.log(output);
        }
        if (!opts.quiet) {
          const v = result.validation;
          const statusColor = v.valid ? chalk.green : chalk.red;
          console.error(
            statusColor(
              `\n Validation: ${v.valid ? "PASSED" : "FAILED"} — ${v.stats.entities} entities, ${v.stats.relations} relations, ${v.stats.processes} processes, ${v.stats.constraints} constraints`,
            ),
          );
          if (v.issues.length > 0) {
            console.error(chalk.gray(` Issues:`));
            for (const issue of v.issues) {
              const icon =
                issue.type === "error"
                  ? chalk.red("✗")
                  : issue.type === "warning"
                    ? chalk.yellow("!")
                    : chalk.blue("i");
              console.error(` ${icon} ${issue.message}`);
            }
          }
          if (v.score !== undefined) {
            const sc =
              v.score >= 80
                ? chalk.green
                : v.score >= 50
                  ? chalk.yellow
                  : chalk.red;
            console.error(sc(` Quality: ${v.score}/100`));
          }
          console.error(chalk.gray(` Summary: ${summarize(finalModel)}`));
          console.error(chalk.gray(`\n Total: ${result.totalDurationMs}ms`));
        }
        // Quality gate
        const minScore = opts.minScore
          ? parseInt(opts.minScore as string, 10)
          : undefined;
        if (minScore !== undefined && result.validation.score !== undefined) {
          if (result.validation.score < minScore) {
            console.error(
              chalk.red(
                `\n Quality gate FAILED: score ${result.validation.score} < threshold ${minScore}`,
              ),
            );
            process.exit(1);
          }
        }
        // Watch mode
        if (opts.watch) {
          const watchPaths = Array.isArray(opts.file)
            ? opts.file
            : opts.file
              ? [opts.file as string]
              : inputArg
                ? [inputArg]
                : [];
          if (
            watchPaths.length === 0 ||
            !watchPaths.every((p) => existsSync(resolve(p as string)))
          ) {
            console.error(
              chalk.yellow(" --watch requires a file path (-f or argument)"),
            );
            return;
          }
          const { watch } = await import("node:fs");
          console.error(
            chalk.blue(
              `\n Watching ${watchPaths.length} file(s) for changes... (Ctrl+C to stop)\n`,
            ),
          );
          // Simple re-entrancy guard: drop change events during a rebuild
          let rebuilding = false;
          for (const wp of watchPaths) {
            watch(resolve(wp as string), async (eventType) => {
              if (eventType !== "change" || rebuilding) return;
              rebuilding = true;
              console.error(
                chalk.gray(
                  `\n [${new Date().toLocaleTimeString()}] Change detected, rebuilding...`,
                ),
              );
              try {
                const { raw: newRaw } = await readInputAsync(
                  inputArg,
                  opts.file as string | string[] | undefined,
                );
                const newInput: PipelineInput = {
                  raw: newRaw,
                  sourceType: sourceType,
                  name: input.name,
                };
                const newResult = await buildWorldModel(newInput, {
                  ...stageCallbacks(opts.quiet as boolean),
                  passes,
                  model: opts.model as string | undefined,
                });
                let newFinal = newResult.worldModel;
                if (opts.fix) {
                  const { fixWorldModel } = await import("./utils/fix.js");
                  const { model: fixed } = fixWorldModel(newFinal);
                  newFinal = fixed;
                }
                const newOutput = formatOutput(
                  newFinal,
                  (opts.format as string) ?? "json",
                  (opts.pretty as boolean) ?? true,
                );
                if (opts.output) {
                  writeFileSync(
                    resolve(opts.output as string),
                    newOutput,
                    "utf-8",
                  );
                  console.error(
                    chalk.green(
                      ` ✓ Updated ${opts.output} — ${newResult.validation.stats.entities} entities, score: ${newResult.validation.score}/100`,
                    ),
                  );
                } else {
                  console.log(newOutput);
                }
              } catch (e) {
                console.error(
                  chalk.red(
                    ` Rebuild error: ${e instanceof Error ? e.message : String(e)}`,
                  ),
                );
              }
              rebuilding = false;
            });
          }
          // Keep process alive
          await new Promise(() => {});
        }
      } catch (err) {
        console.error(
          chalk.red(
            `Error: ${err instanceof Error ? err.message : String(err)}`,
          ),
        );
        process.exit(1);
      }
    },
  );

// ─── refine ───────────────────────────────────────────────────
// NOTE(review): stripped commander placeholders restored — names are guesses.
program
  .command("refine")
  .description(
    "Refine an existing world model with new input (incremental extraction)",
  )
  .argument("<model>", "Path to existing world model JSON")
  .argument("[input]", "New text input or file path")
  .option("-f, --file <file>", "Read new input from file")
  .option("-o, --output <path>", "Write refined model to file")
  .option("-t, --type <type>", "Source type of new input")
  .option(
    "--format <format>",
    "Output format: json, yaml, mermaid, dot",
    "json",
  )
  .option("--quiet", "Suppress progress output")
  .action(
    async (
      modelPath: string,
      inputArg: string | undefined,
      opts: Record<string, unknown>,
    ) => {
      try {
        const existing = await readModel(modelPath);
        const raw = readInput(inputArg, opts.file as string | undefined);
        const sourceType =
          (opts.type as PipelineInput["sourceType"]) || detectSourceType(raw);
        if (!opts.quiet) {
          console.error(chalk.blue("■ Refining World Model"));
          console.error(
            chalk.gray(
              ` Existing: ${existing.entities.length} entities, ${existing.relations.length} relations`,
            ),
          );
          console.error(
            chalk.gray(` New input: ${sourceType} (${raw.length} chars)\n`),
          );
        }
        const { worldModel, delta } = await refineWorldModel(
          existing,
          { raw, sourceType },
          stageCallbacks(opts.quiet as boolean),
        );
        const output = formatOutput(
          worldModel,
          (opts.format as string) ?? "json",
          true,
        );
        if (opts.output) {
          writeFileSync(resolve(opts.output as string), output, "utf-8");
          if (!opts.quiet)
            console.error(chalk.green(`\n ✓ Written to ${opts.output}`));
        } else {
          console.log(output);
        }
        if (!opts.quiet) {
          console.error(
            chalk.gray(
              `\n Delta: +${delta.entities.length} entities, +${delta.relations.length} relations, +${delta.processes.length} processes`,
            ),
          );
          console.error(
            chalk.gray(
              ` Result: ${worldModel.entities.length} entities, ${worldModel.relations.length} relations total`,
            ),
          );
        }
      } catch (err) {
        console.error(
          chalk.red(
            `Error: ${err instanceof Error ?
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── merge ──────────────────────────────────────────────────── program .command("merge") .description("Merge two world models into one") .argument("", "Path to first world model JSON") .argument("", "Path to second world model JSON") .option("-o, --output ", "Write merged model to file") .option("--format ", "Output format: json, yaml", "json") .action( async ( pathA: string, pathB: string, opts: Record, ) => { try { const a = await readModel(pathA); const b = await readModel(pathB); const merged = mergeWorldModels(a, b); const output = formatOutput(merged, opts.format ?? "json", true); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error( chalk.green(`✓ Merged model written to ${opts.output}`), ); } else { console.log(output); } console.error( chalk.gray( ` ${merged.entities.length} entities, ${merged.relations.length} relations`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── diff ───────────────────────────────────────────────────── program .command("diff") .description("Diff two world models") .argument("", "Path to before world model JSON") .argument("", "Path to after world model JSON") .action(async (beforePath: string, afterPath: string) => { try { const before = await readModel(beforePath); const after = await readModel(afterPath); const diff = diffWorldModels(before, after); console.log(chalk.blue("■ World Model Diff\n")); console.log(chalk.white(` Summary: ${diff.summary}\n`)); if (diff.entities.added.length) { console.log(chalk.green(" + Entities added:")); for (const name of diff.entities.added) console.log(chalk.green(` + ${name}`)); } if (diff.entities.removed.length) { console.log(chalk.red(" - Entities removed:")); for (const name of diff.entities.removed) console.log(chalk.red(` - ${name}`)); } if (diff.entities.modified.length) { console.log(chalk.yellow(" ~ Entities modified:")); for (const name of diff.entities.modified) console.log(chalk.yellow(` ~ ${name}`)); } if (diff.relations.added.length) { console.log( chalk.green(` + ${diff.relations.added.length} relations added`), ); } if (diff.relations.removed.length) { console.log( chalk.red(` - ${diff.relations.removed.length} relations removed`), ); } if (diff.processes.added.length) { console.log( chalk.green(` + ${diff.processes.added.length} processes added`), ); } if (diff.constraints.added.length) { console.log( chalk.green(` + ${diff.constraints.added.length} constraints added`), ); } // Score comparison const { validationAgent: va } = await import("./agents/validation.js"); const { validation: vBefore } = await va({ input: { raw: "", sourceType: "text" }, worldModel: before, }); const { validation: vAfter } = await va({ input: { raw: "", sourceType: "text" }, worldModel: after, }); if (vBefore.score !== undefined && vAfter.score !== undefined) { const delta = vAfter.score - vBefore.score; const 
arrow = delta > 0 ? chalk.green(`+${delta}`) : delta < 0 ? chalk.red(`${delta}`) : chalk.gray("±0"); console.log( `\n Quality: ${vBefore.score} → ${vAfter.score} (${arrow})`, ); } } catch (err) { console.error( chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`), ); process.exit(1); } }); // ─── inspect ────────────────────────────────────────────────── program .command("inspect") .description("Inspect a world model — stats, entity lookup, graph export") .argument("", "Path to world model JSON") .option( "-e, --entity ", "Look up a specific entity and show its relations", ) .option("--stats", "Show detailed statistics") .option("--format ", "Export format: mermaid, dot") .action( async ( modelPath: string, opts: Record, ) => { try { const model = await readModel(modelPath); if (opts.format) { console.log(formatOutput(model, opts.format as string, true)); return; } if (opts.entity) { const entity = findEntity(model, opts.entity as string); if (!entity) { console.error(chalk.red(`Entity "${opts.entity}" not found`)); process.exit(1); } console.log( chalk.blue(`■ ${entity.name}`) + chalk.gray(` (${entity.type})`), ); console.log(chalk.white(` ${entity.description}`)); if (entity.properties) { console.log( chalk.gray(` Properties: ${JSON.stringify(entity.properties)}`), ); } const deps = findDependents(model, entity.id); if (deps.incoming.length) { console.log(chalk.gray("\n Incoming:")); for (const d of deps.incoming) { console.log( ` ${d.entity.name} —[${d.relation.type}]→ ${entity.name}`, ); } } if (deps.outgoing.length) { console.log(chalk.gray("\n Outgoing:")); for (const d of deps.outgoing) { console.log( ` ${entity.name} —[${d.relation.type}]→ ${d.entity.name}`, ); } } return; } // Default: show stats const stats = getStats(model); console.log(chalk.blue(`■ ${model.name}`)); console.log(chalk.gray(` ${model.description}\n`)); console.log(` Entities: ${stats.entities.total}`); for (const [type, count] of Object.entries(stats.entities.byType)) 
{ console.log(chalk.gray(` ${type}: ${count}`)); } console.log(` Relations: ${stats.relations.total}`); for (const [type, count] of Object.entries(stats.relations.byType)) { console.log(chalk.gray(` ${type}: ${count}`)); } console.log( ` Processes: ${stats.processes.total} (${stats.processes.totalSteps} steps)`, ); console.log( ` Constraints: ${stats.constraints.total} (${stats.constraints.hard} hard, ${stats.constraints.soft} soft)`, ); console.log(` Confidence: ${stats.confidence}`); if (stats.mostConnected.length) { console.log(chalk.gray("\n Most connected:")); for (const mc of stats.mostConnected) { console.log(` ${mc.entity}: ${mc.connections} connections`); } } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── validate ───────────────────────────────────────────────── program .command("validate") .description("Validate a world model JSON file with full integrity checks") .argument("", "Path to world model JSON") .option("--strict", "Exit non-zero on any warning (not just errors)") .action(async (file: string, opts: Record) => { try { const model = await readModel(file); console.log(chalk.blue("■ Validating world model")); console.log( chalk.gray( ` ${model.entities.length} entities, ${model.relations.length} relations\n`, ), ); const { validationAgent } = await import("./agents/validation.js"); const { validation } = await validationAgent({ input: { raw: "", sourceType: "text" }, worldModel: model, }); const statusColor = validation.valid ? chalk.green : chalk.red; console.log( statusColor(` ${validation.valid ? "✓ VALID" : "✗ INVALID"}`), ); if (validation.issues.length > 0) { for (const issue of validation.issues) { const icon = issue.type === "error" ? chalk.red("✗") : issue.type === "warning" ? 
chalk.yellow("!") : chalk.blue("i"); console.log(` ${icon} [${issue.code}] ${issue.message}`); } } else { console.log(chalk.green(" No issues found")); } console.log( chalk.gray( `\n Stats: ${validation.stats.entities} entities, ${validation.stats.relations} relations, ${validation.stats.processes} processes, ${validation.stats.constraints} constraints`, ), ); if (validation.score !== undefined) { const scoreColor = validation.score >= 80 ? chalk.green : validation.score >= 50 ? chalk.yellow : chalk.red; console.log(scoreColor(` Quality score: ${validation.score}/100`)); } if (!validation.valid) process.exit(1); if (opts.strict && validation.issues.length > 0) { console.error( chalk.red( ` Strict mode: ${validation.issues.length} issue(s) found`, ), ); process.exit(1); } } catch (err) { console.error( chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`), ); process.exit(1); } }); // ─── query ──────────────────────────────────────────────────── program .command("query") .description("Ask a question about a world model") .argument("", "Path to world model JSON") .argument("", "Natural language question") .option("--json", "Output result as JSON") .action( async ( modelPath: string, question: string, opts: Record, ) => { try { const model = await readModel(modelPath); const result = await queryWorldModel(model, question); if (opts.json) { console.log(JSON.stringify(result, null, 2)); } else { console.log(result.answer); console.error( chalk.gray( `\n Method: ${result.method} | Confidence: ${result.confidence} | Entities: ${result.entities_referenced.join(", ") || "none"}`, ), ); } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: intersect ─────────────────────────────────────── program .command("intersect") .description("Compute the intersection of two world models (shared entities)") .argument("", "Path to first world model JSON") .argument("", "Path to second world model JSON") .option("-o, --output ", "Write result to file") .action( async ( pathA: string, pathB: string, opts: Record, ) => { try { const result = intersection( await readModel(pathA), await readModel(pathB), ); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray( ` ${result.entities.length} shared entities, ${result.relations.length} shared relations`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: subtract ─────────────────────────────────────── program .command("subtract") .description("Compute A \\ B — entities in A that are not in B") .argument("", "Path to base world model JSON") .argument("", "Path to model to subtract") .option("-o, --output ", "Write result to file") .action( async ( pathA: string, pathB: string, opts: Record, ) => { try { const result = difference( await readModel(pathA), await readModel(pathB), ); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray(` ${result.entities.length} unique entities remaining`), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: overlay ──────────────────────────────────────── program .command("overlay") .description( "Apply a lens model on top of a base model (constraints, relations overlay)", ) .argument("", "Path to base world model JSON") .argument("", "Path to lens model to overlay") .option("-o, --output ", "Write result to file") .action( async ( basePath: string, lensPath: string, opts: Record, ) => { try { const result = overlay( await readModel(basePath), await readModel(lensPath), ); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray( ` ${result.entities.length} entities, ${result.constraints.length} constraints after overlay`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── export ────────────────────────────────────────────────── program .command("export") .description("Export a world model as AI-consumable context") .argument("", "Path to world model JSON") .option( "--as ", "Export format: claude-md, system-prompt, mcp", "claude-md", ) .option("-o, --output ", "Write to file") .action( async (modelPath: string, opts: Record) => { try { const model = await readModel(modelPath); let output: string; switch (opts.as) { case "claude-md": output = toClaudeMd(model); break; case "system-prompt": output = toSystemPrompt(model); break; case "mcp": output = JSON.stringify(toMcpSchema(model), null, 2); break; default: console.error( chalk.red( `Unknown export format: ${opts.as}. 
Use: claude-md, system-prompt, mcp`, ), ); process.exit(1); } if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error( chalk.green(`✓ Exported as ${opts.as} to ${opts.output}`), ); } else { console.log(output); } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── timeline: snapshot ─────────────────────────────────────── program .command("snapshot") .description("Add a world model as a snapshot to a timeline") .argument("", "Path to world model JSON") .option( "--timeline ", "Path to timeline JSON (created if missing)", "timeline.json", ) .option("-l, --label