// === src/agents/chunker.ts ===
import { estimateTokens } from "../utils/llm.js";

const MAX_CHUNK_TOKENS = 80_000; // Leave room for system prompt + output
const OVERLAP_CHARS = 500; // Overlap between chunks to preserve context at boundaries

// One slice of a large input, plus its position in the chunk sequence.
export interface Chunk {
  index: number; // 0-based position of this chunk
  total: number; // total number of chunks produced
  text: string;
  tokenEstimate: number;
}

/**
 * Split text into chunks that fit within LLM context limits.
 * Splits on paragraph boundaries when possible, with overlap.
 */
export function chunkInput(text: string): Chunk[] {
  const totalTokens = estimateTokens(text);
  // Fast path: input already fits in a single chunk.
  if (totalTokens <= MAX_CHUNK_TOKENS) {
    return [{ index: 0, total: 1, text, tokenEstimate: totalTokens }];
  }

  // NOTE(review): assumes estimateTokens uses ~4 chars/token — confirm against utils/llm.
  const maxCharsPerChunk = MAX_CHUNK_TOKENS * 4; // reverse of token estimate
  const chunks: Chunk[] = [];
  let offset = 0;

  while (offset < text.length) {
    let end = Math.min(offset + maxCharsPerChunk, text.length);

    // Try to split on a paragraph boundary (search only the last 2000 chars
    // of the tentative chunk so we never shrink it by much).
    if (end < text.length) {
      const searchRegion = text.slice(Math.max(end - 2000, offset), end);
      const lastParagraph = searchRegion.lastIndexOf("\n\n");
      // `> 0` (not >= 0): a boundary at position 0 would yield no progress.
      if (lastParagraph > 0) {
        end = Math.max(end - 2000, offset) + lastParagraph + 2;
      } else {
        // Fall back to line boundary
        const lastLine = searchRegion.lastIndexOf("\n");
        if (lastLine > 0) {
          end = Math.max(end - 2000, offset) + lastLine + 1;
        }
      }
    }

    const chunkText = text.slice(offset, end);
    chunks.push({
      index: chunks.length,
      total: 0, // filled in below
      text: chunkText,
      tokenEstimate: estimateTokens(chunkText),
    });

    // Advance with overlap (no overlap after the final chunk).
    offset = end - (end < text.length ?
OVERLAP_CHARS : 0);
  }

  // Set total count
  for (const chunk of chunks) {
    chunk.total = chunks.length;
  }
  return chunks;
}

// === src/agents/extraction.ts ===
import { callAgentJSON, checkInputSize } from "../utils/llm.js";
import type { PipelineInput } from "../pipeline/index.js";
import { chunkInput } from "./chunker.js";
import { getPromptForSourceType } from "./prompts.js";
import { validateExtraction } from "../schema/extraction.js";

/** Raw, pre-structuring world-model extraction as returned by the LLM. */
export interface RawExtraction {
  entities: Array<{
    name: string;
    type: string;
    description: string;
    // FIX: bare `Record` (no type arguments) does not compile — restored them.
    properties?: Record<string, unknown>;
    tags?: string[];
  }>;
  relations: Array<{
    source: string;
    target: string;
    type: string;
    label: string;
    bidirectional?: boolean;
  }>;
  processes: Array<{
    name: string;
    description: string;
    trigger?: string;
    steps: Array<{
      order: number;
      action: string;
      actor?: string;
      inputs?: string[];
      outputs?: string[];
    }>;
    participants: string[];
    outcomes: string[];
  }>;
  constraints: Array<{
    name: string;
    type: string;
    description: string;
    scope: string[];
    severity: "hard" | "soft";
  }>;
  model_name: string;
  model_description: string;
  source_summary: string;
  confidence: number;
  extraction_notes: string[];
}

const SYSTEM_PROMPT = `You are a world-model extraction agent. Your job is to analyze ANY input — text, code, conversation, documentation, descriptions — and extract a complete structured world model from it. You must extract: 1. **Entities** — every distinct thing, actor, system, concept, resource, location, event, or group mentioned or implied. For each: - name: clear identifier - type: one of [actor, object, system, concept, location, event, group, resource] - description: what it is and why it matters in context - properties: any measurable/specific attributes - tags: categorization labels 2. **Relations** — every connection between entities. 
For each: - source: name of source entity (must match an entity name exactly) - target: name of target entity (must match an entity name exactly) - type: one of [has, is_a, part_of, depends_on, produces, consumes, controls, communicates_with, located_in, triggers, inherits, contains, uses, flows_to, opposes, enables, transforms] - label: human-readable description - bidirectional: true if the relation goes both ways 3. **Processes** — every dynamic sequence, workflow, or series of events. For each: - name, description, trigger - steps: ordered list with action, actor (entity name), inputs (entity names), outputs (entity names) - participants: all entity names involved - outcomes: what the process produces or changes 4. **Constraints** — every rule, invariant, limitation, boundary, or requirement. For each: - name, description - type: one of [invariant, rule, boundary, dependency, capacity, temporal, authorization] - scope: entity names this applies to - severity: hard (violation = error) or soft (violation = warning) 5. **Metadata**: - model_name: a concise name for the world being modeled - model_description: what domain/system this represents - source_summary: brief description of the input - confidence: 0-1 overall extraction confidence - extraction_notes: ambiguities, gaps, assumptions you made RULES: - Extract EVERYTHING — be thorough, not selective - Infer implicit entities and relations (e.g., if "users log in", there's a User actor, an Authentication system, and a Login process) - Entity names in relations/processes MUST exactly match entity names - Output ONLY valid JSON matching the schema — no commentary outside the JSON - If the input is code, model the architecture (modules, data flows, APIs, etc.) 
- If the input is a conversation, model the topics, participants, decisions, and action items - If the input is vague, extract what you can and note gaps in extraction_notes`;

// NOTE(review): CHUNK_SYSTEM_PROMPT is never used — extractionAgent rebuilds an
// equivalent suffix inline (chunkSuffix). Kept for now; consider deleting or
// reusing it there so the two copies cannot drift.
const CHUNK_SYSTEM_PROMPT = `${SYSTEM_PROMPT} IMPORTANT: You are processing chunk {chunkIndex} of {chunkTotal} from a larger input. - Extract everything from THIS chunk - Use consistent entity names (the chunks will be merged later) - Note in extraction_notes that this is a partial extraction from chunk {chunkIndex}/{chunkTotal}`;

/**
 * Merge per-chunk extractions into one RawExtraction.
 * Entities dedup by lowercased name (longer description wins, properties and
 * tags are unioned); relations by (source, type, target); processes and
 * constraints by lowercased name. Confidence is the mean of non-zero values.
 */
function mergeRawExtractions(extractions: RawExtraction[]): RawExtraction {
  const merged: RawExtraction = {
    entities: [],
    relations: [],
    processes: [],
    constraints: [],
    model_name: extractions[0]?.model_name ?? "Untitled",
    model_description: extractions[0]?.model_description ?? "",
    source_summary: extractions
      .map((e) => e.source_summary)
      .filter(Boolean)
      .join("; "),
    confidence: 0,
    extraction_notes: [],
  };

  // Deduplicate entities by normalized name
  // FIX: restored the stripped Map type arguments (bare `new Map()` loses typing).
  const entityMap = new Map<string, RawExtraction["entities"][number]>();
  for (const ext of extractions) {
    for (const e of ext.entities) {
      const key = e.name.toLowerCase().trim();
      if (!entityMap.has(key)) {
        entityMap.set(key, e);
      } else {
        const existing = entityMap.get(key)!;
        // Keep longer description, merge props/tags
        if (e.description.length > existing.description.length) {
          existing.description = e.description;
        }
        if (e.properties) {
          existing.properties = { ...existing.properties, ...e.properties };
        }
        if (e.tags) {
          existing.tags = [...new Set([...(existing.tags ?? []), ...e.tags])];
        }
      }
    }
  }
  merged.entities = [...entityMap.values()];

  // Deduplicate relations by (source, target, type)
  // FIX: restored the stripped Set type arguments (here and below).
  const relSet = new Set<string>();
  for (const ext of extractions) {
    for (const r of ext.relations) {
      const key = `${r.source.toLowerCase()}::${r.type}::${r.target.toLowerCase()}`;
      if (!relSet.has(key)) {
        relSet.add(key);
        merged.relations.push(r);
      }
    }
  }

  // Deduplicate processes by name
  const procSet = new Set<string>();
  for (const ext of extractions) {
    for (const p of ext.processes) {
      const key = p.name.toLowerCase().trim();
      if (!procSet.has(key)) {
        procSet.add(key);
        merged.processes.push(p);
      }
    }
  }

  // Deduplicate constraints by name
  const cstrSet = new Set<string>();
  for (const ext of extractions) {
    for (const c of ext.constraints) {
      const key = c.name.toLowerCase().trim();
      if (!cstrSet.has(key)) {
        cstrSet.add(key);
        merged.constraints.push(c);
      }
    }
  }

  // Average confidence (zeros excluded; default 0.5 when nothing reported)
  const confidences = extractions.map((e) => e.confidence).filter((c) => c > 0);
  merged.confidence = confidences.length
    ? confidences.reduce((a, b) => a + b, 0) / confidences.length
    : 0.5;

  // Collect all notes
  merged.extraction_notes = extractions.flatMap(
    (e) => e.extraction_notes ??
[],
  );
  if (extractions.length > 1) {
    merged.extraction_notes.push(
      `Merged from ${extractions.length} chunks (${merged.entities.length} unique entities after dedup)`,
    );
  }
  return merged;
}

/**
 * Validate an unknown LLM JSON payload and coerce it to RawExtraction.
 * Validation issues are logged to stderr but do not abort the pipeline.
 */
function validateAndCoerce(raw: unknown): RawExtraction {
  const { extraction, issues } = validateExtraction(raw);
  if (issues.length > 0) {
    process.stderr.write(` [validation] ${issues.join("; ")}\n`);
  }
  // Cast validated extraction to RawExtraction (shapes are compatible)
  return extraction as unknown as RawExtraction;
}

/**
 * Stage 1: run LLM extraction over the raw input.
 * A single chunk gets one direct call; larger inputs are chunked, extracted
 * per chunk, then merged with mergeRawExtractions.
 * @throws Error when the input is empty or whitespace-only.
 */
export async function extractionAgent(
  input: PipelineInput,
): Promise<{ input: PipelineInput; extraction: RawExtraction }> {
  if (!input.raw || !input.raw.trim()) {
    throw new Error("Cannot extract from empty input");
  }
  const sizeCheck = checkInputSize(input.raw);
  if (sizeCheck.warning) {
    process.stderr.write(` [warn] ${sizeCheck.warning}\n`);
  }
  const chunks = chunkInput(input.raw);
  const sourcePrompt = getPromptForSourceType(input.sourceType);

  if (chunks.length === 1) {
    // Single chunk — direct extraction with source-specific prompt
    const userMessage = `Analyze the following ${input.sourceType} input and extract a complete world model.\n\n---\n\n${input.raw}`;
    const rawResult = await callAgentJSON(sourcePrompt, userMessage, {
      maxTokens: 16384,
    });
    return { input, extraction: validateAndCoerce(rawResult) };
  }

  // Multi-chunk — extract per chunk with source-specific prompt, then merge
  const chunkSuffix = `\n\nIMPORTANT: You are processing chunk {chunkIndex} of {chunkTotal} from a larger input.\n- Extract everything from THIS chunk\n- Use consistent entity names (chunks will be merged later)\n- Note in extraction_notes that this is a partial extraction from chunk {chunkIndex}/{chunkTotal}`;
  const extractions: RawExtraction[] = [];
  // NOTE(review): chunks are processed sequentially — presumably for rate
  // limits / ordering; confirm before parallelizing with Promise.all.
  for (const chunk of chunks) {
    const prompt = (sourcePrompt + chunkSuffix)
      .replace(/\{chunkIndex\}/g, String(chunk.index + 1))
      .replace(/\{chunkTotal\}/g, String(chunk.total));
    const userMessage = `Analyze chunk ${chunk.index + 1}/${chunk.total} of a ${input.sourceType} input and extract all world model elements.\n\n---\n\n${chunk.text}`;
    const rawResult = await callAgentJSON(prompt, userMessage, {
      maxTokens: 16384,
    });
    extractions.push(validateAndCoerce(rawResult));
  }
  return { input, extraction: mergeRawExtractions(extractions) };
}

// === src/agents/index.ts ===
export { extractionAgent } from "./extraction.js";
export { structuringAgent } from "./structuring.js";
export { validationAgent } from "./validation.js";
export { refineWorldModel } from "./refinement.js";
export { chunkInput } from "./chunker.js";
export { getPromptForSourceType } from "./prompts.js";
export { secondPassAgent } from "./second-pass.js";

// === src/agents/prompts.ts ===
// Shared JSON-schema tail appended to every source-specific prompt below.
const BASE_SCHEMA = `You must output ONLY valid JSON with this structure: { "entities": [{ "name", "type" (actor|object|system|concept|location|event|group|resource), "description", "properties"?, "tags"? }], "relations": [{ "source" (entity name), "target" (entity name), "type" (has|is_a|part_of|depends_on|produces|consumes|controls|communicates_with|located_in|triggers|inherits|contains|uses|flows_to|opposes|enables|transforms), "label", "bidirectional"? }], "processes": [{ "name", "description", "trigger"?, "steps": [{ "order", "action", "actor"?, "inputs"?, "outputs"? }], "participants" (entity names), "outcomes" }], "constraints": [{ "name", "type" (invariant|rule|boundary|dependency|capacity|temporal|authorization), "description", "scope" (entity names), "severity" (hard|soft) }], "model_name", "model_description", "source_summary", "confidence" (0-1), "extraction_notes": [] } RULES: - Entity names in relations/processes MUST exactly match entity names - Extract EVERYTHING — be thorough, not selective - Infer implicit entities and relations - Output ONLY valid JSON — no commentary outside the JSON`;

// FIX: bare `Record` (no type arguments) does not compile — restored <string, string>.
export const PROMPTS: Record<string, string> = {
  text: `You are a world-model extraction agent. 
Analyze the given text and extract a complete structured world model. Focus on: - Named entities (people, organizations, places, things, concepts) - Relationships between them (ownership, hierarchy, dependency, flow) - Any described processes, workflows, or sequences of events - Rules, constraints, limitations mentioned or implied - Implicit entities that must exist for described behaviors to work ${BASE_SCHEMA}`,
  code: `You are a world-model extraction agent specialized in SOURCE CODE analysis. Analyze the code and extract its architectural world model. Focus on: - Modules, classes, functions, and services as entities - Import/export dependencies as relations - Data flow between components (who produces what, who consumes what) - API endpoints, routes, handlers as processes with steps - Type definitions and interfaces as concept entities - Database models and schemas as resource entities - Configuration and environment variables as constraints - Error handling patterns as boundary constraints - Authentication/authorization as authorization constraints - External service integrations as system entities Infer the ARCHITECTURE, not just list files. Model how data flows through the system. ${BASE_SCHEMA}`,
  conversation: `You are a world-model extraction agent specialized in CONVERSATION analysis. Analyze the conversation and extract a structured world model of its content. Focus on: - Participants as actor entities - Topics discussed as concept entities - Systems/products/tools mentioned as system/object entities - Decisions made as event entities with relations to what they affect - Action items as process entities with steps and assigned actors - Agreements and disagreements as relations (enables/opposes) - Deadlines and commitments as temporal constraints - Open questions as extraction_notes Model the SUBSTANCE of the conversation, not the conversation itself. ${BASE_SCHEMA}`,
  document: `You are a world-model extraction agent specialized in DOCUMENT analysis. 
Analyze the document and extract a complete structured world model. Focus on: - All named entities (organizations, roles, systems, concepts, regulations) - Hierarchical relationships (org charts, system architectures, taxonomies) - Described workflows and procedures as processes - Requirements, policies, and rules as constraints - Defined terms as concept entities - Dependencies between components or teams - Temporal sequences (phases, milestones, deadlines) Treat the document as a specification of a world — extract that world completely. ${BASE_SCHEMA}`,
  url: `You are a world-model extraction agent. The input is content fetched from a URL. Analyze it and extract a complete structured world model. Focus on: - The domain/topic the page covers - All entities, services, products, or concepts described - Relationships between them - Any processes, workflows, or user journeys described - Pricing, limitations, or constraints mentioned - Technical specifications as properties on entities ${BASE_SCHEMA}`,
  mixed: `You are a world-model extraction agent. The input contains MIXED content types (possibly text, code, data, and structured content together). Focus on: - Identify what each section represents (narrative, code, data, config) - Extract entities from ALL sections — they may reference each other - Cross-reference: code entities may implement concepts described in text - Data sections may define entity properties or constraints - Use consistent entity names across all sections ${BASE_SCHEMA}`,
};

/** Return the prompt for a source type, falling back to the text prompt. */
export function getPromptForSourceType(sourceType: string): string {
  return PROMPTS[sourceType] ??
PROMPTS.text;
}

// === src/agents/query.ts ===
import { callAgent } from "../utils/llm.js";
import {
  findEntity,
  findDependents,
  pathsBetween,
  getStats,
} from "../utils/graph.js";
import type { WorldModelType } from "../schema/index.js";

/** Result of a query against a world model. */
export interface QueryResult {
  answer: string;
  method: "graph" | "inference";
  entities_referenced: string[];
  confidence: number;
}

// ─── Deterministic graph queries ──────────────────────────────

// Each pattern is tried in order; a handler returning null (e.g. entity not
// found) lets the caller fall through to LLM inference.
const GRAPH_PATTERNS: Array<{
  pattern: RegExp;
  handler: (
    model: WorldModelType,
    match: RegExpMatchArray,
  ) => QueryResult | null;
}> = [
  {
    // "what depends on X" / "what uses X" / "what needs X"
    pattern:
      /what\s+(?:depends\s+on|uses|needs|requires|consumes)\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const deps = findDependents(model, entity.id);
      if (deps.incoming.length === 0) {
        return {
          answer: `Nothing in the model depends on ${entity.name}.`,
          method: "graph",
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = deps.incoming.map(
        (d) =>
          `- ${d.entity.name} —[${d.relation.type}]→ ${entity.name}: ${d.relation.label}`,
      );
      return {
        answer: `${deps.incoming.length} entities depend on ${entity.name}:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.incoming.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
  {
    // "what does X depend on" / "what does X use" / "what does X need"
    pattern:
      /what\s+does\s+(.+?)\s+(?:depend\s+on|use|need|require|consume)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const deps = findDependents(model, entity.id);
      if (deps.outgoing.length === 0) {
        return {
          answer: `${entity.name} does not depend on anything in the model.`,
          method: "graph",
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = deps.outgoing.map(
        (d) =>
          `- ${entity.name} —[${d.relation.type}]→ ${d.entity.name}: ${d.relation.label}`,
      );
      return {
        answer: `${entity.name} depends on ${deps.outgoing.length} entities:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.outgoing.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
  {
    // "how is X connected to Y" / "path from X to Y" / "how does X relate to Y"
    pattern:
      /(?:how\s+(?:is|does)\s+(.+?)\s+(?:connected|related?)\s+to\s+(.+?)|path\s+from\s+(.+?)\s+to\s+(.+?))(?:\?|$)/i,
    handler: (model, match) => {
      // Groups 1/2 come from the "connected/related" form, 3/4 from "path from".
      const srcName = (match[1] || match[3])?.trim();
      const tgtName = (match[2] || match[4])?.trim();
      if (!srcName || !tgtName) return null;
      const src = findEntity(model, srcName);
      const tgt = findEntity(model, tgtName);
      if (!src || !tgt) return null;
      const paths = pathsBetween(model, src.id, tgt.id);
      if (paths.length === 0) {
        return {
          answer: `No path found from ${src.name} to ${tgt.name} in the model.`,
          method: "graph",
          entities_referenced: [src.name, tgt.name],
          confidence: 1,
        };
      }
      const pathDescs = paths.map((path, i) => {
        const hops = path
          .map((step, j) => {
            if (j === 0) return step.entity.name;
            return `—[${step.relation?.type ?? "?"}]→ ${step.entity.name}`;
          })
          .join(" ");
        return ` Path ${i + 1}: ${hops}`;
      });
      return {
        answer: `${paths.length} path(s) from ${src.name} to ${tgt.name}:\n${pathDescs.join("\n")}`,
        method: "graph",
        entities_referenced: [src.name, tgt.name],
        confidence: 1,
      };
    },
  },
  {
    // "what constraints apply to X" / "rules for X"
    pattern:
      /(?:what\s+constraints?\s+(?:apply|applies)\s+to|rules?\s+for)\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const applicable = model.constraints.filter((c) =>
        c.scope.includes(entity.id),
      );
      if (applicable.length === 0) {
        return {
          answer: `No constraints apply to ${entity.name}.`,
          method: "graph",
          entities_referenced: [entity.name],
          confidence: 1,
        };
      }
      const lines = applicable.map(
        (c) => `- [${c.severity}] ${c.name}: ${c.description}`,
      );
      return {
        answer: `${applicable.length} constraint(s) apply to ${entity.name}:\n${lines.join("\n")}`,
        method: "graph",
        entities_referenced: [entity.name],
        confidence: 1,
      };
    },
  },
  {
    // "how many entities" / "stats" / "summary"
    pattern: /(?:how\s+many|stats|statistics|summary|overview)\b/i,
    handler: (model) => {
      const stats = getStats(model);
      const lines = [
        `Entities: ${stats.entities.total} (${Object.entries(
          stats.entities.byType,
        )
          .map(([t, c]) => `${c} ${t}`)
          .join(", ")})`,
        `Relations: ${stats.relations.total}`,
        `Processes: ${stats.processes.total} (${stats.processes.totalSteps} steps)`,
        `Constraints: ${stats.constraints.total} (${stats.constraints.hard} hard, ${stats.constraints.soft} soft)`,
        `Confidence: ${stats.confidence}`,
        "",
        "Most connected:",
        ...stats.mostConnected.map(
          (mc) => ` - ${mc.entity}: ${mc.connections} connections`,
        ),
      ];
      return {
        answer: lines.join("\n"),
        method: "graph",
        entities_referenced: stats.mostConnected.map((mc) => mc.entity),
        confidence: 1,
      };
    },
  },
  {
    // "what is X" / "describe X" / "tell me about X"
    pattern:
      /(?:what\s+is|describe|tell\s+me\s+about|who\s+is)\s+(?:the\s+)?(.+?)(?:\?|$)/i,
    handler: (model, match) => {
      const entity = findEntity(model, match[1].trim());
      if (!entity) return null;
      const deps = findDependents(model, entity.id);
      const constraints = model.constraints.filter((c) =>
        c.scope.includes(entity.id),
      );
      const processes = model.processes.filter((p) =>
        p.participants.includes(entity.id),
      );
      const lines = [
        `**${entity.name}** (${entity.type})`,
        entity.description,
        "",
      ];
      if (entity.properties && Object.keys(entity.properties).length > 0) {
        lines.push(`Properties: ${JSON.stringify(entity.properties)}`);
      }
      if (deps.incoming.length > 0) {
        lines.push(
          `Depended on by: ${deps.incoming.map((d) => d.entity.name).join(", ")}`,
        );
      }
      if (deps.outgoing.length > 0) {
        lines.push(
          `Depends on: ${deps.outgoing.map((d) => d.entity.name).join(", ")}`,
        );
      }
      if (processes.length > 0) {
        lines.push(
          `Participates in: ${processes.map((p) => p.name).join(", ")}`,
        );
      }
      if (constraints.length > 0) {
        lines.push(
          `Constraints: ${constraints.map((c) => `[${c.severity}] ${c.name}`).join(", ")}`,
        );
      }
      return {
        answer: lines.join("\n"),
        method: "graph",
        entities_referenced: [
          entity.name,
          ...deps.incoming.map((d) => d.entity.name),
          ...deps.outgoing.map((d) => d.entity.name),
        ],
        confidence: 1,
      };
    },
  },
];

// ─── LLM inference query ──────────────────────────────────────

/** Render the whole model as plain-text context for the LLM. */
function modelToContext(model: WorldModelType): string {
  const entities = model.entities
    .map((e) => `- ${e.name} (${e.type}): ${e.description}`)
    .join("\n");
  const relations = model.relations
    .map((r) => {
      const src =
        model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt =
        model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return `- ${src} —[${r.type}]→ ${tgt}: ${r.label}`;
    })
    .join("\n");
  const processes = model.processes
    .map((p) => {
      const steps = p.steps
        .map((s) => {
          const actor = s.actor
            ? (model.entities.find((e) => e.id === s.actor)?.name ?? "?")
            : "?";
          return ` ${s.order}. ${actor}: ${s.action}`;
        })
        .join("\n");
      return `- ${p.name} (trigger: ${p.trigger ?? "n/a"}): ${p.description}\n${steps}\n Outcomes: ${p.outcomes.join(", ")}`;
    })
    .join("\n");
  const constraints = model.constraints
    .map((c) => {
      const scopeNames = c.scope
        .map((id) => model.entities.find((e) => e.id === id)?.name ?? id)
        .join(", ");
      return `- [${c.severity}] ${c.name} (applies to: ${scopeNames}): ${c.description}`;
    })
    .join("\n");
  return `# World Model: ${model.name}\n${model.description}\n\n## Entities (${model.entities.length})\n${entities}\n\n## Relations (${model.relations.length})\n${relations}\n\n## Processes (${model.processes.length})\n${processes}\n\n## Constraints (${model.constraints.length})\n${constraints}`;
}

const QUERY_SYSTEM_PROMPT = `You are a world-model query agent. You answer questions based STRICTLY on the world model provided. RULES: - Only use information present in the model — do not hallucinate or infer beyond what the model states - If the model doesn't contain enough information to answer, say so explicitly - Reference specific entities, relations, processes, and constraints by name - Be concise and direct - If the question asks about something not in the model, say "The model does not contain information about [X]"`;

/**
 * Answer a question via LLM inference over the serialized model.
 * FIX: bare `Promise` return type (no type argument) does not compile —
 * restored Promise<QueryResult> here and on queryWorldModel below.
 */
async function inferenceQuery(
  model: WorldModelType,
  question: string,
): Promise<QueryResult> {
  const context = modelToContext(model);
  const userMessage = `${context}\n\n---\n\nQuestion: ${question}`;
  const answer = await callAgent(QUERY_SYSTEM_PROMPT, userMessage, {
    maxTokens: 4096,
  });
  // Extract entity names that appear in the answer
  const referenced = model.entities
    .filter((e) => answer.toLowerCase().includes(e.name.toLowerCase()))
    .map((e) => e.name);
  return {
    answer,
    method: "inference",
    entities_referenced: referenced,
    confidence: 0.8,
  };
}

// ─── Public API ───────────────────────────────────────────────

/** Answer a question: deterministic graph patterns first, LLM fallback. */
export async function queryWorldModel(
  model: WorldModelType,
  question: string,
): Promise<QueryResult> {
  if (!question ||
!question.trim()) {
    return {
      answer: "No question provided.",
      method: "graph",
      entities_referenced: [],
      confidence: 1,
    };
  }
  // Try deterministic graph queries first
  for (const { pattern, handler } of GRAPH_PATTERNS) {
    const match = question.match(pattern);
    if (match) {
      const result = handler(model, match);
      if (result) return result;
      // Pattern matched but handler returned null (entity not found) — fall through to inference
    }
  }
  // Fall back to LLM inference
  return inferenceQuery(model, question);
}

// === src/agents/refinement.ts ===
import { callAgentJSON } from "../utils/llm.js";
import type { WorldModelType } from "../schema/index.js";
import type { PipelineInput } from "../pipeline/index.js";
import type { RawExtraction } from "./extraction.js";
import { structuringAgent } from "./structuring.js";
import { validationAgent } from "./validation.js";
import { mergeWorldModels } from "../utils/merge.js";

// System prompt for delta extraction; {existingModelSummary} is substituted
// with summarizeModel() output before use.
const REFINEMENT_PROMPT = `You are a world-model refinement agent. You are given an EXISTING world model and NEW input. Your job is to extract ONLY what the new input adds, changes, or contradicts relative to the existing model. ## Existing World Model Summary: {existingModelSummary} ## Instructions: 1. Extract new entities NOT already in the existing model 2. Extract new relations between entities (new or existing) 3. Extract new processes or refinements to existing processes 4. Extract new constraints or modifications to existing ones 5. 
If the new input CONTRADICTS something in the existing model, extract the new version and note the contradiction in extraction_notes RULES: - DO NOT re-extract entities/relations that already exist unchanged - DO reference existing entity names exactly when creating new relations to them - Mark confidence based on how clearly the new input supports each extraction - Note in extraction_notes what was added vs what was modified - Output ONLY valid JSON matching the extraction schema`;

// Compact text rendering of a model (descriptions truncated) for the
// refinement prompt.
function summarizeModel(model: WorldModelType): string {
  const entityList = model.entities
    .map((e) => ` - ${e.name} (${e.type}): ${e.description.slice(0, 100)}`)
    .join("\n");
  const relationList = model.relations
    .map((r) => {
      const src =
        model.entities.find((e) => e.id === r.source)?.name ?? r.source;
      const tgt =
        model.entities.find((e) => e.id === r.target)?.name ?? r.target;
      return ` - ${src} —[${r.type}]→ ${tgt}`;
    })
    .join("\n");
  const processList = model.processes
    .map((p) => ` - ${p.name}: ${p.description.slice(0, 80)}`)
    .join("\n");
  const constraintList = model.constraints
    .map((c) => ` - [${c.severity}] ${c.name}: ${c.description.slice(0, 80)}`)
    .join("\n");
  return `Entities (${model.entities.length}):\n${entityList}\n\nRelations (${model.relations.length}):\n${relationList}\n\nProcesses (${model.processes.length}):\n${processList}\n\nConstraints (${model.constraints.length}):\n${constraintList}`;
}

// Incrementally refine an existing model with new input:
// extract delta → structure → merge → validate. Optional stage callbacks
// report timings per stage.
export async function refineWorldModel(
  existingModel: WorldModelType,
  newInput: PipelineInput,
  options?: {
    onStageStart?: (name: string) => void;
    onStageEnd?: (name: string, ms: number) => void;
  },
): Promise<{ worldModel: WorldModelType; delta: WorldModelType }> {
  const summary = summarizeModel(existingModel);
  const systemPrompt = REFINEMENT_PROMPT.replace(
    "{existingModelSummary}",
    summary,
  );
  // Extract delta
  options?.onStageStart?.("refinement-extraction");
  const start = Date.now();
  const userMessage = `Given the existing world model above, analyze this NEW 
${newInput.sourceType} input and extract only what's new or changed.\n\n---\n\n${newInput.raw}`; const deltaExtraction = await callAgentJSON( systemPrompt, userMessage, { maxTokens: 16384, }, ); options?.onStageEnd?.("refinement-extraction", Date.now() - start); // Structure the delta into a world model options?.onStageStart?.("refinement-structuring"); const structStart = Date.now(); const { worldModel: deltaModel } = await structuringAgent({ input: newInput, extraction: deltaExtraction, }); options?.onStageEnd?.("refinement-structuring", Date.now() - structStart); // Merge existing + delta options?.onStageStart?.("refinement-merge"); const mergeStart = Date.now(); const merged = mergeWorldModels(existingModel, deltaModel, { name: existingModel.name, description: existingModel.description, }); options?.onStageEnd?.("refinement-merge", Date.now() - mergeStart); // Validate the merged result options?.onStageStart?.("refinement-validation"); const valStart = Date.now(); const { worldModel: validatedModel } = await validationAgent({ input: newInput, worldModel: merged, }); options?.onStageEnd?.("refinement-validation", Date.now() - valStart); return { worldModel: validatedModel, delta: deltaModel }; } // === src/agents/second-pass.ts === import { callAgentJSON } from "../utils/llm.js"; import type { WorldModelType } from "../schema/index.js"; import type { PipelineInput } from "../pipeline/index.js"; import type { RawExtraction } from "./extraction.js"; import { validateExtraction } from "../schema/extraction.js"; const SECOND_PASS_PROMPT = `You are a world-model COMPLETENESS agent. You are given: 1. The original raw input 2. A world model that was already extracted from it Your job is to find everything the FIRST PASS MISSED. The first pass captures what's explicitly stated. You capture what's IMPLICIT. 
Look for: **Missing entities:** - Entities that must EXIST for the described processes to work, but were never named - Infrastructure entities (authentication, logging, error handling, networking) - Role entities implied by actions ("someone approves" → there's an Approver actor) - Data entities implied by processes ("saves to database" → there's a Record object) **Missing relations:** - Dependencies that are logically necessary but not stated - Hierarchical relations implied by context (if X contains Y, Y is part_of X) - Communication paths implied by processes (if A triggers B, A communicates_with B) **Missing processes:** - Error/failure paths (what happens when the main process fails?) - Setup/teardown processes (what must happen before/after the described flow?) - Maintenance processes (backups, migrations, updates) **Missing constraints:** - Physical/logical impossibilities not stated (can't be in two places, can't exceed capacity) - Temporal constraints implied by ordering (step 2 can't happen before step 1) - Authorization constraints implied by roles (only admins can X) - Data integrity constraints (required fields, unique identifiers, referential integrity) RULES: - ONLY extract what's NEW — do not re-extract entities/relations already in the model - Reference existing entity names exactly when creating relations to them - Every extraction must be JUSTIFIED by the input — no hallucination - Set confidence lower (0.3-0.7) since these are inferences, not direct extractions - In extraction_notes, explain WHY each new element was inferred - Output ONLY valid JSON matching the extraction schema`; function summarizeModelForPrompt(model: WorldModelType): string { const entities = model.entities .map((e) => `- ${e.name} (${e.type}): ${e.description}`) .join("\n"); const relations = model.relations .map((r) => { const src = model.entities.find((e) => e.id === r.source)?.name ?? r.source; const tgt = model.entities.find((e) => e.id === r.target)?.name ?? 
r.target; return `- ${src} —[${r.type}]→ ${tgt}: ${r.label}`; }) .join("\n"); const processes = model.processes .map((p) => { const steps = p.steps .map((s) => { const actor = s.actor ? (model.entities.find((e) => e.id === s.actor)?.name ?? s.actor) : "unknown"; return ` ${s.order}. ${actor}: ${s.action}`; }) .join("\n"); return `- ${p.name}: ${p.description}\n${steps}`; }) .join("\n"); const constraints = model.constraints .map((c) => `- [${c.severity}] ${c.name}: ${c.description}`) .join("\n"); return `ENTITIES (${model.entities.length}):\n${entities}\n\nRELATIONS (${model.relations.length}):\n${relations}\n\nPROCESSES (${model.processes.length}):\n${processes}\n\nCONSTRAINTS (${model.constraints.length}):\n${constraints}`; } export async function secondPassAgent( originalInput: PipelineInput, currentModel: WorldModelType, ): Promise { const modelSummary = summarizeModelForPrompt(currentModel); const userMessage = `## Original Input:\n${originalInput.raw}\n\n---\n\n## Already Extracted World Model:\n${modelSummary}\n\n---\n\nWhat did the first pass MISS? 
Extract only NEW entities, relations, processes, and constraints that are implicit in the input but not yet in the model.`; const rawResult = await callAgentJSON( SECOND_PASS_PROMPT, userMessage, { maxTokens: 16384, }, ); const { extraction, issues } = validateExtraction(rawResult); if (issues.length > 0) { process.stderr.write(` [second-pass validation] ${issues.join("; ")}\n`); } return extraction as unknown as RawExtraction; } // === src/agents/structuring.ts === import type { WorldModelType } from "../schema/index.js"; import type { PipelineInput } from "../pipeline/index.js"; import type { RawExtraction } from "./extraction.js"; import { genId } from "../utils/ids.js"; export interface StructuringOutput { input: PipelineInput; worldModel: WorldModelType; } export function structuringAgent(stageInput: { input: PipelineInput; extraction: RawExtraction; }): Promise { const { input, extraction } = stageInput; // Normalize entity types the LLM may return outside the enum const VALID_ENTITY_TYPES = new Set([ "actor", "object", "system", "concept", "location", "event", "group", "resource", ]); const ENTITY_TYPE_ALIASES: Record = { person: "actor", user: "actor", role: "actor", agent: "actor", organization: "group", org: "group", team: "group", company: "group", place: "location", area: "location", region: "location", service: "system", platform: "system", tool: "system", application: "system", app: "system", idea: "concept", principle: "concept", pattern: "concept", category: "concept", item: "object", thing: "object", product: "object", data: "resource", asset: "resource", file: "resource", document: "resource", incident: "event", action: "event", occurrence: "event", }; function normalizeEntityType( raw: string, ): WorldModelType["entities"][number]["type"] { const lower = raw.toLowerCase().trim(); if (VALID_ENTITY_TYPES.has(lower)) return lower as WorldModelType["entities"][number]["type"]; return (ENTITY_TYPE_ALIASES[lower] ?? 
"object") as WorldModelType["entities"][number]["type"]; } // Normalize relation types const VALID_RELATION_TYPES = new Set([ "has", "is_a", "part_of", "depends_on", "produces", "consumes", "controls", "communicates_with", "located_in", "triggers", "inherits", "contains", "uses", "flows_to", "opposes", "enables", "transforms", ]); function normalizeRelationType( raw: string, ): WorldModelType["relations"][number]["type"] { const lower = raw.toLowerCase().trim().replace(/ /g, "_"); if (VALID_RELATION_TYPES.has(lower)) return lower as WorldModelType["relations"][number]["type"]; return "uses" as WorldModelType["relations"][number]["type"]; } // Normalize constraint types const VALID_CONSTRAINT_TYPES = new Set([ "invariant", "rule", "boundary", "dependency", "capacity", "temporal", "authorization", ]); function normalizeConstraintType( raw: string, ): WorldModelType["constraints"][number]["type"] { const lower = raw.toLowerCase().trim().replace(/ /g, "_"); if (VALID_CONSTRAINT_TYPES.has(lower)) return lower as WorldModelType["constraints"][number]["type"]; return "rule" as WorldModelType["constraints"][number]["type"]; } // Build entity name → ID map const entityIdMap = new Map(); const entities = extraction.entities.map((e) => { const id = genId("ent"); entityIdMap.set(e.name, id); return { id, name: e.name, type: normalizeEntityType(e.type), description: e.description, properties: e.properties, tags: e.tags, }; }); const resolveEntityId = (name: string): string => { const existing = entityIdMap.get(name); if (existing) return existing; // Create a placeholder entity for unresolved references const id = genId("ent"); entityIdMap.set(name, id); entities.push({ id, name, type: "object", description: `Auto-created entity for unresolved reference: ${name}`, properties: undefined, tags: ["auto-created"], }); return id; }; const relations = extraction.relations.map((r) => ({ id: genId("rel"), type: normalizeRelationType(r.type), source: resolveEntityId(r.source), target: 
resolveEntityId(r.target), label: r.label, bidirectional: r.bidirectional, })); const processes = extraction.processes.map((p) => ({ id: genId("proc"), name: p.name, description: p.description, trigger: p.trigger, steps: p.steps.map((s, idx) => ({ order: s.order ?? idx + 1, action: s.action, actor: s.actor ? resolveEntityId(s.actor) : undefined, input: s.inputs?.map(resolveEntityId), output: s.outputs?.map(resolveEntityId), })), participants: p.participants.map(resolveEntityId), outcomes: p.outcomes, })); const constraints = extraction.constraints.map((c) => ({ id: genId("cstr"), name: c.name, type: normalizeConstraintType(c.type), description: c.description, scope: c.scope.map(resolveEntityId), severity: c.severity, })); const worldModel: WorldModelType = { id: genId("wm"), name: extraction.model_name || input.name || "Untitled World Model", description: extraction.model_description || "Extracted world model", version: "0.1.0", created_at: new Date().toISOString(), entities, relations, processes, constraints, metadata: { source_type: input.sourceType, source_summary: extraction.source_summary || "No summary", confidence: extraction.confidence ?? 
0.5, extraction_notes: extraction.extraction_notes, }, }; return Promise.resolve({ input, worldModel }); } // === src/agents/validation.ts === import type { WorldModelType, ValidationResultType, ValidationIssueType, } from "../schema/index.js"; import type { PipelineInput } from "../pipeline/index.js"; export interface ValidationOutput { worldModel: WorldModelType; validation: ValidationResultType; } export function validationAgent(stageInput: { input: PipelineInput; worldModel: WorldModelType; }): Promise { const { worldModel } = stageInput; const issues: ValidationIssueType[] = []; const entityIds = new Set(worldModel.entities.map((e) => e.id)); // Check relations reference valid entities for (const rel of worldModel.relations) { if (!entityIds.has(rel.source)) { issues.push({ type: "error", code: "DANGLING_REL_SOURCE", message: `Relation "${rel.id}" references non-existent source entity "${rel.source}"`, path: `relations.${rel.id}.source`, }); } if (!entityIds.has(rel.target)) { issues.push({ type: "error", code: "DANGLING_REL_TARGET", message: `Relation "${rel.id}" references non-existent target entity "${rel.target}"`, path: `relations.${rel.id}.target`, }); } if (rel.source === rel.target) { issues.push({ type: "warning", code: "SELF_RELATION", message: `Relation "${rel.id}" is a self-reference on entity "${rel.source}"`, path: `relations.${rel.id}`, }); } } // Check processes reference valid entities for (const proc of worldModel.processes) { for (const participant of proc.participants) { if (!entityIds.has(participant)) { issues.push({ type: "error", code: "DANGLING_PROC_PARTICIPANT", message: `Process "${proc.name}" references non-existent participant "${participant}"`, path: `processes.${proc.id}.participants`, }); } } for (const step of proc.steps) { if (step.actor && !entityIds.has(step.actor)) { issues.push({ type: "error", code: "DANGLING_STEP_ACTOR", message: `Process "${proc.name}" step ${step.order} references non-existent actor "${step.actor}"`, 
path: `processes.${proc.id}.steps.${step.order}.actor`, }); } } if (proc.steps.length === 0) { issues.push({ type: "warning", code: "EMPTY_PROCESS", message: `Process "${proc.name}" has no steps`, path: `processes.${proc.id}.steps`, }); } } // Check constraints reference valid entities for (const constraint of worldModel.constraints) { for (const scopeId of constraint.scope) { if (!entityIds.has(scopeId)) { issues.push({ type: "error", code: "DANGLING_CONSTRAINT_SCOPE", message: `Constraint "${constraint.name}" references non-existent entity "${scopeId}"`, path: `constraints.${constraint.id}.scope`, }); } } } // Check for orphan entities (no relations, not in any process) const referencedEntities = new Set(); for (const rel of worldModel.relations) { referencedEntities.add(rel.source); referencedEntities.add(rel.target); } for (const proc of worldModel.processes) { for (const p of proc.participants) referencedEntities.add(p); } for (const constraint of worldModel.constraints) { for (const s of constraint.scope) referencedEntities.add(s); } for (const entity of worldModel.entities) { if (!referencedEntities.has(entity.id)) { issues.push({ type: "warning", code: "ORPHAN_ENTITY", message: `Entity "${entity.name}" (${entity.id}) is not referenced by any relation, process, or constraint`, path: `entities.${entity.id}`, }); } } // Check for duplicate entity names const nameCount = new Map(); for (const entity of worldModel.entities) { nameCount.set(entity.name, (nameCount.get(entity.name) ?? 
0) + 1); } for (const [name, count] of nameCount) { if (count > 1) { issues.push({ type: "warning", code: "DUPLICATE_ENTITY_NAME", message: `Entity name "${name}" appears ${count} times — may indicate extraction duplication`, path: `entities`, }); } } // Completeness checks if (worldModel.entities.length === 0) { issues.push({ type: "error", code: "NO_ENTITIES", message: "World model has no entities", }); } if (worldModel.relations.length === 0) { issues.push({ type: "warning", code: "NO_RELATIONS", message: "World model has no relations — entities are unconnected", }); } const hasErrors = issues.some((i) => i.type === "error"); const validation: ValidationResultType = { valid: !hasErrors, issues, stats: { entities: worldModel.entities.length, relations: worldModel.relations.length, processes: worldModel.processes.length, constraints: worldModel.constraints.length, }, }; return Promise.resolve({ worldModel, validation }); } // === src/cli.ts === #!/usr/bin/env node import { program } from "commander"; import { readFileSync, writeFileSync, existsSync } from "node:fs"; import { resolve } from "node:path"; import chalk from "chalk"; import { stringify as yamlStringify } from "yaml"; import { buildWorldModel } from "./swm.js"; import { refineWorldModel } from "./agents/refinement.js"; import { mergeWorldModels, diffWorldModels } from "./utils/merge.js"; import { findEntity, findDependents, toMermaid, toDot, getStats, } from "./utils/graph.js"; import { queryWorldModel } from "./agents/query.js"; import { intersection, difference, overlay } from "./utils/algebra.js"; import { toClaudeMd } from "./export/claude-md.js"; import { toSystemPrompt } from "./export/system-prompt.js"; import { toMcpSchema } from "./export/mcp-schema.js"; import { createTimeline, addSnapshot, entityHistory, timelineSummary, } from "./utils/timeline.js"; import type { Timeline } from "./utils/timeline.js"; import type { PipelineInput } from "./pipeline/index.js"; import type { WorldModelType } 
from "./schema/index.js"; function detectSourceType(raw: string): PipelineInput["sourceType"] { if (raw.startsWith("http://") || raw.startsWith("https://")) return "url"; if ( raw.includes("function ") || raw.includes("class ") || raw.includes("import ") || raw.includes("def ") || raw.includes("fn ") ) return "code"; if (raw.includes("?") && raw.includes(":")) return "conversation"; return "text"; } function readInput(inputArg?: string, filePath?: string): string { if (filePath) return readFileSync(resolve(filePath), "utf-8"); if (inputArg) { try { return readFileSync(resolve(inputArg), "utf-8"); } catch { return inputArg; } } throw new Error( "No input provided. Pass text, a file path, or use -f .", ); } function readModel(path: string): WorldModelType { const raw = readFileSync(resolve(path), "utf-8"); return JSON.parse(raw) as WorldModelType; } function formatOutput( model: WorldModelType, format: string, pretty: boolean, ): string { if (format === "yaml") return yamlStringify(model); if (format === "mermaid") return toMermaid(model); if (format === "dot") return toDot(model); return pretty ? 
JSON.stringify(model, null, 2) : JSON.stringify(model); } function stageCallbacks(quiet?: boolean) { return { onStageStart: (name: string) => { if (!quiet) process.stderr.write(chalk.yellow(` ▸ ${name}...`)); }, onStageEnd: (_name: string, ms: number) => { if (!quiet) process.stderr.write(chalk.green(` done (${ms}ms)\n`)); }, }; } program .name("swm") .description( "Structured World Model — turn anything into a structured world model", ) .version("0.1.0"); // ─── model ──────────────────────────────────────────────────── program .command("model") .description("Build a structured world model from input") .argument("[input]", "Text input or file path") .option("-f, --file ", "Read input from file") .option("-o, --output ", "Write output to file") .option( "-t, --type ", "Source type: text, code, document, url, conversation, mixed", ) .option( "--format ", "Output format: json, yaml, mermaid, dot", "json", ) .option("--pretty", "Pretty-print JSON output", true) .option("--quiet", "Suppress progress output") .option( "-p, --passes ", "Number of extraction passes (1=standard, 2-3=deeper)", "1", ) .action( async ( inputArg: string | undefined, opts: Record, ) => { try { const raw = readInput(inputArg, opts.file as string | undefined); if (!raw.trim()) { console.error(chalk.red("Error: No input provided")); process.exit(1); } const sourceType = (opts.type as PipelineInput["sourceType"]) || detectSourceType(raw); const input: PipelineInput = { raw, sourceType, name: (opts.file as string) || (inputArg && inputArg.length < 100 ? inputArg : undefined), }; if (!opts.quiet) { console.error(chalk.blue("■ Structured World Model")); console.error( chalk.gray(` Source: ${sourceType} (${raw.length} chars)\n`), ); } const passes = parseInt((opts.passes as string) ?? "1", 10) || 1; const result = await buildWorldModel(input, { ...stageCallbacks(opts.quiet as boolean), passes, }); const output = formatOutput( result.worldModel, (opts.format as string) ?? 
"json", (opts.pretty as boolean) ?? true, ); if (opts.output) { writeFileSync(resolve(opts.output as string), output, "utf-8"); if (!opts.quiet) console.error(chalk.green(`\n ✓ Written to ${opts.output}`)); } else { console.log(output); } if (!opts.quiet) { const v = result.validation; const statusColor = v.valid ? chalk.green : chalk.red; console.error( statusColor( `\n Validation: ${v.valid ? "PASSED" : "FAILED"} — ${v.stats.entities} entities, ${v.stats.relations} relations, ${v.stats.processes} processes, ${v.stats.constraints} constraints`, ), ); if (v.issues.length > 0) { console.error(chalk.gray(` Issues:`)); for (const issue of v.issues) { const icon = issue.type === "error" ? chalk.red("✗") : issue.type === "warning" ? chalk.yellow("!") : chalk.blue("i"); console.error(` ${icon} ${issue.message}`); } } console.error(chalk.gray(`\n Total: ${result.totalDurationMs}ms`)); } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── refine ─────────────────────────────────────────────────── program .command("refine") .description( "Refine an existing world model with new input (incremental extraction)", ) .argument("", "Path to existing world model JSON") .argument("[input]", "New text input or file path") .option("-f, --file ", "Read new input from file") .option("-o, --output ", "Write refined model to file") .option("-t, --type ", "Source type of new input") .option( "--format ", "Output format: json, yaml, mermaid, dot", "json", ) .option("--quiet", "Suppress progress output") .action( async ( modelPath: string, inputArg: string | undefined, opts: Record, ) => { try { const existing = readModel(modelPath); const raw = readInput(inputArg, opts.file as string | undefined); const sourceType = (opts.type as PipelineInput["sourceType"]) || detectSourceType(raw); if (!opts.quiet) { console.error(chalk.blue("■ Refining World Model")); console.error( chalk.gray( ` Existing: 
${existing.entities.length} entities, ${existing.relations.length} relations`, ), ); console.error( chalk.gray(` New input: ${sourceType} (${raw.length} chars)\n`), ); } const { worldModel, delta } = await refineWorldModel( existing, { raw, sourceType }, stageCallbacks(opts.quiet as boolean), ); const output = formatOutput( worldModel, (opts.format as string) ?? "json", true, ); if (opts.output) { writeFileSync(resolve(opts.output as string), output, "utf-8"); if (!opts.quiet) console.error(chalk.green(`\n ✓ Written to ${opts.output}`)); } else { console.log(output); } if (!opts.quiet) { console.error( chalk.gray( `\n Delta: +${delta.entities.length} entities, +${delta.relations.length} relations, +${delta.processes.length} processes`, ), ); console.error( chalk.gray( ` Result: ${worldModel.entities.length} entities, ${worldModel.relations.length} relations total`, ), ); } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── merge ──────────────────────────────────────────────────── program .command("merge") .description("Merge two world models into one") .argument("", "Path to first world model JSON") .argument("", "Path to second world model JSON") .option("-o, --output ", "Write merged model to file") .option("--format ", "Output format: json, yaml", "json") .action( ( pathA: string, pathB: string, opts: Record, ) => { try { const a = readModel(pathA); const b = readModel(pathB); const merged = mergeWorldModels(a, b); const output = formatOutput(merged, opts.format ?? "json", true); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error( chalk.green(`✓ Merged model written to ${opts.output}`), ); } else { console.log(output); } console.error( chalk.gray( ` ${merged.entities.length} entities, ${merged.relations.length} relations`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── diff ───────────────────────────────────────────────────── program .command("diff") .description("Diff two world models") .argument("", "Path to before world model JSON") .argument("", "Path to after world model JSON") .action((beforePath: string, afterPath: string) => { try { const before = readModel(beforePath); const after = readModel(afterPath); const diff = diffWorldModels(before, after); console.log(chalk.blue("■ World Model Diff\n")); console.log(chalk.white(` Summary: ${diff.summary}\n`)); if (diff.entities.added.length) { console.log(chalk.green(" + Entities added:")); for (const name of diff.entities.added) console.log(chalk.green(` + ${name}`)); } if (diff.entities.removed.length) { console.log(chalk.red(" - Entities removed:")); for (const name of diff.entities.removed) console.log(chalk.red(` - ${name}`)); } if (diff.entities.modified.length) { console.log(chalk.yellow(" ~ Entities modified:")); for (const name of diff.entities.modified) console.log(chalk.yellow(` ~ ${name}`)); } if (diff.relations.added.length) { console.log( chalk.green(` + ${diff.relations.added.length} relations added`), ); } if (diff.relations.removed.length) { console.log( chalk.red(` - ${diff.relations.removed.length} relations removed`), ); } if (diff.processes.added.length) { console.log( chalk.green(` + ${diff.processes.added.length} processes added`), ); } if (diff.constraints.added.length) { console.log( chalk.green(` + ${diff.constraints.added.length} constraints added`), ); } } catch (err) { console.error( chalk.red(`Error: ${err instanceof Error ? 
err.message : String(err)}`), ); process.exit(1); } }); // ─── inspect ────────────────────────────────────────────────── program .command("inspect") .description("Inspect a world model — stats, entity lookup, graph export") .argument("", "Path to world model JSON") .option( "-e, --entity ", "Look up a specific entity and show its relations", ) .option("--stats", "Show detailed statistics") .option("--format ", "Export format: mermaid, dot") .action( (modelPath: string, opts: Record) => { try { const model = readModel(modelPath); if (opts.format) { console.log(formatOutput(model, opts.format as string, true)); return; } if (opts.entity) { const entity = findEntity(model, opts.entity as string); if (!entity) { console.error(chalk.red(`Entity "${opts.entity}" not found`)); process.exit(1); } console.log( chalk.blue(`■ ${entity.name}`) + chalk.gray(` (${entity.type})`), ); console.log(chalk.white(` ${entity.description}`)); if (entity.properties) { console.log( chalk.gray(` Properties: ${JSON.stringify(entity.properties)}`), ); } const deps = findDependents(model, entity.id); if (deps.incoming.length) { console.log(chalk.gray("\n Incoming:")); for (const d of deps.incoming) { console.log( ` ${d.entity.name} —[${d.relation.type}]→ ${entity.name}`, ); } } if (deps.outgoing.length) { console.log(chalk.gray("\n Outgoing:")); for (const d of deps.outgoing) { console.log( ` ${entity.name} —[${d.relation.type}]→ ${d.entity.name}`, ); } } return; } // Default: show stats const stats = getStats(model); console.log(chalk.blue(`■ ${model.name}`)); console.log(chalk.gray(` ${model.description}\n`)); console.log(` Entities: ${stats.entities.total}`); for (const [type, count] of Object.entries(stats.entities.byType)) { console.log(chalk.gray(` ${type}: ${count}`)); } console.log(` Relations: ${stats.relations.total}`); for (const [type, count] of Object.entries(stats.relations.byType)) { console.log(chalk.gray(` ${type}: ${count}`)); } console.log( ` Processes: 
${stats.processes.total} (${stats.processes.totalSteps} steps)`, ); console.log( ` Constraints: ${stats.constraints.total} (${stats.constraints.hard} hard, ${stats.constraints.soft} soft)`, ); console.log(` Confidence: ${stats.confidence}`); if (stats.mostConnected.length) { console.log(chalk.gray("\n Most connected:")); for (const mc of stats.mostConnected) { console.log(` ${mc.entity}: ${mc.connections} connections`); } } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── validate ───────────────────────────────────────────────── program .command("validate") .description("Validate a world model JSON file with full integrity checks") .argument("", "Path to world model JSON") .action(async (file: string) => { try { const model = readModel(file); console.log(chalk.blue("■ Validating world model")); console.log( chalk.gray( ` ${model.entities.length} entities, ${model.relations.length} relations\n`, ), ); const { validationAgent } = await import("./agents/validation.js"); const { validation } = await validationAgent({ input: { raw: "", sourceType: "text" }, worldModel: model, }); const statusColor = validation.valid ? chalk.green : chalk.red; console.log( statusColor(` ${validation.valid ? "✓ VALID" : "✗ INVALID"}`), ); if (validation.issues.length > 0) { for (const issue of validation.issues) { const icon = issue.type === "error" ? chalk.red("✗") : issue.type === "warning" ? chalk.yellow("!") : chalk.blue("i"); console.log(` ${icon} [${issue.code}] ${issue.message}`); } } else { console.log(chalk.green(" No issues found")); } console.log( chalk.gray( `\n Stats: ${validation.stats.entities} entities, ${validation.stats.relations} relations, ${validation.stats.processes} processes, ${validation.stats.constraints} constraints`, ), ); } catch (err) { console.error( chalk.red(`Error: ${err instanceof Error ? 
err.message : String(err)}`), ); process.exit(1); } }); // ─── query ──────────────────────────────────────────────────── program .command("query") .description("Ask a question about a world model") .argument("", "Path to world model JSON") .argument("", "Natural language question") .option("--json", "Output result as JSON") .action( async ( modelPath: string, question: string, opts: Record, ) => { try { const model = readModel(modelPath); const result = await queryWorldModel(model, question); if (opts.json) { console.log(JSON.stringify(result, null, 2)); } else { console.log(result.answer); console.error( chalk.gray( `\n Method: ${result.method} | Confidence: ${result.confidence} | Entities: ${result.entities_referenced.join(", ") || "none"}`, ), ); } } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: intersect ─────────────────────────────────────── program .command("intersect") .description("Compute the intersection of two world models (shared entities)") .argument("", "Path to first world model JSON") .argument("", "Path to second world model JSON") .option("-o, --output ", "Write result to file") .action( ( pathA: string, pathB: string, opts: Record, ) => { try { const result = intersection(readModel(pathA), readModel(pathB)); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray( ` ${result.entities.length} shared entities, ${result.relations.length} shared relations`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: subtract ─────────────────────────────────────── program .command("subtract") .description("Compute A \\ B — entities in A that are not in B") .argument("", "Path to base world model JSON") .argument("", "Path to model to subtract") .option("-o, --output ", "Write result to file") .action( ( pathA: string, pathB: string, opts: Record, ) => { try { const result = difference(readModel(pathA), readModel(pathB)); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray(` ${result.entities.length} unique entities remaining`), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── algebra: overlay ──────────────────────────────────────── program .command("overlay") .description( "Apply a lens model on top of a base model (constraints, relations overlay)", ) .argument("", "Path to base world model JSON") .argument("", "Path to lens model to overlay") .option("-o, --output ", "Write result to file") .action( ( basePath: string, lensPath: string, opts: Record, ) => { try { const result = overlay(readModel(basePath), readModel(lensPath)); const output = JSON.stringify(result, null, 2); if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error(chalk.green(`✓ Written to ${opts.output}`)); } else { console.log(output); } console.error( chalk.gray( ` ${result.entities.length} entities, ${result.constraints.length} constraints after overlay`, ), ); } catch (err) { console.error( chalk.red( `Error: ${err instanceof Error ? 
err.message : String(err)}`, ), ); process.exit(1); } }, ); // ─── export ────────────────────────────────────────────────── program .command("export") .description("Export a world model as AI-consumable context") .argument("", "Path to world model JSON") .option( "--as ", "Export format: claude-md, system-prompt, mcp", "claude-md", ) .option("-o, --output ", "Write to file") .action((modelPath: string, opts: Record) => { try { const model = readModel(modelPath); let output: string; switch (opts.as) { case "claude-md": output = toClaudeMd(model); break; case "system-prompt": output = toSystemPrompt(model); break; case "mcp": output = JSON.stringify(toMcpSchema(model), null, 2); break; default: console.error( chalk.red( `Unknown export format: ${opts.as}. Use: claude-md, system-prompt, mcp`, ), ); process.exit(1); } if (opts.output) { writeFileSync(resolve(opts.output), output, "utf-8"); console.error( chalk.green(`✓ Exported as ${opts.as} to ${opts.output}`), ); } else { console.log(output); } } catch (err) { console.error( chalk.red(`Error: ${err instanceof Error ? err.message : String(err)}`), ); process.exit(1); } }); // ─── timeline: snapshot ─────────────────────────────────────── program .command("snapshot") .description("Add a world model as a snapshot to a timeline") .argument("", "Path to world model JSON") .option( "--timeline ", "Path to timeline JSON (created if missing)", "timeline.json", ) .option("-l, --label