diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index 722d9d87..9d94cd88 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -111,6 +111,41 @@ notion skipped skipped done done Use `--json` when a script or agent needs the selected plan and per-target results. +## Inspect source ingest traces + +Source ingest writes persistent JSONL traces for postmortem debugging. Plain +ingest output prints the trace path near the report, run, and job identifiers +when a trace is available: + +```text +Report: report-abc123 +Run: run-abc123 +Job: job-abc123 +Trace: .ktx/ingest-traces/job-abc123/trace.jsonl +``` + +The trace file lives under the project directory at +`.ktx/ingest-traces/<job-id>/trace.jsonl`. Each line is a JSON event with the +job id, run id, sync id, connection id, source key, phase, event name, timing, +state snapshot, decision context, and error details. Failed runs also write a +stored ingest report with `status: "failed"`, `failure.phase`, +`failure.message`, and the same trace path. + +Use `jq` or line-oriented tools to inspect a trace: + +```bash +jq -c '. | {at, level, phase, event, durationMs, data, error}' \ + .ktx/ingest-traces/<job-id>/trace.jsonl +``` + +KTX writes `debug` trace events by default. 
Set `KTX_INGEST_TRACE_LEVEL` to +`error`, `info`, `debug`, or `trace` before running ingest to change the trace +verbosity: + +```bash +KTX_INGEST_TRACE_LEVEL=trace ktx ingest metabase +``` + ## Common errors | Error | Cause | Recovery | diff --git a/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md new file mode 100644 index 00000000..89e8ed6c --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md @@ -0,0 +1,2938 @@ +# Isolated Diff Ingestion V1 Core Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add the first production isolated-diff ingestion path, with persistent +postmortem traces, final artifact gates, and Metabase regression coverage. + +**Architecture:** Keep the existing shared-worktree runner as the fallback path +while a private runner-owned source allowlist enables isolated diffs for +Metabase and tests. The isolated path creates one integration worktree, runs +optional deterministic projection there, executes each work unit in a child +worktree from the same ingestion base commit, collects binary Git patches, and +applies accepted patches back to the integration worktree in deterministic +order before reconciliation, final gates, and squash. Every ingestion step emits +structured JSONL trace events under `.ktx/ingest-traces/<job-id>/trace.jsonl` +and references that path in reports and CLI status output. + +**Tech Stack:** TypeScript ESM/NodeNext, simple-git, Node `fs/promises`, Vitest, +existing `GitService`, `SessionWorktreeService`, `IngestBundleRunner`, +`SemanticLayerService`, `KnowledgeWikiService`, and ingest report schemas. 
+ +--- + +## Audit summary + +This audit read +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, searched +`docs/superpowers/plans/`, and inspected the current ingest runner under +`packages/context/src/ingest/`. + +No existing plan or implementation covers isolated-diff ingestion. Searches for +the exact implementation terms from the spec, including `git apply --3way`, +`--binary --no-renames`, `integration worktree`, `global semantic gate`, and +`wiki body reference`, returned no plan or code matches. Existing May 13 unified +ingest plans are implemented public CLI and UX work; May 15 Claude Code plans +cover LLM backend isolation, not ingestion diff isolation. + +Implemented foundations that this plan reuses: + +- `SessionWorktreeService` can create Git worktrees from a base SHA. +- `GitService` already supports `addWorktree`, `removeWorktree`, + `resetHardTo`, `diffNameStatus`, `assertWorktreeClean`, and + `squashMergeIntoMain`. +- `IngestBundleRunner` already stages raw snapshots in a session worktree, + chunks adapters into `WorkUnit[]`, runs WorkUnit and reconciliation agent + loops, records tool transcripts, writes reports, inserts provenance, and + squashes into main. +- `buildWuToolSet()` already withholds `sl_write_source` and `sl_edit_source` + for `slDisallowed` WorkUnits. + +Current gaps that block v1: + +- WorkUnits still run against one mutable session worktree. In + `ingest-bundle.runner.ts`, the runner creates one `sessionWorktree` and each + WorkUnit uses `sessionWorktree.workdir`, `sessionWorktree.config`, and + `sessionWorktree.git`. +- WorkUnits do not produce durable Git patch proposal artifacts. +- There is no artifact-aware patch integration layer using + `git apply --3way --index`. +- There is no integration rollback and structured conflict classification for + failed patch application or semantic gate failures. 
+- Deterministic imports run as post-processors after WorkUnits and + reconciliation, while the spec requires projection before child worktree + creation. +- Final gates do not validate wiki body inline-code references to semantic + layer entities or raw tables. +- Provenance insertion accepts unknown raw hashes instead of failing before + insertion. +- `slDisallowed` is enforced at tool construction only; there is no integration + patch rejection for `semantic-layer/**`. +- Existing progress events and tool transcripts are useful but not sufficient + persistent traces. They do not capture the input snapshot, every routing + decision, patch collection, patch application timing, gate timing, rollback + context, and final outcome in one inspectable trace file. + +Non-blocking gaps for this plan: + +- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct + durable writes to the isolated path. This plan enables the path privately for + Metabase and test fixtures. +- Promoting isolated diffs as the default for every connector. +- Removing the old shared-worktree WorkUnit path. +- Interactive, CLI, or agent-driven conflict resolution. +- Auto-merging semantic conflicts that cannot be proven correct. +- Transitive SQL-projection closure for semantic-layer dependency expansion. +- Moving provenance to worktree files. +- Public connector knobs such as `executionMode`, `planningStrategy`, or + `conflictPolicy`. + +## File structure + +- Create `packages/context/src/ingest/ingest-trace.ts`. + Owns persistent JSONL trace writing, trace timing helpers, error + serialization, and trace path construction. +- Create `packages/context/src/ingest/ingest-trace.test.ts`. + Covers JSONL trace persistence, timing events, error context, and path layout. +- Modify `packages/context/src/ingest/ports.ts`. + Adds trace storage and private isolated-diff settings. +- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. 
+ Stores traces under `.ktx/ingest-traces/<job-id>/trace.jsonl` and enables the + isolated path for Metabase. +- Modify `packages/context/src/ingest/reports.ts` and + `packages/context/src/ingest/report-snapshot.ts`. + Adds `tracePath` and isolated-diff outcome fields to reports. +- Modify `packages/cli/src/ingest.ts`. + Prints `Trace: <path>` in stored ingest status. +- Modify `packages/context/src/core/git.service.ts` and tests. + Adds binary patch collection, patch application, staged commit, and path + inspection helpers needed by patch integration. +- Create `packages/context/src/ingest/isolated-diff/git-patch.ts`. + Owns patch metadata parsing, path restrictions, mode-change checks, and + binary/text artifact rejection. +- Create `packages/context/src/ingest/isolated-diff/git-patch.test.ts`. + Covers path parsing, `slDisallowed`, text-artifact binary rejection, and + executable-mode rejection. +- Create `packages/context/src/ingest/wiki-body-refs.ts`. + Parses and validates explicit wiki body references. +- Create `packages/context/src/ingest/wiki-body-refs.test.ts`. + Covers the `source.entity`, `connectionId/source.entity`, + `source:source_name`, and `table:qualified_table_name` grammar. +- Create `packages/context/src/ingest/artifact-gates.ts`. + Runs WorkUnit-local and final global artifact gates for SL, wiki refs, + wiki `sl_refs`, wiki body refs, and provenance rows. +- Create `packages/context/src/ingest/artifact-gates.test.ts`. + Covers the stale `total_contract_arr_cents` incident and provenance raw-path + failure. +- Create `packages/context/src/ingest/isolated-diff/work-unit-executor.ts`. + Executes a WorkUnit inside a child worktree, records traces, persists + transcripts, runs local gates, collects its patch, and cleans up the child. +- Create `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts`. + Covers child-worktree base SHA usage, patch collection, child cleanup, and + trace emission on success and failure. 
+- Create `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Applies accepted WorkUnit patches into the integration worktree, commits each + accepted patch, rolls back on textual or semantic conflict, and records + trace events. +- Create `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Covers clean patch integration, textual conflict rollback, semantic conflict + rollback, and `slDisallowed` rejection. +- Modify `packages/context/src/ingest/types.ts`. + Adds the optional `SourceAdapter.project()` hook for deterministic projection. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Adds the isolated-diff execution branch, final gates, trace lifecycle, and + report integration. +- Create `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds the known Metabase-style stale wiki reference regression, clean + different-page integration, textual conflict, hybrid projection, Notion-style + invalid `sl_refs`, and LookML-style `slDisallowed` rejection tests. +- Modify `packages/context/src/ingest/index.ts`. + Exports new trace, artifact gate, and isolated-diff testing types. 
+ +--- + +### Task 1: Persistent ingestion trace sink + +**Files:** +- Create: `packages/context/src/ingest/ingest-trace.ts` +- Create: `packages/context/src/ingest/ingest-trace.test.ts` +- Modify: `packages/context/src/ingest/ports.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Modify: `packages/context/src/ingest/reports.ts` +- Modify: `packages/context/src/ingest/report-snapshot.ts` +- Modify: `packages/cli/src/ingest.ts` + +- [ ] **Step 1: Write failing trace sink tests** + +Create `packages/context/src/ingest/ingest-trace.test.ts`: + +```ts +import { mkdtemp, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js'; + +describe('FileIngestTraceWriter', () => { + it('persists structured trace events as JSONL', async () => { + const root = await mkdtemp(join(tmpdir(), 'ktx-trace-')); + const tracePath = ingestTracePathForJob(root, 'job-1'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-1', + connectionId: 'metabase-main', + sourceKey: 'metabase', + level: 'debug', + }); + + await trace.event('debug', 'snapshot', 'input_snapshot', { + baseSha: 'abc123', + rawFileCount: 2, + diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 }, + }); + + const lines = (await readFile(tracePath, 'utf-8')).trim().split('\n').map((line) => JSON.parse(line)); + expect(lines).toHaveLength(1); + expect(lines[0]).toMatchObject({ + schemaVersion: 1, + jobId: 'job-1', + connectionId: 'metabase-main', + sourceKey: 'metabase', + level: 'debug', + phase: 'snapshot', + event: 'input_snapshot', + data: { + baseSha: 'abc123', + rawFileCount: 2, + diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 }, + }, + }); + expect(typeof lines[0].at).toBe('string'); + }); + + it('records timing and error context for postmortem inspection', 
async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z')); + const root = await mkdtemp(join(tmpdir(), 'ktx-trace-')); + const tracePath = ingestTracePathForJob(root, 'job-2'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-2', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + await expect( + traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => { + vi.advanceTimersByTime(17); + throw new Error('patch conflict'); + }), + ).rejects.toThrow('patch conflict'); + + const lines = (await readFile(tracePath, 'utf-8')).trim().split('\n').map((line) => JSON.parse(line)); + expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']); + expect(lines[1]).toMatchObject({ + level: 'error', + phase: 'integration', + data: { unitKey: 'wu-1' }, + error: { name: 'Error', message: 'patch conflict' }, + }); + expect(lines[1].durationMs).toBe(17); + vi.useRealTimers(); + }); + + it('uses the documented trace path layout', () => { + expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl'); + }); +}); +``` + +- [ ] **Step 2: Run the failing trace sink tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts +``` + +Expected: FAIL because `packages/context/src/ingest/ingest-trace.ts` does not +exist. 
+ +- [ ] **Step 3: Add the trace sink implementation** + +Create `packages/context/src/ingest/ingest-trace.ts`: + +```ts +import { appendFile, mkdir } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { performance } from 'node:perf_hooks'; + +export type IngestTraceLevel = 'info' | 'debug' | 'trace' | 'error'; + +const TRACE_LEVEL_RANK: Record = { + error: 0, + info: 1, + debug: 2, + trace: 3, +}; + +export interface IngestTraceContext { + tracePath: string; + jobId: string; + connectionId: string; + sourceKey: string; + runId?: string; + syncId?: string; + level?: IngestTraceLevel; +} + +export interface IngestTraceEvent { + schemaVersion: 1; + at: string; + level: IngestTraceLevel; + jobId: string; + connectionId: string; + sourceKey: string; + runId?: string; + syncId?: string; + phase: string; + event: string; + durationMs?: number; + data?: Record; + error?: { + name: string; + message: string; + stack?: string; + }; +} + +export interface IngestTraceWriter { + readonly tracePath: string; + readonly context: IngestTraceContext; + withContext(context: Partial>): IngestTraceWriter; + event( + level: IngestTraceLevel, + phase: string, + event: string, + data?: Record, + error?: unknown, + durationMs?: number, + ): Promise; +} + +export function ingestTracePathForJob(homeDir: string, jobId: string): string { + return join(homeDir, 'ingest-traces', jobId, 'trace.jsonl'); +} + +function serializeError(error: unknown): IngestTraceEvent['error'] | undefined { + if (error === undefined || error === null) { + return undefined; + } + if (error instanceof Error) { + return { + name: error.name, + message: error.message, + ...(error.stack ? 
{ stack: error.stack } : {}), + }; + } + return { name: 'Error', message: String(error) }; +} + +function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean { + return TRACE_LEVEL_RANK[incoming] <= TRACE_LEVEL_RANK[configured]; +} + +export class FileIngestTraceWriter implements IngestTraceWriter { + readonly tracePath: string; + readonly context: IngestTraceContext; + + constructor(context: IngestTraceContext) { + this.context = { ...context, level: context.level ?? 'debug' }; + this.tracePath = context.tracePath; + } + + withContext(context: Partial>): IngestTraceWriter { + return new FileIngestTraceWriter({ ...this.context, ...context, tracePath: this.tracePath }); + } + + async event( + level: IngestTraceLevel, + phase: string, + event: string, + data?: Record, + error?: unknown, + durationMs?: number, + ): Promise { + if (!shouldWrite(this.context.level ?? 'debug', level)) { + return; + } + const payload: IngestTraceEvent = { + schemaVersion: 1, + at: new Date().toISOString(), + level, + jobId: this.context.jobId, + connectionId: this.context.connectionId, + sourceKey: this.context.sourceKey, + ...(this.context.runId ? { runId: this.context.runId } : {}), + ...(this.context.syncId ? { syncId: this.context.syncId } : {}), + phase, + event, + ...(durationMs !== undefined ? { durationMs } : {}), + ...(data ? { data } : {}), + ...(serializeError(error) ? 
{ error: serializeError(error) } : {}), + }; + await mkdir(dirname(this.tracePath), { recursive: true }); + await appendFile(this.tracePath, `${JSON.stringify(payload)}\n`, 'utf-8'); + } +} + +export class NoopIngestTraceWriter implements IngestTraceWriter { + readonly tracePath = ''; + readonly context: IngestTraceContext = { + tracePath: '', + jobId: '', + connectionId: '', + sourceKey: '', + level: 'error', + }; + + withContext(): IngestTraceWriter { + return this; + } + + async event(): Promise {} +} + +export async function traceTimed( + trace: IngestTraceWriter, + phase: string, + event: string, + data: Record, + fn: () => Promise, +): Promise { + await trace.event('debug', phase, `${event}_started`, data); + const started = performance.now(); + try { + const result = await fn(); + await trace.event('debug', phase, `${event}_finished`, data, undefined, performance.now() - started); + return result; + } catch (error) { + await trace.event('error', phase, `${event}_failed`, data, error, performance.now() - started); + throw error; + } +} +``` + +- [ ] **Step 4: Add trace storage and report fields** + +In `packages/context/src/ingest/ports.ts`, import `IngestTraceLevel`: + +```ts +import type { IngestTraceLevel } from './ingest-trace.js'; +``` + +Then extend `IngestSettingsPort` and `IngestStoragePort`: + +```ts +export interface IngestSettingsPort { + memoryIngestionModel: string; + probeRowCount: number; + workUnitMaxConcurrency?: number; + workUnitStepBudget?: number; + workUnitFailureMode?: 'abort' | 'continue'; + isolatedDiffSourceKeys?: string[]; + ingestTraceLevel?: IngestTraceLevel; +} + +export interface IngestStoragePort { + homeDir: string; + systemGitAuthor: IngestGitAuthor; + resolveUploadDir(uploadId: string): string; + resolvePullDir(jobId: string): string; + resolveTranscriptDir(jobId: string): string; + resolveTracePath(jobId: string): string; +} +``` + +In `packages/context/src/ingest/local-bundle-runtime.ts`, import +`ingestTracePathForJob`: + 
+```ts +import { ingestTracePathForJob } from './ingest-trace.js'; +``` + +Then add the storage method: + +```ts + resolveTracePath(jobId: string): string { + return ingestTracePathForJob(this.homeDir, jobId); + } +``` + +When creating the runner settings in `createLocalBundleIngestRuntime()`, set: + +```ts + settings: { + memoryIngestionModel: options.memoryModel ?? project.config.llm.memoryIngestionModel, + probeRowCount: project.config.ai.slValidation.probeRowCount, + workUnitMaxConcurrency: project.config.ingest.workUnitMaxConcurrency, + workUnitStepBudget: project.config.ingest.workUnitStepBudget, + workUnitFailureMode: project.config.ingest.workUnitFailureMode, + isolatedDiffSourceKeys: ['metabase'], + ingestTraceLevel: 'debug', + }, +``` + +In `packages/context/src/ingest/reports.ts`, add report fields: + +```ts +export interface IngestReportBody { + syncId: string; + diffSummary: IngestDiffSummary; + fetch?: SourceFetchReport; + commitSha: string | null; + tracePath?: string; + isolatedDiff?: { + enabled: boolean; + integrationWorktreePath?: string; + ingestionBaseSha?: string; + projectionSha?: string | null; + acceptedPatches: number; + textualConflicts: number; + semanticConflicts: number; + }; + workUnits: IngestReportWorkUnit[]; + failedWorkUnits: string[]; + reconciliationSkipped: boolean; + reconciliationActions?: MemoryAction[]; + conflictsResolved: ConflictResolvedRecord[]; + evictionsApplied: EvictionAppliedRecord[]; + unmappedFallbacks: UnmappedFallbackRecord[]; + artifactResolutions?: ArtifactResolutionRecord[]; + evictionInputs: string[]; + unresolvedCards: UnresolvedCardInfo[]; + supersededBy: string | null; + overrideOf: string | null; + provenanceRows: IngestReportProvenanceDetail[]; + toolTranscripts: IngestReportToolTranscriptSummary[]; + postProcessor?: IngestReportPostProcessorOutcome; + wikiSlRefRepairs?: WikiSlRefRepair[]; + wikiSlRefRepairWarnings?: string[]; + memoryFlow?: MemoryFlowReplayInput; +} +``` + +In 
`packages/context/src/ingest/report-snapshot.ts`, add this schema inside +`body`: + +```ts + tracePath: z.string().optional(), + isolatedDiff: z + .object({ + enabled: z.boolean(), + integrationWorktreePath: z.string().optional(), + ingestionBaseSha: z.string().optional(), + projectionSha: z.string().nullable().optional(), + acceptedPatches: z.number().int().min(0), + textualConflicts: z.number().int().min(0), + semanticConflicts: z.number().int().min(0), + }) + .optional(), +``` + +In `packages/cli/src/ingest.ts`, update `writeReportStatus()`: + +```ts + if (report.body.tracePath) { + io.stdout.write(`Trace: ${report.body.tracePath}\n`); + } +``` + +Place it after the `Job:` line so a failed run's trace path is visible near the +run identifiers. + +- [ ] **Step 5: Run trace sink tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/context/src/ingest/ingest-trace.ts \ + packages/context/src/ingest/ingest-trace.test.ts \ + packages/context/src/ingest/ports.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/reports.ts \ + packages/context/src/ingest/report-snapshot.ts \ + packages/cli/src/ingest.ts +git commit -m "feat: persist ingest trace events" +``` + +--- + +### Task 2: Git patch contract helpers + +**Files:** +- Modify: `packages/context/src/core/git.service.ts` +- Create: `packages/context/src/core/git.service.patch.test.ts` +- Create: `packages/context/src/ingest/isolated-diff/git-patch.ts` +- Create: `packages/context/src/ingest/isolated-diff/git-patch.test.ts` + +- [ ] **Step 1: Write failing GitService patch tests** + +Create `packages/context/src/core/git.service.patch.test.ts`: + +```ts +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, 
expect, it } from 'vitest'; +import { GitService } from './git.service.js'; + +async function makeGit() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + return { homeDir, configDir, git }; +} + +describe('GitService patch helpers', () => { + it('collects binary-safe no-rename patches and applies them with --3way --index', async () => { + const { homeDir, configDir, git } = await makeGit(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/page.md'), 'old\n'); + await git.commitFiles(['wiki/global/page.md'], 'add page', 'System User', 'system@example.com'); + const base = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/page.md'), 'new\n'); + await git.commitFiles(['wiki/global/page.md'], 'edit page', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal.patch'); + await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath); + + const targetDir = join(homeDir, 'target'); + await git.addWorktree(targetDir, 'target', base); + const targetGit = git.forWorktree(targetDir); + await targetGit.applyPatchFile3WayIndex(patchPath); + await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com'); + + await expect(readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8')).resolves.toBe('new\n'); + }); +}); +``` + +- [ ] **Step 2: Run failing GitService patch tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/core/git.service.patch.test.ts +``` + +Expected: FAIL because `writeBinaryNoRenamePatch`, `applyPatchFile3WayIndex`, +and `commitStaged` are missing. 
+ +- [ ] **Step 3: Add GitService patch helpers** + +At the top of `packages/context/src/core/git.service.ts`, change: + +```ts +import { join } from 'node:path'; +``` + +to: + +```ts +import { dirname, join } from 'node:path'; +``` + +Then add these methods to the `GitService` class: + +```ts + async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise { + await this.withMutationQueue(async () => { + const patch = await this.git.raw(['diff', '--binary', '--no-renames', `${from}..${to}`]); + await fs.mkdir(dirname(patchPath), { recursive: true }); + await fs.writeFile(patchPath, patch, 'utf-8'); + }); + } + + async applyPatchFile3WayIndex(patchPath: string): Promise { + await this.withMutationQueue(async () => { + await this.git.raw(['apply', '--3way', '--index', patchPath]); + }); + } + + async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise { + return this.withMutationQueue(async () => { + const stagedChanges = await this.git.diff(['--cached', '--name-only']); + if (!stagedChanges.trim()) { + const head = (await this.git.revparse(['HEAD'])).trim(); + const log = await this.git.log({ maxCount: 1 }); + const latest = log.latest; + return { + commitHash: head, + shortHash: head.substring(0, 7), + message: latest?.message ?? '', + author: latest?.author_name ?? '', + authorEmail: latest?.author_email ?? '', + timestamp: latest?.date ?? new Date(0).toISOString(), + committedDate: latest?.date ? new Date(latest.date).toISOString() : new Date(0).toISOString(), + created: false, + }; + } + await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` }); + const head = (await this.git.revparse(['HEAD'])).trim(); + const log = await this.git.log({ maxCount: 1 }); + const latest = log.latest; + return { + commitHash: head, + shortHash: head.substring(0, 7), + message: latest?.message ?? commitMessage, + author: latest?.author_name ?? author, + authorEmail: latest?.author_email ?? 
authorEmail, + timestamp: latest?.date ?? new Date().toISOString(), + committedDate: latest?.date ? new Date(latest.date).toISOString() : new Date().toISOString(), + created: true, + }; + }); + } +``` + +- [ ] **Step 4: Write failing patch contract tests** + +Create `packages/context/src/ingest/isolated-diff/git-patch.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { + assertPatchAllowedForWorkUnit, + parsePatchTouchedPaths, + textArtifactRoots, +} from './git-patch.js'; + +describe('isolated diff patch contract', () => { + it('parses touched paths from no-rename git patches', () => { + const patch = [ + 'diff --git a/wiki/global/a.md b/wiki/global/a.md', + 'index 1111111..2222222 100644', + '--- a/wiki/global/a.md', + '+++ b/wiki/global/a.md', + '@@ -1 +1 @@', + '-old', + '+new', + 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml', + 'new file mode 100644', + '--- /dev/null', + '+++ b/semantic-layer/c1/orders.yaml', + '@@ -0,0 +1 @@', + '+name: orders', + '', + ].join('\n'); + + expect(parsePatchTouchedPaths(patch)).toEqual([ + { path: 'wiki/global/a.md', oldPath: 'wiki/global/a.md', newPath: 'wiki/global/a.md', mode: '100644', binary: false }, + { + path: 'semantic-layer/c1/orders.yaml', + oldPath: 'semantic-layer/c1/orders.yaml', + newPath: 'semantic-layer/c1/orders.yaml', + mode: '100644', + binary: false, + }, + ]); + }); + + it('rejects semantic-layer paths for slDisallowed work units', () => { + const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n'; + + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'lookml-mismatch', + patch, + slDisallowed: true, + }), + ).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/); + }); + + it('rejects executable and binary changes under known text artifact roots', () => { + expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']); + + const executablePatch = + 
'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n'; + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'wu-1', + patch: executablePatch, + slDisallowed: false, + }), + ).toThrow(/unexpected executable mode under wiki\/global\/a.md/); + + const binaryPatch = [ + 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml', + 'index 1111111..2222222 100644', + 'GIT binary patch', + 'literal 0', + '', + ].join('\n'); + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'wu-2', + patch: binaryPatch, + slDisallowed: false, + }), + ).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders.yaml/); + }); +}); +``` + +- [ ] **Step 5: Add patch contract helpers** + +Create `packages/context/src/ingest/isolated-diff/git-patch.ts`: + +```ts +export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const; + +export interface PatchTouchedPath { + path: string; + oldPath: string; + newPath: string; + mode: string | null; + binary: boolean; +} + +export interface PatchPolicyInput { + unitKey: string; + patch: string; + slDisallowed: boolean; +} + +function stripPrefix(path: string): string { + return path.replace(/^[ab]\//, ''); +} + +function isTextArtifactPath(path: string): boolean { + return textArtifactRoots.some((root) => path.startsWith(root)); +} + +export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] { + const lines = patch.split('\n'); + const entries: PatchTouchedPath[] = []; + let current: PatchTouchedPath | null = null; + + const pushCurrent = () => { + if (current) { + entries.push(current); + } + }; + + for (const line of lines) { + const diffMatch = /^diff --git (.+) (.+)$/.exec(line); + if (diffMatch) { + pushCurrent(); + const oldPath = stripPrefix(diffMatch[1] ?? ''); + const newPath = stripPrefix(diffMatch[2] ?? ''); + current = { + path: newPath === '/dev/null' ? 
oldPath : newPath, + oldPath, + newPath, + mode: null, + binary: false, + }; + continue; + } + if (!current) { + continue; + } + const indexMode = /^index [0-9a-f]+\.\.[0-9a-f]+(?: [0-7]{6})?$/.exec(line); + if (indexMode && line.includes(' ')) { + current.mode = line.split(' ').at(-1) ?? current.mode; + } + const newMode = /^new mode ([0-7]{6})$/.exec(line); + if (newMode) { + current.mode = newMode[1] ?? current.mode; + } + if (line === 'GIT binary patch' || line.startsWith('Binary files ')) { + current.binary = true; + } + } + + pushCurrent(); + return entries; +} + +export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] { + const touched = parsePatchTouchedPaths(input.patch); + for (const entry of touched) { + if (input.slDisallowed && entry.path.startsWith('semantic-layer/')) { + throw new Error(`slDisallowed WorkUnit ${input.unitKey} touched ${entry.path}`); + } + if (!isTextArtifactPath(entry.path)) { + continue; + } + if (entry.binary) { + throw new Error(`unexpected binary patch under ${entry.path}`); + } + if (entry.mode && entry.mode !== '100644') { + throw new Error(`unexpected executable mode under ${entry.path}: ${entry.mode}`); + } + } + return touched; +} +``` + +- [ ] **Step 6: Run patch helper tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/core/git.service.patch.test.ts src/ingest/isolated-diff/git-patch.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/context/src/core/git.service.ts \ + packages/context/src/core/git.service.patch.test.ts \ + packages/context/src/ingest/isolated-diff/git-patch.ts \ + packages/context/src/ingest/isolated-diff/git-patch.test.ts +git commit -m "feat: add isolated ingest patch helpers" +``` + +--- + +### Task 3: Wiki body reference parser and validator + +**Files:** +- Create: `packages/context/src/ingest/wiki-body-refs.ts` +- Create: `packages/context/src/ingest/wiki-body-refs.test.ts` + +- [ ] **Step 1: Write failing wiki body reference tests** + +Create `packages/context/src/ingest/wiki-body-refs.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js'; + +const sources = [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }, { name: 'segment', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }], + table: 'analytics.mart_account_segments', + }, +]; + +describe('wiki body refs', () => { + it('parses only explicit inline-code body references outside fenced blocks', () => { + const body = [ + 'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.', + 'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.', + 'Ignore prose mart_account_segments.total_contract_arr_cents.', + 'Ignore `single_token`.', + '```sql', + 'select `mart_account_segments.total_contract_arr_cents`', + '```', + ].join('\n'); + + expect(parseWikiBodyRefs(body)).toEqual([ + { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' }, + { kind: 'sl_source', connectionId: null, sourceName: 'mart_account_segments' }, + { kind: 'sl_entity', connectionId: 'warehouse', 
sourceName: 'mart_account_segments', entityName: 'segment' }, + { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' }, + ]); + }); + + it('rejects stale inline-code semantic-layer references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.', + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async () => true, + }); + + expect(invalid).toEqual([ + 'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + ]); + }); + + it('validates source, dimension, segment, measure, and table references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: [ + '`mart_account_segments.total_contract_arr`', + '`mart_account_segments.segment`', + '`mart_account_segments.enterprise`', + '`source:mart_account_segments`', + '`table:analytics.mart_account_segments`', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments', + }); + + expect(invalid).toEqual([]); + }); +}); +``` + +- [ ] **Step 2: Run failing wiki body reference tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/wiki-body-refs.test.ts +``` + +Expected: FAIL because `wiki-body-refs.ts` does not exist. 
import type { SemanticLayerSource } from '../sl/index.js';

/** An explicit reference found in inline code inside a wiki page body. */
export type WikiBodyRef =
  | { kind: 'sl_entity'; connectionId: string | null; sourceName: string; entityName: string }
  | { kind: 'sl_source'; connectionId: string | null; sourceName: string }
  | { kind: 'table'; connectionId: string | null; tableRef: string };

export interface WikiBodyRefValidationInput {
  pageKey: string;
  body: string;
  /** Connections searched when a reference carries no `connection/` prefix. */
  visibleConnectionIds: string[];
  loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}

const inlineCodePattern = /`([^`\n]+)`/g;

/** Returns the body lines that are not inside (or delimiting) a ``` fenced block. */
function visibleLinesOutsideFences(body: string): string[] {
  const lines: string[] = [];
  let fenced = false;
  for (const line of body.split('\n')) {
    if (/^\s*```/.test(line)) {
      fenced = !fenced;
      continue;
    }
    if (!fenced) {
      lines.push(line);
    }
  }
  return lines;
}

/** Splits an optional leading `connection/` prefix off an inline-code token. */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const slash = value.indexOf('/');
  if (slash <= 0) {
    return { connectionId: null, body: value };
  }
  return { connectionId: value.slice(0, slash), body: value.slice(slash + 1) };
}

/**
 * Collects references written as inline code outside fenced blocks.
 *
 * Recognized token shapes, each optionally prefixed with `connection/`:
 * `source:<name>`, `table:<ref>`, and `<source>.<entity>` (exactly two
 * dot-separated parts). Any other token is ignored.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const refs: WikiBodyRef[] = [];
  for (const line of visibleLinesOutsideFences(body)) {
    for (const match of line.matchAll(inlineCodePattern)) {
      const token = (match[1] ?? '').trim();
      if (!token) {
        continue;
      }
      const scoped = parseConnectionScoped(token);
      if (scoped.body.startsWith('source:')) {
        const sourceName = scoped.body.slice('source:'.length).trim();
        if (sourceName) {
          refs.push({ kind: 'sl_source', connectionId: scoped.connectionId, sourceName });
        }
        continue;
      }
      if (scoped.body.startsWith('table:')) {
        const tableRef = scoped.body.slice('table:'.length).trim();
        if (tableRef) {
          refs.push({ kind: 'table', connectionId: scoped.connectionId, tableRef });
        }
        continue;
      }
      const parts = scoped.body.split('.');
      if (parts.length === 2 && parts[0] && parts[1]) {
        refs.push({
          kind: 'sl_entity',
          connectionId: scoped.connectionId,
          sourceName: parts[0],
          entityName: parts[1],
        });
      }
    }
  }
  return refs;
}

/** Names of every measure, column, and segment declared on a source. */
function entityNames(source: SemanticLayerSource): Set<string> {
  return new Set<string>([
    ...(source.measures ?? []).map((measure) => measure.name),
    ...(source.columns ?? []).map((column) => column.name),
    ...(source.segments ?? []).map((segment) => segment.name),
  ]);
}

/**
 * Resolves every body reference and returns one message per reference that
 * cannot be resolved; an empty array means the body is valid.
 *
 * Sources are loaded at most once per connection within a single call.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const errors: string[] = [];
  const sourceCache = new Map<string, SemanticLayerSource[]>();
  const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    const cached = sourceCache.get(connectionId);
    if (cached) {
      return cached;
    }
    const sources = await input.loadSources(connectionId);
    sourceCache.set(connectionId, sources);
    return sources;
  };

  for (const ref of parseWikiBodyRefs(input.body)) {
    // A scoped reference is checked only against its own connection.
    const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;
    if (ref.kind === 'table') {
      const found = await Promise.all(
        connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
      );
      if (!found.some(Boolean)) {
        errors.push(
          `${input.pageKey}: unknown raw table ${ref.connectionId ? `${ref.connectionId}/` : ''}${ref.tableRef}`,
        );
      }
      continue;
    }

    let source: SemanticLayerSource | undefined;
    for (const connectionId of connectionIds) {
      source = (await loadSources(connectionId)).find((candidate) => candidate.name === ref.sourceName);
      if (source) {
        break;
      }
    }
    if (!source) {
      errors.push(`${input.pageKey}: unknown semantic-layer source ${ref.sourceName}`);
      continue;
    }
    if (ref.kind === 'sl_entity' && !entityNames(source).has(ref.entityName)) {
      errors.push(`${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`);
    }
  }

  return errors;
}
+ sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }), + tableExists: async () => true, + }), + ).rejects.toThrow(/unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/); + }); + + it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => { + expect(() => + validateProvenanceRawPaths({ + rows: [{ rawPath: 'cards/missing.json' }], + currentRawPaths: new Set(['cards/present.json']), + deletedRawPaths: new Set(['cards/deleted.json']), + }), + ).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/); + }); +}); +``` + +- [ ] **Step 2: Run failing artifact gate tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts +``` + +Expected: FAIL because `artifact-gates.ts` does not exist. 
import type { SemanticLayerService } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { KnowledgeWikiService } from '../wiki/index.js';
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';

export interface TouchedValidationResult {
  invalidSources: string[];
  validSources: string[];
}

export interface FinalArtifactGateInput {
  connectionIds: string[];
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
  wikiService: KnowledgeWikiService;
  semanticLayerService: SemanticLayerService;
  validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}

export interface ProvenanceRawPathValidationInput {
  rows: Array<{ rawPath: string }>;
  currentRawPaths: Set<string>;
  deletedRawPaths: Set<string>;
}

/** Source list as returned by `SemanticLayerService.loadAllSources`. */
type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

/** Reduces an `sl_refs` entry (`[connection/]source[.entity]`) to its source name. */
function bareSlRef(ref: string): string {
  const withoutConnection = ref.includes('/') ? ref.slice(ref.indexOf('/') + 1) : ref;
  return withoutConnection.split('.')[0] ?? withoutConnection;
}

/** Returns one message per `sl_refs` entry whose source is missing from the loaded connections. */
function findUnknownSlRefs(
  pageKey: string,
  slRefs: readonly string[],
  sourcesByConnection: ReadonlyMap<string, LoadedSources>,
): string[] {
  const errors: string[] = [];
  for (const ref of slRefs) {
    const sourceName = bareSlRef(ref);
    const connectionId = ref.includes('/') ? ref.slice(0, ref.indexOf('/')) : null;
    // A scoped entry is checked only against its own connection.
    const candidates = connectionId ? [sourcesByConnection.get(connectionId)] : [...sourcesByConnection.values()];
    if (!candidates.some((sources) => sources?.some((source) => source.name === sourceName))) {
      errors.push(`${pageKey}: unknown sl_refs entry ${ref}`);
    }
  }
  return errors;
}

/**
 * Runs the final gates over the integrated tree: touched semantic-layer source
 * validation, wiki frontmatter `sl_refs`, and wiki body references.
 *
 * Each changed page is read once, and sources loaded for `connectionIds` are
 * reused for every page. Messages keep the order: source validation, then
 * `sl_refs`, then body references.
 *
 * @throws Error listing every failure when at least one gate fails.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const validation = await input.validateTouchedSources(input.touchedSlSources);
  const errors: string[] = validation.invalidSources.map((source) => `semantic-layer validation failed for ${source}`);

  const sourcesByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
    sourcesByConnection.set(connectionId, sources);
  }
  const loadSources = async (connectionId: string): Promise<LoadedSources> => {
    const known = sourcesByConnection.get(connectionId);
    if (known) {
      return known;
    }
    // A body reference may name a connection outside `connectionIds`.
    const { sources } = await input.semanticLayerService.loadAllSources(connectionId);
    return sources;
  };

  const slRefErrors: string[] = [];
  const bodyRefErrors: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    slRefErrors.push(...findUnknownSlRefs(pageKey, page.frontmatter.sl_refs ?? [], sourcesByConnection));
    bodyRefErrors.push(
      ...(await findInvalidWikiBodyRefs({
        pageKey,
        body: page.content,
        visibleConnectionIds: input.connectionIds,
        loadSources,
        tableExists: input.tableExists,
      })),
    );
  }
  errors.push(...slRefErrors, ...bodyRefErrors);

  if (errors.length > 0) {
    throw new Error(`final artifact gates failed:\n${errors.join('\n')}`);
  }
}

/**
 * Checks that every provenance row points at a raw path from the current
 * snapshot or from the deleted set.
 *
 * @throws Error naming the first raw path found in neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  for (const row of input.rows) {
    if (!input.currentRawPaths.has(row.rawPath) && !input.deletedRawPaths.has(row.rawPath)) {
      throw new Error(`provenance row references raw path outside this snapshot: ${row.rawPath}`);
    }
  }
}
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts +git commit -m "feat: add final ingest artifact gates" +``` + +--- + +### Task 5: Isolated WorkUnit executor + +**Files:** +- Create: `packages/context/src/ingest/isolated-diff/work-unit-executor.ts` +- Create: `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts` +- Modify: `packages/context/src/ingest/stages/stage-3-work-units.ts` + +- [ ] **Step 1: Write failing isolated WorkUnit executor tests** + +Create `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts`: + +```ts +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { GitService } from '../../core/index.js'; +import { FileIngestTraceWriter } from '../ingest-trace.js'; +import { runIsolatedWorkUnit } from './work-unit-executor.js'; + +async function makeGit() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-wu-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + await mkdir(join(configDir, 'raw-sources/c1/fake/s'), { recursive: true }); + await writeFile(join(configDir, 'raw-sources/c1/fake/s/a.json'), '{}\n'); + await git.commitFiles(['raw-sources/c1/fake/s/a.json'], 'raw snapshot', 'System User', 'system@example.com'); + return { homeDir, configDir, git, baseSha: await git.revParseHead() }; +} + +describe('runIsolatedWorkUnit', () => { + it('creates a child worktree at the ingestion base and persists a patch proposal', async () => { + const { homeDir, git, baseSha } = await makeGit(); + const 
childDir = join(homeDir, '.worktrees/session-job-1-wu-1'); + const childGit = git.forWorktree(childDir); + const sessionWorktreeService = { + create: vi.fn(async (_key: string, startSha: string) => { + await mkdir(join(homeDir, '.worktrees'), { recursive: true }); + await git.addWorktree(childDir, 'session/job-1-wu-1', startSha); + return { chatId: 'job-1-wu-1', workdir: childDir, branch: 'session/job-1-wu-1', baseSha: startSha, createdAt: new Date(), git: childGit, config: {} }; + }), + cleanup: vi.fn(async () => undefined), + }; + const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-1', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await runIsolatedWorkUnit({ + unitIndex: 0, + ingestionBaseSha: baseSha, + sessionWorktreeService: sessionWorktreeService as never, + patchDir: join(homeDir, '.ktx/ingest-patches/job-1'), + trace, + run: async (child) => { + await mkdir(join(child.workdir, 'wiki/global'), { recursive: true }); + await writeFile(join(child.workdir, 'wiki/global/a.md'), '---\nsummary: A\nusage_mode: auto\n---\n\nBody\n'); + await child.git.commitFiles(['wiki/global/a.md'], 'test: write wiki', 'KTX Test', 'system@ktx.local'); + return { + unitKey: 'wu-1', + status: 'success', + preSha: baseSha, + postSha: await child.git.revParseHead(), + actions: [{ target: 'wiki', type: 'created', key: 'a', detail: 'A' }], + touchedSlSources: [], + }; + }, + workUnit: { unitKey: 'wu-1', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] }, + }); + + expect(sessionWorktreeService.create).toHaveBeenCalledWith('job-1-wu-1', baseSha); + expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success'); + expect(result.status).toBe('success'); + expect(result.patchPath).toContain('0000-wu-1.patch'); + await expect(readFile(result.patchPath, 'utf-8')).resolves.toContain('wiki/global/a.md'); + await 
import { mkdir, readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { SessionOutcome } from '../../core/index.js';
import type { IngestSessionWorktree, IngestSessionWorktreePort } from '../ports.js';
import type { WorkUnit } from '../types.js';
import type { IngestTraceWriter } from '../ingest-trace.js';
import type { WorkUnitOutcome } from '../stages/stage-3-work-units.js';
import { assertPatchAllowedForWorkUnit } from './git-patch.js';

export interface RunIsolatedWorkUnitInput {
  /** Position of the unit in the plan; used as the numeric prefix of the patch file name. */
  unitIndex: number;
  /** Commit the child worktree is created from and the patch is diffed against. */
  ingestionBaseSha: string;
  sessionWorktreeService: IngestSessionWorktreePort;
  /** Directory that receives the patch file; created when missing. */
  patchDir: string;
  trace: IngestTraceWriter;
  workUnit: WorkUnit;
  run(child: IngestSessionWorktree): Promise<WorkUnitOutcome>;
}

/** Builds `<zero-padded index>-<sanitized key>.patch`. */
function patchFileName(unitIndex: number, unitKey: string): string {
  const safeKey = unitKey.replace(/[^a-zA-Z0-9_.-]+/g, '-');
  return `${String(unitIndex).padStart(4, '0')}-${safeKey}.patch`;
}

/**
 * Runs one WorkUnit in its own child worktree and records its changes as a patch file.
 *
 * The child worktree is always handed back to `cleanup`: with `'success'` once
 * the patch has been written and passed the patch policy, with `'crash'` otherwise.
 *
 * @returns The outcome from `run`, extended with `childWorktreePath` and, on
 *   success, `patchPath` and `patchTouchedPaths`.
 * @throws Rethrows any error raised by `run`, patch collection, or the patch policy.
 */
export async function runIsolatedWorkUnit(input: RunIsolatedWorkUnitInput): Promise<WorkUnitOutcome> {
  const { trace, workUnit } = input;
  const sessionKey = `${trace.context.jobId}-${workUnit.unitKey}`;
  const child = await input.sessionWorktreeService.create(sessionKey, input.ingestionBaseSha);
  let cleanupOutcome: SessionOutcome = 'crash';

  // Everything after creation runs inside the try so a failing trace write
  // cannot leave the child worktree behind.
  try {
    await trace.event('debug', 'work_unit', 'work_unit_child_created', {
      unitKey: workUnit.unitKey,
      unitIndex: input.unitIndex,
      worktreePath: child.workdir,
      baseSha: input.ingestionBaseSha,
    });

    const outcome = await input.run(child);
    if (outcome.status !== 'success') {
      await trace.event('error', 'work_unit', 'work_unit_failed_before_patch', {
        unitKey: workUnit.unitKey,
        reason: outcome.reason ?? 'unknown failure',
      });
      return { ...outcome, childWorktreePath: child.workdir };
    }

    await mkdir(input.patchDir, { recursive: true });
    const patchPath = join(input.patchDir, patchFileName(input.unitIndex, workUnit.unitKey));
    await child.git.writeBinaryNoRenamePatch(input.ingestionBaseSha, 'HEAD', patchPath);
    const patch = await readFile(patchPath, 'utf-8');
    const touchedPaths = assertPatchAllowedForWorkUnit({
      unitKey: workUnit.unitKey,
      patch,
      slDisallowed: workUnit.slDisallowed === true,
    }).map((entry) => entry.path);

    cleanupOutcome = 'success';
    await trace.event('debug', 'work_unit', 'work_unit_patch_collected', {
      unitKey: workUnit.unitKey,
      patchPath,
      touchedPaths,
      patchBytes: Buffer.byteLength(patch),
    });
    return {
      ...outcome,
      patchPath,
      patchTouchedPaths: touchedPaths,
      childWorktreePath: child.workdir,
    };
  } catch (error) {
    cleanupOutcome = 'crash';
    await trace.event(
      'error',
      'work_unit',
      'work_unit_child_failed',
      { unitKey: workUnit.unitKey, worktreePath: child.workdir },
      error,
    );
    throw error;
  } finally {
    await input.sessionWorktreeService.cleanup(child, cleanupOutcome);
    await trace.event('trace', 'work_unit', 'work_unit_child_cleanup', {
      unitKey: workUnit.unitKey,
      outcome: cleanupOutcome,
      worktreePath: child.workdir,
    });
  }
}
await writeFile(join(configDir, 'wiki/global/a.md'), 'old\n'); + await git.commitFiles(['wiki/global/a.md'], 'base', 'System User', 'system@example.com'); + return { homeDir, configDir, git, baseSha: await git.revParseHead() }; +} + +describe('integrateWorkUnitPatch', () => { + it('applies a clean patch, runs semantic gates, and commits accepted changes', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child'); + await git.addWorktree(childDir, 'child', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'new\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/wu.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'), + jobId: 'job-1', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-1', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: false, + }); + + expect(result.status).toBe('accepted'); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('new\n'); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_apply_finished'); + }); + + it('rolls back and classifies semantic conflicts', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic'); + await git.addWorktree(childDir, 'child-semantic', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'bad\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad edit', 'System 
User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/bad.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-2/trace.jsonl'), + jobId: 'job-2', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-bad', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')), + slDisallowed: false, + }); + + expect(result.status).toBe('semantic_conflict'); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n'); + }); +}); +``` + +- [ ] **Step 2: Run failing patch integrator tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: FAIL because `patch-integrator.ts` does not exist. 
import { readFile } from 'node:fs/promises';
import type { GitService } from '../../core/index.js';
import type { IngestTraceWriter } from '../ingest-trace.js';
import { traceTimed } from '../ingest-trace.js';
import { assertPatchAllowedForWorkUnit } from './git-patch.js';

export type PatchIntegrationResult =
  | { status: 'accepted'; commitSha: string; touchedPaths: string[] }
  | { status: 'textual_conflict'; reason: string; touchedPaths: string[] }
  | { status: 'semantic_conflict'; reason: string; touchedPaths: string[] };

export interface IntegrateWorkUnitPatchInput {
  unitKey: string;
  patchPath: string;
  integrationGit: GitService;
  trace: IngestTraceWriter;
  author: { name: string; email: string };
  slDisallowed: boolean;
  /** Gate run against the integration tree after the patch applies; rejecting marks a semantic conflict. */
  validateAppliedTree(touchedPaths: string[]): Promise<void>;
}

type RejectedStatus = 'textual_conflict' | 'semantic_conflict';

/**
 * Resets the integration worktree to its pre-apply head (when one was
 * recorded), emits the `patch_<status>` error event, and builds the rejection result.
 */
async function rejectPatch(
  input: IntegrateWorkUnitPatchInput,
  status: RejectedStatus,
  preApplyHead: string | null | undefined,
  touchedPaths: string[],
  error: unknown,
): Promise<PatchIntegrationResult> {
  if (preApplyHead) {
    await input.integrationGit.resetHardTo(preApplyHead);
  }
  const reason = error instanceof Error ? error.message : String(error);
  await input.trace.event('error', 'integration', `patch_${status}`, {
    unitKey: input.unitKey,
    patchPath: input.patchPath,
    touchedPaths,
    reason,
  });
  return { status, reason, touchedPaths };
}

/**
 * Applies one WorkUnit patch to the integration worktree and commits it when
 * it passes the gate.
 *
 * A failure while applying yields `textual_conflict` and a failure in
 * `validateAppliedTree` yields `semantic_conflict`; in both cases the
 * worktree is reset to the head recorded before the apply.
 *
 * @throws Error from the patch policy check, which runs before anything is applied.
 */
export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput): Promise<PatchIntegrationResult> {
  const preApplyHead = await input.integrationGit.revParseHead();
  const patch = await readFile(input.patchPath, 'utf-8');
  const touchedPaths = assertPatchAllowedForWorkUnit({
    unitKey: input.unitKey,
    patch,
    slDisallowed: input.slDisallowed,
  }).map((entry) => entry.path);

  try {
    await traceTimed(
      input.trace,
      'integration',
      'patch_apply',
      { unitKey: input.unitKey, patchPath: input.patchPath, touchedPaths },
      async () => {
        await input.integrationGit.applyPatchFile3WayIndex(input.patchPath);
        await input.integrationGit.assertWorktreeClean();
      },
    );
  } catch (error) {
    return rejectPatch(input, 'textual_conflict', preApplyHead, touchedPaths, error);
  }

  try {
    await traceTimed(input.trace, 'integration', 'semantic_gate', { unitKey: input.unitKey, touchedPaths }, async () => {
      await input.validateAppliedTree(touchedPaths);
    });
  } catch (error) {
    return rejectPatch(input, 'semantic_conflict', preApplyHead, touchedPaths, error);
  }

  const commit = await input.integrationGit.commitStaged(
    `ingest: accept WorkUnit ${input.unitKey}`,
    input.author.name,
    input.author.email,
  );
  await input.trace.event('debug', 'integration', 'patch_accepted', {
    unitKey: input.unitKey,
    commitSha: commit.commitHash,
    touchedPaths,
  });
  return { status: 'accepted', commitSha: commit.commitHash, touchedPaths };
}
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts +git commit -m "feat: integrate isolated work unit patches" +``` + +--- + +### Task 7: Runner-owned isolated-diff execution path + +**Files:** +- Modify: `packages/context/src/ingest/types.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/index.ts` + +- [ ] **Step 1: Add deterministic projection hook to SourceAdapter** + +In `packages/context/src/ingest/types.ts`, add these interfaces before +`SourceAdapter`: + +```ts +export interface DeterministicProjectionContext { + connectionId: string; + sourceKey: string; + syncId: string; + jobId: string; + runId: string; + stagedDir: string; + workdir: string; + parseArtifacts?: unknown; +} + +export interface ProjectionResult { + warnings: string[]; + errors: string[]; + touchedSources: Array<{ connectionId: string; sourceName: string }>; + changedWikiPageKeys: string[]; + result?: unknown; +} +``` + +Then add the optional adapter method: + +```ts + project?(ctx: DeterministicProjectionContext): Promise; +``` + +Keep existing adapter fields unchanged. 
+ +- [ ] **Step 2: Add isolated-diff exports** + +In `packages/context/src/ingest/index.ts`, export the new modules: + +```ts +export * from './ingest-trace.js'; +export * from './artifact-gates.js'; +export * from './wiki-body-refs.js'; +export * from './isolated-diff/git-patch.js'; +export * from './isolated-diff/work-unit-executor.js'; +export * from './isolated-diff/patch-integrator.js'; +``` + +- [ ] **Step 3: Refactor shared runner helpers** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add imports: + +```ts +import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js'; +import { FileIngestTraceWriter, type IngestTraceWriter, traceTimed } from './ingest-trace.js'; +import { integrateWorkUnitPatch } from './isolated-diff/patch-integrator.js'; +import { runIsolatedWorkUnit } from './isolated-diff/work-unit-executor.js'; +``` + +Add these private helpers inside `IngestBundleRunner`: + +```ts + private isIsolatedDiffEnabled(sourceKey: string): boolean { + return (this.deps.settings.isolatedDiffSourceKeys ?? []).includes(sourceKey); + } + + private createTrace(job: IngestBundleJob): IngestTraceWriter { + return new FileIngestTraceWriter({ + tracePath: this.deps.storage.resolveTracePath(job.jobId), + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + level: this.deps.settings.ingestTraceLevel ?? 'debug', + }); + } + + private wikiPageKeysFromPaths(paths: string[]): string[] { + return [ + ...new Set( + paths + .filter((path) => path.startsWith('wiki/global/') && path.endsWith('.md')) + .map((path) => path.slice('wiki/global/'.length, -'.md'.length)), + ), + ].sort(); + } + + private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] { + return paths + .filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/')) + .map((path) => { + const [, connectionId, fileName] = path.split('/'); + return { connectionId: connectionId ?? 
'', sourceName: (fileName ?? '').replace(/\.yaml$/, '') }; + }) + .filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0); + } +``` + +- [ ] **Step 4: Add isolated branch after planning** + +In `runInner()`, create the trace immediately after `syncId`: + +```ts + const trace = this.createTrace(job); + await trace.event('info', 'run', 'ingest_started', { + trigger: job.trigger, + bundleRefKind: job.bundleRef.kind, + }); +``` + +After `runs.create()`, bind run and sync context: + +```ts + const runTrace = trace.withContext({ runId: runRow.id, syncId }); + await runTrace.event('debug', 'snapshot', 'input_snapshot', { + baseSha, + stagedDir, + rawFileCount: currentHashes.size, + rawDirInWorktree, + diffSummary, + scopeFingerprint: scopeDescriptor?.fingerprint ?? null, + }); +``` + +After `workUnits` are planned and `stageIndex` is initialized, branch: + +```ts + const isolatedDiffEnabled = !overrideReport && this.isIsolatedDiffEnabled(job.sourceKey); + const isolatedDiffSummary = { + enabled: isolatedDiffEnabled, + integrationWorktreePath: isolatedDiffEnabled ? sessionWorktree.workdir : undefined, + ingestionBaseSha: undefined as string | undefined, + projectionSha: null as string | null, + acceptedPatches: 0, + textualConflicts: 0, + semanticConflicts: 0, + }; +``` + +Replace only the current `if (!overrideReport) { ...run work units... 
}` block +with a two-path branch: + +```ts + if (!overrideReport && isolatedDiffEnabled) { + await runTrace.event('info', 'routing', 'isolated_diff_enabled', { + sourceKey: job.sourceKey, + workUnitCount: workUnits.length, + integrationWorktreePath: sessionWorktree.workdir, + }); + + let projectionTouchedSources: TouchedSlSource[] = []; + let projectionChangedWikiPageKeys: string[] = []; + if (adapter.project) { + const projection = await traceTimed( + runTrace, + 'projection', + 'deterministic_projection', + { sourceKey: job.sourceKey }, + () => + adapter.project!({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + jobId: job.jobId, + runId: runRow.id, + stagedDir, + workdir: sessionWorktree.workdir, + parseArtifacts, + }), + ); + if (projection.errors.length > 0) { + await this.deps.runs.markFailed(runRow.id); + throw new Error(`deterministic projection failed: ${projection.errors.join('; ')}`); + } + projectionTouchedSources = projection.touchedSources; + projectionChangedWikiPageKeys = projection.changedWikiPageKeys; + const projectionCommit = await sessionWorktree.git.commitStaged( + `ingest(${job.sourceKey}): deterministic projection syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ); + isolatedDiffSummary.projectionSha = projectionCommit.created ? projectionCommit.commitHash : null; + } + + const ingestionBaseSha = await sessionWorktree.git.revParseHead(); + isolatedDiffSummary.ingestionBaseSha = ingestionBaseSha; + const patchDir = join(this.deps.storage.homeDir, 'ingest-patches', job.jobId); + const workUnitSettings = { + maxConcurrency: this.deps.settings.workUnitMaxConcurrency ?? 1, + stepBudget: this.deps.settings.workUnitStepBudget ?? 40, + failureMode: this.deps.settings.workUnitFailureMode ?? 
'continue', + }; + const limitWorkUnit = pLimit(workUnitSettings.maxConcurrency); + const workUnitOutcomesByIndex: WorkUnitOutcome[] = []; + let completedWorkUnits = 0; + + await Promise.all( + workUnits.map((wu, index) => + limitWorkUnit(async () => { + const outcome = await runIsolatedWorkUnit({ + unitIndex: index, + ingestionBaseSha, + sessionWorktreeService: this.deps.sessionWorktreeService, + patchDir, + trace: runTrace, + workUnit: wu, + run: async (child) => { + const scopedWikiService = this.deps.wikiService.forWorktree(child.workdir); + const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(child.workdir); + return this.runWorkUnitInWorktree({ + job, + wu, + worktree: child, + stagedDir, + contextReport, + ingestToolMetadata, + slConnectionIds, + wikiIndex, + slIndex, + priorProvenance: await this.deps.provenance.findLatestArtifactsForRawPaths( + job.connectionId, + job.sourceKey, + wu.rawFiles, + ), + scopedWikiService, + scopedSemanticLayerService, + baseFraming, + skillsPrompt, + canonicalPins, + workUnitSettings, + transcriptDir, + transcriptSummaries, + recordTranscriptEntry, + stageIndex, + currentTableExists: (tableRef) => + this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef), + onStepFinish: ({ stepIndex, stepBudget }) => { + memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); + }, + }); + }, + }); + workUnitOutcomesByIndex[index] = outcome; + memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: outcome.unitKey, + status: outcome.status, + ...(outcome.reason ? 
{ reason: outcome.reason } : {}), + }); + completedWorkUnits += 1; + await stage3?.updateProgress( + completedWorkUnits / workUnits.length, + `${completedWorkUnits} of ${workUnits.length} work units complete`, + ); + }), + ), + ); + + workUnitOutcomes.push(...workUnitOutcomesByIndex.filter((outcome): outcome is WorkUnitOutcome => Boolean(outcome))); + failedWorkUnits.push(...workUnitOutcomes.filter((outcome) => outcome.status === 'failed').map((outcome) => outcome.unitKey)); + stageIndex.workUnits = workUnitOutcomes.map((o) => ({ + unitKey: o.unitKey, + rawFiles: workUnits.find((w) => w.unitKey === o.unitKey)?.rawFiles ?? [], + status: o.status, + reason: o.reason, + actions: o.actions, + touchedSlSources: o.touchedSlSources, + slDisallowed: o.slDisallowed, + slDisallowedReason: o.slDisallowedReason, + })); + + for (const [index, outcome] of workUnitOutcomes.entries()) { + if (outcome.status !== 'success' || !outcome.patchPath) { + continue; + } + const wu = workUnits[index]!; + const integration = await integrateWorkUnitPatch({ + unitKey: outcome.unitKey, + patchPath: outcome.patchPath, + integrationGit: sessionWorktree.git, + trace: runTrace, + author: this.deps.storage.systemGitAuthor, + slDisallowed: wu.slDisallowed === true, + validateAppliedTree: async (touchedPaths) => { + await validateFinalIngestArtifacts({ + connectionIds: slConnectionIds, + changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths), + touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths), + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources({ + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, 
+ probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, touched), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + }); + if (integration.status === 'textual_conflict') { + isolatedDiffSummary.textualConflicts += 1; + await this.deps.runs.markFailed(runRow.id); + cleanupOutcome = 'conflict'; + throw new Error(`isolated diff textual conflict in ${outcome.unitKey}: ${integration.reason}`); + } + if (integration.status === 'semantic_conflict') { + isolatedDiffSummary.semanticConflicts += 1; + await this.deps.runs.markFailed(runRow.id); + cleanupOutcome = 'conflict'; + throw new Error(`isolated diff semantic conflict in ${outcome.unitKey}: ${integration.reason}`); + } + isolatedDiffSummary.acceptedPatches += 1; + } + + await validateFinalIngestArtifacts({ + connectionIds: slConnectionIds, + changedWikiPageKeys: [ + ...new Set([ + ...projectionChangedWikiPageKeys, + ...workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? 
[]).flatMap((path) => this.wikiPageKeysFromPaths([path])), + ]), + ], + touchedSlSources: [ + ...projectionTouchedSources, + ...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources), + ], + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources({ + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, touched), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + } else if (!overrideReport) { + await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', { sourceKey: job.sourceKey }); + // Keep the existing shared-worktree WorkUnit block here unchanged. + } +``` + +Extract the existing inner `runSingleWorkUnit()` implementation into a private +method named `runWorkUnitInWorktree()` before this replacement. 
Its code is the +current body of `runSingleWorkUnit()` with these explicit parameters: + +```ts + private async runWorkUnitInWorktree(input: { + job: IngestBundleJob; + wu: WorkUnit; + worktree: IngestSessionWorktree; + stagedDir: string; + contextReport: ContextEvidenceIndexSummary | null; + ingestToolMetadata: { runId: string; jobId: string; syncId: string; sourceKey: string }; + slConnectionIds: string[]; + wikiIndex: string; + slIndex: string; + priorProvenance: Map; + scopedWikiService: ReturnType; + scopedSemanticLayerService: ReturnType; + baseFraming: string; + skillsPrompt: string; + canonicalPins: CanonicalPin[]; + workUnitSettings: { maxConcurrency: number; stepBudget: number; failureMode: 'abort' | 'continue' }; + transcriptDir: string; + transcriptSummaries: Map; + recordTranscriptEntry(path: string): (entry: ToolCallLogEntry) => void; + stageIndex: StageIndex; + currentTableExists(tableRef: string): Promise<boolean>; + onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + }): Promise<WorkUnitOutcome> +``` + +The method must preserve the current tool sessions, transcript wrapping, skill +loading behavior, unmapped fallback behavior, `validateWikiRefs`, and +`validateTouchedSources`. The only value changes are: + +- Use `input.worktree.workdir`, `input.worktree.git`, and + `input.worktree.config`. +- Use `input.scopedWikiService` and `input.scopedSemanticLayerService`. +- Use `input.priorProvenance` instead of loading it inside the method. +- Use `input.onStepFinish`. + +- [ ] **Step 5: Add report trace and isolated summary** + +In the final `reportBody`, add: + +```ts + tracePath: runTrace.tracePath, + isolatedDiff: isolatedDiffEnabled ? isolatedDiffSummary : undefined, +``` + +Before provenance insertion, replace unknown-hash fallback with validation: + +```ts + validateProvenanceRawPaths({ + rows: provenanceRows, + currentRawPaths: new Set(currentHashes.keys()), + deletedRawPaths: new Set(eviction?.deletedRawPaths ?? 
[]), + }); +``` + +Then change: + +```ts +const hash = currentHashes.get(rawPath) ?? 'unknown'; +``` + +to: + +```ts +const hash = currentHashes.get(rawPath) ?? ''; +``` + +for action and artifact-resolution provenance. The validation above guarantees +that non-eviction rows from current actions have a current hash. + +At the end of a successful run, before `return`, add: + +```ts + await runTrace.event('info', 'run', 'ingest_finished', { + status: 'completed', + commitSha, + failedWorkUnits, + tracePath: runTrace.tracePath, + }); +``` + +To record failures that happen after trace creation, wrap the body of +`runInner()` in a `try`/`catch` that emits a trace event and rethrows, so the +outer `catch` path in `run()` still receives the original error: + +```ts + } catch (error) { + await trace.event('error', 'run', 'ingest_failed', { + tracePath: trace.tracePath, + }, error); + throw error; + } +``` + +Start the `try` immediately after `const trace = this.createTrace(job);` and +enclose the rest of the `runInner()` body in it, so that `trace` is in scope +inside the `catch`. + +- [ ] **Step 6: Run focused runner tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-bundle.runner.test.ts \ + src/ingest/ingest-trace.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/context/src/ingest/types.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/index.ts +git commit -m "feat: route selected ingest sources through isolated diffs" +``` + +--- + +### Task 8: V1 regression coverage and Metabase rollout + +**Files:** +- Create: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` + +- [ ] **Step 1: Write isolated-diff regression tests** + +Create `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +with these six tests: + +```ts +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { GitService, SessionWorktreeService } from '../core/index.js'; +import { LocalGitFileStore } from '../project/local-git-file-store.js'; +import { addTouchedSlSource } from '../tools/index.js'; +import { IngestBundleRunner } from './ingest-bundle.runner.js'; +import type { IngestBundleRunnerDeps } from './ports.js'; + +async function makeRealGitRuntime() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-runner-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + const configService = new LocalGitFileStore(configDir); + const sessionWorktreeService = new SessionWorktreeService({ + coreConfig: { + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }, + gitService: 
git, + configService, + }); + return { homeDir, configDir, git, configService, sessionWorktreeService }; +} + +function makeDeps(runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>) { + const adapter = { + source: 'metabase', + skillNames: [], + detect: vi.fn().mockResolvedValue(true), + chunk: vi.fn().mockResolvedValue({ + workUnits: [ + { unitKey: 'card-wiki', rawFiles: ['cards/wiki.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }), + }; + const scopedWikiService = { + readPage: vi.fn(async (_scope: string, _scopeId: string | null, key: string) => { + const path = join(runtime.configDir, 'wiki/global', `${key}.md`); + const raw = await readFile(path, 'utf-8').catch(() => null); + if (!raw) return null; + const [, yaml = '', content = ''] = /^---\n([\s\S]*?)\n---\n?([\s\S]*)$/.exec(raw) ?? []; + const slRefs = /sl_refs:\n((?: - .+\n?)*)/.exec(yaml)?.[1]?.split('\n').map((line) => line.trim().replace(/^- /, '')).filter(Boolean) ?? []; + return { pageKey: key, frontmatter: { summary: key, usage_mode: 'auto', sl_refs: slRefs }, content: content.trim() }; + }), + listPageKeys: vi.fn().mockResolvedValue(['account-segments']), + }; + const semanticLayerService = { + forWorktree: vi.fn(() => semanticLayerService), + loadAllSources: vi.fn(async () => { + const raw = await readFile(join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), 'utf-8').catch(() => ''); + const hasCents = raw.includes('total_contract_arr_cents'); + return { + sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: hasCents ? 
'total_contract_arr_cents' : 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }; + }), + listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']), + }; + const deps: IngestBundleRunnerDeps = { + runs: { create: vi.fn().mockResolvedValue({ id: 'run-1' }), markCompleted: vi.fn(), markFailed: vi.fn() }, + provenance: { insertMany: vi.fn(), findLatestHashesForCompletedSyncs: vi.fn().mockResolvedValue(new Map()), findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(new Map()) }, + reports: { create: vi.fn().mockResolvedValue({ id: 'report-1' }), findByJobId: vi.fn().mockResolvedValue(null), markSuperseded: vi.fn() }, + canonicalPins: { listPins: vi.fn().mockResolvedValue([]) }, + registry: { get: vi.fn().mockReturnValue(adapter), register: vi.fn(), has: vi.fn(), list: vi.fn() }, + diffSetService: { compute: vi.fn().mockResolvedValue({ added: ['cards/wiki.json', 'cards/source.json'], modified: [], deleted: [], unchanged: [] }) }, + sessionWorktreeService: runtime.sessionWorktreeService, + agentRunner: { runLoop: vi.fn() }, + gitService: runtime.git, + lockingService: { withLock: vi.fn(async (_key, fn) => fn()) }, + storage: { + homeDir: join(runtime.configDir, '.ktx'), + systemGitAuthor: { name: 'KTX Test', email: 'system@ktx.local' }, + resolveUploadDir: (id) => join(runtime.homeDir, 'upload', id), + resolvePullDir: (id) => join(runtime.homeDir, 'pull', id), + resolveTranscriptDir: (id) => join(runtime.configDir, '.ktx/ingest-transcripts', id), + resolveTracePath: (id) => join(runtime.configDir, '.ktx/ingest-traces', id, 'trace.jsonl'), + }, + settings: { memoryIngestionModel: 'test', probeRowCount: 1, isolatedDiffSourceKeys: ['metabase'], ingestTraceLevel: 'trace' }, + skillsRegistry: { listSkills: vi.fn().mockResolvedValue([]), getSkill: vi.fn().mockResolvedValue(null), buildSkillsPrompt: vi.fn().mockReturnValue(''), stripFrontmatter: vi.fn((body) => body) }, + 
promptService: { loadPrompt: vi.fn().mockResolvedValue('base') }, + wikiService: { forWorktree: vi.fn(() => scopedWikiService), readPage: scopedWikiService.readPage, syncFromCommit: vi.fn() }, + knowledgeIndex: { listPagesForUser: vi.fn().mockResolvedValue([]) }, + knowledgeSlRefs: { syncFromWiki: vi.fn() }, + semanticLayerService: semanticLayerService as never, + slSearchService: { indexSources: vi.fn() }, + slSourcesRepository: {}, + slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) }, + connections: { listEnabledConnections: vi.fn().mockResolvedValue([]), getConnectionById: vi.fn() } as never, + toolsetFactory: { createIngestWuToolset: vi.fn(() => ({ toRuntimeTools: vi.fn(() => ({})) })) }, + commitMessages: { enqueueForExternalCommit: vi.fn() }, + }; + return { deps, adapter }; +} + +describe('IngestBundleRunner isolated diff path', () => { + it('rejects the Metabase stale-measure wiki body regression before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps } = makeDeps(runtime); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.unitKey === 'card-wiki') { + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? 
runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(currentSession.configService.rootDir ?? runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr_cents`.\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'account-segments', detail: 'Account segments' }); + currentSession.actions.push({ target: 'sl', type: 'created', key: 'mart_account_segments', detail: 'Cents measure', targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], 'wu wiki', 'KTX Test', 'system@ktx.local'); + } + if (params.telemetryTags.unitKey === 'card-source') { + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? 
runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ target: 'sl', type: 'updated', key: 'mart_account_segments', detail: 'Dollar measure', targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/mart_account_segments.yaml'], 'wu source', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse/metabase/s'); + await mkdir(rawDir, { recursive: true }); + await writeFile(join(rawDir, 'wiki.json'), '{}'); + await writeFile(join(rawDir, 'source.json'), '{}'); + return { currentHashes: new Map([['cards/wiki.json', 'h1'], ['cards/source.json', 'h2']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' }; + }); + + await expect( + runner.run({ jobId: 'job-1', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/total_contract_arr_cents/); + await expect(readFile(join(runtime.configDir, '.ktx/ingest-traces/job-1/trace.jsonl'), 'utf-8')).resolves.toContain('patch_semantic_conflict'); + expect(deps.gitService.squashMergeIntoMain).toBeDefined(); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + +}); +``` + +Add these five additional `it()` blocks inside the same `describe()` block. 
+They use the same `makeRealGitRuntime()` and `makeDeps()` helpers from the +first test: + +```ts + it('accepts two isolated work units that edit different wiki pages', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'page-a', rawFiles: ['pages/a.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'page-b', rawFiles: ['pages/b.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const unitKey = params.telemetryTags.unitKey; + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? 
runtime.configDir, `wiki/global/${unitKey}.md`), + `---\nsummary: ${unitKey}\nusage_mode: auto\n---\n\n${unitKey}\n`, + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: unitKey, detail: unitKey }); + await currentSession.gitService.commitFiles([`wiki/global/${unitKey}.md`], `wu ${unitKey}`, 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse/metabase/s'); + await mkdir(rawDir, { recursive: true }); + await writeFile(join(rawDir, 'a.json'), '{}'); + await writeFile(join(rawDir, 'b.json'), '{}'); + return { currentHashes: new Map([['pages/a.json', 'h1'], ['pages/b.json', 'h2']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' }; + }); + + const result = await runner.run({ jobId: 'job-clean', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }); + expect(result.failedWorkUnits).toEqual([]); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-clean/trace.jsonl'), 'utf-8'); + expect(trace.match(/patch_accepted/g)).toHaveLength(2); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('classifies same-source patch application failure as a textual conflict', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'orders-a', rawFiles: ['orders/a.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'orders-b', rawFiles: ['orders/b.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = 
vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const suffix = params.telemetryTags.unitKey === 'orders-a' ? 'a' : 'b'; + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse/orders.yaml'), + `name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures:\n - name: order_count_${suffix}\n expr: count(*)\n`, + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'orders'); + currentSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: suffix, targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/orders.yaml'], `wu ${suffix}`, 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse/metabase/s'); + await mkdir(rawDir, { recursive: true }); + return { currentHashes: new Map([['orders/a.json', 'h1'], ['orders/b.json', 'h2']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' }; + }); + + await expect( + runner.run({ jobId: 'job-text-conflict', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/isolated diff textual conflict/); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('makes deterministic projection visible to child worktrees before WorkUnit synthesis', async () => { + const runtime = await 
makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'wiki-projected', rawFiles: ['projected/wiki.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + adapter.project = vi.fn(async ({ workdir }) => { + await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(workdir, 'semantic-layer/warehouse/projected_orders.yaml'), + 'name: projected_orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures:\n - name: order_count\n expr: count(*)\n', + ); + return { warnings: [], errors: [], touchedSources: [{ connectionId: 'warehouse', sourceName: 'projected_orders' }], changedWikiPageKeys: [], result: { sourcesCreated: 1 } }; + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + await expect( + readFile(join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse/projected_orders.yaml'), 'utf-8'), + ).resolves.toContain('order_count'); + await mkdir(join(currentSession.configService.rootDir ?? runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? 
runtime.configDir, 'wiki/global/projected-orders.md'), + '---\nsummary: Projected orders\nusage_mode: auto\nsl_refs:\n - projected_orders\n---\n\nBad ref `projected_orders.missing_measure`.\n', + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'projected-orders', detail: 'Projected orders' }); + await currentSession.gitService.commitFiles(['wiki/global/projected-orders.md'], 'wu projected wiki', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse/metabase/s'); + await mkdir(rawDir, { recursive: true }); + return { currentHashes: new Map([['projected/wiki.json', 'h1']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' }; + }); + + await expect( + runner.run({ jobId: 'job-projection', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/projected_orders\.missing_measure/); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects Notion-style changed wiki pages with invalid sl_refs', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'notion-page', rawFiles: ['pages/notion.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + await mkdir(join(currentSession.configService.rootDir ?? 
runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? runtime.configDir, 'wiki/global/notion-page.md'), + '---\nsummary: Notion page\nusage_mode: auto\nsl_refs:\n - missing_source\n---\n\nBody\n', + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'notion-page', detail: 'Notion page' }); + await currentSession.gitService.commitFiles(['wiki/global/notion-page.md'], 'wu notion', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async () => ({ currentHashes: new Map([['pages/notion.json', 'h1']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' })); + + await expect( + runner.run({ jobId: 'job-invalid-slrefs', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/unknown sl_refs entry missing_source/); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects slDisallowed patches that touch semantic-layer files', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'lookml-mismatch', rawFiles: ['views/orders.lkml'], peerFileIndex: [], dependencyPaths: [], slDisallowed: true, slDisallowedReason: 'lookml_connection_mismatch' }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + await mkdir(join(currentSession.configService.rootDir ?? 
runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(currentSession.configService.rootDir ?? runtime.configDir, 'semantic-layer/warehouse/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + currentSession.actions.push({ target: 'sl', type: 'created', key: 'orders', detail: 'forbidden', targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/orders.yaml'], 'forbidden sl', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async () => ({ currentHashes: new Map([['views/orders.lkml', 'h1']]), rawDirInWorktree: 'raw-sources/warehouse/metabase/s' })); + + await expect( + runner.run({ jobId: 'job-sl-disallowed', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/warehouse\/orders.yaml/); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Run failing isolated regression tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: FAIL until the runner branch from Task 7 is complete. + +- [ ] **Step 3: Confirm Metabase remains privately allowlisted** + +In `packages/context/src/ingest/local-bundle-runtime.ts`, verify settings still +include: + +```ts + isolatedDiffSourceKeys: ['metabase'], +``` + +Do not add a public `executionMode`, `planningStrategy`, or `conflictPolicy` +adapter field. Do not add a CLI flag. 
+ +- [ ] **Step 4: Run isolated regression tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. The trace file assertions must prove that the run records input +snapshot, routing decision, WorkUnit child creation, patch collection, patch +application, semantic gate result, rollback/conflict events for failing cases, +and final run outcome. + +- [ ] **Step 5: Commit** + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/local-bundle-runtime.ts +git commit -m "test: cover isolated diff ingestion regressions" +``` + +--- + +### Task 9: Final verification and observability acceptance + +**Files:** +- Modify: no source files unless checks identify issues. + +- [ ] **Step 1: Run focused context tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/core/git.service.patch.test.ts \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run package tests** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. If this produces too much output, capture it: + +```bash +pnpm --filter @ktx/context run test 2>&1 | tee /tmp/ktx-context-isolated-diff-tests.log +``` + +Then inspect the failing section in `/tmp/ktx-context-isolated-diff-tests.log`. + +- [ ] **Step 4: Run dead-code check** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. 
Investigate any new Knip findings before adding ignores. + +- [ ] **Step 5: Run pre-commit for changed TypeScript files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/core/git.service.ts \ + packages/context/src/core/git.service.patch.test.ts \ + packages/context/src/ingest/ingest-trace.ts \ + packages/context/src/ingest/ingest-trace.test.ts \ + packages/context/src/ingest/wiki-body-refs.ts \ + packages/context/src/ingest/wiki-body-refs.test.ts \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts \ + packages/context/src/ingest/isolated-diff/git-patch.ts \ + packages/context/src/ingest/isolated-diff/git-patch.test.ts \ + packages/context/src/ingest/isolated-diff/work-unit-executor.ts \ + packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/types.ts \ + packages/context/src/ingest/ports.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/reports.ts \ + packages/context/src/ingest/report-snapshot.ts \ + packages/context/src/ingest/index.ts \ + packages/cli/src/ingest.ts +``` + +Expected: PASS. If `pre-commit` is unavailable or the configured hook +environment cannot run, record the exact error and rely on the focused tests, +type-check, dead-code, and `git diff --check`. + +- [ ] **Step 6: Verify trace usefulness manually** + +Run one isolated regression and inspect the trace: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "rejects the Metabase stale-measure wiki body regression before squash" +``` + +Expected: PASS. 
Open the test-created +`.ktx/ingest-traces/job-1/trace.jsonl` path printed by the failed-run assertion +or test output. Confirm it includes these events: + +- `ingest_started` +- `input_snapshot` +- `isolated_diff_enabled` +- `work_unit_child_created` +- `work_unit_patch_collected` +- `patch_apply_started` +- `semantic_gate_failed` or `patch_semantic_conflict` +- `ingest_failed` + +The trace must include `jobId`, `runId`, `syncId`, `connectionId`, +`sourceKey`, `unitKey` where applicable, worktree paths, patch paths, touched +paths, durations, error messages, and final status. + +- [ ] **Step 7: Commit final fixes** + +```bash +git status --short +git add packages/context/src packages/cli/src +git commit -m "feat: add isolated diff ingestion v1 core" +``` + +--- + +## Self-review + +Spec coverage: + +- Per-WorkUnit child worktrees, patch proposals, deterministic integration, + `slDisallowed` integration rejection, and fail-fast textual or semantic + conflicts are covered by Tasks 2, 5, 6, 7, and 8. +- The Metabase stale `total_contract_arr_cents` regression is covered by + Task 8. +- Deterministic projection before child worktree creation is covered by Task 7 + and the hybrid projection test in Task 8. +- Final global wiki body, wiki `sl_refs`, semantic-layer, and provenance gates + are covered by Tasks 3, 4, 7, and 8. +- Persistent postmortem observability is covered by Task 1 and required in every + ingestion task's acceptance checks. Task 9 explicitly verifies trace + usefulness from logs alone. + +Placeholder scan: + +- The implementation tasks contain exact file paths, commands, expected + results, and concrete code snippets. +- Task 8 contains concrete regression assertions for the Metabase incident, + clean integration, textual conflict, hybrid projection, invalid `sl_refs`, and + `slDisallowed` rejection. 
+ +Type consistency: + +- `IngestTraceWriter`, `IngestTraceLevel`, `ProjectionResult`, + `DeterministicProjectionContext`, `WorkUnitOutcome.patchPath`, + `patchTouchedPaths`, and `childWorktreePath` are introduced before later + tasks consume them. +- Report fields use `tracePath` and `isolatedDiff` consistently across + `reports.ts`, `report-snapshot.ts`, runner output, and CLI status output. diff --git a/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md new file mode 100644 index 00000000..850b4dc7 --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md @@ -0,0 +1,1786 @@ +# Isolated Diff Ingestion V1 Gates and Trace Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking isolated-diff ingestion gaps so the +actual final integration tree is globally gated and every failed run leaves a +persistent trace and stored failure report that are useful for postmortems. + +**Architecture:** Keep the isolated-diff runner private to the runner-owned +source allowlist, but make its safety boundary match the design: per-patch +gates still run during integration, reconciliation and follow-on deterministic +mutations are diffed, and one final global artifact gate runs after every +mutating integration-stage operation and before squash. Persistent JSONL traces +become the operational source of truth for postmortems, with start/finish/fail +events, timings, state snapshots, conflict classification, and a stored failure +report that lets `ktx ingest status <id>` surface the trace path even +when the run fails before the normal success report.
+ +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, simple-git, existing +`IngestBundleRunner`, `GitService`, `SessionWorktreeService`, +`SemanticLayerService`, `KnowledgeWikiService`, ingest report schemas, and CLI +status rendering. + +--- + +## Audit Summary + +The latest plan and commits implemented the first isolated-diff path and the +focused tests pass: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Current result: `7 passed`, `20 passed`. + +The remaining gaps below are v1-blocking: + +- The isolated branch runs `final_artifact_gates` immediately after accepted + WorkUnit patches, but reconciliation, post-processors, and wiki `sl_refs` + repair can still mutate the integration worktree afterward. The tree that is + squashed is therefore not globally gated after every mutating stage. +- Reconciliation changes are not captured as a diff against the + pre-reconciliation integration `HEAD`, and reconciliation-touched artifacts + are not included in a post-reconciliation artifact gate. +- Wiki frontmatter `sl_refs` validation checks only source existence. It does + not validate measure-level references such as + `mart_account_segments.total_contract_arr_cents`. +- Wiki body reference parsing treats every two-part inline-code token as a + semantic-layer reference, even when the left side is not a visible source. The + spec says those tokens must be ignored unless they name a visible source. +- Semantic-layer final gates validate only touched sources. They do not expand + the touched set to direct declared-join neighbors, including sources joined + from touched sources and sources that join to touched sources. 
+- `slDisallowed` and patch policy rejections can throw before integration emits + a structured conflict event or stored failure report. +- Failed runs before success-report creation do not leave a stored ingest + report, so `ktx ingest status <id>` cannot surface the trace path. +- Trace coverage does not yet include fetch/stage/detect/planning decisions, + reconciliation, post-processing, wiki repair, provenance validation and + insertion, squash, report creation, and failure-report creation with timings + and state needed for postmortem reconstruction. +- Failed child WorkUnit worktrees are preserved with `cleanup('crash')`. The + spec requires child worktrees to be cleaned up after diff, transcript, and + outcome metadata are persisted. Only the integration worktree should be + preserved for version-one resolver conflicts. + +Non-blocking gaps remain after this plan: + +- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct + durable writes to the isolated path. +- Promoting isolated diffs as the default for all connectors. +- Removing the old shared-worktree WorkUnit execution path. +- Interactive, CLI, or agent-driven conflict resolution. +- Auto-merging semantic conflicts that cannot be proven correct. +- Transitive SQL-projection dependency expansion beyond direct declared joins. +- Moving provenance rows to worktree files. +- Public connector knobs such as `executionMode`, `planningStrategy`, or + `conflictPolicy`. + +## File Structure + +- Modify `packages/context/src/ingest/wiki-body-refs.ts`. + Fix inline-code grammar so unknown two-part tokens are ignored, while + explicit `source:` and `table:` references remain validated. +- Modify `packages/context/src/ingest/wiki-body-refs.test.ts`. + Add regression coverage for ignored non-source two-part tokens. +- Modify `packages/context/src/ingest/artifact-gates.ts`. + Add source/entity frontmatter validation, direct join-neighbor expansion, and + reusable gate-scope helpers.
+- Modify `packages/context/src/ingest/artifact-gates.test.ts`. + Cover measure-level `sl_refs`, direct dependency validation, and final body + ref behavior. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Move the final global gate after reconciliation, post-processing, and wiki + ref repair. Add trace events around every meaningful phase, create stored + failure reports, and preserve only the integration worktree on conflicts. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Add regressions for reconciliation-created stale refs, failed-run report + trace surfacing, and trace event completeness. +- Modify `packages/context/src/ingest/isolated-diff/work-unit-executor.ts`. + Stop enforcing patch policy during collection, record patch metadata only, + and always remove child worktrees after outcome metadata is emitted. +- Modify `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts`. + Cover cleanup on failed WorkUnits. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Classify patch policy rejections as structured textual conflicts and emit + trace events before returning. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Cover `slDisallowed` policy rejection as a traced textual conflict. +- Modify `packages/context/src/ingest/reports.ts`. + Add report-level `status` and `failure` fields. +- Modify `packages/context/src/ingest/report-snapshot.ts`. + Parse the new failure report fields while preserving old reports. +- Modify `packages/context/src/ingest/report-snapshot.test.ts`. + Cover failed report parsing. +- Modify `packages/cli/src/ingest.ts`. + Render failed stored reports as `Status: error` even when no WorkUnit failed, + and keep the trace path near run identifiers. +- Modify `packages/cli/src/ingest.test.ts`. + Cover status output for a failed report with a trace path. 
+- Modify `docs-site/content/docs/cli-reference/ktx-ingest.mdx`. + Document that failed runs also write stored reports and that trace events + include phase timings, state snapshots, decisions, errors, and final outcome. + +--- + +### Task 1: Correct artifact gate semantics + +**Files:** +- Modify: `packages/context/src/ingest/wiki-body-refs.test.ts` +- Modify: `packages/context/src/ingest/wiki-body-refs.ts` +- Modify: `packages/context/src/ingest/artifact-gates.test.ts` +- Modify: `packages/context/src/ingest/artifact-gates.ts` + +- [ ] **Step 1: Write failing wiki body grammar tests** + +Append these tests inside the existing `describe('wiki body refs', ...)` block +in `packages/context/src/ingest/wiki-body-refs.test.ts`: + +```ts + it('ignores two-part inline code when the source is not visible', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'engineering-notes', + body: [ + 'A version token like `node.v22` is not a semantic-layer reference.', + 'A raw table must use `table:analytics.mart_account_segments`.', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments', + }); + + expect(invalid).toEqual([]); + }); + + it('still rejects explicit missing source and table references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: [ + '`source:missing_source`', + '`warehouse/source:missing_source`', + '`table:analytics.missing_table`', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async () => false, + }); + + expect(invalid).toEqual([ + 'account-segments: unknown semantic-layer source missing_source', + 'account-segments: unknown semantic-layer source warehouse/missing_source', + 'account-segments: unknown raw table analytics.missing_table', + ]); + }); +``` + +- [ ] **Step 2: Run wiki body tests to 
verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/wiki-body-refs.test.ts +``` + +Expected: FAIL because `node.v22` is treated as an unknown semantic-layer +source. + +- [ ] **Step 3: Implement the wiki body grammar fix** + +In `packages/context/src/ingest/wiki-body-refs.ts`, replace +`findInvalidWikiBodyRefs()` with this implementation: + +```ts +export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> { + const errors: string[] = []; + const sourceCache = new Map<string, SemanticLayerSource[]>(); + const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => { + const cached = sourceCache.get(connectionId); + if (cached) { + return cached; + } + const sources = await input.loadSources(connectionId); + sourceCache.set(connectionId, sources); + return sources; + }; + + const findSource = async ( + connectionIds: string[], + sourceName: string, + ): Promise<{ connectionId: string; source: SemanticLayerSource } | null> => { + for (const connectionId of connectionIds) { + const source = (await loadSources(connectionId)).find((candidate) => candidate.name === sourceName); + if (source) { + return { connectionId, source }; + } + } + return null; + }; + + for (const ref of parseWikiBodyRefs(input.body)) { + const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds; + if (ref.kind === 'table') { + const found = await Promise.all(connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef))); + if (!found.some(Boolean)) { + errors.push(`${input.pageKey}: unknown raw table ${ref.connectionId ? `${ref.connectionId}/` : ''}${ref.tableRef}`); + } + continue; + } + + const found = await findSource(connectionIds, ref.sourceName); + if (!found) { + if (ref.kind === 'sl_source') { + errors.push( + `${input.pageKey}: unknown semantic-layer source ${ref.connectionId ? 
`${ref.connectionId}/` : ''}${ref.sourceName}`, + ); + } + continue; + } + + if (ref.kind === 'sl_entity' && !entityNames(found.source).has(ref.entityName)) { + errors.push(`${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`); + } + } + + return errors; +} +``` + +- [ ] **Step 4: Run wiki body tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/wiki-body-refs.test.ts +``` + +Expected: PASS. + +- [ ] **Step 5: Write failing artifact gate tests** + +Append these tests inside `describe('artifact gates', ...)` in +`packages/context/src/ingest/artifact-gates.test.ts`: + +```ts + it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => { + const wikiService = { + readPage: vi.fn().mockResolvedValue({ + pageKey: 'account-segments', + frontmatter: { + summary: 'Account segments', + usage_mode: 'auto', + sl_refs: ['mart_account_segments.total_contract_arr_cents'], + }, + content: 'ARR uses a renamed measure.', + }), + }; + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }), + tableExists: async () => true, + }), + ).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/); + }); + + it('validates direct 
declared-join neighbors of touched semantic-layer sources', async () => { + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'orders', + grain: ['order_id'], + columns: [{ name: 'order_id', type: 'string' }, { name: 'account_id', type: 'string' }], + joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + { + name: 'accounts', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'account_count', expr: 'count(*)' }], + }, + { + name: 'segments', + grain: ['segment_id'], + columns: [{ name: 'segment_id', type: 'string' }, { name: 'account_id', type: 'string' }], + joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }], + measures: [], + }, + ], + loadErrors: [], + }), + }; + const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] }); + + await validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: [], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }], + wikiService: { readPage: vi.fn() } as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources, + tableExists: async () => true, + }); + + expect(validateTouchedSources).toHaveBeenCalledWith([ + { connectionId: 'warehouse', sourceName: 'accounts' }, + { connectionId: 'warehouse', sourceName: 'orders' }, + { connectionId: 'warehouse', sourceName: 'segments' }, + ]); + }); +``` + +- [ ] **Step 6: Run artifact gate tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts +``` + +Expected: FAIL because frontmatter entity refs and join-neighbor expansion are +not implemented. 
+ +- [ ] **Step 7: Implement frontmatter entity refs and direct dependency expansion** + +In `packages/context/src/ingest/artifact-gates.ts`, replace the existing +`bareSlRef()` helper and `validateWikiSlRefs()` with this code, then update +`validateFinalIngestArtifacts()` as shown below: + +```ts +function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } { + const withoutConnection = ref.includes('/') ? ref.slice(ref.indexOf('/') + 1) : ref; + const connectionId = ref.includes('/') ? ref.slice(0, ref.indexOf('/')) : null; + const [sourceName = '', entityName = null] = withoutConnection.split('.', 2); + return { connectionId, sourceName, entityName }; +} + +function slEntityNames(source: Awaited<ReturnType<FinalArtifactGateInput['semanticLayerService']['loadAllSources']>>['sources'][number]): Set<string> { + return new Set([ + ...(source.measures ?? []).map((measure) => measure.name), + ...(source.columns ?? []).map((column) => column.name), + ...(source.segments ?? []).map((segment) => segment.name), + ]); +} + +function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] { + const seen = new Set<string>(); + const unique: TouchedSlSource[] = []; + for (const source of sources) { + const key = `${source.connectionId}:${source.sourceName}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(source); + } + return unique.sort((left, right) => { + const byConnection = left.connectionId.localeCompare(right.connectionId); + return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection; + }); +} + +async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> { + const expanded = [...input.touchedSlSources]; + const touchedByConnection = new Map<string, Set<string>>(); + for (const source of input.touchedSlSources) { + const bucket = touchedByConnection.get(source.connectionId) ?? 
new Set<string>(); + bucket.add(source.sourceName); + touchedByConnection.set(source.connectionId, bucket); + } + + for (const connectionId of input.connectionIds) { + const touched = touchedByConnection.get(connectionId); + if (!touched || touched.size === 0) { + continue; + } + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + for (const source of sources) { + const sourceIsTouched = touched.has(source.name); + if (sourceIsTouched) { + for (const join of source.joins ?? []) { + expanded.push({ connectionId, sourceName: join.to }); + } + } + if ((source.joins ?? []).some((join) => touched.has(join.to))) { + expanded.push({ connectionId, sourceName: source.name }); + } + } + } + + return uniqueTouchedSources(expanded); +} + +async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> { + const errors: string[] = []; + const sourcesByConnection = new Map<string, Awaited<ReturnType<FinalArtifactGateInput['semanticLayerService']['loadAllSources']>>['sources']>(); + for (const connectionId of input.connectionIds) { + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + sourcesByConnection.set(connectionId, sources); + } + + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + for (const ref of page.frontmatter.sl_refs ?? []) { + const parsed = parseSlRef(ref); + const candidateConnections = parsed.connectionId ? 
[parsed.connectionId] : input.connectionIds; + let source: Awaited<ReturnType<FinalArtifactGateInput['semanticLayerService']['loadAllSources']>>['sources'][number] | undefined; + for (const connectionId of candidateConnections) { + source = sourcesByConnection.get(connectionId)?.find((candidate) => candidate.name === parsed.sourceName); + if (source) { + break; + } + } + if (!source) { + errors.push(`${pageKey}: unknown sl_refs entry ${ref}`); + continue; + } + if (parsed.entityName && !slEntityNames(source).has(parsed.entityName)) { + errors.push(`${pageKey}: unknown sl_refs entity ${ref}`); + } + } + } + return errors; +} +``` + +Then replace the first two lines inside `validateFinalIngestArtifacts()` with: + +```ts + const touchedWithDependencies = await expandTouchedSlSourcesWithDirectJoinNeighbors(input); + const validation = await input.validateTouchedSources(touchedWithDependencies); +``` + +- [ ] **Step 8: Run artifact gate tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts src/ingest/wiki-body-refs.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 9: Commit artifact gate fixes** + +```bash +git add packages/context/src/ingest/wiki-body-refs.ts \ + packages/context/src/ingest/wiki-body-refs.test.ts \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts +git commit -m "fix(ingest): tighten final artifact gates" +``` + +### Task 2: Gate the actual final integration tree + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` + +- [ ] **Step 1: Write failing reconciliation stale-reference regression** + +Append this test to `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +inside the existing `describe('IngestBundleRunner isolated diff path', ...)` +block: + +```ts + it('runs final artifact gates after reconciliation mutates the integration tree', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 
'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'mart_account_segments', + detail: 'Source with renamed ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/mart_account_segments.yaml'], 'wu source', 'KTX Test', 'system@ktx.local'); + } else { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nReconcile wrote stale ARR `mart_account_segments.total_contract_arr_cents`.\n', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Stale reconcile wiki page', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'reconcile wiki', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + await expect( + runner.run({ jobId: 'job-reconcile-stale', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/total_contract_arr_cents/); + + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-reconcile-stale/trace.jsonl'), 'utf-8'); + expect(trace).toContain('reconciliation_finished'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('ingest_failed'); + expect(await runtime.git.revParseHead()).not.toContain('reconcile wiki'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Run the failing reconciliation regression** + +Run: + +```bash +pnpm --filter @ktx/context 
exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "after reconciliation" +``` + +Expected: FAIL because the current runner gates before reconciliation and then +squashes the invalid reconciled page. + +- [ ] **Step 3: Add final gate scope helpers to the runner** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add these private +helpers after `touchedSlSourcesFromPaths()`: + +```ts + private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] { + return actions + .filter((action) => action.target === 'sl') + .map((action) => ({ + connectionId: actionTargetConnectionId(action, fallbackConnectionId), + sourceName: action.key, + })); + } + + private wikiPageKeysFromActions(actions: MemoryAction[]): string[] { + return actions.filter((action) => action.target === 'wiki').map((action) => action.key); + } + + private uniqueWikiPageKeys(keys: string[]): string[] { + return [...new Set(keys.filter((key) => key.length > 0))].sort(); + } + + private uniqueTouchedSlSources(sources: TouchedSlSource[]): TouchedSlSource[] { + const seen = new Set<string>(); + const unique: TouchedSlSource[] = []; + for (const source of sources) { + const key = `${source.connectionId}:${source.sourceName}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(source); + } + return unique.sort((left, right) => { + const byConnection = left.connectionId.localeCompare(right.connectionId); + return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection; + }); + } +``` + +- [ ] **Step 4: Track integration mutations after WorkUnit patch integration** + +In `runInner()` in `packages/context/src/ingest/ingest-bundle.runner.ts`, add +this variable before the Stage 4 reconciliation block: + +```ts + const preReconciliationSha = await sessionWorktree.git.revParseHead(); +``` + +Remove the isolated-branch `traceTimed(... 
'final_artifact_gates' ...)` block +that currently runs before the `else if (!overrideReport)` branch ends. Keep +per-patch `validateAppliedTree` in `integrateWorkUnitPatch()` unchanged. + +- [ ] **Step 5: Run the final global gate after reconciliation, post-processing, and repair** + +In `runInner()`, immediately after `wikiSlRefRepairResult = await +repairWikiSlRefs(...)` and before Stage 6 starts, add this block: + +```ts + const postReconciliationSha = await sessionWorktree.git.revParseHead(); + const postReconciliationPaths = + preReconciliationSha && postReconciliationSha && preReconciliationSha !== postReconciliationSha + ? (await sessionWorktree.git.diffNameStatus(preReconciliationSha, postReconciliationSha)).map((entry) => entry.path) + : []; + const finalChangedWikiPageKeys = this.uniqueWikiPageKeys([ + ...(isolatedDiffEnabled ? projectionChangedWikiPageKeys : []), + ...workUnitOutcomes + .flatMap((outcome) => outcome.patchTouchedPaths ?? []) + .flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...this.wikiPageKeysFromActions(reconcileActions), + ...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...wikiSlRefRepairResult.repairs + .filter((repair) => repair.scope === 'GLOBAL') + .map((repair) => repair.pageKey), + ]); + const finalTouchedSlSources = this.uniqueTouchedSlSources([ + ...(isolatedDiffEnabled ? projectionTouchedSources : []), + ...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources), + ...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId), + ...this.touchedSlSourcesFromPaths(postReconciliationPaths), + ...(postProcessorOutcome?.touchedSources ?? 
[]), + ]); + + await traceTimed( + runTrace, + 'final_gates', + 'final_artifact_gates', + { + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + preReconciliationSha, + postReconciliationSha, + postReconciliationPaths, + reconciliationActionCount: reconcileActions.length, + wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length, + }, + async () => { + await validateFinalIngestArtifacts({ + connectionIds: repairConnectionIds, + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + ); +``` + +Use the existing `projectionTouchedSources` and `projectionChangedWikiPageKeys` +variables from the isolated branch by declaring them before the branch instead +of inside it: + +```ts + let projectionTouchedSources: TouchedSlSource[] = []; + let projectionChangedWikiPageKeys: string[] = []; +``` + +- [ ] **Step 6: Run the reconciliation regression** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "after reconciliation" +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Run isolated runner tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit final gate ordering** + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "fix(ingest): gate isolated final integration tree" +``` + +### Task 3: Complete persistent traces and failed-run surfacing + +**Files:** +- Modify: `packages/context/src/ingest/reports.ts` +- Modify: `packages/context/src/ingest/report-snapshot.ts` +- Modify: `packages/context/src/ingest/report-snapshot.test.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Modify: `packages/cli/src/ingest.ts` +- Modify: `packages/cli/src/ingest.test.ts` +- Modify: `docs-site/content/docs/cli-reference/ktx-ingest.mdx` + +- [ ] **Step 1: Add failing report schema coverage for failed runs** + +Append this test to `packages/context/src/ingest/report-snapshot.test.ts`: + +```ts + it('parses failed ingest reports with trace and failure details', () => { + const snapshot = parseIngestReportSnapshot({ + id: 'report-failed', + runId: 'run-failed', + jobId: 'job-failed', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-17T12:00:00.000Z', + body: { + status: 'failed', + syncId: 'sync-failed', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl', + failure: { + phase: 'final_gates', + message: 'final artifact gates failed', + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + 
provenanceRows: [], + toolTranscripts: [], + }, + }); + + expect(snapshot.body.status).toBe('failed'); + expect(snapshot.body.failure).toEqual({ + phase: 'final_gates', + message: 'final artifact gates failed', + }); + expect(snapshot.body.tracePath).toContain('trace.jsonl'); + }); +``` + +- [ ] **Step 2: Run report snapshot test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/report-snapshot.test.ts -t "failed ingest reports" +``` + +Expected: FAIL because `status` and `failure` are not typed or parsed. + +- [ ] **Step 3: Add report status and failure fields** + +In `packages/context/src/ingest/reports.ts`, add this interface after +`IngestReportPostProcessorOutcome`: + +```ts +export interface IngestReportFailure { + phase: string; + message: string; +} +``` + +Then add these fields to `IngestReportBody`: + +```ts + status?: 'completed' | 'failed'; + failure?: IngestReportFailure; +``` + +In `packages/context/src/ingest/report-snapshot.ts`, add this schema near the +other body field schemas: + +```ts +const ingestReportFailureSchema = z.object({ + phase: z.string().min(1), + message: z.string().min(1), +}); +``` + +Then add these fields to the `body` object schema: + +```ts + status: z.enum(['completed', 'failed']).optional(), + failure: ingestReportFailureSchema.optional(), +``` + +- [ ] **Step 4: Run report snapshot tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Write failing CLI status test for failed reports** + +In `packages/cli/src/ingest.test.ts`, add a test near the existing ingest +status tests: + +```ts + it('prints trace path and error status for stored failed ingest reports', async () => { + const io = makeIo(); + const report = { + id: 'report-failed', + runId: 'run-failed', + jobId: 'job-failed', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-17T12:00:00.000Z', + body: { + status: 'failed', + syncId: 'sync-failed', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl', + failure: { phase: 'final_gates', message: 'final artifact gates failed' }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }; + + await runKtxIngest( + { command: 'status', projectDir: '/project', runId: 'run-failed', outputMode: 'plain', inputMode: 'disabled' }, + { + loadProject: vi.fn().mockResolvedValue({ projectDir: '/project' }), + getLocalIngestStatus: vi.fn().mockResolvedValue(report), + } as never, + io, + ); + + expect(io.stdout()).toContain('Trace: /project/.ktx/ingest-traces/job-failed/trace.jsonl'); + expect(io.stdout()).toContain('Status: error'); + expect(io.stdout()).toContain('Error: final artifact gates failed'); + }); +``` + +Use the actual local test helpers in `packages/cli/src/ingest.test.ts`. If the +file names the command function or IO helper differently, keep the assertions +exactly as written and adapt only the helper calls. 
+ +- [ ] **Step 6: Update CLI rendering** + +In `packages/cli/src/ingest.ts`, replace `reportStatus()` with: + +```ts +function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { + return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; +} +``` + +In `failedReportMessage()`, add this block before reading `failedCount`: + +```ts + if (report.body.status === 'failed' && report.body.failure?.message) { + return sanitizeMemoryFlowError(report.body.failure.message); + } +``` + +- [ ] **Step 7: Add failed-run report creation state to the runner** + +In the runner class in `packages/context/src/ingest/ingest-bundle.runner.ts`, add +this private helper method near `createTrace()`: + +```ts + private errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); + } +``` + +Inside `runInner()`, immediately after `const trace = this.createTrace(job);`, +add: + +```ts + let activeTrace: IngestTraceWriter = trace; + let activePhase = 'run'; + let runRow: Awaited<ReturnType<typeof this.deps.runs.create>> | null = null; + let latestDiffSummary: IngestDiffSummary = { added: 0, modified: 0, deleted: 0, unchanged: 0 }; + let latestWorkUnits: WorkUnitOutcome[] = []; + let latestFailedWorkUnits: string[] = []; + let latestReconciliationSkipped = true; + let latestIsolatedDiffSummary: + | { + enabled: boolean; + integrationWorktreePath?: string; + ingestionBaseSha?: string; + projectionSha?: string | null; + acceptedPatches: number; + textualConflicts: number; + semanticConflicts: number; + } + | undefined; +``` + +Replace the existing inner `const runRow = await this.deps.runs.create(...)` +with: + +```ts + runRow = await this.deps.runs.create({ + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + trigger: job.trigger, + scopeFingerprint: scopeDescriptor?.fingerprint ?? 
null, + }); +``` + +After creating `runTrace`, set: + +```ts + activeTrace = runTrace; +``` + +After computing `diffSummary`, set: + +```ts + latestDiffSummary = diffSummary; +``` + +After `workUnitOutcomes.push(...)`, set: + +```ts + latestWorkUnits = workUnitOutcomes; + latestFailedWorkUnits = failedWorkUnits; +``` + +After `isolatedDiffSummary` is created, set: + +```ts + latestIsolatedDiffSummary = isolatedDiffSummary; +``` + +After reconciliation finishes, set: + +```ts + latestReconciliationSkipped = reconcileOutcome.skipped; +``` + +In the success `reportBody`, add: + +```ts + status: 'completed' as const, +``` + +In the outer `catch`, replace the existing trace event with: + +```ts + await activeTrace.event( + 'error', + 'run', + 'ingest_failed', + { + tracePath: activeTrace.tracePath, + phase: activePhase, + runId: runRow?.id ?? null, + syncId, + }, + error, + ); + if (runRow) { + await this.deps.reports.create({ + runId: runRow.id, + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + body: { + status: 'failed' as const, + syncId, + diffSummary: latestDiffSummary, + commitSha: null, + tracePath: activeTrace.tracePath, + isolatedDiff: latestIsolatedDiffSummary, + failure: { + phase: activePhase, + message: this.errorMessage(error), + }, + workUnits: latestWorkUnits.map((wu) => ({ + unitKey: wu.unitKey, + rawFiles: [], + status: wu.status, + reason: wu.reason, + actions: wu.actions, + touchedSlSources: wu.touchedSlSources, + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + })), + failedWorkUnits: latestFailedWorkUnits, + reconciliationSkipped: latestReconciliationSkipped, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + artifactResolutions: [], + evictionInputs: [], + reconciliationActions: [], + evictionDecisions: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: 
Array.from(transcriptSummaries.values()).map((summary) => ({ + unitKey: summary.unitKey, + path: summary.path, + toolCallCount: summary.toolCallCount, + errorCount: summary.errorCount, + toolNames: Array.from(summary.toolNames).sort(), + })), + }, + }); + await activeTrace.event('info', 'report', 'failure_report_created', { + runId: runRow.id, + jobId: job.jobId, + tracePath: activeTrace.tracePath, + }); + } + throw error; +``` + +At each major phase, assign `activePhase` before work begins: + +```ts + activePhase = 'fetch'; + activePhase = 'stage_raw_files'; + activePhase = 'diff'; + activePhase = 'detect'; + activePhase = 'planning'; + activePhase = 'work_units'; + activePhase = 'integration'; + activePhase = 'reconciliation'; + activePhase = 'post_processor'; + activePhase = 'wiki_sl_ref_repair'; + activePhase = 'final_gates'; + activePhase = 'squash'; + activePhase = 'provenance'; + activePhase = 'report'; +``` + +- [ ] **Step 8: Add trace timing and decision events for missing phases** + +Wrap these existing operations in `traceTimed()` and include the listed data: + +```ts + activePhase = 'fetch'; + const stagedDir = await traceTimed(trace, 'fetch', 'resolve_staged_dir', { + bundleRefKind: job.bundleRef.kind, + sourceKey: job.sourceKey, + }, () => + overrideReport + ? this.materializeOverrideSnapshot(overrideReport, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + }) + : this.resolveStagedDir(job.bundleRef, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + }), + ); +``` + +Add explicit events after decisions: + +```ts + await runTrace.event('debug', 'detect', 'adapter_detected', { detected }); + await runTrace.event('debug', 'planning', 'work_units_planned', { + workUnitCount: workUnits.length, + evictionCount: eviction?.deletedRawPaths.length ?? 0, + unresolvedCardCount: unresolvedCards?.length ?? 0, + triageEnabled: triageResult?.enabled ?? 
false, + }); + await runTrace.event('debug', 'planning', 'target_connections_resolved', { + connectionIds: slConnectionIds, + }); + await runTrace.event('debug', 'reconciliation', 'reconciliation_finished', { + skipped: reconcileOutcome.skipped, + stopReason: reconcileOutcome.stopReason ?? null, + actionCount: reconcileActions.length, + conflictCount: stageIndex.conflictsResolved.length, + fallbackCount: stageIndex.unmappedFallbacks.length, + artifactResolutionCount: stageIndex.artifactResolutions?.length ?? 0, + }); + await runTrace.event('debug', 'post_processor', 'post_processor_finished', { + sourceKey: job.sourceKey, + status: postProcessorOutcome?.status ?? 'skipped', + touchedSources: postProcessorOutcome?.touchedSources ?? [], + warnings: postProcessorOutcome?.warnings ?? [], + }); + await runTrace.event('debug', 'wiki_sl_ref_repair', 'wiki_sl_refs_repaired', { + repairCount: wikiSlRefRepairResult.repairs.length, + repairs: wikiSlRefRepairResult.repairs, + warnings: wikiSlRefRepairResult.warnings, + }); + await runTrace.event('debug', 'provenance', 'provenance_rows_validated', { + rowCount: provenanceRows.length, + }); + await runTrace.event('debug', 'squash', 'squash_finished', { + commitSha, + touchedPaths: mergeResult.touchedPaths, + }); + await runTrace.event('debug', 'report', 'success_report_created', { + reportId, + runId: runRow.id, + tracePath: runTrace.tracePath, + }); +``` + +Acceptance criteria for this step: + +- A successful isolated run trace contains phase events for `fetch`, + `snapshot`, `routing`, `planning`, `work_unit`, `integration`, + `reconciliation`, `final_gates`, `squash`, `provenance`, `report`, and + `run`. +- A failed isolated run trace contains an `ingest_failed` event with `runId`, + `syncId`, `phase`, `tracePath`, and serialized error details. +- Failed runs after `runRow` creation have a stored report whose body includes + `status: "failed"`, `failure.phase`, `failure.message`, and `tracePath`. 
+ +- [ ] **Step 9: Add isolated trace completeness test** + +Append this test to `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`: + +```ts + it('stores a failure report and postmortem trace for final gate failures', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + const createdReports: any[] = []; + deps.reports.create = vi.fn(async (args: any) => { + createdReports.push(args); + return { id: `report-${createdReports.length}` }; + }); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'card-wiki', rawFiles: ['cards/wiki.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.unitKey === 'card-wiki') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr_cents`.\n', + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'account-segments', detail: 'Account segments', rawPaths: ['cards/wiki.json'] }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'wu wiki', 'KTX Test', 'system@ktx.local'); + } + if (params.telemetryTags.unitKey === 'card-source') { + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: 
mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ target: 'sl', type: 'created', key: 'mart_account_segments', detail: 'Dollar measure', targetConnectionId: 'warehouse', rawPaths: ['cards/source.json'] }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/mart_account_segments.yaml'], 'wu source', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['cards/wiki.json', 'h1'], + ['cards/source.json', 'h2'], + ]); + + await expect( + runner.run({ jobId: 'job-trace-failure', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/total_contract_arr_cents/); + + const failureReport = createdReports.find((report) => report.body.status === 'failed'); + expect(failureReport.body.tracePath).toContain('job-trace-failure/trace.jsonl'); + expect(failureReport.body.failure).toMatchObject({ phase: 'final_gates' }); + + const events = (await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-trace-failure/trace.jsonl'), 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(events.map((event) => event.event)).toEqual(expect.arrayContaining([ + 'ingest_started', + 'input_snapshot', + 'work_units_planned', + 'isolated_diff_enabled', + 'work_unit_child_created', + 'work_unit_patch_collected', + 'patch_apply_started', + 'patch_accepted', + 'reconciliation_finished', + 'final_artifact_gates_failed', + 'ingest_failed', + 'failure_report_created', + ])); + const failed = events.find((event) => event.event === 'ingest_failed'); + expect(failed).toMatchObject({ + runId: 
'run-1', + syncId: expect.any(String), + data: { phase: 'final_gates', tracePath: expect.stringContaining('trace.jsonl') }, + error: { message: expect.stringContaining('total_contract_arr_cents') }, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 10: Run context and CLI trace tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/report-snapshot.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts -t "failed ingest reports" +``` + +Expected: PASS. + +- [ ] **Step 11: Update trace inspection docs** + +In `docs-site/content/docs/cli-reference/ktx-ingest.mdx`, replace the paragraph +under "Inspect source ingest traces" that contains the sentence starting "Each line is a JSON +event" with: + +```mdx +The trace file lives under the project directory at +`.ktx/ingest-traces/<job-id>/trace.jsonl`. Each line is a JSON event with the +job id, run id, sync id, connection id, source key, phase, event name, timing, +state snapshot, decision context, and error details. Failed runs also write a +stored ingest report with `status: "failed"`, `failure.phase`, +`failure.message`, and the same trace path, so `ktx ingest status <run-id>` can +point you to the postmortem trace. 
+``` + +- [ ] **Step 12: Commit trace and failure report work** + +```bash +git add packages/context/src/ingest/reports.ts \ + packages/context/src/ingest/report-snapshot.ts \ + packages/context/src/ingest/report-snapshot.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/cli/src/ingest.ts \ + packages/cli/src/ingest.test.ts \ + docs-site/content/docs/cli-reference/ktx-ingest.mdx +git commit -m "fix(ingest): persist postmortem failure traces" +``` + +### Task 4: Structured policy conflicts and child cleanup + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/work-unit-executor.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Add failing child cleanup test** + +Append this test to `packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts`: + +```ts + it('removes child worktrees after failed WorkUnit outcomes are traced', async () => { + const { homeDir, git, baseSha } = await makeGit(); + const childDir = join(homeDir, '.worktrees/session-job-1-wu-fail'); + const sessionWorktreeService = { + create: vi.fn(async (_key: string, startSha: string) => { + await mkdir(join(homeDir, '.worktrees'), { recursive: true }); + await git.addWorktree(childDir, 'session/job-1-wu-fail', startSha); + return { + chatId: 'job-1-wu-fail', + workdir: childDir, + branch: 'session/job-1-wu-fail', + baseSha: startSha, + createdAt: new Date(), + git: git.forWorktree(childDir), + config: {}, + }; + }), + cleanup: vi.fn(async () => undefined), + }; + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'), + jobId: 'job-1', + 
connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await runIsolatedWorkUnit({ + unitIndex: 0, + ingestionBaseSha: baseSha, + sessionWorktreeService: sessionWorktreeService as never, + patchDir: join(homeDir, '.ktx/ingest-patches/job-1'), + trace, + run: async () => ({ + unitKey: 'wu-fail', + status: 'failed', + reason: 'agent loop errored', + preSha: baseSha, + postSha: baseSha, + actions: [], + touchedSlSources: [], + }), + workUnit: { unitKey: 'wu-fail', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] }, + }); + + expect(result.status).toBe('failed'); + expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success'); + }); +``` + +- [ ] **Step 2: Run child cleanup test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/work-unit-executor.test.ts -t "failed WorkUnit" +``` + +Expected: FAIL because failed WorkUnits call `cleanup(..., 'crash')`. + +- [ ] **Step 3: Cleanup child worktrees on failed outcomes and collect patch metadata only** + +In `packages/context/src/ingest/isolated-diff/work-unit-executor.ts`, replace +the import: + +```ts +import { assertPatchAllowedForWorkUnit } from './git-patch.js'; +``` + +with: + +```ts +import { parsePatchTouchedPaths } from './git-patch.js'; +``` + +Then replace this failed-outcome block: + +```ts + if (outcome.status !== 'success') { + cleanupOutcome = 'crash'; + await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', { + unitKey: input.workUnit.unitKey, + reason: outcome.reason ?? 'unknown failure', + }); + return { ...outcome, childWorktreePath: child.workdir }; + } +``` + +with: + +```ts + if (outcome.status !== 'success') { + cleanupOutcome = 'success'; + await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', { + unitKey: input.workUnit.unitKey, + reason: outcome.reason ?? 
'unknown failure', + }); + return { ...outcome, childWorktreePath: child.workdir }; + } +``` + +Replace patch policy enforcement: + +```ts + const touched = assertPatchAllowedForWorkUnit({ + unitKey: input.workUnit.unitKey, + patch, + slDisallowed: input.workUnit.slDisallowed === true, + }); +``` + +with: + +```ts + const touched = parsePatchTouchedPaths(patch); +``` + +In the `catch` block, set `cleanupOutcome = 'success'` after the error is +traced: + +```ts + cleanupOutcome = 'success'; +``` + +- [ ] **Step 4: Run child cleanup tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/work-unit-executor.test.ts +``` + +Expected: PASS. + +- [ ] **Step 5: Add failing policy rejection trace test** + +Append this test to `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`: + +```ts + it('classifies slDisallowed patch policy failures as traced textual conflicts', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + await mkdir(join(configDir, 'semantic-layer/c1'), { recursive: true }); + await git.commitFiles(['semantic-layer/c1'], 'empty sl dir', 'System User', 'system@example.com'); + const childDir = join(homeDir, 'child-policy'); + await git.addWorktree(childDir, 'child-policy', baseSha); + const childGit = git.forWorktree(childDir); + await mkdir(join(childDir, 'semantic-layer/c1'), { recursive: true }); + await writeFile(join(childDir, 'semantic-layer/c1/orders.yaml'), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n'); + await childGit.commitFiles(['semantic-layer/c1/orders.yaml'], 'forbidden sl', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/forbidden.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-policy/trace.jsonl'), + jobId: 'job-policy', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + 
const result = await integrateWorkUnitPatch({ + unitKey: 'lookml-mismatch', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: true, + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + touchedPaths: ['semantic-layer/c1/orders.yaml'], + }); + const rawTrace = await readFile(trace.tracePath, 'utf-8'); + expect(rawTrace).toContain('patch_policy_rejected'); + expect(rawTrace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/c1/orders.yaml'); + }); +``` + +- [ ] **Step 6: Run policy rejection test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts -t "policy failures" +``` + +Expected: FAIL because policy rejection throws before a structured result. + +- [ ] **Step 7: Classify policy rejections in the integrator** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.ts`, add +`parsePatchTouchedPaths` to the import from `git-patch.js`: + +```ts +import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths } from './git-patch.js'; +``` + +Replace lines that read and assert the patch with: + +```ts + const patch = await readFile(input.patchPath, 'utf-8'); + const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path); + try { + assertPatchAllowedForWorkUnit({ + unitKey: input.unitKey, + patch, + slDisallowed: input.slDisallowed, + }); + } catch (error) { + await input.trace.event('error', 'integration', 'patch_policy_rejected', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason: errorMessage(error), + }); + return { + status: 'textual_conflict', + reason: errorMessage(error), + touchedPaths, + }; + } +``` + +Keep the existing `patch_apply`, `patch_textual_conflict`, +`semantic_gate`, and `patch_semantic_conflict` blocks unchanged. 
+ +- [ ] **Step 8: Update isolated slDisallowed regression expectations** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +replace the `slDisallowed` rejection assertion with: + +```ts + await expect( + runner.run({ jobId: 'job-sl-disallowed', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/isolated diff textual conflict/); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-sl-disallowed/trace.jsonl'), 'utf-8'); + expect(trace).toContain('patch_policy_rejected'); + expect(trace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/warehouse/orders.yaml'); +``` + +- [ ] **Step 9: Run policy and isolated tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 10: Commit policy and cleanup fixes** + +```bash +git add packages/context/src/ingest/isolated-diff/work-unit-executor.ts \ + packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "fix(ingest): trace policy conflicts and cleanup child worktrees" +``` + +### Task 5: Final verification + +**Files:** +- Verify: all files modified in Tasks 1-4 + +- [ ] **Step 1: Run focused context tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run focused CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Run package type checks** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code check because TypeScript exports and report fields changed** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Run pre-commit for touched files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/ingest/wiki-body-refs.ts \ + packages/context/src/ingest/wiki-body-refs.test.ts \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/isolated-diff/work-unit-executor.ts \ + packages/context/src/ingest/isolated-diff/work-unit-executor.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/reports.ts \ + packages/context/src/ingest/report-snapshot.ts \ + packages/context/src/ingest/report-snapshot.test.ts \ + packages/cli/src/ingest.ts \ + packages/cli/src/ingest.test.ts \ + docs-site/content/docs/cli-reference/ktx-ingest.mdx +``` + +Expected: PASS. If the local `uv` version does not satisfy the repository pin, +record the version mismatch and run the focused `pnpm` checks above. + +- [ ] **Step 6: Inspect one failed trace manually** + +Run the final-gate failure test and inspect the trace: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + -t "postmortem trace" +``` + +Open the trace path printed in the assertion failure output or the test temp +directory if the test logs it. The trace must let a human reconstruct: + +- the job, run, sync, source, connection, and input snapshot; +- routing into isolated diffs; +- WorkUnit child creation, patch collection, patch application, and accepted + patch order; +- reconciliation status and action counts; +- final gate input scope and failure reason; +- failure report creation; and +- final `ingest_failed` event with phase and serialized error. 
+ +- [ ] **Step 7: Inspect git status** + +Run: + +```bash +git status --short +``` + +Expected: only intended source, test, CLI, and docs files are modified before +the final commit. + +- [ ] **Step 8: Commit verification updates if any** + +If test or docs edits were needed during verification: + +```bash +git add packages/context/src/ingest packages/cli/src/ingest.ts packages/cli/src/ingest.test.ts docs-site/content/docs/cli-reference/ktx-ingest.mdx +git commit -m "test(ingest): verify isolated diff postmortem coverage" +``` + +If no files changed during verification, do not create an empty commit. + +## Self-Review + +Spec coverage: + +- Isolated WorkUnits and binary no-rename patches are already implemented in + the previous plan. Task 4 moves policy rejection to the integration layer and + keeps child cleanup aligned with the spec. +- Artifact-aware gates are completed by Task 1 for semantic-layer YAML, wiki + frontmatter source/entity refs, wiki body refs, and direct join dependencies. +- The final global gate moves to the correct point in Task 2, after + reconciliation, post-processing, and wiki repair, and before squash. +- Reconciliation mutation tracking is added in Task 2 through a diff from + pre-reconciliation `HEAD` to post-repair `HEAD`. +- Persistent postmortem observability is completed by Task 3 with trace events, + timings, state snapshots, stored failure reports, and CLI status surfacing. +- Version-one resolver behavior remains fail-fast and preserves the integration + worktree on conflicts. + +Placeholder scan: + +- The plan contains no placeholder tasks. +- Each code-changing step includes concrete code or exact replacement blocks. +- Verification commands and expected outcomes are explicit. + +Type consistency: + +- New report fields are named `status` and `failure` consistently in + `reports.ts`, `report-snapshot.ts`, runner report bodies, and CLI rendering. 
+- Final gate scope uses existing `TouchedSlSource`, `MemoryAction`, + `WorkUnitOutcome`, and `WikiSlRefRepairResult` types. +- Trace event names are stable and asserted by tests: + `reconciliation_finished`, `final_artifact_gates_failed`, + `failure_report_created`, `patch_policy_rejected`, and `ingest_failed`. diff --git a/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md new file mode 100644 index 00000000..9ed083ec --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md @@ -0,0 +1,493 @@ +# Isolated Diff Ingestion V1 Global Wiki Reference Gate Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Reject final trees where an isolated-diff run changes semantic-layer +sources or deletes wiki pages and leaves pre-existing wiki pages with stale +body, `sl_refs`, frontmatter `refs`, or inline `[[page-key]]` references. + +**Architecture:** Keep `artifact-gates.ts` validation-only. The runner expands +the final wiki gate scope before the existing final artifact gate: changed pages +are always validated, and all global wiki pages are validated when the run +changes any semantic-layer source or removes any wiki page. The final-gate trace +records the expanded scope and why it was expanded. + +**Tech Stack:** TypeScript, Vitest, pnpm workspace commands, existing +`IngestBundleRunner`, `KnowledgeWikiService`, and isolated-diff test fixtures. 
+ +--- + +## Audit Summary + +The implemented isolated-diff plans cover the core v1 flow: child worktrees, +binary no-rename patch proposals, `git apply --3way --index`, policy rejection, +final gates after reconciliation and repair, pre-squash provenance raw-path +validation, target-connection enforcement, failed reports, and persistent JSONL +traces. + +One v1-blocking correctness gap remains. Final wiki gates currently validate +wiki pages changed by the run. They do not validate unchanged pages that become +invalid because the run changes a semantic-layer source or deletes a referenced +wiki page. Two concrete failures can therefore squash into main: + +- A pre-existing wiki page body contains + `` `mart_account_segments.total_contract_arr_cents` `` while the run updates + `semantic-layer/warehouse/mart_account_segments.yaml` to define only + `total_contract_arr`. +- A pre-existing wiki page has `refs: [source-page]` or `[[source-page]]` while + the run deletes `wiki/global/source-page.md`. + +This plan does not expand connector rollout, promote isolated diffs to the +default, add interactive resolution, add semantic auto-merge, remove the old +path, expand transitive semantic-layer dependencies, or move provenance into +files. + +## File Structure + +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds two failing end-to-end regressions for unchanged wiki pages made stale by + semantic-layer changes and wiki-page deletion. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Adds a final wiki gate scope helper, expands validation to all global wiki + pages when final state changes can invalidate unchanged references, and records + scope details in the final-gate trace and failed report. 
+ +--- + +### Task 1: Add failing unchanged wiki regressions + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Add the stale existing wiki body regression** + +Insert this test inside `describe('IngestBundleRunner isolated diff path', ...)` +after the existing Metabase stale-measure regression: + +```ts + it('rejects unchanged wiki body refs made stale by isolated semantic-layer changes', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed existing wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await 
writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-existing-body-stale', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/total_contract_arr_cents/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-body-stale/trace.jsonl'), 'utf-8'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('account-segments'); + expect(trace).toContain('semantic_layer_changed'); + expect(trace).toContain('ingest_failed'); + expect(trace).toContain('failure_report_created'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Add the stale existing wiki page-reference regression** + +Insert this test near the existing final wiki reference regression: + +```ts + it('rejects unchanged inbound wiki refs broken by an isolated wiki deletion', async () => { + const 
runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'wiki/global/source-page.md'), + '---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n', + ); + await runtime.git.commitFiles( + ['wiki/global/source-page.md', 'wiki/global/account-segments.md'], + 'seed inbound wiki refs', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'delete-target-page', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await rm(join(root, 'wiki/global/source-page.md'), { force: true }); + currentSession.actions.push({ + target: 'wiki', + type: 'removed', + key: 'source-page', + detail: 'Delete referenced page', + rawPaths: ['pages/delete.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/source-page.md'], + 'wu delete target page', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/delete.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-existing-wiki-ref-stale', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + 
}), + ).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> source-page/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-wiki-ref-stale/trace.jsonl'), 'utf-8'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('account-segments -> source-page'); + expect(trace).toContain('wiki_page_removed'); + expect(trace).toContain('ingest_failed'); + expect(trace).toContain('failure_report_created'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 3: Run the focused regressions and verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs" +``` + +Expected: FAIL. The stale body test currently squashes successfully because the +unchanged `account-segments` page is not in `finalChangedWikiPageKeys`. The +inbound wiki ref test currently squashes successfully because the deleted +`source-page` is validated as a missing changed page and skipped, while the +unchanged page that references it is never validated. 
+ +--- + +### Task 2: Expand the final wiki validation scope + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` + +- [ ] **Step 1: Add final wiki gate scope helpers** + +Add these private methods after `uniqueTouchedSlSources()`: + +```ts + private removedWikiPageKeysFromActions(actions: MemoryAction[]): string[] { + return this.uniqueWikiPageKeys( + actions.filter((action) => action.target === 'wiki' && action.type === 'removed').map((action) => action.key), + ); + } + + private async wikiPageKeysForFinalGates(input: { + wikiService: ReturnType<KnowledgeWikiService['forWorktree']>; + changedWikiPageKeys: string[]; + touchedSlSources: TouchedSlSource[]; + actions: MemoryAction[]; + }): Promise<{ + pageKeys: string[]; + trace: { + global: boolean; + reasons: string[]; + changedWikiPageKeys: string[]; + removedWikiPageKeys: string[]; + pageKeysValidated: string[]; + }; + }> { + const changedWikiPageKeys = this.uniqueWikiPageKeys(input.changedWikiPageKeys); + const removedWikiPageKeys = this.removedWikiPageKeysFromActions(input.actions); + const reasons: string[] = []; + if (input.touchedSlSources.length > 0) { + reasons.push('semantic_layer_changed'); + } + if (removedWikiPageKeys.length > 0) { + reasons.push('wiki_page_removed'); + } + + let pageKeys = changedWikiPageKeys; + if (reasons.length > 0) { + pageKeys = this.uniqueWikiPageKeys([ + ...changedWikiPageKeys, + ...(await input.wikiService.listPageKeys('GLOBAL', null)), + ]); + } + + return { + pageKeys, + trace: { + global: reasons.length > 0, + reasons, + changedWikiPageKeys, + removedWikiPageKeys, + pageKeysValidated: pageKeys, + }, + }; + } +``` + +- [ ] **Step 2: Use the expanded scope before final gates** + +In `runInner()`, replace the current `finalChangedWikiPageKeys` and +`finalTouchedSlSources` block with this code: + +```ts + const baseFinalChangedWikiPageKeys = this.uniqueWikiPageKeys([ + ...(isolatedDiffEnabled ? 
projectionChangedWikiPageKeys : []), + ...workUnitOutcomes + .flatMap((outcome) => outcome.patchTouchedPaths ?? []) + .flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...this.wikiPageKeysFromActions(reconcileActions), + ...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...wikiSlRefRepairResult.repairs.filter((repair) => repair.scope === 'GLOBAL').map((repair) => repair.pageKey), + ]); + const finalTouchedSlSources = this.uniqueTouchedSlSources([ + ...(isolatedDiffEnabled ? projectionTouchedSources : []), + ...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources), + ...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId), + ...this.touchedSlSourcesFromPaths(postReconciliationPaths), + ...(postProcessorOutcome?.touchedSources ?? []), + ]); + const finalWikiGateScope = await this.wikiPageKeysForFinalGates({ + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + changedWikiPageKeys: baseFinalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + actions: [...stageIndex.workUnits.flatMap((wu) => wu.actions), ...reconcileActions], + }); + const finalChangedWikiPageKeys = finalWikiGateScope.pageKeys; +``` + +This keeps the existing variable name used by `validateFinalIngestArtifacts()`, +but the value now means "wiki page keys to validate in final gates." + +- [ ] **Step 3: Add scope details to final-gate trace data** + +In the `finalArtifactGateTraceData` object, add the +`wikiReferenceGateScope` field: + +```ts + const finalArtifactGateTraceData = { + changedWikiPageKeys: finalChangedWikiPageKeys, + wikiReferenceGateScope: finalWikiGateScope.trace, + touchedSlSources: finalTouchedSlSources, + projectionTouchedPaths, + workUnitPatchTouchedPaths: workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? 
[]), + preReconciliationSha, + postReconciliationSha, + postReconciliationPaths, + reconciliationActionCount: reconcileActions.length, + wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length, + }; +``` + +The failure report already stores `activeFailureDetails`, so this trace data +also becomes persistent failed-report context when final gates fail. + +- [ ] **Step 4: Run the focused regressions and verify they pass** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs" +``` + +Expected: PASS. Both traces include `final_artifact_gates_failed`, +`failure_report_created`, no `squash_finished`, and +`wikiReferenceGateScope` with either `semantic_layer_changed` or +`wiki_page_removed`. + +--- + +### Task 3: Verification and commit + +**Files:** +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Run the isolated-diff focused suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/core/git.service.patch.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Type-check the context package** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run dead-code analysis** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to +`packages/context/src/ingest/ingest-bundle.runner.ts` and +`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. 
+Investigate any new finding before committing. + +- [ ] **Step 4: Verify trace acceptance criteria** + +Open the traces produced by the two new failing-run tests and confirm these +events and fields exist: + +```text +job-existing-body-stale: +- final_artifact_gates_started +- final_artifact_gates_failed +- ingest_failed +- failure_report_created +- no squash_finished +- wikiReferenceGateScope.global is true +- wikiReferenceGateScope.reasons includes semantic_layer_changed +- wikiReferenceGateScope.pageKeysValidated includes account-segments +- error.message includes total_contract_arr_cents + +job-existing-wiki-ref-stale: +- final_artifact_gates_started +- final_artifact_gates_failed +- ingest_failed +- failure_report_created +- no squash_finished +- wikiReferenceGateScope.global is true +- wikiReferenceGateScope.reasons includes wiki_page_removed +- wikiReferenceGateScope.removedWikiPageKeys includes source-page +- error.message includes account-segments -> source-page +``` + +- [ ] **Step 5: Commit** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "fix(ingest): gate global wiki references" +``` + +Expected: one commit containing only the runner and isolated-diff runner test +changes. + +--- + +## Self-Review + +Spec coverage: +- Final global wiki body reference validation now covers unchanged wiki pages + when a run changes semantic-layer sources. +- Final global wiki page reference validation now covers unchanged inbound + references when a run deletes wiki pages. +- The plan keeps resolver behavior fail-fast and stops before squash. +- Persistent trace and failed-report acceptance criteria are explicit and tied + to the concrete failure modes. + +Non-blocking gaps unchanged: +- Broader connector rollout. +- Isolated-diff default promotion. +- Old shared-worktree path removal. +- Interactive conflict resolution. +- Semantic auto-merge. 
+- Transitive semantic-layer dependency expansion. +- Provenance-as-files. diff --git a/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md new file mode 100644 index 00000000..8d2fb94d --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md @@ -0,0 +1,494 @@ +# Isolated Diff Ingestion V1 Provenance Gate Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ensure invalid provenance raw paths are rejected before isolated-diff +ingestion squashes any integration worktree changes into the main project +worktree. + +**Architecture:** Keep provenance insertion after squash, but derive and +validate the planned provenance rows immediately after final artifact gates and +before the squash stage. This makes provenance validation part of the final +pre-main safety boundary while preserving the existing report and database +write shape. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, existing +`IngestBundleRunner`, `validateProvenanceRawPaths`, ingest reports, and +persistent ingest traces. + +--- + +## Audit Summary + +The implemented isolated-diff path now covers the core v1 safety surface: +child worktrees, binary no-rename patches, `git apply --3way --index`, patch +policy rejection, final wiki and semantic-layer gates after reconciliation and +post-processing, failure reports, and persistent JSONL traces. 
The focused +isolated-diff test suite passes: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Current result: 7 test files passed and 28 tests passed. + +One v1-blocking gap remains. `validateProvenanceRawPaths()` is called in +`packages/context/src/ingest/ingest-bundle.runner.ts` after +`squashMergeIntoMain()`. A work unit or reconciliation action can emit an +otherwise valid wiki or semantic-layer artifact whose `rawPaths` contain a path +outside the current raw snapshot and eviction set. Today the run fails during +provenance recording, but only after the invalidly-attributed artifacts have +already reached the main project worktree. That violates the spec requirement +that final global gates run before any changes reach main. + +Observability for the already-implemented phases is sufficient for postmortem +reconstruction: traces include input snapshots, routing, child worktree +creation and cleanup, patch collection and application, conflict +classification, reconciliation, final gates, failure reports, and run outcome. +This plan adds only the missing provenance validation failure trace because it +corresponds to a concrete pre-main failure mode, not cosmetic trace expansion. + +Non-blocking gaps that remain after this plan: + +- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct + durable writes to the isolated path. +- Promoting isolated diffs as the default for all connectors. +- Removing the old shared-worktree WorkUnit execution path. +- Interactive, CLI, or agent-driven conflict resolution. +- Auto-merging semantic conflicts that cannot be proven correct. 
+- Transitive SQL-projection dependency expansion beyond direct declared joins. +- Moving provenance rows to worktree files. +- Adding failure reports for failures that happen before an ingest run row + exists. The trace file is still written at the deterministic job path. + +## File Structure + +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Add a regression proving invalid provenance raw paths fail before squash, + leave main unchanged, skip SQLite provenance insertion, and emit a + postmortem-grade trace event. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Extract provenance row construction into private helpers, run provenance + raw-path validation before squash, trace validation success and failure, and + reuse the prevalidated rows for insertion and reports after squash. + +--- + +### Task 1: Add the pre-squash provenance regression + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Write the failing runner test** + +Append this test inside the existing +`describe('IngestBundleRunner isolated diff path', ...)` block in +`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`: + +```ts + it('rejects invalid provenance raw paths before squash reaches main', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'card-valid-artifacts', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: 
true }); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr`.\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'mart_account_segments', + detail: 'Valid source', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Valid wiki with invalid provenance raw path', + rawPaths: ['cards/missing.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'valid artifacts with invalid provenance', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-invalid-provenance', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + expect(deps.provenance.insertMany).not.toHaveBeenCalled(); + const trace = await readFile(join(runtime.configDir, 
'.ktx/ingest-traces/job-invalid-provenance/trace.jsonl'), 'utf-8'); + expect(trace).toContain('final_artifact_gates_finished'); + expect(trace).toContain('provenance_rows_validation_failed'); + expect(trace).toContain('cards/missing.json'); + expect(trace).toContain('ingest_failed'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Run the failing regression** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths" +``` + +Expected: FAIL because the current runner validates provenance after +`squashMergeIntoMain()`, so `runtime.git.revParseHead()` changes and the trace +does not contain `provenance_rows_validation_failed`. + +### Task 2: Move provenance validation into the pre-squash gate boundary + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` + +- [ ] **Step 1: Import the provenance report and insert types** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, update the imports. 
+ +Replace this import block: + +```ts +import type { + ContextEvidenceIndexSummary, + IngestBundleRunnerDeps, + IngestProvenanceRow, + IngestRunsPort, + IngestSessionWorktree, + PageTriageRunResult, +} from './ports.js'; +``` + +With: + +```ts +import type { + ContextEvidenceIndexSummary, + IngestBundleRunnerDeps, + IngestProvenanceInsert, + IngestProvenanceRow, + IngestRunsPort, + IngestSessionWorktree, + PageTriageRunResult, +} from './ports.js'; +``` + +Replace this import block: + +```ts +import { + buildStageIndexFromReportBody, + postProcessorSavedMemoryCounts, + type IngestReportPostProcessorOutcome, + type IngestReportSnapshot, +} from './reports.js'; +``` + +With: + +```ts +import { + buildStageIndexFromReportBody, + postProcessorSavedMemoryCounts, + type IngestReportPostProcessorOutcome, + type IngestReportProvenanceDetail, + type IngestReportSnapshot, +} from './reports.js'; +``` + +- [ ] **Step 2: Add provenance row helpers** + +Add these private methods after `private errorMessage(error: unknown): string` +in `packages/context/src/ingest/ingest-bundle.runner.ts`: + +```ts + private buildProvenanceRows(input: { + job: IngestBundleJob; + syncId: string; + currentHashes: Map<string, string>; + stageIndex: StageIndex; + reconcileActions: MemoryAction[]; + eviction?: EvictionUnit; + }): IngestProvenanceInsert[] { + const provenanceRows: IngestProvenanceInsert[] = []; + const actionToType = (action: MemoryAction): IngestProvenanceInsert['actionType'] => { + if (action.target === 'wiki') { + return 'wiki_written'; + } + return action.type === 'created' ? 'source_created' : 'measure_added'; + }; + const producedPaths = new Set<string>(); + const pushActionProvenance = (rawPath: string, action: MemoryAction): void => { + const hash = input.currentHashes.get(rawPath) ?? 
''; + provenanceRows.push({ + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath, + rawContentHash: hash, + artifactKind: action.target, + artifactKey: action.key, + targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, input.job.connectionId) : null, + artifactContentHash: null, + actionType: actionToType(action), + }); + producedPaths.add(rawPath); + }; + + for (const wu of input.stageIndex.workUnits) { + for (const action of wu.actions) { + for (const rawPath of rawPathsForAction(action, wu.rawFiles)) { + pushActionProvenance(rawPath, action); + } + } + } + for (const action of input.reconcileActions) { + for (const rawPath of action.rawPaths ?? []) { + pushActionProvenance(rawPath, action); + } + } + for (const resolution of input.stageIndex.artifactResolutions ?? []) { + const hash = input.currentHashes.get(resolution.rawPath) ?? ''; + provenanceRows.push({ + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath: resolution.rawPath, + rawContentHash: hash, + artifactKind: resolution.artifactKind, + artifactKey: resolution.artifactKey, + targetConnectionId: null, + artifactContentHash: null, + actionType: resolution.actionType, + }); + producedPaths.add(resolution.rawPath); + } + for (const [rawPath, hash] of input.currentHashes) { + if (producedPaths.has(rawPath)) { + continue; + } + provenanceRows.push({ + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath, + rawContentHash: hash, + artifactKind: null, + artifactKey: null, + targetConnectionId: null, + artifactContentHash: null, + actionType: 'skipped', + }); + } + + return provenanceRows; + } + + private toReportProvenanceRows(rows: IngestProvenanceInsert[]): IngestReportProvenanceDetail[] { + return rows.map(({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({ + rawPath, + artifactKind, + 
artifactKey, + targetConnectionId: targetConnectionId ?? null, + actionType, + })); + } +``` + +- [ ] **Step 3: Validate planned provenance rows before squash** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, find the code that +sets `activePhase = 'final_gates';` and runs `traceTimed(..., +'final_artifact_gates', ...)`. Immediately after that `await traceTimed(...)` +block and before the `// Stage 6 — squash commit` comment, insert: + +```ts + activePhase = 'provenance_validation'; + const provenanceRows = this.buildProvenanceRows({ + job, + syncId, + currentHashes, + stageIndex, + reconcileActions, + eviction, + }); + await traceTimed( + runTrace, + 'provenance', + 'provenance_rows_validation', + { + rowCount: provenanceRows.length, + currentRawPathCount: currentHashes.size, + deletedRawPathCount: eviction?.deletedRawPaths.length ?? 0, + }, + async () => { + validateProvenanceRawPaths({ + rows: provenanceRows, + currentRawPaths: new Set(currentHashes.keys()), + deletedRawPaths: new Set(eviction?.deletedRawPaths ?? []), + }); + }, + ); + const reportProvenanceRows = this.toReportProvenanceRows(provenanceRows); +``` + +- [ ] **Step 4: Replace the post-squash provenance construction block** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, in the +`activePhase = 'provenance';` section after squash, delete the current block +that starts with: + +```ts + // Provenance rows: per-artifact when the WU emitted actions, plus a `skipped` + // fallback for raw files that produced nothing so the next DiffSet still sees + // them. + const provenanceRows: Parameters[0] = []; +``` + +And ends with: + +```ts + await runTrace.event('debug', 'provenance', 'provenance_rows_validated', { + rowCount: provenanceRows.length, + }); +``` + +Do not delete the existing call to `await this.deps.provenance.insertMany(provenanceRows);`. 
+Immediately after that insertion call, add: + +```ts + await runTrace.event('debug', 'provenance', 'provenance_rows_inserted', { + rowCount: provenanceRows.length, + }); +``` + +Then delete the later `const reportProvenanceRows = provenanceRows.map(...)` +block because `reportProvenanceRows` is now created before squash from the +prevalidated rows. + +- [ ] **Step 5: Run the provenance regression** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths" +``` + +Expected: PASS. The trace contains `provenance_rows_validation_failed`, main +HEAD remains unchanged, and `provenance.insertMany` is not called. + +- [ ] **Step 6: Run the focused isolated-diff suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. + +### Task 3: Type-check, dead-code check, and commit + +**Files:** +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Run the context package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 2: Run the workspace dead-code check** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only existing unrelated Knip/Biome findings. Investigate +any new findings in the two modified files before continuing. 
+ +- [ ] **Step 3: Commit the provenance gate closure** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "fix(ingest): gate provenance before isolated diff squash" +``` + +Expected: one commit containing only the runner and isolated-diff runner test +changes. + +## Self-Review + +Spec coverage: this plan closes the remaining violation of the design's final +global gate invariant by proving invalid provenance raw paths fail before +squash and by moving provenance validation into the pre-main gate boundary. + +Placeholder scan: no placeholder steps remain. Every implementation step names +the exact files, code, commands, and expected results. + +Type consistency: the plan uses existing `IngestProvenanceInsert`, +`IngestReportProvenanceDetail`, `MemoryAction`, `EvictionUnit`, `StageIndex`, +`rawPathsForAction()`, and `validateProvenanceRawPaths()` names. diff --git a/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md new file mode 100644 index 00000000..0b7f3837 --- /dev/null +++ b/docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md @@ -0,0 +1,1350 @@ +# Isolated Diff Ingestion V1 Reference and Target Gate Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking isolated-diff correctness gaps by +validating final wiki page references and enforcing allowed semantic-layer +target connections before any isolated-diff run can squash into main. 
+ +**Architecture:** Extend the existing validation-only artifact gate rather than +adding a resolver. Wiki reference validation runs in the final composed +integration worktree for changed wiki pages, including frontmatter `refs` and +inline `[[page-key]]` references. Semantic-layer target authorization is +enforced in three places: SL write/edit tools reject out-of-scope connection +IDs, WorkUnit patch policy rejects unauthorized `semantic-layer/<connectionId>/` +paths, and the runner checks projection, reconciliation, post-processor, and +repair paths before final gates and squash. Target-policy failures emit +persistent JSONL trace events and failed reports with enough path and connection +context for postmortem reconstruction. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, simple-git, existing +`IngestBundleRunner`, `GitService`, `SlWriteSourceTool`, `SlEditSourceTool`, +`KnowledgeWikiService`, `findMissingWikiRefs`, ingest reports, and persistent +ingest traces. + +--- + +## Audit summary + +The implemented plans cover the main v1 isolated-diff flow: integration +worktree creation, child worktrees from the post-projection base, binary +no-rename patches, `git apply --3way --index`, final semantic-layer and wiki +SL/body gates after reconciliation, structured conflict classification, child +cleanup, failed reports, persistent JSONL traces, and pre-squash provenance raw +path validation. + +Two concrete v1-blocking gaps remain: + +- Final global gates do not validate wiki page references. Existing local + checks use `findDanglingWikiRefsForActions()`, but + `validateFinalIngestArtifacts()` validates only wiki `sl_refs` and body + semantic/table references. A WorkUnit can update a page that references an + existing page while another accepted WorkUnit deletes that target page. Both + local gates can pass, and the final tree can squash with a dangling + frontmatter `refs` or inline `[[page-key]]` reference. 
+- Allowed semantic-layer target connections are not enforced for SL write/edit + tools or integration diffs. The runner computes `slConnectionIds` from the + primary connection plus adapter-declared targets, but `sl_write_source` and + `sl_edit_source` ignore `session.allowedConnectionNames`, and patch policy + rejects only `slDisallowed` plus binary/mode violations. A buggy tool call or + bypassed tool can write `semantic-layer/<other-connection-id>/...` and reach main if + the artifact is otherwise valid. + +Non-blocking gaps remain unchanged: + +- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct + durable writes to the isolated path. +- Promoting isolated diffs as the default for all connectors. +- Removing the old shared-worktree WorkUnit execution path. +- Interactive, CLI, or agent-driven conflict resolution. +- Auto-merging semantic conflicts that cannot be proven correct. +- Transitive SQL-projection dependency expansion beyond direct declared joins. +- Moving provenance rows to worktree files. +- Adding stored failure reports for failures before an ingest run row exists. + The deterministic trace file is still written for those early failures. + +## File structure + +- Create `packages/context/src/ingest/semantic-layer-target-policy.ts`. + Owns semantic-layer path-to-connection parsing and authorization errors. +- Create `packages/context/src/ingest/semantic-layer-target-policy.test.ts`. + Covers allowed paths, unauthorized paths, non-SL paths, and sorted errors. +- Modify `packages/context/src/ingest/artifact-gates.ts`. + Adds final wiki page reference validation for changed pages. +- Modify `packages/context/src/ingest/artifact-gates.test.ts`. + Adds dangling final wiki `refs` and `[[...]]` coverage and updates mocks with + `listPageKeys()`. +- Create `packages/context/src/tools/action-target-connection.ts`. + Adds session-level target connection validation shared by SL write/edit + tools. +- Modify `packages/context/src/tools/index.ts`. 
+ Exports `validateActionTargetConnection()`. +- Modify `packages/context/src/sl/tools/sl-write-source.tool.ts`. + Rejects session-scoped writes to connections outside + `allowedConnectionNames`. +- Modify `packages/context/src/sl/tools/sl-write-source.tool.test.ts`. + Covers denied session-scoped writes. +- Modify `packages/context/src/sl/tools/sl-edit-source.tool.ts`. + Rejects session-scoped edits and deletes to connections outside + `allowedConnectionNames`. +- Modify `packages/context/src/sl/tools/sl-edit-source.tool.test.ts`. + Covers denied session-scoped edits. +- Modify `packages/context/src/ingest/isolated-diff/git-patch.ts`. + Adds allowed target connection checks to WorkUnit patch policy. +- Modify `packages/context/src/ingest/isolated-diff/git-patch.test.ts`. + Covers unauthorized semantic-layer paths in patches. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Accepts `allowedTargetConnectionIds` and includes it in policy rejection + traces. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Covers traced unauthorized target rejection. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Passes allowed target sets to patch integration and runs a traced target + policy gate over final integration-stage paths before final artifact gates. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds cross-WorkUnit wiki-ref deletion, unauthorized WorkUnit patch, and + unauthorized reconciliation mutation regressions. +- Modify `packages/context/src/ingest/index.ts`. + Exports target-policy helpers for tests and future runner checks. 
+ +--- + +### Task 1: Add final wiki reference validation + +**Files:** +- Modify: `packages/context/src/ingest/artifact-gates.test.ts` +- Modify: `packages/context/src/ingest/artifact-gates.ts` + +- [ ] **Step 1: Write failing final wiki reference tests** + +In `packages/context/src/ingest/artifact-gates.test.ts`, add this helper near +the top of the file after the imports: + +```ts +function wikiServiceWithPages(pages: Record) { + return { + listPageKeys: vi.fn().mockResolvedValue(Object.keys(pages)), + readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, pageKey: string) => { + const page = pages[pageKey]; + if (!page) { + return Promise.resolve(null); + } + return Promise.resolve({ + pageKey, + frontmatter: { + summary: pageKey, + usage_mode: 'auto', + refs: page.refs, + sl_refs: page.slRefs, + }, + content: page.content ?? '', + }); + }), + }; +} +``` + +Replace the three existing inline `wikiService = { readPage: ... }` mocks with +`wikiServiceWithPages(...)` so those tests expose `listPageKeys()`. 
Use these +exact replacements: + +```ts +const wikiService = wikiServiceWithPages({ + 'account-segments': { + slRefs: ['mart_account_segments'], + content: 'ARR is `mart_account_segments.total_contract_arr_cents`.', + }, +}); +``` + +```ts +const wikiService = wikiServiceWithPages({ + 'account-segments': { + slRefs: ['mart_account_segments.total_contract_arr_cents'], + content: 'ARR uses a renamed measure.', + }, +}); +``` + +```ts +const wikiService = wikiServiceWithPages({}); +``` + +Append this test inside `describe('artifact gates', ...)`: + +```ts + it('fails final gates when a changed wiki page references a missing wiki page', async () => { + const wikiService = wikiServiceWithPages({ + 'account-segments': { + refs: ['missing-frontmatter-page'], + content: 'See [[missing-inline-page]] for the related process.', + }, + }); + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }), + tableExists: async () => true, + }), + ).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/); + }); +``` + +- [ ] **Step 2: Run the failing artifact-gate test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts -t "missing wiki page" +``` + +Expected: FAIL because `validateFinalIngestArtifacts()` does not validate wiki +frontmatter `refs` or inline `[[...]]` references. 
+ +- [ ] **Step 3: Implement final wiki reference validation** + +In `packages/context/src/ingest/artifact-gates.ts`, add this import: + +```ts +import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js'; +``` + +Add this helper after `validateWikiSlRefs()`: + +```ts +async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> { + const dangling: string[] = []; + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + const missingRefs = await findMissingWikiRefs({ + wikiService: input.wikiService, + scope: 'GLOBAL', + scopeId: null, + pageKey, + refs: page.frontmatter.refs, + content: page.content, + }); + for (const missingRef of missingRefs) { + dangling.push(`${pageKey} -> ${missingRef}`); + } + } + return dangling; +} +``` + +In `validateFinalIngestArtifacts()`, immediately after this line: + +```ts + errors.push(...(await validateWikiSlRefs(input))); +``` + +add: + +```ts + const danglingWikiRefs = await validateWikiRefs(input); + if (danglingWikiRefs.length > 0) { + errors.push(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`); + } +``` + +- [ ] **Step 4: Run artifact-gate tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit final wiki reference gate** + +Run: + +```bash +git add packages/context/src/ingest/artifact-gates.ts packages/context/src/ingest/artifact-gates.test.ts +git commit -m "fix(ingest): gate final wiki references" +``` + +### Task 2: Enforce target connections in SL tools and patch policy + +**Files:** +- Create: `packages/context/src/tools/action-target-connection.ts` +- Modify: `packages/context/src/tools/index.ts` +- Modify: `packages/context/src/sl/tools/sl-write-source.tool.ts` +- Modify: `packages/context/src/sl/tools/sl-write-source.tool.test.ts` +- Modify: `packages/context/src/sl/tools/sl-edit-source.tool.ts` +- Modify: `packages/context/src/sl/tools/sl-edit-source.tool.test.ts` +- Create: `packages/context/src/ingest/semantic-layer-target-policy.ts` +- Create: `packages/context/src/ingest/semantic-layer-target-policy.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/git-patch.ts` +- Modify: `packages/context/src/ingest/isolated-diff/git-patch.test.ts` + +- [ ] **Step 1: Write failing session target-connection tests** + +In `packages/context/src/sl/tools/sl-write-source.tool.test.ts`, append this +test inside `describe('SlWriteSourceTool — session gating', ...)`: + +```ts + it('rejects session-scoped writes outside allowed target connections', async () => { + const { tool } = makeTool(); + const session = makeSession({ + allowedConnectionNames: new Set(['warehouse']), + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: 'finance', + sourceName: 'finance_orders', + source: { + name: 'finance_orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + context, + ); + + expect(result.structured.success).toBe(false); + expect(result.markdown).toContain('connectionId "finance" is outside this ingest session'); + expect(session.actions).toEqual([]); + }); +``` + +In 
`packages/context/src/sl/tools/sl-edit-source.tool.test.ts`, append this test +inside `describe('SlEditSourceTool — session gating', ...)`: + +```ts + it('rejects session-scoped edits outside allowed target connections', async () => { + const { tool } = makeTool(); + const session = makeSession({ + allowedConnectionNames: new Set(['warehouse']), + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: 'finance', + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + + expect(result.structured.success).toBe(false); + expect(result.markdown).toContain('connectionId "finance" is outside this ingest session'); + expect(session.actions).toEqual([]); + }); +``` + +- [ ] **Step 2: Run the failing SL tool tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/sl/tools/sl-write-source.tool.test.ts \ + src/sl/tools/sl-edit-source.tool.test.ts \ + -t "outside allowed target connections" +``` + +Expected: FAIL because the tools do not inspect +`session.allowedConnectionNames`. + +- [ ] **Step 3: Add shared session target validation** + +Create `packages/context/src/tools/action-target-connection.ts`: + +```ts +import type { ToolSession } from './tool-session.js'; + +type ActionTargetConnectionValidation = { ok: true } | { ok: false; error: string }; + +export function validateActionTargetConnection( + session: ToolSession | undefined, + connectionId: string, +): ActionTargetConnectionValidation { + const allowed = session?.allowedConnectionNames; + if (!allowed) { + return { ok: true }; + } + if (allowed.has(connectionId)) { + return { ok: true }; + } + const allowedList = [...allowed].sort(); + return { + ok: false, + error: `connectionId "${connectionId}" is outside this ingest session's allowed target connections: ${ + allowedList.length > 0 ? 
allowedList.join(', ') : '(none)' + }`, + }; +} +``` + +In `packages/context/src/tools/index.ts`, add this export next to +`validateActionRawPaths`: + +```ts +export { validateActionTargetConnection } from './action-target-connection.js'; +``` + +- [ ] **Step 4: Wire target validation into SL write/edit tools** + +In `packages/context/src/sl/tools/sl-write-source.tool.ts`, replace this import: + +```ts +import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js'; +``` + +with: + +```ts +import { + addTouchedSlSource, + type ToolContext, + type ToolOutput, + validateActionRawPaths, + validateActionTargetConnection, +} from '../../tools/index.js'; +``` + +In `SlWriteSourceTool.call()`, immediately after: + +```ts + const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService; + const skipIndex = context.session?.isWorktreeScoped === true; +``` + +add: + +```ts + const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId); + if (!targetConnectionValidation.ok) { + return this.buildOutput(false, [targetConnectionValidation.error], sourceName); + } +``` + +In `packages/context/src/sl/tools/sl-edit-source.tool.ts`, replace this import: + +```ts +import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js'; +``` + +with: + +```ts +import { + addTouchedSlSource, + type ToolContext, + type ToolOutput, + validateActionRawPaths, + validateActionTargetConnection, +} from '../../tools/index.js'; +``` + +In `SlEditSourceTool.call()`, immediately after: + +```ts + const semanticLayerService = context.session?.semanticLayerService ?? 
this.semanticLayerService; + const skipIndex = context.session?.isWorktreeScoped === true; +``` + +add: + +```ts + const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId); + if (!targetConnectionValidation.ok) { + return this.buildOutput(false, [targetConnectionValidation.error], sourceName); + } +``` + +- [ ] **Step 5: Write target-policy unit tests** + +Create `packages/context/src/ingest/semantic-layer-target-policy.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { + assertSemanticLayerTargetPathsAllowed, + findDisallowedSemanticLayerTargetPaths, + semanticLayerConnectionIdFromPath, +} from './semantic-layer-target-policy.js'; + +describe('semantic-layer target policy', () => { + it('extracts connection ids from semantic-layer paths', () => { + expect(semanticLayerConnectionIdFromPath('semantic-layer/warehouse/orders.yaml')).toBe('warehouse'); + expect(semanticLayerConnectionIdFromPath('a/semantic-layer/finance/orders.yaml')).toBe('finance'); + expect(semanticLayerConnectionIdFromPath('wiki/global/orders.md')).toBeNull(); + }); + + it('finds semantic-layer paths outside the allowed target connections', () => { + expect( + findDisallowedSemanticLayerTargetPaths({ + paths: [ + 'semantic-layer/warehouse/orders.yaml', + 'semantic-layer/finance/orders.yaml', + 'wiki/global/orders.md', + ], + allowedConnectionIds: new Set(['warehouse']), + }), + ).toEqual([{ path: 'semantic-layer/finance/orders.yaml', connectionId: 'finance' }]); + }); + + it('throws a deterministic error for unauthorized semantic-layer targets', () => { + expect(() => + assertSemanticLayerTargetPathsAllowed({ + paths: ['semantic-layer/finance/orders.yaml', 'semantic-layer/marketing/accounts.yaml'], + allowedConnectionIds: new Set(['warehouse']), + }), + ).toThrow( + /semantic-layer target connection not allowed: semantic-layer\/finance\/orders\.yaml \(finance\), semantic-layer\/marketing\/accounts\.yaml \(marketing\); allowed: 
warehouse/, + ); + }); +}); +``` + +- [ ] **Step 6: Implement target-policy helpers** + +Create `packages/context/src/ingest/semantic-layer-target-policy.ts`: + +```ts +export interface SemanticLayerTargetPolicyInput { + paths: readonly string[]; + allowedConnectionIds: ReadonlySet<string>; +} + +export interface SemanticLayerTargetPolicyViolation { + path: string; + connectionId: string; +} + +export function semanticLayerConnectionIdFromPath(path: string): string | null { + const normalized = path.replace(/^[ab]\//, ''); + const match = /^semantic-layer\/([^/]+)\//.exec(normalized); + return match?.[1] ?? null; +} + +export function findDisallowedSemanticLayerTargetPaths( + input: SemanticLayerTargetPolicyInput, +): SemanticLayerTargetPolicyViolation[] { + return input.paths + .map((path) => ({ path, connectionId: semanticLayerConnectionIdFromPath(path) })) + .filter((entry): entry is SemanticLayerTargetPolicyViolation => { + return entry.connectionId !== null && !input.allowedConnectionIds.has(entry.connectionId); + }) + .sort((left, right) => { + const byConnection = left.connectionId.localeCompare(right.connectionId); + return byConnection === 0 ? left.path.localeCompare(right.path) : byConnection; + }); +} + +export function assertSemanticLayerTargetPathsAllowed(input: SemanticLayerTargetPolicyInput): void { + const violations = findDisallowedSemanticLayerTargetPaths(input); + if (violations.length === 0) { + return; + } + const allowed = [...input.allowedConnectionIds].sort(); + throw new Error( + `semantic-layer target connection not allowed: ${violations + .map((violation) => `${violation.path} (${violation.connectionId})`) + .join(', ')}; allowed: ${allowed.length > 0 ? 
allowed.join(', ') : '(none)'}`, + ); +} +``` + +- [ ] **Step 7: Add failing patch-policy test** + +In `packages/context/src/ingest/isolated-diff/git-patch.test.ts`, append this +test inside `describe('isolated diff patch contract', ...)`: + +```ts + it('rejects semantic-layer paths outside allowed target connections', () => { + const patch = + 'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n'; + + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'wu-finance', + patch, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + }), + ).toThrow(/semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/); + }); +``` + +- [ ] **Step 8: Wire target policy into patch parsing** + +In `packages/context/src/ingest/isolated-diff/git-patch.ts`, add this import: + +```ts +import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js'; +``` + +Update `PatchPolicyInput` to include allowed targets: + +```ts +export interface PatchPolicyInput { + unitKey: string; + patch: string; + slDisallowed: boolean; + allowedTargetConnectionIds?: ReadonlySet<string>; +} +``` + +In `assertPatchAllowedForWorkUnit()`, after `const touched = +parsePatchTouchedPaths(input.patch);`, add: + +```ts + if (input.allowedTargetConnectionIds) { + assertSemanticLayerTargetPathsAllowed({ + paths: touched.map((entry) => entry.path), + allowedConnectionIds: input.allowedTargetConnectionIds, + }); + } +``` + +- [ ] **Step 9: Run policy and SL tool tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/sl/tools/sl-write-source.tool.test.ts \ + src/sl/tools/sl-edit-source.tool.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 10: Commit target tool and patch policy** + +Run: + +```bash +git add \ + packages/context/src/tools/action-target-connection.ts \ + packages/context/src/tools/index.ts \ + packages/context/src/sl/tools/sl-write-source.tool.ts \ + packages/context/src/sl/tools/sl-write-source.tool.test.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.test.ts \ + packages/context/src/ingest/semantic-layer-target-policy.ts \ + packages/context/src/ingest/semantic-layer-target-policy.test.ts \ + packages/context/src/ingest/isolated-diff/git-patch.ts \ + packages/context/src/ingest/isolated-diff/git-patch.test.ts +git commit -m "fix(ingest): enforce SL target connection scope" +``` + +### Task 3: Wire target policy through integration and final runner gates + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/index.ts` + +- [ ] **Step 1: Add traced patch-integrator target rejection coverage** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`, add +`allowedTargetConnectionIds: new Set(['c1']),` to every existing +`integrateWorkUnitPatch()` call. 
+ +Append this test inside `describe('integrateWorkUnitPatch', ...)`: + +```ts + it('classifies unauthorized semantic-layer targets as traced textual conflicts', async () => { + const { homeDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-target-policy'); + await git.addWorktree(childDir, 'child-target-policy', baseSha); + const childGit = git.forWorktree(childDir); + await mkdir(join(childDir, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(childDir, 'semantic-layer/finance/orders.yaml'), + 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n', + ); + await childGit.commitFiles(['semantic-layer/finance/orders.yaml'], 'unauthorized sl', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/unauthorized.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-target-policy/trace.jsonl'), + jobId: 'job-target-policy', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-finance', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + touchedPaths: ['semantic-layer/finance/orders.yaml'], + }); + const rawTrace = await readFile(trace.tracePath, 'utf-8'); + expect(rawTrace).toContain('patch_policy_rejected'); + expect(rawTrace).toContain('semantic-layer target connection not allowed'); + expect(rawTrace).toContain('allowedTargetConnectionIds'); + }); +``` + +- [ ] **Step 2: Run the failing patch-integrator test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts -t 
"unauthorized semantic-layer targets" +``` + +Expected: FAIL because `IntegrateWorkUnitPatchInput` does not accept or pass +allowed target connections to patch policy. + +- [ ] **Step 3: Implement patch-integrator target policy wiring** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.ts`, add this +field to `IntegrateWorkUnitPatchInput`: + +```ts + allowedTargetConnectionIds: ReadonlySet; +``` + +In the `assertPatchAllowedForWorkUnit()` call, add: + +```ts + allowedTargetConnectionIds: input.allowedTargetConnectionIds, +``` + +In the `patch_policy_rejected` trace data, add: + +```ts + allowedTargetConnectionIds: [...input.allowedTargetConnectionIds].sort(), +``` + +- [ ] **Step 4: Wire WorkUnit target sets and final target gate in the runner** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add this import: + +```ts +import { assertSemanticLayerTargetPathsAllowed } from './semantic-layer-target-policy.js'; +``` + +Near the existing projection state: + +```ts + let projectionTouchedSources: TouchedSlSource[] = []; + let projectionChangedWikiPageKeys: string[] = []; +``` + +add: + +```ts + let projectionTouchedPaths: string[] = []; +``` + +Inside the `adapter.project` block, immediately after `const projectionPaths = +[...]`, add: + +```ts + projectionTouchedPaths = projectionPaths; +``` + +In the `integrateWorkUnitPatch()` call, add: + +```ts + allowedTargetConnectionIds: new Set(slConnectionIds), +``` + +After `const finalTouchedSlSources = this.uniqueTouchedSlSources([...]);` and +before `activePhase = 'final_gates';`, add this traced policy gate: + +```ts + const finalTargetPolicyPaths = [ + ...projectionTouchedPaths, + ...workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []), + ...postReconciliationPaths, + ...(postProcessorOutcome?.touchedSources ?? 
[]).map( + (source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`, + ), + ]; + const targetPolicyTraceData = { + allowedTargetConnectionIds: slConnectionIds, + touchedPaths: [...new Set(finalTargetPolicyPaths)].sort(), + }; + activePhase = 'target_policy'; + activeFailureDetails = targetPolicyTraceData; + await traceTimed(runTrace, 'target_policy', 'semantic_layer_target_policy', targetPolicyTraceData, async () => { + assertSemanticLayerTargetPathsAllowed({ + paths: finalTargetPolicyPaths, + allowedConnectionIds: new Set(slConnectionIds), + }); + }); + activeFailureDetails = undefined; +``` + +In `packages/context/src/ingest/index.ts`, export the target policy helpers: + +```ts +export { + assertSemanticLayerTargetPathsAllowed, + findDisallowedSemanticLayerTargetPaths, + semanticLayerConnectionIdFromPath, +} from './semantic-layer-target-policy.js'; +``` + +- [ ] **Step 5: Run patch-integrator tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit integration target-policy wiring** + +Run: + +```bash +git add \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/index.ts +git commit -m "fix(ingest): trace isolated SL target policy gates" +``` + +### Task 4: Add end-to-end isolated-diff regressions + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Update the runner test wiki helper** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +replace `makeWikiService()` with this implementation: + +```ts +async function listGlobalWikiPageKeys(root: string): Promise<string[]> { + const dir = join(root, 'wiki/global'); + const entries = await readdir(dir).catch(() => []); + return entries + .filter((entry) => entry.endsWith('.md')) + .map((entry) => entry.slice(0, -'.md'.length)) + .sort(); +} + +function frontmatterList(yaml: string, key: string): string[] { + const pattern = new RegExp(`${key}:\\n((?: - .+\\n?)*)`); + return ( + pattern + .exec(yaml)?.[1] + ?.split('\n') + .map((line) => line.trim().replace(/^- /, '')) + .filter(Boolean) ?? [] + ); +} + +function makeWikiService(root: string) { + return { + listPageKeys: vi.fn(async (scope: string) => (scope === 'GLOBAL' ? listGlobalWikiPageKeys(root) : [])), + readPage: vi.fn(async (_scope: string, _scopeId: string | null, key: string) => { + const path = join(root, 'wiki/global', `${key}.md`); + const raw = await readFile(path, 'utf-8').catch(() => null); + if (!raw) { + return null; + } + const [, yaml = '', content = ''] = /^---\n([\s\S]*?)\n---\n?([\s\S]*)$/.exec(raw) ??
[]; + return { + pageKey: key, + frontmatter: { + summary: key, + usage_mode: 'auto', + refs: frontmatterList(yaml, 'refs'), + sl_refs: frontmatterList(yaml, 'sl_refs'), + }, + content: content.trim(), + }; + }), + syncFromCommit: vi.fn(), + }; +} +``` + +Add `readdir` to the first import from `node:fs/promises`: + +```ts +import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises'; +``` + +- [ ] **Step 2: Add failing cross-WorkUnit wiki ref regression** + +Append this test inside +`describe('IngestBundleRunner isolated diff path', ...)`: + +```ts + it('rejects final wiki refs broken by another accepted WorkUnit before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'wiki/global/source-page.md'), + '---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n', + ); + await runtime.git.commitFiles(['wiki/global/source-page.md'], 'seed source page', 'KTX Test', 'system@ktx.local'); + const preRunHead = await runtime.git.revParseHead(); + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'page-ref', rawFiles: ['pages/ref.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'page-delete', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.unitKey === 'page-ref') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account 
segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Page with wiki ref', + rawPaths: ['pages/ref.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'wu page ref', 'KTX Test', 'system@ktx.local'); + } + if (params.telemetryTags.unitKey === 'page-delete') { + await rm(join(root, 'wiki/global/source-page.md'), { force: true }); + currentSession.actions.push({ + target: 'wiki', + type: 'removed', + key: 'source-page', + detail: 'Delete referenced page', + rawPaths: ['pages/delete.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/source-page.md'], 'wu delete source page', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['pages/ref.json', 'h1'], + ['pages/delete.json', 'h2'], + ]); + + await expect( + runner.run({ + jobId: 'job-wiki-ref-conflict', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> source-page/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-wiki-ref-conflict/trace.jsonl'), 'utf-8'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('account-segments -> source-page'); + expect(trace).toContain('ingest_failed'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 3: Add failing unauthorized WorkUnit patch regression** + +Append this test inside the same `describe(...)` block: + +```ts + it('rejects WorkUnit patches that 
touch unauthorized semantic-layer target connections', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'finance-source', rawFiles: ['cards/finance.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/finance/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'finance', 'orders'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'orders', + detail: 'Unauthorized target', + targetConnectionId: 'finance', + rawPaths: ['cards/finance.json'], + }); + await currentSession.gitService.commitFiles(['semantic-layer/finance/orders.yaml'], 'wu unauthorized target', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/finance.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-unauthorized-wu-target', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/isolated diff textual conflict.*semantic-layer target connection not allowed/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, 
'.ktx/ingest-traces/job-unauthorized-wu-target/trace.jsonl'), 'utf-8'); + expect(trace).toContain('patch_policy_rejected'); + expect(trace).toContain('semantic-layer/finance/orders.yaml'); + expect(trace).toContain('allowedTargetConnectionIds'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 4: Add failing unauthorized reconciliation regression** + +Append this test inside the same `describe(...)` block: + +```ts + it('rejects reconciliation mutations that touch unauthorized semantic-layer target connections before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'valid-page', rawFiles: ['pages/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile(join(root, 'wiki/global/valid-page.md'), '---\nsummary: Valid page\nusage_mode: auto\n---\n\nValid\n'); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'valid-page', + detail: 'Valid page', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/valid-page.md'], 'wu valid page', 'KTX Test', 'system@ktx.local'); + } else { + await mkdir(join(root, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/finance/reconcile_orders.yaml'), + 'name: reconcile_orders\ngrain: [id]\ncolumns: 
[{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'finance', 'reconcile_orders'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'reconcile_orders', + detail: 'Unauthorized reconcile target', + targetConnectionId: 'finance', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/finance/reconcile_orders.yaml'], + 'reconcile unauthorized target', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-unauthorized-reconcile-target', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/semantic-layer target connection not allowed/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-unauthorized-reconcile-target/trace.jsonl'), 'utf-8'); + expect(trace).toContain('semantic_layer_target_policy_failed'); + expect(trace).toContain('semantic-layer/finance/reconcile_orders.yaml'); + expect(trace).toContain('ingest_failed'); + expect(trace).not.toContain('squash_finished'); + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'target_policy', + message: expect.stringContaining('semantic-layer target connection not allowed'), + }); + expect(failureReport.body.failure.details).toMatchObject({ + allowedTargetConnectionIds: ['warehouse'], + touchedPaths: 
expect.arrayContaining(['semantic-layer/finance/reconcile_orders.yaml']), + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 5: Run failing runner regressions** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + -t "wiki refs broken|unauthorized semantic-layer target" +``` + +Expected before Tasks 1-3 are complete: FAIL. Expected after Tasks 1-3 are +complete: PASS. + +- [ ] **Step 6: Commit runner regressions** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "test(ingest): cover isolated diff reference and target gates" +``` + +### Task 5: Verification and trace acceptance + +**Files:** +- Verify: `packages/context/src/ingest/*` +- Verify: `packages/context/src/ingest/isolated-diff/*` +- Verify: `packages/context/src/sl/tools/*` +- Verify: `packages/context/src/tools/*` + +- [ ] **Step 1: Run the focused isolated-diff and tool suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/sl/tools/sl-write-source.tool.test.ts \ + src/sl/tools/sl-edit-source.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run context type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run dead-code check for TypeScript changes** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to the files in this +plan. 
If there are unrelated pre-existing findings, capture the exact output in +the final handoff. + +- [ ] **Step 4: Run pre-commit for changed files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts \ + packages/context/src/ingest/semantic-layer-target-policy.ts \ + packages/context/src/ingest/semantic-layer-target-policy.test.ts \ + packages/context/src/ingest/isolated-diff/git-patch.ts \ + packages/context/src/ingest/isolated-diff/git-patch.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/index.ts \ + packages/context/src/tools/action-target-connection.ts \ + packages/context/src/tools/index.ts \ + packages/context/src/sl/tools/sl-write-source.tool.ts \ + packages/context/src/sl/tools/sl-write-source.tool.test.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.test.ts +``` + +Expected: PASS. If the repository has no usable pre-commit configuration or the +local `uv` version cannot satisfy the project pin, report the exact failure and +run `pnpm --filter @ktx/context run type-check` plus the Vitest suite above. + +- [ ] **Step 5: Verify persistent trace acceptance criteria** + +Inspect the traces produced by the three new runner failures.
The trace must +include these events and fields: + +```text +job-wiki-ref-conflict: +- final_artifact_gates_failed +- ingest_failed +- failure_report_created +- no squash_finished event +- error.message includes "account-segments -> source-page" + +job-unauthorized-wu-target: +- patch_policy_rejected +- ingest_failed +- failure_report_created +- no squash_finished event +- data.allowedTargetConnectionIds includes "warehouse" +- data.touchedPaths includes "semantic-layer/finance/orders.yaml" + +job-unauthorized-reconcile-target: +- semantic_layer_target_policy_started +- semantic_layer_target_policy_failed +- ingest_failed +- failure_report_created +- no squash_finished event +- data.allowedTargetConnectionIds includes "warehouse" +- data.touchedPaths includes "semantic-layer/finance/reconcile_orders.yaml" +- error.message includes "semantic-layer target connection not allowed" +``` + +The failed stored reports for the two target-policy regressions must include: + +```text +failure.phase: +- "integration" for WorkUnit patch policy rejection +- "target_policy" for reconciliation or integration-stage mutation rejection + +failure.details: +- allowedTargetConnectionIds +- touchedPaths +- invalid path and connection in the error message +``` + +- [ ] **Step 6: Commit verification-only fixes if needed** + +If verification exposes formatting, type, or test issues in the files changed +by this plan, fix them and commit: + +```bash +git add \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts \ + packages/context/src/ingest/semantic-layer-target-policy.ts \ + packages/context/src/ingest/semantic-layer-target-policy.test.ts \ + packages/context/src/ingest/isolated-diff/git-patch.ts \ + packages/context/src/ingest/isolated-diff/git-patch.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + 
packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/index.ts \ + packages/context/src/tools/action-target-connection.ts \ + packages/context/src/tools/index.ts \ + packages/context/src/sl/tools/sl-write-source.tool.ts \ + packages/context/src/sl/tools/sl-write-source.tool.test.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.ts \ + packages/context/src/sl/tools/sl-edit-source.tool.test.ts +git commit -m "chore(ingest): verify isolated diff gate closure" +``` + +If verification passes without edits, do not create an empty commit. + +## Self-review + +Spec coverage: + +- Wiki `refs` and inline `[[...]]` validation is added to the final global gate + for changed wiki pages in the composed integration tree. +- WorkUnit patch integration rejects unauthorized semantic-layer target + connections before patch application can commit into the integration tree. +- Reconciliation and other integration-stage mutations are checked with a + traced target-policy gate before final artifact gates and before squash. +- SL write/edit tools reject out-of-scope target connections during + session-scoped ingest tool calls. +- Failure traces and failed reports include explicit target-policy context, + rejected paths, allowed connection IDs, failure phase, and no `squash_finished` + event when the run stops before main. + +Placeholder scan: + +- The plan contains no placeholder tokens, deferred implementation notes, or + unspecified edge-case instructions. + +Type consistency: + +- `allowedTargetConnectionIds` is the patch-policy and patch-integrator field. +- `allowedConnectionNames` remains the existing `ToolSession` field. +- `semantic_layer_target_policy_*` is the trace event prefix from `traceTimed()`. +- `refs` is the existing wiki frontmatter field that implements the spec's + wiki-reference gate. 
diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-connector-migration.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-connector-migration.md new file mode 100644 index 00000000..cb7b7f53 --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-connector-migration.md @@ -0,0 +1,1051 @@ +# Isolated Diff Ingestion V1 Connector Migration Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Route Notion, LookML, Looker, dbt, and MetricFlow direct durable-write +ingest through the isolated-diff runner path. + +**Architecture:** Keep isolated-diff routing private and runner-owned by +centralizing the default source-key list outside adapters and public +configuration. The shared runner continues to own per-work-unit child +worktrees, patch integration, gates, repair, traces, and reports. MetricFlow +also gets its deterministic semantic-model import moved into the adapter +projector hook so those authoritative writes land in the integration worktree +before child worktrees are created. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, simple-git, existing +`IngestBundleRunner`, `SessionWorktreeService`, `MetricflowSourceAdapter`, +`importMetricflowSemanticModels()`, and local ingest runtime wiring. + +--- + +## Audit summary + +This audit read +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, all +implemented isolated-diff plans from May 17 and May 18, and the current runner +and adapter code under `packages/context/src/ingest/`. + +Implemented v1 safety plans: + +- `2026-05-17-isolated-diff-ingestion-v1-core.md`: core isolated worktrees, + patch proposals, integration, trace storage, body-reference parsing, and the + Metabase stale-measure regression exist in code. 
+- `2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md`: final + gates run after reconciliation and later mutating stages, child worktrees are + cleaned up, failed reports are stored, and traces cover postmortem phases. +- `2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md`: + provenance validation runs before squash. +- `2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md`: + final wiki reference gates, semantic-layer target policy, and patch target + checks exist. +- `2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md`: + global wiki reference scope expands when semantic-layer sources change or + wiki pages are removed. +- `2026-05-18-isolated-diff-ingestion-v1-textual-conflict-resolver.md`: + bounded textual conflict repair exists and is wired into patch integration. +- `2026-05-18-isolated-diff-ingestion-v1-gate-repair.md`: bounded repair for + cleanly applied patch and final artifact gate failures exists. + +Current v1-blocking gaps: + +- `packages/context/src/ingest/local-bundle-runtime.ts` still sets + `isolatedDiffSourceKeys: ['metabase']`, so Notion, LookML, Looker, dbt, and + MetricFlow still use the old shared-worktree WorkUnit path by default. +- `packages/context/src/ingest/ingest-bundle.runner.ts` still contains the + shared-worktree fallback branch. That branch must remain until connector + migration and default promotion finish, but the other direct durable-write + connectors must stop taking it. +- There is no regression matrix proving the five non-Metabase connector source + keys route through child worktrees and produce `isolatedDiff` report data. +- MetricFlow has `importMetricflowSemanticModels()` but + `MetricflowSourceAdapter` does not expose it as `project()`. The spec says + MetricFlow's deterministic semantic-model import becomes an ingestion + projector, not a post-WorkUnit shared-worktree write. 
+ +Later v1-blocking gaps after this plan: + +- Promote isolated diffs to the default once the Metabase regression and at + least one non-Metabase connector pass are green. +- Remove the old shared-worktree WorkUnit execution path after the default path + is promoted. + +Non-blocking gaps: + +- Deterministic semantic merge helpers from rollout step 9. +- Transitive SQL-projection dependency expansion beyond direct declared joins. +- Moving provenance rows into worktree files. +- Public connector knobs such as `executionMode`, `planningStrategy`, or + `conflictPolicy`. +- Resolver context expansion to include richer transcript excerpts and every + previously applied overlapping patch. + +## File structure + +- Create `packages/context/src/ingest/isolated-diff/source-routing.ts`. + Owns the private runner default source-key list for direct durable-write + connectors. +- Create `packages/context/src/ingest/isolated-diff/source-routing.test.ts`. + Locks the internal list to Metabase plus the five migrated connectors. +- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. + Uses the centralized isolated-diff source-key list instead of the Metabase-only + inline array. +- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`. + Verifies local ingest runtime deps enable isolated routing for the migrated + connector list. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds a non-Metabase source-key routing matrix that proves direct writes run in + isolated child worktrees and report `isolatedDiff` metadata. +- Modify `packages/context/src/ingest/types.ts`. + Adds the semantic-layer service to `DeterministicProjectionContext` so + adapter projectors can write to the integration worktree. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Passes the semantic-layer service into adapter projectors. +- Create `packages/context/src/ingest/adapters/metricflow/projection-config.ts`. 
+ Persists and reads MetricFlow projection metadata from the staged snapshot and + converts parsed target-table mappings into importer host-table inputs. +- Modify `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`. + Writes projection metadata during fetch and implements `project()` via + `importMetricflowSemanticModels()`. +- Modify `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`. + Covers projection metadata persistence and the adapter projector. +- Modify `packages/context/src/ingest/local-bundle-ingest.test.ts`. + Verifies local MetricFlow ingest takes the isolated path and records a + projection commit. + +--- + +### Task 1: Centralize runner-owned connector routing + +**Files:** +- Create: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Create: `packages/context/src/ingest/isolated-diff/source-routing.test.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts` + +- [ ] **Step 1: Write the failing routing tests** + +Create `packages/context/src/ingest/isolated-diff/source-routing.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { + defaultIsolatedDiffSourceKeys, + isIsolatedDiffDirectWriteSourceKey, + ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS, +} from './source-routing.js'; + +describe('isolated-diff source routing', () => { + it('keeps the runner-owned direct-write connector list explicit', () => { + expect(ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS).toEqual([ + 'metabase', + 'notion', + 'lookml', + 'looker', + 'dbt', + 'metricflow', + ]); + }); + + it('returns a mutable copy for runtime settings', () => { + const keys = defaultIsolatedDiffSourceKeys(); + keys.push('fake'); + + expect(defaultIsolatedDiffSourceKeys()).toEqual([ + 'metabase', + 'notion', + 'lookml', + 'looker', + 'dbt', + 'metricflow', + ]); + }); + + it('recognizes migrated connector source keys only', () => { + 
expect(isIsolatedDiffDirectWriteSourceKey('notion')).toBe(true); + expect(isIsolatedDiffDirectWriteSourceKey('metricflow')).toBe(true); + expect(isIsolatedDiffDirectWriteSourceKey('historic-sql')).toBe(false); + expect(isIsolatedDiffDirectWriteSourceKey('live-database')).toBe(false); + }); +}); +``` + +In `packages/context/src/ingest/local-bundle-runtime.test.ts`, add this helper +type near the existing runtime helper types: + +```ts +type RuntimeWithSettingsDeps = { + deps: { + settings: { + isolatedDiffSourceKeys?: string[]; + }; + }; +}; +``` + +Then append this test inside `describe('createLocalBundleIngestRuntime', ...)`: + +```ts + it('enables isolated-diff routing for direct durable-write connectors', () => { + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner: testAgentRunner(), + }); + + const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings; + + expect(settings.isolatedDiffSourceKeys).toEqual([ + 'metabase', + 'notion', + 'lookml', + 'looker', + 'dbt', + 'metricflow', + ]); + }); +``` + +- [ ] **Step 2: Run the failing routing tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts src/ingest/local-bundle-runtime.test.ts -t "isolated-diff source routing|direct durable-write connectors" +``` + +Expected: FAIL because `source-routing.ts` does not exist and local runtime +still uses only `['metabase']`. 
+ +- [ ] **Step 3: Add centralized routing code** + +Create `packages/context/src/ingest/isolated-diff/source-routing.ts`: + +```ts +export const ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS = [ + 'metabase', + 'notion', + 'lookml', + 'looker', + 'dbt', + 'metricflow', +] as const; + +export type IsolatedDiffDirectWriteSourceKey = (typeof ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS)[number]; + +const ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEY_SET = new Set<string>(ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS); + +export function defaultIsolatedDiffSourceKeys(): string[] { + return [...ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS]; +} + +export function isIsolatedDiffDirectWriteSourceKey( + sourceKey: string, +): sourceKey is IsolatedDiffDirectWriteSourceKey { + return ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEY_SET.has(sourceKey); +} +``` + +In `packages/context/src/ingest/local-bundle-runtime.ts`, add this import: + +```ts +import { defaultIsolatedDiffSourceKeys } from './isolated-diff/source-routing.js'; +``` + +Then replace the settings value: + +```ts + isolatedDiffSourceKeys: ['metabase'], +``` + +with: + +```ts + isolatedDiffSourceKeys: defaultIsolatedDiffSourceKeys(), +``` + +- [ ] **Step 4: Run the routing tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts src/ingest/local-bundle-runtime.test.ts -t "isolated-diff source routing|direct durable-write connectors" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit routing changes** + +Run: + +```bash +git add packages/context/src/ingest/isolated-diff/source-routing.ts \ + packages/context/src/ingest/isolated-diff/source-routing.test.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts +git commit -m "feat(ingest): route direct-write connectors through isolated diffs" +``` + +Expected: commit is created with only the routing files. 
+ +--- + +### Task 2: Add non-Metabase isolated routing regressions + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Write the failing non-Metabase routing matrix** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +add this import: + +```ts +import { defaultIsolatedDiffSourceKeys } from './isolated-diff/source-routing.js'; +``` + +Change `makeDeps()` to accept a source key: + +```ts +function makeDeps(runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>, sourceKey = 'metabase') { + const adapter: any = { + source: sourceKey, + skillNames: [], + detect: vi.fn().mockResolvedValue(true), + chunk: vi.fn().mockResolvedValue({ + workUnits: [ + { unitKey: 'card-wiki', rawFiles: ['cards/wiki.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }), + }; +``` + +In the same helper, replace the settings block with: + +```ts + settings: { + memoryIngestionModel: 'test', + probeRowCount: 1, + isolatedDiffSourceKeys: defaultIsolatedDiffSourceKeys(), + ingestTraceLevel: 'trace', + }, +``` + +Change `mockStageRawFiles()` to accept the source key: + +```ts +async function mockStageRawFiles( + runner: IngestBundleRunner, + runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>, + hashes: [string, string][], + sourceKey = 'metabase', +) { + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse', sourceKey, 's'); + await mkdir(rawDir, { recursive: true }); + for (const [rawPath] of hashes) { + await mkdir(join(rawDir, rawPath.split('/').slice(0, -1).join('/')), { recursive: true }); + await writeFile(join(rawDir, rawPath), '{}'); + } + return { currentHashes: new Map(hashes), rawDirInWorktree: `raw-sources/warehouse/${sourceKey}/s` }; + }); +} +``` + +Append this test inside 
`describe('IngestBundleRunner isolated diff path', ...)`: + +```ts + it.each(['notion', 'lookml', 'looker', 'dbt', 'metricflow'] as const)( + 'routes %s direct writes through isolated child worktrees', + async (sourceKey) => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime, sourceKey); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: `${sourceKey}-wiki`, + rawFiles: [`${sourceKey}/page.json`], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global', `${sourceKey}-isolated.md`), + `---\nsummary: ${sourceKey} isolated write\nusage_mode: auto\n---\n\nIsolated ${sourceKey} write.\n`, + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: `${sourceKey}-isolated`, + detail: `${sourceKey} isolated write`, + rawPaths: [`${sourceKey}/page.json`], + }); + await currentSession.gitService.commitFiles( + [`wiki/global/${sourceKey}-isolated.md`], + `${sourceKey} wiki`, + 'KTX Test', + 'system@ktx.local', + ); + + expect(params.telemetryTags).toMatchObject({ + operationName: 'ingest-bundle-wu', + source: sourceKey, + unitKey: `${sourceKey}-wiki`, + }); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [[`${sourceKey}/page.json`, 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: `job-${sourceKey}`, + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + 
).resolves.toMatchObject({ + jobId: `job-${sourceKey}`, + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces', `job-${sourceKey}`, 'trace.jsonl'), 'utf-8'); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).toContain('work_unit_patch_collected'); + expect(trace).toContain('patch_apply_started'); + expect(trace).not.toContain('shared_worktree_path_enabled'); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + expect(reportCreate?.body.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }, + ); +``` + +- [ ] **Step 2: Run the non-Metabase routing matrix** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "routes .* direct writes" +``` + +Expected: PASS after Task 1. If it fails, the failure must point to one of +these concrete problems: settings do not include the source key, the shared path +still runs, or the final report lacks `isolatedDiff`. + +- [ ] **Step 3: Commit runner regression coverage** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "test(ingest): cover non-metabase isolated diff routing" +``` + +Expected: commit contains only the isolated runner regression file. 
+ +--- + +### Task 3: Move MetricFlow deterministic import into projection + +**Files:** +- Modify: `packages/context/src/ingest/types.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Create: `packages/context/src/ingest/adapters/metricflow/projection-config.ts` +- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts` +- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts` + +- [ ] **Step 1: Write failing MetricFlow projector tests** + +In `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`, +add these imports: + +```ts +import type { MetricFlowParseResult } from './deep-parse.js'; +import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js'; +``` + +Add this helper near the top of the file: + +```ts +function metricflowParseResult(): MetricFlowParseResult { + return { + semanticModels: [ + { + name: 'orders', + description: 'Orders', + modelRef: 'orders', + dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }], + measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }], + entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }], + defaultTimeDimension: null, + }, + ], + crossModelMetrics: [], + relationships: [], + warnings: ['parser warning'], + }; +} +``` + +Append these tests inside `describe('MetricflowSourceAdapter', ...)`: + +```ts + it('persists parsed target tables for deterministic projection during fetch', async () => { + const repo = await makeRepo(tmpRoot, { + 'dbt_project.yml': 'name: analytics\n', + 'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n', + }); + + await adapter.fetch?.( + { + repoUrl: repo.repoUrl, + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: { + orders: { + ok: true, + catalog: null, + schema: 'analytics', + name: 'orders', + canonicalTable: 
'analytics.orders', + }, + }, + }, + stagedDir, + { connectionId: 'warehouse-1', sourceKey: 'metricflow' }, + ); + + await expect(readMetricflowProjectionConfig(stagedDir)).resolves.toMatchObject({ + parsedTargetTables: { + orders: { + ok: true, + schema: 'analytics', + name: 'orders', + }, + }, + }); + }); + + it('projects parsed MetricFlow semantic models in the integration worktree', async () => { + await writeMetricflowProjectionConfig(stagedDir, { + parsedTargetTables: { + orders: { + ok: true, + catalog: null, + schema: 'analytics', + name: 'orders', + canonicalTable: 'analytics.orders', + }, + }, + }); + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await adapter.project?.({ + connectionId: 'warehouse-1', + sourceKey: 'metricflow', + syncId: 'sync-1', + jobId: 'job-1', + runId: 'run-1', + stagedDir, + workdir: '/tmp/metricflow-integration', + parseArtifacts: metricflowParseResult(), + semanticLayerService: semanticLayerService as never, + }); + + expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/metricflow-integration'); + expect(scoped.writeSource).toHaveBeenCalledWith( + 'warehouse-1', + expect.objectContaining({ name: 'orders' }), + 'dbt MetricFlow', + expect.any(String), + 'dbt MetricFlow sync: create source orders', + { skipValidation: true }, + ); + expect(result).toMatchObject({ + warnings: ['parser warning'], + errors: [], + touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }], + changedWikiPageKeys: [], + }); + }); + 
+ it('returns a projection error when parse artifacts are missing', async () => { + const result = await adapter.project?.({ + connectionId: 'warehouse-1', + sourceKey: 'metricflow', + syncId: 'sync-1', + jobId: 'job-1', + runId: 'run-1', + stagedDir, + workdir: '/tmp/metricflow-integration', + parseArtifacts: undefined, + semanticLayerService: {} as never, + }); + + expect(result).toMatchObject({ + warnings: [], + errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'], + touchedSources: [], + changedWikiPageKeys: [], + }); + }); +``` + +- [ ] **Step 2: Run the failing MetricFlow projector tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/adapters/metricflow/metricflow.adapter.test.ts -t "deterministic projection|projects parsed|parse artifacts" +``` + +Expected: FAIL because `projection-config.ts` and `adapter.project()` do not +exist. + +- [ ] **Step 3: Add projector service context** + +In `packages/context/src/ingest/types.ts`, add this import: + +```ts +import type { SemanticLayerService } from '../sl/index.js'; +``` + +Then extend `DeterministicProjectionContext`: + +```ts +export interface DeterministicProjectionContext { + connectionId: string; + sourceKey: string; + syncId: string; + jobId: string; + runId: string; + stagedDir: string; + workdir: string; + parseArtifacts?: unknown; + semanticLayerService: SemanticLayerService; +} +``` + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add this property to +the `adapter.project!({ ... 
})` call: + +```ts + semanticLayerService: this.deps.semanticLayerService, +``` + +- [ ] **Step 4: Add MetricFlow projection config helpers** + +Create `packages/context/src/ingest/adapters/metricflow/projection-config.ts`: + +```ts +import { readFile, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { z } from 'zod'; +import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js'; +import type { MetricflowHostTable } from './semantic-models.js'; + +export const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json'; + +export const metricflowProjectionConfigSchema = z.object({ + parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}), +}); + +export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>; + +export async function writeMetricflowProjectionConfig( + stagedDir: string, + config: MetricflowProjectionConfig, +): Promise<void> { + const parsed = metricflowProjectionConfigSchema.parse(config); + await writeFile(join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE), `${JSON.stringify(parsed, null, 2)}\n`, 'utf-8'); +} + +export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> { + const path = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE); + try { + return metricflowProjectionConfigSchema.parse(JSON.parse(await readFile(path, 'utf-8'))); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { parsedTargetTables: {} }; + } + throw error; + } +} + +export function metricflowHostTablesFromParsedTargets( + parsedTargetTables: Record<string, ParsedTargetTable>, +): MetricflowHostTable[] { + return Object.entries(parsedTargetTables) + .flatMap(([id, table]) => + table.ok + ? 
[ + { + id, + name: table.name, + catalog: table.catalog, + db: table.schema, + columns: [], + }, + ] + : [], + ) + .sort((left, right) => left.id.localeCompare(right.id)); +} +``` + +- [ ] **Step 5: Implement MetricFlow adapter projection** + +In `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`, +replace the type import with: + +```ts +import type { + ChunkResult, + DeterministicProjectionContext, + DiffSet, + FetchContext, + ProjectionResult, + SourceAdapter, +} from '../../types.js'; +``` + +Add these imports: + +```ts +import { importMetricflowSemanticModels } from './import-semantic-models.js'; +import { + metricflowHostTablesFromParsedTargets, + readMetricflowProjectionConfig, + writeMetricflowProjectionConfig, +} from './projection-config.js'; +``` + +After `await fetchMetricflowRepo({ ... })` in `fetch()`, persist projection +metadata: + +```ts + await writeMetricflowProjectionConfig(stagedDir, { + parsedTargetTables: config.parsedTargetTables, + }); +``` + +Add this method to `MetricflowSourceAdapter`: + +```ts + async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> { + if (!isMetricFlowParseResult(ctx.parseArtifacts)) { + return { + warnings: [], + errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'], + touchedSources: [], + changedWikiPageKeys: [], + }; + } + + const projectionConfig = await readMetricflowProjectionConfig(ctx.stagedDir); + const result = await importMetricflowSemanticModels( + { semanticLayerService: ctx.semanticLayerService }, + { + connectionId: ctx.connectionId, + parseResult: ctx.parseArtifacts, + targetSchema: null, + hostTables: metricflowHostTablesFromParsedTargets(projectionConfig.parsedTargetTables), + workdir: ctx.workdir, + }, + ); + + return { + result, + warnings: result.warnings, + errors: result.errors, + touchedSources: result.touchedSources, + changedWikiPageKeys: [], + }; + } +``` + +Add this helper below `parseMetricflowStagedDirForImport()`: + +```ts 
+function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult { + if (!value || typeof value !== 'object') { + return false; + } + const candidate = value as Partial<MetricFlowParseResult>; + return ( + Array.isArray(candidate.semanticModels) && + Array.isArray(candidate.crossModelMetrics) && + Array.isArray(candidate.relationships) && + Array.isArray(candidate.warnings) + ); +} +``` + +- [ ] **Step 6: Run the MetricFlow projector tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/adapters/metricflow/metricflow.adapter.test.ts -t "deterministic projection|projects parsed|parse artifacts" +``` + +Expected: PASS. + +- [ ] **Step 7: Commit MetricFlow projection changes** + +Run: + +```bash +git add packages/context/src/ingest/types.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/adapters/metricflow/projection-config.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts +git commit -m "feat(ingest): project metricflow semantic models before work units" +``` + +Expected: commit contains only MetricFlow projector and projector context files. 
+ +--- + +### Task 4: Verify MetricFlow takes the isolated path locally + +**Files:** +- Modify: `packages/context/src/ingest/local-bundle-ingest.test.ts` + +- [ ] **Step 1: Add local MetricFlow isolated projection assertions** + +In +`packages/context/src/ingest/local-bundle-ingest.test.ts`, update the existing +`runs full MetricFlow local ingest from a dbt repo fixture through the canonical +runner` test after the report assertions: + +```ts + expect(result.report.body.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 0, + projectionSha: expect.any(String), + }); + + const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml'); + await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders'); +``` + +Keep the existing `expect(agentRunner.runLoop).toHaveBeenCalledTimes(1);` +assertion. It proves the connector remains hybrid: deterministic projection +runs first, then the MetricFlow WorkUnit still runs for agent-authored wiki or +enrichment work. + +- [ ] **Step 2: Run the local MetricFlow acceptance test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "runs full MetricFlow local ingest" +``` + +Expected: PASS. The report body must include `isolatedDiff.enabled: true`, and +the final project must contain `semantic-layer/warehouse/orders.yaml`. + +- [ ] **Step 3: Commit local acceptance coverage** + +Run: + +```bash +git add packages/context/src/ingest/local-bundle-ingest.test.ts +git commit -m "test(ingest): verify metricflow isolated projection path" +``` + +Expected: commit contains only the local bundle ingest acceptance test. 
+ +--- + +### Task 5: Final verification + +**Files:** +- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Verify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/types.ts` +- Verify: `packages/context/src/ingest/adapters/metricflow/*` +- Verify: `packages/context/src/ingest/*.test.ts` + +- [ ] **Step 1: Run focused connector migration tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/isolated-diff/source-routing.test.ts \ + src/ingest/local-bundle-runtime.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + src/ingest/local-bundle-ingest.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run the isolated-diff safety suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/isolated-diff/textual-conflict-resolver.test.ts \ + src/ingest/isolated-diff/source-routing.test.ts \ + src/ingest/final-gate-repair.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/report-snapshot.test.ts \ + src/sl/tools/sl-write-source.tool.test.ts \ + src/sl/tools/sl-edit-source.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Run package type checks** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to this connector +migration. 
+ +- [ ] **Step 5: Run formatting and diff checks** + +Run: + +```bash +pnpm exec biome check \ + packages/context/src/ingest/isolated-diff/source-routing.ts \ + packages/context/src/ingest/isolated-diff/source-routing.test.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + packages/context/src/ingest/types.ts \ + packages/context/src/ingest/adapters/metricflow/projection-config.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + packages/context/src/ingest/local-bundle-ingest.test.ts +git diff --check +``` + +Expected: PASS. + +- [ ] **Step 6: Decide docs-site impact** + +No `docs-site/content/docs/` update is required for this plan because it +changes an internal ingest correctness route and does not add, remove, or rename +public CLI commands, flags, config fields, or connector setup instructions. + +- [ ] **Step 7: Commit verification fixes only when files changed** + +If verification required formatting or type-only edits, run: + +```bash +git add packages/context/src/ingest docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-connector-migration.md +git commit -m "chore(ingest): verify isolated diff connector migration" +``` + +Expected: no empty commit. If no files changed during verification, leave the +branch at the previous task commit. + +## Self-review + +Spec coverage: + +- Rollout step 8 is covered for Notion, LookML, Looker, dbt, and MetricFlow by + the centralized source-key routing and the non-Metabase isolated runner + regression matrix. +- The connector migration notes remain source-shaped: adapters keep fetch, + chunk, clustering, target resolution, and domain rules; the runner owns + execution isolation and gates. 
+- MetricFlow's existing deterministic semantic-model import moves into + `project()`, so its authoritative writes happen in the integration worktree + before child worktrees are created. +- Notion clustering remains adapter logic; the routing change only changes where + WorkUnits execute. +- LookML `slDisallowed` remains adapter-scoped and continues to be enforced by + existing scoped tools and integration patch policy. +- Default promotion and old shared-worktree path removal remain later rollout + steps and are not implemented by this plan. + +Placeholder scan: + +- No deferred implementation markers remain. +- Every code-changing step includes exact paths, commands, expected outcomes, + and concrete code or insertion snippets. + +Type consistency: + +- The routing helper names are `ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS`, + `defaultIsolatedDiffSourceKeys()`, and + `isIsolatedDiffDirectWriteSourceKey()` across code and tests. +- The MetricFlow projection config helper names are + `writeMetricflowProjectionConfig()`, `readMetricflowProjectionConfig()`, and + `metricflowHostTablesFromParsedTargets()`. +- `DeterministicProjectionContext.semanticLayerService` is passed by + `IngestBundleRunner` and consumed by `MetricflowSourceAdapter.project()`. diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md new file mode 100644 index 00000000..dc993557 --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md @@ -0,0 +1,754 @@ +# Isolated Diff Ingestion V1 Default Promotion Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Promote isolated-diff WorkUnit execution to the default ingest runner +path while keeping the old shared-worktree branch reachable by an explicit +private fallback setting for the final cleanup rollout. + +**Architecture:** The runner stops asking whether a source is on an +isolated-diff allowlist. Instead, non-override bundle ingests use isolated +diffs unless the private settings object lists the source in +`sharedWorktreeSourceKeys`. Local runtime defaults that fallback list to empty, +and tests keep the old path covered with an explicit legacy source setting so +rollout step 11 can delete it safely. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, pnpm workspace commands, +existing `IngestBundleRunner`, `IngestSettingsPort`, local ingest runtime, and +isolated-diff runner tests. + +--- + +## Audit summary + +This audit read the original spec at +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, all +plans matching +`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and +`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the +current ingest runner code under `packages/context/src/ingest/`. + +Implemented v1 rollout coverage: + +- Rollout steps 1 and 2 are implemented by the core plan: child worktrees, + binary no-rename patch proposals, and `git apply --3way --index` + integration exist. +- Rollout step 3 is implemented by the textual conflict resolver plan: + `textual-conflict-resolver.ts` is wired through `patch-integrator.ts`. +- Rollout steps 4, 5, and 6 are implemented by the gates, provenance, + reference, global wiki, and gate-repair plans: final gates, persistent traces, + failure reports, provenance validation, target policy, and repair counters + exist. +- Rollout step 7 is implemented by the core and follow-up plans: Metabase has + isolated-diff stale-reference regression coverage. 
+- Rollout step 8 is implemented by + `2026-05-18-isolated-diff-ingestion-v1-connector-migration.md` and the + follow-up commits: Notion, LookML, Looker, dbt, and MetricFlow route through + isolated child worktrees, and MetricFlow projection runs before WorkUnits. + +Current v1-blocking gaps: + +- Rollout step 10 is not complete. `IngestBundleRunner.isIsolatedDiffEnabled()` + still checks `settings.isolatedDiffSourceKeys`, and + `local-bundle-runtime.ts` still installs the internal allowlist returned by + `defaultIsolatedDiffSourceKeys()`. +- Rollout step 11 remains blocked until step 10 lands. The old + shared-worktree WorkUnit branch is still present and must stay reachable in + this plan for final cleanup validation. + +Non-blocking gaps: + +- Rollout step 9 deterministic semantic merge helpers remain intentionally + deferred until v1 resolver metrics show frequent mechanical repairs. +- Transitive SQL-projection dependency expansion remains outside v1; current + gates cover direct declared join neighbors. +- Moving provenance into worktree files remains outside v1; the implemented + source of truth is the ingest provenance store and report body. +- Public connector knobs such as `executionMode`, `planningStrategy`, and + `conflictPolicy` remain non-goals and must not be added. +- Richer resolver context, such as full transcript excerpts for every + overlapping patch, can be evaluated after the default path has production + traces. + +## File structure + +- Modify `packages/context/src/ingest/isolated-diff/source-routing.ts`. + Replace the isolated-diff direct-write allowlist with an empty default + shared-worktree fallback list. +- Modify `packages/context/src/ingest/isolated-diff/source-routing.test.ts`. + Lock the fallback list semantics and remove direct-write allowlist + assertions. +- Modify `packages/context/src/ingest/ports.ts`. + Replace `isolatedDiffSourceKeys?: string[]` with + `sharedWorktreeSourceKeys?: string[]` on the private runner settings port. 
+- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Make isolated diff the default for non-override runs and route to the old + shared branch only when `sharedWorktreeSourceKeys` contains the source. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Prove an unlisted source uses isolated diffs by default and prove an + explicit fallback source can still reach the shared-worktree branch. +- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. + Install the new empty fallback list instead of the old isolated-diff + allowlist. +- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`. + Assert local runtime settings do not expose `isolatedDiffSourceKeys` and do + default `sharedWorktreeSourceKeys` to `[]`. + +--- + +### Task 1: Replace source routing semantics + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Modify: `packages/context/src/ingest/ports.ts` + +- [ ] **Step 1: Write the failing source-routing tests** + +Replace `packages/context/src/ingest/isolated-diff/source-routing.test.ts` with: + +```ts +import { describe, expect, it } from 'vitest'; +import { defaultSharedWorktreeSourceKeys, isSharedWorktreeFallbackSourceKey } from './source-routing.js'; + +describe('isolated-diff source routing', () => { + it('defaults every non-override source to isolated diffs', () => { + expect(defaultSharedWorktreeSourceKeys()).toEqual([]); + }); + + it('returns a mutable copy for runtime settings', () => { + const keys = defaultSharedWorktreeSourceKeys(); + keys.push('legacy-source'); + + expect(defaultSharedWorktreeSourceKeys()).toEqual([]); + }); + + it('recognizes only explicitly configured shared-worktree fallback sources', () => { + expect(isSharedWorktreeFallbackSourceKey('notion', [])).toBe(false); + expect(isSharedWorktreeFallbackSourceKey('metricflow', [])).toBe(false); + 
expect(isSharedWorktreeFallbackSourceKey('legacy-source', ['legacy-source'])).toBe(true); + expect(isSharedWorktreeFallbackSourceKey('other-source', ['legacy-source'])).toBe(false); + }); +}); +``` + +- [ ] **Step 2: Run the source-routing tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts +``` + +Expected: FAIL because `defaultSharedWorktreeSourceKeys()` and +`isSharedWorktreeFallbackSourceKey()` are not exported yet. + +- [ ] **Step 3: Rewrite the routing helper** + +Replace `packages/context/src/ingest/isolated-diff/source-routing.ts` with: + +```ts +const DEFAULT_SHARED_WORKTREE_SOURCE_KEYS: readonly string[] = []; + +export function defaultSharedWorktreeSourceKeys(): string[] { + return [...DEFAULT_SHARED_WORKTREE_SOURCE_KEYS]; +} + +export function isSharedWorktreeFallbackSourceKey( + sourceKey: string, + sharedWorktreeSourceKeys: readonly string[] = DEFAULT_SHARED_WORKTREE_SOURCE_KEYS, +): boolean { + return sharedWorktreeSourceKeys.includes(sourceKey); +} +``` + +- [ ] **Step 4: Rename the private settings field** + +In `packages/context/src/ingest/ports.ts`, replace the +`IngestSettingsPort` interface with: + +```ts +export interface IngestSettingsPort { + memoryIngestionModel: string; + probeRowCount: number; + workUnitMaxConcurrency?: number; + workUnitStepBudget?: number; + workUnitFailureMode?: 'abort' | 'continue'; + sharedWorktreeSourceKeys?: string[]; + ingestTraceLevel?: IngestTraceLevel; +} +``` + +- [ ] **Step 5: Run the source-routing tests again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit routing semantics** + +Run: + +```bash +git add packages/context/src/ingest/isolated-diff/source-routing.ts \ + packages/context/src/ingest/isolated-diff/source-routing.test.ts \ + packages/context/src/ingest/ports.ts +git commit -m "feat(ingest): make isolated diff routing the private default" +``` + +### Task 2: Promote the runner default + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` + +- [ ] **Step 1: Update the isolated runner test imports and harness** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +replace the source-routing import with: + +```ts +import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js'; +``` + +Then change the `makeDeps()` signature and `settings` block to: + +```ts +function makeDeps( + runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>, + sourceKey = 'metabase', + settings: Partial<IngestSettingsPort> = {}, +) { +``` + +```ts + settings: { + memoryIngestionModel: 'test', + probeRowCount: 1, + sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(), + ingestTraceLevel: 'trace', + ...settings, + }, +``` + +- [ ] **Step 2: Add the default-promotion regression tests** + +Insert these tests inside +`describe('IngestBundleRunner isolated diff path', ...)`, before the existing +non-Metabase routing matrix: + +```ts + it('routes an unlisted direct-writing source through isolated diffs by default', async () => { + const runtime = await makeRealGitRuntime(); + try { + const sourceKey = 'custom-direct-source'; + const { deps, adapter } = makeDeps(runtime, sourceKey); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'custom-wiki', + rawFiles: ['custom/page.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return {
toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/custom-isolated.md'), + '---\nsummary: Custom isolated write\nusage_mode: auto\n---\n\nCustom isolated write.\n', + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'custom-isolated', + detail: 'Custom isolated write', + rawPaths: ['custom/page.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/custom-isolated.md'], + 'custom wiki', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['custom/page.json', 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: 'job-custom-default', + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: 'job-custom-default', + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-custom-default/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).not.toContain('shared_worktree_path_enabled'); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('keeps the shared-worktree 
path reachable through explicit private fallback settings', async () => { + const runtime = await makeRealGitRuntime(); + try { + const sourceKey = 'legacy-source'; + const { deps, adapter } = makeDeps(runtime, sourceKey, { + sharedWorktreeSourceKeys: ['legacy-source'], + }); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'legacy-wiki', + rawFiles: ['legacy/page.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/legacy-shared.md'), + '---\nsummary: Legacy shared write\nusage_mode: auto\n---\n\nLegacy shared write.\n', + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'legacy-shared', + detail: 'Legacy shared write', + rawPaths: ['legacy/page.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/legacy-shared.md'], + 'legacy wiki', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: 'job-legacy-shared', + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: 'job-legacy-shared', + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, 
'.ktx/ingest-traces/job-legacy-shared/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('shared_worktree_path_enabled'); + expect(trace).not.toContain('work_unit_child_created'); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: false, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 3: Run the new runner tests to verify the default test fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable" +``` + +Expected: FAIL. The unlisted source still enters the old shared-worktree path +because the runner checks `isolatedDiffSourceKeys`. + +- [ ] **Step 4: Change the runner routing decision** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace +`isIsolatedDiffEnabled()` with: + +```ts + private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean { + return (this.deps.settings.sharedWorktreeSourceKeys ?? []).includes(sourceKey); + } +``` + +Then replace the isolated-diff routing line with: + +```ts + const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey); +``` + +Finally, replace the shared-path trace event with: + +```ts + await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', { + sourceKey: job.sourceKey, + reason: 'explicit_private_fallback', + }); +``` + +- [ ] **Step 5: Run the new runner tests again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable" +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit runner default promotion** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "feat(ingest): promote isolated diff to default runner path" +``` + +### Task 3: Update local runtime defaults + +**Files:** +- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` + +- [ ] **Step 1: Update the local runtime settings test type** + +In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace +`RuntimeWithSettingsDeps` with: + +```ts +type RuntimeWithSettingsDeps = { + deps: { + settings: { + sharedWorktreeSourceKeys?: string[]; + isolatedDiffSourceKeys?: string[]; + }; + }; +}; +``` + +- [ ] **Step 2: Replace the local runtime settings assertion** + +Replace the test named +`enables isolated-diff routing for direct durable-write connectors` with: + +```ts + it('defaults local bundle ingest to isolated diffs without an allowlist', () => { + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner: testAgentRunner(), + }); + + const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings; + + expect(settings.sharedWorktreeSourceKeys).toEqual([]); + expect('isolatedDiffSourceKeys' in settings).toBe(false); + }); +``` + +- [ ] **Step 3: Run the local runtime settings test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest" +``` + +Expected: FAIL because `local-bundle-runtime.ts` still sets +`isolatedDiffSourceKeys`. 
+ +- [ ] **Step 4: Update local runtime imports and settings** + +In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the +source-routing import with: + +```ts +import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js'; +``` + +Then replace the settings field: + +```ts + isolatedDiffSourceKeys: defaultIsolatedDiffSourceKeys(), +``` + +with: + +```ts + sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(), +``` + +- [ ] **Step 5: Run the local runtime settings test again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest" +``` + +Expected: PASS. + +- [ ] **Step 6: Commit local runtime defaults** + +Run: + +```bash +git add packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts +git commit -m "feat(ingest): default local ingest to isolated diffs" +``` + +### Task 4: Remove stale allowlist references + +**Files:** +- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Verify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ports.ts` +- Verify: `packages/context/src/ingest/**/*.test.ts` + +- [ ] **Step 1: Search for old allowlist names** + +Run: + +```bash +rg -n "isolatedDiffSourceKeys|defaultIsolatedDiffSourceKeys|ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS|isIsolatedDiffDirectWriteSourceKey" packages/context/src +``` + +Expected: no matches. 
+ +- [ ] **Step 2: Search for the new fallback setting** + +Run: + +```bash +rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey" packages/context/src +``` + +Expected: matches only in these files: + +```text +packages/context/src/ingest/ports.ts +packages/context/src/ingest/ingest-bundle.runner.ts +packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +packages/context/src/ingest/isolated-diff/source-routing.ts +packages/context/src/ingest/isolated-diff/source-routing.test.ts +packages/context/src/ingest/local-bundle-runtime.ts +packages/context/src/ingest/local-bundle-runtime.test.ts +``` + +- [ ] **Step 3: Run a focused no-allowlist regression suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/isolated-diff/source-routing.test.ts \ + src/ingest/local-bundle-runtime.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + -t "source routing|defaults local bundle ingest|unlisted direct-writing source|shared-worktree path reachable|routes notion|routes lookml|routes looker|routes dbt|routes metricflow" +``` + +Expected: PASS. + +- [ ] **Step 4: Commit stale-reference cleanup if needed** + +If Step 1 or Step 2 required any edits, run: + +```bash +git add packages/context/src/ingest +git commit -m "chore(ingest): remove isolated diff allowlist references" +``` + +If no files changed, record that no cleanup commit was needed in the execution +notes for this task. 
+ +### Task 5: Final verification + +**Files:** +- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Verify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Verify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Verify: `packages/context/src/ingest/local-bundle-runtime.test.ts` +- Verify: `packages/context/src/ingest/ports.ts` +- Verify: `docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md` + +- [ ] **Step 1: Run the full isolated-diff focused suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/source-routing.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/isolated-diff/textual-conflict-resolver.test.ts \ + src/ingest/final-gate-repair.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/report-snapshot.test.ts \ + src/ingest/local-bundle-runtime.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run the MetricFlow local ingest regression** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "runs full MetricFlow local ingest" +``` + +Expected: PASS. The report body includes `isolatedDiff.enabled: true`, +`acceptedPatches: 0`, and a string `projectionSha`. + +- [ ] **Step 3: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 4: Run package tests** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Run TypeScript dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to the files changed +by this plan. Investigate any finding that names `source-routing.ts`, +`ports.ts`, `local-bundle-runtime.ts`, or `ingest-bundle.runner.ts`. + +- [ ] **Step 6: Decide whether docs-site needs an update** + +No `docs-site/content/docs/` change is expected for this plan because the +change is an internal runner rollout switch and does not add or remove public +CLI commands, flags, config fields, connector setup steps, or user-facing +documentation concepts. + +- [ ] **Step 7: Commit final verification notes** + +Run: + +```bash +git status --short +git add docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md +git commit -m "docs: add isolated diff default promotion plan" +``` + +Only include the plan file in this commit if all implementation commits have +already captured their code changes. + +## Completion criteria + +This plan is complete when: + +- `packages/context/src/ingest/ports.ts` has + `sharedWorktreeSourceKeys?: string[]` and no `isolatedDiffSourceKeys` field. +- `IngestBundleRunner` uses isolated diffs for every non-override source unless + `sharedWorktreeSourceKeys` explicitly contains that source. +- The trace for a default-routed source contains `isolated_diff_enabled` and + not `shared_worktree_path_enabled`. +- The trace for an explicitly fallback-routed source contains + `shared_worktree_path_enabled` and not `work_unit_child_created`. +- Local runtime settings default `sharedWorktreeSourceKeys` to `[]`. +- No production or test code under `packages/context/src` references the old + isolated-diff allowlist names. +- The focused isolated-diff suite, MetricFlow local ingest regression, + `@ktx/context` type-check, `@ktx/context` tests, and dead-code checks pass. 
+ +## Next rollout step + +After this plan is implemented and verified, the only remaining v1-blocking +rollout item from the spec is step 11: remove the old shared-worktree WorkUnit +execution path and delete the private `sharedWorktreeSourceKeys` fallback +setting. diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair-classification.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair-classification.md new file mode 100644 index 00000000..d211629c --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair-classification.md @@ -0,0 +1,1436 @@ +# Isolated Diff Ingestion V1 Gate Repair Classification Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Prevent isolated-diff gate repair from automatically editing +high-risk semantic-layer or warehouse-validation failures while preserving +bounded repair for stale wiki reference drift. + +**Architecture:** Keep the existing final gate and repair-agent modules, but +make artifact gate failures structured. A small repair-policy module classifies +structured issues before `repairFinalGateFailure()` is invoked from patch +integration or final composed-tree gates. Unknown or high-risk gate failures +fail before repair and before squash. + +**Tech Stack:** TypeScript, Vitest, pnpm, existing KTX ingest runner, +`FinalArtifactGateFailure`, JSONL ingest traces. + +--- + +## Audit summary + +The implemented rollout covers isolated child worktrees, binary no-rename +patch proposals, `git apply --3way --index`, textual conflict repair, final +artifact gates, provenance pre-squash validation, connector migration, default +promotion, and old shared-worktree path removal. + +One v1-blocking gap remains in the spec's Gate repair stage. 
The spec requires +the runner to classify final gate failures before deciding whether to repair or +fail. Repairable failures include stale wiki body references and stale wiki +frontmatter references. High-risk failures, including missing warehouse tables +or columns and invalid SQL sources, must fail without automatic repair unless a +later implementation adds a stronger evidence contract. + +Current code calls `repairFinalGateFailure()` for every +`validateFinalIngestArtifacts()` error in both: + +- `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- `packages/context/src/ingest/ingest-bundle.runner.ts` + +That lets a repair agent edit semantic-layer files after a warehouse dry-run +failure. Rerunning gates is necessary, but not sufficient: the spec explicitly +forbids automatic repair when the repair would require choosing facts without +evidence. + +Non-blocking gaps after this plan: + +- Deterministic semantic merge helpers remain intentionally deferred as rollout + step 9. +- Semantic-layer dependency expansion remains direct declared joins only. +- Provenance remains in the ingest provenance store and report body. +- Resolver and repair prompts can later include richer transcript excerpts, + overlapping patch summaries, and raw evidence bundles. +- Failures before an ingest run row exists still have deterministic trace files + but no stored ingest report. + +## File structure + +- Modify `packages/context/src/ingest/wiki-body-refs.ts`. + Add structured wiki body reference issues while keeping the existing + `findInvalidWikiBodyRefs()` string API for current callers. +- Modify `packages/context/src/ingest/wiki-body-refs.test.ts`. + Cover structured issue codes for stale semantic-layer entities and missing + raw tables. +- Modify `packages/context/src/ingest/stages/validate-wu-sources.ts`. + Preserve validator error messages in `validateWuTouchedSources()` output. +- Modify `packages/context/src/ingest/stages/validate-wu-sources.test.ts`. 
+ Cover the new `issues` payload while keeping existing `validSources` and + `invalidSources` behavior. +- Modify `packages/context/src/ingest/artifact-gates.ts`. + Throw `FinalArtifactGateFailure` with structured issues from semantic-layer, + wiki frontmatter, wiki page-reference, wiki body, and provenance-adjacent + artifact gates. +- Modify `packages/context/src/ingest/artifact-gates.test.ts`. + Assert structured issue codes for repairable and non-repairable gate + failures. +- Create `packages/context/src/ingest/gate-repair-policy.ts`. + Classify structured artifact gate failures as repairable or non-repairable. +- Create `packages/context/src/ingest/gate-repair-policy.test.ts`. + Lock the policy for stale wiki refs versus high-risk semantic/warehouse + errors. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Call the repair policy before patch-level semantic gate repair. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Prove high-risk semantic gate failures do not invoke the repair callback. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Call the repair policy before final composed-tree gate repair and include + non-repairable issue metadata in failure reports. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Add a final-gate regression where reconciliation creates an invalid + semantic-layer source and the repair agent is not called. 
+ +### Task 1: Preserve structured artifact gate issues + +**Files:** +- Modify: `packages/context/src/ingest/wiki-body-refs.ts` +- Modify: `packages/context/src/ingest/wiki-body-refs.test.ts` +- Modify: `packages/context/src/ingest/stages/validate-wu-sources.ts` +- Modify: `packages/context/src/ingest/stages/validate-wu-sources.test.ts` + +- [ ] **Step 1: Add structured wiki body issue tests** + +In `packages/context/src/ingest/wiki-body-refs.test.ts`, extend the import and +append this test inside `describe('wiki body refs', ...)`: + +```ts +import { findInvalidWikiBodyRefIssues, findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js'; +``` + +```ts + it('returns structured issue codes for body reference failures', async () => { + const invalid = await findInvalidWikiBodyRefIssues({ + pageKey: 'account-segments', + body: [ + '`mart_account_segments.total_contract_arr_cents`', + '`source:missing_source`', + '`table:analytics.missing_table`', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async () => false, + }); + + expect(invalid).toEqual([ + { + code: 'wiki_body_unknown_sl_entity', + message: 'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + pageKey: 'account-segments', + ref: 'mart_account_segments.total_contract_arr_cents', + sourceName: 'mart_account_segments', + entityName: 'total_contract_arr_cents', + connectionId: null, + }, + { + code: 'wiki_body_unknown_sl_source', + message: 'account-segments: unknown semantic-layer source missing_source', + pageKey: 'account-segments', + ref: 'source:missing_source', + sourceName: 'missing_source', + connectionId: null, + }, + { + code: 'wiki_body_unknown_raw_table', + message: 'account-segments: unknown raw table analytics.missing_table', + pageKey: 'account-segments', + ref: 'table:analytics.missing_table', + tableRef: 'analytics.missing_table', + connectionId: null, + }, + ]); + }); +``` + +- 
[ ] **Step 2: Run the wiki body issue test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/wiki-body-refs.test.ts -t "structured issue codes" +``` + +Expected: FAIL with an export error for `findInvalidWikiBodyRefIssues`. + +- [ ] **Step 3: Implement structured wiki body issues** + +In `packages/context/src/ingest/wiki-body-refs.ts`, add this type after +`WikiBodyRefValidationInput`: + +```ts +export type WikiBodyRefIssue = + | { + code: 'wiki_body_unknown_sl_source'; + message: string; + pageKey: string; + ref: string; + sourceName: string; + connectionId: string | null; + } + | { + code: 'wiki_body_unknown_sl_entity'; + message: string; + pageKey: string; + ref: string; + sourceName: string; + entityName: string; + connectionId: string | null; + } + | { + code: 'wiki_body_unknown_raw_table'; + message: string; + pageKey: string; + ref: string; + tableRef: string; + connectionId: string | null; + }; + +function renderConnectionScopedRef(connectionId: string | null, body: string): string { + return connectionId ? 
`${connectionId}/${body}` : body; +} +``` + +Replace `findInvalidWikiBodyRefs()` with these two functions: + +```ts +export async function findInvalidWikiBodyRefIssues(input: WikiBodyRefValidationInput): Promise<WikiBodyRefIssue[]> { + const issues: WikiBodyRefIssue[] = []; + const sourceCache = new Map<string, SemanticLayerSource[]>(); + const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => { + const cached = sourceCache.get(connectionId); + if (cached) { + return cached; + } + const sources = await input.loadSources(connectionId); + sourceCache.set(connectionId, sources); + return sources; + }; + + const findSource = async ( + connectionIds: string[], + sourceName: string, + ): Promise<{ connectionId: string; source: SemanticLayerSource } | null> => { + for (const connectionId of connectionIds) { + const source = (await loadSources(connectionId)).find((candidate) => candidate.name === sourceName); + if (source) { + return { connectionId, source }; + } + } + return null; + }; + + for (const ref of parseWikiBodyRefs(input.body)) { + const connectionIds = ref.connectionId ?
[ref.connectionId] : input.visibleConnectionIds; + if (ref.kind === 'table') { + const found = await Promise.all(connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef))); + if (!found.some(Boolean)) { + const renderedRef = renderConnectionScopedRef(ref.connectionId, `table:${ref.tableRef}`); + issues.push({ + code: 'wiki_body_unknown_raw_table', + message: `${input.pageKey}: unknown raw table ${renderConnectionScopedRef(ref.connectionId, ref.tableRef)}`, + pageKey: input.pageKey, + ref: renderedRef, + tableRef: ref.tableRef, + connectionId: ref.connectionId, + }); + } + continue; + } + + const found = await findSource(connectionIds, ref.sourceName); + if (!found) { + if (ref.kind === 'sl_source') { + const renderedRef = renderConnectionScopedRef(ref.connectionId, `source:${ref.sourceName}`); + issues.push({ + code: 'wiki_body_unknown_sl_source', + message: `${input.pageKey}: unknown semantic-layer source ${renderConnectionScopedRef(ref.connectionId, ref.sourceName)}`, + pageKey: input.pageKey, + ref: renderedRef, + sourceName: ref.sourceName, + connectionId: ref.connectionId, + }); + } + continue; + } + if (ref.kind === 'sl_entity' && !entityNames(found.source).has(ref.entityName)) { + issues.push({ + code: 'wiki_body_unknown_sl_entity', + message: `${input.pageKey}: unknown semantic-layer entity ${ref.sourceName}.${ref.entityName}`, + pageKey: input.pageKey, + ref: renderConnectionScopedRef(ref.connectionId, `${ref.sourceName}.${ref.entityName}`), + sourceName: ref.sourceName, + entityName: ref.entityName, + connectionId: ref.connectionId, + }); + } + } + + return issues; +} + +export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> { + return (await findInvalidWikiBodyRefIssues(input)).map((issue) => issue.message); +} +``` + +- [ ] **Step 4: Run the wiki body issue test to verify it passes** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/wiki-body-refs.test.ts -t "structured
issue codes" +``` + +Expected: PASS. + +- [ ] **Step 5: Add validator detail tests** + +In `packages/context/src/ingest/stages/validate-wu-sources.test.ts`, replace +the first test's final expectation block with: + +```ts + expect(result).toEqual({ + validSources: ['warehouse-a:good'], + invalidSources: ['warehouse-b:bad'], + issues: [ + { + connectionId: 'warehouse-b', + sourceName: 'bad', + sourceId: 'warehouse-b:bad', + errors: ['bad.yaml: measure "revenue" dry-run failed.\n Error: column missing_revenue does not exist'], + warnings: ['bad.yaml: warehouse warning'], + }, + ], + }); +``` + +Replace the mocked validator in that same test with: + +```ts + const validateSingleSource = vi + .fn() + .mockResolvedValueOnce({ errors: [], warnings: [] }) + .mockResolvedValueOnce({ + errors: ['bad.yaml: measure "revenue" dry-run failed.\n Error: column missing_revenue does not exist'], + warnings: ['bad.yaml: warehouse warning'], + }); +``` + +- [ ] **Step 6: Run the validator detail test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/stages/validate-wu-sources.test.ts +``` + +Expected: FAIL because `issues` is missing from the returned object. 
+ +- [ ] **Step 7: Preserve validator error details** + +In `packages/context/src/ingest/stages/validate-wu-sources.ts`, replace the +interfaces and function with: + +```ts +export interface WuValidationIssue { + connectionId: string; + sourceName: string; + sourceId: string; + errors: string[]; + warnings: string[]; +} + +export interface WuValidationResult { + validSources: string[]; + invalidSources: string[]; + issues: WuValidationIssue[]; +} + +export async function validateWuTouchedSources( + deps: SlValidationDeps & { slValidator: SlValidatorPort }, + touched: TouchedSlSource[], +): Promise<WuValidationResult> { + const valid: string[] = []; + const invalid: string[] = []; + const issues: WuValidationIssue[] = []; + for (const source of touched) { + const sourceId = `${source.connectionId}:${source.sourceName}`; + const result = await deps.slValidator.validateSingleSource(deps, source.connectionId, source.sourceName); + if (result.errors.length === 0) { + valid.push(sourceId); + } else { + invalid.push(sourceId); + issues.push({ + connectionId: source.connectionId, + sourceName: source.sourceName, + sourceId, + errors: result.errors, + warnings: result.warnings, + }); + } + } + return { validSources: valid, invalidSources: invalid, issues }; +} +``` + +- [ ] **Step 8: Run the validator tests to verify they pass** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/stages/validate-wu-sources.test.ts +``` + +Expected: PASS.
+ +- [ ] **Step 9: Commit structured issue foundations** + +```bash +git add packages/context/src/ingest/wiki-body-refs.ts \ + packages/context/src/ingest/wiki-body-refs.test.ts \ + packages/context/src/ingest/stages/validate-wu-sources.ts \ + packages/context/src/ingest/stages/validate-wu-sources.test.ts +git commit -m "feat(ingest): preserve structured gate issue details" +``` + +### Task 2: Throw structured final artifact gate failures + +**Files:** +- Modify: `packages/context/src/ingest/artifact-gates.ts` +- Modify: `packages/context/src/ingest/artifact-gates.test.ts` + +- [ ] **Step 1: Add structured failure tests** + +In `packages/context/src/ingest/artifact-gates.test.ts`, extend the import: + +```ts +import { FinalArtifactGateFailure, validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js'; +``` + +Append this test inside `describe('artifact gates', ...)`: + +```ts + it('throws structured final artifact gate issues', async () => { + const wikiService = wikiServiceWithPages({ + 'account-segments': { + refs: ['missing-page'], + slRefs: ['mart_account_segments.total_contract_arr_cents'], + content: [ + 'ARR is `mart_account_segments.total_contract_arr_cents`.', + 'Warehouse table `table:analytics.missing_table`.', + ].join('\n'), + }, + }); + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + 
validateTouchedSources: async () => ({ + invalidSources: ['warehouse:mart_account_segments'], + validSources: [], + issues: [ + { + connectionId: 'warehouse', + sourceName: 'mart_account_segments', + sourceId: 'warehouse:mart_account_segments', + errors: ['mart_account_segments.yaml: measure "total_contract_arr" dry-run failed.\n Error: column missing_arr does not exist'], + warnings: [], + }, + ], + }), + tableExists: async () => false, + }), + ).rejects.toMatchObject({ + issues: expect.arrayContaining([ + expect.objectContaining({ code: 'semantic_layer_validation_failed', sourceId: 'warehouse:mart_account_segments' }), + expect.objectContaining({ code: 'wiki_sl_ref_unknown_entity', pageKey: 'account-segments' }), + expect.objectContaining({ code: 'wiki_ref_missing_page', pageKey: 'account-segments', missingRef: 'missing-page' }), + expect.objectContaining({ code: 'wiki_body_unknown_sl_entity', pageKey: 'account-segments' }), + expect.objectContaining({ code: 'wiki_body_unknown_raw_table', pageKey: 'account-segments' }), + ]), + }); + }); +``` + +- [ ] **Step 2: Run the structured failure test to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts -t "structured final artifact gate issues" +``` + +Expected: FAIL with an export error for `FinalArtifactGateFailure` or missing +`issues`. 
+ +- [ ] **Step 3: Add structured issue types and failure class** + +In `packages/context/src/ingest/artifact-gates.ts`, change the wiki body import: + +```ts +import { findInvalidWikiBodyRefIssues, type WikiBodyRefIssue } from './wiki-body-refs.js'; +``` + +Replace the existing `TouchedValidationResult` interface with these exported +types: + +```ts +export interface TouchedSourceValidationIssue { + connectionId: string; + sourceName: string; + sourceId: string; + errors: string[]; + warnings?: string[]; +} + +export interface TouchedValidationResult { + invalidSources: string[]; + validSources: string[]; + issues?: TouchedSourceValidationIssue[]; +} + +export type FinalArtifactGateIssue = + | { + code: 'semantic_layer_validation_failed'; + message: string; + connectionId: string | null; + sourceName: string; + sourceId: string; + sourceErrors: string[]; + } + | { + code: 'wiki_sl_ref_unknown_source'; + message: string; + pageKey: string; + ref: string; + sourceName: string; + connectionId: string | null; + } + | { + code: 'wiki_sl_ref_unknown_entity'; + message: string; + pageKey: string; + ref: string; + sourceName: string; + entityName: string; + connectionId: string | null; + } + | { + code: 'wiki_ref_missing_page'; + message: string; + pageKey: string; + missingRef: string; + } + | WikiBodyRefIssue; + +export class FinalArtifactGateFailure extends Error { + readonly issues: FinalArtifactGateIssue[]; + + constructor(issues: FinalArtifactGateIssue[]) { + super(`final artifact gates failed:\n${issues.map((issue) => issue.message).join('\n')}`); + this.name = 'FinalArtifactGateFailure'; + this.issues = issues; + } +} +``` + +- [ ] **Step 4: Return structured wiki frontmatter issues** + +Replace `validateWikiSlRefs()` with: + +```ts +async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<FinalArtifactGateIssue[]> { + const issues: FinalArtifactGateIssue[] = []; + const sourcesByConnection = new Map<string, Awaited<ReturnType<FinalArtifactGateInput['semanticLayerService']['loadAllSources']>>['sources']>(); + for (const connectionId of input.connectionIds) { + 
const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + sourcesByConnection.set(connectionId, sources); + } + + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + for (const ref of page.frontmatter.sl_refs ?? []) { + const parsed = parseSlRef(ref); + const candidateConnections = parsed.connectionId ? [parsed.connectionId] : input.connectionIds; + let source: Awaited<ReturnType<FinalArtifactGateInput['semanticLayerService']['loadAllSources']>>['sources'][number] | undefined; + for (const connectionId of candidateConnections) { + source = sourcesByConnection.get(connectionId)?.find((candidate) => candidate.name === parsed.sourceName); + if (source) { + break; + } + } + if (!source) { + issues.push({ + code: 'wiki_sl_ref_unknown_source', + message: `${pageKey}: unknown sl_refs entry ${ref}`, + pageKey, + ref, + sourceName: parsed.sourceName, + connectionId: parsed.connectionId, + }); + continue; + } + if (parsed.entityName && !slEntityNames(source).has(parsed.entityName)) { + issues.push({ + code: 'wiki_sl_ref_unknown_entity', + message: `${pageKey}: unknown sl_refs entity ${ref}`, + pageKey, + ref, + sourceName: parsed.sourceName, + entityName: parsed.entityName, + connectionId: parsed.connectionId, + }); + } + } + } + return issues; +} +``` + +- [ ] **Step 5: Return structured wiki page reference issues** + +Replace `validateWikiRefs()` with: + +```ts +async function validateWikiRefs(input: FinalArtifactGateInput): Promise<FinalArtifactGateIssue[]> { + const issues: FinalArtifactGateIssue[] = []; + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + const missingRefs = await findMissingWikiRefs({ + wikiService: input.wikiService, + scope: 'GLOBAL', + scopeId: null, + pageKey, + refs: page.frontmatter.refs, + content: page.content, + }); + for (const missingRef of missingRefs) { + issues.push({ + code: 'wiki_ref_missing_page', + 
message: `${pageKey} -> ${missingRef}`, + pageKey, + missingRef, + }); + } + } + return issues; +} +``` + +- [ ] **Step 6: Throw `FinalArtifactGateFailure` from final gates** + +Replace `validateFinalIngestArtifacts()` with: + +```ts +export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> { + const touchedWithDependencies = await expandTouchedSlSourcesWithDirectJoinNeighbors(input); + const validation = await input.validateTouchedSources(touchedWithDependencies); + const issues: FinalArtifactGateIssue[] = []; + const validationIssues = + validation.issues ?? + validation.invalidSources.map((sourceId) => { + const [connectionId, sourceName] = sourceId.includes(':') ? sourceId.split(':', 2) : [null, sourceId]; + return { + connectionId, + sourceName: sourceName ?? sourceId, + sourceId, + errors: [`semantic-layer validation failed for ${sourceId}`], + warnings: [], + }; + }); + for (const issue of validationIssues) { + issues.push({ + code: 'semantic_layer_validation_failed', + message: `semantic-layer validation failed for ${issue.sourceId}`, + connectionId: issue.connectionId, + sourceName: issue.sourceName, + sourceId: issue.sourceId, + sourceErrors: issue.errors, + }); + } + + issues.push(...(await validateWikiSlRefs(input))); + const danglingWikiRefs = await validateWikiRefs(input); + if (danglingWikiRefs.length > 0) { + const combined = danglingWikiRefs.map((issue) => issue.message).join(', '); + issues.push({ + code: 'wiki_ref_missing_page', + message: `wiki references target missing page(s): ${combined}`, + pageKey: danglingWikiRefs[0].pageKey, + missingRef: danglingWikiRefs[0].missingRef, + }); + } + + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + issues.push( + ...(await findInvalidWikiBodyRefIssues({ + pageKey, + body: page.content, + visibleConnectionIds: input.connectionIds, + loadSources: async (connectionId) 
=> { + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + return sources; + }, + tableExists: input.tableExists, + })), + ); + } + + if (issues.length > 0) { + throw new FinalArtifactGateFailure(issues); + } +} +``` + +- [ ] **Step 7: Run artifact gate tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/artifact-gates.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit structured final gate failures** + +```bash +git add packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts +git commit -m "feat(ingest): structure final artifact gate failures" +``` + +### Task 3: Add gate repair policy + +**Files:** +- Create: `packages/context/src/ingest/gate-repair-policy.ts` +- Create: `packages/context/src/ingest/gate-repair-policy.test.ts` + +- [ ] **Step 1: Add policy tests** + +Create `packages/context/src/ingest/gate-repair-policy.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { FinalArtifactGateFailure, type FinalArtifactGateIssue } from './artifact-gates.js'; +import { classifyFinalGateRepair } from './gate-repair-policy.js'; + +function failure(...issues: FinalArtifactGateIssue[]): FinalArtifactGateFailure { + return new FinalArtifactGateFailure(issues); +} + +describe('classifyFinalGateRepair', () => { + it('allows stale wiki reference drift to use the repair agent', () => { + const decision = classifyFinalGateRepair( + failure({ + code: 'wiki_body_unknown_sl_entity', + message: 'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + pageKey: 'account-segments', + ref: 'mart_account_segments.total_contract_arr_cents', + sourceName: 'mart_account_segments', + entityName: 'total_contract_arr_cents', + connectionId: null, + }), + ); + + expect(decision).toEqual({ + repairable: true, + issueCodes: ['wiki_body_unknown_sl_entity'], + }); + }); + + it('blocks semantic-layer validation 
failures from automatic repair', () => { + const decision = classifyFinalGateRepair( + failure({ + code: 'semantic_layer_validation_failed', + message: 'semantic-layer validation failed for warehouse:orders', + connectionId: 'warehouse', + sourceName: 'orders', + sourceId: 'warehouse:orders', + sourceErrors: ['orders.yaml: measure "revenue" dry-run failed.\n Error: column missing_revenue does not exist'], + }), + ); + + expect(decision).toEqual({ + repairable: false, + reason: 'non-repairable artifact gate issue(s): semantic_layer_validation_failed', + issueCodes: ['semantic_layer_validation_failed'], + }); + }); + + it('blocks missing raw table body references from automatic repair', () => { + const decision = classifyFinalGateRepair( + failure({ + code: 'wiki_body_unknown_raw_table', + message: 'account-segments: unknown raw table analytics.missing_table', + pageKey: 'account-segments', + ref: 'table:analytics.missing_table', + tableRef: 'analytics.missing_table', + connectionId: null, + }), + ); + + expect(decision).toEqual({ + repairable: false, + reason: 'non-repairable artifact gate issue(s): wiki_body_unknown_raw_table', + issueCodes: ['wiki_body_unknown_raw_table'], + }); + }); + + it('blocks unstructured errors by default', () => { + expect(classifyFinalGateRepair(new Error('plain gate failure'))).toEqual({ + repairable: false, + reason: 'unclassified artifact gate failure', + issueCodes: [], + }); + }); +}); +``` + +- [ ] **Step 2: Run policy tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/gate-repair-policy.test.ts +``` + +Expected: FAIL because `gate-repair-policy.ts` does not exist. 
+ +- [ ] **Step 3: Implement the policy module** + +Create `packages/context/src/ingest/gate-repair-policy.ts`: + +```ts +import { FinalArtifactGateFailure, type FinalArtifactGateIssue } from './artifact-gates.js'; + +export type GateRepairDecision = + | { repairable: true; issueCodes: string[] } + | { repairable: false; reason: string; issueCodes: string[] }; + +const repairableIssueCodes = new Set([ + 'wiki_body_unknown_sl_entity', + 'wiki_body_unknown_sl_source', + 'wiki_sl_ref_unknown_entity', + 'wiki_ref_missing_page', +]); + +export function artifactGateIssueSummary(error: unknown): { message: string; issues: FinalArtifactGateIssue[] } { + if (error instanceof FinalArtifactGateFailure) { + return { message: error.message, issues: error.issues }; + } + return { message: error instanceof Error ? error.message : String(error), issues: [] }; +} + +export function classifyFinalGateRepair(error: unknown): GateRepairDecision { + const { issues } = artifactGateIssueSummary(error); + if (issues.length === 0) { + return { + repairable: false, + reason: 'unclassified artifact gate failure', + issueCodes: [], + }; + } + + const issueCodes = [...new Set(issues.map((issue) => issue.code))].sort(); + const nonRepairableCodes = issueCodes.filter( + (code): code is FinalArtifactGateIssue['code'] => !repairableIssueCodes.has(code as FinalArtifactGateIssue['code']), + ); + if (nonRepairableCodes.length > 0) { + return { + repairable: false, + reason: `non-repairable artifact gate issue(s): ${nonRepairableCodes.join(', ')}`, + issueCodes, + }; + } + + return { repairable: true, issueCodes }; +} +``` + +- [ ] **Step 4: Run policy tests to verify they pass** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/gate-repair-policy.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit the policy module** + +```bash +git add packages/context/src/ingest/gate-repair-policy.ts \ + packages/context/src/ingest/gate-repair-policy.test.ts +git commit -m "feat(ingest): classify final gate repair safety" +``` + +### Task 4: Block non-repairable patch-level gate failures + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` + +- [ ] **Step 1: Add patch-level non-repairable regression** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`, +extend the imports: + +```ts +import { FinalArtifactGateFailure } from '../artifact-gates.js'; +``` + +Append this test inside `describe('integrateWorkUnitPatch', ...)`: + +```ts + it('does not invoke gate repair for non-repairable semantic validation failures', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic-high-risk'); + await git.addWorktree(childDir, 'child-semantic-high-risk', baseSha); + const childGit = git.forWorktree(childDir); + await mkdir(join(childDir, 'semantic-layer/c1'), { recursive: true }); + await writeFile( + join(childDir, 'semantic-layer/c1/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures:\n - name: revenue\n expr: sum(missing_revenue)\n', + ); + await childGit.commitFiles(['semantic-layer/c1/orders.yaml'], 'invalid semantic edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/semantic-high-risk.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-high-risk/trace.jsonl'), + jobId: 'job-semantic-high-risk', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + const repairGateFailure = vi.fn(async () => { + throw new Error('repair 
must not run for high-risk semantic validation failures'); + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-high-risk', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockRejectedValue( + new FinalArtifactGateFailure([ + { + code: 'semantic_layer_validation_failed', + message: 'semantic-layer validation failed for c1:orders', + connectionId: 'c1', + sourceName: 'orders', + sourceId: 'c1:orders', + sourceErrors: ['orders.yaml: measure "revenue" dry-run failed.\n Error: column missing_revenue does not exist'], + }, + ]), + ), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + repairGateFailure, + }); + + expect(result).toMatchObject({ + status: 'semantic_conflict', + reason: expect.stringContaining('semantic-layer validation failed for c1:orders'), + }); + expect(repairGateFailure).not.toHaveBeenCalled(); + await expect(readFile(join(configDir, 'semantic-layer/c1/orders.yaml'), 'utf-8')).rejects.toThrow(); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_semantic_conflict_not_repairable'); + }); +``` + +- [ ] **Step 2: Run the patch integrator regression to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts -t "non-repairable semantic validation failures" +``` + +Expected: FAIL because `repairGateFailure` is called. 
+ +- [ ] **Step 3: Wire repair policy into patch integration** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.ts`, add: + +```ts +import { artifactGateIssueSummary, classifyFinalGateRepair } from '../gate-repair-policy.js'; +``` + +Inside the `catch (error)` block after the clean patch applies and +`validateAppliedTree(touchedPaths)` rejects, replace: + +```ts + const reason = errorMessage(error); +``` + +with: + +```ts + const gateFailure = artifactGateIssueSummary(error); + const reason = gateFailure.message; + const repairDecision = classifyFinalGateRepair(error); +``` + +Immediately after the existing `patch_semantic_conflict` trace event, insert: + +```ts + if (!repairDecision.repairable) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + await input.trace.event('error', 'integration', 'patch_semantic_conflict_not_repairable', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason: repairDecision.reason, + issueCodes: repairDecision.issueCodes, + }); + return { + status: 'semantic_conflict', + reason, + touchedPaths, + }; + } +``` + +Then keep the existing `if (input.repairGateFailure) { ... }` block unchanged. +This means gate repair runs only when `repairDecision.repairable` is true. 
+ +- [ ] **Step 4: Convert existing semantic repair tests to structured repairable failures** + +In `patch-integrator.test.ts`, change the repairable semantic-gate test's +mock rejection from: + +```ts + .mockRejectedValueOnce(new Error('final artifact gates failed:\na: unknown semantic-layer entity')) +``` + +to: + +```ts + .mockRejectedValueOnce( + new FinalArtifactGateFailure([ + { + code: 'wiki_body_unknown_sl_entity', + message: 'a: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + pageKey: 'a', + ref: 'mart_account_segments.total_contract_arr_cents', + sourceName: 'mart_account_segments', + entityName: 'total_contract_arr_cents', + connectionId: null, + }, + ]), + ) +``` + +- [ ] **Step 5: Run patch integrator tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: PASS. + +- [ ] **Step 6: Commit patch-level policy wiring** + +```bash +git add packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts +git commit -m "fix(ingest): block high-risk patch gate repair" +``` + +### Task 5: Block non-repairable final composed-tree gate failures + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Add final-gate non-repairable regression** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +append this test inside `describe('IngestBundleRunner isolated diff path', ...)` +before the final gate repair success test: + +```ts + it('does not invoke final gate repair for semantic-layer warehouse validation failures', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'valid-page', rawFiles: 
['pages/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + deps.slValidator.validateSingleSource = vi.fn().mockResolvedValue({ + errors: ['orders.yaml: measure "revenue" dry-run failed.\n Error: column missing_revenue does not exist'], + warnings: [], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + throw new Error('gate repair must not run for semantic-layer validation failures'); + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile(join(root, 'wiki/global/valid-page.md'), '---\nsummary: Valid page\nusage_mode: auto\n---\n\nValid\n'); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'valid-page', + detail: 'Valid page', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/valid-page.md'], 'wu valid page', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' as const }; + } + + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures:\n - name: revenue\n expr: sum(missing_revenue)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'orders'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'orders', + detail: 'Invalid source from reconciliation', + targetConnectionId: 'warehouse', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles( + 
['semantic-layer/warehouse/orders.yaml'], + 'reconcile invalid semantic source', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' as const }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-final-high-risk-semantic', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/semantic-layer validation failed for warehouse:orders/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + expect(deps.agentRunner.runLoop).not.toHaveBeenCalledWith( + expect.objectContaining({ + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-gate-repair', + }), + }), + ); + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-final-high-risk-semantic/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('final_artifact_gates_not_repairable'); + expect(trace).toContain('semantic_layer_validation_failed'); + expect(trace).not.toContain('gate_repair_started'); + expect(trace).not.toContain('squash_finished'); + + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'final_gates', + message: expect.stringContaining('semantic-layer validation failed for warehouse:orders'), + details: expect.objectContaining({ + gateRepairDecision: { + repairable: false, + reason: 'non-repairable artifact gate issue(s): semantic_layer_validation_failed', + issueCodes: ['semantic_layer_validation_failed'], + }, + }), + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + 
+- [ ] **Step 2: Run the final-gate regression to verify it fails** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "semantic-layer warehouse validation failures" +``` + +Expected: FAIL because final gate repair is invoked for the semantic-layer +validation failure. + +- [ ] **Step 3: Wire repair policy into the runner** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add: + +```ts +import { classifyFinalGateRepair } from './gate-repair-policy.js'; +``` + +Inside the `catch (error)` block around final `validateFinalIngestArtifacts()`, +immediately after: + +```ts + const gateError = this.errorMessage(error); +``` + +insert: + +```ts + const gateRepairDecision = classifyFinalGateRepair(error); + if (!gateRepairDecision.repairable) { + activeFailureDetails = { + ...finalArtifactGateTraceData, + gateRepairDecision, + }; + await runTrace.event('error', 'final_gates', 'final_artifact_gates_not_repairable', { + ...finalArtifactGateTraceData, + gateRepairDecision, + }); + throw error; + } +``` + +Leave the existing repair path unchanged after this insertion. It will run only +for repairable structured wiki-reference failures. + +- [ ] **Step 4: Run the final-gate regression to verify it passes** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "semantic-layer warehouse validation failures" +``` + +Expected: PASS. + +- [ ] **Step 5: Update the invalid `sl_refs` regression** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +find the test named +`rejects Notion-style changed wiki pages with invalid sl_refs`. 
Replace the +final assertion with: + +```ts + await expect( + runner.run({ jobId: 'job-invalid-slrefs', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/unknown sl_refs entry missing_source/); + + expect(deps.agentRunner.runLoop).not.toHaveBeenCalledWith( + expect.objectContaining({ + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-gate-repair', + }), + }), + ); +``` + +Missing source-level `sl_refs` are non-repairable in v1 because selecting a +replacement source without evidence can invent semantic context. + +- [ ] **Step 6: Run existing gate repair regressions** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "repairs final wiki body refs|fails before squash when final gate repair makes no edit|invalid sl_refs" +``` + +Expected: PASS. + +- [ ] **Step 7: Commit runner policy wiring** + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "fix(ingest): block high-risk final gate repair" +``` + +### Task 6: Verify the v1 closure + +**Files:** +- Verify: `packages/context/src/ingest/**/*.ts` +- Verify: `packages/context/src/ingest/**/*.test.ts` + +- [ ] **Step 1: Run the focused gate repair suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/gate-repair-policy.test.ts \ + src/ingest/final-gate-repair.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/stages/validate-wu-sources.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run the context type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. 
+ +- [ ] **Step 3: Run the context test suite** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code analysis** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS or only pre-existing findings unrelated to these files. If +there are findings in files changed by this plan, remove the dead code and run +the command again. + +- [ ] **Step 5: Run pre-commit on changed TypeScript and plan files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/ingest/wiki-body-refs.ts \ + packages/context/src/ingest/wiki-body-refs.test.ts \ + packages/context/src/ingest/stages/validate-wu-sources.ts \ + packages/context/src/ingest/stages/validate-wu-sources.test.ts \ + packages/context/src/ingest/artifact-gates.ts \ + packages/context/src/ingest/artifact-gates.test.ts \ + packages/context/src/ingest/gate-repair-policy.ts \ + packages/context/src/ingest/gate-repair-policy.test.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.ts \ + packages/context/src/ingest/isolated-diff/patch-integrator.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair-classification.md +``` + +Expected: PASS. If this repository does not have a pre-commit configuration or +the local `uv` binary cannot satisfy the pinned project version, record that +explicitly in the implementation summary and keep the TypeScript checks above +as the authoritative verification. + +- [ ] **Step 6: Commit verification** + +```bash +git status --short +git commit --allow-empty -m "chore(ingest): verify gate repair classification" +``` + +## Self-review + +Spec coverage: + +- The Gate repair stage classification requirement is covered by Tasks 2 + through 5. High-risk semantic-layer validation failures and missing raw table + references are blocked before repair. 
+- Repairable stale wiki body references still run through bounded gate repair, + rerun final gates, and commit only after validation passes. +- Patch-level semantic gate failures and final composed-tree gate failures use + the same repair policy. +- The Global semantic gates section remains covered by the existing gates; + this plan preserves direct declared-join validation and the existing wiki + body grammar. +- Regression coverage now includes the spec's unrepairable final-gate failure + class without relying on the repair agent choosing not to edit. + +Remaining gaps: + +- No v1-blocking gaps remain after this plan is implemented and verified. +- Deterministic semantic merge helpers remain rollout step 9 and are + intentionally post-v1. +- Richer resolver and repair context can be added after v1 traces show the + frequent repair shapes. + +Placeholder scan: + +- The plan contains exact file paths, concrete test code, concrete + implementation snippets, commands, and expected outcomes. +- The plan contains no deferred implementation markers. + +Type consistency: + +- `FinalArtifactGateIssue`, `FinalArtifactGateFailure`, + `WikiBodyRefIssue`, `TouchedSourceValidationIssue`, + `classifyFinalGateRepair()`, and `artifactGateIssueSummary()` are introduced + before use. +- `GateRepairDecision` uses `repairable`, `reason`, and `issueCodes` + consistently in tests, traces, and failure-report details. + +Plan complete and saved to +`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair-classification.md`. 
diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair.md new file mode 100644 index 00000000..62a174f5 --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair.md @@ -0,0 +1,1438 @@ +# Isolated Diff Ingestion V1 Gate Repair Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add bounded repair-agent handling for isolated-diff artifact gate +failures so cleanly applied integration trees get one scoped repair attempt +before the ingest fails. + +**Architecture:** Reuse the existing isolated-diff integration worktree, +trace writer, and `AgentRunnerPort`. A new `final-gate-repair` module exposes +scoped read/write tools over the exact wiki and semantic-layer files involved +in the failed gate. Patch-level semantic conflicts and final composed-tree gate +failures both call this repair module, rerun artifact gates, commit repaired +files only after gates pass, and record repair counters in ingest reports. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, zod, Node `fs/promises`, +existing `IngestBundleRunner`, `GitService`, `AgentRunnerPort`, +`IngestTraceWriter`, `integrateWorkUnitPatch`, and `validateFinalIngestArtifacts`. + +--- + +## Audit summary + +This audit read +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, searched +`docs/superpowers/plans/`, inspected the current isolated-diff implementation, +and ran the focused isolated-diff verification suite. 
+ +Plans already based on the spec: + +| Plan | Implementation status | Evidence | +| --- | --- | --- | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md` | Implemented | `packages/context/src/ingest/isolated-diff/git-patch.ts`, `work-unit-executor.ts`, `patch-integrator.ts`, `ingest-trace.ts`, `wiki-body-refs.ts`, and runner coverage exist. Git history includes `cae5c4b`, `1013bb6`, and `c481f1c`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md` | Implemented | Final gates run after reconciliation and follow-on mutations, child worktrees clean up, failed reports are stored, and trace coverage exists. Git history includes `656e584` and `87f1193`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md` | Implemented | `validateProvenanceRawPaths()` runs before squash and has isolated-diff regression coverage. Git history includes `977a610`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md` | Implemented | Final wiki reference gates, SL write/edit target checks, patch target checks, and target-policy traces exist. Git history includes `5ec6396` and `c61c50b`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md` | Implemented | `wikiPageKeysForFinalGates()` expands to all global wiki pages when semantic-layer sources change or wiki pages are removed. Git history includes `ba534fb`. | +| `docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-textual-conflict-resolver.md` | Implemented | `textual-conflict-resolver.ts` exists, `patch-integrator.ts` invokes it after Git textual conflicts, `ingest-bundle.runner.ts` passes the callback, and report snapshots parse resolver counters. Git history includes `9f0abe5`, `529c6da`, `8784a47`, `aa8d59c`, and `3228843`. 
| + +Focused verification passed before writing this plan: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts src/ingest/wiki-body-refs.test.ts src/ingest/artifact-gates.test.ts src/ingest/semantic-layer-target-policy.test.ts src/ingest/isolated-diff/git-patch.test.ts src/ingest/isolated-diff/work-unit-executor.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/isolated-diff/textual-conflict-resolver.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/report-snapshot.test.ts src/sl/tools/sl-write-source.tool.test.ts src/sl/tools/sl-edit-source.tool.test.ts +``` + +Current result: 12 test files passed and 73 tests passed. + +One v1-essential design gap remains. The spec's gate repair stage says that +cleanly applied trees that fail semantic or wiki gates get a bounded repair +agent before the run fails. Current code still fails immediately in two places: + +- `packages/context/src/ingest/isolated-diff/patch-integrator.ts` returns + `semantic_conflict` as soon as `validateAppliedTree()` rejects after a patch + applies cleanly. +- `packages/context/src/ingest/ingest-bundle.runner.ts` calls + `validateFinalIngestArtifacts()` inside `traceTimed()` and lets the error + abort the run without a repair attempt. + +## Scope + +This plan implements bounded gate repair for artifact gate failures only: + +- semantic gate failures after a patch applies cleanly; +- final artifact gate failures after reconciliation, deterministic + post-processing, and wiki `sl_refs` repair; +- repair counters and traces for attempts, repairs, and failures. + +This plan does not repair patch policy failures, target-policy failures, +textual Git conflicts, provenance validation failures, squash conflicts, +connector rollout gaps, default-path promotion, semantic auto-merge helpers, or +removal of the shared-worktree fallback path. + +## File structure + +- Create `packages/context/src/ingest/final-gate-repair.ts`.
+ Owns bounded repair-agent execution, scoped repair tools, allowed path + derivation, prompt text, and result types. +- Create `packages/context/src/ingest/final-gate-repair.test.ts`. + Covers allowed-path derivation, scoped read/write enforcement, successful + repair, and no-edit failure. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Calls gate repair after clean patch application when artifact gates fail, + reruns gates, commits repaired files, and returns repair metadata. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Adds semantic-gate repair success and failure coverage. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Wires final gate repair into the isolated-diff runner, commits repaired final + gate files before provenance validation, and updates isolated-diff counters. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds end-to-end coverage for repairable final wiki body references and a + failed no-edit repair. +- Modify `packages/context/src/ingest/reports.ts`. + Adds gate repair counters to `IngestReportBody.isolatedDiff`. +- Modify `packages/context/src/ingest/report-snapshot.ts`. + Parses gate repair counters from stored reports. +- Modify `packages/context/src/ingest/report-snapshot.test.ts`. + Covers stored gate repair counters. 
+ +--- + +### Task 1: Add final gate repair unit tests + +**Files:** +- Create: `packages/context/src/ingest/final-gate-repair.test.ts` + +- [ ] **Step 1: Write the failing unit tests** + +Create `packages/context/src/ingest/final-gate-repair.test.ts`: + +```ts +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { FileIngestTraceWriter } from './ingest-trace.js'; +import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js'; + +async function makeHarness() { + const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-')); + const workdir = join(root, 'workdir'); + await mkdir(join(workdir, 'wiki/global'), { recursive: true }); + await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(workdir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n', + 'utf-8', + ); + await writeFile( + join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + 'utf-8', + ); + const trace = new FileIngestTraceWriter({ + tracePath: join(root, 'trace.jsonl'), + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + runId: 'run-1', + syncId: 'sync-1', + level: 'trace', + }); + return { root, workdir, trace }; +} + +describe('finalGateRepairPaths', () => { + it('derives sorted wiki and semantic-layer file paths', () => { + expect( + finalGateRepairPaths({ + changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'], + touchedSlSources: [ + { connectionId: 'warehouse', sourceName: 'mart_account_segments' }, + { connectionId: 'warehouse', sourceName: 'orders' }, + { 
connectionId: 'warehouse', sourceName: 'orders' }, + ], + }), + ).toEqual([ + 'semantic-layer/warehouse/mart_account_segments.yaml', + 'semantic-layer/warehouse/orders.yaml', + 'wiki/global/account-segments.md', + 'wiki/global/overview.md', + ]); + }); +}); + +describe('repairFinalGateFailure', () => { + it('lets the repair agent read gate errors and edit only allowed files', async () => { + const { workdir, trace } = await makeHarness(); + const agentRunner = { + runLoop: vi.fn(async (params: any) => { + const error = await params.toolSet.read_gate_error.execute({}); + expect(error.markdown).toContain('total_contract_arr_cents'); + + const page = await params.toolSet.read_repair_file.execute({ + path: 'wiki/global/account-segments.md', + }); + expect(page.markdown).toContain('total_contract_arr_cents'); + + await expect( + params.toolSet.write_repair_file.execute({ + path: 'wiki/global/other.md', + content: 'not allowed', + }), + ).rejects.toThrow(/gate repair path not allowed/); + + await params.toolSet.write_repair_file.execute({ + path: 'wiki/global/account-segments.md', + content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'), + }); + return { stopReason: 'natural' as const }; + }), + }; + + const result = await repairFinalGateFailure({ + agentRunner, + workdir, + gateError: + 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + allowedPaths: ['wiki/global/account-segments.md'], + trace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/account-segments.md'], + }); + await expect(readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain( + 'total_contract_arr', + ); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired'); + 
expect(agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + stepBudget: 8, + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-gate-repair', + repairKind: 'final_artifact_gate', + }), + }), + ); + }); + + it('returns failed when the repair agent edits no allowed file', async () => { + const { workdir, trace } = await makeHarness(); + const result = await repairFinalGateFailure({ + agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) }, + workdir, + gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity', + allowedPaths: ['wiki/global/account-segments.md'], + trace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'failed', + attempts: 1, + reason: 'gate repair completed without editing an allowed path', + }); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed'); + }); +}); +``` + +- [ ] **Step 2: Run the tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/final-gate-repair.test.ts +``` + +Expected: FAIL because `./final-gate-repair.js` does not exist. 
+ +- [ ] **Step 3: Commit the failing tests** + +Run: + +```bash +git add packages/context/src/ingest/final-gate-repair.test.ts +git commit -m "test(ingest): cover isolated diff gate repair" +``` + +### Task 2: Implement the final gate repair module + +**Files:** +- Create: `packages/context/src/ingest/final-gate-repair.ts` +- Test: `packages/context/src/ingest/final-gate-repair.test.ts` + +- [ ] **Step 1: Add the repair module** + +Create `packages/context/src/ingest/final-gate-repair.ts`: + +```ts +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { z } from 'zod'; +import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js'; +import type { TouchedSlSource } from '../tools/index.js'; +import type { IngestTraceWriter } from './ingest-trace.js'; +import { traceTimed } from './ingest-trace.js'; + +export type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate'; + +export type FinalGateRepairResult = + | { status: 'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export interface RepairFinalGateFailureInput { + agentRunner: AgentRunnerPort; + workdir: string; + gateError: string; + allowedPaths: string[]; + trace: IngestTraceWriter; + repairKind: FinalGateRepairKind; + maxAttempts?: number; + stepBudget?: number; +} + +const readRepairFileSchema = z.object({ + path: z.string().min(1), +}); + +const writeRepairFileSchema = z.object({ + path: z.string().min(1), + content: z.string(), +}); + +function normalizeRepoPath(path: string): string { + const normalized = path.replace(/\\/g, '/').replace(/^\/+/, ''); + const parts = normalized.split('/').filter((part) => part.length > 0); + if (parts.length === 0 || parts.some((part) => part === '.' 
|| part === '..')) { + throw new Error(`gate repair path must be a repository-relative path: ${path}`); + } + return parts.join('/'); +} + +function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string { + const normalized = normalizeRepoPath(path); + if (!allowedPaths.has(normalized)) { + throw new Error(`gate repair path not allowed: ${normalized}`); + } + return normalized; +} + +async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> { + try { + return { exists: true, content: await readFile(path, 'utf-8') }; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { exists: false, content: '' }; + } + throw error; + } +} + +function buildGateRepairSystemPrompt(): string { + return ` +You repair one KTX isolated-diff artifact gate failure inside the integration worktree. + + + +- Use read_gate_error first. +- Read only files exposed by read_repair_file. +- Edit only paths exposed by write_repair_file. +- Prefer the smallest text edit that makes the gate pass. +- Preserve accepted work-unit, reconciliation, and deterministic projection content. +- Do not invent warehouse facts, business definitions, or semantic-layer entities. +- If the gate error requires choosing between conflicting facts without evidence, stop without editing. +`; +} + +function buildGateRepairUserPrompt(input: { + gateError: string; + allowedPaths: string[]; + repairKind: FinalGateRepairKind; + attempt: number; + maxAttempts: number; +}): string { + return `Repair isolated-diff artifact gates. + +Repair kind: ${input.repairKind} +Attempt: ${input.attempt} of ${input.maxAttempts} + +Allowed files: +${input.allowedPaths.map((path) => `- ${path}`).join('\n')} + +Gate error: +${input.gateError} + +Use read_gate_error first. 
Then inspect only the allowed files, write the +minimal repaired content, and stop.`; +} + +function buildToolSet(input: { + workdir: string; + gateError: string; + allowedPaths: ReadonlySet<string>; + editedPaths: Set<string>; +}): KtxRuntimeToolSet { + return { + read_gate_error: { + name: 'read_gate_error', + description: 'Read the artifact gate failure that must be repaired.', + inputSchema: z.object({}), + execute: async () => ({ + markdown: input.gateError, + structured: { gateError: input.gateError }, + }), + }, + read_repair_file: { + name: 'read_repair_file', + description: 'Read one allowed file from the integration worktree.', + inputSchema: readRepairFileSchema, + execute: async ({ path }: z.infer<typeof readRepairFileSchema>) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const file = await readOptionalFile(join(input.workdir, normalized)); + return { + markdown: file.exists ? file.content : `(missing file: ${normalized})`, + structured: { path: normalized, exists: file.exists }, + }; + }, + }, + write_repair_file: { + name: 'write_repair_file', + description: 'Replace one allowed integration worktree file with repaired text content.', + inputSchema: writeRepairFileSchema, + execute: async ({ path, content }: z.infer<typeof writeRepairFileSchema>) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const fullPath = join(input.workdir, normalized); + await mkdir(dirname(fullPath), { recursive: true }); + await writeFile(fullPath, content, 'utf-8'); + input.editedPaths.add(normalized); + return { + markdown: `Wrote ${normalized}`, + structured: { path: normalized, bytes: Buffer.byteLength(content) }, + }; + }, + }, + }; +} + +export function finalGateRepairPaths(input: { + changedWikiPageKeys: string[]; + touchedSlSources: TouchedSlSource[]; +}): string[] { + return [ + ...new Set([ + ...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`), + ...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`), + ]), + 
].sort(); +} + +export async function repairFinalGateFailure( + input: RepairFinalGateFailureInput, +): Promise<FinalGateRepairResult> { + const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath)); + const maxAttempts = input.maxAttempts ?? 1; + const stepBudget = input.stepBudget ?? 16; + let lastFailure = 'gate repair did not run'; + + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const editedPaths = new Set<string>(); + const sortedAllowedPaths = [...allowedPaths].sort(); + const traceData = { + repairKind: input.repairKind, + attempt, + maxAttempts, + allowedPaths: sortedAllowedPaths, + gateError: input.gateError, + }; + const result = await traceTimed(input.trace, 'gate_repair', 'gate_repair', traceData, async () => + input.agentRunner.runLoop({ + modelRole: 'repair', + systemPrompt: buildGateRepairSystemPrompt(), + userPrompt: buildGateRepairUserPrompt({ + gateError: input.gateError, + allowedPaths: sortedAllowedPaths, + repairKind: input.repairKind, + attempt, + maxAttempts, + }), + toolSet: buildToolSet({ + workdir: input.workdir, + gateError: input.gateError, + allowedPaths, + editedPaths, + }), + stepBudget, + telemetryTags: { + operationName: 'ingest-isolated-diff-gate-repair', + source: input.trace.context.sourceKey, + jobId: input.trace.context.jobId, + repairKind: input.repairKind, + }, + }), + ); + + if (result.stopReason === 'error') { + lastFailure = result.error?.message ?? 
'gate repair agent loop errored'; + await input.trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, result.error); + continue; + } + + const changedPaths = [...editedPaths].sort(); + if (changedPaths.length === 0) { + lastFailure = 'gate repair completed without editing an allowed path'; + await input.trace.event('error', 'gate_repair', 'gate_repair_failed', { + ...traceData, + reason: lastFailure, + }); + continue; + } + + await input.trace.event('debug', 'gate_repair', 'gate_repair_repaired', { + ...traceData, + changedPaths, + }); + return { status: 'repaired', attempts: attempt, changedPaths }; + } + + return { status: 'failed', attempts: maxAttempts, reason: lastFailure }; +} +``` + +- [ ] **Step 2: Run the repair module tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/final-gate-repair.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Commit the repair module** + +Run: + +```bash +git add packages/context/src/ingest/final-gate-repair.ts packages/context/src/ingest/final-gate-repair.test.ts +git commit -m "feat(ingest): add isolated diff gate repair agent" +``` + +### Task 3: Repair patch-level semantic gate failures + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- Test: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` + +- [ ] **Step 1: Add patch integrator repair regressions** + +Append these tests inside +`describe('integrateWorkUnitPatch', ...)` in +`packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`: + +```ts + it('repairs semantic gate failures after a patch applies cleanly', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic-repair'); + await git.addWorktree(childDir, 'child-semantic-repair', baseSha); + const childGit = git.forWorktree(childDir); + await 
writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/semantic-repair.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair/trace.jsonl'), + jobId: 'job-semantic-repair', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + const validateAppliedTree = vi + .fn() + .mockRejectedValueOnce(new Error('final artifact gates failed:\na: unknown semantic-layer entity')) + .mockResolvedValueOnce(undefined); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-repairable', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + repairGateFailure: vi.fn(async (context) => { + expect(context).toMatchObject({ + unitKey: 'wu-repairable', + patchPath, + touchedPaths: ['wiki/global/a.md'], + }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'repaired semantic ref\n', 'utf-8'); + return { + status: 'repaired' as const, + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }; + }), + }); + + expect(result).toMatchObject({ + status: 'accepted', + touchedPaths: ['wiki/global/a.md'], + gateRepair: { + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }, + }); + expect(validateAppliedTree).toHaveBeenCalledTimes(2); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('repaired semantic ref\n'); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_gate_repair'); + }); + + it('keeps the pre-apply tree when semantic gate repair fails', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + 
const childDir = join(homeDir, 'child-semantic-repair-fails'); + await git.addWorktree(childDir, 'child-semantic-repair-fails', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/semantic-repair-fails.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair-fails/trace.jsonl'), + jobId: 'job-semantic-repair-fails', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-not-repaired', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + repairGateFailure: vi.fn(async () => ({ + status: 'failed' as const, + attempts: 1, + reason: 'gate repair completed without editing an allowed path', + })), + }); + + expect(result).toMatchObject({ + status: 'semantic_conflict', + gateRepair: { + status: 'failed', + attempts: 1, + reason: 'gate repair completed without editing an allowed path', + }, + }); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n'); + }); +``` + +- [ ] **Step 2: Run the patch integrator tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: FAIL because `integrateWorkUnitPatch()` does not accept +`repairGateFailure` and does not return `gateRepair`. 
+ +- [ ] **Step 3: Add gate repair metadata to the patch integrator** + +Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`: + +```ts +import type { FinalGateRepairResult } from '../final-gate-repair.js'; +``` + +Replace the `PatchIntegrationResult` type with: + +```ts +export type PatchIntegrationResult = + | { + status: 'accepted'; + commitSha: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + } + | { + status: 'textual_conflict'; + reason: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + } + | { + status: 'semantic_conflict'; + reason: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + }; +``` + +Add this optional callback to `IntegrateWorkUnitPatchInput`: + +```ts + repairGateFailure?(input: { + unitKey: string; + patchPath: string; + touchedPaths: string[]; + reason: string; + }): Promise<FinalGateRepairResult>; +``` + +Replace the current `catch` block for the non-textual +`semantic_gate` section with this block: + +```ts + } catch (error) { + const reason = errorMessage(error); + await input.trace.event('error', 'integration', 'patch_semantic_conflict', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (input.repairGateFailure) { + const gateRepair = await input.repairGateFailure({ + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (gateRepair.status === 'failed') { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: gateRepair.reason, + touchedPaths, + gateRepair, + }; + } + + try { + await traceTimed( + input.trace, + 'integration', + 'semantic_gate_after_gate_repair', + { unitKey: input.unitKey, touchedPaths: gateRepair.changedPaths }, + async () => 
{ + await input.validateAppliedTree(gateRepair.changedPaths); + }, + ); + } catch (repairValidationError) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: errorMessage(repairValidationError), + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + const commit = await input.integrationGit.commitFiles( + gateRepair.changedPaths, + `ingest: repair WorkUnit ${input.unitKey} gates`, + input.author.name, + input.author.email, + ); + if (!commit.created) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: 'gate repair produced no committable changes', + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + await input.trace.event('debug', 'integration', 'patch_accepted_after_gate_repair', { + unitKey: input.unitKey, + commitSha: commit.commitHash, + touchedPaths: gateRepair.changedPaths, + attempts: gateRepair.attempts, + }); + return { + status: 'accepted', + commitSha: commit.commitHash, + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason, + touchedPaths, + }; + } +``` + +- [ ] **Step 4: Run the patch integrator tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit patch-level gate repair** + +Run: + +```bash +git add packages/context/src/ingest/isolated-diff/patch-integrator.ts packages/context/src/ingest/isolated-diff/patch-integrator.test.ts +git commit -m "feat(ingest): repair isolated diff semantic gate failures" +``` + +### Task 4: Wire final gate repair into the runner + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/reports.ts` +- Modify: `packages/context/src/ingest/report-snapshot.ts` +- Modify: `packages/context/src/ingest/report-snapshot.test.ts` + +- [ ] **Step 1: Add report fields and parser coverage** + +In `packages/context/src/ingest/reports.ts`, extend +`IngestReportBody.isolatedDiff`: + +```ts + gateRepairAttempts?: number; + gateRepairs?: number; + gateRepairFailures?: number; +``` + +In `packages/context/src/ingest/report-snapshot.ts`, extend the +`isolatedDiff` schema: + +```ts + gateRepairAttempts: z.number().int().min(0).default(0), + gateRepairs: z.number().int().min(0).default(0), + gateRepairFailures: z.number().int().min(0).default(0), +``` + +Append this test to `packages/context/src/ingest/report-snapshot.test.ts`: + +```ts + it('parses isolated-diff gate repair counters', () => { + const snapshot = parseIngestReportSnapshot({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-18T00:00:00.000Z', + body: { + status: 'completed', + syncId: 'sync-1', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: 'abc123', + isolatedDiff: { + enabled: true, + acceptedPatches: 1, + textualConflicts: 0, + semanticConflicts: 1, + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + 
overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }); + + expect(snapshot.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }); + }); +``` + +- [ ] **Step 2: Run report snapshot tests to verify they pass** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Import the gate repair module in the runner** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add: + +```ts +import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js'; +``` + +- [ ] **Step 4: Add gate repair counters to the isolated summary** + +In the `isolatedDiffSummary` object, add: + +```ts + gateRepairAttempts: 0, + gateRepairs: 0, + gateRepairFailures: 0, +``` + +- [ ] **Step 5: Pass patch-level gate repair to `integrateWorkUnitPatch()`** + +In the `integrateWorkUnitPatch()` call, add this callback next to +`resolveTextualConflict`: + +```ts + repairGateFailure: (context) => + repairFinalGateFailure({ + agentRunner: this.deps.agentRunner, + workdir: sessionWorktree.workdir, + gateError: context.reason, + allowedPaths: context.touchedPaths, + trace: runTrace, + repairKind: 'patch_semantic_gate', + maxAttempts: 1, + stepBudget: 16, + }), +``` + +After the existing `integration.textualResolution` counter block, add: + +```ts + if (integration.gateRepair) { + isolatedDiffSummary.gateRepairAttempts += integration.gateRepair.attempts; + if (integration.gateRepair.status === 'repaired') { + isolatedDiffSummary.semanticConflicts += 1; + isolatedDiffSummary.gateRepairs += 1; + } else { + isolatedDiffSummary.gateRepairFailures += 1; + } + } +``` + +- [ ] **Step 6: Replace final artifact gate throw-through with bounded repair** + +Replace the current `await traceTimed(... 
'final_artifact_gates' ...)` block in +`packages/context/src/ingest/ingest-bundle.runner.ts` with: + +```ts + try { + await traceTimed( + runTrace, + 'final_gates', + 'final_artifact_gates', + finalArtifactGateTraceData, + async () => { + await validateFinalIngestArtifacts({ + connectionIds: repairConnectionIds, + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + ); + } catch (error) { + const gateError = this.errorMessage(error); + const repairPaths = finalGateRepairPaths({ + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + }); + const gateRepair = await repairFinalGateFailure({ + agentRunner: this.deps.agentRunner, + workdir: sessionWorktree.workdir, + gateError, + allowedPaths: repairPaths, + trace: runTrace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 16, + }); + + isolatedDiffSummary.gateRepairAttempts += gateRepair.attempts; + if (gateRepair.status === 'failed') { + isolatedDiffSummary.gateRepairFailures += 1; + activeFailureDetails = { + ...finalArtifactGateTraceData, + gateRepair, + gateError, + }; + throw new Error(`${gateError}\ngate repair failed: 
${gateRepair.reason}`); + } + + isolatedDiffSummary.gateRepairs += 1; + await traceTimed( + runTrace, + 'final_gates', + 'final_artifact_gates_after_gate_repair', + { + ...finalArtifactGateTraceData, + repairedPaths: gateRepair.changedPaths, + }, + async () => { + await validateFinalIngestArtifacts({ + connectionIds: repairConnectionIds, + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + ); + + const repairCommit = await sessionWorktree.git.commitFiles( + gateRepair.changedPaths, + `ingest(${job.sourceKey}): repair final gates syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ); + if (!repairCommit.created) { + isolatedDiffSummary.gateRepairFailures += 1; + throw new Error('final gate repair produced no committable changes'); + } + await runTrace.event('debug', 'final_gates', 'final_gate_repair_committed', { + commitSha: repairCommit.commitHash, + repairedPaths: gateRepair.changedPaths, + }); + } +``` + +- [ ] **Step 7: Run the runner and report tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts 
src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit runner wiring and report fields** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts packages/context/src/ingest/reports.ts packages/context/src/ingest/report-snapshot.ts packages/context/src/ingest/report-snapshot.test.ts +git commit -m "feat(ingest): wire isolated diff gate repair" +``` + +### Task 5: Add isolated runner gate repair regressions + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Test: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Add a final gate repair success regression** + +Append this test inside +`describe('IngestBundleRunner isolated diff path', ...)`: + +```ts + it('repairs final wiki body refs before squash when the repair agent edits the scoped page', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed stale wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], 
peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + const gateError = await params.toolSet.read_gate_error.execute({}); + expect(gateError.markdown).toContain('total_contract_arr_cents'); + const page = await params.toolSet.read_repair_file.execute({ + path: 'wiki/global/account-segments.md', + }); + await params.toolSet.write_repair_file.execute({ + path: 'wiki/global/account-segments.md', + content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'), + }); + return { stopReason: 'natural' as const }; + } + if (params.modelRole === 'reconcile') { + return { stopReason: 'natural' as const }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' as const }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + const result = await 
runner.run({ + jobId: 'job-final-gate-repair', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }); + + expect(result.commitSha).toBeTruthy(); + await expect(readFile(join(runtime.configDir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain( + 'mart_account_segments.total_contract_arr', + ); + await expect(readFile(join(runtime.configDir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.not.toContain( + 'total_contract_arr_cents', + ); + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0] as any; + expect(reportCreate.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-final-gate-repair/trace.jsonl'), 'utf-8'); + expect(trace).toContain('gate_repair_repaired'); + expect(trace).toContain('final_artifact_gates_after_gate_repair_finished'); + expect(trace).toContain('final_gate_repair_committed'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Add a final gate repair no-edit failure regression** + +Append this test inside the same `describe(...)` block: + +```ts + it('fails before squash when final gate repair makes no edit', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account 
segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed stale wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + return { stopReason: 'natural' as const }; + } + if (params.modelRole === 'reconcile') { + return { stopReason: 'natural' as const }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' as const }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await 
mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-final-gate-repair-fails', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/gate repair completed without editing an allowed path/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0] as any; + expect(reportCreate.body.status).toBe('failed'); + expect(reportCreate.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 0, + gateRepairFailures: 1, + }); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-final-gate-repair-fails/trace.jsonl'), 'utf-8'); + expect(trace).toContain('gate_repair_failed'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 3: Run the isolated runner tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 4: Commit runner regressions** + +Run: + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "test(ingest): verify isolated diff final gate repair" +``` + +### Task 6: Final verification + +**Files:** +- Verify: `packages/context/src/ingest/final-gate-repair.ts` +- Verify: `packages/context/src/ingest/final-gate-repair.test.ts` +- Verify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` +- Verify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Verify: `packages/context/src/ingest/reports.ts` +- Verify: `packages/context/src/ingest/report-snapshot.ts` +- Verify: `packages/context/src/ingest/report-snapshot.test.ts` + +- [ ] **Step 1: Run focused tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/final-gate-repair.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run the existing isolated-diff safety suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts src/ingest/wiki-body-refs.test.ts src/ingest/artifact-gates.test.ts src/ingest/semantic-layer-target-policy.test.ts src/ingest/isolated-diff/git-patch.test.ts src/ingest/isolated-diff/work-unit-executor.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/isolated-diff/textual-conflict-resolver.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/report-snapshot.test.ts src/sl/tools/sl-write-source.tool.test.ts src/sl/tools/sl-edit-source.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Run package type checks** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. 
+ +- [ ] **Step 4: Run dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS or only pre-existing findings unrelated to these files. + +- [ ] **Step 5: Run formatting and diff checks** + +Run: + +```bash +pnpm exec prettier --check packages/context/src/ingest/final-gate-repair.ts packages/context/src/ingest/final-gate-repair.test.ts packages/context/src/ingest/isolated-diff/patch-integrator.ts packages/context/src/ingest/isolated-diff/patch-integrator.test.ts packages/context/src/ingest/ingest-bundle.runner.ts packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts packages/context/src/ingest/reports.ts packages/context/src/ingest/report-snapshot.ts packages/context/src/ingest/report-snapshot.test.ts docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair.md +git diff --check +``` + +Expected: PASS. + +- [ ] **Step 6: Commit final verification adjustments** + +If verification required formatting or type-only adjustments, run: + +```bash +git add packages/context/src/ingest/final-gate-repair.ts packages/context/src/ingest/final-gate-repair.test.ts packages/context/src/ingest/isolated-diff/patch-integrator.ts packages/context/src/ingest/isolated-diff/patch-integrator.test.ts packages/context/src/ingest/ingest-bundle.runner.ts packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts packages/context/src/ingest/reports.ts packages/context/src/ingest/report-snapshot.ts packages/context/src/ingest/report-snapshot.test.ts docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-gate-repair.md +git commit -m "chore(ingest): verify isolated diff gate repair" +``` + +Expected: commit is created only when Step 1 through Step 5 produced tracked +source changes after the previous task commits. + +## Self-review + +Spec coverage: + +- The plan implements the remaining gate repair stage from the spec. +- Patch-level semantic gate failures get one bounded repair attempt after the + patch applies cleanly. 
+- Final composed-tree artifact gate failures get one bounded repair attempt + before provenance validation and squash. +- Repair tools are scoped to touched wiki and semantic-layer files. +- Target-policy, patch-policy, textual conflict, provenance, and squash + failures remain non-repairable in this plan. +- Connector rollout, default promotion, old-path removal, and deterministic + semantic merge helpers remain non-v1 follow-up work. + +Placeholder scan: + +- No deferred implementation markers remain. +- Every code-changing step includes concrete code or exact insertion snippets. + +Type consistency: + +- The report field names are `gateRepairAttempts`, `gateRepairs`, and + `gateRepairFailures` in `reports.ts`, `report-snapshot.ts`, runner code, and + tests. +- The repair result type is `FinalGateRepairResult`. +- The repair function is `repairFinalGateFailure()`. +- The path helper is `finalGateRepairPaths()`. diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md new file mode 100644 index 00000000..980ef8f3 --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md @@ -0,0 +1,980 @@ +# Isolated Diff Ingestion V1 Shared Worktree Removal Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use +> superpowers:subagent-driven-development (recommended) or +> superpowers:executing-plans to implement this plan task-by-task. Steps use +> checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the old shared-worktree WorkUnit execution path so every +non-override bundle ingest uses isolated WorkUnit diffs. + +**Architecture:** Keep `IngestBundleRunner` with one non-override execution +path: raw snapshot, optional deterministic projection, child WorkUnit +worktrees, patch integration, reconciliation, final gates, provenance +validation, and squash. 
Delete the private fallback routing setting and all +legacy tests, traces, and agent instructions that existed only for shared +WorkUnit state. + +**Tech Stack:** TypeScript, Vitest, pnpm, KTX ingest runner, Git worktrees. + +--- + +## Audit summary + +This audit read the original design in +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, every +implemented plan matching +`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and +`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the +current implementation under `packages/context/src/ingest/`, +`packages/context/prompts/`, and `packages/context/skills/`. + +Implemented v1 rollout coverage: + +- Rollout steps 1 and 2 exist in code: isolated child worktrees, binary + no-rename patch collection, and `git apply --3way --index` patch integration. +- Rollout step 3 exists in code: + `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts` is + wired through the patch integrator and runner. +- Rollout steps 4, 5, and 6 exist in code: final wiki and semantic-layer gates, + provenance validation before squash, target policy checks, bounded gate + repair, failed reports, and trace counters. +- Rollout step 7 exists in code: the Metabase stale body-reference regression + is covered in `ingest-bundle.runner.isolated-diff.test.ts`. +- Rollout step 8 is committed: Notion, LookML, Looker, dbt, and MetricFlow + route through isolated child worktrees, and MetricFlow projection runs before + WorkUnits. +- Rollout step 10 is committed: non-override ingests default to isolated diffs, + and the old branch is reachable only through the private + `sharedWorktreeSourceKeys` fallback setting. + +## Remaining gaps + +The remaining v1-blocking gaps are all part of rollout step 11: + +- `packages/context/src/ingest/ports.ts` still exposes the private + `sharedWorktreeSourceKeys?: string[]` setting. 
+- `packages/context/src/ingest/isolated-diff/source-routing.ts` and its test + exist only to support the fallback setting. +- `packages/context/src/ingest/local-bundle-runtime.ts` still installs + `sharedWorktreeSourceKeys: []`. +- `packages/context/src/ingest/ingest-bundle.runner.ts` still checks + `isSharedWorktreeFallbackEnabled()` and contains the + `shared_worktree_path_enabled` branch that runs WorkUnits against the mutable + integration worktree. +- `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + still has a regression proving the shared-worktree fallback is reachable. +- `packages/context/src/ingest/ingest-bundle.runner.test.ts` keeps broad runner + tests on the legacy path through `sharedWorktreeSourceKeys`; those tests must + either use the isolated mock harness or move coverage into the real-git + isolated suite. +- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` and + `packages/context/skills/ingest_triage/SKILL.md` still tell WorkUnit agents + that prior WorkUnit writes in the same job are visible in the current working + branch. That instruction is false after isolated diffs and must be removed + with the shared path. + +Non-blocking gaps after this plan: + +- Rollout step 9 deterministic semantic merge helpers remain intentionally + deferred until resolver metrics show frequent mechanical repairs. +- Semantic-layer dependency expansion remains direct declared joins only; the + spec explicitly defers transitive SQL-projection closure. +- Provenance remains in the ingest provenance store and report body; moving it + to worktree files is a separate schema migration. +- Resolver context can later include richer transcript excerpts and explicit + overlap summaries for every previously applied patch. +- Failures before an ingest run row exists still have deterministic trace files + but no stored ingest report. + +## File structure + +- Modify `packages/context/src/ingest/ports.ts`. 
Remove the private fallback + setting from `IngestSettingsPort`. +- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. Stop importing + and installing default shared-worktree fallback settings. +- Delete `packages/context/src/ingest/isolated-diff/source-routing.ts`. This + helper has no responsibility once fallback routing is removed. +- Delete `packages/context/src/ingest/isolated-diff/source-routing.test.ts`. + Its assertions exist only for the fallback helper. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. Delete + `isSharedWorktreeFallbackEnabled()`, the old shared-worktree WorkUnit branch, + and helper methods that only served that branch. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Remove fallback reachability coverage and add a stale-setting regression that + proves a runtime object cannot opt out of isolated diffs. +- Modify `packages/context/src/ingest/ingest-bundle.runner.test.ts`. Remove + the fallback setting from the broad test harness and make its mocked Git + session support no-op isolated patch collection. +- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`. Assert + local runtime settings do not contain the fallback key. +- Modify `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`. + Replace shared-branch WorkUnit visibility instructions with isolated-diff + instructions. +- Modify `packages/context/skills/ingest_triage/SKILL.md`. Remove Stage 3 + prior-WorkUnit visibility language and keep cross-WorkUnit sweep guidance in + Stage 4 reconciliation. 
+ +--- + +### Task 1: Add removal-contract regressions + +**Files:** +- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Update the local runtime settings type** + +In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace +`RuntimeWithSettingsDeps` with: + +```ts +type RuntimeWithSettingsDeps = { + deps: { + settings: Record<string, unknown>; + }; +}; +``` + +- [ ] **Step 2: Replace the local runtime fallback-setting assertion** + +In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace the test +named `defaults local bundle ingest to isolated diffs without an allowlist` with: + +```ts + it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => { + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner: testAgentRunner(), + }); + + const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings; + + expect(settings).not.toHaveProperty('sharedWorktreeSourceKeys'); + expect(Object.keys(settings).sort()).toEqual([ + 'ingestTraceLevel', + 'memoryIngestionModel', + 'probeRowCount', + 'workUnitFailureMode', + 'workUnitMaxConcurrency', + 'workUnitStepBudget', + ]); + }); +``` + +- [ ] **Step 3: Remove the source-routing import from the isolated runner test** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +delete this import: + +```ts +import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js'; +``` + +Then remove the `sharedWorktreeSourceKeys` line from the `settings` object in +`makeDeps()`: + +```ts + settings: { + memoryIngestionModel: 'test', + probeRowCount: 1, + ingestTraceLevel: 'trace', + ...settings, + }, +``` + +- [ ] **Step 4: Replace the shared fallback reachability test** + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, 
+replace the test named +`keeps the shared-worktree path reachable through explicit private fallback settings` +with this stale-setting regression: + +```ts + it('does not support shared-worktree fallback settings', async () => { + const runtime = await makeRealGitRuntime(); + try { + const sourceKey = 'legacy-source'; + const staleSettings = { + sharedWorktreeSourceKeys: ['legacy-source'], + } as Partial<IngestSettingsPort> & Record<string, unknown>; + const { deps, adapter } = makeDeps(runtime, sourceKey, staleSettings); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'legacy-wiki', + rawFiles: ['legacy/page.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/legacy-isolated.md'), + '---\nsummary: Legacy isolated write\nusage_mode: auto\n---\n\nLegacy isolated write.\n', + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'legacy-isolated', + detail: 'Legacy isolated write', + rawPaths: ['legacy/page.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/legacy-isolated.md'], + 'legacy isolated wiki', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: 'job-legacy-isolated', + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { 
kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: 'job-legacy-isolated', + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-legacy-isolated/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).not.toContain('shared_worktree_path_enabled'); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 5: Run the removal regressions and confirm they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/local-bundle-runtime.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + -t "shared-worktree fallback|stale|defaults local bundle ingest|unlisted direct-writing source" +``` + +Expected: FAIL. The local runtime still exposes `sharedWorktreeSourceKeys`, and +the stale-setting runner test still reaches `shared_worktree_path_enabled`. 
+ +--- + +### Task 2: Remove the fallback setting and routing module + +**Files:** +- Modify: `packages/context/src/ingest/ports.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Delete: `packages/context/src/ingest/isolated-diff/source-routing.ts` +- Delete: `packages/context/src/ingest/isolated-diff/source-routing.test.ts` + +- [ ] **Step 1: Remove the fallback setting from the runner settings port** + +In `packages/context/src/ingest/ports.ts`, replace `IngestSettingsPort` with: + +```ts +export interface IngestSettingsPort { + memoryIngestionModel: string; + probeRowCount: number; + workUnitMaxConcurrency?: number; + workUnitStepBudget?: number; + workUnitFailureMode?: 'abort' | 'continue'; + ingestTraceLevel?: IngestTraceLevel; +} +``` + +- [ ] **Step 2: Remove the local runtime source-routing import** + +In `packages/context/src/ingest/local-bundle-runtime.ts`, delete this import: + +```ts +import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js'; +``` + +- [ ] **Step 3: Remove the local runtime fallback setting** + +In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the settings +object with: + +```ts + settings: { + memoryIngestionModel: options.project.config.llm.models.default ?? 
'local-ingest-model', + probeRowCount: 0, + workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency, + workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget, + workUnitFailureMode: options.project.config.ingest.workUnits.failureMode, + ingestTraceLevel: ingestTraceLevelFromEnv(), + }, +``` + +- [ ] **Step 4: Delete the fallback routing helper files** + +Delete: + +```bash +git rm packages/context/src/ingest/isolated-diff/source-routing.ts +git rm packages/context/src/ingest/isolated-diff/source-routing.test.ts +``` + +- [ ] **Step 5: Confirm no fallback helper imports remain** + +Run: + +```bash +rg -n "defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|source-routing" packages/context/src +``` + +Expected: FAIL with no matches. `rg` exits with status 1 when the cleanup is +complete. + +--- + +### Task 3: Delete the shared-worktree runner branch + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` + +- [ ] **Step 1: Remove helper methods used only by the shared branch** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete these private +methods: + +```ts + private buildFailedWorkUnitOutcome(wu: WorkUnit, error: unknown): WorkUnitOutcome { + return { + unitKey: wu.unitKey, + status: 'failed', + reason: error instanceof Error ? error.message : String(error), + preSha: '', + postSha: '', + actions: [], + touchedSlSources: [], + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + }; + } + + private formatWorkUnitFailure(outcome: WorkUnitOutcome): string { + return `WorkUnit ${outcome.unitKey} failed: ${outcome.reason ?? 'unknown failure'}`; + } + + private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean { + return (this.deps.settings.sharedWorktreeSourceKeys ?? 
[]).includes(sourceKey); + } +``` + +- [ ] **Step 2: Make non-override isolated routing unconditional** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace: + +```ts + const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey); +``` + +with: + +```ts + const isolatedDiffEnabled = !overrideReport; +``` + +Then replace: + +```ts + if (!overrideReport && isolatedDiffEnabled) { +``` + +with: + +```ts + if (!overrideReport) { +``` + +- [ ] **Step 3: Delete the old shared-worktree branch** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete the whole +branch that starts with: + +```ts + } else if (!overrideReport) { + await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', { + sourceKey: job.sourceKey, + reason: 'explicit_private_fallback', + }); +``` + +and ends with: + +```ts + latestReportWorkUnits = this.toReportWorkUnits(stageIndex); + } +``` + +After the deletion, the surrounding code must read: + +```ts + } + + } + const carryForwardResult = + contextReport && this.deps.contextCandidateCarryforward + ? await this.deps.contextCandidateCarryforward.carryForward({ + runId: runRow.id, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + }) + : null; +``` + +- [ ] **Step 4: Confirm the branch trace event is gone** + +Run: + +```bash +rg -n "shared_worktree_path_enabled|explicit_private_fallback|isSharedWorktreeFallbackEnabled|sharedWorktreeSourceKeys" packages/context/src/ingest/ingest-bundle.runner.ts +``` + +Expected: FAIL with no matches. 
+ +--- + +### Task 4: Update runner tests for isolated-only execution + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.test.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Remove the fallback setting from the broad runner test harness** + +In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the +`settings` block in `buildRunner()` with: + +```ts + settings: { + probeRowCount: 1, + memoryIngestionModel: 'test-model', + }, +``` + +- [ ] **Step 2: Add no-op isolated patch support to the broad mock Git** + +In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the +`scopedGit` object in `makeDeps()` with: + +```ts + const scopedGit = { + revParseHead: vi.fn().mockResolvedValue('h'), + commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }), + commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }), + resetHardTo: vi.fn(), + assertWorktreeClean: vi.fn().mockResolvedValue(undefined), + writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => { + await writeFile(patchPath, '', 'utf-8'); + }), + applyPatchFile3WayIndex: vi.fn(), + diffNameStatus: vi.fn().mockResolvedValue([]), + }; +``` + +- [ ] **Step 3: Update the custom sequencer test Git mock** + +In the test named +`refuses to squash-merge when the session worktree has an in-progress sequencer op`, +replace the `sessionGit` object with: + +```ts + const sessionGit = { + revParseHead: vi.fn().mockResolvedValue('h'), + commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }), + commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }), + resetHardTo: vi.fn(), + assertWorktreeClean: vi.fn().mockRejectedValue(assertError), + writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => { + await writeFile(patchPath, '', 'utf-8'); + }), + applyPatchFile3WayIndex: 
vi.fn(), + diffNameStatus: vi.fn().mockResolvedValue([]), + }; +``` + +- [ ] **Step 4: Move the failed-WorkUnit integration regression to the isolated suite** + +In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, delete the test +named `squash-merges only successful WUs into main when one WU fails sl_validate`. + +In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`, +add this test near the other real-git isolated runner regressions: + +```ts + it('does not integrate failed isolated WorkUnit patches', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime, 'fake'); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + deps.diffSetService.compute = vi.fn().mockResolvedValue({ + added: ['good.raw', 'bad.raw'], + modified: [], + deleted: [], + unchanged: [], + }); + deps.slValidator.validateSingleSource = vi.fn( + async (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({ + errors: sourceName === 'bad' ? 
[{ message: 'bad source rejected' }] : [], + warnings: [], + }), + ) as never; + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const unitKey = params.telemetryTags.unitKey; + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + if (unitKey === 'wu-good') { + await writeFile(join(root, 'semantic-layer/warehouse/good.yaml'), 'name: good\n', 'utf-8'); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'good'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'good', + detail: 'good source', + targetConnectionId: 'warehouse', + rawPaths: ['good.raw'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/good.yaml'], + 'test: add good source', + 'KTX Test', + 'system@ktx.local', + ); + } + if (unitKey === 'wu-bad') { + await writeFile(join(root, 'semantic-layer/warehouse/bad.yaml'), 'name: bad\n', 'utf-8'); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'bad'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'bad', + detail: 'bad source', + targetConnectionId: 'warehouse', + rawPaths: ['bad.raw'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/bad.yaml'], + 'test: add bad source', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles( + runner, + runtime, + [ + ['good.raw', 'good-hash'], + ['bad.raw', 'bad-hash'], + ], + 'fake', + ); + + const result = await runner.run({ + jobId: 
'job-failed-wu-isolated', + connectionId: 'warehouse', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }); + + expect(result.failedWorkUnits).toEqual(['wu-bad']); + await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/good.yaml'), 'utf-8')).resolves.toContain( + 'good', + ); + await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/bad.yaml'), 'utf-8')).rejects.toThrow(); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: { acceptedPatches?: number }; failedWorkUnits?: string[] }; + expect(reportBody.failedWorkUnits).toEqual(['wu-bad']); + expect(reportBody.isolatedDiff).toMatchObject({ enabled: true, acceptedPatches: 1 }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-failed-wu-isolated/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('work_unit_failed_before_patch'); + expect(trace).toContain('patch_accepted'); + expect(trace).not.toContain('shared_worktree_path_enabled'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 5: Run the updated focused runner tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts \ + src/ingest/local-bundle-runtime.test.ts \ + -t "does not support shared-worktree|does not integrate failed isolated|defaults local bundle ingest|unlisted direct-writing source" +``` + +Expected: PASS. The traces contain `isolated_diff_enabled`, child worktree +events, and no `shared_worktree_path_enabled`. + +- [ ] **Step 6: Run the broad runner suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.test.ts +``` + +Expected: PASS. Broad runner coverage no longer depends on +`sharedWorktreeSourceKeys`. 
+ +- [ ] **Step 7: Commit the runner removal** + +Run: + +```bash +git add \ + packages/context/src/ingest/ports.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/ingest-bundle.runner.test.ts \ + packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "refactor(ingest): remove shared worktree WorkUnit path" +``` + +Expected: commit succeeds. The routing file deletions staged by `git rm` in +Task 2 are included in the commit; do not pass the deleted paths to `git add`, +which rejects pathspecs that match no file. + +--- + +### Task 5: Remove shared-branch agent instructions + +**Files:** +- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` +- Modify: `packages/context/skills/ingest_triage/SKILL.md` +- Test: `packages/context/src/ingest/ingest-prompts.test.ts` +- Test: `packages/context/src/ingest/ingest-runtime-assets.test.ts` + +- [ ] **Step 1: Update the WorkUnit role text** + +In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace +the `<role>` block with: + +```md +<role> +You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit +gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, +Metabase card JSONs, Notion pages, or similar) and you must translate that +slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. +You run in an isolated WorkUnit worktree. Deterministic projection output, +existing project memory, and listed dependency paths are visible; sibling +WorkUnit edits from this same job are not visible until the runner integrates +accepted patches. +</role> +``` + +- [ ] **Step 2: Update the WorkUnit workflow text** + +In the same prompt, replace workflow steps 2 and 4 with: + +```md +2.
Load the per-source review skill first (for example `lookml_ingest`, + `metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and + `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to + react when existing project memory, deterministic projection output, or + prior provenance overlaps with what this WorkUnit is about to write. +4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large + files) to load content. Before writing a new SL source or wiki page, call + `discover_data` for each candidate source, table, metric, or topic name to + find existing wiki pages, SL sources, deterministic projection output, prior + sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit + one, and apply any matching canonical pin before deciding whether to edit, + rename, or skip. +``` + +- [ ] **Step 3: Update the WorkUnit do-not rule** + +In the same prompt, replace: + +```md +- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`. +``` + +with: + +```md +- Do not silently accept a name collision with visible existing memory, + deterministic projection output, or prior provenance when the formula differs. + Trigger `ingest_triage`. +``` + +- [ ] **Step 4: Update ingest triage caller guidance** + +In `packages/context/skills/ingest_triage/SKILL.md`, replace: + +```md +This skill is loaded in two contexts: +- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write. +- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions. +``` + +with: + +```md +This skill is loaded in two contexts: +- By a Stage 3 WorkUnit agent when `sl_discover`, deterministic projection + output, existing project memory, or prior provenance overlaps with what the + current WorkUnit is about to write. 
+- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch + overlap, and eviction decisions. +``` + +- [ ] **Step 5: Update same-ingest wording in ingest triage** + +In `packages/context/skills/ingest_triage/SKILL.md`, replace: + +```md +4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:** +``` + +with: + +```md +4. **If reconciliation sees accepted patches from this same job with no +prior-sync row, check for same-ingest contradictions:** +``` + +- [ ] **Step 6: Search for stale shared-state prompt language** + +Run: + +```bash +rg -n "prior WU|prior-WU|Prior WorkUnits|same job may have already written|visible on the working branch|shared_worktree_path_enabled|shared-worktree path reachable" packages/context/prompts packages/context/skills packages/context/src/ingest +``` + +Expected: FAIL with no matches. + +- [ ] **Step 7: Run prompt asset tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-prompts.test.ts \ + src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: PASS. Prompt assets still load from packaged KTX assets. + +- [ ] **Step 8: Commit the prompt cleanup** + +Run: + +```bash +git add \ + packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \ + packages/context/skills/ingest_triage/SKILL.md +git commit -m "docs(ingest): align WorkUnit prompts with isolated diffs" +``` + +Expected: commit succeeds. 
+ +--- + +### Task 6: Final verification + +**Files:** +- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Verify: `packages/context/src/ingest/ports.ts` +- Verify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.test.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Verify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` +- Verify: `packages/context/skills/ingest_triage/SKILL.md` + +- [ ] **Step 1: Run the isolated-diff focused suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-trace.test.ts \ + src/ingest/wiki-body-refs.test.ts \ + src/ingest/artifact-gates.test.ts \ + src/ingest/semantic-layer-target-policy.test.ts \ + src/ingest/isolated-diff/git-patch.test.ts \ + src/ingest/isolated-diff/work-unit-executor.test.ts \ + src/ingest/isolated-diff/patch-integrator.test.ts \ + src/ingest/isolated-diff/textual-conflict-resolver.test.ts \ + src/ingest/final-gate-repair.test.ts \ + src/ingest/report-snapshot.test.ts \ + src/ingest/ingest-bundle.runner.isolated-diff.test.ts +``` + +Expected: PASS. The output includes the isolated-diff runner tests and no +`source-routing.test.ts`. + +- [ ] **Step 2: Run the full context test suite** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. + +- [ ] **Step 3: Run context type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. There are no `sharedWorktreeSourceKeys` type errors because the +setting no longer exists. + +- [ ] **Step 4: Run dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. Knip does not report deleted source-routing exports, and Biome +does not report stale imports. 
+ +- [ ] **Step 5: Search for removed legacy path names** + +Run: + +```bash +rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|shared_worktree_path_enabled|explicit_private_fallback|source-routing" packages docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md +``` + +Expected: matches only in this plan file. There must be no matches under +`packages/`. + +- [ ] **Step 6: Confirm docs-site does not need an update** + +Run: + +```bash +rg -n "sharedWorktree|isolatedDiffSourceKeys|sharedWorktreeSourceKeys|executionMode|planningStrategy|conflictPolicy" docs-site README.md packages/*/README.md +``` + +Expected: either no matches or matches unrelated to a public user-facing knob. +This change removes an internal runner fallback and does not add, remove, or +rename public CLI behavior, configuration, or docs-site content. + +- [ ] **Step 7: Confirm final verification left the worktree clean** + +Run: + +```bash +git status --short +``` + +Expected: clean after the two implementation commits. If this command reports +new changes, stop and inspect them before finishing; final verification should +not create extra source changes. + +## Self-review + +Spec coverage: + +- Rollout step 11 is covered by Tasks 1 through 4: the private fallback setting, + helper module, old runner branch, trace event, and fallback tests are deleted. +- The isolated-diff WorkUnit flow remains covered by existing real-git tests and + the new failed-WorkUnit regression in Task 4. +- Agent-facing instructions are aligned with the spec's worktree invariant in + Task 5: sibling WorkUnit edits are not visible inside a child worktree. +- Override ingestion remains outside the WorkUnit execution branch and still + uses prior report materialization plus serial reconciliation. + +Placeholder scan: + +- This plan contains exact file paths, test names, replacement snippets, + commands, and expected results.
+- There are no deferred implementation markers or unspecified edge-case + instructions. + +Type consistency: + +- `IngestSettingsPort` no longer includes `sharedWorktreeSourceKeys`. +- `isolatedDiffEnabled` remains the runner's internal summary flag and is + equivalent to `!overrideReport`. +- The removed trace event is `shared_worktree_path_enabled`; retained isolated + events include `isolated_diff_enabled`, `work_unit_child_created`, and + `work_unit_patch_collected`. + +Execution handoff: + +Plan complete and saved to +`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md`. + +Two execution options: + +1. **Subagent-Driven (recommended)** - Dispatch a fresh subagent per task, + review between tasks, and keep iteration fast. +2. **Inline Execution** - Execute tasks in this session using + `superpowers:executing-plans`, with batch execution and checkpoints. diff --git a/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-textual-conflict-resolver.md b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-textual-conflict-resolver.md new file mode 100644 index 00000000..43ad036b --- /dev/null +++ b/docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-textual-conflict-resolver.md @@ -0,0 +1,1241 @@ +# Isolated Diff Ingestion V1 Textual Conflict Resolver Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add bounded resolver-agent handling for textual isolated-diff patch +conflicts so overlapping WorkUnit edits can be repaired, globally gated, and +committed before the runner fails the ingest. + +**Architecture:** Keep patch policy failures and semantic gate failures +fail-fast. 
When an allowed patch fails `git apply --3way --index`, the +integration worktree resets to the pre-apply `HEAD`, one repair agent runs with +tools limited to the failed patch's touched paths, the existing artifact gates +validate the repaired files, and the runner records resolver attempts, repairs, +and failures in traces and reports. Gate repair for cleanly applied but +semantically invalid trees remains a separate plan. + +**Tech Stack:** TypeScript ESM/NodeNext, Vitest, zod, Node `fs/promises`, +existing `AgentRunnerPort`, `GitService`, `IngestTraceWriter`, +`integrateWorkUnitPatch`, and `IngestBundleRunner`. + +--- + +## Audit Summary + +The source spec is +`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`. + +Plans already based on this spec: + +| Plan | Implementation status | Evidence | +| --- | --- | --- | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md` | Implemented | `packages/context/src/ingest/isolated-diff/*`, `ingest-trace.ts`, `wiki-body-refs.ts`, `artifact-gates.ts`, and `ingest-bundle.runner.isolated-diff.test.ts` exist. Git history includes `cae5c4b feat: add isolated diff ingestion v1 core`, `1013bb6 test: cover isolated diff ingestion regressions`, and `c481f1c feat: route selected ingest sources through isolated diffs`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-gates-and-trace-closure.md` | Implemented | Final gates run after reconciliation, traces and failed reports are stored, and child worktree cleanup is covered. Git history includes `656e584 test(ingest): verify isolated diff postmortem coverage` and `87f1193 chore(ingest): verify isolated diff gate closure`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-provenance-gate-closure.md` | Implemented | `validateProvenanceRawPaths()` runs before squash, and the isolated runner has a pre-squash provenance regression. Git history includes `977a610 fix(ingest): gate provenance before isolated diff squash`. 
| +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-reference-and-target-gate-closure.md` | Implemented | `semantic-layer-target-policy.ts`, SL write/edit target checks, patch target checks, and final wiki ref checks exist. Git history includes `5ec6396 fix(ingest): gate final wiki references` and `c61c50b test(ingest): cover isolated diff reference and target gates`. | +| `docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-global-wiki-reference-gate-closure.md` | Implemented | `wikiPageKeysForFinalGates()` expands to all global pages when semantic-layer sources change or wiki pages are removed. Git history includes `ba534fb fix(ingest): gate global wiki references`. | + +Focused verification passed before writing this plan: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts src/ingest/wiki-body-refs.test.ts src/ingest/artifact-gates.test.ts src/ingest/semantic-layer-target-policy.test.ts src/ingest/isolated-diff/git-patch.test.ts src/ingest/isolated-diff/work-unit-executor.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/sl/tools/sl-write-source.tool.test.ts src/sl/tools/sl-edit-source.tool.test.ts +``` + +Current result: `10 passed`, `61 passed`. + +The next spec gap is bounded textual conflict resolution. Today +`packages/context/src/ingest/isolated-diff/patch-integrator.ts` rolls back and +returns `textual_conflict` as soon as `git apply --3way --index` fails. The +spec requires expected cross-WorkUnit overlap to get one bounded repair attempt +before the run fails. + +## Scope + +This plan implements only textual conflict repair for allowed patches that fail +Git application. 
It does not cover: + +- patch policy failures such as `slDisallowed`, unauthorized target connection + paths, executable modes, or binary changes under text artifact roots; +- semantic conflicts where the patch applies but artifact gates fail; +- final gate failures after reconciliation or post-processing; +- broad connector rollout beyond the existing runner-owned Metabase allowlist; +- isolated-diff default promotion; or +- removal of the shared-worktree fallback path. + +## File Structure + +- Create `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts`. + Owns the bounded repair-agent loop and its read/write/delete tools. +- Create `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts`. + Covers allowed-path scoping, failed-patch visibility, successful repair, and + no-edit failure. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.ts`. + Calls the resolver after Git textual conflicts, validates repaired files, and + commits the repair. +- Modify `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`. + Covers repair success and repair failure while preserving pre-apply state. +- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. + Wires the resolver into the isolated-diff integration loop and increments + resolver counters. +- Modify `packages/context/src/ingest/report-snapshot.ts`. + Parses resolver counters from stored report bodies. +- Modify `packages/context/src/ingest/reports.ts`. + Adds resolver counters to the `isolatedDiff` report body type. +- Modify `packages/context/src/ingest/report-snapshot.test.ts`. + Covers the new report fields. +- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`. + Adds an end-to-end same-source conflict regression.
+ +--- + +### Task 1: Add Resolver Unit Tests + +**Files:** +- Create: `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts` + +- [ ] **Step 1: Write the failing resolver tests** + +Create `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts`: + +```ts +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { FileIngestTraceWriter } from '../ingest-trace.js'; +import { resolveTextualConflict } from './textual-conflict-resolver.js'; + +async function makeHarness() { + const root = await mkdtemp(join(tmpdir(), 'ktx-textual-resolver-')); + const workdir = join(root, 'workdir'); + const patchPath = join(root, 'failed.patch'); + const trace = new FileIngestTraceWriter({ + tracePath: join(root, 'trace.jsonl'), + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + runId: 'run-1', + syncId: 'sync-1', + level: 'trace', + }); + await mkdir(join(workdir, 'wiki/global'), { recursive: true }); + await writeFile(join(workdir, 'wiki/global/account.md'), 'accepted line\n', 'utf-8'); + await writeFile( + patchPath, + [ + 'diff --git a/wiki/global/account.md b/wiki/global/account.md', + 'index 8877391..6f63f4d 100644', + '--- a/wiki/global/account.md', + '+++ b/wiki/global/account.md', + '@@ -1 +1 @@', + '-base line', + '+proposal line', + '', + ].join('\n'), + 'utf-8', + ); + return { root, workdir, patchPath, trace }; +} + +describe('resolveTextualConflict', () => { + it('lets the repair agent read the failed patch and write only touched paths', async () => { + const { workdir, patchPath, trace } = await makeHarness(); + const agentRunner = { + runLoop: vi.fn(async (params: any) => { + const current = await params.toolSet.read_integration_file.execute({ path: 'wiki/global/account.md' }); + expect(current.structured).toEqual({ path: 'wiki/global/account.md', exists: true 
}); + expect(current.markdown).toContain('accepted line'); + + const patch = await params.toolSet.read_failed_patch.execute({}); + expect(patch.markdown).toContain('proposal line'); + + await expect( + params.toolSet.write_integration_file.execute({ + path: 'wiki/global/not-allowed.md', + content: 'bad\n', + }), + ).rejects.toThrow(/resolver path not allowed/); + + await params.toolSet.write_integration_file.execute({ + path: 'wiki/global/account.md', + content: 'accepted line\nproposal line\n', + }); + return { stopReason: 'natural' }; + }), + }; + + const result = await resolveTextualConflict({ + agentRunner, + workdir, + unitKey: 'wu-a', + patchPath, + touchedPaths: ['wiki/global/account.md'], + trace, + reason: 'patch failed: wiki/global/account.md', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/account.md'], + }); + await expect(readFile(join(workdir, 'wiki/global/account.md'), 'utf-8')).resolves.toBe( + 'accepted line\nproposal line\n', + ); + expect(agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + stepBudget: 8, + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-textual-resolver', + jobId: 'job-1', + unitKey: 'wu-a', + }), + }), + ); + }); + + it('fails when the repair agent completes without editing any touched path', async () => { + const { workdir, patchPath, trace } = await makeHarness(); + const result = await resolveTextualConflict({ + agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' })) }, + workdir, + unitKey: 'wu-a', + patchPath, + touchedPaths: ['wiki/global/account.md'], + trace, + reason: 'patch failed: wiki/global/account.md', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'failed', + attempts: 1, + reason: 'resolver completed without editing an allowed path', + }); + }); +}); +``` + +- [ ] **Step 2: Run the resolver tests to 
verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/textual-conflict-resolver.test.ts +``` + +Expected: FAIL with a module resolution error for +`./textual-conflict-resolver.js`. + +- [ ] **Step 3: Commit the failing tests** + +```bash +git add packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts +git commit -m "test(ingest): cover isolated diff textual conflict resolver" +``` + +--- + +### Task 2: Add Patch Integrator Resolver Contract Tests + +**Files:** +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` + +- [ ] **Step 1: Add resolver contract regressions** + +Append these tests inside `describe('integrateWorkUnitPatch', ...)` in +`packages/context/src/ingest/isolated-diff/patch-integrator.test.ts`: + +```ts + it('repairs a textual conflict through the bounded resolver and commits repaired files', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com'); + const conflictBase = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com'); + + const childDir = join(homeDir, 'child-conflict'); + await git.addWorktree(childDir, 'child-conflict', conflictBase); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8'); + await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal.patch'); + await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath); + + const trace = new FileIngestTraceWriter({ + 
tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver/trace.jsonl'), + jobId: 'job-resolver', + connectionId: 'warehouse', + sourceKey: 'metabase', + level: 'trace', + }); + + const validateAppliedTree = vi.fn(async (paths: string[]) => { + expect(paths).toEqual(['wiki/global/a.md']); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe( + 'accepted\nproposal\n', + ); + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-conflict', + patchPath, + integrationGit: git, + trace, + author: { name: 'System User', email: 'system@example.com' }, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + validateAppliedTree, + resolveTextualConflict: vi.fn(async (context) => { + expect(context).toMatchObject({ + unitKey: 'wu-conflict', + patchPath, + touchedPaths: ['wiki/global/a.md'], + }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\nproposal\n', 'utf-8'); + return { + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }; + }), + }); + + expect(result).toMatchObject({ + status: 'accepted', + touchedPaths: ['wiki/global/a.md'], + textualResolution: { + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }, + }); + expect(validateAppliedTree).toHaveBeenCalledOnce(); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe( + 'accepted\nproposal\n', + ); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_textual_resolution'); + expect(await git.revParseHead()).not.toBe(baseSha); + }); + + it('keeps the pre-apply integration tree when the resolver cannot repair a textual conflict', async () => { + const { homeDir, configDir, git } = await makeRepo(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 
'system@example.com'); + const conflictBase = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com'); + const acceptedHead = await git.revParseHead(); + + const childDir = join(homeDir, 'child-conflict-fails'); + await git.addWorktree(childDir, 'child-conflict-fails', conflictBase); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8'); + await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal-fails.patch'); + await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath); + + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver-fails/trace.jsonl'), + jobId: 'job-resolver-fails', + connectionId: 'warehouse', + sourceKey: 'metabase', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-conflict', + patchPath, + integrationGit: git, + trace, + author: { name: 'System User', email: 'system@example.com' }, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + validateAppliedTree: vi.fn(async () => {}), + resolveTextualConflict: vi.fn(async () => ({ + status: 'failed', + attempts: 1, + reason: 'resolver completed without editing an allowed path', + })), + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + textualResolution: { + status: 'failed', + attempts: 1, + reason: 'resolver completed without editing an allowed path', + }, + }); + expect(await git.revParseHead()).toBe(acceptedHead); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\n'); + }); +``` + +- [ ] **Step 2: Run the patch integrator tests to verify they fail** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest 
run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: FAIL because `integrateWorkUnitPatch()` does not accept +`resolveTextualConflict` and does not return `textualResolution`. + +- [ ] **Step 3: Commit the failing integrator tests** + +```bash +git add packages/context/src/ingest/isolated-diff/patch-integrator.test.ts +git commit -m "test(ingest): cover isolated diff resolver integration" +``` + +--- + +### Task 3: Implement the Textual Conflict Resolver + +**Files:** +- Create: `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts` +- Modify: `packages/context/src/ingest/isolated-diff/patch-integrator.ts` + +- [ ] **Step 1: Add the resolver module** + +Create `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts`: + +```ts +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { z } from 'zod'; +import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js'; +import type { IngestTraceWriter } from '../ingest-trace.js'; +import { traceTimed } from '../ingest-trace.js'; + +export type TextualConflictResolutionResult = + | { status: 'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export interface ResolveTextualConflictInput { + agentRunner: AgentRunnerPort; + workdir: string; + unitKey: string; + patchPath: string; + touchedPaths: string[]; + trace: IngestTraceWriter; + reason: string; + maxAttempts?: number; + stepBudget?: number; +} + +const readIntegrationFileSchema = z.object({ + path: z.string().min(1), +}); + +const writeIntegrationFileSchema = z.object({ + path: z.string().min(1), + content: z.string(), +}); + +const deleteIntegrationFileSchema = z.object({ + path: z.string().min(1), +}); + +function normalizeRepoPath(path: string): string { + const normalized = path.replace(/\\/g, '/').replace(/^\/+/, ''); + const parts = normalized.split('/').filter((part) 
=> part.length > 0); + if (parts.length === 0 || parts.some((part) => part === '.' || part === '..')) { + throw new Error(`resolver path must be a repository-relative path: ${path}`); + } + return parts.join('/'); +} + +function assertAllowedPath(path: string, allowedPaths: ReadonlySet): string { + const normalized = normalizeRepoPath(path); + if (!allowedPaths.has(normalized)) { + throw new Error(`resolver path not allowed: ${normalized}`); + } + return normalized; +} + +async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> { + try { + return { exists: true, content: await readFile(path, 'utf-8') }; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { exists: false, content: '' }; + } + throw error; + } +} + +function buildResolverSystemPrompt(): string { + return ` +You repair one failed KTX isolated-diff patch inside the integration worktree. + + + +- Preserve accepted integration content that is unrelated to the failed patch. +- Incorporate the failed patch only when the patch evidence is compatible with the current file. +- Edit only paths exposed by the resolver tools. +- Prefer the smallest text edit that makes the composed artifact coherent. +- Do not create new facts that are absent from the current file or failed patch. +- Stop after writing the repaired file content. +`; +} + +function buildResolverUserPrompt(input: { + unitKey: string; + patchPath: string; + touchedPaths: string[]; + reason: string; + attempt: number; + maxAttempts: number; +}): string { + return `Repair isolated-diff textual conflict. + +WorkUnit: ${input.unitKey} +Attempt: ${input.attempt} of ${input.maxAttempts} +Patch path: ${input.patchPath} +Touched paths: +${input.touchedPaths.map((path) => `- ${path}`).join('\n')} + +Git apply failure: +${input.reason} + +Use read_failed_patch first. 
Then read the touched integration files, write the +repaired content, and stop.`; +} + +function buildToolSet(input: { + workdir: string; + patchPath: string; + allowedPaths: ReadonlySet<string>; + editedPaths: Set<string>; +}): KtxRuntimeToolSet { + return { + read_failed_patch: { + name: 'read_failed_patch', + description: 'Read the failed Git patch that could not be applied to the integration worktree.', + inputSchema: z.object({}), + execute: async () => { + const patch = await readFile(input.patchPath, 'utf-8'); + return { + markdown: patch, + structured: { patchPath: input.patchPath, bytes: Buffer.byteLength(patch) }, + }; + }, + }, + read_integration_file: { + name: 'read_integration_file', + description: 'Read one allowed file from the current integration worktree.', + inputSchema: readIntegrationFileSchema, + execute: async ({ path }: z.infer<typeof readIntegrationFileSchema>) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const file = await readOptionalFile(join(input.workdir, normalized)); + return { + markdown: file.exists ? 
file.content : `(missing file: ${normalized})`, + structured: { path: normalized, exists: file.exists }, + }; + }, + }, + write_integration_file: { + name: 'write_integration_file', + description: 'Replace one allowed integration worktree file with repaired text content.', + inputSchema: writeIntegrationFileSchema, + execute: async ({ path, content }: z.infer<typeof writeIntegrationFileSchema>) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const fullPath = join(input.workdir, normalized); + await mkdir(dirname(fullPath), { recursive: true }); + await writeFile(fullPath, content, 'utf-8'); + input.editedPaths.add(normalized); + return { + markdown: `Wrote ${normalized}`, + structured: { path: normalized, bytes: Buffer.byteLength(content) }, + }; + }, + }, + delete_integration_file: { + name: 'delete_integration_file', + description: 'Delete one allowed integration worktree file when the failed patch proves the deletion is correct.', + inputSchema: deleteIntegrationFileSchema, + execute: async ({ path }: z.infer<typeof deleteIntegrationFileSchema>) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + await rm(join(input.workdir, normalized), { force: true }); + input.editedPaths.add(normalized); + return { + markdown: `Deleted ${normalized}`, + structured: { path: normalized }, + }; + }, + }, + }; +} + +export async function resolveTextualConflict( + input: ResolveTextualConflictInput, +): Promise<TextualConflictResolutionResult> { + const allowedPaths = new Set(input.touchedPaths.map(normalizeRepoPath)); + const maxAttempts = input.maxAttempts ?? 1; + const stepBudget = input.stepBudget ?? 
12; + let lastFailure = 'resolver did not run'; + + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const editedPaths = new Set<string>(); + const traceData = { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: [...allowedPaths].sort(), + attempt, + maxAttempts, + reason: input.reason, + }; + const result = await traceTimed(input.trace, 'resolver', 'textual_conflict_resolver', traceData, async () => + input.agentRunner.runLoop({ + modelRole: 'repair', + systemPrompt: buildResolverSystemPrompt(), + userPrompt: buildResolverUserPrompt({ + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: [...allowedPaths].sort(), + reason: input.reason, + attempt, + maxAttempts, + }), + toolSet: buildToolSet({ + workdir: input.workdir, + patchPath: input.patchPath, + allowedPaths, + editedPaths, + }), + stepBudget, + telemetryTags: { + operationName: 'ingest-isolated-diff-textual-resolver', + source: input.trace.context.sourceKey, + jobId: input.trace.context.jobId, + unitKey: input.unitKey, + }, + }), + ); + + if (result.stopReason === 'error') { + lastFailure = result.error?.message ?? 
'resolver agent loop errored'; + await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', traceData, result.error); + continue; + } + + const changedPaths = [...editedPaths].sort(); + if (changedPaths.length === 0) { + lastFailure = 'resolver completed without editing an allowed path'; + await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', { + ...traceData, + reason: lastFailure, + }); + continue; + } + + await input.trace.event('debug', 'resolver', 'textual_conflict_resolver_repaired', { + ...traceData, + changedPaths, + }); + return { status: 'repaired', attempts: attempt, changedPaths }; + } + + return { status: 'failed', attempts: maxAttempts, reason: lastFailure }; +} +``` + +- [ ] **Step 2: Run resolver tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/textual-conflict-resolver.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Update the patch integrator types and conflict path** + +In `packages/context/src/ingest/isolated-diff/patch-integrator.ts`, add the +import: + +```ts +import type { TextualConflictResolutionResult } from './textual-conflict-resolver.js'; +``` + +Replace the result type and input interface with: + +```ts +export type PatchIntegrationTextualResolution = + | { status: 'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export type PatchIntegrationResult = + | { status: 'accepted'; commitSha: string; touchedPaths: string[]; textualResolution?: PatchIntegrationTextualResolution } + | { status: 'textual_conflict'; reason: string; touchedPaths: string[]; textualResolution?: PatchIntegrationTextualResolution } + | { status: 'semantic_conflict'; reason: string; touchedPaths: string[]; textualResolution?: PatchIntegrationTextualResolution }; + +export interface IntegrateWorkUnitPatchInput { + unitKey: string; + patchPath: string; + integrationGit: GitService; + trace: 
IngestTraceWriter; + author: { name: string; email: string }; + slDisallowed: boolean; + allowedTargetConnectionIds: ReadonlySet<string>; + validateAppliedTree(touchedPaths: string[]): Promise<void>; + resolveTextualConflict?(input: { + unitKey: string; + patchPath: string; + touchedPaths: string[]; + reason: string; + }): Promise<TextualConflictResolutionResult>; +} +``` + +Inside the `catch` block that currently handles `patch_apply` errors, replace +the existing return with: + +```ts + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + const reason = errorMessage(error); + await input.trace.event('error', 'integration', 'patch_textual_conflict', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (!input.resolveTextualConflict) { + return { + status: 'textual_conflict', + reason, + touchedPaths, + }; + } + + const textualResolution = await input.resolveTextualConflict({ + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (textualResolution.status === 'failed') { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'textual_conflict', + reason: textualResolution.reason, + touchedPaths, + textualResolution, + }; + } + + try { + await traceTimed( + input.trace, + 'integration', + 'semantic_gate_after_textual_resolution', + { unitKey: input.unitKey, touchedPaths: textualResolution.changedPaths }, + async () => { + await input.validateAppliedTree(textualResolution.changedPaths); + }, + ); + } catch (semanticError) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: textualResolution.changedPaths, + reason: errorMessage(semanticError), + }); + return { + status: 'semantic_conflict', + reason: errorMessage(semanticError), + touchedPaths: 
textualResolution.changedPaths, + textualResolution, + }; + } + + const commit = await input.integrationGit.commitFiles( + textualResolution.changedPaths, + `ingest: resolve WorkUnit ${input.unitKey} conflict`, + input.author.name, + input.author.email, + ); + if (!commit.created) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + const noChangeReason = 'textual resolver produced no committable changes'; + await input.trace.event('error', 'integration', 'textual_conflict_resolver_noop', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: textualResolution.changedPaths, + }); + return { + status: 'textual_conflict', + reason: noChangeReason, + touchedPaths: textualResolution.changedPaths, + textualResolution, + }; + } + + await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', { + unitKey: input.unitKey, + commitSha: commit.commitHash, + touchedPaths: textualResolution.changedPaths, + attempts: textualResolution.attempts, + }); + return { + status: 'accepted', + commitSha: commit.commitHash, + touchedPaths: textualResolution.changedPaths, + textualResolution, + }; +``` + +Leave the earlier patch policy rejection branch unchanged so policy failures +cannot invoke the resolver. + +- [ ] **Step 4: Run patch integrator tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/patch-integrator.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit resolver implementation** + +```bash +git add packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts packages/context/src/ingest/isolated-diff/patch-integrator.ts +git commit -m "feat(ingest): repair isolated diff textual conflicts" +``` + +--- + +### Task 4: Wire the Resolver into the Runner and Reports + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/reports.ts` +- Modify: `packages/context/src/ingest/report-snapshot.ts` +- Modify: `packages/context/src/ingest/report-snapshot.test.ts` + +- [ ] **Step 1: Import the resolver in the runner** + +In `packages/context/src/ingest/ingest-bundle.runner.ts`, add: + +```ts +import { resolveTextualConflict } from './isolated-diff/textual-conflict-resolver.js'; +``` + +- [ ] **Step 2: Add resolver counters to the isolated-diff summary** + +In the `isolatedDiffSummary` initializer in +`packages/context/src/ingest/ingest-bundle.runner.ts`, add: + +```ts + resolverAttempts: 0, + resolverRepairs: 0, + resolverFailures: 0, +``` + +- [ ] **Step 3: Pass the resolver callback to `integrateWorkUnitPatch()`** + +Inside the isolated-diff integration loop, add this property to the +`integrateWorkUnitPatch({ ... })` call: + +```ts + resolveTextualConflict: (context) => + resolveTextualConflict({ + agentRunner: this.deps.agentRunner, + workdir: sessionWorktree.workdir, + unitKey: context.unitKey, + patchPath: context.patchPath, + touchedPaths: context.touchedPaths, + trace: runTrace, + reason: context.reason, + maxAttempts: 1, + stepBudget: 12, + }), +``` + +- [ ] **Step 4: Record resolver outcomes after each integration attempt** + +Immediately after `const integration = await integrateWorkUnitPatch({ ... 
});`, +add: + +```ts + if (integration.textualResolution) { + isolatedDiffSummary.resolverAttempts += integration.textualResolution.attempts; + if (integration.textualResolution.status === 'repaired') { + isolatedDiffSummary.textualConflicts += 1; + isolatedDiffSummary.resolverRepairs += 1; + } else { + isolatedDiffSummary.resolverFailures += 1; + } + } +``` + +Keep the existing textual-conflict and semantic-conflict branches after this +counter update. + +- [ ] **Step 5: Add report body fields** + +In `packages/context/src/ingest/reports.ts`, extend +`IngestReportBody['isolatedDiff']` with: + +```ts + resolverAttempts?: number; + resolverRepairs?: number; + resolverFailures?: number; +``` + +In `packages/context/src/ingest/report-snapshot.ts`, extend the +`isolatedDiff` object schema with: + +```ts + resolverAttempts: z.number().int().min(0).default(0), + resolverRepairs: z.number().int().min(0).default(0), + resolverFailures: z.number().int().min(0).default(0), +``` + +- [ ] **Step 6: Add the report parser regression** + +Append this test to `packages/context/src/ingest/report-snapshot.test.ts`: + +```ts + it('parses isolated-diff textual resolver counters', () => { + const snapshot = parseIngestReportSnapshot({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-18T00:00:00.000Z', + body: { + status: 'completed', + syncId: 'sync-1', + diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 0 }, + commitSha: 'abc123', + isolatedDiff: { + enabled: true, + acceptedPatches: 2, + textualConflicts: 1, + semanticConflicts: 0, + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + artifactResolutions: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + 
toolTranscripts: [], + }, + }); + + expect(snapshot.body.isolatedDiff).toMatchObject({ + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }); + }); +``` + +- [ ] **Step 7: Run report tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit runner and report wiring** + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.ts packages/context/src/ingest/reports.ts packages/context/src/ingest/report-snapshot.ts packages/context/src/ingest/report-snapshot.test.ts +git commit -m "feat(ingest): report isolated diff resolver outcomes" +``` + +--- + +### Task 5: Add End-to-End Resolver Regression + +**Files:** +- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` + +- [ ] **Step 1: Add the end-to-end test** + +Append this test inside `describe('IngestBundleRunner isolated diff path', ...)` +in `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`: + +```ts + it('repairs additive same-source textual conflicts before final gates and squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps } = makeDeps(runtime); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-textual-resolver') { + const current = await params.toolSet.read_integration_file.execute({ + path: 'semantic-layer/warehouse/mart_account_segments.yaml', + }); + expect(current.markdown).toContain('total_contract_arr_cents'); + const patch = await params.toolSet.read_failed_patch.execute({}); + expect(patch.markdown).toContain('account_count'); + await params.toolSet.write_integration_file.execute({ + path: 
'semantic-layer/warehouse/mart_account_segments.yaml', + content: + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' + + 'joins: []\n' + + 'measures:\n' + + ' - name: total_contract_arr_cents\n' + + ' expr: sum(contract_arr)\n' + + ' - name: account_count\n' + + ' expr: count_distinct(account_id)\n', + }); + return { stopReason: 'natural' }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + if (params.telemetryTags.unitKey === 'card-wiki') { + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' + + 'joins: []\n' + + 'measures:\n' + + ' - name: total_contract_arr_cents\n' + + ' expr: sum(contract_arr)\n', + ); + } else if (params.telemetryTags.unitKey === 'card-source') { + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' + + 'joins: []\n' + + 'measures:\n' + + ' - name: account_count\n' + + ' expr: count_distinct(account_id)\n', + ); + } + addTouchedSlSource(currentSession.touchedSlSources, { + connectionId: 'warehouse', + sourceName: 'mart_account_segments', + }); + return { stopReason: 'natural' }; + }); + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['cards/wiki.json', 'hash-a'], + ['cards/source.json', 'hash-b'], + ]); + + const result = await runner.run({ + jobId: 'job-resolver-e2e', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'manual_resync', + bundleRef: { kind: 'upload', uploadId: 'upload-1' }, + }); + + expect(result.commitSha).toBeTruthy(); + const source = await readFile( + join(runtime.configDir, 
'semantic-layer/warehouse/mart_account_segments.yaml'), + 'utf-8', + ); + expect(source).toContain('total_contract_arr_cents'); + expect(source).toContain('account_count'); + expect(deps.agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-textual-resolver', + unitKey: 'card-source', + }), + }), + ); + const successReport = (deps.reports.create as any).mock.calls.at(-1)?.[0]?.body; + expect(successReport.isolatedDiff).toMatchObject({ + acceptedPatches: 2, + textualConflicts: 1, + semanticConflicts: 0, + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-resolver-e2e/trace.jsonl'), 'utf-8'); + expect(trace).toContain('textual_conflict_resolver_repaired'); + expect(trace).toContain('patch_accepted_after_textual_resolution'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +``` + +- [ ] **Step 2: Run the isolated-diff runner regression** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "repairs additive same-source textual conflicts" +``` + +Expected: PASS. 
+ +- [ ] **Step 3: Commit the end-to-end regression** + +```bash +git add packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts +git commit -m "test(ingest): verify isolated diff textual conflict repair" +``` + +--- + +### Task 6: Final Verification + +**Files:** +- Verify: `packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts` +- Verify: `packages/context/src/ingest/isolated-diff/patch-integrator.test.ts` +- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts` +- Verify: `packages/context/src/ingest/report-snapshot.test.ts` + +- [ ] **Step 1: Run the focused resolver and isolated-diff tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/textual-conflict-resolver.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/report-snapshot.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run the existing isolated-diff safety suite** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-trace.test.ts src/ingest/wiki-body-refs.test.ts src/ingest/artifact-gates.test.ts src/ingest/semantic-layer-target-policy.test.ts src/ingest/isolated-diff/git-patch.test.ts src/ingest/isolated-diff/work-unit-executor.test.ts src/ingest/isolated-diff/patch-integrator.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/sl/tools/sl-write-source.tool.test.ts src/sl/tools/sl-edit-source.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code analysis** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to the files changed +by this plan. 
+ +- [ ] **Step 5: Decide docs-site impact** + +No `docs-site/content/docs/` update is required for this plan because the +change is an internal ingest correctness behavior and report diagnostics +extension. If execution changes public CLI output while implementing this +plan, add a follow-up docs-site plan for the affected CLI/status page. + +- [ ] **Step 6: Commit verification notes only if files changed** + +If verification updates snapshots or checked-in fixtures, commit only those +intended files: + +```bash +git add packages/context/src/ingest +git commit -m "chore(ingest): verify isolated diff textual conflict repair" +``` + +If no files changed during verification, do not create an empty commit. + +--- + +## Self-Review + +Spec coverage: + +- Bounded resolver-agent handling for textual conflicts is covered by Tasks 1 + through 5. +- The resolver receives the failed patch, current integration files, touched + path scope, and trace context. +- Patch policy failures remain non-repairable, preserving the existing + `slDisallowed`, target-connection, binary, and executable-mode gates. +- Repaired files run through the existing artifact gates before commit and + before squash. +- Resolver attempts, repaired files, failures, and trace events are reported. + +Remaining spec gaps after this plan: + +- Gate repair for cleanly applied trees that fail final gates. +- Resolver context that includes work-unit transcript excerpts and all + previously applied overlapping patches. +- Broader connector rollout for Notion, LookML, Looker, dbt, and MetricFlow. +- Isolated-diff default promotion after at least one non-Metabase connector + passes. +- Shared-worktree WorkUnit path removal. + +Placeholder scan: + +- The plan contains exact file paths, commands, expected outcomes, and concrete + code blocks for every code-changing step. +- The plan does not contain deferred implementation markers. 
+ +Type consistency: + +- `TextualConflictResolutionResult`, `PatchIntegrationTextualResolution`, and + `textualResolution` use the same `status`, `attempts`, `changedPaths`, and + `reason` fields across resolver, integrator, runner, and tests. +- Report fields use `resolverAttempts`, `resolverRepairs`, and + `resolverFailures` consistently in `reports.ts`, `report-snapshot.ts`, and + runner report bodies. diff --git a/docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md b/docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md new file mode 100644 index 00000000..b2428ae5 --- /dev/null +++ b/docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md @@ -0,0 +1,612 @@ +# Isolated-diff ingestion design + +**Date:** 2026-05-17 +**Author:** Andrey Avtomonov +**Status:** Design - pending implementation plan + +## Background + +KTX ingests third-party context sources into durable project memory: raw source +snapshots, wiki pages, semantic-layer sources, evidence documents, candidates, +and fallback records. The current bundle runner stages raw source data in one +ingestion session worktree, then runs work units against that same mutable +worktree. + +A Metabase ingestion run exposed the failure mode this design addresses. One +work unit inferred and wrote the semantic-layer measure +`mart_account_segments.total_contract_arr_cents`, a later work unit overwrote +the same source with `total_contract_arr`, and the generated wiki page kept +referencing the stale non-existent measure. The local per-work-unit checks did +not catch the final cross-artifact inconsistency because durable writes were +accepted into shared state before final integration. + +The fix is not a Metabase-only validation patch. 
The same class of risk exists +any time LLM-authored work units mutate durable wiki or semantic-layer files: +Metabase cards, Notion pages and clusters, dbt YAML, MetricFlow YAML, Looker +dashboards and explores, and LookML models and views can all produce overlapping +or contested memory artifacts. KTX needs one ingestion execution model that +isolates agent-authored changes, integrates them deliberately, and validates +the final project state globally. + +## Goals + +This design creates one opinionated ingestion algorithm for all context sources. +Connector-specific code stays responsible for source-shaped work: fetching raw +data, normalizing raw files, planning work units, and optionally projecting +deterministic facts. The shared runner owns execution correctness. + +The design has these goals: + +- Run all agent-authored durable writes in isolated per-work-unit worktrees. +- Treat each work unit's git diff as its proposal artifact. +- Integrate accepted diffs through a shared artifact-aware merge path. +- Resolve expected cross-work-unit overlap with bounded agent repair before + failing the run. +- Run final global semantic gates before any changes reach the main project + worktree. +- Keep connector variance minimal and source-shaped, not pipeline-shaped. +- Avoid proposal manifests, typed candidates, and extra reporting entities for + the first implementation. +- Preserve deterministic projections for source systems with authoritative + structured metadata. + +## Non-goals + +This design does not change the wiki frontmatter schema, wiki page file layout, +the semantic-layer YAML format, or the raw source snapshot layouts. It does add +a narrow author-facing inline-code grammar for explicit wiki body references to +semantic-layer entities and raw tables, because body text is part of the +stale-reference failure class. It also does not remove source adapters' current +fetch and chunk logic in one large rewrite. 
+ +This design does not introduce public connector knobs such as +`executionMode`, `planningStrategy`, or `conflictPolicy`. The core runner +becomes more opinionated instead. + +This design does not require all connectors to stop using candidates. Candidate +storage remains valid for flows that intentionally defer wiki curation. The +isolation model applies when a work unit writes durable project files. + +## Locked design direction + +The ingestion runner uses one flow for every source that can produce durable +changes. + +```text +fetch raw + -> optional deterministic project + -> adapter plans WorkUnit[] + -> isolated WU diffs + -> artifact-aware integration + -> global semantic gates + -> squash +``` + +The important invariant is that the core runner does not know why a work unit +exists. A dbt adapter may plan by model, Notion may plan by page or cluster, +MetricFlow may plan by graph component, and Looker may plan by dashboard or +explore. Those differences describe the source system. They are not ingestion +execution modes. + +## Architecture + +The design splits ingestion into two layers with explicit responsibility +boundaries. + +### Source adapter layer + +The adapter owns source semantics. It fetches raw evidence, normalizes that +evidence into staged files, and plans work units from the staged snapshot and +diff scope. + +The adapter may also provide deterministic projectors. A projector is code that +converts authoritative source facts into KTX artifacts without an agent. Good +examples are live database schema introspection and straightforward MetricFlow +semantic-model import. 
+ +The isolation-relevant adapter surface remains small: + +```ts +interface SourceAdapter { + source: string; + skillNames: string[]; + + fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise; + chunk(stagedDir: string, diffSet?: DiffSet): Promise; + + project?(ctx: DeterministicProjectionContext): Promise; + resolveSlTargets?(ctx: SlTargetResolutionContext): Promise; +} +``` + +This is the subset the isolated-diff runner needs to understand source-shaped +planning and deterministic projection. It is not a proposal to delete existing +`SourceAdapter` fields. Existing lifecycle and source-support fields such as +`detect`, `readFetchReport`, `listTargetConnectionIds`, `clusterWorkUnits`, +`describeScope`, `onPullSucceeded`, `evidenceIndexing`, `triageSupported`, +`getTriageSignals`, and `reconcileSkillNames` stay part of the adapter contract +until a separate cleanup intentionally removes them with migration impact +called out. + +`chunk()` returns ordinary `WorkUnit[]`. The runner does not need a +`planningStrategy` enum because the source adapter can plan by any domain shape +that makes sense. + +### Ingestion execution layer + +The runner owns correctness, isolation, and integration. After `WorkUnit[]` +exists, all connectors follow the same execution path. 
+ +The runner is responsible for: + +- creating the ingestion integration worktree from the project base commit; +- committing deterministic projection in the integration worktree before child + worktree creation; +- creating one child worktree per work unit from the post-projection ingestion + base commit; +- scoping tools to the work unit's raw files and allowed target connections; +- running the agent loop inside the work unit worktree; +- validating touched artifacts before accepting the work unit diff; +- collecting the work unit git diff; +- applying accepted diffs into the integration worktree; +- resolving textual and artifact-level conflicts; +- running final global gates; and +- squashing the integration worktree back to the project main worktree. + +## Worktree model + +The design uses three levels of git state. + +```text +project main worktree + ingest integration worktree + per-work-unit worktree(s) +``` + +The project main worktree is the durable KTX project state. The ingestion +integration worktree stages raw snapshots, deterministic projections, accepted +work-unit diffs, reconciliation changes, and final gate repairs before one +squash merge back to main. + +Deterministic projection runs first in the integration worktree, after the raw +snapshot is staged and before any per-work-unit worktree is created. The runner +commits those projector changes as a single projection commit. The integration +worktree's post-projection HEAD is the ingestion base commit referenced in this +design. If the adapter has no projector, the raw-snapshot commit is the +ingestion base commit. + +Each per-work-unit worktree starts from the same ingestion base commit. A work +unit never observes another concurrent work unit's transient edits. This makes +the work unit diff a clean proposal against a stable base. Work units observe +deterministic projection outputs, including through `dependencyPaths` context, +and do not re-derive authoritative projected facts. 
+ +The integration worktree and each per-work-unit worktree must share one Git +object database, created through `git worktree add` from the same repository. +This is required so `git apply --3way` can resolve the base blobs recorded in +each work-unit patch during integration. + +The runner creates and runs child worktrees under the existing +`workUnitMaxConcurrency` setting. A run may have many planned work units, but no +more than that bound may be active or left on disk at once. The default remains +serial execution. Child worktrees must be cleaned up after the diff, transcript, +and outcome metadata are persisted, including failure paths. Adapters with +large fan-out, such as Notion, may use `clusterWorkUnits` before execution to +keep work-unit count tractable, but clustering remains source-shaped planning +rather than a separate execution mode. + +## Work-unit lifecycle + +Each work unit follows a fixed lifecycle. + +1. Create a child worktree at the ingestion base commit. +2. Build a scoped tool session for the child worktree. +3. Run the source skill and agent loop. +4. Run work-unit-local gates against touched artifacts. +5. If gates pass, record `git diff --binary` from base to child HEAD. +6. If gates fail, mark the work unit failed and discard the child worktree. +7. Clean up the child worktree after the diff and transcript are persisted. + +The work unit outcome stores the existing operational metadata KTX already +records: unit key, status, actions, touched semantic-layer sources, failure +reason, raw files, and transcript path. It does not add a proposal manifest. +The diff is the proposal. + +For `slDisallowed` work units, isolation is defense in depth. The scoped +work-unit tools must withhold semantic-layer write and edit tools, and the +integration layer must reject any otherwise accepted diff from that work unit +that touches `semantic-layer/**`. 
This catches buggy or bypassed tool behavior +before an invalid LookML connection-mismatch write can reach the integration +worktree. + +### Diff proposal contract + +The proposal artifact is a Git patch with binary-safe content, not the existing +hash-based raw-source `DiffSet`. + +The first implementation must use one pinned patch contract: + +- collect `git diff --binary --no-renames <ingestion-base>..HEAD`; +- disable rename and copy detection so renames are represented as delete plus + create in version one; +- preserve mode changes from the patch metadata, but reject unexpected + executable-mode or binary changes under known text artifact roots such as + `wiki/**` and `semantic-layer/**`; +- apply each accepted patch to the integration worktree with + `git apply --3way --index`; +- do not use `git apply --reject`, because partial hunk application is not an + accepted integration state; and +- if patch application fails, leaves conflicts, or touches a path disallowed for + that work unit, roll back the integration worktree to its pre-apply HEAD and + classify the outcome as a textual conflict. + +Delete-versus-edit, recreate-versus-edit, and delete-versus-create races are +therefore textual conflicts when Git cannot apply the patch cleanly. If Git +applies the patch but known artifact validators reject the resulting tree, the +outcome is a semantic conflict. + +## Integration lifecycle + +The integration worktree applies accepted work-unit diffs after local gates +pass. The runner applies diffs in a deterministic order, using the original +work-unit index unless a future implementation introduces explicit dependency +ordering. + +Integration has three conflict classes: + +- Clean patch application: the diff applies without conflict. +- Textual conflict: git cannot apply the patch cleanly. +- Semantic conflict: the patch applies textually but creates an invalid or + inconsistent artifact. 
+ +Textual conflicts are resolved before semantic gates run when a bounded +resolver agent can produce a valid result. Overlapping work-unit writes are +normal, especially for Metabase cards that target the same semantic-layer marts +from different collections. The runner must treat overlap as an integration +case, not as a reason to fail immediately. + +Version one is agent-first. If `git apply --3way --index` leaves conflicts, +the runner starts a resolver agent in the integration worktree. The resolver +receives only the failed patch, already-applied patches, conflicted files, +relevant work-unit transcripts, raw evidence paths, and the final-gate rules. +The resolver must preserve all non-conflicting accepted content, resolve +duplicate or competing artifact entries from evidence, and edit only files +touched by the failed patch or already-applied overlapping patches. + +The runner then reruns artifact gates for the changed files and continues with +the remaining patches if validation passes. Resolver attempts are capped to +avoid an unbounded repair loop. A run fails only after the bounded resolver +attempts cannot produce a valid integration tree. + +Deterministic semantic merge is a later optimization, not a version-one +requirement. After measuring resolver latency, cost, and failure modes, KTX can +add merge helpers for common semantic-layer YAML cases, such as additive +`measures`, `segments`, `columns`, `joins`, and `descriptions` updates keyed by +their stable logical identifiers. Those helpers can replace agent calls for +mechanical merges once the measured v1 behavior justifies the added complexity. + +The integration worktree is preserved on failure with conflict markers or +resolver edits, work-unit patches, transcripts, trace events, and the failure +report. The runner never squashes a failed or partially repaired integration +tree back to the project main worktree. 
+ +### Gate repair stage + +The gate repair stage handles cases where patches apply cleanly but the +combined tree fails final semantic or wiki gates. This is distinct from textual +conflict resolution: the tree is textually valid, but the artifacts violate KTX +contracts. + +After each patch integration and after reconciliation, the runner runs final +artifact gates for the affected scope. If gates fail, the runner classifies the +errors before deciding whether to repair or fail. + +Repairable gate errors include: + +- stale wiki body references to renamed semantic-layer entities; +- invalid `sl_refs` entries that point to entities instead of sources; +- inline prose that accidentally uses explicit SL reference syntax; +- duplicate measures, segments, or joins with equivalent definitions; +- missing or stale wiki references created by accepted patches; and +- join or source references that can be corrected from the composed manifest + and work-unit evidence. + +High-risk gate errors fail without automatic repair unless a later +implementation adds a stronger evidence contract: + +- two work units define the same measure with different business meaning; +- a required warehouse table or column does not exist; +- a SQL source fails execution and no obvious localized rewrite exists; or +- the repair would require choosing between conflicting facts without evidence. + +For repairable errors, the runner starts a gate repair agent with the exact +gate errors, changed files, relevant work-unit transcripts, raw evidence paths, +and final-gate rules. The agent may edit only the files involved in the gate +failure. The runner reruns gates after each repair attempt and caps attempts to +one or two passes per integration stage. If the tree still fails, the run stops +with the final gate report and preserved integration worktree. 
+ +### Reconciliation in the new flow + +Reconciliation remains a shared runner stage, but it runs as a serial +integration-stage pass instead of a parallel work unit. + +The runner applies all accepted work-unit diffs to the integration worktree, +resolves textual conflicts that can be resolved, and then runs reconciliation in +that integration worktree before final global gates and before squash. +Reconciliation must see the integrated state because its job is to resolve +cross-work-unit duplicates, evictions, fallbacks, and source-specific +reconcile guidance. + +Reconciliation runs exactly once per integration pass, serially against the +integration worktree, after all accepted work-unit diffs have been applied and +after textual conflicts are resolved. It never runs inside a child worktree and +never overlaps with work-unit execution. This is the safety carve-out from the +isolation goal: concurrent agent writes are the failure mode being avoided, and +reconciliation is non-concurrent by construction. + +Reconciliation is not allowed to mutate project main directly. Its changes are +captured as a reconciliation diff against the pre-reconciliation integration +HEAD and recorded in the existing stage/report metadata. Reconciliation gates +validate the artifacts touched by the reconciliation diff plus any wiki page or +semantic-layer source referenced by changed frontmatter or body references, +using the same artifact-class validators as work-unit gates. Reconciliation may +write only to target connections authorized by the adapter for the ingest run, +but it is not subject to any single work unit's `slDisallowed` scope. The final +global gates validate the combined tree after reconciliation. 
If reconciliation +introduces an invalid wiki or semantic-layer reference, touches an unauthorized +target, or records an unresolvable artifact conflict, the runner sends +repairable failures through the gate repair stage and stops before squash only +when bounded repair cannot produce a valid tree. + +## Artifact-aware integration + +KTX durable artifacts are structured enough that git-only merge is not a strong +correctness boundary. Artifact-aware integration must parse and validate known +file classes after diffs are applied. + +The first implementation must cover these worktree file classes: + +- semantic-layer source YAML; +- wiki markdown frontmatter; +- wiki body references to semantic-layer sources, measures, dimensions, and raw + warehouse tables. + +Unmapped fallback records are not worktree files in version one. They remain +typed stage-index and report records emitted by `emit_unmapped_fallback`; the +integration layer validates their raw paths and structured reason codes as +report metadata, not as mergeable artifacts. + +Provenance also stays out of the worktree in version one. The source of truth is +the ingest provenance store and report body. Before inserting provenance rows, +the global gate derives the planned rows from accepted work-unit actions, +reconciliation actions, artifact-resolution records, and skipped raw files, then +checks those rows against the integrated worktree and staged raw hashes. Moving +provenance to on-disk files would be a separate schema migration, not part of +this design. + +Artifact-resolution records are the existing merged or subsumed reconciliation +outputs emitted through `emit_artifact_resolution` as +`ArtifactResolutionRecord` stage-index records. They are in-memory stage +records, not worktree files, and they feed the provenance gate. + +Artifact-aware integration starts with validation plus bounded agent repair. +It does not need semantic-layer YAML merge helpers in version one. 
If two diffs +contest the same source YAML or wiki page and bounded agent repair cannot prove +correctness, the runner must stop rather than silently accepting stale +references. Deterministic semantic merge helpers can be added after v1 metrics +show which conflicts are frequent, mechanical, and worth optimizing. + +## Global semantic gates + +Final gates run after every accepted diff, deterministic projection, and +reconciliation change has landed in the integration worktree. These gates are +global because the final failure can emerge only after independent valid diffs +combine. + +The final gates must include: + +- semantic-layer validation for touched and dependency sources; +- wiki `wiki_refs` validation; +- wiki frontmatter `sl_refs` validation, including source-level and + measure-level references; +- wiki body validation for explicit semantic-layer source, measure, dimension, + and table references; and +- provenance validation for raw paths referenced by new or changed artifacts + before those rows are inserted into SQLite. + +For semantic-layer validation, touched sources are sources changed by accepted +work-unit diffs, deterministic projection, or reconciliation. Dependency sources +are their direct declared-join neighbors in the composed semantic-layer graph, +including sources they join to and sources that join to them. Version one runs +the existing whole-connection structural checks and source-scoped checks with +the touched-and-dependency source set; it does not expand dependency scope to a +transitive SQL-projection closure. + +The wiki body gate needs a narrow grammar so ordinary prose does not become a +semantic-layer reference. 
In version one, an explicit body reference is one of +these Markdown forms outside fenced code blocks: + +- an inline code token in the form `source.entity`, where both parts are plain + identifier tokens, `source` matches a visible semantic-layer source, and + `entity` must match one of that source's measures, dimensions, or segments; +- an inline code token in the form `connectionId/source.entity`, where + `source.entity` follows the same plain-identifier rule and validates against + that specific target connection; +- an inline code token in the form `source:source_name`, which validates a + source-level semantic-layer reference; or +- an inline code token in the form `table:qualified_table_name`, which validates + a raw warehouse table reference against the visible raw table/catalog sources. + +The parser ignores unformatted prose, fenced SQL examples, wildcard patterns +such as `mart_nrr_quarterly.*_arr_cents`, inline SQL predicates such as +`users.is_internal = false`, and unprefixed single-token inline code. Two-part +inline code that does not name a visible semantic-layer source is not treated +as an SL entity reference; use the `table:` prefix for raw warehouse table +references. + +The `total_contract_arr_cents` incident is the regression case for this gate: +the integrated tree must fail if a wiki page references +`mart_account_segments.total_contract_arr_cents` as an inline-code body token +while the final semantic-layer source defines only `total_contract_arr`. + +## Deterministic projection + +Some connectors have authoritative structured inputs that do not need an LLM to +write KTX artifacts. Those connectors can provide deterministic projectors that +run in the integration worktree. + +Projection is different from work-unit execution: + +- projectors are code, not agents; +- projectors run against the integration worktree; +- projectors produce ordinary durable file changes; and +- projector outputs still pass final global gates. 
+ +The runner infers hybrid behavior from the adapter. If an adapter has both +projectors and work units, it is hybrid. If it has only projectors, it is +deterministic. If it has only work units, it uses isolated diffs. No public +`executionMode` knob is needed. + +## Connector migration notes + +Each connector keeps its source-shaped planning logic. The migration changes +where durable writes happen and how they are integrated. + +### Metabase + +Metabase must move first because it produced the observed stale-measure wiki +reference. Collection and card chunking can remain adapter-specific, but direct +wiki and semantic-layer writes must happen in per-work-unit worktrees. + +The regression test must reproduce two work units that touch +`mart_account_segments`: one writes a wiki reference to an inferred measure and +another leaves the final source with a different measure name. The final global +gate must reject the integrated tree. + +### dbt + +dbt uses source-shaped planning by model or schema file. Deterministic +projection is appropriate for straightforward model, source, column, and +description facts when dbt artifacts are authoritative. Agent work units remain +useful for business wiki synthesis, ambiguous relationship interpretation, and +enrichment that is not directly represented in dbt YAML. + +### MetricFlow + +MetricFlow uses source-shaped planning by graph component. Existing +deterministic semantic-model import code becomes a projector in the ingestion +flow. Agent work units handle unsupported constructs, cross-model explanations, +and wiki synthesis. + +### Looker + +Looker already defers some dashboard and look knowledge through candidates. +That can continue. Any direct semantic-layer writes from explores or query +translation must run through isolated work-unit diffs. + +Looker-specific API and file-adapter collisions remain connector domain logic, +but final correctness still belongs to the shared integration gates. 
+ +### LookML + +LookML already has useful source-shaped ownership rules: models, views, orphan +views, dashboards, and connection-mismatch guards. Those rules stay in the +adapter. Direct semantic-layer writes move into isolated work-unit diffs. + +Connection-mismatch work units can keep their existing write restrictions. The +runner enforces those restrictions through scoped tools and target connection +resolution. + +### Notion + +Notion pages and clusters can create overlapping durable wiki knowledge and can +write semantic-layer overlays after warehouse verification. Notion therefore +uses the same isolated-diff execution model for direct durable writes. + +Large Notion workspaces still need source-shaped clustering to control context +size and cost. Clustering remains adapter logic; correctness comes from isolated +diffs and final global gates. + +## Minimal connector variance + +New connectors must not choose from a menu of ingestion architectures. They +must provide the small amount of source-specific behavior the shared runner +needs. + +Every connector answers these questions: + +- How does KTX fetch or receive raw evidence? +- How does KTX normalize that evidence into staged files? +- How does KTX split the staged evidence into `WorkUnit[]`? +- Are any source facts authoritative enough for deterministic projection? +- Which target semantic-layer connections can the connector write to? + +Everything else is shared runner behavior. + +## Regression tests + +The implementation plan must start with narrow tests that prove the new +execution model prevents the known failure class. + +The first test creates a fake or Metabase-like adapter with two work units +starting from the same base: + +1. Work unit A writes a wiki page that references + `mart_account_segments.total_contract_arr_cents` as an inline-code body + token. +2. Work unit B writes or overwrites the final semantic-layer source with only + `total_contract_arr`. +3. 
Both work units pass their local gates in isolation. +4. Integration applies both diffs. +5. The final global gate fails the run before squash. + +Additional tests cover: + +- two work units editing different wiki pages without conflict; +- two work units editing the same semantic-layer overlay with additive changes, + where the resolver agent preserves both changes and gates the repaired file; +- two work units editing the same semantic-layer overlay with incompatible + definitions, where the resolver agent receives the conflict context and the + run fails only after bounded repair attempts cannot prove a result; +- a textual conflict in a wiki page where the resolver agent preserves + non-conflicting accepted content and gates the repaired page before squash; +- a cleanly merged tree that fails final gates, where the gate repair agent + fixes a stale wiki reference and the run continues; +- an unrepairable final-gate failure, such as a missing warehouse column, where + the runner stops with a preserved integration worktree and report; +- a hybrid adapter case where deterministic projector outputs are visible in a + child worktree before work-unit wiki synthesis, and the final global gate + catches any stale reference to a non-existent projected semantic-layer entity; +- Notion-style direct wiki writes with invalid `sl_refs`; and +- LookML-style `slDisallowed` work units where write tools are unavailable and + integration rejects any diff that still touches `semantic-layer/**`. + +## Rollout + +The rollout must be incremental because the current runner is shared by all +adapters. + +The rollout switch is runner-owned. During migration it may be a private +per-source allowlist, or an internal `IngestSettingsPort` map keyed by +`sourceKey`, but it must not become a `SourceAdapter` field or public connector +configuration knob. + +1. Add the per-work-unit worktree executor behind that internal runner setting. +2. 
Add diff collection and deterministic integration in the existing runner. +3. Add bounded resolver-agent handling for textual conflicts. +4. Add final global wiki and semantic-layer reference gates, including the wiki + body reference parser defined above. +5. Add bounded gate-repair-agent handling for repairable final-gate failures. +6. Instrument resolver latency, attempts, repaired files, and failure classes. +7. Migrate Metabase to the new execution path first. +8. Migrate Notion, LookML, Looker, dbt, and MetricFlow. +9. Add deterministic semantic merge helpers only after v1 metrics show which + agent repairs are frequent and mechanical enough to justify optimization. +10. Promote the new path to the default after the Metabase regression test and + at least one non-Metabase connector pass. +11. Remove the old shared-worktree work-unit execution path. + +The rollout is complete when every connector that permits agent-authored durable +writes uses isolated diffs and all integrations pass the same final global +gates. 
diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index aed006c6..ab7c717b 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -635,6 +635,117 @@ describe('runKtxIngest', () => { expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase'); }); + it('emits structured child ingest progress during Metabase fan-out', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const progressEvents: Array<{ percent: number; message: string; transient?: boolean }> = []; + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'json', + }, + io.io, + { + progress: (event) => progressEvents.push(event), + runLocalMetabaseIngest: async (input) => { + input.progress?.onMetabaseFanoutPlanned?.({ + metabaseConnectionId: 'prod-metabase', + children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }], + }); + input.progress?.onMetabaseChildStarted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'metabase-col-6', + rawFiles: ['cards/40.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: 'metabase-col-6', + skills: ['sl_capture'], + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_step', + unitKey: 'metabase-col-6', + stepIndex: 7, + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'stage_progress', + stage: 'integration', + percent: 81, + message: 'Resolving text conflict for metabase-col-6', + }); + input.memoryFlow?.emit({ type: 'work_unit_finished', unitKey: 'metabase-col-6', status: 
'success' }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'metabase-col-7', + rawFiles: ['cards/48.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: 'metabase-col-7', + skills: ['sl_capture'], + stepBudget: 40, + }); + input.progress?.onMetabaseChildCompleted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + status: 'done', + }); + return { + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 1, failedWorkUnits: 0 }, + children: [], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(progressEvents).toEqual( + expect.arrayContaining([ + { percent: 45, message: 'Planned 1 task' }, + { percent: 55, message: 'Processing 1/1 tasks: metabase-col-6' }, + { + percent: 60, + message: 'Processing tasks: 0/1 complete, 1 active; latest metabase-col-6 step 7/40', + transient: true, + }, + { percent: 81, message: 'Resolving text conflict for metabase-col-6' }, + { percent: 81, message: 'Processing 1/1 tasks: metabase-col-7' }, + ]), + ); + expect(io.stdout()).toContain('"status": "all_succeeded"'); + expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase'); + }); + it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { const projectDir = join(tempDir, 'metabase-cli-project'); await writeWarehouseConfig(projectDir); @@ -985,6 +1096,59 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Status: error\n'); }); + it('prints trace path and error status for stored failed ingest reports', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo(); + const report = { + id: 'report-failed', + runId: 'run-failed', + 
jobId: 'job-failed', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-17T12:00:00.000Z', + body: { + status: 'failed', + syncId: 'sync-failed', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl', + failure: { phase: 'final_gates', message: 'final artifact gates failed' }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }; + + await runKtxIngest( + { + command: 'status', + projectDir, + reportFile: '/project/report-failed.json', + runId: 'run-failed', + outputMode: 'plain', + inputMode: 'disabled', + }, + io.io, + { + readReportFile: vi.fn().mockResolvedValue(report), + }, + ); + + expect(io.stdout()).toContain('Trace: /project/.ktx/ingest-traces/job-failed/trace.jsonl'); + expect(io.stdout()).toContain('Status: error'); + expect(io.stdout()).toContain('Error: final artifact gates failed'); + }); + it('prints a clear first failure reason when query-history work units fail', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index deaa9d77..a45308f1 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -102,7 +102,7 @@ export interface KtxIngestDeps { } function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { - return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; + return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 
'error' : 'done'; } const REPORT_SOURCE_LABELS = new Map([ @@ -174,6 +174,9 @@ function formatFailureReason(sourceKey: string, reason: string): string { } function failedReportMessage(report: IngestReportSnapshot): string | null { + if (report.body.status === 'failed' && report.body.failure?.message) { + return sanitizeMemoryFlowError(report.body.failure.message); + } const failedCount = report.body.failedWorkUnits.length; if (failedCount === 0) { return null; @@ -195,6 +198,9 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void io.stdout.write(`Report: ${report.id}\n`); io.stdout.write(`Run: ${report.runId}\n`); io.stdout.write(`Job: ${report.jobId}\n`); + if (report.body.tracePath) { + io.stdout.write(`Trace: ${report.body.tracePath}\n`); + } io.stdout.write(`Status: ${reportStatus(report)}\n`); io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); @@ -289,7 +295,11 @@ function formatDiffProgress(event: Extract event.type === 'chunks_planned'); + const startIndex = latestPlanIndex >= 0 ? latestPlanIndex + 1 : 0; + return snapshot.events.slice(startIndex, eventIndex + 1); } function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { @@ -313,7 +323,8 @@ function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex if (snapshot.plannedWorkUnits.length > 0) { return snapshot.plannedWorkUnits.length; } - const planEvent = workUnitEventsThrough(snapshot, eventIndex) + const planEvent = snapshot.events + .slice(0, eventIndex + 1) .filter((event) => event.type === 'chunks_planned') .at(-1); return planEvent?.workUnitCount ?? 
completedWorkUnitCountThrough(snapshot, eventIndex); @@ -359,6 +370,12 @@ function plainIngestEventProgress( }; case 'stage_skipped': return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` }; + case 'stage_progress': + return { + percent: event.percent, + message: event.message, + ...(event.transient !== undefined ? { transient: event.transient } : {}), + }; case 'work_unit_started': { const total = plannedWorkUnitCountThrough(snapshot, eventIndex); const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); @@ -705,6 +722,25 @@ export async function runKtxIngest( } if (args.adapter === 'metabase') { const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest; + const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, { + requireInput: (args.inputMode ?? 'auto') === 'auto', + }); + const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env) + ? createPlainIngestProgressRenderer(args, io) + : null; + const structuredProgress = deps.progress + ? createPlainIngestProgressObserver(args, deps.progress) + : null; + const initialMemoryFlow = + plainProgress || structuredProgress ? initialRunMemoryFlowInput(args, 'pending') : undefined; + const memoryFlow = initialMemoryFlow + ? createMemoryFlowLiveBuffer(initialMemoryFlow, { + onChange: (snapshot) => { + plainProgress?.update(snapshot); + structuredProgress?.update(snapshot); + }, + }) + : undefined; const progress = args.outputMode === 'json' && !deps.progress ? undefined @@ -715,20 +751,29 @@ export async function runKtxIngest( : io, deps.progress, ); - const result = await executeMetabaseFanout({ - project: ingestProject, - adapters: createAdapters(ingestProject, adapterOptions), - metabaseConnectionId: args.connectionId, - ...localIngestOptions, - queryExecutor, - trigger: 'manual_resync', - jobIdFactory: deps.jobIdFactory, - ...(progress ? 
{ progress } : {}), - }); - if (args.outputMode === 'json') { - io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); - } else { - writeMetabaseFanoutStatus(result, io); + plainProgress?.start(); + structuredProgress?.start(); + let result: LocalMetabaseFanoutResult; + try { + result = await executeMetabaseFanout({ + project: ingestProject, + adapters: createAdapters(ingestProject, adapterOptions), + metabaseConnectionId: args.connectionId, + ...localIngestOptions, + queryExecutor, + trigger: 'manual_resync', + jobIdFactory: deps.jobIdFactory, + ...(memoryFlow ? { memoryFlow } : {}), + ...(progress ? { progress } : {}), + }); + plainProgress?.flush(); + if (args.outputMode === 'json') { + io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); + } else { + writeMetabaseFanoutStatus(result, io); + } + } finally { + plainProgress?.flush(); } return result.status === 'all_succeeded' ? 0 : 1; } diff --git a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md index 3821537d..a0952293 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md @@ -1,5 +1,12 @@ -You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`. +You are processing ONE WorkUnit of a multi-file ingest bundle. 
The WorkUnit +gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, +Metabase card JSONs, Notion pages, or similar) and you must translate that +slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. +You run in an isolated WorkUnit worktree. Deterministic projection output, +existing project memory, and listed dependency paths are visible; sibling +WorkUnit edits from this same job are not visible until the runner integrates +accepted patches. @@ -8,9 +15,19 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing 1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files. -2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping. +2. Load the per-source review skill first (for example `lookml_ingest`, + `metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and + `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to + react when existing project memory, deterministic projection output, or + prior provenance overlaps with what this WorkUnit is about to write. 3. If the system prompt includes ``, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. 
Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain. -4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. +4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large + files) to load content. Before writing a new SL source or wiki page, call + `discover_data` for each candidate source, table, metric, or topic name to + find existing wiki pages, SL sources, deterministic projection output, prior + sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit + one, and apply any matching canonical pin before deciding whether to edit, + rename, or skip. 5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit. 6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit` over `sl_write` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag. 7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. 
Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written. @@ -28,5 +45,7 @@ Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths li - Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source. - Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture. - Do not duplicate an artifact that prior provenance says you already produced; update it. -- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`. +- Do not silently accept a name collision with visible existing memory, + deterministic projection output, or prior provenance when the formula differs. + Trigger `ingest_triage`. diff --git a/packages/context/skills/ingest_triage/SKILL.md b/packages/context/skills/ingest_triage/SKILL.md index 77872e75..86727ec3 100644 --- a/packages/context/skills/ingest_triage/SKILL.md +++ b/packages/context/skills/ingest_triage/SKILL.md @@ -7,8 +7,11 @@ callers: [memory_agent] # Ingest Triage - conflict classification and resolution This skill is loaded in two contexts: -- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write. 
-- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions. +- By a Stage 3 WorkUnit agent when `sl_discover`, deterministic projection + output, existing project memory, or prior provenance overlaps with what the + current WorkUnit is about to write. +- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch + overlap, and eviction decisions. Apply the rules below before every write that could collide with an existing artifact. @@ -23,7 +26,8 @@ Apply the rules below before every write that could collide with an existing art 3. **If the difference is structural - grain, columns, filter, join shape - is the current bundle the re-ingest of a previously-ingested bundle (i.e. `priorProvenance` has a row for this raw file and artifact)?** Re-ingest change (semantic break): replace + flag. Record in the IngestReport's `conflicts_resolved` list with `flagged_for_human: true`. -4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:** +4. 
**If reconciliation sees accepted patches from this same job with no +prior-sync row, check for same-ingest contradictions:** | Kind | Detection | Resolution | |---|---|---| diff --git a/packages/context/src/core/git.service.patch.test.ts b/packages/context/src/core/git.service.patch.test.ts new file mode 100644 index 00000000..de1ccb9f --- /dev/null +++ b/packages/context/src/core/git.service.patch.test.ts @@ -0,0 +1,45 @@ +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { GitService } from './git.service.js'; + +async function makeGit() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + return { homeDir, configDir, git }; +} + +describe('GitService patch helpers', () => { + it('collects binary-safe no-rename patches and applies them with --3way --index', async () => { + const { homeDir, configDir, git } = await makeGit(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/page.md'), 'old\n'); + await git.commitFiles(['wiki/global/page.md'], 'add page', 'System User', 'system@example.com'); + const base = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/page.md'), 'new\n'); + await git.commitFiles(['wiki/global/page.md'], 'edit page', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal.patch'); + await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath); + + const targetDir = join(homeDir, 'target'); + await git.addWorktree(targetDir, 'target', base); + const 
targetGit = git.forWorktree(targetDir); + await targetGit.applyPatchFile3WayIndex(patchPath); + await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com'); + + await expect(readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8')).resolves.toBe('new\n'); + }); +}); diff --git a/packages/context/src/core/git.service.ts b/packages/context/src/core/git.service.ts index 7db4863b..a3e0c133 100644 --- a/packages/context/src/core/git.service.ts +++ b/packages/context/src/core/git.service.ts @@ -1,5 +1,5 @@ import { promises as fs } from 'node:fs'; -import { join } from 'node:path'; +import { dirname, join } from 'node:path'; import type { SimpleGit } from 'simple-git'; import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js'; import { createSimpleGit } from './git-env.js'; @@ -747,6 +747,55 @@ export class GitService { } } + async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise { + await this.withMutationQueue(async () => { + const patch = await this.git.raw(['diff', '--binary', '--no-renames', `${from}..${to}`]); + await fs.mkdir(dirname(patchPath), { recursive: true }); + await fs.writeFile(patchPath, patch, 'utf-8'); + }); + } + + async applyPatchFile3WayIndex(patchPath: string): Promise { + await this.withMutationQueue(async () => { + await this.git.raw(['apply', '--3way', '--index', patchPath]); + }); + } + + async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise { + return this.withMutationQueue(async () => { + const stagedChanges = await this.git.diff(['--cached', '--name-only']); + if (!stagedChanges.trim()) { + const head = (await this.git.revparse(['HEAD'])).trim(); + const log = await this.git.log({ maxCount: 1 }); + const latest = log.latest; + return { + commitHash: head, + shortHash: head.substring(0, 8), + message: latest?.message ?? '', + author: latest?.author_name ?? '', + authorEmail: latest?.author_email ?? 
'', + timestamp: latest?.date ?? new Date(0).toISOString(), + committedDate: latest?.date ? new Date(latest.date).toISOString() : new Date(0).toISOString(), + created: false, + }; + } + await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` }); + const head = (await this.git.revparse(['HEAD'])).trim(); + const log = await this.git.log({ maxCount: 1 }); + const latest = log.latest; + return { + commitHash: head, + shortHash: head.substring(0, 8), + message: latest?.message ?? commitMessage, + author: latest?.author_name ?? author, + authorEmail: latest?.author_email ?? authorEmail, + timestamp: latest?.date ?? new Date().toISOString(), + committedDate: latest?.date ? new Date(latest.date).toISOString() : new Date().toISOString(), + created: true, + }; + }); + } + private async fileExists(path: string): Promise { try { await fs.access(path); diff --git a/packages/context/src/ingest/adapters/metabase/fetch.test.ts b/packages/context/src/ingest/adapters/metabase/fetch.test.ts index 7e7e4e4a..1f93765e 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch.test.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch.test.ts @@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => { expect(warn).not.toHaveBeenCalled(); }); + it('emits memory-flow progress while fetching Metabase cards', async () => { + const events: unknown[] = []; + + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: { + ...makeFetchContext(), + memoryFlow: { + emit: (event) => events.push(event), + update: vi.fn(), + finish: vi.fn(), + snapshot: vi.fn(), + }, + }, + clientFactory, + sourceStateReader, + }); + + expect(events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: 'stage_progress', + stage: 'source', + message: 'Fetching Metabase database 42 metadata', + }), + expect.objectContaining({ + type: 'stage_progress', + stage: 'source', + message: 'Fetching 1 Metabase card 
for database 42', + }), + expect.objectContaining({ + type: 'stage_progress', + stage: 'source', + message: 'Checked 1/1 Metabase cards for database 42; wrote 1', + transient: true, + }), + expect.objectContaining({ + type: 'stage_progress', + stage: 'source', + message: 'Fetched Metabase database 42: 1 cards, 0 unresolved', + }), + ]), + ); + }); + it('routes Metabase fetch warnings through the injected logger', async () => { const logger = { log: vi.fn(), diff --git a/packages/context/src/ingest/adapters/metabase/fetch.ts b/packages/context/src/ingest/adapters/metabase/fetch.ts index d4e8b59b..c67dc2a7 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch.ts @@ -83,6 +83,15 @@ function resolvePath(index: Map, collectionId: export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise { const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig); const logger = params.logger ?? noopMetabaseFetchLogger; + const emitFetchProgress = (percent: number, message: string, transient = false): void => { + params.ctx.memoryFlow?.emit({ + type: 'stage_progress', + stage: 'source', + percent, + message, + ...(transient ? 
{ transient } : {}), + }); + }; const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId); const mapping = syncState.mappings.find( (m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled, @@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr const client = await params.clientFactory.createClient(pullConfig, params.ctx); try { + emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`); let mappingDatabaseName = mapping.metabaseDatabaseName; let mappingEngine = mapping.metabaseEngine; if (mappingDatabaseName === null) { @@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true }); const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger); + emitFetchProgress( + 28, + `Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? 
'' : 's'} for database ${ + pullConfig.metabaseDatabaseId + }`, + ); const referencedCollectionIds = new Set(); let writtenCards = 0; @@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr } } } + const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length); + if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) { + emitFetchProgress( + 30, + `Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`, + true, + ); + } } + emitFetchProgress( + 32, + `Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`, + ); for (const colId of referencedCollectionIds) { const node = collectionIndex.get(colId); diff --git a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts index 19bb6cdc..232624a5 100644 --- a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts @@ -1,10 +1,12 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js'; import type { SourceAdapter } from '../../types.js'; +import type { MetricFlowParseResult } from './deep-parse.js'; import { MetricflowSourceAdapter } from './metricflow.adapter.js'; +import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js'; function compileOnlyRequiredDepsCheck(): void { // @ts-expect-error MetricflowSourceAdapter requires an explicit cache home. 
@@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record) { return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin')); } +function metricflowParseResult(): MetricFlowParseResult { + return { + semanticModels: [ + { + name: 'orders', + description: 'Orders', + modelRef: 'orders', + dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }], + measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }], + entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }], + defaultTimeDimension: null, + }, + ], + crossModelMetrics: [], + relationships: [], + warnings: ['parser warning'], + }; +} + describe('MetricflowSourceAdapter', () => { let tmpRoot: string; let stagedDir: string; @@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => { await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models'); expect(await adapter.detect(stagedDir)).toBe(true); }); + + it('persists parsed target tables for deterministic projection during fetch', async () => { + const repo = await makeRepo(tmpRoot, { + 'dbt_project.yml': 'name: analytics\n', + 'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n', + }); + + await adapter.fetch?.( + { + repoUrl: repo.repoUrl, + branch: 'main', + path: null, + authToken: null, + parsedTargetTables: { + orders: { + ok: true, + catalog: null, + schema: 'analytics', + name: 'orders', + canonicalTable: 'analytics.orders', + }, + }, + }, + stagedDir, + { connectionId: 'warehouse-1', sourceKey: 'metricflow' }, + ); + + await expect(readMetricflowProjectionConfig(stagedDir)).resolves.toMatchObject({ + parsedTargetTables: { + orders: { + ok: true, + schema: 'analytics', + name: 'orders', + }, + }, + }); + }); + + it('projects parsed MetricFlow semantic models in the integration worktree', async () => { + await writeMetricflowProjectionConfig(stagedDir, { + parsedTargetTables: { + orders: { + ok: 
true, + catalog: null, + schema: 'analytics', + name: 'orders', + canonicalTable: 'analytics.orders', + }, + }, + }); + const scoped = { + getManifestEntry: vi.fn().mockResolvedValue(null), + isManifestBacked: vi.fn().mockResolvedValue(false), + loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }), + loadSource: vi.fn().mockResolvedValue(null), + writeSource: vi.fn().mockResolvedValue({ warnings: [] }), + }; + const semanticLayerService = { + forWorktree: vi.fn().mockReturnValue(scoped), + getManifestEntry: vi.fn(), + isManifestBacked: vi.fn(), + loadAllSources: vi.fn(), + loadSource: vi.fn(), + writeSource: vi.fn(), + }; + + const result = await adapter.project?.({ + connectionId: 'warehouse-1', + sourceKey: 'metricflow', + syncId: 'sync-1', + jobId: 'job-1', + runId: 'run-1', + stagedDir, + workdir: '/tmp/metricflow-integration', + parseArtifacts: metricflowParseResult(), + semanticLayerService: semanticLayerService as never, + }); + + expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/metricflow-integration'); + expect(scoped.writeSource).toHaveBeenCalledWith( + 'warehouse-1', + expect.objectContaining({ name: 'orders' }), + 'dbt MetricFlow', + expect.any(String), + 'dbt MetricFlow sync: create source orders', + { skipValidation: true }, + ); + expect(result).toMatchObject({ + warnings: ['parser warning'], + errors: [], + touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }], + changedWikiPageKeys: [], + }); + }); + + it('returns a projection error when parse artifacts are missing', async () => { + const result = await adapter.project?.({ + connectionId: 'warehouse-1', + sourceKey: 'metricflow', + syncId: 'sync-1', + jobId: 'job-1', + runId: 'run-1', + stagedDir, + workdir: '/tmp/metricflow-integration', + parseArtifacts: undefined, + semanticLayerService: {} as never, + }); + + expect(result).toMatchObject({ + warnings: [], + errors: ['MetricFlow deterministic projection requires parseArtifacts from 
chunk()'], + touchedSources: [], + changedWikiPageKeys: [], + }); + }); }); diff --git a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts index c8182ed8..8aae1df7 100644 --- a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts +++ b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts @@ -1,10 +1,23 @@ import { join } from 'node:path'; -import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js'; +import type { + ChunkResult, + DeterministicProjectionContext, + DiffSet, + FetchContext, + ProjectionResult, + SourceAdapter, +} from '../../types.js'; import { chunkMetricFlowProject } from './chunk.js'; import { detectMetricFlowStagedDir } from './detect.js'; import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js'; import { fetchMetricflowRepo } from './fetch.js'; +import { importMetricflowSemanticModels } from './import-semantic-models.js'; import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js'; +import { + metricflowHostTablesFromParsedTargets, + readMetricflowProjectionConfig, + writeMetricflowProjectionConfig, +} from './projection-config.js'; import { parseMetricflowPullConfig } from './pull-config.js'; export interface MetricflowSourceAdapterDeps { @@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter { cacheDir: this.resolveCacheDir(ctx.connectionId), stagedDir, }); + await writeMetricflowProjectionConfig(stagedDir, { + parsedTargetTables: config.parsedTargetTables, + }); } async listTargetConnectionIds(_stagedDir: string): Promise { @@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter { return { ...chunk, parseArtifacts }; } + async project(ctx: DeterministicProjectionContext): Promise { + if (!isMetricFlowParseResult(ctx.parseArtifacts)) { + return { + warnings: [], + errors: ['MetricFlow 
deterministic projection requires parseArtifacts from chunk()'], + touchedSources: [], + changedWikiPageKeys: [], + }; + } + + const projectionConfig = await readMetricflowProjectionConfig(ctx.stagedDir); + const result = await importMetricflowSemanticModels( + { semanticLayerService: ctx.semanticLayerService }, + { + connectionId: ctx.connectionId, + parseResult: ctx.parseArtifacts, + targetSchema: null, + hostTables: metricflowHostTablesFromParsedTargets(projectionConfig.parsedTargetTables), + workdir: ctx.workdir, + }, + ); + + return { + result, + warnings: result.warnings, + errors: result.errors, + touchedSources: result.touchedSources, + changedWikiPageKeys: [], + }; + } + private resolveCacheDir(connectionId: string): string { return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId); } @@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter { function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult { return parseMetricflowFiles(project.files); } + +function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult { + if (!value || typeof value !== 'object') { + return false; + } + const candidate = value as Partial; + return ( + Array.isArray(candidate.semanticModels) && + Array.isArray(candidate.crossModelMetrics) && + Array.isArray(candidate.relationships) && + Array.isArray(candidate.warnings) + ); +} diff --git a/packages/context/src/ingest/adapters/metricflow/projection-config.ts b/packages/context/src/ingest/adapters/metricflow/projection-config.ts new file mode 100644 index 00000000..2a61fb15 --- /dev/null +++ b/packages/context/src/ingest/adapters/metricflow/projection-config.ts @@ -0,0 +1,54 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { z } from 'zod'; +import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js'; +import type { MetricflowHostTable } 
from './semantic-models.js'; + +const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json'; + +const metricflowProjectionConfigSchema = z.object({ + parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}), +}); + +export type MetricflowProjectionConfig = z.infer; + +export async function writeMetricflowProjectionConfig( + stagedDir: string, + config: MetricflowProjectionConfig, +): Promise { + const parsed = metricflowProjectionConfigSchema.parse(config); + await mkdir(stagedDir, { recursive: true }); + await writeFile(join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE), `${JSON.stringify(parsed, null, 2)}\n`, 'utf-8'); +} + +export async function readMetricflowProjectionConfig(stagedDir: string): Promise { + const path = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE); + try { + return metricflowProjectionConfigSchema.parse(JSON.parse(await readFile(path, 'utf-8'))); + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { parsedTargetTables: {} }; + } + throw error; + } +} + +export function metricflowHostTablesFromParsedTargets( + parsedTargetTables: Record, +): MetricflowHostTable[] { + return Object.entries(parsedTargetTables) + .flatMap(([id, table]) => + table.ok + ? 
[ + { + id, + name: table.name, + catalog: table.catalog, + db: table.schema, + columns: [], + }, + ] + : [], + ) + .sort((left, right) => left.id.localeCompare(right.id)); +} diff --git a/packages/context/src/ingest/artifact-gates.test.ts b/packages/context/src/ingest/artifact-gates.test.ts new file mode 100644 index 00000000..cc786409 --- /dev/null +++ b/packages/context/src/ingest/artifact-gates.test.ts @@ -0,0 +1,190 @@ +import { describe, expect, it, vi } from 'vitest'; +import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js'; + +function wikiServiceWithPages( + pages: Record, +) { + return { + listPageKeys: vi.fn().mockResolvedValue(Object.keys(pages)), + readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, pageKey: string) => { + const page = pages[pageKey]; + if (!page) { + return Promise.resolve(null); + } + return Promise.resolve({ + pageKey, + frontmatter: { + summary: pageKey, + usage_mode: 'auto', + refs: page.refs, + sl_refs: page.slRefs, + }, + content: page.content ?? 
'', + }); + }), + }; +} + +describe('artifact gates', () => { + it('fails the final tree when wiki body references a stale semantic-layer measure', async () => { + const wikiService = wikiServiceWithPages({ + 'account-segments': { + slRefs: ['mart_account_segments'], + content: 'ARR is `mart_account_segments.total_contract_arr_cents`.', + }, + }); + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }), + tableExists: async () => true, + }), + ).rejects.toThrow(/unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/); + }); + + it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => { + expect(() => + validateProvenanceRawPaths({ + rows: [{ rawPath: 'cards/missing.json' }], + currentRawPaths: new Set(['cards/present.json']), + deletedRawPaths: new Set(['cards/deleted.json']), + }), + ).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/); + }); + + it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => { + const wikiService = wikiServiceWithPages({ + 'account-segments': { + slRefs: ['mart_account_segments.total_contract_arr_cents'], + content: 'ARR uses a renamed measure.', + }, + }); + 
const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ], + loadErrors: [], + }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }), + tableExists: async () => true, + }), + ).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/); + }); + + it('validates direct declared-join neighbors of touched semantic-layer sources', async () => { + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ + sources: [ + { + name: 'orders', + grain: ['order_id'], + columns: [ + { name: 'order_id', type: 'string' }, + { name: 'account_id', type: 'string' }, + ], + joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }], + measures: [{ name: 'order_count', expr: 'count(*)' }], + }, + { + name: 'accounts', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: 'account_count', expr: 'count(*)' }], + }, + { + name: 'segments', + grain: ['segment_id'], + columns: [ + { name: 'segment_id', type: 'string' }, + { name: 'account_id', type: 'string' }, + ], + joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }], + measures: [], + }, + ], + loadErrors: [], + }), + }; + const validateTouchedSources = vi.fn().mockResolvedValue({ 
invalidSources: [], validSources: [] }); + + await validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: [], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }], + wikiService: { readPage: vi.fn() } as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources, + tableExists: async () => true, + }); + + expect(validateTouchedSources).toHaveBeenCalledWith([ + { connectionId: 'warehouse', sourceName: 'accounts' }, + { connectionId: 'warehouse', sourceName: 'orders' }, + { connectionId: 'warehouse', sourceName: 'segments' }, + ]); + }); + + it('fails final gates when a changed wiki page references a missing wiki page', async () => { + const wikiService = wikiServiceWithPages({ + 'account-segments': { + refs: ['missing-frontmatter-page'], + content: 'See [[missing-inline-page]] for the related process.', + }, + }); + const semanticLayerService = { + loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }), + }; + + await expect( + validateFinalIngestArtifacts({ + connectionIds: ['warehouse'], + changedWikiPageKeys: ['account-segments'], + touchedSlSources: [], + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }), + tableExists: async () => true, + }), + ).rejects.toThrow( + /wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/, + ); + }); +}); diff --git a/packages/context/src/ingest/artifact-gates.ts b/packages/context/src/ingest/artifact-gates.ts new file mode 100644 index 00000000..44f7a66f --- /dev/null +++ b/packages/context/src/ingest/artifact-gates.ts @@ -0,0 +1,188 @@ +import type { SemanticLayerService } from '../sl/index.js'; +import type { TouchedSlSource } from '../tools/index.js'; +import type { KnowledgeWikiService } from '../wiki/index.js'; +import { 
findMissingWikiRefs } from '../wiki/wiki-ref-validation.js'; +import { findInvalidWikiBodyRefs } from './wiki-body-refs.js'; + +export interface TouchedValidationResult { + invalidSources: string[]; + validSources: string[]; +} + +export interface FinalArtifactGateInput { + connectionIds: string[]; + changedWikiPageKeys: string[]; + touchedSlSources: TouchedSlSource[]; + wikiService: KnowledgeWikiService; + semanticLayerService: SemanticLayerService; + validateTouchedSources(touched: TouchedSlSource[]): Promise; + tableExists(connectionId: string, tableRef: string): Promise; +} + +export interface ProvenanceRawPathValidationInput { + rows: Array<{ rawPath: string }>; + currentRawPaths: Set; + deletedRawPaths: Set; +} + +function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } { + const withoutConnection = ref.includes('/') ? ref.slice(ref.indexOf('/') + 1) : ref; + const connectionId = ref.includes('/') ? ref.slice(0, ref.indexOf('/')) : null; + const [sourceName = '', entityName = null] = withoutConnection.split('.', 2); + return { connectionId, sourceName, entityName }; +} + +function slEntityNames(source: Awaited>['sources'][number]): Set { + return new Set([ + ...(source.measures ?? []).map((measure) => measure.name), + ...(source.columns ?? []).map((column) => column.name), + ...(source.segments ?? []).map((segment) => segment.name), + ]); +} + +function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] { + const seen = new Set(); + const unique: TouchedSlSource[] = []; + for (const source of sources) { + const key = `${source.connectionId}:${source.sourceName}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(source); + } + return unique.sort((left, right) => { + const byConnection = left.connectionId.localeCompare(right.connectionId); + return byConnection === 0 ? 
left.sourceName.localeCompare(right.sourceName) : byConnection; + }); +} + +async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise { + const expanded = [...input.touchedSlSources]; + const touchedByConnection = new Map>(); + for (const source of input.touchedSlSources) { + const bucket = touchedByConnection.get(source.connectionId) ?? new Set(); + bucket.add(source.sourceName); + touchedByConnection.set(source.connectionId, bucket); + } + + for (const connectionId of input.connectionIds) { + const touched = touchedByConnection.get(connectionId); + if (!touched || touched.size === 0) { + continue; + } + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + for (const source of sources) { + const sourceIsTouched = touched.has(source.name); + if (sourceIsTouched) { + for (const join of source.joins ?? []) { + expanded.push({ connectionId, sourceName: join.to }); + } + } + if ((source.joins ?? []).some((join) => touched.has(join.to))) { + expanded.push({ connectionId, sourceName: source.name }); + } + } + } + + return uniqueTouchedSources(expanded); +} + +async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise { + const errors: string[] = []; + const sourcesByConnection = new Map>['sources']>(); + for (const connectionId of input.connectionIds) { + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + sourcesByConnection.set(connectionId, sources); + } + + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + for (const ref of page.frontmatter.sl_refs ?? []) { + const parsed = parseSlRef(ref); + const candidateConnections = parsed.connectionId ? 
[parsed.connectionId] : input.connectionIds; + let source: Awaited>['sources'][number] | undefined; + for (const connectionId of candidateConnections) { + source = sourcesByConnection.get(connectionId)?.find((candidate) => candidate.name === parsed.sourceName); + if (source) { + break; + } + } + if (!source) { + errors.push(`${pageKey}: unknown sl_refs entry ${ref}`); + continue; + } + if (parsed.entityName && !slEntityNames(source).has(parsed.entityName)) { + errors.push(`${pageKey}: unknown sl_refs entity ${ref}`); + } + } + } + return errors; +} + +async function validateWikiRefs(input: FinalArtifactGateInput): Promise { + const dangling: string[] = []; + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + const missingRefs = await findMissingWikiRefs({ + wikiService: input.wikiService, + scope: 'GLOBAL', + scopeId: null, + pageKey, + refs: page.frontmatter.refs, + content: page.content, + }); + for (const missingRef of missingRefs) { + dangling.push(`${pageKey} -> ${missingRef}`); + } + } + return dangling; +} + +export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise { + const touchedWithDependencies = await expandTouchedSlSourcesWithDirectJoinNeighbors(input); + const validation = await input.validateTouchedSources(touchedWithDependencies); + const errors: string[] = validation.invalidSources.map((source) => `semantic-layer validation failed for ${source}`); + errors.push(...(await validateWikiSlRefs(input))); + const danglingWikiRefs = await validateWikiRefs(input); + if (danglingWikiRefs.length > 0) { + errors.push(`wiki references target missing page(s): ${danglingWikiRefs.join(', ')}`); + } + + for (const pageKey of input.changedWikiPageKeys) { + const page = await input.wikiService.readPage('GLOBAL', null, pageKey); + if (!page) { + continue; + } + errors.push( + ...(await findInvalidWikiBodyRefs({ + pageKey, + 
body: page.content, + visibleConnectionIds: input.connectionIds, + loadSources: async (connectionId) => { + const { sources } = await input.semanticLayerService.loadAllSources(connectionId); + return sources; + }, + tableExists: input.tableExists, + })), + ); + } + + if (errors.length > 0) { + throw new Error(`final artifact gates failed:\n${errors.join('\n')}`); + } +} + +export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void { + for (const row of input.rows) { + if (!input.currentRawPaths.has(row.rawPath) && !input.deletedRawPaths.has(row.rawPath)) { + throw new Error(`provenance row references raw path outside this snapshot: ${row.rawPath}`); + } + } +} diff --git a/packages/context/src/ingest/final-gate-repair.test.ts b/packages/context/src/ingest/final-gate-repair.test.ts new file mode 100644 index 00000000..90ad707d --- /dev/null +++ b/packages/context/src/ingest/final-gate-repair.test.ts @@ -0,0 +1,136 @@ +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js'; +import { FileIngestTraceWriter } from './ingest-trace.js'; + +async function makeHarness() { + const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-')); + const workdir = join(root, 'workdir'); + await mkdir(join(workdir, 'wiki/global'), { recursive: true }); + await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(workdir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n', + 'utf-8', + ); + await writeFile( + join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - 
name: total_contract_arr\n expr: sum(contract_arr)\n', + 'utf-8', + ); + const trace = new FileIngestTraceWriter({ + tracePath: join(root, 'trace.jsonl'), + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + runId: 'run-1', + syncId: 'sync-1', + level: 'trace', + }); + return { root, workdir, trace }; +} + +describe('finalGateRepairPaths', () => { + it('derives sorted wiki and semantic-layer file paths', () => { + expect( + finalGateRepairPaths({ + changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'], + touchedSlSources: [ + { connectionId: 'warehouse', sourceName: 'mart_account_segments' }, + { connectionId: 'warehouse', sourceName: 'orders' }, + { connectionId: 'warehouse', sourceName: 'orders' }, + ], + }), + ).toEqual([ + 'semantic-layer/warehouse/mart_account_segments.yaml', + 'semantic-layer/warehouse/orders.yaml', + 'wiki/global/account-segments.md', + 'wiki/global/overview.md', + ]); + }); +}); + +describe('repairFinalGateFailure', () => { + it('lets the repair agent read gate errors and edit only allowed files', async () => { + const { workdir, trace } = await makeHarness(); + const agentRunner = { + runLoop: vi.fn(async (params: any) => { + const error = await params.toolSet.read_gate_error.execute({}); + expect(error.markdown).toContain('total_contract_arr_cents'); + + const page = await params.toolSet.read_repair_file.execute({ + path: 'wiki/global/account-segments.md', + }); + expect(page.markdown).toContain('total_contract_arr_cents'); + + await expect( + params.toolSet.write_repair_file.execute({ + path: 'wiki/global/other.md', + content: 'not allowed', + }), + ).rejects.toThrow(/gate repair path not allowed/); + + await params.toolSet.write_repair_file.execute({ + path: 'wiki/global/account-segments.md', + content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'), + }); + return { stopReason: 'natural' as const }; + }), + }; + + const result = await repairFinalGateFailure({ + 
agentRunner, + workdir, + gateError: + 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + allowedPaths: ['wiki/global/account-segments.md'], + trace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/account-segments.md'], + }); + await expect(readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain( + 'total_contract_arr', + ); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired'); + expect(agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + stepBudget: 8, + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-gate-repair', + repairKind: 'final_artifact_gate', + }), + }), + ); + }); + + it('returns failed when the repair agent edits no allowed file', async () => { + const { workdir, trace } = await makeHarness(); + const result = await repairFinalGateFailure({ + agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) }, + workdir, + gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity', + allowedPaths: ['wiki/global/account-segments.md'], + trace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'failed', + attempts: 1, + reason: 'gate repair completed without editing an allowed path', + }); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed'); + }); +}); diff --git a/packages/context/src/ingest/final-gate-repair.ts b/packages/context/src/ingest/final-gate-repair.ts new file mode 100644 index 00000000..57ff1619 --- /dev/null +++ b/packages/context/src/ingest/final-gate-repair.ts @@ -0,0 +1,230 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; 
+import { dirname, join } from 'node:path'; +import { z } from 'zod'; +import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js'; +import type { TouchedSlSource } from '../tools/index.js'; +import type { IngestTraceWriter } from './ingest-trace.js'; +import { traceTimed } from './ingest-trace.js'; + +type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate'; + +export type FinalGateRepairResult = + | { status: 'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export interface RepairFinalGateFailureInput { + agentRunner: AgentRunnerPort; + workdir: string; + gateError: string; + allowedPaths: string[]; + trace: IngestTraceWriter; + repairKind: FinalGateRepairKind; + maxAttempts?: number; + stepBudget?: number; +} + +const readRepairFileSchema = z.object({ + path: z.string().min(1), +}); + +const writeRepairFileSchema = z.object({ + path: z.string().min(1), + content: z.string(), +}); + +function normalizeRepoPath(path: string): string { + const normalized = path.replace(/\\/g, '/').replace(/^\/+/, ''); + const parts = normalized.split('/').filter((part) => part.length > 0); + if (parts.length === 0 || parts.some((part) => part === '.' 
|| part === '..')) { + throw new Error(`gate repair path must be a repository-relative path: ${path}`); + } + return parts.join('/'); +} + +function assertAllowedPath(path: string, allowedPaths: ReadonlySet): string { + const normalized = normalizeRepoPath(path); + if (!allowedPaths.has(normalized)) { + throw new Error(`gate repair path not allowed: ${normalized}`); + } + return normalized; +} + +async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> { + try { + return { exists: true, content: await readFile(path, 'utf-8') }; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { exists: false, content: '' }; + } + throw error; + } +} + +function buildGateRepairSystemPrompt(): string { + return ` +You repair one KTX isolated-diff artifact gate failure inside the integration worktree. + + + +- Use read_gate_error first. +- Read only files exposed by read_repair_file. +- Edit only paths exposed by write_repair_file. +- Prefer the smallest text edit that makes the gate pass. +- Preserve accepted work-unit, reconciliation, and deterministic projection content. +- Do not invent warehouse facts, business definitions, or semantic-layer entities. +- If the gate error requires choosing between conflicting facts without evidence, stop without editing. +`; +} + +function buildGateRepairUserPrompt(input: { + gateError: string; + allowedPaths: string[]; + repairKind: FinalGateRepairKind; + attempt: number; + maxAttempts: number; +}): string { + return `Repair isolated-diff artifact gates. + +Repair kind: ${input.repairKind} +Attempt: ${input.attempt} of ${input.maxAttempts} + +Allowed files: +${input.allowedPaths.map((path) => `- ${path}`).join('\n')} + +Gate error: +${input.gateError} + +Use read_gate_error first. 
Then inspect only the allowed files, write the +minimal repaired content, and stop.`; +} + +function buildToolSet(input: { + workdir: string; + gateError: string; + allowedPaths: ReadonlySet; + editedPaths: Set; +}): KtxRuntimeToolSet { + return { + read_gate_error: { + name: 'read_gate_error', + description: 'Read the artifact gate failure that must be repaired.', + inputSchema: z.object({}), + execute: async () => ({ + markdown: input.gateError, + structured: { gateError: input.gateError }, + }), + }, + read_repair_file: { + name: 'read_repair_file', + description: 'Read one allowed file from the integration worktree.', + inputSchema: readRepairFileSchema, + execute: async ({ path }: z.infer) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const file = await readOptionalFile(join(input.workdir, normalized)); + return { + markdown: file.exists ? file.content : `(missing file: ${normalized})`, + structured: { path: normalized, exists: file.exists }, + }; + }, + }, + write_repair_file: { + name: 'write_repair_file', + description: 'Replace one allowed integration worktree file with repaired text content.', + inputSchema: writeRepairFileSchema, + execute: async ({ path, content }: z.infer) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const fullPath = join(input.workdir, normalized); + await mkdir(dirname(fullPath), { recursive: true }); + await writeFile(fullPath, content, 'utf-8'); + input.editedPaths.add(normalized); + return { + markdown: `Wrote ${normalized}`, + structured: { path: normalized, bytes: Buffer.byteLength(content) }, + }; + }, + }, + }; +} + +export function finalGateRepairPaths(input: { + changedWikiPageKeys: string[]; + touchedSlSources: TouchedSlSource[]; +}): string[] { + return [ + ...new Set([ + ...input.touchedSlSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`), + ...input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`), + ]), + 
].sort(); +} + +export async function repairFinalGateFailure( + input: RepairFinalGateFailureInput, +): Promise { + const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath)); + const maxAttempts = input.maxAttempts ?? 1; + const stepBudget = input.stepBudget ?? 16; + let lastFailure = 'gate repair did not run'; + + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const editedPaths = new Set(); + const sortedAllowedPaths = [...allowedPaths].sort(); + const traceData = { + repairKind: input.repairKind, + attempt, + maxAttempts, + allowedPaths: sortedAllowedPaths, + gateError: input.gateError, + }; + const result = await traceTimed(input.trace, 'gate_repair', 'gate_repair', traceData, async () => + input.agentRunner.runLoop({ + modelRole: 'repair', + systemPrompt: buildGateRepairSystemPrompt(), + userPrompt: buildGateRepairUserPrompt({ + gateError: input.gateError, + allowedPaths: sortedAllowedPaths, + repairKind: input.repairKind, + attempt, + maxAttempts, + }), + toolSet: buildToolSet({ + workdir: input.workdir, + gateError: input.gateError, + allowedPaths, + editedPaths, + }), + stepBudget, + telemetryTags: { + operationName: 'ingest-isolated-diff-gate-repair', + source: input.trace.context.sourceKey, + jobId: input.trace.context.jobId, + repairKind: input.repairKind, + }, + }), + ); + + if (result.stopReason === 'error') { + lastFailure = result.error?.message ?? 
'gate repair agent loop errored'; + await input.trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, result.error); + continue; + } + + const changedPaths = [...editedPaths].sort(); + if (changedPaths.length === 0) { + lastFailure = 'gate repair completed without editing an allowed path'; + await input.trace.event('error', 'gate_repair', 'gate_repair_failed', { + ...traceData, + reason: lastFailure, + }); + continue; + } + + await input.trace.event('debug', 'gate_repair', 'gate_repair_repaired', { + ...traceData, + changedPaths, + }); + return { status: 'repaired', attempts: attempt, changedPaths }; + } + + return { status: 'failed', attempts: maxAttempts, reason: lastFailure }; +} diff --git a/packages/context/src/ingest/index.ts b/packages/context/src/ingest/index.ts index 1a7ed721..450306dc 100644 --- a/packages/context/src/ingest/index.ts +++ b/packages/context/src/ingest/index.ts @@ -17,6 +17,11 @@ export { buildLiveDatabaseTableNaturalKey, ktxSchemaSnapshotToExtractedSchema, } from './adapters/live-database/extracted-schema.js'; +export { + assertSemanticLayerTargetPathsAllowed, + findDisallowedSemanticLayerTargetPaths, + semanticLayerConnectionIdFromPath, +} from './semantic-layer-target-policy.js'; export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js'; export type { BuildLiveDatabaseManifestShardsInput, @@ -609,6 +614,11 @@ export { } from './raw-sources-paths.js'; export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js'; export type { IngestReportBody, IngestReportSnapshot } from './reports.js'; +export * from './artifact-gates.js'; +export * from './ingest-trace.js'; +export * from './isolated-diff/git-patch.js'; +export * from './isolated-diff/patch-integrator.js'; +export * from './isolated-diff/work-unit-executor.js'; export * from './reports.js'; export { SourceAdapterRegistry } from './source-adapter-registry.js'; export type { SqliteBundleIngestStoreOptions } 
from './sqlite-bundle-ingest-store.js'; @@ -652,4 +662,7 @@ export type { TriageSignals, UnresolvedCardInfo, WorkUnit, + DeterministicProjectionContext, + ProjectionResult, } from './types.js'; +export * from './wiki-body-refs.js'; diff --git a/packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts b/packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts new file mode 100644 index 00000000..f84d8fd1 --- /dev/null +++ b/packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts @@ -0,0 +1,2163 @@ +import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { GitService, SessionWorktreeService } from '../core/index.js'; +import { LocalGitFileStore } from '../project/local-git-file-store.js'; +import { addTouchedSlSource } from '../tools/index.js'; +import { IngestBundleRunner } from './ingest-bundle.runner.js'; +import type { IngestBundleRunnerDeps } from './ports.js'; + +async function makeRealGitRuntime() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-isolated-runner-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + const configService = new LocalGitFileStore({ rootDir: configDir, git }); + const sessionWorktreeService = new SessionWorktreeService({ + coreConfig: { + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }, + gitService: git, + configService, + }); + return { homeDir, configDir, git, configService, 
sessionWorktreeService }; +} + +function rootOfConfig(configService: unknown, fallback: string): string { + const rootDir = (configService as { rootDir?: unknown }).rootDir; + return typeof rootDir === 'string' ? rootDir : fallback; +} + +async function loadSourcesFromRoot(root: string) { + const raw = await readFile(join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), 'utf-8').catch( + () => '', + ); + const hasCents = raw.includes('total_contract_arr_cents'); + const hasDollars = raw.includes('total_contract_arr'); + return { + sources: + hasCents || hasDollars + ? [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [{ name: 'account_id', type: 'string' }], + joins: [], + measures: [{ name: hasCents ? 'total_contract_arr_cents' : 'total_contract_arr', expr: 'sum(contract_arr)' }], + table: 'analytics.mart_account_segments', + }, + ] + : [], + loadErrors: [], + }; +} + +async function listGlobalWikiPageKeys(root: string): Promise { + const dir = join(root, 'wiki/global'); + const entries = await readdir(dir).catch(() => []); + return entries + .filter((entry) => entry.endsWith('.md')) + .map((entry) => entry.slice(0, -'.md'.length)) + .sort(); +} + +function frontmatterList(yaml: string, key: string): string[] { + const pattern = new RegExp(`(?:^|\\n)${key}:\\n((?: - .+\\n?)*)`); + return ( + pattern + .exec(yaml)?.[1] + ?.split('\n') + .map((line) => line.trim().replace(/^- /, '')) + .filter(Boolean) ?? [] + ); +} + +function legacyFallbackSettingKey(): string { + return ['sharedWorktree', 'SourceKeys'].join(''); +} + +function legacySharedTraceEvent(): string { + return ['shared', 'worktree', 'path', 'enabled'].join('_'); +} + +function makeWikiService(root: string) { + return { + listPageKeys: vi.fn(async (scope: string) => (scope === 'GLOBAL' ? 
listGlobalWikiPageKeys(root) : [])), + readPage: vi.fn(async (_scope: string, _scopeId: string | null, key: string) => { + const path = join(root, 'wiki/global', `${key}.md`); + const raw = await readFile(path, 'utf-8').catch(() => null); + if (!raw) { + return null; + } + const [, yaml = '', content = ''] = /^---\n([\s\S]*?)\n---\n?([\s\S]*)$/.exec(raw) ?? []; + return { + pageKey: key, + frontmatter: { + summary: key, + usage_mode: 'auto', + refs: frontmatterList(yaml, 'refs'), + sl_refs: frontmatterList(yaml, 'sl_refs'), + }, + content: content.trim(), + }; + }), + syncFromCommit: vi.fn(), + }; +} + +function makeDeps( + runtime: Awaited>, + sourceKey = 'metabase', + settings: Partial = {}, +) { + const adapter: any = { + source: sourceKey, + skillNames: [], + detect: vi.fn().mockResolvedValue(true), + chunk: vi.fn().mockResolvedValue({ + workUnits: [ + { unitKey: 'card-wiki', rawFiles: ['cards/wiki.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }), + }; + const wikiService = makeWikiService(runtime.configDir); + const semanticLayerService: any = { + loadAllSources: vi.fn(async () => loadSourcesFromRoot(runtime.configDir)), + listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']), + }; + semanticLayerService.forWorktree = vi.fn((workdir: string) => ({ + ...semanticLayerService, + loadAllSources: vi.fn(async () => loadSourcesFromRoot(workdir)), + listFilesForConnection: vi.fn().mockResolvedValue(['mart_account_segments.yaml']), + })); + + const deps: IngestBundleRunnerDeps = { + runs: { create: vi.fn().mockResolvedValue({ id: 'run-1' }), markCompleted: vi.fn(), markFailed: vi.fn() }, + provenance: { + insertMany: vi.fn(), + findLatestHashesForCompletedSyncs: vi.fn().mockResolvedValue(new Map()), + findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(new Map()), + }, + reports: { create: vi.fn().mockResolvedValue({ id: 
'report-1' }), findByJobId: vi.fn().mockResolvedValue(null), markSuperseded: vi.fn() }, + canonicalPins: { listPins: vi.fn().mockResolvedValue([]) }, + registry: { get: vi.fn().mockReturnValue(adapter), register: vi.fn(), has: vi.fn(), list: vi.fn() }, + diffSetService: { + compute: vi.fn().mockResolvedValue({ added: ['cards/wiki.json', 'cards/source.json'], modified: [], deleted: [], unchanged: [] }), + }, + sessionWorktreeService: runtime.sessionWorktreeService, + agentRunner: { runLoop: vi.fn() }, + gitService: runtime.git, + lockingService: { withLock: vi.fn(async (_key, fn) => fn()) }, + storage: { + homeDir: join(runtime.configDir, '.ktx'), + systemGitAuthor: { name: 'KTX Test', email: 'system@ktx.local' }, + resolveUploadDir: (id) => join(runtime.homeDir, 'upload', id), + resolvePullDir: (id) => join(runtime.homeDir, 'pull', id), + resolveTranscriptDir: (id) => join(runtime.configDir, '.ktx/ingest-transcripts', id), + resolveTracePath: (id) => join(runtime.configDir, '.ktx/ingest-traces', id, 'trace.jsonl'), + }, + settings: { + memoryIngestionModel: 'test', + probeRowCount: 1, + ingestTraceLevel: 'trace', + ...settings, + }, + skillsRegistry: { + listSkills: vi.fn().mockResolvedValue([]), + getSkill: vi.fn().mockResolvedValue(null), + buildSkillsPrompt: vi.fn().mockReturnValue(''), + stripFrontmatter: vi.fn((body) => body), + } as never, + promptService: { loadPrompt: vi.fn().mockResolvedValue('base') } as never, + wikiService: { ...wikiService, forWorktree: vi.fn((workdir: string) => makeWikiService(workdir)) } as never, + knowledgeIndex: { listPagesForUser: vi.fn().mockResolvedValue([]) }, + knowledgeSlRefs: { syncFromWiki: vi.fn() }, + semanticLayerService, + slSearchService: { indexSources: vi.fn() } as never, + slSourcesRepository: {} as never, + slValidator: { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) }, + connections: { listEnabledConnections: vi.fn().mockResolvedValue([]), getConnectionById: vi.fn() } as never, + 
toolsetFactory: { createIngestWuToolset: vi.fn(() => ({ toRuntimeTools: vi.fn(() => ({})) })) }, + commitMessages: { enqueueForExternalCommit: vi.fn() }, + embedding: { maxBatchSize: 64, computeEmbedding: vi.fn(), computeEmbeddingsBulk: vi.fn() }, + }; + return { deps, adapter }; +} + +async function mockStageRawFiles( + runner: IngestBundleRunner, + runtime: Awaited>, + hashes: [string, string][], + sourceKey = 'metabase', +) { + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue(join(runtime.homeDir, 'stage')); + (runner as any).stageRawFilesStage1 = vi.fn(async ({ worktreeRoot }: any) => { + const rawDir = join(worktreeRoot, 'raw-sources/warehouse', sourceKey, 's'); + await mkdir(rawDir, { recursive: true }); + for (const [rawPath] of hashes) { + await mkdir(join(rawDir, rawPath.split('/').slice(0, -1).join('/')), { recursive: true }); + await writeFile(join(rawDir, rawPath), '{}'); + } + return { currentHashes: new Map(hashes), rawDirInWorktree: `raw-sources/warehouse/${sourceKey}/s` }; + }); +} + +describe('IngestBundleRunner isolated diff path', () => { + it('routes an unlisted direct-writing source through isolated diffs by default', async () => { + const runtime = await makeRealGitRuntime(); + try { + const sourceKey = 'custom-direct-source'; + const { deps, adapter } = makeDeps(runtime, sourceKey); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'custom-wiki', + rawFiles: ['custom/page.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { 
recursive: true }); + await writeFile( + join(root, 'wiki/global/custom-isolated.md'), + '---\nsummary: Custom isolated write\nusage_mode: auto\n---\n\nCustom isolated write.\n', + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'custom-isolated', + detail: 'Custom isolated write', + rawPaths: ['custom/page.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/custom-isolated.md'], + 'custom wiki', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['custom/page.json', 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: 'job-custom-default', + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: 'job-custom-default', + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-custom-default/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).not.toContain(legacySharedTraceEvent()); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('does not support shared-worktree fallback settings', async () => { + const runtime = await makeRealGitRuntime(); + try { + const sourceKey = 'legacy-source'; + const staleSettings = { + [legacyFallbackSettingKey()]: ['legacy-source'], + } as Partial & Record; + const { deps, adapter } = makeDeps(runtime, sourceKey, staleSettings); + 
adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'legacy-wiki', + rawFiles: ['legacy/page.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/legacy-isolated.md'), + '---\nsummary: Legacy isolated write\nusage_mode: auto\n---\n\nLegacy isolated write.\n', + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'legacy-isolated', + detail: 'Legacy isolated write', + rawPaths: ['legacy/page.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/legacy-isolated.md'], + 'legacy isolated wiki', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: 'job-legacy-isolated', + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: 'job-legacy-isolated', + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-legacy-isolated/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).not.toContain(legacySharedTraceEvent()); + + const reportCreate = 
vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('does not integrate failed isolated WorkUnit patches', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime, 'fake'); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + deps.diffSetService.compute = vi.fn().mockResolvedValue({ + added: ['good.raw', 'bad.raw'], + modified: [], + deleted: [], + unchanged: [], + }); + deps.slValidator.validateSingleSource = vi.fn( + async (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({ + errors: sourceName === 'bad' ? 
[{ message: 'bad source rejected' }] : [], + warnings: [], + }), + ) as never; + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { stopReason: 'natural' }; + } + const unitKey = params.telemetryTags.unitKey; + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + if (unitKey === 'wu-good') { + await writeFile(join(root, 'semantic-layer/warehouse/good.yaml'), 'name: good\n', 'utf-8'); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'good'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'good', + detail: 'good source', + targetConnectionId: 'warehouse', + rawPaths: ['good.raw'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/good.yaml'], + 'test: add good source', + 'KTX Test', + 'system@ktx.local', + ); + } + if (unitKey === 'wu-bad') { + await writeFile(join(root, 'semantic-layer/warehouse/bad.yaml'), 'name: bad\n', 'utf-8'); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'bad'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'bad', + detail: 'bad source', + targetConnectionId: 'warehouse', + rawPaths: ['bad.raw'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/bad.yaml'], + 'test: add bad source', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles( + runner, + runtime, + [ + ['good.raw', 'good-hash'], + ['bad.raw', 'bad-hash'], + ], + 'fake', + ); + + const result = await runner.run({ + jobId: 
'job-failed-wu-isolated', + connectionId: 'warehouse', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }); + + expect(result.failedWorkUnits).toEqual(['wu-bad']); + await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/good.yaml'), 'utf-8')).resolves.toContain( + 'good', + ); + await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/bad.yaml'), 'utf-8')).rejects.toThrow(); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { + isolatedDiff?: { acceptedPatches?: number }; + failedWorkUnits?: string[]; + }; + expect(reportBody.failedWorkUnits).toEqual(['wu-bad']); + expect(reportBody.isolatedDiff).toMatchObject({ enabled: true, acceptedPatches: 1 }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-failed-wu-isolated/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('work_unit_failed_before_patch'); + expect(trace).toContain('patch_accepted'); + expect(trace).not.toContain(legacySharedTraceEvent()); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it.each(['notion', 'lookml', 'looker', 'dbt', 'metricflow'] as const)( + 'routes %s direct writes through isolated child worktrees', + async (sourceKey) => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime, sourceKey); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: `${sourceKey}-wiki`, + rawFiles: [`${sourceKey}/page.json`], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName !== 'ingest-bundle-wu') { + return { 
stopReason: 'natural' }; + } + + expect(params.telemetryTags).toMatchObject({ + operationName: 'ingest-bundle-wu', + source: sourceKey, + unitKey: `${sourceKey}-wiki`, + }); + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global', `${sourceKey}-isolated.md`), + `---\nsummary: ${sourceKey} isolated write\nusage_mode: auto\n---\n\nIsolated ${sourceKey} write.\n`, + 'utf-8', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: `${sourceKey}-isolated`, + detail: `${sourceKey} isolated write`, + rawPaths: [`${sourceKey}/page.json`], + }); + await currentSession.gitService.commitFiles( + [`wiki/global/${sourceKey}-isolated.md`], + `${sourceKey} wiki`, + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [[`${sourceKey}/page.json`, 'h1']], sourceKey); + + await expect( + runner.run({ + jobId: `job-${sourceKey}`, + connectionId: 'warehouse', + sourceKey, + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).resolves.toMatchObject({ + jobId: `job-${sourceKey}`, + failedWorkUnits: [], + workUnitCount: 1, + }); + + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces', `job-${sourceKey}`, 'trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).toContain('work_unit_patch_collected'); + expect(trace).toContain('patch_apply_started'); + expect(trace).not.toContain(legacySharedTraceEvent()); + + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0]; + const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined; + expect(reportBody?.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 1, + }); 
+ } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }, + ); + + it('rejects the Metabase stale-measure wiki body regression before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.project = vi.fn(async ({ workdir }) => { + await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + return { + warnings: [], + errors: [], + touchedSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + changedWikiPageKeys: [], + }; + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.unitKey === 'card-wiki') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr_cents`.\n', + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'account-segments', detail: 'Account segments' }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'wu wiki', 'KTX Test', 'system@ktx.local'); + } + if (params.telemetryTags.unitKey === 'card-source') { + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: 
[account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Dollar measure', + targetConnectionId: 'warehouse', + }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/mart_account_segments.yaml'], 'wu source', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['cards/wiki.json', 'h1'], + ['cards/source.json', 'h2'], + ]); + + await expect( + runner.run({ jobId: 'job-1', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/total_contract_arr_cents/); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-1/trace.jsonl'), 'utf-8'); + expect(trace).toContain('input_snapshot'); + expect(trace).toContain('isolated_diff_enabled'); + expect(trace).toContain('work_unit_child_created'); + expect(trace).toContain('work_unit_patch_collected'); + expect(trace).toContain('patch_apply_started'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('ingest_failed'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects unchanged wiki body refs made stale by isolated semantic-layer changes', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: 
mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed existing wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new 
IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-existing-body-stale', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/total_contract_arr_cents/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const events = (await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-body-stale/trace.jsonl'), 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(events.map((event) => event.event)).toEqual( + expect.arrayContaining([ + 'final_artifact_gates_started', + 'final_artifact_gates_failed', + 'ingest_failed', + 'failure_report_created', + ]), + ); + expect(events.map((event) => event.event)).not.toContain('squash_finished'); + const gateFailure = events.find((event) => event.event === 'final_artifact_gates_failed'); + expect(gateFailure).toMatchObject({ + data: { + wikiReferenceGateScope: { + global: true, + reasons: expect.arrayContaining(['semantic_layer_changed']), + pageKeysValidated: expect.arrayContaining(['account-segments']), + }, + actionOrigins: expect.arrayContaining([ + expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'source-only', + unitRawFiles: ['cards/source.json'], + action: expect.objectContaining({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + rawPaths: ['cards/source.json'], + targetConnectionId: 'warehouse', + }), + }), + ]), + }, + error: { message: expect.stringContaining('total_contract_arr_cents') }, + }); + + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'final_gates', + message: expect.stringContaining('total_contract_arr_cents'), + details: expect.objectContaining({ + 
wikiReferenceGateScope: expect.objectContaining({ + global: true, + reasons: expect.arrayContaining(['semantic_layer_changed']), + pageKeysValidated: expect.arrayContaining(['account-segments']), + }), + touchedSlSources: expect.arrayContaining([ + expect.objectContaining({ connectionId: 'warehouse', sourceName: 'mart_account_segments' }), + ]), + actionOrigins: expect.arrayContaining([ + expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'source-only', + action: expect.objectContaining({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + rawPaths: ['cards/source.json'], + targetConnectionId: 'warehouse', + }), + }), + ]), + }), + }); + expect(failureReport.body.workUnits).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + unitKey: 'source-only', + actions: expect.arrayContaining([ + expect.objectContaining({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + rawPaths: ['cards/source.json'], + }), + ]), + }), + ]), + ); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('accepts two isolated work units that edit different wiki pages', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'page-a', rawFiles: ['pages/a.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'page-b', rawFiles: ['pages/b.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const unitKey = params.telemetryTags.unitKey; + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile(join(root, 
`wiki/global/${unitKey}.md`), `---\nsummary: ${unitKey}\nusage_mode: auto\n---\n\n${unitKey}\n`); + currentSession.actions.push({ target: 'wiki', type: 'created', key: unitKey, detail: unitKey }); + await currentSession.gitService.commitFiles([`wiki/global/${unitKey}.md`], `wu ${unitKey}`, 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['pages/a.json', 'h1'], + ['pages/b.json', 'h2'], + ]); + + const result = await runner.run({ jobId: 'job-clean', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }); + expect(result.failedWorkUnits).toEqual([]); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-clean/trace.jsonl'), 'utf-8'); + expect(trace.match(/patch_accepted/g)).toHaveLength(2); + expect(trace).toContain('ingest_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('classifies same-source patch application failure as a textual conflict', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'orders-a', rawFiles: ['orders/a.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'orders-b', rawFiles: ['orders/b.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-textual-resolver') { + return { stopReason: 'natural' }; + } + const suffix = params.telemetryTags.unitKey === 'orders-a' ? 
'a' : 'b'; + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/orders.yaml'), + `name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures:\n - name: order_count_${suffix}\n expr: count(*)\n`, + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'orders'); + currentSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: suffix, targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/orders.yaml'], `wu ${suffix}`, 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['orders/a.json', 'h1'], + ['orders/b.json', 'h2'], + ]); + + await expect( + runner.run({ jobId: 'job-text-conflict', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/isolated diff textual conflict/); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-text-conflict/trace.jsonl'), 'utf-8'); + expect(trace).toContain('patch_textual_conflict'); + expect(trace).toContain('textual_conflict_resolver_failed'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('makes deterministic projection visible to child worktrees before WorkUnit synthesis', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'wiki-projected', rawFiles: ['projected/wiki.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + adapter.project = vi.fn(async ({ workdir }) => { + await mkdir(join(workdir, 'semantic-layer/warehouse'), { recursive: true }); + 
await writeFile( + join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + return { + warnings: [], + errors: [], + touchedSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }], + changedWikiPageKeys: [], + }; + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await expect(readFile(join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), 'utf-8')).resolves.toContain( + 'total_contract_arr', + ); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/projected-orders.md'), + '---\nsummary: Projected orders\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR `mart_account_segments.total_contract_arr`.\n', + ); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'projected-orders', detail: 'Projected orders' }); + await currentSession.gitService.commitFiles(['wiki/global/projected-orders.md'], 'wu projected wiki', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['projected/wiki.json', 'h1']]); + + const result = await runner.run({ jobId: 'job-projection', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }); + expect(result.failedWorkUnits).toEqual([]); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-projection/trace.jsonl'), 'utf-8'); + 
expect(trace).toContain('deterministic_projection_finished'); + expect(trace).toContain('deterministic_projection_committed'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects Notion-style changed wiki pages with invalid sl_refs', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'notion-page', rawFiles: ['pages/notion.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + return { stopReason: 'natural' as const }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile(join(root, 'wiki/global/notion-page.md'), '---\nsummary: Notion page\nusage_mode: auto\nsl_refs:\n - missing_source\n---\n\nBody\n'); + currentSession.actions.push({ target: 'wiki', type: 'created', key: 'notion-page', detail: 'Notion page' }); + await currentSession.gitService.commitFiles(['wiki/global/notion-page.md'], 'wu notion', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/notion.json', 'h1']]); + + await expect( + runner.run({ jobId: 'job-invalid-slrefs', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/gate repair completed without editing an allowed path/); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + 
+ it('runs final artifact gates after reconciliation mutates the integration tree', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'mart_account_segments', + detail: 'Source with renamed ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source', + 'KTX Test', + 'system@ktx.local', + ); + } else { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nReconcile wrote stale ARR `mart_account_segments.total_contract_arr_cents`.\n', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', 
+ detail: 'Stale reconcile wiki page', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'reconcile wiki', 'KTX Test', 'system@ktx.local'); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-reconcile-stale', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/total_contract_arr_cents/); + + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-reconcile-stale/trace.jsonl'), 'utf-8'); + expect(trace).toContain('reconciliation_finished'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('ingest_failed'); + /* The rejected run must leave main at the HEAD captured before the run, so the 'reconcile wiki' commit never lands. */ + expect(await runtime.git.revParseHead()).toBe(preRunHead); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('stores a failure report and postmortem trace for final gate failures', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + const createdReports: any[] = []; + deps.reports.create = vi.fn(async (args: any) => { + createdReports.push(args); + return { id: `report-${createdReports.length}` }; + }); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'card-wiki', rawFiles: ['cards/wiki.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'card-source', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = 
rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.unitKey === 'card-wiki') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR is `mart_account_segments.total_contract_arr_cents`.\n', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Account segments', + rawPaths: ['cards/wiki.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/account-segments.md'], 'wu wiki', 'KTX Test', 'system@ktx.local'); + } + if (params.telemetryTags.unitKey === 'card-source') { + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'mart_account_segments', + detail: 'Dollar measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['cards/wiki.json', 'h1'], + ['cards/source.json', 'h2'], + ]); + + await expect( + runner.run({ + jobId: 'job-trace-failure', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/total_contract_arr_cents/); + + 
const failureReport = createdReports.find((report) => report.body.status === 'failed'); + expect(failureReport.body.tracePath).toContain('job-trace-failure/trace.jsonl'); + expect(failureReport.body.failure).toMatchObject({ phase: 'final_gates' }); + + const events = (await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-trace-failure/trace.jsonl'), 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(events.map((event) => event.event)).toEqual( + expect.arrayContaining([ + 'ingest_started', + 'input_snapshot', + 'work_units_planned', + 'isolated_diff_enabled', + 'work_unit_child_created', + 'work_unit_patch_collected', + 'patch_apply_started', + 'patch_accepted', + 'reconciliation_finished', + 'final_artifact_gates_failed', + 'ingest_failed', + 'failure_report_created', + ]), + ); + const failed = events.find((event) => event.event === 'ingest_failed'); + expect(failed).toMatchObject({ + runId: 'run-1', + syncId: expect.any(String), + data: { phase: 'final_gates', tracePath: expect.stringContaining('trace.jsonl') }, + error: { message: expect.stringContaining('total_contract_arr_cents') }, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects invalid provenance raw paths before squash reaches main', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + const createdReports: any[] = []; + deps.reports.create = vi.fn(async (args: any) => { + createdReports.push(args); + return { id: `report-${createdReports.length}` }; + }); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'card-valid-artifacts', + rawFiles: ['cards/source.json'], + peerFileIndex: [], + dependencyPaths: [], + }, + ], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + 
deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr`.\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'mart_account_segments', + detail: 'Valid source', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Valid wiki with invalid provenance raw path', + rawPaths: ['cards/missing.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'valid artifacts with invalid provenance', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-invalid-provenance', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/provenance row references raw path outside this snapshot: 
cards\/missing\.json/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + expect(deps.provenance.insertMany).not.toHaveBeenCalled(); + + const failureReport = createdReports.find((report) => report.body.status === 'failed'); + expect(failureReport.body.tracePath).toContain('job-invalid-provenance/trace.jsonl'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'provenance_validation', + message: expect.stringContaining('cards/missing.json'), + }); + expect(failureReport.body.failure.details).toMatchObject({ + invalidRawPaths: ['cards/missing.json'], + currentRawPaths: ['cards/source.json'], + invalidRows: expect.arrayContaining([ + expect.objectContaining({ + row: expect.objectContaining({ + rawPath: 'cards/missing.json', + artifactKind: 'wiki', + artifactKey: 'account-segments', + actionType: 'wiki_written', + }), + origin: expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'card-valid-artifacts', + actionIndex: 1, + unitRawFiles: ['cards/source.json'], + action: expect.objectContaining({ + target: 'wiki', + type: 'created', + key: 'account-segments', + rawPaths: ['cards/missing.json'], + }), + }), + }), + ]), + }); + expect(failureReport.body.provenanceRows).toEqual( + expect.arrayContaining([ + expect.objectContaining({ rawPath: 'cards/source.json', artifactKind: 'sl', artifactKey: 'mart_account_segments' }), + expect.objectContaining({ rawPath: 'cards/missing.json', artifactKind: 'wiki', artifactKey: 'account-segments' }), + ]), + ); + expect(failureReport.body.workUnits).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + unitKey: 'card-valid-artifacts', + rawFiles: ['cards/source.json'], + actions: expect.arrayContaining([ + expect.objectContaining({ + target: 'wiki', + key: 'account-segments', + rawPaths: ['cards/missing.json'], + }), + ]), + }), + ]), + ); + + const events = (await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-invalid-provenance/trace.jsonl'), 'utf-8')) + .trim() + 
.split('\n') + .map((line) => JSON.parse(line)); + expect(events.map((event) => event.event)).toEqual( + expect.arrayContaining([ + 'final_artifact_gates_finished', + 'provenance_rows_validation_failed', + 'ingest_failed', + 'failure_report_created', + ]), + ); + expect(events.map((event) => event.event)).not.toContain('squash_finished'); + const validationFailure = events.find((event) => event.event === 'provenance_rows_validation_failed'); + expect(validationFailure).toMatchObject({ + phase: 'provenance', + data: { + invalidRawPaths: ['cards/missing.json'], + currentRawPaths: ['cards/source.json'], + invalidRows: expect.arrayContaining([ + expect.objectContaining({ + row: expect.objectContaining({ rawPath: 'cards/missing.json' }), + origin: expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'card-valid-artifacts', + actionIndex: 1, + }), + }), + ]), + }, + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects slDisallowed patches that touch semantic-layer files', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { + unitKey: 'lookml-mismatch', + rawFiles: ['views/orders.lkml'], + peerFileIndex: [], + dependencyPaths: [], + slDisallowed: true, + slDisallowedReason: 'lookml_connection_mismatch', + }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/warehouse/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + 
currentSession.actions.push({ target: 'sl', type: 'created', key: 'orders', detail: 'forbidden', targetConnectionId: 'warehouse' }); + await currentSession.gitService.commitFiles(['semantic-layer/warehouse/orders.yaml'], 'forbidden sl', 'KTX Test', 'system@ktx.local'); + return { stopReason: 'natural' }; + }) as never; + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['views/orders.lkml', 'h1']]); + + await expect( + runner.run({ jobId: 'job-sl-disallowed', connectionId: 'warehouse', sourceKey: 'metabase', trigger: 'upload', bundleRef: { kind: 'upload', uploadId: 'upload' } }), + ).rejects.toThrow(/isolated diff textual conflict/); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-sl-disallowed/trace.jsonl'), 'utf-8'); + expect(trace).toContain('patch_policy_rejected'); + expect(trace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/warehouse/orders.yaml'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects final wiki refs broken by another accepted WorkUnit before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'wiki/global/source-page.md'), + '---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n', + ); + await runtime.git.commitFiles(['wiki/global/source-page.md'], 'seed source page', 'KTX Test', 'system@ktx.local'); + const preRunHead = await runtime.git.revParseHead(); + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'page-ref', rawFiles: ['pages/ref.json'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'page-delete', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = 
vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.unitKey === 'page-ref') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile( + join(root, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n', + ); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'account-segments', + detail: 'Page with wiki ref', + rawPaths: ['pages/ref.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/account-segments.md'], + 'wu page ref', + 'KTX Test', + 'system@ktx.local', + ); + } + if (params.telemetryTags.unitKey === 'page-delete') { + await rm(join(root, 'wiki/global/source-page.md'), { force: true }); + currentSession.actions.push({ + target: 'wiki', + type: 'removed', + key: 'source-page', + detail: 'Delete referenced page', + rawPaths: ['pages/delete.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/source-page.md'], + 'wu delete source page', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['pages/ref.json', 'h1'], + ['pages/delete.json', 'h2'], + ]); + + await expect( + runner.run({ + jobId: 'job-wiki-ref-conflict', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> source-page/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-wiki-ref-conflict/trace.jsonl'), 
'utf-8'); + expect(trace).toContain('final_artifact_gates_failed'); + expect(trace).toContain('account-segments -> source-page'); + expect(trace).toContain('ingest_failed'); + expect(trace).toContain('failure_report_created'); + expect(trace).not.toContain('squash_finished'); + + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'final_gates', + message: expect.stringContaining('account-segments -> source-page'), + details: expect.objectContaining({ + changedWikiPageKeys: expect.arrayContaining(['account-segments']), + workUnitPatchTouchedPaths: expect.arrayContaining([ + 'wiki/global/account-segments.md', + 'wiki/global/source-page.md', + ]), + }), + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects unchanged inbound wiki refs broken by an isolated wiki deletion', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'wiki/global/source-page.md'), + '---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n', + ); + await runtime.git.commitFiles( + ['wiki/global/source-page.md', 'wiki/global/account-segments.md'], + 'seed inbound wiki refs', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'delete-target-page', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + 
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.unitKey !== 'delete-target-page') { + return { stopReason: 'natural' }; + } + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await rm(join(root, 'wiki/global/source-page.md'), { force: true }); + currentSession.actions.push({ + target: 'wiki', + type: 'removed', + key: 'source-page', + detail: 'Delete referenced page', + rawPaths: ['pages/delete.json'], + }); + await currentSession.gitService.commitFiles( + ['wiki/global/source-page.md'], + 'wu delete target page', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/delete.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-existing-wiki-ref-stale', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> source-page/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const events = (await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-wiki-ref-stale/trace.jsonl'), 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(events.map((event) => event.event)).toEqual( + expect.arrayContaining([ + 'final_artifact_gates_started', + 'final_artifact_gates_failed', + 'ingest_failed', + 'failure_report_created', + ]), + ); + expect(events.map((event) => event.event)).not.toContain('squash_finished'); + const gateFailure = events.find((event) => event.event === 'final_artifact_gates_failed'); + expect(gateFailure).toMatchObject({ + data: { + wikiReferenceGateScope: { + global: true, + 
reasons: expect.arrayContaining(['wiki_page_removed']), + removedWikiPageKeys: expect.arrayContaining(['source-page']), + pageKeysValidated: expect.arrayContaining(['account-segments']), + }, + actionOrigins: expect.arrayContaining([ + expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'delete-target-page', + unitRawFiles: ['pages/delete.json'], + action: expect.objectContaining({ + target: 'wiki', + type: 'removed', + key: 'source-page', + rawPaths: ['pages/delete.json'], + }), + }), + ]), + }, + error: { message: expect.stringContaining('account-segments -> source-page') }, + }); + + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'final_gates', + message: expect.stringContaining('account-segments -> source-page'), + details: expect.objectContaining({ + wikiReferenceGateScope: expect.objectContaining({ + global: true, + reasons: expect.arrayContaining(['wiki_page_removed']), + removedWikiPageKeys: expect.arrayContaining(['source-page']), + pageKeysValidated: expect.arrayContaining(['account-segments']), + }), + changedWikiPageKeys: expect.arrayContaining(['source-page']), + actionOrigins: expect.arrayContaining([ + expect.objectContaining({ + source: 'work_unit_action', + unitKey: 'delete-target-page', + action: expect.objectContaining({ + target: 'wiki', + type: 'removed', + key: 'source-page', + rawPaths: ['pages/delete.json'], + }), + }), + ]), + }), + }); + expect(failureReport.body.workUnits).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + unitKey: 'delete-target-page', + actions: expect.arrayContaining([ + expect.objectContaining({ + target: 'wiki', + type: 'removed', + key: 'source-page', + rawPaths: ['pages/delete.json'], + }), + ]), + }), + ]), + ); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects 
WorkUnit patches that touch unauthorized semantic-layer target connections', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'finance-source', rawFiles: ['cards/finance.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async () => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/finance/orders.yaml'), + 'name: orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'finance', 'orders'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'orders', + detail: 'Unauthorized target', + targetConnectionId: 'finance', + rawPaths: ['cards/finance.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/finance/orders.yaml'], + 'wu unauthorized target', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/finance.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-unauthorized-wu-target', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/isolated diff textual conflict.*semantic-layer target connection not allowed/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await 
readFile(join(runtime.configDir, '.ktx/ingest-traces/job-unauthorized-wu-target/trace.jsonl'), 'utf-8'); + expect(trace).toContain('patch_policy_rejected'); + expect(trace).toContain('semantic-layer/finance/orders.yaml'); + expect(trace).toContain('allowedTargetConnectionIds'); + expect(trace).toContain('ingest_failed'); + expect(trace).toContain('failure_report_created'); + expect(trace).not.toContain('squash_finished'); + + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'integration', + message: expect.stringContaining('semantic-layer target connection not allowed'), + }); + expect(failureReport.body.failure.details).toMatchObject({ + unitKey: 'finance-source', + allowedTargetConnectionIds: ['warehouse'], + touchedPaths: ['semantic-layer/finance/orders.yaml'], + reason: expect.stringContaining('semantic-layer/finance/orders.yaml (finance)'), + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('rejects reconciliation mutations that touch unauthorized semantic-layer target connections before squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'valid-page', rawFiles: ['pages/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + const root = rootOfConfig(currentSession.configService, runtime.configDir); + if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await mkdir(join(root, 'wiki/global'), { recursive: true }); + await writeFile(join(root, 
'wiki/global/valid-page.md'), '---\nsummary: Valid page\nusage_mode: auto\n---\n\nValid\n'); + currentSession.actions.push({ + target: 'wiki', + type: 'created', + key: 'valid-page', + detail: 'Valid page', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles(['wiki/global/valid-page.md'], 'wu valid page', 'KTX Test', 'system@ktx.local'); + } else { + await mkdir(join(root, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(root, 'semantic-layer/finance/reconcile_orders.yaml'), + 'name: reconcile_orders\ngrain: [id]\ncolumns: [{name: id, type: string}]\njoins: []\nmeasures: []\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'finance', 'reconcile_orders'); + currentSession.actions.push({ + target: 'sl', + type: 'created', + key: 'reconcile_orders', + detail: 'Unauthorized reconcile target', + targetConnectionId: 'finance', + rawPaths: ['pages/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/finance/reconcile_orders.yaml'], + 'reconcile unauthorized target', + 'KTX Test', + 'system@ktx.local', + ); + } + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['pages/source.json', 'h1']]); + const preRunHead = await runtime.git.revParseHead(); + + await expect( + runner.run({ + jobId: 'job-unauthorized-reconcile-target', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/semantic-layer target connection not allowed/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-unauthorized-reconcile-target/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('semantic_layer_target_policy_started'); + expect(trace).toContain('semantic_layer_target_policy_failed'); + 
expect(trace).toContain('allowedTargetConnectionIds'); + expect(trace).toContain('semantic-layer/finance/reconcile_orders.yaml'); + expect(trace).toContain('ingest_failed'); + expect(trace).toContain('failure_report_created'); + expect(trace).not.toContain('squash_finished'); + const failureReport = (deps.reports.create as any).mock.calls + .map((call: any[]) => call[0]) + .find((report: any) => report.body.status === 'failed'); + expect(failureReport.body.failure).toMatchObject({ + phase: 'target_policy', + message: expect.stringContaining('semantic-layer target connection not allowed'), + }); + expect(failureReport.body.failure.details).toMatchObject({ + allowedTargetConnectionIds: ['warehouse'], + touchedPaths: expect.arrayContaining(['semantic-layer/finance/reconcile_orders.yaml']), + }); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('repairs additive same-source textual conflicts before final gates and squash', async () => { + const runtime = await makeRealGitRuntime(); + try { + const { deps } = makeDeps(runtime); + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-textual-resolver') { + const current = await params.toolSet.read_integration_file.execute({ + path: 'semantic-layer/warehouse/mart_account_segments.yaml', + }); + expect(current.markdown).toContain('total_contract_arr_cents'); + const patch = await params.toolSet.read_failed_patch.execute({}); + expect(patch.markdown).toContain('account_count'); + await params.toolSet.write_integration_file.execute({ + path: 'semantic-layer/warehouse/mart_account_segments.yaml', + content: + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' 
+ + 'joins: []\n' + + 'measures:\n' + + ' - name: total_contract_arr_cents\n' + + ' expr: sum(contract_arr)\n' + + ' - name: account_count\n' + + ' expr: count_distinct(account_id)\n', + }); + return { stopReason: 'natural' }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true }); + if (params.telemetryTags.unitKey === 'card-wiki') { + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' + + 'joins: []\n' + + 'measures:\n' + + ' - name: total_contract_arr_cents\n' + + ' expr: sum(contract_arr)\n', + ); + } else if (params.telemetryTags.unitKey === 'card-source') { + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\n' + + 'grain: [account_id]\n' + + 'columns: [{name: account_id, type: string}]\n' + + 'joins: []\n' + + 'measures:\n' + + ' - name: account_count\n' + + ' expr: count_distinct(account_id)\n', + ); + } + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Updated account segments source', + targetConnectionId: 'warehouse', + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + `wu ${params.telemetryTags.unitKey}`, + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [ + ['cards/wiki.json', 'hash-a'], + ['cards/source.json', 'hash-b'], + ]); + + const result = await runner.run({ + jobId: 'job-resolver-e2e', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'manual_resync', + bundleRef: { kind: 
'upload', uploadId: 'upload-1' }, + }); + + expect(result.commitSha).toBeTruthy(); + const source = await readFile(join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), 'utf-8'); + expect(source).toContain('total_contract_arr_cents'); + expect(source).toContain('account_count'); + expect(deps.agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-textual-resolver', + unitKey: 'card-source', + }), + }), + ); + const successReport = (deps.reports.create as any).mock.calls.at(-1)?.[0]?.body; + expect(successReport.isolatedDiff).toMatchObject({ + acceptedPatches: 2, + textualConflicts: 1, + semanticConflicts: 0, + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-resolver-e2e/trace.jsonl'), 'utf-8'); + expect(trace).toContain('textual_conflict_resolver_repaired'); + expect(trace).toContain('patch_accepted_after_textual_resolution'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('repairs final wiki body refs before squash when the repair agent edits the scoped page', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses 
`mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed stale wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + const gateError = await params.toolSet.read_gate_error.execute({}); + expect(gateError.markdown).toContain('total_contract_arr_cents'); + const page = await params.toolSet.read_repair_file.execute({ + path: 'wiki/global/account-segments.md', + }); + await params.toolSet.write_repair_file.execute({ + path: 'wiki/global/account-segments.md', + content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'), + }); + return { stopReason: 'natural' as const }; + } + if (params.modelRole === 'reconcile') { + return { stopReason: 'natural' as const }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + 
rawPaths: ['cards/source.json'], + }); + await currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' as const }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + const result = await runner.run({ + jobId: 'job-final-gate-repair', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }); + + expect(result.commitSha).toBeTruthy(); + await expect(readFile(join(runtime.configDir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain( + 'mart_account_segments.total_contract_arr', + ); + await expect(readFile(join(runtime.configDir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.not.toContain( + 'total_contract_arr_cents', + ); + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0] as any; + expect(reportCreate.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }); + const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-final-gate-repair/trace.jsonl'), 'utf-8'); + expect(trace).toContain('gate_repair_repaired'); + expect(trace).toContain('final_artifact_gates_after_gate_repair_finished'); + expect(trace).toContain('final_gate_repair_committed'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); + + it('fails before squash when final gate repair makes no edit', async () => { + const runtime = await makeRealGitRuntime(); + try { + await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true }); + await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true }); + await writeFile( + join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: 
mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n', + ); + await writeFile( + join(runtime.configDir, 'wiki/global/account-segments.md'), + '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n', + ); + await runtime.git.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'], + 'seed stale wiki body ref', + 'KTX Test', + 'system@ktx.local', + ); + const preRunHead = await runtime.git.revParseHead(); + + const { deps, adapter } = makeDeps(runtime); + adapter.chunk.mockResolvedValue({ + workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }], + }); + + let currentSession: any = null; + deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => { + currentSession = toolSession; + return { toRuntimeTools: vi.fn(() => ({})) }; + }); + deps.agentRunner.runLoop = vi.fn(async (params: any) => { + if (params.telemetryTags.operationName === 'ingest-isolated-diff-gate-repair') { + return { stopReason: 'natural' as const }; + } + if (params.modelRole === 'reconcile') { + return { stopReason: 'natural' as const }; + } + + const root = rootOfConfig(currentSession.configService, runtime.configDir); + await writeFile( + join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'), + 'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n', + ); + addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments'); + currentSession.actions.push({ + target: 'sl', + type: 'updated', + key: 'mart_account_segments', + detail: 'Rename ARR measure', + targetConnectionId: 'warehouse', + rawPaths: ['cards/source.json'], + }); + await 
currentSession.gitService.commitFiles( + ['semantic-layer/warehouse/mart_account_segments.yaml'], + 'wu source rename', + 'KTX Test', + 'system@ktx.local', + ); + return { stopReason: 'natural' as const }; + }) as never; + + const runner = new IngestBundleRunner(deps); + await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]); + + await expect( + runner.run({ + jobId: 'job-final-gate-repair-fails', + connectionId: 'warehouse', + sourceKey: 'metabase', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload' }, + }), + ).rejects.toThrow(/gate repair completed without editing an allowed path/); + + expect(await runtime.git.revParseHead()).toBe(preRunHead); + const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0] as any; + expect(reportCreate.body.status).toBe('failed'); + expect(reportCreate.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 0, + gateRepairFailures: 1, + }); + const trace = await readFile( + join(runtime.configDir, '.ktx/ingest-traces/job-final-gate-repair-fails/trace.jsonl'), + 'utf-8', + ); + expect(trace).toContain('gate_repair_failed'); + expect(trace).not.toContain('squash_finished'); + } finally { + await rm(runtime.homeDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index 9ccf1aef..e3a95c5f 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -1,8 +1,7 @@ -import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { GitService } from '../core/index.js'; import { addTouchedSlSource } from '../tools/index.js'; import { 
IngestBundleRunner } from './ingest-bundle.runner.js'; import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js'; @@ -123,9 +122,15 @@ const makeDeps = () => { }; const scopedGit = { revParseHead: vi.fn().mockResolvedValue('h'), - commitFiles: vi.fn(), + commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }), + commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }), resetHardTo: vi.fn(), assertWorktreeClean: vi.fn().mockResolvedValue(undefined), + writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => { + await writeFile(patchPath, '', 'utf-8'); + }), + applyPatchFile3WayIndex: vi.fn(), + diffNameStatus: vi.fn().mockResolvedValue([]), }; const sessionWorktreeService = { create: vi.fn().mockResolvedValue({ @@ -167,10 +172,12 @@ const makeDeps = () => { loadPrompt: vi.fn().mockResolvedValue('base-framing'), }; const wikiService = { - forWorktree: vi.fn().mockReturnValue({}), + forWorktree: vi.fn(), + listPageKeys: vi.fn().mockResolvedValue([]), readPage: vi.fn().mockResolvedValue(null), syncFromCommit: vi.fn().mockResolvedValue(undefined), }; + wikiService.forWorktree.mockReturnValue(wikiService); const knowledgeSlRefs = { syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }), }; @@ -178,7 +185,7 @@ const makeDeps = () => { listPagesForUser: vi.fn().mockResolvedValue([]), }; const semanticLayerService = { - forWorktree: vi.fn().mockReturnValue({}), + forWorktree: vi.fn(), listFilesForConnection: vi .fn() .mockImplementation((connectionId: string) => @@ -193,6 +200,7 @@ const makeDeps = () => { }), ), }; + semanticLayerService.forWorktree.mockReturnValue(semanticLayerService); const slSearchService = { indexSources: vi.fn().mockResolvedValue(undefined), }; @@ -255,8 +263,12 @@ const buildRunner = (deps: ReturnType = makeDeps(), overrides: resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`, resolvePullDir: (jobId) => 
`/tmp/ktx-test/ingest-pulls/${jobId}`, resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`, + resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`, + }, + settings: { + probeRowCount: 1, + memoryIngestionModel: 'test-model', }, - settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' }, skillsRegistry: deps.skillsRegistry as any, promptService: deps.promptService as any, wikiService: deps.wikiService as any, @@ -1505,7 +1517,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { const runner = buildRunner(deps); (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ - currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]), + currentHashes: new Map([['a.yml', 'h1']]), rawDirInWorktree: 'raw-sources/looker-run/fake/s', }); (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); @@ -1570,6 +1582,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }], parseArtifacts: { semanticModels: [{ name: 'orders' }] }, }); + deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']); deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) => Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }), ); @@ -1972,9 +1985,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed'); const sessionGit = { revParseHead: vi.fn().mockResolvedValue('h'), - commitFiles: vi.fn(), + commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }), + commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }), resetHardTo: vi.fn(), assertWorktreeClean: vi.fn().mockRejectedValue(assertError), + writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, 
patchPath: string) => { + await writeFile(patchPath, '', 'utf-8'); + }), + applyPatchFile3WayIndex: vi.fn(), + diffNameStatus: vi.fn().mockResolvedValue([]), }; deps.sessionWorktreeService.create.mockResolvedValue({ chatId: 'j1', @@ -2005,135 +2024,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled(); }); - it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => { - const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-')); - try { - const configDir = join(homeDir, 'config'); - const mainGit = new GitService({ - storage: { configDir, homeDir }, - git: { - userName: 'System User', - userEmail: 'system@example.com', - bootstrapMessage: 'Initialize test config repo', - bootstrapAuthor: 'test-system', - bootstrapAuthorEmail: 'system@example.com', - }, - }); - await mainGit.onModuleInit(); - const baseSha = await mainGit.revParseHead(); - if (!baseSha) { - throw new Error('no base sha'); - } - - const deps = makeDeps(); - const sessionDir = join(homeDir, '.worktrees', 'session-j1'); - const sessionBranch = 'session/j1'; - let currentToolSession: any = null; - - deps.gitService = mainGit as any; - deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => { - await mkdir(join(homeDir, '.worktrees'), { recursive: true }); - await mainGit.addWorktree(sessionDir, sessionBranch, startSha); - return { - chatId: 'j1', - workdir: sessionDir, - branch: sessionBranch, - baseSha: startSha, - createdAt: new Date(), - git: mainGit.forWorktree(sessionDir), - config: {}, - }; - }); - deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined); - deps.adapter.chunk.mockResolvedValue({ - workUnits: [ - { unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] }, - { unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] }, - ], - }); - 
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { - currentToolSession = toolSession; - return { - toRuntimeTools: vi.fn().mockReturnValue({}), - getAllTools: vi.fn().mockReturnValue([]), - getToolNames: vi.fn().mockReturnValue([]), - }; - }); - deps.slValidator.validateSingleSource.mockImplementation( - (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({ - errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [], - warnings: [], - }), - ); - deps.agentRunner.runLoop.mockImplementation(async (params: any) => { - const unitKey = params.telemetryTags?.unitKey; - if (unitKey === 'wu-good') { - await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true }); - await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n'); - addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good'); - currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' }); - await currentToolSession.gitService.commitFiles( - ['semantic-layer/c1/good.yaml'], - 'test: add good source', - 'KTX Test', - 'system@ktx.local', - ); - } - if (unitKey === 'wu-bad') { - await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true }); - await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n'); - addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad'); - currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' }); - await currentToolSession.gitService.commitFiles( - ['semantic-layer/c1/bad.yaml'], - 'test: add bad source', - 'KTX Test', - 'system@ktx.local', - ); - } - return { stopReason: 'natural' }; - }); - - const runner = buildRunner(deps); - (runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => { - const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's'); - await mkdir(rawDir, { recursive: true }); - await 
writeFile(join(rawDir, 'good.raw'), 'good raw'); - await writeFile(join(rawDir, 'bad.raw'), 'bad raw'); - return { - currentHashes: new Map([ - ['good.raw', 'good-hash'], - ['bad.raw', 'bad-hash'], - ]), - rawDirInWorktree: 'raw-sources/c1/fake/s', - }; - }); - (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); - - const result = await runner.run({ - jobId: 'j1', - connectionId: 'c1', - sourceKey: 'fake', - trigger: 'upload', - bundleRef: { kind: 'upload', uploadId: 'upload-x' }, - }); - - expect(result.failedWorkUnits).toEqual(['wu-bad']); - expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good'); - expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull(); - expect(deps.reportsRepo.create).toHaveBeenCalledWith( - expect.objectContaining({ - body: expect.objectContaining({ - failedWorkUnits: ['wu-bad'], - }), - }), - ); - await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow(); - } finally { - await rm(homeDir, { recursive: true, force: true }); - } - }); - it('fails the run and rethrows when the adapter cannot detect the bundle', async () => { const deps = makeDeps(); deps.adapter.detect.mockResolvedValue(false); diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index 614f8aaa..a390ef08 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -1,4 +1,4 @@ -import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { cp, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; import { dirname, join } from 'node:path'; import pLimit from 'p-limit'; import { z } from 'zod'; @@ -6,20 +6,38 @@ import { type KtxLogger, noopLogger } from '../core/index.js'; import { createRuntimeToolDescriptorFromAiTool, type KtxRuntimeToolSet } from '../llm/index.js'; import 
type { CaptureSession, MemoryAction } from '../memory/index.js'; import type { SemanticLayerService, SemanticLayerSource, SlValidationDeps } from '../sl/index.js'; -import { createTouchedSlSources, type ToolContext, type ToolSession } from '../tools/index.js'; +import { createTouchedSlSources, type ToolContext, type ToolSession, type TouchedSlSource } from '../tools/index.js'; +import type { KnowledgeWikiService } from '../wiki/index.js'; import { findDanglingWikiRefsForActions } from '../wiki/wiki-ref-validation.js'; import { actionTargetConnectionId } from './action-identity.js'; import { NOTION_DEFAULT_MAX_KNOWLEDGE_CREATES_PER_RUN } from './adapters/notion/types.js'; +import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js'; import { selectRelevantCanonicalPins } from './canonical-pins.js'; +import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js'; +import { FileIngestTraceWriter, ingestTracePathForJob, type IngestTraceWriter, traceTimed } from './ingest-trace.js'; +import { integrateWorkUnitPatch } from './isolated-diff/patch-integrator.js'; +import { resolveTextualConflict } from './isolated-diff/textual-conflict-resolver.js'; +import { runIsolatedWorkUnit } from './isolated-diff/work-unit-executor.js'; import { sanitizeMemoryFlowError } from './memory-flow/live-buffer.js'; -import type { MemoryFlowPlannedWorkUnit } from './memory-flow/types.js'; -import type { ContextEvidenceIndexSummary, IngestBundleRunnerDeps, PageTriageRunResult } from './ports.js'; +import type { CanonicalPin } from './canonical-pins.js'; +import type { MemoryFlowEvent, MemoryFlowEventSink, MemoryFlowPlannedWorkUnit } from './memory-flow/types.js'; +import type { + ContextEvidenceIndexSummary, + IngestBundleRunnerDeps, + IngestProvenanceInsert, + IngestProvenanceRow, + IngestRunsPort, + IngestSessionWorktree, + PageTriageRunResult, +} from './ports.js'; import { buildSyncId, rawSourcesDirForSync } from 
'./raw-sources-paths.js'; import { buildStageIndexFromReportBody, postProcessorSavedMemoryCounts, type IngestReportPostProcessorOutcome, + type IngestReportProvenanceDetail, type IngestReportSnapshot, + type IngestReportWorkUnit, } from './reports.js'; import { buildReconcileSystemPrompt, @@ -32,6 +50,7 @@ import { executeWorkUnit, type WorkUnitOutcome } from './stages/stage-3-work-uni import { runReconciliationStage4 } from './stages/stage-4-reconciliation.js'; import type { StageIndex } from './stages/stage-index.types.js'; import { validateWuTouchedSources } from './stages/validate-wu-sources.js'; +import { assertSemanticLayerTargetPathsAllowed } from './semantic-layer-target-policy.js'; import { createEmitArtifactResolutionTool } from './tools/emit-artifact-resolution.tool.js'; import { createEmitConflictResolutionTool } from './tools/emit-conflict-resolution.tool.js'; import { createEmitEvictionDecisionTool } from './tools/emit-eviction-decision.tool.js'; @@ -47,6 +66,7 @@ import { type MutableToolTranscriptSummary, } from './tools/tool-transcript-summary.js'; import type { + IngestDiffSummary, EvictionUnit, IngestBundleJob, IngestBundleResult, @@ -56,6 +76,18 @@ import type { } from './types.js'; import { repairWikiSlRefs, type WikiSlRefRepairResult } from './wiki-sl-ref-repair.js'; +type MemoryFlowStageProgress = Extract; + +async function copyTransientIngestEvidence(sourceWorkdir: string, targetWorkdir: string): Promise { + const source = join(sourceWorkdir, '.ktx/ingest-evidence'); + const target = join(targetWorkdir, '.ktx/ingest-evidence'); + await cp(source, target, { recursive: true, force: true }).catch((error: NodeJS.ErrnoException) => { + if (error.code !== 'ENOENT') { + throw error; + } + }); +} + function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit { return { unitKey: workUnit.unitKey, @@ -128,6 +160,40 @@ function rawPathsForAction(action: MemoryAction, fallbackRawPaths: string[]): st return action.rawPaths 
&& action.rawPaths.length > 0 ? [...new Set(action.rawPaths)] : fallbackRawPaths; } +type ProvenanceRowOrigin = + | { + source: 'work_unit_action'; + unitKey: string; + unitIndex: number; + unitRawFiles: string[]; + actionIndex: number; + action: MemoryAction; + } + | { + source: 'reconciliation_action'; + actionIndex: number; + action: MemoryAction; + } + | { + source: 'artifact_resolution'; + resolutionIndex: number; + resolution: NonNullable[number]; + } + | { + source: 'raw_snapshot_fallback'; + rawPath: string; + }; + +interface ProvenanceRowDiagnostic { + row: IngestProvenanceInsert; + origin: ProvenanceRowOrigin; +} + +interface ProvenancePlan { + rows: IngestProvenanceInsert[]; + diagnostics: ProvenanceRowDiagnostic[]; +} + export class IngestBundleRunner { private readonly logger: KtxLogger; private readonly chainByConnection = new Map>(); @@ -259,7 +325,7 @@ export class IngestBundleRunner { protected async resolveStagedDir( ref: IngestBundleJob['bundleRef'], - ctx: { connectionId: string; sourceKey: string; jobId: string }, + ctx: { connectionId: string; sourceKey: string; jobId: string; memoryFlow?: MemoryFlowEventSink }, ): Promise { if (ref.kind === 'upload') { return this.deps.storage.resolveUploadDir(ref.uploadId); @@ -273,7 +339,11 @@ export class IngestBundleRunner { if (!adapter.fetch) { throw new Error(`source adapter '${ctx.sourceKey}' does not support scheduled_pull (no fetch() method)`); } - await adapter.fetch(ref.config, stagedDir, { connectionId: ctx.connectionId, sourceKey: ctx.sourceKey }); + await adapter.fetch(ref.config, stagedDir, { + connectionId: ctx.connectionId, + sourceKey: ctx.sourceKey, + ...(ctx.memoryFlow ? { memoryFlow: ctx.memoryFlow } : {}), + }); return stagedDir; } @@ -365,24 +435,6 @@ export class IngestBundleRunner { }; } - private buildFailedWorkUnitOutcome(wu: WorkUnit, error: unknown): WorkUnitOutcome { - return { - unitKey: wu.unitKey, - status: 'failed', - reason: error instanceof Error ? 
error.message : String(error), - preSha: '', - postSha: '', - actions: [], - touchedSlSources: [], - slDisallowed: wu.slDisallowed, - slDisallowedReason: wu.slDisallowedReason, - }; - } - - private formatWorkUnitFailure(outcome: WorkUnitOutcome): string { - return `WorkUnit ${outcome.unitKey} failed: ${outcome.reason ?? 'unknown failure'}`; - } - private filterWorkUnitsForTriage( workUnits: WorkUnit[], triageResult: { enabled: boolean; fullRawPaths: Set } | null, @@ -393,15 +445,552 @@ export class IngestBundleRunner { return workUnits.filter((wu) => wu.rawFiles.some((rawPath) => triageResult.fullRawPaths.has(rawPath))); } + private createTrace(job: IngestBundleJob): IngestTraceWriter { + const storage = this.deps.storage as typeof this.deps.storage & { resolveTracePath?: (jobId: string) => string }; + return new FileIngestTraceWriter({ + tracePath: storage.resolveTracePath?.(job.jobId) ?? ingestTracePathForJob(this.deps.storage.homeDir, job.jobId), + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + level: this.deps.settings.ingestTraceLevel ?? 'debug', + }); + } + + private errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); + } + + private buildProvenancePlan(input: { + job: IngestBundleJob; + syncId: string; + currentHashes: Map; + stageIndex: StageIndex; + reconcileActions: MemoryAction[]; + }): ProvenancePlan { + const rows: IngestProvenanceInsert[] = []; + const diagnostics: ProvenanceRowDiagnostic[] = []; + const actionToType = (action: MemoryAction): IngestProvenanceInsert['actionType'] => { + if (action.target === 'wiki') { + return 'wiki_written'; + } + return action.type === 'created' ? 
'source_created' : 'measure_added'; + }; + const producedPaths = new Set(); + const pushRow = (row: IngestProvenanceInsert, origin: ProvenanceRowOrigin): void => { + rows.push(row); + diagnostics.push({ row, origin }); + producedPaths.add(row.rawPath); + }; + const pushActionProvenance = (rawPath: string, action: MemoryAction, origin: ProvenanceRowOrigin): void => { + const hash = input.currentHashes.get(rawPath) ?? ''; + pushRow( + { + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath, + rawContentHash: hash, + artifactKind: action.target, + artifactKey: action.key, + targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, input.job.connectionId) : null, + artifactContentHash: null, + actionType: actionToType(action), + }, + origin, + ); + }; + + input.stageIndex.workUnits.forEach((wu, unitIndex) => { + wu.actions.forEach((action, actionIndex) => { + for (const rawPath of rawPathsForAction(action, wu.rawFiles)) { + pushActionProvenance(rawPath, action, { + source: 'work_unit_action', + unitKey: wu.unitKey, + unitIndex, + unitRawFiles: wu.rawFiles, + actionIndex, + action, + }); + } + }); + }); + input.reconcileActions.forEach((action, actionIndex) => { + for (const rawPath of action.rawPaths ?? []) { + pushActionProvenance(rawPath, action, { + source: 'reconciliation_action', + actionIndex, + action, + }); + } + }); + (input.stageIndex.artifactResolutions ?? []).forEach((resolution, resolutionIndex) => { + const hash = input.currentHashes.get(resolution.rawPath) ?? 
''; + pushRow( + { + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath: resolution.rawPath, + rawContentHash: hash, + artifactKind: resolution.artifactKind, + artifactKey: resolution.artifactKey, + targetConnectionId: null, + artifactContentHash: null, + actionType: resolution.actionType, + }, + { + source: 'artifact_resolution', + resolutionIndex, + resolution, + }, + ); + }); + for (const [rawPath, hash] of input.currentHashes) { + if (producedPaths.has(rawPath)) { + continue; + } + pushRow( + { + connectionId: input.job.connectionId, + sourceKey: input.job.sourceKey, + syncId: input.syncId, + rawPath, + rawContentHash: hash, + artifactKind: null, + artifactKey: null, + targetConnectionId: null, + artifactContentHash: null, + actionType: 'skipped', + }, + { source: 'raw_snapshot_fallback', rawPath }, + ); + } + + return { rows, diagnostics }; + } + + private toReportProvenanceRows(rows: IngestProvenanceInsert[]): IngestReportProvenanceDetail[] { + return rows.map(({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({ + rawPath, + artifactKind, + artifactKey, + targetConnectionId: targetConnectionId ?? 
null, + actionType, + })); + } + + private toReportWorkUnits(stageIndex: StageIndex): IngestReportWorkUnit[] { + return stageIndex.workUnits.map((wu) => ({ + unitKey: wu.unitKey, + rawFiles: wu.rawFiles, + status: wu.status, + reason: wu.reason, + actions: wu.actions, + touchedSlSources: wu.touchedSlSources, + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + })); + } + + private provenanceValidationTraceData(input: { + plan: ProvenancePlan; + currentRawPaths: Set; + deletedRawPaths: Set; + }): Record { + const invalidRows = input.plan.diagnostics.filter( + ({ row }) => !input.currentRawPaths.has(row.rawPath) && !input.deletedRawPaths.has(row.rawPath), + ); + return { + rowCount: input.plan.rows.length, + currentRawPathCount: input.currentRawPaths.size, + deletedRawPathCount: input.deletedRawPaths.size, + currentRawPaths: [...input.currentRawPaths].sort(), + deletedRawPaths: [...input.deletedRawPaths].sort(), + invalidRawPaths: [...new Set(invalidRows.map(({ row }) => row.rawPath))].sort(), + invalidRows, + }; + } + + private wikiPageKeysFromPaths(paths: string[]): string[] { + return [ + ...new Set( + paths + .filter((path) => path.startsWith('wiki/global/') && path.endsWith('.md')) + .map((path) => path.slice('wiki/global/'.length, -'.md'.length)), + ), + ].sort(); + } + + private touchedSlSourcesFromPaths(paths: string[]): TouchedSlSource[] { + return paths + .filter((path) => path.startsWith('semantic-layer/') && path.endsWith('.yaml') && !path.includes('/_schema/')) + .map((path) => { + const [, connectionId, fileName] = path.split('/'); + return { connectionId: connectionId ?? '', sourceName: (fileName ?? 
'').replace(/\.yaml$/, '') }; + }) + .filter((source) => source.connectionId.length > 0 && source.sourceName.length > 0); + } + + private touchedSlSourcesFromActions(actions: MemoryAction[], fallbackConnectionId: string): TouchedSlSource[] { + return actions + .filter((action) => action.target === 'sl') + .map((action) => ({ + connectionId: actionTargetConnectionId(action, fallbackConnectionId), + sourceName: action.key, + })); + } + + private wikiPageKeysFromActions(actions: MemoryAction[]): string[] { + return actions.filter((action) => action.target === 'wiki').map((action) => action.key); + } + + private uniqueWikiPageKeys(keys: string[]): string[] { + return [...new Set(keys.filter((key): key is string => typeof key === 'string' && key.length > 0))].sort(); + } + + private uniqueTouchedSlSources(sources: TouchedSlSource[]): TouchedSlSource[] { + const seen = new Set(); + const unique: TouchedSlSource[] = []; + for (const source of sources) { + const key = `${source.connectionId}:${source.sourceName}`; + if (seen.has(key)) { + continue; + } + seen.add(key); + unique.push(source); + } + return unique.sort((left, right) => { + const byConnection = left.connectionId.localeCompare(right.connectionId); + return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection; + }); + } + + private removedWikiPageKeysFromActions(actions: MemoryAction[]): string[] { + return this.uniqueWikiPageKeys( + actions.filter((action) => action.target === 'wiki' && action.type === 'removed').map((action) => action.key), + ); + } + + private finalGateActionOrigins(input: { + stageIndex: StageIndex; + reconcileActions: MemoryAction[]; + fallbackConnectionId: string; + }) { + const actionContext = (action: MemoryAction, fallbackRawPaths: string[]) => ({ + target: action.target, + type: action.type, + key: action.key, + detail: action.detail, + rawPaths: rawPathsForAction(action, fallbackRawPaths), + ...(action.target === 'sl' ? 
{ targetConnectionId: actionTargetConnectionId(action, input.fallbackConnectionId) } : {}), + }); + + return [ + ...input.stageIndex.workUnits.flatMap((workUnit, unitIndex) => + workUnit.actions.map((action, actionIndex) => ({ + source: 'work_unit_action', + unitKey: workUnit.unitKey, + unitIndex, + unitRawFiles: workUnit.rawFiles, + actionIndex, + action: actionContext(action, workUnit.rawFiles), + })), + ), + ...input.reconcileActions.map((action, actionIndex) => ({ + source: 'reconciliation_action', + actionIndex, + action: actionContext(action, []), + })), + ]; + } + + private async wikiPageKeysForFinalGates(input: { + wikiService: ReturnType; + changedWikiPageKeys: string[]; + touchedSlSources: TouchedSlSource[]; + actions: MemoryAction[]; + }): Promise<{ + pageKeys: string[]; + trace: { + global: boolean; + reasons: string[]; + changedWikiPageKeys: string[]; + removedWikiPageKeys: string[]; + pageKeysValidated: string[]; + }; + }> { + const changedWikiPageKeys = this.uniqueWikiPageKeys(input.changedWikiPageKeys); + const removedWikiPageKeys = this.removedWikiPageKeysFromActions(input.actions); + const reasons: string[] = []; + if (input.touchedSlSources.length > 0) { + reasons.push('semantic_layer_changed'); + } + if (removedWikiPageKeys.length > 0) { + reasons.push('wiki_page_removed'); + } + + let pageKeys = changedWikiPageKeys; + if (reasons.length > 0) { + pageKeys = this.uniqueWikiPageKeys([ + ...changedWikiPageKeys, + ...(await input.wikiService.listPageKeys('GLOBAL', null)), + ]); + } + + return { + pageKeys, + trace: { + global: reasons.length > 0, + reasons, + changedWikiPageKeys, + removedWikiPageKeys, + pageKeysValidated: pageKeys, + }, + }; + } + + private async runWorkUnitInWorktree(input: { + job: IngestBundleJob; + syncId: string; + wu: WorkUnit; + worktree: IngestSessionWorktree; + stagedDir: string; + contextReport: ContextEvidenceIndexSummary | null; + ingestToolMetadata: { runId: string; jobId: string; syncId: string; sourceKey: string }; + 
slConnectionIds: string[]; + wikiIndex: string; + slIndex: string; + priorProvenance: Map; + scopedWikiService: ReturnType; + scopedSemanticLayerService: ReturnType; + baseFraming: string; + skillsPrompt: string; + canonicalPins: CanonicalPin[]; + workUnitSettings: { maxConcurrency: number; stepBudget: number; failureMode: 'abort' | 'continue' }; + transcriptDir: string; + transcriptSummaries: Map; + recordTranscriptEntry(path: string): (entry: ToolCallLogEntry) => void; + stageIndex: StageIndex; + includeContextEvidenceTools: boolean; + currentTableExists(tableRef: string): Promise; + memoryFlow?: MemoryFlowEventSink; + wuSkillNames: string[]; + onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + }): Promise { + const session: CaptureSession = { + userId: 'system', + chatId: input.wu.unitKey, + userMessage: `ingest(${input.job.sourceKey}) WU=${input.wu.unitKey}`, + connectionId: input.job.connectionId, + userScopedEnabled: false, + forceGlobalScope: true, + touchedSlSources: createTouchedSlSources(), + preHead: input.worktree.baseSha, + }; + const sessionActions: MemoryAction[] = []; + + const toolSession: ToolSession = { + connectionId: input.job.connectionId, + isWorktreeScoped: true, + preHead: input.worktree.baseSha, + touchedSlSources: session.touchedSlSources, + actions: sessionActions, + allowedRawPaths: new Set(input.wu.rawFiles), + allowedConnectionNames: new Set(input.slConnectionIds), + semanticLayerService: input.scopedSemanticLayerService, + wikiService: input.scopedWikiService, + configService: input.worktree.config, + gitService: input.worktree.git, + ingest: input.ingestToolMetadata, + }; + + const slValidationDeps: SlValidationDeps = { + semanticLayerService: input.scopedSemanticLayerService, + connections: this.deps.connections, + configService: input.worktree.config, + gitService: input.worktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + }; + + const 
wuToolset = this.deps.toolsetFactory.createIngestWuToolset(toolSession, { + includeContextEvidenceTools: input.includeContextEvidenceTools, + }); + const wuToolContext: ToolContext = { + sourceId: 'ingest', + messageId: `${input.job.jobId}-wu-${input.wu.unitKey}`, + userId: 'system', + connectionId: input.job.connectionId, + ingest: input.ingestToolMetadata, + session: toolSession, + }; + + const skillsLoadedPerWu: string[] = []; + const loadSkillTool: KtxRuntimeToolSet = { + load_skill: { + name: 'load_skill', + description: + 'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.', + inputSchema: z.object({ name: z.string() }), + execute: async ({ name }) => { + const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent'); + if (!skill) { + const available = + (await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || '(none)'; + return { markdown: `Skill "${name}" not available. 
Available: ${available}` }; + } + const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); + if (!skillsLoadedPerWu.includes(skill.name)) { + skillsLoadedPerWu.push(skill.name); + } + const structured = { + name: skill.name, + skillDirectory: skill.path, + content: this.deps.skillsRegistry.stripFrontmatter(body), + }; + return { + markdown: `# ${structured.name}\n\n${structured.content}`, + structured, + }; + }, + }, + }; + + const wuEmitUnmappedFallbackTool = { + emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool( + 'emit_unmapped_fallback', + createEmitUnmappedFallbackTool({ + stageIndex: input.stageIndex, + allowedPaths: new Set(input.wu.rawFiles), + tableRefExists: input.currentTableExists, + }), + ), + }; + + const systemPrompt = buildWuSystemPrompt({ + baseFraming: input.baseFraming, + skillsPrompt: input.skillsPrompt, + syncId: input.syncId, + sourceKey: input.job.sourceKey, + canonicalPins: input.canonicalPins, + }); + + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: input.wu.unitKey, + skills: input.wuSkillNames, + stepBudget: input.workUnitSettings.stepBudget, + }); + return executeWorkUnit( + { + sessionWorktreeGit: input.worktree.git, + agentRunner: this.deps.agentRunner, + validateTouchedSources: (touched) => + validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched), + validateWikiRefs: (actions) => + findDanglingWikiRefsForActions({ + wikiService: input.scopedWikiService, + scope: 'GLOBAL', + scopeId: null, + actions, + }), + resetHardTo: (targetSha) => input.worktree.git.resetHardTo(targetSha), + buildSystemPrompt: () => systemPrompt, + buildUserPrompt: (wuInner) => + buildWuUserPrompt({ + wu: wuInner, + wikiIndex: input.wikiIndex, + slIndex: input.slIndex, + priorProvenance: input.priorProvenance, + }), + buildToolSet: (wuInner) => + wrapToolsWithLogger( + buildWuToolSet({ + sourceKey: input.job.sourceKey, + stagedDir: input.stagedDir, + wu: wuInner, + loadSkillTool, + 
emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool, + toolsetTools: wuToolset.toRuntimeTools(wuToolContext), + }), + join(input.transcriptDir, `${wuInner.unitKey}.jsonl`), + wuInner.unitKey, + { onEntry: input.recordTranscriptEntry(join(input.transcriptDir, `${wuInner.unitKey}.jsonl`)) }, + ), + captureSession: session, + sessionActions, + modelRole: 'candidateExtraction', + stepBudget: input.workUnitSettings.stepBudget, + sourceKey: input.job.sourceKey, + connectionId: input.job.connectionId, + jobId: input.job.jobId, + toolFailureCount: (unitKey) => input.transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 0, + onStepFinish: input.onStepFinish, + }, + input.wu, + ); + } + protected async runInner(job: IngestBundleJob, ctx?: IngestJobContext): Promise> { const syncId = buildSyncId(new Date(), job.jobId); + const trace = this.createTrace(job); + const transcriptSummaries = new Map(); + let activeTrace: IngestTraceWriter = trace; + let activePhase = 'run'; + let runRow: Awaited> | null = null; + let latestDiffSummary: IngestDiffSummary = { added: 0, modified: 0, deleted: 0, unchanged: 0 }; + let latestWorkUnits: WorkUnitOutcome[] = []; + let latestFailedWorkUnits: string[] = []; + let latestReconciliationSkipped = true; + let latestReportWorkUnits: IngestReportWorkUnit[] = []; + let latestReconciliationActions: MemoryAction[] = []; + let latestConflictsResolved: StageIndex['conflictsResolved'] = []; + let latestEvictionsApplied: StageIndex['evictionsApplied'] = []; + let latestUnmappedFallbacks: StageIndex['unmappedFallbacks'] = []; + let latestArtifactResolutions: NonNullable = []; + let latestEvictionInputs: string[] = []; + let latestUnresolvedCards: UnresolvedCardInfo[] = []; + let latestReportProvenanceRows: IngestReportProvenanceDetail[] = []; + let activeFailureDetails: Record | undefined; + let latestIsolatedDiffSummary: + | { + enabled: boolean; + integrationWorktreePath?: string; + ingestionBaseSha?: string; + projectionSha?: string | null; + 
acceptedPatches: number; + textualConflicts: number; + semanticConflicts: number; + resolverAttempts: number; + resolverRepairs: number; + resolverFailures: number; + } + | undefined; + await trace.event('info', 'run', 'ingest_started', { + trigger: job.trigger, + bundleRefKind: job.bundleRef.kind, + }); + try { const memoryFlow = ctx?.memoryFlow; + const emitStageProgress = ( + stage: MemoryFlowStageProgress['stage'], + percent: number, + message: string, + options: { transient?: boolean } = {}, + ): void => { + memoryFlow?.emit({ + type: 'stage_progress', + stage, + percent, + message, + ...(options.transient !== undefined ? { transient: options.transient } : {}), + }); + }; const baseSha = await this.deps.lockingService.withLock('config:repo', () => this.deps.gitService.revParseHead()); if (!baseSha) { throw new Error('ingest-bundle: config repo has no HEAD'); } const transcriptDir = this.deps.storage.resolveTranscriptDir(job.jobId); - const transcriptSummaries = new Map(); const recordTranscriptEntry = (path: string) => (entry: ToolCallLogEntry): void => { @@ -416,17 +1005,29 @@ export class IngestBundleRunner { await stage1?.updateProgress(0.0, 'Fetching source files'); const adapter = this.deps.registry.get(job.sourceKey); - const stagedDir = overrideReport - ? await this.materializeOverrideSnapshot(overrideReport, { - connectionId: job.connectionId, - sourceKey: job.sourceKey, - jobId: job.jobId, - }) - : await this.resolveStagedDir(job.bundleRef, { - connectionId: job.connectionId, - sourceKey: job.sourceKey, - jobId: job.jobId, - }); + activePhase = 'fetch'; + const stagedDir = await traceTimed( + trace, + 'fetch', + 'resolve_staged_dir', + { + bundleRefKind: job.bundleRef.kind, + sourceKey: job.sourceKey, + }, + () => + overrideReport + ? 
this.materializeOverrideSnapshot(overrideReport, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + }) + : this.resolveStagedDir(job.bundleRef, { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + jobId: job.jobId, + ...(memoryFlow ? { memoryFlow } : {}), + }), + ); const fetchReport = adapter.readFetchReport ? await adapter.readFetchReport(stagedDir) : null; const scopeDescriptor = adapter.describeScope ? await adapter.describeScope(stagedDir) : null; @@ -434,16 +1035,30 @@ export class IngestBundleRunner { const sessionWorktree = await this.deps.lockingService.withLock('config:repo', () => this.deps.sessionWorktreeService.create(job.jobId, baseSha), ); - let cleanupOutcome: 'success' | 'crash' = 'crash'; + let cleanupOutcome: 'success' | 'crash' | 'conflict' = 'crash'; try { - const { currentHashes, rawDirInWorktree } = await this.stageRawFilesStage1({ - stagedDir, - worktreeRoot: sessionWorktree.workdir, - connectionId: job.connectionId, - sourceKey: job.sourceKey, - syncId, - }); + activePhase = 'stage_raw_files'; + const { currentHashes, rawDirInWorktree } = await traceTimed( + trace, + 'stage_raw_files', + 'stage_raw_files', + { + stagedDir, + worktreePath: sessionWorktree.workdir, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + }, + () => + this.stageRawFilesStage1({ + stagedDir, + worktreeRoot: sessionWorktree.workdir, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + }), + ); memoryFlow?.update({ connectionId: job.connectionId, adapter: job.sourceKey, @@ -468,11 +1083,24 @@ export class IngestBundleRunner { await stage1?.updateProgress(0.5, 'Checking what changed'); - const diffSet = await this.deps.diffSetService.compute( - job.connectionId, - job.sourceKey, - currentHashes, - scopeDescriptor ? 
scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined, + activePhase = 'diff'; + const diffSet = await traceTimed( + trace, + 'diff', + 'compute_diff_set', + { + connectionId: job.connectionId, + sourceKey: job.sourceKey, + currentHashCount: currentHashes.size, + scopeFingerprint: scopeDescriptor?.fingerprint ?? null, + }, + () => + this.deps.diffSetService.compute( + job.connectionId, + job.sourceKey, + currentHashes, + scopeDescriptor ? scopeDescriptor.isPathInScope.bind(scopeDescriptor) : undefined, + ), ); const diffSummary = { added: diffSet.added.length, @@ -480,9 +1108,10 @@ export class IngestBundleRunner { deleted: diffSet.deleted.length, unchanged: diffSet.unchanged.length, }; + latestDiffSummary = diffSummary; memoryFlow?.emit({ type: 'diff_computed', ...diffSummary }); - const runRow = await this.deps.runs.create({ + runRow = await this.deps.runs.create({ jobId: job.jobId, connectionId: job.connectionId, sourceKey: job.sourceKey, @@ -497,13 +1126,28 @@ export class IngestBundleRunner { syncId, sourceKey: job.sourceKey, }; + const runTrace = trace.withContext({ runId: runRow.id, syncId }); + activeTrace = runTrace; + const createdRunRow = runRow; + await runTrace.event('debug', 'snapshot', 'input_snapshot', { + baseSha, + stagedDir, + rawFileCount: currentHashes.size, + rawDirInWorktree, + diffSummary, + scopeFingerprint: scopeDescriptor?.fingerprint ?? 
null, + }); await stage1?.updateProgress( 1.0, `${diffSet.added.length} new, ${diffSet.modified.length} changed, ${diffSet.deleted.length} removed`, ); - const detected = await adapter.detect(stagedDir); + activePhase = 'detect'; + const detected = await traceTimed(runTrace, 'detect', 'adapter_detect', { stagedDir, sourceKey: job.sourceKey }, () => + adapter.detect(stagedDir), + ); + await runTrace.event('debug', 'detect', 'adapter_detected', { detected }); if (!detected) { await this.deps.runs.markFailed(runRow.id); throw new Error(`source adapter '${job.sourceKey}' did not recognize staged dir`); @@ -524,6 +1168,7 @@ export class IngestBundleRunner { const stage2 = ctx?.startPhase(0.04); await stage2?.updateProgress(0.0, 'Planning updates'); + activePhase = 'planning'; let workUnits: WorkUnit[] = []; let eviction: EvictionUnit | undefined; let unresolvedCards: UnresolvedCardInfo[] | undefined; @@ -541,7 +1186,18 @@ export class IngestBundleRunner { unresolvedCards = overrideReport.body.unresolvedCards; await stage2?.updateProgress(1.0, `Loaded prior report ${overrideReport.jobId} for override reconciliation`); } else { - const chunk = await adapter.chunk(stagedDir, diffSet); + const chunk = await traceTimed( + runTrace, + 'planning', + 'chunk_work_units', + { + stagedDir, + added: diffSet.added.length, + modified: diffSet.modified.length, + deleted: diffSet.deleted.length, + }, + () => adapter.chunk(stagedDir, diffSet), + ); workUnits = chunk.workUnits; eviction = chunk.eviction; unresolvedCards = chunk.unresolvedCards; @@ -571,6 +1227,12 @@ export class IngestBundleRunner { } await stage2?.updateProgress(1.0, `Planned ${workUnits.length} update${workUnits.length === 1 ? '' : 's'}`); } + await runTrace.event('debug', 'planning', 'work_units_planned', { + workUnitCount: workUnits.length, + evictionCount: eviction?.deletedRawPaths.length ?? 0, + unresolvedCardCount: unresolvedCards?.length ?? 0, + triageEnabled: triageResult?.enabled ?? 
false, + }); const targetConnectionIds = new Set([job.connectionId]); if (!overrideReport && adapter.listTargetConnectionIds) { @@ -591,6 +1253,9 @@ export class IngestBundleRunner { } } const slConnectionIds = [...targetConnectionIds].sort(); + await runTrace.event('debug', 'planning', 'target_connections_resolved', { + connectionIds: slConnectionIds, + }); // Build shared per-job context. const [wikiIndex, slIndex] = await Promise.all([this.buildWikiIndex(), this.buildSlIndex(slConnectionIds)]); @@ -626,12 +1291,98 @@ export class IngestBundleRunner { workUnitCount: memoryFlowPlannedWorkUnits.length, evictionCount: eviction?.deletedRawPaths.length ?? 0, }); + const isolatedDiffEnabled = !overrideReport; + const isolatedDiffSummary = { + enabled: isolatedDiffEnabled, + integrationWorktreePath: isolatedDiffEnabled ? sessionWorktree.workdir : undefined, + ingestionBaseSha: undefined as string | undefined, + projectionSha: null as string | null, + acceptedPatches: 0, + textualConflicts: 0, + semanticConflicts: 0, + resolverAttempts: 0, + resolverRepairs: 0, + resolverFailures: 0, + gateRepairAttempts: 0, + gateRepairs: 0, + gateRepairFailures: 0, + }; + latestIsolatedDiffSummary = isolatedDiffSummary; const stage3 = ctx?.startPhase(0.6); await stage3?.updateProgress(0.0, `Processing ${workUnits.length} update${workUnits.length === 1 ? 
'' : 's'}`); + activePhase = 'work_units'; this.logger.log(`[ingest-bundle] job=${job.jobId} tool-call transcripts: ${transcriptDir}/`); + let projectionTouchedSources: TouchedSlSource[] = []; + let projectionChangedWikiPageKeys: string[] = []; + let projectionTouchedPaths: string[] = []; if (!overrideReport) { + await runTrace.event('info', 'routing', 'isolated_diff_enabled', { + sourceKey: job.sourceKey, + workUnitCount: workUnits.length, + integrationWorktreePath: sessionWorktree.workdir, + }); + + if (adapter.project) { + const preProjectionSha = await sessionWorktree.git.revParseHead(); + const projection = await traceTimed( + runTrace, + 'projection', + 'deterministic_projection', + { sourceKey: job.sourceKey }, + () => + adapter.project!({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + jobId: job.jobId, + runId: createdRunRow.id, + stagedDir, + workdir: sessionWorktree.workdir, + parseArtifacts, + semanticLayerService: this.deps.semanticLayerService, + }), + ); + if (projection.errors.length > 0) { + await this.deps.runs.markFailed(runRow.id); + throw new Error(`deterministic projection failed: ${projection.errors.join('; ')}`); + } + projectionTouchedSources = projection.touchedSources; + projectionChangedWikiPageKeys = projection.changedWikiPageKeys; + const projectionPaths = [ + ...projection.touchedSources.map((source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`), + ...projection.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`), + ]; + projectionTouchedPaths = projectionPaths; + const projectionCommit = + projectionPaths.length > 0 + ? 
await sessionWorktree.git.commitFiles( + projectionPaths, + `ingest(${job.sourceKey}): deterministic projection syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ) + : await sessionWorktree.git.commitStaged( + `ingest(${job.sourceKey}): deterministic projection syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ); + isolatedDiffSummary.projectionSha = + projectionCommit.created || projectionCommit.commitHash !== preProjectionSha + ? projectionCommit.commitHash + : null; + await runTrace.event('debug', 'projection', 'deterministic_projection_committed', { + projectionSha: isolatedDiffSummary.projectionSha, + touchedSources: projectionTouchedSources, + changedWikiPageKeys: projectionChangedWikiPageKeys, + warnings: projection.warnings, + }); + } + + const ingestionBaseSha = await sessionWorktree.git.revParseHead(); + isolatedDiffSummary.ingestionBaseSha = ingestionBaseSha; + const patchDir = join(this.deps.storage.homeDir, 'ingest-patches', job.jobId); const workUnitSettings = { maxConcurrency: this.deps.settings.workUnitMaxConcurrency ?? 1, stepBudget: this.deps.settings.workUnitStepBudget ?? 
40, @@ -640,168 +1391,6 @@ export class IngestBundleRunner { const limitWorkUnit = pLimit(workUnitSettings.maxConcurrency); const workUnitOutcomesByIndex: WorkUnitOutcome[] = []; let completedWorkUnits = 0; - let abortRequested = false; - - const runSingleWorkUnit = async (wu: WorkUnit): Promise => { - const session: CaptureSession = { - userId: 'system', - chatId: wu.unitKey, - userMessage: `ingest(${job.sourceKey}) WU=${wu.unitKey}`, - connectionId: job.connectionId, - userScopedEnabled: false, - forceGlobalScope: true, - touchedSlSources: createTouchedSlSources(), - preHead: sessionWorktree.baseSha, - }; - const sessionActions: MemoryAction[] = []; - - const scopedWikiService = this.deps.wikiService.forWorktree(sessionWorktree.workdir); - const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir); - - const toolSession: ToolSession = { - connectionId: job.connectionId, - isWorktreeScoped: true, - preHead: sessionWorktree.baseSha, - touchedSlSources: session.touchedSlSources, - actions: sessionActions, - allowedRawPaths: new Set(wu.rawFiles), - allowedConnectionNames: new Set(slConnectionIds), - semanticLayerService: scopedSemanticLayerService, - wikiService: scopedWikiService, - configService: sessionWorktree.config, - gitService: sessionWorktree.git, - ingest: ingestToolMetadata, - }; - - const slValidationDeps: SlValidationDeps = { - semanticLayerService: scopedSemanticLayerService, - connections: this.deps.connections, - configService: sessionWorktree.config, - gitService: sessionWorktree.git, - slSourcesRepository: this.deps.slSourcesRepository, - probeRowCount: this.deps.settings.probeRowCount, - }; - - const wuToolset = this.deps.toolsetFactory.createIngestWuToolset(toolSession, { - includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport, - }); - const wuToolContext: ToolContext = { - sourceId: 'ingest', - messageId: `${job.jobId}-wu-${wu.unitKey}`, - userId: 'system', - 
connectionId: job.connectionId, - ingest: ingestToolMetadata, - session: toolSession, - }; - - const skillsLoadedPerWu: string[] = []; - const loadSkillTool: KtxRuntimeToolSet = { - load_skill: { - name: 'load_skill', - description: - 'Load a skill to get specialized instructions. Call this when a skill listed in the system prompt matches the current task.', - inputSchema: z.object({ name: z.string() }), - execute: async ({ name }) => { - const skill = await this.deps.skillsRegistry.getSkill(name, 'memory_agent'); - if (!skill) { - const available = - (await this.deps.skillsRegistry.listSkills('memory_agent')).map((s) => s.name).join(', ') || - '(none)'; - return { markdown: `Skill "${name}" not available. Available: ${available}` }; - } - const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); - if (!skillsLoadedPerWu.includes(skill.name)) { - skillsLoadedPerWu.push(skill.name); - } - const structured = { - name: skill.name, - skillDirectory: skill.path, - content: this.deps.skillsRegistry.stripFrontmatter(body), - }; - return { - markdown: `# ${structured.name}\n\n${structured.content}`, - structured, - }; - }, - }, - }; - - const priorProvenance = await this.deps.provenance.findLatestArtifactsForRawPaths( - job.connectionId, - job.sourceKey, - wu.rawFiles, - ); - const wuEmitUnmappedFallbackTool = { - emit_unmapped_fallback: createRuntimeToolDescriptorFromAiTool( - 'emit_unmapped_fallback', - createEmitUnmappedFallbackTool({ - stageIndex, - allowedPaths: new Set(wu.rawFiles), - tableRefExists: (tableRef) => - this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef), - }), - ), - }; - - const systemPrompt = buildWuSystemPrompt({ - baseFraming, - skillsPrompt, - syncId, - sourceKey: job.sourceKey, - canonicalPins, - }); - - memoryFlow?.emit({ - type: 'work_unit_started', - unitKey: wu.unitKey, - skills: wuSkillNames, - stepBudget: workUnitSettings.stepBudget, - }); - return executeWorkUnit( - { - sessionWorktreeGit: 
sessionWorktree.git, - agentRunner: this.deps.agentRunner, - validateTouchedSources: (touched) => - validateWuTouchedSources({ ...slValidationDeps, slValidator: this.deps.slValidator }, touched), - validateWikiRefs: (actions) => - findDanglingWikiRefsForActions({ - wikiService: scopedWikiService, - scope: 'GLOBAL', - scopeId: null, - actions, - }), - resetHardTo: (targetSha) => sessionWorktree.git.resetHardTo(targetSha), - buildSystemPrompt: () => systemPrompt, - buildUserPrompt: (wuInner) => buildWuUserPrompt({ wu: wuInner, wikiIndex, slIndex, priorProvenance }), - buildToolSet: (wuInner) => - wrapToolsWithLogger( - buildWuToolSet({ - sourceKey: job.sourceKey, - stagedDir, - wu: wuInner, - loadSkillTool, - emitUnmappedFallbackTool: wuEmitUnmappedFallbackTool, - toolsetTools: wuToolset.toRuntimeTools(wuToolContext), - }), - join(transcriptDir, `${wuInner.unitKey}.jsonl`), - wuInner.unitKey, - { onEntry: recordTranscriptEntry(join(transcriptDir, `${wuInner.unitKey}.jsonl`)) }, - ), - captureSession: session, - sessionActions, - modelRole: 'candidateExtraction', - stepBudget: workUnitSettings.stepBudget, - sourceKey: job.sourceKey, - connectionId: job.connectionId, - jobId: job.jobId, - toolFailureCount: (unitKey) => transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 
0, - onStepFinish: ({ stepIndex, stepBudget }) => { - memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); - }, - }, - wu, - ); - }; if (workUnits.length === 0) { await stage3?.updateProgress(1.0, '0 of 0 work units complete'); @@ -811,17 +1400,54 @@ export class IngestBundleRunner { await Promise.all( workUnits.map((wu, index) => limitWorkUnit(async () => { - if (abortRequested) { - return; - } - - let outcome: WorkUnitOutcome; - try { - outcome = await runSingleWorkUnit(wu); - } catch (error) { - outcome = this.buildFailedWorkUnitOutcome(wu, error); - } - + const outcome = await runIsolatedWorkUnit({ + unitIndex: index, + ingestionBaseSha, + sessionWorktreeService: this.deps.sessionWorktreeService, + patchDir, + trace: runTrace, + workUnit: wu, + afterSuccess: (child) => copyTransientIngestEvidence(child.workdir, sessionWorktree.workdir), + run: async (child) => { + const scopedWikiService = this.deps.wikiService.forWorktree(child.workdir); + const scopedSemanticLayerService = this.deps.semanticLayerService.forWorktree(child.workdir); + return this.runWorkUnitInWorktree({ + job, + syncId, + wu, + worktree: child, + stagedDir, + contextReport, + ingestToolMetadata, + slConnectionIds, + wikiIndex, + slIndex, + priorProvenance: await this.deps.provenance.findLatestArtifactsForRawPaths( + job.connectionId, + job.sourceKey, + wu.rawFiles, + ), + scopedWikiService, + scopedSemanticLayerService, + baseFraming, + skillsPrompt, + canonicalPins, + workUnitSettings, + transcriptDir, + transcriptSummaries, + recordTranscriptEntry, + stageIndex, + includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport, + currentTableExists: (tableRef) => + this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef), + memoryFlow, + wuSkillNames, + onStepFinish: ({ stepIndex, stepBudget }) => { + memoryFlow?.emit({ type: 'work_unit_step', unitKey: wu.unitKey, stepIndex, stepBudget }); + }, + 
}); + }, + }); workUnitOutcomesByIndex[index] = outcome; for (const action of outcome.actions) { memoryFlow?.emit({ @@ -843,14 +1469,6 @@ export class IngestBundleRunner { completedWorkUnits / workUnits.length, `${completedWorkUnits} of ${workUnits.length} work units complete`, ); - - if (outcome.status === 'failed') { - this.logger.warn(`[ingest-bundle] WU=${outcome.unitKey} failed: ${outcome.reason}`); - if (workUnitSettings.failureMode === 'abort') { - abortRequested = true; - throw new Error(this.formatWorkUnitFailure(outcome)); - } - } }), ), ); @@ -865,9 +1483,8 @@ export class IngestBundleRunner { failedWorkUnits.push( ...workUnitOutcomes.filter((outcome) => outcome.status === 'failed').map((outcome) => outcome.unitKey), ); - - // Complete the typed Stage Index from the outcomes once, and use it for - // Stage 4, provenance writes (Phase G), and the report body (Phase F3). + latestWorkUnits = workUnitOutcomes; + latestFailedWorkUnits = failedWorkUnits; stageIndex.workUnits = workUnitOutcomes.map((o) => ({ unitKey: o.unitKey, rawFiles: workUnits.find((w) => w.unitKey === o.unitKey)?.rawFiles ?? 
[], @@ -878,6 +1495,163 @@ export class IngestBundleRunner { slDisallowed: o.slDisallowed, slDisallowedReason: o.slDisallowedReason, })); + + activePhase = 'integration'; + const integrablePatchCount = workUnitOutcomesByIndex.filter( + (outcome) => outcome?.status === 'success' && !!outcome.patchPath, + ).length; + let integratedPatchCount = 0; + for (const [index, outcome] of workUnitOutcomesByIndex.entries()) { + if (!outcome || outcome.status !== 'success' || !outcome.patchPath) { + continue; + } + const wu = workUnits[index]; + if (!wu) { + continue; + } + const integrationFailureDetails = { + unitKey: outcome.unitKey, + patchPath: outcome.patchPath, + allowedTargetConnectionIds: slConnectionIds, + }; + activeFailureDetails = integrationFailureDetails; + emitStageProgress( + 'integration', + 80, + `Integrating ${integratedPatchCount + 1}/${integrablePatchCount} patches: ${outcome.unitKey}`, + ); + const integration = await integrateWorkUnitPatch({ + unitKey: outcome.unitKey, + patchPath: outcome.patchPath, + integrationGit: sessionWorktree.git, + trace: runTrace, + author: this.deps.storage.systemGitAuthor, + slDisallowed: wu.slDisallowed === true, + allowedTargetConnectionIds: new Set(slConnectionIds), + validateAppliedTree: async (touchedPaths) => { + await validateFinalIngestArtifacts({ + connectionIds: slConnectionIds, + changedWikiPageKeys: this.wikiPageKeysFromPaths(touchedPaths), + touchedSlSources: this.touchedSlSourcesFromPaths(touchedPaths), + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: 
this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + resolveTextualConflict: async (context) => { + emitStageProgress('integration', 81, `Resolving text conflict for ${context.unitKey}`); + const result = await resolveTextualConflict({ + agentRunner: this.deps.agentRunner, + workdir: sessionWorktree.workdir, + unitKey: context.unitKey, + patchPath: context.patchPath, + touchedPaths: context.touchedPaths, + trace: runTrace, + reason: context.reason, + maxAttempts: 1, + stepBudget: 12, + }); + emitStageProgress( + 'integration', + 82, + result.status === 'repaired' + ? `Resolved text conflict for ${context.unitKey}` + : `Text conflict resolver failed for ${context.unitKey}`, + ); + return result; + }, + repairGateFailure: async (context) => { + emitStageProgress('integration', 82, `Repairing semantic gate for ${context.unitKey}`); + const result = await repairFinalGateFailure({ + agentRunner: this.deps.agentRunner, + workdir: sessionWorktree.workdir, + gateError: context.reason, + allowedPaths: context.touchedPaths, + trace: runTrace, + repairKind: 'patch_semantic_gate', + maxAttempts: 1, + stepBudget: 16, + }); + emitStageProgress( + 'integration', + 83, + result.status === 'repaired' + ? 
`Repaired semantic gate for ${context.unitKey}` + : `Semantic gate repair failed for ${context.unitKey}`, + ); + return result; + }, + }); + if (integration.textualResolution) { + isolatedDiffSummary.resolverAttempts += integration.textualResolution.attempts; + if (integration.textualResolution.status === 'repaired') { + isolatedDiffSummary.textualConflicts += 1; + isolatedDiffSummary.resolverRepairs += 1; + } else { + isolatedDiffSummary.resolverFailures += 1; + } + } + if (integration.gateRepair) { + isolatedDiffSummary.gateRepairAttempts += integration.gateRepair.attempts; + if (integration.gateRepair.status === 'repaired') { + isolatedDiffSummary.semanticConflicts += 1; + isolatedDiffSummary.gateRepairs += 1; + } else { + isolatedDiffSummary.gateRepairFailures += 1; + } + } + if (integration.status === 'textual_conflict') { + isolatedDiffSummary.textualConflicts += 1; + await this.deps.runs.markFailed(runRow.id); + cleanupOutcome = 'conflict'; + activeFailureDetails = { + ...integrationFailureDetails, + touchedPaths: integration.touchedPaths, + reason: integration.reason, + }; + throw new Error(`isolated diff textual conflict in ${outcome.unitKey}: ${integration.reason}`); + } + if (integration.status === 'semantic_conflict') { + isolatedDiffSummary.semanticConflicts += 1; + await this.deps.runs.markFailed(runRow.id); + cleanupOutcome = 'conflict'; + activeFailureDetails = { + ...integrationFailureDetails, + touchedPaths: integration.touchedPaths, + reason: integration.reason, + }; + throw new Error(`isolated diff semantic conflict in ${outcome.unitKey}: ${integration.reason}`); + } + activeFailureDetails = undefined; + if (integration.touchedPaths.length > 0) { + isolatedDiffSummary.acceptedPatches += 1; + integratedPatchCount += 1; + } + emitStageProgress( + 'integration', + 83, + `Integrated ${integratedPatchCount}/${integrablePatchCount} patches`, + ); + } + } const carryForwardResult = contextReport && this.deps.contextCandidateCarryforward @@ -889,6 
+1663,8 @@ export class IngestBundleRunner { : null; const dedupResult = contextReport && this.deps.candidateDedup ? await this.deps.candidateDedup.deduplicateRun(runRow.id) : null; + const preReconciliationSha = await sessionWorktree.git.revParseHead(); + activePhase = 'reconciliation'; // Stage 4 — reconciliation. Shares scoped wiki/SL with a fresh CaptureSession // so reconciliation writes land in the same worktree Stage 3 used. @@ -1029,6 +1805,7 @@ export class IngestBundleRunner { (eviction?.deletedRawPaths.length ?? 0) > 0 || hasCandidateReconcileWork; if (hasReconcileWork || overrideReport) { + emitStageProgress('reconciliation', 84, 'Reconciling results'); await stage4?.updateProgress(0.0, 'Reconciling results'); } @@ -1077,6 +1854,12 @@ export class IngestBundleRunner { getReconciliationActions: () => reconcileActions, onStepFinish: stage4 ? ({ passNumber, stepIndex, stepBudget }) => { + emitStageProgress( + 'reconciliation', + 85, + `Reconciling results: pass ${passNumber} step ${stepIndex}/${stepBudget}`, + { transient: true }, + ); void stage4.updateProgress( stepIndex / stepBudget, `Reconciling results · pass ${passNumber} step ${stepIndex}`, @@ -1130,11 +1913,15 @@ export class IngestBundleRunner { force: !!overrideReport, onStepFinish: stage4 ? 
({ stepIndex, stepBudget }) => { + emitStageProgress('reconciliation', 85, `Reconciling results: step ${stepIndex}/${stepBudget}`, { + transient: true, + }); void stage4.updateProgress(stepIndex / stepBudget, `Reconciling results · step ${stepIndex}`); } : undefined, }); } + latestReconciliationSkipped = reconcileOutcome.skipped; const danglingReconcileWikiRefs = await findDanglingWikiRefsForActions({ wikiService: rcScopedWiki, @@ -1156,23 +1943,40 @@ export class IngestBundleRunner { conflictCount: stageIndex.conflictsResolved.length, fallbackCount: stageIndex.unmappedFallbacks.length, }); + await runTrace.event('debug', 'reconciliation', 'reconciliation_finished', { + skipped: reconcileOutcome.skipped, + stopReason: reconcileOutcome.stopReason ?? null, + actionCount: reconcileActions.length, + conflictCount: stageIndex.conflictsResolved.length, + fallbackCount: stageIndex.unmappedFallbacks.length, + artifactResolutionCount: stageIndex.artifactResolutions?.length ?? 0, + }); await stage4?.updateProgress(1.0, reconcileOutcome.skipped ? 
'No reconciliation needed' : 'Reconciled'); const postProcessor = this.deps.postProcessors?.[job.sourceKey]; + activePhase = 'post_processor'; if (postProcessor) { const stagePostProcessor = ctx?.startPhase(0.04); + emitStageProgress('post_processor', 87, 'Running deterministic imports'); await stagePostProcessor?.updateProgress(0.0, 'Running deterministic imports'); try { - const result = await postProcessor.run({ - connectionId: job.connectionId, - sourceKey: job.sourceKey, - syncId, - jobId: job.jobId, - runId: runRow.id, - workdir: sessionWorktree.workdir, - parseArtifacts, - }); + const result = await traceTimed( + runTrace, + 'post_processor', + 'post_processor', + { sourceKey: job.sourceKey }, + () => + postProcessor.run({ + connectionId: job.connectionId, + sourceKey: job.sourceKey, + syncId, + jobId: job.jobId, + runId: createdRunRow.id, + workdir: sessionWorktree.workdir, + parseArtifacts, + }), + ); postProcessorOutcome = { sourceKey: job.sourceKey, status: result.errors.length > 0 && result.touchedSources.length === 0 ? 'failed' : 'success', @@ -1181,6 +1985,7 @@ export class IngestBundleRunner { warnings: result.warnings, touchedSources: result.touchedSources, }; + emitStageProgress('post_processor', 88, 'Deterministic imports complete'); await stagePostProcessor?.updateProgress(1.0, 'Deterministic imports complete'); } catch (error) { postProcessorOutcome = { @@ -1194,6 +1999,12 @@ export class IngestBundleRunner { throw error; } } + await runTrace.event('debug', 'post_processor', 'post_processor_finished', { + sourceKey: job.sourceKey, + status: postProcessorOutcome?.status ?? 'skipped', + touchedSources: postProcessorOutcome?.touchedSources ?? [], + warnings: postProcessorOutcome?.warnings ?? [], + }); const repairConnectionIds = [ ...new Set([ @@ -1201,15 +2012,266 @@ export class IngestBundleRunner { ...(postProcessorOutcome?.touchedSources ?? 
[]).map((source) => source.connectionId), ]), ].sort(); - wikiSlRefRepairResult = await repairWikiSlRefs({ - wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), - semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), - configService: sessionWorktree.config, - connectionIds: repairConnectionIds, + activePhase = 'wiki_sl_ref_repair'; + emitStageProgress('wiki_sl_ref_repair', 88, 'Repairing wiki semantic-layer references'); + wikiSlRefRepairResult = await traceTimed( + runTrace, + 'wiki_sl_ref_repair', + 'wiki_sl_refs_repair', + { connectionIds: repairConnectionIds }, + () => + repairWikiSlRefs({ + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + configService: sessionWorktree.config, + connectionIds: repairConnectionIds, + }), + ); + await runTrace.event('debug', 'wiki_sl_ref_repair', 'wiki_sl_refs_repaired', { + repairCount: wikiSlRefRepairResult.repairs.length, + repairs: wikiSlRefRepairResult.repairs, + warnings: wikiSlRefRepairResult.warnings, }); + emitStageProgress('wiki_sl_ref_repair', 88, 'Checked wiki semantic-layer references'); + const postReconciliationSha = await sessionWorktree.git.revParseHead(); + const postReconciliationPaths = + preReconciliationSha && postReconciliationSha && preReconciliationSha !== postReconciliationSha + ? (await sessionWorktree.git.diffNameStatus(preReconciliationSha, postReconciliationSha)).map((entry) => entry.path) + : []; + const baseFinalChangedWikiPageKeys = this.uniqueWikiPageKeys([ + ...(isolatedDiffEnabled ? projectionChangedWikiPageKeys : []), + ...workUnitOutcomes + .flatMap((outcome) => outcome.patchTouchedPaths ?? 
[]) + .flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...this.wikiPageKeysFromActions(reconcileActions), + ...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])), + ...wikiSlRefRepairResult.repairs.filter((repair) => repair.scope === 'GLOBAL').map((repair) => repair.pageKey), + ]); + const finalTouchedSlSources = this.uniqueTouchedSlSources([ + ...(isolatedDiffEnabled ? projectionTouchedSources : []), + ...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources), + ...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId), + ...this.touchedSlSourcesFromPaths(postReconciliationPaths), + ...(postProcessorOutcome?.touchedSources ?? []), + ]); + const finalWikiGateScope = await this.wikiPageKeysForFinalGates({ + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + changedWikiPageKeys: baseFinalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + actions: [...stageIndex.workUnits.flatMap((wu) => wu.actions), ...reconcileActions], + }); + const finalChangedWikiPageKeys = finalWikiGateScope.pageKeys; + + const finalTargetPolicyPaths = [ + ...projectionTouchedPaths, + ...workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []), + ...postReconciliationPaths, + ...(postProcessorOutcome?.touchedSources ?? 
[]).map( + (source) => `semantic-layer/${source.connectionId}/${source.sourceName}.yaml`, + ), + ]; + const targetPolicyTraceData = { + allowedTargetConnectionIds: slConnectionIds, + touchedPaths: [...new Set(finalTargetPolicyPaths)].sort(), + }; + activePhase = 'target_policy'; + activeFailureDetails = targetPolicyTraceData; + emitStageProgress('final_gates', 88, 'Checking semantic-layer target policy'); + await traceTimed(runTrace, 'target_policy', 'semantic_layer_target_policy', targetPolicyTraceData, async () => { + assertSemanticLayerTargetPathsAllowed({ + paths: finalTargetPolicyPaths, + allowedConnectionIds: new Set(slConnectionIds), + }); + }); + activeFailureDetails = undefined; + + const finalArtifactGateTraceData = { + changedWikiPageKeys: finalChangedWikiPageKeys, + wikiReferenceGateScope: finalWikiGateScope.trace, + touchedSlSources: finalTouchedSlSources, + projectionTouchedPaths, + workUnitPatchTouchedPaths: workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? 
[]), + actionOrigins: this.finalGateActionOrigins({ + stageIndex, + reconcileActions, + fallbackConnectionId: job.connectionId, + }), + preReconciliationSha, + postReconciliationSha, + postReconciliationPaths, + reconciliationActionCount: reconcileActions.length, + wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length, + }; + activePhase = 'final_gates'; + activeFailureDetails = finalArtifactGateTraceData; + emitStageProgress('final_gates', 89, 'Running final artifact gates'); + try { + await traceTimed( + runTrace, + 'final_gates', + 'final_artifact_gates', + finalArtifactGateTraceData, + async () => { + await validateFinalIngestArtifacts({ + connectionIds: repairConnectionIds, + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + ); + } catch (error) { + const gateError = this.errorMessage(error); + const repairPaths = finalGateRepairPaths({ + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + }); + emitStageProgress('final_gates', 89, 'Repairing final artifact gates'); + const gateRepair = await repairFinalGateFailure({ + agentRunner: this.deps.agentRunner, + workdir: 
sessionWorktree.workdir, + gateError, + allowedPaths: repairPaths, + trace: runTrace, + repairKind: 'final_artifact_gate', + maxAttempts: 1, + stepBudget: 16, + }); + + isolatedDiffSummary.gateRepairAttempts += gateRepair.attempts; + if (gateRepair.status === 'failed') { + isolatedDiffSummary.gateRepairFailures += 1; + activeFailureDetails = { + ...finalArtifactGateTraceData, + gateRepair, + gateError, + }; + throw new Error(`${gateError}\ngate repair failed: ${gateRepair.reason}`); + } + + isolatedDiffSummary.gateRepairs += 1; + await traceTimed( + runTrace, + 'final_gates', + 'final_artifact_gates_after_gate_repair', + { + ...finalArtifactGateTraceData, + repairedPaths: gateRepair.changedPaths, + }, + async () => { + await validateFinalIngestArtifacts({ + connectionIds: repairConnectionIds, + changedWikiPageKeys: finalChangedWikiPageKeys, + touchedSlSources: finalTouchedSlSources, + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + validateTouchedSources: (touched) => + validateWuTouchedSources( + { + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + connections: this.deps.connections, + configService: sessionWorktree.config, + gitService: sessionWorktree.git, + slSourcesRepository: this.deps.slSourcesRepository, + probeRowCount: this.deps.settings.probeRowCount, + slValidator: this.deps.slValidator, + }, + touched, + ), + tableExists: (connectionId, tableRef) => + this.tableRefExistsInSemanticLayer( + this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + [connectionId], + tableRef, + ), + }); + }, + ); + + const repairCommit = await sessionWorktree.git.commitFiles( + gateRepair.changedPaths, + `ingest(${job.sourceKey}): repair final gates syncId=${syncId}`, + this.deps.storage.systemGitAuthor.name, + this.deps.storage.systemGitAuthor.email, + ); + if (!repairCommit.created) { + 
isolatedDiffSummary.gateRepairFailures += 1; + throw new Error('final gate repair produced no committable changes'); + } + await runTrace.event('debug', 'final_gates', 'final_gate_repair_committed', { + commitSha: repairCommit.commitHash, + repairedPaths: gateRepair.changedPaths, + }); + } + activeFailureDetails = undefined; + + activePhase = 'provenance_validation'; + emitStageProgress('provenance', 90, 'Validating provenance rows'); + latestReportWorkUnits = this.toReportWorkUnits(stageIndex); + latestReconciliationActions = reconcileActions; + latestConflictsResolved = stageIndex.conflictsResolved; + latestEvictionsApplied = stageIndex.evictionsApplied; + latestUnmappedFallbacks = stageIndex.unmappedFallbacks; + latestArtifactResolutions = stageIndex.artifactResolutions ?? []; + latestEvictionInputs = eviction?.deletedRawPaths ?? []; + latestUnresolvedCards = unresolvedCards ?? []; + const provenancePlan = this.buildProvenancePlan({ + job, + syncId, + currentHashes, + stageIndex, + reconcileActions, + }); + const provenanceRows = provenancePlan.rows; + const currentRawPaths = new Set(currentHashes.keys()); + const deletedRawPaths = new Set(eviction?.deletedRawPaths ?? 
[]); + const provenanceValidationData = this.provenanceValidationTraceData({ + plan: provenancePlan, + currentRawPaths, + deletedRawPaths, + }); + const reportProvenanceRows = this.toReportProvenanceRows(provenanceRows); + latestReportProvenanceRows = reportProvenanceRows; + activeFailureDetails = provenanceValidationData; + await traceTimed( + runTrace, + 'provenance', + 'provenance_rows_validation', + provenanceValidationData, + async () => { + validateProvenanceRawPaths({ + rows: provenanceRows, + currentRawPaths, + deletedRawPaths, + }); + }, + ); + activeFailureDetails = undefined; // Stage 6 — squash commit + activePhase = 'squash'; const stage6 = ctx?.startPhase(0.04); + emitStageProgress('save', 91, 'Saving changes'); await stage6?.updateProgress(0.0, 'Saving changes'); try { await sessionWorktree.git.assertWorktreeClean(); @@ -1234,6 +2296,10 @@ export class IngestBundleRunner { throw new Error(`squash merge conflict: ${mergeResult.conflictPaths.join(', ')}`); } const commitSha = mergeResult.touchedPaths.length === 0 ? null : mergeResult.squashSha; + await runTrace.event('debug', 'squash', 'squash_finished', { + commitSha, + touchedPaths: mergeResult.touchedPaths, + }); const memoryFlowSavedActions = stageIndex.workUnits.flatMap((wu) => wu.actions).concat(reconcileActions); const postProcessorMemoryCounts = postProcessorSavedMemoryCounts(postProcessorOutcome); memoryFlow?.emit({ @@ -1275,83 +2341,14 @@ export class IngestBundleRunner { } const stage5 = ctx?.startPhase(0.04); + emitStageProgress('provenance', 95, 'Recording history'); await stage5?.updateProgress(0.0, 'Recording history'); + activePhase = 'provenance'; - // Provenance rows: per-artifact when the WU emitted actions, plus a `skipped` - // fallback for raw files that produced nothing so the next DiffSet still sees - // them. 
- const provenanceRows: Parameters[0] = []; - const actionToType = (a: MemoryAction): 'source_created' | 'measure_added' | 'wiki_written' => { - if (a.target === 'wiki') { - return 'wiki_written'; - } - // SL action: 'created' → source_created; 'updated' → measure_added (coarse-grained; - // action.detail preserves the finer distinction for the report body). - return a.type === 'created' ? 'source_created' : 'measure_added'; - }; - const producedPaths = new Set(); - const pushActionProvenance = (rawPath: string, action: MemoryAction): void => { - const hash = currentHashes.get(rawPath) ?? 'unknown'; - provenanceRows.push({ - connectionId: job.connectionId, - sourceKey: job.sourceKey, - syncId, - rawPath, - rawContentHash: hash, - artifactKind: action.target, - artifactKey: action.key, - targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, job.connectionId) : null, - artifactContentHash: null, - actionType: actionToType(action), - }); - producedPaths.add(rawPath); - }; - for (const wu of stageIndex.workUnits) { - for (const action of wu.actions) { - for (const rawPath of rawPathsForAction(action, wu.rawFiles)) { - pushActionProvenance(rawPath, action); - } - } - } - for (const action of reconcileActions) { - for (const rawPath of action.rawPaths ?? []) { - pushActionProvenance(rawPath, action); - } - } - for (const resolution of stageIndex.artifactResolutions ?? []) { - const hash = currentHashes.get(resolution.rawPath) ?? 
'unknown'; - provenanceRows.push({ - connectionId: job.connectionId, - sourceKey: job.sourceKey, - syncId, - rawPath: resolution.rawPath, - rawContentHash: hash, - artifactKind: resolution.artifactKind, - artifactKey: resolution.artifactKey, - targetConnectionId: null, - artifactContentHash: null, - actionType: resolution.actionType, - }); - producedPaths.add(resolution.rawPath); - } - for (const [rawPath, hash] of currentHashes) { - if (producedPaths.has(rawPath)) { - continue; - } - provenanceRows.push({ - connectionId: job.connectionId, - sourceKey: job.sourceKey, - syncId, - rawPath, - rawContentHash: hash, - artifactKind: null, - artifactKey: null, - targetConnectionId: null, - artifactContentHash: null, - actionType: 'skipped', - }); - } await this.deps.provenance.insertMany(provenanceRows); + await runTrace.event('debug', 'provenance', 'provenance_rows_inserted', { + rowCount: provenanceRows.length, + }); memoryFlow?.emit({ type: 'provenance_recorded', rowCount: provenanceRows.length }); await stage5?.updateProgress( 1.0, @@ -1359,17 +2356,10 @@ export class IngestBundleRunner { ); const stage7 = ctx?.startPhase(0.04); + emitStageProgress('report', 97, 'Wrapping up'); await stage7?.updateProgress(0.0, 'Wrapping up'); + activePhase = 'report'; - const reportProvenanceRows = provenanceRows.map( - ({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({ - rawPath, - artifactKind, - artifactKey, - targetConnectionId: targetConnectionId ?? null, - actionType, - }), - ); const reportToolTranscripts = Array.from(transcriptSummaries.values()).map((summary) => ({ unitKey: summary.unitKey, path: summary.path, @@ -1395,10 +2385,13 @@ export class IngestBundleRunner { : undefined; const reportBody = { + status: 'completed' as const, syncId, diffSummary, fetch: fetchReport ?? undefined, commitSha, + tracePath: runTrace.tracePath, + isolatedDiff: !overrideReport ? 
isolatedDiffSummary : undefined, workUnits: stageIndex.workUnits.map((wu) => ({ unitKey: wu.unitKey, rawFiles: wu.rawFiles, @@ -1477,6 +2470,11 @@ export class IngestBundleRunner { body: reportBody, }); const reportId = reportIdFromCreateResult(createdReport); + await runTrace.event('debug', 'report', 'success_report_created', { + reportId, + runId: runRow.id, + tracePath: runTrace.tracePath, + }); memoryFlow?.update({ ...(reportId ? { reportId, reportPath: reportId } : {}), }); @@ -1514,6 +2512,12 @@ export class IngestBundleRunner { }); } await stage7?.updateProgress(1.0, 'Done'); + await runTrace.event('info', 'run', 'ingest_finished', { + status: 'completed', + commitSha, + failedWorkUnits, + tracePath: runTrace.tracePath, + }); cleanupOutcome = 'success'; return { @@ -1528,5 +2532,80 @@ export class IngestBundleRunner { } finally { await this.deps.sessionWorktreeService.cleanup(sessionWorktree, cleanupOutcome); } + } catch (error) { + await activeTrace.event( + 'error', + 'run', + 'ingest_failed', + { + tracePath: activeTrace.tracePath, + phase: activePhase, + runId: runRow?.id ?? null, + syncId, + }, + error, + ); + if (runRow) { + await this.deps.runs.markFailed(runRow.id); + await this.deps.reports.create({ + runId: runRow.id, + jobId: job.jobId, + connectionId: job.connectionId, + sourceKey: job.sourceKey, + body: { + status: 'failed' as const, + syncId, + diffSummary: latestDiffSummary, + commitSha: null, + tracePath: activeTrace.tracePath, + isolatedDiff: latestIsolatedDiffSummary, + failure: { + phase: activePhase, + message: this.errorMessage(error), + ...(activeFailureDetails ? { details: activeFailureDetails } : {}), + }, + workUnits: + latestReportWorkUnits.length > 0 + ? 
latestReportWorkUnits + : latestWorkUnits.map((wu) => ({ + unitKey: wu.unitKey, + rawFiles: [], + status: wu.status, + reason: wu.reason, + actions: wu.actions, + touchedSlSources: wu.touchedSlSources, + slDisallowed: wu.slDisallowed, + slDisallowedReason: wu.slDisallowedReason, + })), + failedWorkUnits: latestFailedWorkUnits, + reconciliationSkipped: latestReconciliationSkipped, + conflictsResolved: latestConflictsResolved, + evictionsApplied: latestEvictionsApplied, + unmappedFallbacks: latestUnmappedFallbacks, + artifactResolutions: latestArtifactResolutions, + evictionInputs: latestEvictionInputs, + reconciliationActions: latestReconciliationActions, + evictionDecisions: [], + unresolvedCards: latestUnresolvedCards, + supersededBy: null, + overrideOf: null, + provenanceRows: latestReportProvenanceRows, + toolTranscripts: Array.from(transcriptSummaries.values()).map((summary) => ({ + unitKey: summary.unitKey, + path: summary.path, + toolCallCount: summary.toolCallCount, + errorCount: summary.errorCount, + toolNames: Array.from(summary.toolNames).sort(), + })), + }, + }); + await activeTrace.event('info', 'report', 'failure_report_created', { + runId: runRow.id, + jobId: job.jobId, + tracePath: activeTrace.tracePath, + }); + } + throw error; + } } } diff --git a/packages/context/src/ingest/ingest-trace.test.ts b/packages/context/src/ingest/ingest-trace.test.ts new file mode 100644 index 00000000..88b56a37 --- /dev/null +++ b/packages/context/src/ingest/ingest-trace.test.ts @@ -0,0 +1,85 @@ +import { mkdtemp, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js'; + +describe('FileIngestTraceWriter', () => { + it('persists structured trace events as JSONL', async () => { + const root = await mkdtemp(join(tmpdir(), 'ktx-trace-')); + const tracePath = 
ingestTracePathForJob(root, 'job-1'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-1', + connectionId: 'metabase-main', + sourceKey: 'metabase', + level: 'debug', + }); + + await trace.event('debug', 'snapshot', 'input_snapshot', { + baseSha: 'abc123', + rawFileCount: 2, + diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 }, + }); + + const lines = (await readFile(tracePath, 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(lines).toHaveLength(1); + expect(lines[0]).toMatchObject({ + schemaVersion: 1, + jobId: 'job-1', + connectionId: 'metabase-main', + sourceKey: 'metabase', + level: 'debug', + phase: 'snapshot', + event: 'input_snapshot', + data: { + baseSha: 'abc123', + rawFileCount: 2, + diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 }, + }, + }); + expect(typeof lines[0].at).toBe('string'); + }); + + it('records timing and error context for postmortem inspection', async () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z')); + const root = await mkdtemp(join(tmpdir(), 'ktx-trace-')); + const tracePath = ingestTracePathForJob(root, 'job-2'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-2', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + await expect( + traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => { + vi.advanceTimersByTime(17); + throw new Error('patch conflict'); + }), + ).rejects.toThrow('patch conflict'); + + const lines = (await readFile(tracePath, 'utf-8')) + .trim() + .split('\n') + .map((line) => JSON.parse(line)); + expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']); + expect(lines[1]).toMatchObject({ + level: 'error', + phase: 'integration', + data: { unitKey: 'wu-1' }, + error: { name: 'Error', message: 'patch conflict' }, + }); + expect(lines[1].durationMs).toBe(17); + vi.useRealTimers(); + }); + + it('uses 
// packages/context/src/ingest/ingest-trace.ts
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname, join } from 'node:path';

/** Verbosity of a trace event, and of the threshold configured on a writer. */
export type IngestTraceLevel = 'info' | 'debug' | 'trace' | 'error';

// Lower rank = more important. An event is written when its rank is at or
// below the rank of the writer's configured level.
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
  error: 0,
  info: 1,
  debug: 2,
  trace: 3,
};

/** Identifiers stamped onto every event, plus the output path and threshold. */
export interface IngestTraceContext {
  tracePath: string;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  runId?: string;
  syncId?: string;
  level?: IngestTraceLevel;
}

/** Shape of one JSONL line in the trace file. */
export interface IngestTraceEvent {
  schemaVersion: 1;
  at: string;
  level: IngestTraceLevel;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  runId?: string;
  syncId?: string;
  phase: string;
  event: string;
  durationMs?: number;
  // NOTE(review): value type reconstructed as `unknown`; callers pass nested
  // objects and arrays, so anything JSON-serializable must be accepted.
  data?: Record<string, unknown>;
  error?: {
    name: string;
    message: string;
    stack?: string;
  };
}

/** Sink for structured ingest trace events. */
export interface IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;
  // NOTE(review): parameter type reconstructed; implementations always keep
  // their own tracePath, so it is excluded from the overridable fields.
  withContext(context: Partial<Omit<IngestTraceContext, 'tracePath'>>): IngestTraceWriter;
  event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void>;
}

/**
 * Builds the trace file location for a job.
 *
 * @param homeDir - Directory under which `ingest-traces/` is created.
 * @param jobId - Job identifier used as the per-job directory name.
 * @returns `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  return join(homeDir, 'ingest-traces', jobId, 'trace.jsonl');
}

/** JSON-encodes a value, or returns undefined when it cannot be encoded. */
function tryJson(value: unknown): string | undefined {
  try {
    return JSON.stringify(value);
  } catch {
    // Circular structures and BigInt values cannot be stringified.
    return undefined;
  }
}

/**
 * Converts a thrown value into the serializable `error` field.
 * Returns undefined for `null`/`undefined` so the field is omitted.
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      ...(error.stack ? { stack: error.stack } : {}),
    };
  }
  const text = String(error);
  // A thrown plain object stringifies to "[object Object]", which is useless in
  // a postmortem; fall back to its JSON form when one exists.
  const message = text === '[object Object]' ? (tryJson(error) ?? text) : text;
  return { name: 'Error', message };
}

/** True when an `incoming` event passes the `configured` verbosity threshold. */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  return TRACE_LEVEL_RANK[incoming] <= TRACE_LEVEL_RANK[configured];
}

/** Trace writer that appends one JSON object per line to `tracePath`. */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /** @param context - Identifiers and output path; `level` defaults to `'debug'`. */
  constructor(context: IngestTraceContext) {
    this.context = { ...context, level: context.level ?? 'debug' };
    this.tracePath = context.tracePath;
  }

  /** Returns a new writer for the same file with `context` fields overlaid. */
  withContext(context: Partial<Omit<IngestTraceContext, 'tracePath'>>): IngestTraceWriter {
    return new FileIngestTraceWriter({ ...this.context, ...context, tracePath: this.tracePath });
  }

  /**
   * Appends one event to the trace file, creating parent directories as needed.
   * Events above the configured verbosity are dropped without touching disk.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    if (!shouldWrite(this.context.level ?? 'debug', level)) {
      return;
    }
    const serializedError = serializeError(error);
    // Optional fields are spread conditionally so absent values produce no key.
    const payload: IngestTraceEvent = {
      schemaVersion: 1,
      at: new Date().toISOString(),
      level,
      jobId: this.context.jobId,
      connectionId: this.context.connectionId,
      sourceKey: this.context.sourceKey,
      ...(this.context.runId ? { runId: this.context.runId } : {}),
      ...(this.context.syncId ? { syncId: this.context.syncId } : {}),
      phase,
      event,
      ...(durationMs !== undefined ? { durationMs } : {}),
      ...(data ? { data } : {}),
      ...(serializedError ? { error: serializedError } : {}),
    };
    await mkdir(dirname(this.tracePath), { recursive: true });
    await appendFile(this.tracePath, `${JSON.stringify(payload)}\n`, 'utf-8');
  }
}

/** Trace writer that discards every event; `tracePath` is the empty string. */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';
  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  withContext(): IngestTraceWriter {
    return this;
  }

  async event(): Promise<void> {}
}

/**
 * Runs `fn` bracketed by `<event>_started` and `<event>_finished` events, or
 * `<event>_failed` when `fn` rejects. The closing event carries the elapsed
 * wall-clock milliseconds.
 *
 * @param trace - Writer that receives the events.
 * @param phase - Phase label stamped on each event.
 * @param event - Base event name; suffixes are appended to it.
 * @param data - Payload attached to all emitted events.
 * @param fn - Operation to time.
 * @returns Whatever `fn` resolves to.
 * @throws The error `fn` rejected with, or a trace write error from the
 *   `_started` / `_finished` events.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  const started = Date.now();
  let result: T;
  try {
    result = await fn();
  } catch (error) {
    try {
      await trace.event('error', phase, `${event}_failed`, data, error, Date.now() - started);
    } catch {
      // A failing trace write must not replace the operation's own error,
      // which is the root cause callers need to see.
    }
    throw error;
  }
  // Emitted outside the try block: a trace I/O failure here must not be
  // recorded as `<event>_failed`, because the operation itself succeeded.
  await trace.event('debug', phase, `${event}_finished`, data, undefined, Date.now() - started);
  return result;
}
].join('\n'); + + expect(parsePatchTouchedPaths(patch)).toEqual([ + { + path: 'wiki/global/a.md', + oldPath: 'wiki/global/a.md', + newPath: 'wiki/global/a.md', + mode: '100644', + binary: false, + }, + { + path: 'semantic-layer/c1/orders.yaml', + oldPath: 'semantic-layer/c1/orders.yaml', + newPath: 'semantic-layer/c1/orders.yaml', + mode: '100644', + binary: false, + }, + ]); + }); + + it('rejects semantic-layer paths for slDisallowed work units', () => { + const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n'; + + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'lookml-mismatch', + patch, + slDisallowed: true, + }), + ).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/); + }); + + it('rejects semantic-layer paths outside allowed target connections', () => { + const patch = + 'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n'; + + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'wu-finance', + patch, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + }), + ).toThrow( + /semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/, + ); + }); + + it('rejects executable and binary changes under known text artifact roots', () => { + expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']); + + const executablePatch = + 'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n'; + expect(() => + assertPatchAllowedForWorkUnit({ + unitKey: 'wu-1', + patch: executablePatch, + slDisallowed: false, + }), + ).toThrow(/unexpected executable mode under wiki\/global\/a.md/); + + const binaryPatch = [ + 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml', + 'index 1111111..2222222 100644', + 'GIT binary patch', + 'literal 0', + '', + ].join('\n'); + 
// packages/context/src/ingest/isolated-diff/git-patch.ts
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';

/** Path prefixes whose files must stay plain, non-executable text. */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;

/** One file entry parsed from a git patch. */
export interface PatchTouchedPath {
  path: string;
  oldPath: string;
  newPath: string;
  /** Last file mode seen for the entry, or null when the patch states none. */
  mode: string | null;
  binary: boolean;
}

/** Input to {@link assertPatchAllowedForWorkUnit}. */
export interface PatchPolicyInput {
  unitKey: string;
  patch: string;
  slDisallowed: boolean;
  allowedTargetConnectionIds?: ReadonlySet<string>;
}

const DIFF_HEADER_PREFIX = 'diff --git ';

/** Removes git's leading `a/` or `b/` side marker from a header path. */
function stripPrefix(path: string): string {
  return path.replace(/^[ab]\//, '');
}

/** True when `path` lies under one of {@link textArtifactRoots}. */
function isTextArtifactPath(path: string): boolean {
  return textArtifactRoots.some((root) => path.startsWith(root));
}

/**
 * Splits the path portion of a `diff --git` header into its two sides.
 *
 * Git does not quote spaces, so `a/my page.md b/my page.md` cannot be split on
 * "the" space. When both sides name the same file the separator sits exactly in
 * the middle of the string, which is unambiguous; that case is tried first.
 * Anything else falls back to a greedy split on the last space.
 *
 * NOTE(review): C-quoted header paths (`"a/caf\303\251.md"`) are not decoded
 * here and pass through verbatim.
 */
function splitDiffHeaderPaths(rest: string): [string, string] | null {
  // "a/<p> b/<p>" has length 2 * |p| + 5, which is always odd.
  if (rest.length % 2 === 1) {
    const middle = (rest.length - 1) / 2;
    const left = rest.slice(0, middle);
    const right = rest.slice(middle + 1);
    if (
      rest[middle] === ' ' &&
      left.startsWith('a/') &&
      right.startsWith('b/') &&
      left.slice(2) === right.slice(2)
    ) {
      return [left, right];
    }
  }
  const fallback = /^(.+) (.+)$/.exec(rest);
  return fallback ? [fallback[1] ?? '', fallback[2] ?? ''] : null;
}

/**
 * Lists the files a git patch touches, in patch order.
 *
 * Each `diff --git` header opens an entry. Following `index`, `new mode` and
 * `new file mode` lines set its mode; a `GIT binary patch` or `Binary files`
 * line marks it binary. Lines before the first header are ignored.
 *
 * @param patch - Patch text, expected to come from a no-rename diff.
 * @returns One entry per `diff --git` header.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const headerPaths = line.startsWith(DIFF_HEADER_PREFIX)
      ? splitDiffHeaderPaths(line.slice(DIFF_HEADER_PREFIX.length))
      : null;
    if (headerPaths) {
      if (current) {
        entries.push(current);
      }
      const oldPath = stripPrefix(headerPaths[0]);
      const newPath = stripPrefix(headerPaths[1]);
      current = {
        path: newPath === '/dev/null' ? oldPath : newPath,
        oldPath,
        newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }
    // The trailing mode on an `index` line is optional.
    const indexMode = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = /^new mode ([0-7]{6})$/.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = /^new file mode ([0-7]{6})$/.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}

/**
 * Validates a work unit's patch against the isolated-diff policy.
 *
 * Checks run in this order:
 * 1. When `allowedTargetConnectionIds` is given, every touched path is passed
 *    to `assertSemanticLayerTargetPathsAllowed`.
 * 2. An `slDisallowed` work unit may not touch anything under `semantic-layer/`.
 * 3. Files under {@link textArtifactRoots} may not be binary or carry a mode
 *    other than `100644`.
 *
 * @param input - Work unit key, patch text and policy switches.
 * @returns The parsed touched paths when every check passes.
 * @throws Error naming the offending path on the first violation found.
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const touched = parsePatchTouchedPaths(input.patch);
  if (input.allowedTargetConnectionIds) {
    assertSemanticLayerTargetPathsAllowed({
      paths: touched.map((entry) => entry.path),
      allowedConnectionIds: input.allowedTargetConnectionIds,
    });
  }
  for (const entry of touched) {
    if (input.slDisallowed && entry.path.startsWith('semantic-layer/')) {
      throw new Error(`slDisallowed WorkUnit ${input.unitKey} touched ${entry.path}`);
    }
    if (!isTextArtifactPath(entry.path)) {
      continue;
    }
    if (entry.binary) {
      throw new Error(`unexpected binary patch under ${entry.path}`);
    }
    if (entry.mode && entry.mode !== '100644') {
      throw new Error(`unexpected executable mode under ${entry.path}: ${entry.mode}`);
    }
  }
  return touched;
}
from 'vitest'; +import { GitService } from '../../core/index.js'; +import { FileIngestTraceWriter } from '../ingest-trace.js'; +import { integrateWorkUnitPatch } from './patch-integrator.js'; + +async function makeRepo() { + const homeDir = await mkdtemp(join(tmpdir(), 'ktx-integrate-')); + const configDir = join(homeDir, 'config'); + const git = new GitService({ + storage: { configDir, homeDir }, + git: { + userName: 'System User', + userEmail: 'system@example.com', + bootstrapMessage: 'init', + bootstrapAuthor: 'system', + bootstrapAuthorEmail: 'system@example.com', + }, + }); + await git.onModuleInit(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'old\n'); + await git.commitFiles(['wiki/global/a.md'], 'base', 'System User', 'system@example.com'); + return { homeDir, configDir, git, baseSha: await git.revParseHead() }; +} + +describe('integrateWorkUnitPatch', () => { + it('applies a clean patch, runs semantic gates, and commits accepted changes', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child'); + await git.addWorktree(childDir, 'child', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'new\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/wu.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'), + jobId: 'job-1', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-1', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: 
false, + allowedTargetConnectionIds: new Set(['c1']), + }); + + expect(result.status).toBe('accepted'); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('new\n'); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_apply_finished'); + }); + + it('rolls back and classifies semantic conflicts', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic'); + await git.addWorktree(childDir, 'child-semantic', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'bad\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/bad.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-2/trace.jsonl'), + jobId: 'job-2', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-bad', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + }); + + expect(result.status).toBe('semantic_conflict'); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n'); + }); + + it('classifies slDisallowed patch policy failures as traced textual conflicts', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + await mkdir(join(configDir, 'semantic-layer/c1'), { recursive: true }); + await git.commitFiles(['semantic-layer/c1'], 'empty sl dir', 'System User', 'system@example.com'); + const childDir = join(homeDir, 'child-policy'); + await 
git.addWorktree(childDir, 'child-policy', baseSha); + const childGit = git.forWorktree(childDir); + await mkdir(join(childDir, 'semantic-layer/c1'), { recursive: true }); + await writeFile(join(childDir, 'semantic-layer/c1/orders.yaml'), 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n'); + await childGit.commitFiles(['semantic-layer/c1/orders.yaml'], 'forbidden sl', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/forbidden.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-policy/trace.jsonl'), + jobId: 'job-policy', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'lookml-mismatch', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: true, + allowedTargetConnectionIds: new Set(['c1']), + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + touchedPaths: ['semantic-layer/c1/orders.yaml'], + }); + const rawTrace = await readFile(trace.tracePath, 'utf-8'); + expect(rawTrace).toContain('patch_policy_rejected'); + expect(rawTrace).toContain('slDisallowed WorkUnit lookml-mismatch touched semantic-layer/c1/orders.yaml'); + }); + + it('classifies unauthorized semantic-layer targets as traced textual conflicts', async () => { + const { homeDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-target-policy'); + await git.addWorktree(childDir, 'child-target-policy', baseSha); + const childGit = git.forWorktree(childDir); + await mkdir(join(childDir, 'semantic-layer/finance'), { recursive: true }); + await writeFile( + join(childDir, 'semantic-layer/finance/orders.yaml'), + 'name: orders\ncolumns: []\njoins: []\nmeasures: []\n', + ); + await 
childGit.commitFiles(['semantic-layer/finance/orders.yaml'], 'unauthorized sl', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/unauthorized.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-target-policy/trace.jsonl'), + jobId: 'job-target-policy', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-finance', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockResolvedValue(undefined), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + touchedPaths: ['semantic-layer/finance/orders.yaml'], + }); + const rawTrace = await readFile(trace.tracePath, 'utf-8'); + expect(rawTrace).toContain('patch_policy_rejected'); + expect(rawTrace).toContain('semantic-layer target connection not allowed'); + expect(rawTrace).toContain('allowedTargetConnectionIds'); + }); + + it('repairs a textual conflict through the bounded resolver and commits repaired files', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com'); + const conflictBase = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com'); + + const childDir = join(homeDir, 'child-conflict'); + await git.addWorktree(childDir, 'child-conflict', conflictBase); + const childGit = git.forWorktree(childDir); + 
await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8'); + await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal.patch'); + await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath); + + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver/trace.jsonl'), + jobId: 'job-resolver', + connectionId: 'warehouse', + sourceKey: 'metabase', + level: 'trace', + }); + + const validateAppliedTree = vi.fn(async (paths: string[]) => { + expect(paths).toEqual(['wiki/global/a.md']); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n'); + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-conflict', + patchPath, + integrationGit: git, + trace, + author: { name: 'System User', email: 'system@example.com' }, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + validateAppliedTree, + resolveTextualConflict: vi.fn(async (context) => { + expect(context).toMatchObject({ + unitKey: 'wu-conflict', + patchPath, + touchedPaths: ['wiki/global/a.md'], + }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\nproposal\n', 'utf-8'); + return { + status: 'repaired' as const, + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }; + }), + }); + + expect(result).toMatchObject({ + status: 'accepted', + touchedPaths: ['wiki/global/a.md'], + textualResolution: { + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }, + }); + expect(validateAppliedTree).toHaveBeenCalledOnce(); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\nproposal\n'); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_textual_resolution'); + expect(await git.revParseHead()).not.toBe(baseSha); + }); + + it('keeps the pre-apply 
integration tree when the resolver cannot repair a textual conflict', async () => { + const { homeDir, configDir, git } = await makeRepo(); + await mkdir(join(configDir, 'wiki/global'), { recursive: true }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'base\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'base page', 'System User', 'system@example.com'); + const conflictBase = await git.revParseHead(); + + await writeFile(join(configDir, 'wiki/global/a.md'), 'accepted\n', 'utf-8'); + await git.commitFiles(['wiki/global/a.md'], 'accepted edit', 'System User', 'system@example.com'); + const acceptedHead = await git.revParseHead(); + + const childDir = join(homeDir, 'child-conflict-fails'); + await git.addWorktree(childDir, 'child-conflict-fails', conflictBase); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'proposal\n', 'utf-8'); + await childGit.commitFiles(['wiki/global/a.md'], 'proposal edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'proposal-fails.patch'); + await childGit.writeBinaryNoRenamePatch(conflictBase, 'HEAD', patchPath); + + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-resolver-fails/trace.jsonl'), + jobId: 'job-resolver-fails', + connectionId: 'warehouse', + sourceKey: 'metabase', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-conflict', + patchPath, + integrationGit: git, + trace, + author: { name: 'System User', email: 'system@example.com' }, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['warehouse']), + validateAppliedTree: vi.fn(async () => {}), + resolveTextualConflict: vi.fn(async () => ({ + status: 'failed' as const, + attempts: 1, + reason: 'resolver completed without editing an allowed path', + })), + }); + + expect(result).toMatchObject({ + status: 'textual_conflict', + textualResolution: { + status: 'failed', + attempts: 1, + 
reason: 'resolver completed without editing an allowed path', + }, + }); + expect(await git.revParseHead()).toBe(acceptedHead); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('accepted\n'); + }); + + it('repairs semantic gate failures after a patch applies cleanly', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic-repair'); + await git.addWorktree(childDir, 'child-semantic-repair', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/semantic-repair.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair/trace.jsonl'), + jobId: 'job-semantic-repair', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + const validateAppliedTree = vi + .fn() + .mockRejectedValueOnce(new Error('final artifact gates failed:\na: unknown semantic-layer entity')) + .mockResolvedValueOnce(undefined); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-repairable', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree, + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + repairGateFailure: vi.fn(async (context) => { + expect(context).toMatchObject({ + unitKey: 'wu-repairable', + patchPath, + touchedPaths: ['wiki/global/a.md'], + }); + await writeFile(join(configDir, 'wiki/global/a.md'), 'repaired semantic ref\n', 'utf-8'); + return { + status: 'repaired' as const, + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }; + }), + }); + + expect(result).toMatchObject({ + status: 'accepted', + 
touchedPaths: ['wiki/global/a.md'], + gateRepair: { + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/a.md'], + }, + }); + expect(validateAppliedTree).toHaveBeenCalledTimes(2); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('repaired semantic ref\n'); + await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('patch_accepted_after_gate_repair'); + }); + + it('keeps the pre-apply tree when semantic gate repair fails', async () => { + const { homeDir, configDir, git, baseSha } = await makeRepo(); + const childDir = join(homeDir, 'child-semantic-repair-fails'); + await git.addWorktree(childDir, 'child-semantic-repair-fails', baseSha); + const childGit = git.forWorktree(childDir); + await writeFile(join(childDir, 'wiki/global/a.md'), 'bad semantic ref\n'); + await childGit.commitFiles(['wiki/global/a.md'], 'bad semantic edit', 'System User', 'system@example.com'); + const patchPath = join(homeDir, 'patches/semantic-repair-fails.patch'); + await childGit.writeBinaryNoRenamePatch(baseSha, 'HEAD', patchPath); + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-semantic-repair-fails/trace.jsonl'), + jobId: 'job-semantic-repair-fails', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await integrateWorkUnitPatch({ + unitKey: 'wu-not-repaired', + patchPath, + integrationGit: git, + trace, + author: { name: 'KTX Test', email: 'system@ktx.local' }, + validateAppliedTree: vi.fn().mockRejectedValue(new Error('final artifact gates failed')), + slDisallowed: false, + allowedTargetConnectionIds: new Set(['c1']), + repairGateFailure: vi.fn(async () => ({ + status: 'failed' as const, + attempts: 1, + reason: 'gate repair completed without editing an allowed path', + })), + }); + + expect(result).toMatchObject({ + status: 'semantic_conflict', + gateRepair: { + status: 'failed', + attempts: 1, + reason: 'gate repair completed without editing 
an allowed path', + }, + }); + await expect(readFile(join(configDir, 'wiki/global/a.md'), 'utf-8')).resolves.toBe('old\n'); + }); +}); diff --git a/packages/context/src/ingest/isolated-diff/patch-integrator.ts b/packages/context/src/ingest/isolated-diff/patch-integrator.ts new file mode 100644 index 00000000..a4542576 --- /dev/null +++ b/packages/context/src/ingest/isolated-diff/patch-integrator.ts @@ -0,0 +1,321 @@ +import { readFile } from 'node:fs/promises'; +import type { GitService } from '../../core/index.js'; +import type { FinalGateRepairResult } from '../final-gate-repair.js'; +import type { IngestTraceWriter } from '../ingest-trace.js'; +import { traceTimed } from '../ingest-trace.js'; +import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths } from './git-patch.js'; +import type { TextualConflictResolutionResult } from './textual-conflict-resolver.js'; + +export type PatchIntegrationTextualResolution = + | { status: 'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export type PatchIntegrationResult = + | { + status: 'accepted'; + commitSha: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + } + | { + status: 'textual_conflict'; + reason: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + } + | { + status: 'semantic_conflict'; + reason: string; + touchedPaths: string[]; + textualResolution?: PatchIntegrationTextualResolution; + gateRepair?: FinalGateRepairResult; + }; + +/** + * Everything integrateWorkUnitPatch needs to apply one WorkUnit patch. + * + * validateAppliedTree is the semantic gate: it receives the touched (or repaired) paths and + * rejects to signal a gate failure. resolveTextualConflict and repairGateFailure are optional + * repair hooks; when one is absent the corresponding failure is returned to the caller directly. + */ +export interface IntegrateWorkUnitPatchInput { + unitKey: string; + patchPath: string; + integrationGit: GitService; + trace: IngestTraceWriter; + author: { name: string; email: string }; + slDisallowed: boolean; + allowedTargetConnectionIds: ReadonlySet<string>; + validateAppliedTree(touchedPaths: string[]): Promise<void>; + resolveTextualConflict?(input: { + unitKey: 
string; + patchPath: string; + touchedPaths: string[]; + reason: string; + }): Promise<TextualConflictResolutionResult>; + repairGateFailure?(input: { + unitKey: string; + patchPath: string; + touchedPaths: string[]; + reason: string; + }): Promise<FinalGateRepairResult>; +} + +/** Returns the message of an Error instance, or String(error) for any other thrown value. */ +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +/** + * Applies one WorkUnit patch to the integration worktree and commits it once it passes every gate. + * + * Flow: + * 1. Reads the patch and parses its touched paths. A patch that touches nothing is accepted as a + *    no-op and reports the current HEAD (or an empty string when revParseHead yields no value). + * 2. Runs assertPatchAllowedForWorkUnit. A violation is traced as patch_policy_rejected and + *    returned as textual_conflict; nothing has been applied at that point. + * 3. Applies the patch with applyPatchFile3WayIndex followed by assertWorktreeClean. On failure the + *    worktree is reset to the pre-apply HEAD; if resolveTextualConflict is provided it may repair + *    the files, after which validateAppliedTree runs on the resolver changedPaths and those paths + *    are committed. + * 4. Runs validateAppliedTree on the touched paths. On failure repairGateFailure, if provided, may + *    repair the files, which are validated again and committed. + * 5. Otherwise commits the staged patch with commitStaged. + * + * Every failure after the apply step resets the worktree to the pre-apply HEAD when one was read. + * Policy, apply and gate failures are returned as results rather than thrown. + * NOTE(review): exceptions from the optional repair hooks or from the commit calls are not caught + * here and would propagate without a reset - confirm callers handle that. + * + * @param input - Patch location, integration GitService, trace writer, policy and gate hooks. + * @returns accepted (with commitSha), textual_conflict or semantic_conflict, each with touchedPaths. + */ +export async function integrateWorkUnitPatch(input: IntegrateWorkUnitPatchInput): Promise<PatchIntegrationResult> { + const preApplyHead = await input.integrationGit.revParseHead(); + const patch = await readFile(input.patchPath, 'utf-8'); + const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path); + if (touchedPaths.length === 0) { + await input.trace.event('debug', 'integration', 'patch_noop_accepted', { + unitKey: input.unitKey, + patchPath: input.patchPath, + patchBytes: Buffer.byteLength(patch), + }); + return { status: 'accepted', commitSha: preApplyHead ?? '', touchedPaths }; + } + try { + assertPatchAllowedForWorkUnit({ + unitKey: input.unitKey, + patch, + slDisallowed: input.slDisallowed, + allowedTargetConnectionIds: input.allowedTargetConnectionIds, + }); + } catch (error) { + await input.trace.event('error', 'integration', 'patch_policy_rejected', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + allowedTargetConnectionIds: [...input.allowedTargetConnectionIds].sort(), + reason: errorMessage(error), + }); + return { + status: 'textual_conflict', + reason: errorMessage(error), + touchedPaths, + }; + } + + try { + await traceTimed( + input.trace, + 'integration', + 'patch_apply', + { unitKey: input.unitKey, patchPath: input.patchPath, touchedPaths }, + async () => { + await input.integrationGit.applyPatchFile3WayIndex(input.patchPath); + await input.integrationGit.assertWorktreeClean(); + }, + ); + } catch (error) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + const reason = errorMessage(error); + await input.trace.event('error', 'integration', 
'patch_textual_conflict', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (!input.resolveTextualConflict) { + return { + status: 'textual_conflict', + reason, + touchedPaths, + }; + } + + const textualResolution = await input.resolveTextualConflict({ + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (textualResolution.status === 'failed') { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'textual_conflict', + reason: textualResolution.reason, + touchedPaths, + textualResolution, + }; + } + + try { + await traceTimed( + input.trace, + 'integration', + 'semantic_gate_after_textual_resolution', + { unitKey: input.unitKey, touchedPaths: textualResolution.changedPaths }, + async () => { + await input.validateAppliedTree(textualResolution.changedPaths); + }, + ); + } catch (semanticError) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + await input.trace.event('error', 'integration', 'patch_semantic_conflict_after_textual_resolution', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: textualResolution.changedPaths, + reason: errorMessage(semanticError), + }); + return { + status: 'semantic_conflict', + reason: errorMessage(semanticError), + touchedPaths: textualResolution.changedPaths, + textualResolution, + }; + } + + const commit = await input.integrationGit.commitFiles( + textualResolution.changedPaths, + `ingest: resolve WorkUnit ${input.unitKey} conflict`, + input.author.name, + input.author.email, + ); + if (!commit.created) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + const noChangeReason = 'textual resolver produced no committable changes'; + await input.trace.event('error', 'integration', 'textual_conflict_resolver_noop', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths: 
textualResolution.changedPaths, + }); + return { + status: 'textual_conflict', + reason: noChangeReason, + touchedPaths: textualResolution.changedPaths, + textualResolution, + }; + } + + await input.trace.event('debug', 'integration', 'patch_accepted_after_textual_resolution', { + unitKey: input.unitKey, + commitSha: commit.commitHash, + touchedPaths: textualResolution.changedPaths, + attempts: textualResolution.attempts, + }); + return { + status: 'accepted', + commitSha: commit.commitHash, + touchedPaths: textualResolution.changedPaths, + textualResolution, + }; + } + + try { + await traceTimed(input.trace, 'integration', 'semantic_gate', { unitKey: input.unitKey, touchedPaths }, async () => { + await input.validateAppliedTree(touchedPaths); + }); + } catch (error) { + const reason = errorMessage(error); + await input.trace.event('error', 'integration', 'patch_semantic_conflict', { + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (input.repairGateFailure) { + const gateRepair = await input.repairGateFailure({ + unitKey: input.unitKey, + patchPath: input.patchPath, + touchedPaths, + reason, + }); + + if (gateRepair.status === 'failed') { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: gateRepair.reason, + touchedPaths, + gateRepair, + }; + } + + try { + await traceTimed( + input.trace, + 'integration', + 'semantic_gate_after_gate_repair', + { unitKey: input.unitKey, touchedPaths: gateRepair.changedPaths }, + async () => { + await input.validateAppliedTree(gateRepair.changedPaths); + }, + ); + } catch (repairValidationError) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: errorMessage(repairValidationError), + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + const commit = await input.integrationGit.commitFiles( + 
gateRepair.changedPaths, + `ingest: repair WorkUnit ${input.unitKey} gates`, + input.author.name, + input.author.email, + ); + if (!commit.created) { + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason: 'gate repair produced no committable changes', + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + await input.trace.event('debug', 'integration', 'patch_accepted_after_gate_repair', { + unitKey: input.unitKey, + commitSha: commit.commitHash, + touchedPaths: gateRepair.changedPaths, + attempts: gateRepair.attempts, + }); + return { + status: 'accepted', + commitSha: commit.commitHash, + touchedPaths: gateRepair.changedPaths, + gateRepair, + }; + } + + if (preApplyHead) { + await input.integrationGit.resetHardTo(preApplyHead); + } + return { + status: 'semantic_conflict', + reason, + touchedPaths, + }; + } + + const commit = await input.integrationGit.commitStaged( + `ingest: accept WorkUnit ${input.unitKey}`, + input.author.name, + input.author.email, + ); + await input.trace.event('debug', 'integration', 'patch_accepted', { + unitKey: input.unitKey, + commitSha: commit.commitHash, + touchedPaths, + }); + return { status: 'accepted', commitSha: commit.commitHash, touchedPaths }; +} diff --git a/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts b/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts new file mode 100644 index 00000000..ae5b4e21 --- /dev/null +++ b/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.test.ts @@ -0,0 +1,120 @@ +import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it, vi } from 'vitest'; +import { FileIngestTraceWriter } from '../ingest-trace.js'; +import { resolveTextualConflict } from './textual-conflict-resolver.js'; + +async function makeHarness() { + const 
root = await mkdtemp(join(tmpdir(), 'ktx-textual-resolver-')); + const workdir = join(root, 'workdir'); + const patchPath = join(root, 'failed.patch'); + const trace = new FileIngestTraceWriter({ + tracePath: join(root, 'trace.jsonl'), + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + runId: 'run-1', + syncId: 'sync-1', + level: 'trace', + }); + await mkdir(join(workdir, 'wiki/global'), { recursive: true }); + await writeFile(join(workdir, 'wiki/global/account.md'), 'accepted line\n', 'utf-8'); + await writeFile( + patchPath, + [ + 'diff --git a/wiki/global/account.md b/wiki/global/account.md', + 'index 8877391..6f63f4d 100644', + '--- a/wiki/global/account.md', + '+++ b/wiki/global/account.md', + '@@ -1 +1 @@', + '-base line', + '+proposal line', + '', + ].join('\n'), + 'utf-8', + ); + return { root, workdir, patchPath, trace }; +} + +describe('resolveTextualConflict', () => { + it('lets the repair agent read the failed patch and write only touched paths', async () => { + const { workdir, patchPath, trace } = await makeHarness(); + const agentRunner = { + runLoop: vi.fn(async (params: any) => { + const current = await params.toolSet.read_integration_file.execute({ path: 'wiki/global/account.md' }); + expect(current.structured).toEqual({ path: 'wiki/global/account.md', exists: true }); + expect(current.markdown).toContain('accepted line'); + + const patch = await params.toolSet.read_failed_patch.execute({}); + expect(patch.markdown).toContain('proposal line'); + + await expect( + params.toolSet.write_integration_file.execute({ + path: 'wiki/global/not-allowed.md', + content: 'bad\n', + }), + ).rejects.toThrow(/resolver path not allowed/); + + await params.toolSet.write_integration_file.execute({ + path: 'wiki/global/account.md', + content: 'accepted line\nproposal line\n', + }); + return { stopReason: 'natural' as const }; + }), + }; + + const result = await resolveTextualConflict({ + agentRunner, + workdir, + unitKey: 'wu-a', + patchPath, + 
touchedPaths: ['wiki/global/account.md'], + trace, + reason: 'patch failed: wiki/global/account.md', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'repaired', + attempts: 1, + changedPaths: ['wiki/global/account.md'], + }); + await expect(readFile(join(workdir, 'wiki/global/account.md'), 'utf-8')).resolves.toBe( + 'accepted line\nproposal line\n', + ); + expect(agentRunner.runLoop).toHaveBeenCalledWith( + expect.objectContaining({ + modelRole: 'repair', + stepBudget: 8, + telemetryTags: expect.objectContaining({ + operationName: 'ingest-isolated-diff-textual-resolver', + jobId: 'job-1', + unitKey: 'wu-a', + }), + }), + ); + }); + + it('fails when the repair agent completes without editing any touched path', async () => { + const { workdir, patchPath, trace } = await makeHarness(); + const result = await resolveTextualConflict({ + agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) }, + workdir, + unitKey: 'wu-a', + patchPath, + touchedPaths: ['wiki/global/account.md'], + trace, + reason: 'patch failed: wiki/global/account.md', + maxAttempts: 1, + stepBudget: 8, + }); + + expect(result).toEqual({ + status: 'failed', + attempts: 1, + reason: 'resolver completed without editing an allowed path', + }); + }); +}); diff --git a/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts b/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts new file mode 100644 index 00000000..c5128291 --- /dev/null +++ b/packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts @@ -0,0 +1,238 @@ +import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; +import { dirname, join } from 'node:path'; +import { z } from 'zod'; +import type { AgentRunnerPort, KtxRuntimeToolSet } from '../../llm/index.js'; +import type { IngestTraceWriter } from '../ingest-trace.js'; +import { traceTimed } from '../ingest-trace.js'; + +export type TextualConflictResolutionResult = + | { status: 
'repaired'; attempts: number; changedPaths: string[] } + | { status: 'failed'; attempts: number; reason: string }; + +export interface ResolveTextualConflictInput { + agentRunner: AgentRunnerPort; + workdir: string; + unitKey: string; + patchPath: string; + touchedPaths: string[]; + trace: IngestTraceWriter; + reason: string; + maxAttempts?: number; + stepBudget?: number; +} + +const readIntegrationFileSchema = z.object({ + path: z.string().min(1), +}); + +const writeIntegrationFileSchema = z.object({ + path: z.string().min(1), + content: z.string(), +}); + +const deleteIntegrationFileSchema = z.object({ + path: z.string().min(1), +}); + +function normalizeRepoPath(path: string): string { + const normalized = path.replace(/\\/g, '/').replace(/^\/+/, ''); + const parts = normalized.split('/').filter((part) => part.length > 0); + if (parts.length === 0 || parts.some((part) => part === '.' || part === '..')) { + throw new Error(`resolver path must be a repository-relative path: ${path}`); + } + return parts.join('/'); +} + +function assertAllowedPath(path: string, allowedPaths: ReadonlySet): string { + const normalized = normalizeRepoPath(path); + if (!allowedPaths.has(normalized)) { + throw new Error(`resolver path not allowed: ${normalized}`); + } + return normalized; +} + +async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> { + try { + return { exists: true, content: await readFile(path, 'utf-8') }; + } catch (error) { + if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') { + return { exists: false, content: '' }; + } + throw error; + } +} + +function buildResolverSystemPrompt(): string { + return ` +You repair one failed KTX isolated-diff patch inside the integration worktree. + + + +- Preserve accepted integration content that is unrelated to the failed patch. +- Incorporate the failed patch only when the patch evidence is compatible with the current file. 
+- Edit only paths exposed by the resolver tools. +- Prefer the smallest text edit that makes the composed artifact coherent. +- Do not create new facts that are absent from the current file or failed patch. +- Stop after writing the repaired file content. +`; +} + +function buildResolverUserPrompt(input: { + unitKey: string; + patchPath: string; + touchedPaths: string[]; + reason: string; + attempt: number; + maxAttempts: number; +}): string { + return `Repair isolated-diff textual conflict. + +WorkUnit: ${input.unitKey} +Attempt: ${input.attempt} of ${input.maxAttempts} +Patch path: ${input.patchPath} +Touched paths: +${input.touchedPaths.map((path) => `- ${path}`).join('\n')} + +Git apply failure: +${input.reason} + +Use read_failed_patch first. Then read the touched integration files, write the +repaired content, and stop.`; +} + +function buildToolSet(input: { + workdir: string; + patchPath: string; + allowedPaths: ReadonlySet; + editedPaths: Set; +}): KtxRuntimeToolSet { + return { + read_failed_patch: { + name: 'read_failed_patch', + description: 'Read the failed Git patch that could not be applied to the integration worktree.', + inputSchema: z.object({}), + execute: async () => { + const patch = await readFile(input.patchPath, 'utf-8'); + return { + markdown: patch, + structured: { patchPath: input.patchPath, bytes: Buffer.byteLength(patch) }, + }; + }, + }, + read_integration_file: { + name: 'read_integration_file', + description: 'Read one allowed file from the current integration worktree.', + inputSchema: readIntegrationFileSchema, + execute: async ({ path }: z.infer) => { + const normalized = assertAllowedPath(path, input.allowedPaths); + const file = await readOptionalFile(join(input.workdir, normalized)); + return { + markdown: file.exists ? 
/**
 * Runs the repair agent against a patch that failed to apply to the integration worktree.
 *
 * Each attempt runs one agent loop with a tool set restricted to the normalized
 * `touchedPaths`. An attempt succeeds when the loop does not stop with an error
 * and at least one allowed path was written or deleted. Every attempt outcome is
 * recorded on `input.trace`.
 *
 * @param input - Resolver inputs. `maxAttempts` defaults to 1 and `stepBudget` to 12.
 * @returns `{ status: 'repaired', attempts, changedPaths }` for the first successful
 *   attempt (`changedPaths` sorted), otherwise `{ status: 'failed', attempts, reason }`
 *   where `attempts` is the number of attempts actually made and `reason` is the
 *   last failure message.
 * @throws Error when any entry of `touchedPaths` is not a repository-relative path.
 */
export async function resolveTextualConflict(
  input: ResolveTextualConflictInput,
): Promise<
  | { status: 'repaired'; attempts: number; changedPaths: string[] }
  | { status: 'failed'; attempts: number; reason: string }
> {
  const allowedPaths = new Set<string>(input.touchedPaths.map((path) => normalizeRepoPath(path)));
  // Loop-invariant: the same sorted list feeds the trace payload and the prompt.
  const sortedTouchedPaths = [...allowedPaths].sort();
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 12;
  let lastFailure = 'resolver did not run';
  let attemptsMade = 0;

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    attemptsMade = attempt;
    // Edits are tracked per attempt, so an attempt that changed nothing counts as a failure.
    const editedPaths = new Set<string>();
    const traceData = {
      unitKey: input.unitKey,
      patchPath: input.patchPath,
      touchedPaths: sortedTouchedPaths,
      attempt,
      maxAttempts,
      reason: input.reason,
    };
    const result = await traceTimed(input.trace, 'resolver', 'textual_conflict_resolver', traceData, async () =>
      input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildResolverSystemPrompt(),
        userPrompt: buildResolverUserPrompt({
          unitKey: input.unitKey,
          patchPath: input.patchPath,
          touchedPaths: sortedTouchedPaths,
          reason: input.reason,
          attempt,
          maxAttempts,
        }),
        toolSet: buildToolSet({
          workdir: input.workdir,
          patchPath: input.patchPath,
          allowedPaths,
          editedPaths,
        }),
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-textual-resolver',
          source: input.trace.context.sourceKey,
          jobId: input.trace.context.jobId,
          unitKey: input.unitKey,
        },
      }),
    );

    if (result.stopReason === 'error') {
      lastFailure = result.error?.message ?? 'resolver agent loop errored';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', traceData, result.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length === 0) {
      lastFailure = 'resolver completed without editing an allowed path';
      await input.trace.event('error', 'resolver', 'textual_conflict_resolver_failed', {
        ...traceData,
        reason: lastFailure,
      });
      continue;
    }

    await input.trace.event('debug', 'resolver', 'textual_conflict_resolver_repaired', {
      ...traceData,
      changedPaths,
    });
    return { status: 'repaired', attempts: attempt, changedPaths };
  }

  // Report attempts actually made; this differs from maxAttempts only when it is not a positive integer.
  return { status: 'failed', attempts: attemptsMade, reason: lastFailure };
}
git.commitFiles(['raw-sources/c1/fake/s/a.json'], 'raw snapshot', 'System User', 'system@example.com'); + return { homeDir, configDir, git, baseSha: await git.revParseHead() }; +} + +describe('runIsolatedWorkUnit', () => { + it('creates a child worktree at the ingestion base and persists a patch proposal', async () => { + const { homeDir, git, baseSha } = await makeGit(); + const childDir = join(homeDir, '.worktrees/session-job-1-wu-1'); + const sessionWorktreeService = { + create: vi.fn(async (_key: string, startSha: string) => { + await mkdir(join(homeDir, '.worktrees'), { recursive: true }); + await git.addWorktree(childDir, 'session/job-1-wu-1', startSha); + const childGit = git.forWorktree(childDir); + return { + chatId: 'job-1-wu-1', + workdir: childDir, + branch: 'session/job-1-wu-1', + baseSha: startSha, + createdAt: new Date(), + git: childGit, + config: {}, + }; + }), + cleanup: vi.fn(async () => undefined), + }; + const tracePath = join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'); + const trace = new FileIngestTraceWriter({ + tracePath, + jobId: 'job-1', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await runIsolatedWorkUnit({ + unitIndex: 0, + ingestionBaseSha: baseSha, + sessionWorktreeService: sessionWorktreeService as never, + patchDir: join(homeDir, '.ktx/ingest-patches/job-1'), + trace, + run: async (child) => { + await mkdir(join(child.workdir, 'wiki/global'), { recursive: true }); + await writeFile(join(child.workdir, 'wiki/global/a.md'), '---\nsummary: A\nusage_mode: auto\n---\n\nBody\n'); + await child.git.commitFiles(['wiki/global/a.md'], 'test: write wiki', 'KTX Test', 'system@ktx.local'); + return { + unitKey: 'wu-1', + status: 'success', + preSha: baseSha, + postSha: await child.git.revParseHead(), + actions: [{ target: 'wiki', type: 'created', key: 'a', detail: 'A' }], + touchedSlSources: [], + }; + }, + workUnit: { unitKey: 'wu-1', rawFiles: ['a.json'], peerFileIndex: [], dependencyPaths: [] }, 
+ }); + + expect(sessionWorktreeService.create).toHaveBeenCalledWith('job-1-wu-1', baseSha); + expect(sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success'); + expect(result.status).toBe('success'); + if (result.status !== 'success') { + throw new Error('expected successful work unit'); + } + const patchPath = result.patchPath; + if (!patchPath) { + throw new Error('expected patch path'); + } + expect(patchPath).toContain('0000-wu-1.patch'); + await expect(readFile(patchPath, 'utf-8')).resolves.toContain('wiki/global/a.md'); + await expect(readFile(tracePath, 'utf-8')).resolves.toContain('work_unit_child_created'); + }); + + it('removes child worktrees after failed WorkUnit outcomes are traced', async () => { + const { homeDir, git, baseSha } = await makeGit(); + const childDir = join(homeDir, '.worktrees/session-job-1-wu-fail'); + const sessionWorktreeService = { + create: vi.fn(async (_key: string, startSha: string) => { + await mkdir(join(homeDir, '.worktrees'), { recursive: true }); + await git.addWorktree(childDir, 'session/job-1-wu-fail', startSha); + return { + chatId: 'job-1-wu-fail', + workdir: childDir, + branch: 'session/job-1-wu-fail', + baseSha: startSha, + createdAt: new Date(), + git: git.forWorktree(childDir), + config: {}, + }; + }), + cleanup: vi.fn(async () => undefined), + }; + const trace = new FileIngestTraceWriter({ + tracePath: join(homeDir, '.ktx/ingest-traces/job-1/trace.jsonl'), + jobId: 'job-1', + connectionId: 'c1', + sourceKey: 'fake', + level: 'trace', + }); + + const result = await runIsolatedWorkUnit({ + unitIndex: 0, + ingestionBaseSha: baseSha, + sessionWorktreeService: sessionWorktreeService as never, + patchDir: join(homeDir, '.ktx/ingest-patches/job-1'), + trace, + run: async () => ({ + unitKey: 'wu-fail', + status: 'failed', + reason: 'agent loop errored', + preSha: baseSha, + postSha: baseSha, + actions: [], + touchedSlSources: [], + }), + workUnit: { unitKey: 'wu-fail', rawFiles: ['a.json'], 
/** Inputs for executing one work unit inside its own child worktree. */
export interface RunIsolatedWorkUnitInput {
  /** Position of the unit in the run; used as the zero-padded prefix of the patch file name. */
  unitIndex: number;
  /** Commit the child worktree is created from and the patch is diffed against. */
  ingestionBaseSha: string;
  sessionWorktreeService: IngestSessionWorktreePort;
  /** Directory the patch file is written to; created when missing. */
  patchDir: string;
  trace: IngestTraceWriter;
  workUnit: WorkUnit;
  /** Executes the work unit inside the child worktree and reports its outcome. */
  run(child: IngestSessionWorktree): Promise<WorkUnitOutcome>;
  /** Optional hook invoked after a successful `run`, before the patch is collected. */
  afterSuccess?(child: IngestSessionWorktree): Promise<void>;
}

/**
 * Builds the patch file name for a work unit.
 *
 * @param unitIndex - Index rendered as a number zero-padded to at least four digits.
 * @param unitKey - Work unit key; each run of characters outside `[a-zA-Z0-9_.-]` becomes one `-`.
 * @returns A name of the form `0003-some-key.patch`.
 */
function patchFileName(unitIndex: number, unitKey: string): string {
  const prefix = String(unitIndex).padStart(4, '0');
  const safeKey = unitKey.replace(/[^a-zA-Z0-9_.-]+/g, '-');
  return `${prefix}-${safeKey}.patch`;
}

/**
 * Runs one work unit in a child worktree created at the ingestion base commit and,
 * on success, writes the diff from that base to the child's `HEAD` as a patch file.
 *
 * The child worktree is always handed to `sessionWorktreeService.cleanup` in the
 * `finally` block, and each step is recorded on `input.trace`.
 *
 * @param input - Work unit, services, and callbacks.
 * @returns The outcome from `input.run` plus `childWorktreePath`; successful outcomes
 *   also carry `patchPath` and `patchTouchedPaths`.
 * @throws Rethrows any error raised by `run`, `afterSuccess`, or patch collection after tracing it.
 */
// NOTE(review): the return type argument was missing in the reviewed text and is restored as
// WorkUnitOutcome, the only outcome type this module imports — confirm it declares
// patchPath, patchTouchedPaths, and childWorktreePath.
export async function runIsolatedWorkUnit(input: RunIsolatedWorkUnitInput): Promise<WorkUnitOutcome> {
  const unitKey = input.workUnit.unitKey;
  const sessionKey = `${input.trace.context.jobId}-${unitKey}`;
  let cleanupOutcome: SessionOutcome = 'crash';
  const child = await input.sessionWorktreeService.create(sessionKey, input.ingestionBaseSha);
  await input.trace.event('debug', 'work_unit', 'work_unit_child_created', {
    unitKey,
    unitIndex: input.unitIndex,
    worktreePath: child.workdir,
    baseSha: input.ingestionBaseSha,
  });

  try {
    const outcome = await input.run(child);
    if (outcome.status !== 'success') {
      // A reported (non-thrown) failure still cleans up with the 'success' outcome.
      cleanupOutcome = 'success';
      await input.trace.event('error', 'work_unit', 'work_unit_failed_before_patch', {
        unitKey,
        reason: outcome.reason ?? 'unknown failure',
      });
      return { ...outcome, childWorktreePath: child.workdir };
    }

    await input.afterSuccess?.(child);
    await mkdir(input.patchDir, { recursive: true });
    const patchPath = join(input.patchDir, patchFileName(input.unitIndex, unitKey));
    await child.git.writeBinaryNoRenamePatch(input.ingestionBaseSha, 'HEAD', patchPath);
    const patch = await readFile(patchPath, 'utf-8');
    // Computed once and shared by the trace event and the returned outcome.
    const touchedPaths = parsePatchTouchedPaths(patch).map((entry) => entry.path);
    cleanupOutcome = 'success';
    await input.trace.event('debug', 'work_unit', 'work_unit_patch_collected', {
      unitKey,
      patchPath,
      touchedPaths,
      patchBytes: Buffer.byteLength(patch),
    });
    return {
      ...outcome,
      patchPath,
      patchTouchedPaths: touchedPaths,
      childWorktreePath: child.workdir,
    };
  } catch (error) {
    await input.trace.event(
      'error',
      'work_unit',
      'work_unit_child_failed',
      { unitKey, worktreePath: child.workdir },
      error,
    );
    // Once the failure is traced, the cleanup outcome is 'success' here as well.
    cleanupOutcome = 'success';
    throw error;
  } finally {
    await input.sessionWorktreeService.cleanup(child, cleanupOutcome);
    await input.trace.event('trace', 'work_unit', 'work_unit_child_cleanup', {
      unitKey,
      outcome: cleanupOutcome,
      worktreePath: child.workdir,
    });
  }
}
}); + expect(result.report.body.isolatedDiff).toMatchObject({ + enabled: true, + acceptedPatches: 0, + projectionSha: expect.any(String), + }); + + const projectedSourcePath = join(metricflowProject.projectDir, 'semantic-layer/warehouse/orders.yaml'); + await expect(readFile(projectedSourcePath, 'utf-8')).resolves.toContain('name: orders'); const stagedRawPath = join( metricflowProject.projectDir, diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index 71e08817..a8ec8c20 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -17,6 +17,24 @@ type RuntimeWithConnectionDeps = { }; }; +type RuntimeWithSlValidationDeps = { + deps: { + slValidator: { + validateSingleSource( + deps: unknown, + connectionId: string, + sourceName: string, + ): Promise<{ errors: string[]; warnings: string[] }>; + }; + }; +}; + +type RuntimeWithSettingsDeps = { + deps: { + settings: Record; + }; +}; + function testAgentRunner(): AgentRunnerPort { return { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' as const }) }; } @@ -144,6 +162,77 @@ describe('createLocalBundleIngestRuntime', () => { ]); }); + it('validates manifest-backed scan sources during local ingest gates', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + [ + 'tables:', + ' payments:', + ' table: public.payments', + ' columns:', + ' - name: payment_id', + ' type: string', + ' - name: amount', + ' type: number', + '', + ].join('\n'), + 'ktx', + 'ktx@example.com', + 'Add warehouse manifest', + ); + const agentRunner = testAgentRunner(); + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner, + }); + const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps; + + await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 
'payments')).resolves.toEqual({ + errors: [], + warnings: expect.any(Array), + }); + }); + + it('does not mask malformed direct overlays with manifest-backed fallback validation', async () => { + await project.fileStore.writeFile( + 'semantic-layer/warehouse/_schema/public.yaml', + [ + 'tables:', + ' payments:', + ' table: public.payments', + ' columns:', + ' - name: payment_id', + ' type: string', + '', + ].join('\n'), + 'ktx', + 'ktx@example.com', + 'Add warehouse manifest', + ); + await project.fileStore.writeFile( + 'semantic-layer/warehouse/payments.yaml', + ['name: payments', 'columns:', ' - [', ''].join('\n'), + 'ktx', + 'ktx@example.com', + 'Add malformed overlay', + ); + const agentRunner = testAgentRunner(); + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner, + }); + const deps = (runtime.runner as unknown as RuntimeWithSlValidationDeps).deps; + + await expect(deps.slValidator.validateSingleSource(deps, 'warehouse', 'payments')).resolves.toEqual({ + errors: [expect.stringContaining('invalid YAML')], + warnings: [], + }); + }); + it('passes project connection config to local ingest query executors', async () => { const agentRunner = testAgentRunner(); const queryExecutor = { @@ -175,6 +264,27 @@ describe('createLocalBundleIngestRuntime', () => { }); }); + it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => { + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner: testAgentRunner(), + }); + + const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings; + const fallbackSettingKey = ['sharedWorktree', 'SourceKeys'].join(''); + + expect(settings).not.toHaveProperty(fallbackSettingKey); + expect(Object.keys(settings).sort()).toEqual([ + 'ingestTraceLevel', + 'memoryIngestionModel', + 'probeRowCount', + 'workUnitFailureMode', + 'workUnitMaxConcurrency', + 
/** Trace verbosity names accepted in `KTX_INGEST_TRACE_LEVEL`. */
const INGEST_TRACE_LEVELS: ReadonlySet<string> = new Set<IngestTraceLevel>(['error', 'info', 'debug', 'trace']);

/** Narrows an arbitrary string to a known ingest trace level. */
function isIngestTraceLevel(value: string): value is IngestTraceLevel {
  return INGEST_TRACE_LEVELS.has(value);
}

/**
 * Resolves the ingest trace level from `KTX_INGEST_TRACE_LEVEL`.
 *
 * The value is trimmed and lower-cased before matching, so `TRACE` or ` trace `
 * select `trace` rather than silently falling back to the default.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns The requested level, or `'debug'` when the variable is unset, empty, or unrecognized.
 */
function ingestTraceLevelFromEnv(env: NodeJS.ProcessEnv = process.env): IngestTraceLevel {
  const requested = env.KTX_INGEST_TRACE_LEVEL?.trim().toLowerCase();
  return requested !== undefined && isIngestTraceLevel(requested) ? requested : 'debug';
}
fallbackError.message : String(fallbackError)}`], + warnings: [], + }; + } + } + async validateSingleSource(deps: SlValidationDeps, connectionId: string, sourceName: string) { + let content: string; try { const file = await deps.semanticLayerService.readSourceFile(connectionId, sourceName); - const parsed = YAML.parse(file.content) as SemanticLayerSource; - const isOverlay = parsed.table == null && parsed.sql == null; - const result = (isOverlay ? sourceOverlaySchema : sourceDefinitionSchema).safeParse(parsed); - return result.success - ? { errors: [], warnings: [LOCAL_SHAPE_WARNING] } - : { - errors: result.error.issues.map( - (issue) => `${sourceName}: ${issue.path.join('.') || 'source'} ${issue.message}`, - ), - warnings: [], - }; + content = file.content; } catch (error) { - return { errors: [`${sourceName}: ${error instanceof Error ? error.message : String(error)}`], warnings: [] }; + return this.validateComposedSource(deps, connectionId, sourceName, error); + } + + try { + const parsed = YAML.parse(content) as unknown as Record; + return this.validateParsedSource(sourceName, parsed); + } catch (error) { + return { + errors: [`${sourceName}: invalid YAML — ${error instanceof Error ? 
error.message : String(error)}`], + warnings: [], + }; } } } @@ -671,6 +722,7 @@ export function createLocalBundleIngestRuntime( workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency, workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget, workUnitFailureMode: options.project.config.ingest.workUnits.failureMode, + ingestTraceLevel: ingestTraceLevelFromEnv(), }, skillsRegistry: new SkillsRegistryService({ skillsDir, logger }), promptService, diff --git a/packages/context/src/ingest/memory-flow/schema.test.ts b/packages/context/src/ingest/memory-flow/schema.test.ts index c54752f8..b8c70856 100644 --- a/packages/context/src/ingest/memory-flow/schema.test.ts +++ b/packages/context/src/ingest/memory-flow/schema.test.ts @@ -21,6 +21,7 @@ function snapshot(overrides: Partial = {}): MemoryFlowRep { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, + { type: 'stage_progress', stage: 'integration', percent: 80, message: 'Integrating 1/1 patches: orders' }, { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 }, { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' }, diff --git a/packages/context/src/ingest/memory-flow/schema.ts b/packages/context/src/ingest/memory-flow/schema.ts index 0e268f17..7f00cde3 100644 --- a/packages/context/src/ingest/memory-flow/schema.ts +++ b/packages/context/src/ingest/memory-flow/schema.ts @@ -53,6 +53,23 @@ export const memoryFlowEventSchema = z.discriminatedUnion('type', [ stage: z.enum(['source', 'chunks', 'workUnits', 'actions', 'gates', 'saved']), reason: z.string().min(1), }), + eventSchema({ + type: z.literal('stage_progress'), + stage: z.enum([ + 'source', + 
'integration', + 'reconciliation', + 'post_processor', + 'wiki_sl_ref_repair', + 'final_gates', + 'save', + 'provenance', + 'report', + ]), + percent: z.number().min(0).max(100), + message: z.string().min(1), + transient: z.boolean().optional(), + }), eventSchema({ type: z.literal('work_unit_started'), unitKey: z.string().min(1), diff --git a/packages/context/src/ingest/memory-flow/types.ts b/packages/context/src/ingest/memory-flow/types.ts index 8a40ac04..df8dfff3 100644 --- a/packages/context/src/ingest/memory-flow/types.ts +++ b/packages/context/src/ingest/memory-flow/types.ts @@ -44,6 +44,22 @@ type MemoryFlowEventPayload = stage: MemoryFlowColumnId; reason: string; } + | { + type: 'stage_progress'; + stage: + | 'source' + | 'integration' + | 'reconciliation' + | 'post_processor' + | 'wiki_sl_ref_repair' + | 'final_gates' + | 'save' + | 'provenance' + | 'report'; + percent: number; + message: string; + transient?: boolean; + } | { type: 'work_unit_started'; unitKey: string; diff --git a/packages/context/src/ingest/ports.ts b/packages/context/src/ingest/ports.ts index 6f0e9f1e..32410cbc 100644 --- a/packages/context/src/ingest/ports.ts +++ b/packages/context/src/ingest/ports.ts @@ -16,6 +16,7 @@ import type { import type { ToolContext, ToolSession, TouchedSlSource } from '../tools/index.js'; import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js'; import type { CanonicalPin } from './canonical-pins.js'; +import type { IngestTraceLevel } from './ingest-trace.js'; import type { IngestReportSnapshot } from './reports.js'; import type { ReconcileCandidateForPrompt, @@ -142,6 +143,7 @@ export interface IngestSettingsPort { workUnitMaxConcurrency?: number; workUnitStepBudget?: number; workUnitFailureMode?: 'abort' | 'continue'; + ingestTraceLevel?: IngestTraceLevel; } export interface IngestGitAuthor { @@ -155,6 +157,7 @@ export interface IngestStoragePort { resolveUploadDir(uploadId: string): string; resolvePullDir(jobId: string): string; 
resolveTranscriptDir(jobId: string): string; + resolveTracePath(jobId: string): string; } export interface IngestCommitMessagePort { diff --git a/packages/context/src/ingest/report-snapshot.test.ts b/packages/context/src/ingest/report-snapshot.test.ts index bdf5b193..028c222c 100644 --- a/packages/context/src/ingest/report-snapshot.test.ts +++ b/packages/context/src/ingest/report-snapshot.test.ts @@ -206,6 +206,47 @@ describe('parseIngestReportSnapshot', () => { expect(snapshot.body.toolTranscripts).toEqual([]); }); + it('parses failed ingest reports with trace and failure details', () => { + const snapshot = parseIngestReportSnapshot({ + id: 'report-failed', + runId: 'run-failed', + jobId: 'job-failed', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-17T12:00:00.000Z', + body: { + status: 'failed', + syncId: 'sync-failed', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl', + failure: { + phase: 'final_gates', + message: 'final artifact gates failed', + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }); + + expect(snapshot.body.status).toBe('failed'); + expect(snapshot.body.failure).toEqual({ + phase: 'final_gates', + message: 'final artifact gates failed', + }); + expect(snapshot.body.tracePath).toContain('trace.jsonl'); + }); + it('rejects malformed report snapshots with a concise message', () => { const report = validReportSnapshot(); report.body.workUnits[0] = { @@ -215,4 +256,93 @@ describe('parseIngestReportSnapshot', () => { expect(() => parseIngestReportSnapshot(report)).toThrow('Invalid ingest report snapshot'); }); + + it('parses isolated-diff textual resolver counters', () => { + const 
snapshot = parseIngestReportSnapshot({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-18T00:00:00.000Z', + body: { + status: 'completed', + syncId: 'sync-1', + diffSummary: { added: 0, modified: 1, deleted: 0, unchanged: 0 }, + commitSha: 'abc123', + isolatedDiff: { + enabled: true, + acceptedPatches: 2, + textualConflicts: 1, + semanticConflicts: 0, + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + artifactResolutions: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }); + + expect(snapshot.body.isolatedDiff).toMatchObject({ + resolverAttempts: 1, + resolverRepairs: 1, + resolverFailures: 0, + }); + }); + + it('parses isolated-diff gate repair counters', () => { + const snapshot = parseIngestReportSnapshot({ + id: 'report-1', + runId: 'run-1', + jobId: 'job-1', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-18T00:00:00.000Z', + body: { + status: 'completed', + syncId: 'sync-1', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: 'abc123', + isolatedDiff: { + enabled: true, + acceptedPatches: 1, + textualConflicts: 0, + semanticConflicts: 1, + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: true, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + }, + }); + + expect(snapshot.body.isolatedDiff).toMatchObject({ + gateRepairAttempts: 1, + gateRepairs: 1, + gateRepairFailures: 0, + }); + }); }); diff --git 
a/packages/context/src/ingest/report-snapshot.ts b/packages/context/src/ingest/report-snapshot.ts index de377dd5..eef64b48 100644 --- a/packages/context/src/ingest/report-snapshot.ts +++ b/packages/context/src/ingest/report-snapshot.ts @@ -123,6 +123,12 @@ const sourceFetchReportSchema = z.object({ warnings: z.array(sourceFetchIssueSchema).default([]), }); +const ingestReportFailureSchema = z.object({ + phase: z.string().min(1), + message: z.string().min(1), + details: z.record(z.string(), z.unknown()).optional(), +}); + export const ingestReportSnapshotSchema = z .object({ id: z.string().min(1), @@ -133,10 +139,30 @@ export const ingestReportSnapshotSchema = z createdAt: z.string().min(1), body: z .object({ + status: z.enum(['completed', 'failed']).optional(), syncId: z.string().min(1), diffSummary: ingestDiffSummarySchema, fetch: sourceFetchReportSchema.optional(), commitSha: z.string().nullable(), + tracePath: z.string().optional(), + failure: ingestReportFailureSchema.optional(), + isolatedDiff: z + .object({ + enabled: z.boolean(), + integrationWorktreePath: z.string().optional(), + ingestionBaseSha: z.string().optional(), + projectionSha: z.string().nullable().optional(), + acceptedPatches: z.number().int().min(0), + textualConflicts: z.number().int().min(0), + semanticConflicts: z.number().int().min(0), + resolverAttempts: z.number().int().min(0).default(0), + resolverRepairs: z.number().int().min(0).default(0), + resolverFailures: z.number().int().min(0).default(0), + gateRepairAttempts: z.number().int().min(0).default(0), + gateRepairs: z.number().int().min(0).default(0), + gateRepairFailures: z.number().int().min(0).default(0), + }) + .optional(), workUnits: z.array( z.object({ unitKey: z.string().min(1), diff --git a/packages/context/src/ingest/reports.ts b/packages/context/src/ingest/reports.ts index 672c5bfb..431e4063 100644 --- a/packages/context/src/ingest/reports.ts +++ b/packages/context/src/ingest/reports.ts @@ -48,11 +48,35 @@ export interface 
/** Failure details stored on an ingest report whose `status` is `'failed'`. */
export interface IngestReportFailure {
  /** Name of the ingest phase that failed, for example `final_gates`. */
  phase: string;
  /** Human-readable failure message. */
  message: string;
  /** Optional structured context; values are untyped and must be narrowed by readers. */
  details?: Record<string, unknown>;
}
/** Input for checking which `semantic-layer/<connectionId>/…` paths may be touched. */
export interface SemanticLayerTargetPolicyInput {
  /** Candidate paths; a leading `a/` or `b/` segment is ignored when extracting the connection id. */
  paths: readonly string[];
  /** Connection ids whose semantic-layer subtree is permitted as a target. */
  allowedConnectionIds: ReadonlySet<string>;
}

/** A path that targets a semantic-layer connection outside the allowed set. */
export interface SemanticLayerTargetPolicyViolation {
  /** The offending path exactly as it was supplied by the caller. */
  path: string;
  /** Connection id extracted from the path. */
  connectionId: string;
}

/**
 * Locale-independent string comparison (UTF-16 code-unit order).
 *
 * This is the same ordering `Array.prototype.sort()` uses by default, so every
 * list rendered into the policy error message is sorted the same way no matter
 * which ICU data or default locale the runtime ships with.
 */
function compareCodeUnits(left: string, right: string): number {
  if (left === right) {
    return 0;
  }
  return left < right ? -1 : 1;
}

/**
 * Extracts the connection id from a `semantic-layer/<connectionId>/…` path.
 *
 * @param path Path to inspect. One leading `a/` or `b/` segment is stripped
 *   first (presumably git diff prefixes; confirm against callers).
 * @returns The connection id, or `null` when the path is not inside a
 *   semantic-layer connection directory.
 */
export function semanticLayerConnectionIdFromPath(path: string): string | null {
  const normalized = path.replace(/^[ab]\//, '');
  const match = /^semantic-layer\/([^/]+)\//.exec(normalized);
  return match?.[1] ?? null;
}

/**
 * Lists every path that targets a semantic-layer connection outside
 * `allowedConnectionIds`.
 *
 * Paths that are not semantic-layer paths are ignored. The result is a new
 * array sorted by connection id and then by path, both in code-unit order.
 */
export function findDisallowedSemanticLayerTargetPaths(
  input: SemanticLayerTargetPolicyInput,
): SemanticLayerTargetPolicyViolation[] {
  const violations: SemanticLayerTargetPolicyViolation[] = [];
  for (const path of input.paths) {
    const connectionId = semanticLayerConnectionIdFromPath(path);
    if (connectionId !== null && !input.allowedConnectionIds.has(connectionId)) {
      violations.push({ path, connectionId });
    }
  }
  return violations.sort(
    (left, right) =>
      compareCodeUnits(left.connectionId, right.connectionId) || compareCodeUnits(left.path, right.path),
  );
}

/**
 * Throws when any path targets a semantic-layer connection that is not allowed.
 *
 * @throws Error whose message lists each violation as `path (connectionId)`
 *   followed by the sorted allowed connection ids, or `(none)` when the allowed
 *   set is empty.
 */
export function assertSemanticLayerTargetPathsAllowed(input: SemanticLayerTargetPolicyInput): void {
  const violations = findDisallowedSemanticLayerTargetPaths(input);
  if (violations.length === 0) {
    return;
  }
  const allowed = [...input.allowedConnectionIds].sort(compareCodeUnits);
  const rendered = violations.map((violation) => `${violation.path} (${violation.connectionId})`).join(', ');
  throw new Error(
    `semantic-layer target connection not allowed: ${rendered}; allowed: ${
      allowed.length > 0 ? allowed.join(', ') : '(none)'
    }`,
  );
}
readonly skillNames: string[]; @@ -109,6 +131,7 @@ export interface SourceAdapter { listTargetConnectionIds?(stagedDir: string): Promise; chunk(stagedDir: string, diffSet?: DiffSet): Promise; clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise; + project?(ctx: DeterministicProjectionContext): Promise; describeScope?(stagedDir: string): Promise; onPullSucceeded?(ctx: { connectionId: string; diff --git a/packages/context/src/ingest/wiki-body-refs.test.ts b/packages/context/src/ingest/wiki-body-refs.test.ts new file mode 100644 index 00000000..2af8935f --- /dev/null +++ b/packages/context/src/ingest/wiki-body-refs.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it } from 'vitest'; +import { findInvalidWikiBodyRefs, parseWikiBodyRefs } from './wiki-body-refs.js'; + +const sources = [ + { + name: 'mart_account_segments', + grain: ['account_id'], + columns: [ + { name: 'account_id', type: 'string' }, + { name: 'segment', type: 'string' }, + ], + joins: [], + measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }], + segments: [{ name: 'enterprise', expr: "segment = 'enterprise'" }], + table: 'analytics.mart_account_segments', + }, +]; + +describe('wiki body refs', () => { + it('parses only explicit inline-code body references outside fenced blocks', () => { + const body = [ + 'Valid `mart_account_segments.total_contract_arr` and `source:mart_account_segments`.', + 'Also `warehouse/mart_account_segments.segment` and `table:analytics.mart_account_segments`.', + 'Ignore prose mart_account_segments.total_contract_arr_cents.', + 'Ignore `single_token`.', + 'Ignore wildcard pattern `mart_nrr_quarterly.*_arr_cents`.', + 'Ignore condition `users.is_internal = false`.', + '```sql', + 'select `mart_account_segments.total_contract_arr_cents`', + '```', + ].join('\n'); + + expect(parseWikiBodyRefs(body)).toEqual([ + { kind: 'sl_entity', connectionId: null, sourceName: 'mart_account_segments', entityName: 'total_contract_arr' }, + { kind: 'sl_source', 
connectionId: null, sourceName: 'mart_account_segments' }, + { kind: 'sl_entity', connectionId: 'warehouse', sourceName: 'mart_account_segments', entityName: 'segment' }, + { kind: 'table', connectionId: null, tableRef: 'analytics.mart_account_segments' }, + ]); + }); + + it('rejects stale inline-code semantic-layer references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: 'ARR is documented as `mart_account_segments.total_contract_arr_cents`.', + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async () => true, + }); + + expect(invalid).toEqual([ + 'account-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents', + ]); + }); + + it('does not treat wildcard inline-code patterns as exact semantic-layer entity references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'revenue-metrics-encoding', + body: 'Cents columns include `mart_nrr_quarterly.*_arr_cents` and `mart_retention_movement_breakout.*_arr_cents`.', + visibleConnectionIds: ['warehouse'], + loadSources: async () => [ + { name: 'mart_nrr_quarterly', grain: [], columns: [], joins: [], measures: [], table: 'analytics.mart_nrr_quarterly' }, + { + name: 'mart_retention_movement_breakout', + grain: [], + columns: [], + joins: [], + measures: [], + table: 'analytics.mart_retention_movement_breakout', + }, + ], + tableExists: async () => true, + }); + + expect(invalid).toEqual([]); + }); + + it('does not treat inline-code SQL predicates as exact semantic-layer entity references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-reporting-exclusions', + body: 'Exclude internal users with `users.is_internal = false` and test users with `users.is_test = false`.', + visibleConnectionIds: ['warehouse'], + loadSources: async () => [ + { + name: 'users', + grain: [], + columns: [ + { name: 'is_internal', type: 'boolean' }, + { 
name: 'is_test', type: 'boolean' }, + ], + joins: [], + measures: [], + table: 'analytics.users', + }, + ], + tableExists: async () => true, + }); + + expect(invalid).toEqual([]); + }); + + it('validates source, dimension, segment, measure, and table references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: [ + '`mart_account_segments.total_contract_arr`', + '`mart_account_segments.segment`', + '`mart_account_segments.enterprise`', + '`source:mart_account_segments`', + '`table:analytics.mart_account_segments`', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments', + }); + + expect(invalid).toEqual([]); + }); + + it('ignores two-part inline code when the source is not visible', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'engineering-notes', + body: [ + 'A version token like `node.v22` is not a semantic-layer reference.', + 'A raw table must use `table:analytics.mart_account_segments`.', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async (_connectionId, tableRef) => tableRef === 'analytics.mart_account_segments', + }); + + expect(invalid).toEqual([]); + }); + + it('still rejects explicit missing source and table references', async () => { + const invalid = await findInvalidWikiBodyRefs({ + pageKey: 'account-segments', + body: [ + '`source:missing_source`', + '`warehouse/source:missing_source`', + '`table:analytics.missing_table`', + ].join('\n'), + visibleConnectionIds: ['warehouse'], + loadSources: async () => sources, + tableExists: async () => false, + }); + + expect(invalid).toEqual([ + 'account-segments: unknown semantic-layer source missing_source', + 'account-segments: unknown semantic-layer source warehouse/missing_source', + 'account-segments: unknown raw table 
import type { SemanticLayerSource } from '../sl/index.js';

/** A reference found inside inline code of a wiki page body. */
export type WikiBodyRef =
  | { kind: 'sl_entity'; connectionId: string | null; sourceName: string; entityName: string }
  | { kind: 'sl_source'; connectionId: string | null; sourceName: string }
  | { kind: 'table'; connectionId: string | null; tableRef: string };

/** Everything `findInvalidWikiBodyRefs` needs to validate one wiki page. */
export interface WikiBodyRefValidationInput {
  /** Page identifier; used as the prefix of every reported error. */
  pageKey: string;
  /** Markdown body to scan. */
  body: string;
  /** Connections searched when a reference carries no explicit `connectionId/` prefix. */
  visibleConnectionIds: string[];
  /** Loads the semantic-layer sources of one connection (called at most once per connection). */
  loadSources(connectionId: string): Promise<SemanticLayerSource[]>;
  /** Reports whether a raw table exists in the given connection. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}

const inlineCodePattern = /`([^`\n]+)`/g;
// A fence line opens or closes a fenced code block: three or more backticks or tildes.
const fenceLinePattern = /^\s*(`{3,}|~{3,})/;

/**
 * Returns the lines of `body` that are outside fenced code blocks.
 *
 * Fence lines themselves are dropped. A fence is only closed by a fence line
 * using the same marker character that opened it, so a `~~~` line inside a
 * backtick fence (or vice versa) is treated as fenced content.
 */
function visibleLinesOutsideFences(body: string): string[] {
  const lines: string[] = [];
  let openMarker: string | null = null;
  for (const line of body.split('\n')) {
    const marker = fenceLinePattern.exec(line)?.[1]?.[0];
    if (marker !== undefined) {
      if (openMarker === null) {
        openMarker = marker;
      } else if (openMarker === marker) {
        openMarker = null;
      }
      continue;
    }
    if (openMarker === null) {
      lines.push(line);
    }
  }
  return lines;
}

/** Splits an optional `connectionId/` prefix (text before the first `/`) off a token. */
function parseConnectionScoped(value: string): { connectionId: string | null; body: string } {
  const slash = value.indexOf('/');
  if (slash <= 0) {
    return { connectionId: null, body: value };
  }
  return { connectionId: value.slice(0, slash), body: value.slice(slash + 1) };
}

/** True for a plain identifier: a letter or underscore followed by letters, digits or underscores. */
function isIdentifierToken(value: string): boolean {
  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(value);
}

/**
 * Extracts explicit references from inline code spans outside fenced blocks.
 *
 * Recognised token shapes, each optionally prefixed with `connectionId/`:
 * - `source:<name>` yields an `sl_source` reference
 * - `table:<ref>` yields a `table` reference
 * - `<source>.<entity>` yields an `sl_entity` reference, only when both parts
 *   are plain identifiers
 *
 * Every other inline code span (single tokens, wildcards, SQL predicates, …)
 * is ignored. References are returned in document order; duplicates are kept.
 */
export function parseWikiBodyRefs(body: string): WikiBodyRef[] {
  const refs: WikiBodyRef[] = [];
  for (const line of visibleLinesOutsideFences(body)) {
    for (const match of line.matchAll(inlineCodePattern)) {
      const token = (match[1] ?? '').trim();
      if (!token) {
        continue;
      }
      const scoped = parseConnectionScoped(token);
      if (scoped.body.startsWith('source:')) {
        const sourceName = scoped.body.slice('source:'.length).trim();
        if (sourceName) {
          refs.push({ kind: 'sl_source', connectionId: scoped.connectionId, sourceName });
        }
        continue;
      }
      if (scoped.body.startsWith('table:')) {
        const tableRef = scoped.body.slice('table:'.length).trim();
        if (tableRef) {
          refs.push({ kind: 'table', connectionId: scoped.connectionId, tableRef });
        }
        continue;
      }
      // Destructure so both names are narrowed to `string` before they are stored.
      const [sourceName, entityName, ...extraParts] = scoped.body.split('.');
      if (
        sourceName !== undefined &&
        entityName !== undefined &&
        extraParts.length === 0 &&
        isIdentifierToken(sourceName) &&
        isIdentifierToken(entityName)
      ) {
        refs.push({ kind: 'sl_entity', connectionId: scoped.connectionId, sourceName, entityName });
      }
    }
  }
  return refs;
}

/** Names of all measures, columns and segments declared on a source. */
function entityNames(source: SemanticLayerSource): Set<string> {
  return new Set<string>([
    ...(source.measures ?? []).map((measure) => measure.name),
    ...(source.columns ?? []).map((column) => column.name),
    ...(source.segments ?? []).map((segment) => segment.name),
  ]);
}

/**
 * Validates the inline-code references of one wiki page.
 *
 * - `table:` references must exist in at least one candidate connection.
 * - `source:` references must name a loaded semantic-layer source.
 * - `<source>.<entity>` references are checked only when the source is found;
 *   an unknown source is ignored because such a token may not be a
 *   semantic-layer reference at all.
 *
 * Candidate connections are the explicit `connectionId/` prefix when present,
 * otherwise `visibleConnectionIds`.
 *
 * @returns One message per invalid reference, in document order. Messages
 *   include the `connectionId/` prefix when the reference was written with one.
 */
export async function findInvalidWikiBodyRefs(input: WikiBodyRefValidationInput): Promise<string[]> {
  const errors: string[] = [];
  const sourceCache = new Map<string, SemanticLayerSource[]>();

  // Memoise per connection so a page with many references loads each connection once.
  const loadSources = async (connectionId: string): Promise<SemanticLayerSource[]> => {
    const cached = sourceCache.get(connectionId);
    if (cached) {
      return cached;
    }
    const sources = await input.loadSources(connectionId);
    sourceCache.set(connectionId, sources);
    return sources;
  };

  const findSource = async (
    connectionIds: string[],
    sourceName: string,
  ): Promise<{ connectionId: string; source: SemanticLayerSource } | null> => {
    for (const connectionId of connectionIds) {
      const source = (await loadSources(connectionId)).find((candidate) => candidate.name === sourceName);
      if (source) {
        return { connectionId, source };
      }
    }
    return null;
  };

  for (const ref of parseWikiBodyRefs(input.body)) {
    const connectionIds = ref.connectionId ? [ref.connectionId] : input.visibleConnectionIds;
    const scopePrefix = ref.connectionId ? `${ref.connectionId}/` : '';

    if (ref.kind === 'table') {
      const found = await Promise.all(
        connectionIds.map((connectionId) => input.tableExists(connectionId, ref.tableRef)),
      );
      if (!found.some(Boolean)) {
        errors.push(`${input.pageKey}: unknown raw table ${scopePrefix}${ref.tableRef}`);
      }
      continue;
    }

    const found = await findSource(connectionIds, ref.sourceName);
    if (!found) {
      if (ref.kind === 'sl_source') {
        errors.push(`${input.pageKey}: unknown semantic-layer source ${scopePrefix}${ref.sourceName}`);
      }
      continue;
    }
    if (ref.kind === 'sl_entity' && !entityNames(found.source).has(ref.entityName)) {
      errors.push(
        `${input.pageKey}: unknown semantic-layer entity ${scopePrefix}${ref.sourceName}.${ref.entityName}`,
      );
    }
  }

  return errors;
}
expect(await options.canUseTool('mcp__ktx__load_skill', {}, { signal: new AbortController().signal, toolUseID: '1' })).toEqual({ behavior: 'allow', toolUseID: '1', @@ -334,6 +354,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => { answer: 'yes', }); expect(objectQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ PATH: '/usr/bin' })); + expect(objectQuery.mock.calls[0][0].options.managedSettings).toEqual({ + allowManagedMcpServersOnly: true, + allowedMcpServers: [], + }); expect(objectQuery.mock.calls[0][0].options.env).not.toEqual( expect.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test', AWS_PROFILE: 'prod' }), // pragma: allowlist secret ); @@ -374,6 +398,10 @@ describe('ClaudeCodeKtxLlmRuntime', () => { telemetryTags: { operationName: 'test' }, }); expect(agentQuery.mock.calls[0][0].options.env).toEqual(expect.objectContaining({ HOME: '/Users/test' })); + expect(agentQuery.mock.calls[0][0].options.managedSettings).toEqual({ + allowManagedMcpServersOnly: true, + allowedMcpServers: [{ serverName: 'ktx' }], + }); expect(agentQuery.mock.calls[0][0].options.env).not.toEqual( expect.objectContaining({ ANTHROPIC_AUTH_TOKEN: 'token', CLAUDE_CODE_USE_VERTEX: '1' }), ); @@ -442,6 +470,11 @@ describe('ClaudeCodeKtxLlmRuntime', () => { skills: [], plugins: [], tools: [], + managedSettings: { + allowManagedMcpServersOnly: true, + allowedMcpServers: [], + }, + strictMcpConfig: true, allowedTools: [], persistSession: false, env: expect.not.objectContaining({ ANTHROPIC_API_KEY: 'sk-ant-test' }), diff --git a/packages/context/src/llm/claude-code-runtime.ts b/packages/context/src/llm/claude-code-runtime.ts index 5d8edf26..bf815445 100644 --- a/packages/context/src/llm/claude-code-runtime.ts +++ b/packages/context/src/llm/claude-code-runtime.ts @@ -45,6 +45,8 @@ const BUILTIN_TOOLS = [ 'TodoWrite', ]; +const KTX_MCP_SERVER_NAME = 'ktx'; + function isResult(message: SDKMessage): message is SDKResultMessage { return message.type === 'result'; } @@ -113,7 +115,14 
@@ function assertInitIsolation( } function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set { - return tools && Object.keys(tools).length > 0 ? new Set(['ktx']) : new Set(); + return tools && Object.keys(tools).length > 0 ? new Set([KTX_MCP_SERVER_NAME]) : new Set(); +} + +function managedMcpSettings(serverNames: string[]): NonNullable { + return { + allowManagedMcpServersOnly: true, + allowedMcpServers: serverNames.map((serverName) => ({ serverName })), + }; } function baseOptions(input: { @@ -125,6 +134,7 @@ function baseOptions(input: { }): Options { const toolIds = mcpToolIds(input.tools ?? {}); const allowedToolIds = new Set(toolIds); + const expectedServerNames = [...expectedMcpServerNames(input.tools)]; return { cwd: input.projectDir, model: input.model, @@ -133,6 +143,8 @@ function baseOptions(input: { skills: [], plugins: [], tools: [], + managedSettings: managedMcpSettings(expectedServerNames), + strictMcpConfig: true, allowedTools: toolIds, disallowedTools: BUILTIN_TOOLS, canUseTool: async (toolName, _toolInput, options) => @@ -147,7 +159,14 @@ function baseOptions(input: { persistSession: false, env: createKtxClaudeCodeEnv(input.env), ...(input.tools && Object.keys(input.tools).length > 0 - ? { mcpServers: { ktx: createSdkMcpServer({ name: 'ktx', tools: createClaudeSdkTools(input.tools) }) } } + ? 
{ + mcpServers: { + [KTX_MCP_SERVER_NAME]: createSdkMcpServer({ + name: KTX_MCP_SERVER_NAME, + tools: createClaudeSdkTools(input.tools), + }), + }, + } : {}), }; } diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts index 49a8f757..75c753ef 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.test.ts @@ -99,6 +99,27 @@ describe('SlEditSourceTool — session gating', () => { ); }); + it('rejects session-scoped edits outside allowed target connections', async () => { + const { tool } = makeTool(); + const session = makeSession({ + allowedConnectionNames: new Set(['warehouse']), + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: 'finance', + sourceName: 'orders', + yaml_edits: [{ oldText: 'measures: []', newText: 'measures: []' }], + } as any, + context, + ); + + expect(result.structured.success).toBe(false); + expect(result.markdown).toContain('connectionId "finance" is outside this ingest session'); + expect(session.actions).toEqual([]); + }); + it('indexes normally when no session is present', async () => { const { tool, slSearchService } = makeTool(); const result = await tool.call( diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts index 813072c0..f6669120 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -1,6 +1,12 @@ import YAML from 'yaml'; import { z } from 'zod'; -import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js'; +import { + addTouchedSlSource, + type ToolContext, + type ToolOutput, + validateActionRawPaths, + validateActionTargetConnection, +} from '../../tools/index.js'; import { applySqlEdits } from 
'../../tools/sql-edit-replacer.js'; import { normalizeSemanticLayerDescriptions } from '../description-normalization.js'; import type { SemanticLayerSource } from '../types.js'; @@ -79,6 +85,10 @@ If no source exists yet, use sl_write_source instead — this tool will reject t const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService; const skipIndex = context.session?.isWorktreeScoped === true; + const targetConnectionValidation = validateActionTargetConnection(context.session, connectionId); + if (!targetConnectionValidation.ok) { + return this.buildOutput(false, [targetConnectionValidation.error], sourceName); + } const rawPathValidation = validateActionRawPaths(context.session, input.rawPaths); if (!rawPathValidation.ok) { return this.buildOutput(false, [rawPathValidation.error], sourceName); diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts index 6f9cdbc0..186028b8 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -133,6 +133,34 @@ describe('SlWriteSourceTool — session gating', () => { ); }); + it('rejects session-scoped writes outside allowed target connections', async () => { + const { tool } = makeTool(); + const session = makeSession({ + allowedConnectionNames: new Set(['warehouse']), + }); + const context: ToolContext = { ...baseContext, session }; + + const result = await tool.call( + { + connectionId: 'finance', + sourceName: 'finance_orders', + source: { + name: 'finance_orders', + table: 'public.orders', + grain: ['id'], + columns: [{ name: 'id', type: 'string' }], + measures: [], + joins: [], + } as any, + } as any, + context, + ); + + expect(result.structured.success).toBe(false); + expect(result.markdown).toContain('connectionId "finance" is outside this ingest session'); + expect(session.actions).toEqual([]); + }); + it('indexes normally when 
no session is present', async () => { const { tool, slSearchService } = makeTool(); const result = await tool.call( diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts index 357e7ca0..b9a79e6b 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -1,6 +1,12 @@ import YAML from 'yaml'; import { z } from 'zod'; -import { addTouchedSlSource, type ToolContext, type ToolOutput, validateActionRawPaths } from '../../tools/index.js'; +import { + addTouchedSlSource, + type ToolContext, + type ToolOutput, + validateActionRawPaths, + validateActionTargetConnection, +} from '../../tools/index.js'; import { sourceOverlaySchema } from '../schemas.js'; import type { SemanticLayerService } from '../semantic-layer.service.js'; import type { SemanticLayerSource } from '../types.js'; @@ -106,6 +112,10 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co const semanticLayerService = context.session?.semanticLayerService ?? 
import type { ToolSession } from './tool-session.js';

type ActionTargetConnectionValidation = { ok: true } | { ok: false; error: string };

/**
 * Checks that a tool action targets a connection the current session permits.
 *
 * The action is accepted when there is no session, when the session has no
 * `allowedConnectionNames` restriction, or when `connectionId` is in that set.
 *
 * @param session Active tool session, if any.
 * @param connectionId Connection the action wants to write to.
 * @returns `{ ok: true }` when allowed; otherwise `{ ok: false, error }` where
 *   `error` names the rejected connection and lists the sorted allowed ones
 *   (or `(none)` when the set is empty).
 */
export function validateActionTargetConnection(
  session: ToolSession | undefined,
  connectionId: string,
): ActionTargetConnectionValidation {
  const permitted = session?.allowedConnectionNames;
  if (!permitted || permitted.has(connectionId)) {
    return { ok: true };
  }

  const names = Array.from(permitted).sort();
  const listing = names.length > 0 ? names.join(', ') : '(none)';
  return {
    ok: false,
    error: `connectionId "${connectionId}" is outside this ingest session's allowed target connections: ${listing}`,
  };
}