ktx/packages/cli/test/context/ingest/ingest-bundle.runner.test.ts

2381 lines
83 KiB
TypeScript
Raw Permalink Normal View History

feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
2026-05-10 23:12:26 +02:00
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { addTouchedSlSource } from '../../../src/context/tools/touched-sl-sources.js';
import { IngestBundleRunner } from '../../../src/context/ingest/ingest-bundle.runner.js';
import { createMemoryFlowLiveBuffer } from '../../../src/context/ingest/memory-flow/live-buffer.js';
import type { MemoryFlowReplayInput } from '../../../src/context/ingest/memory-flow/types.js';
import type { IngestBundleRunnerDeps } from '../../../src/context/ingest/ports.js';
2026-05-10 23:12:26 +02:00
class TestJobContext {
private currentProgress = 0;
constructor(
public readonly jobId: string,
public readonly userId: string | null | undefined,
public readonly checkCancellation: () => Promise<void>,
private readonly updateProgressFn: (progress: number, message?: string) => Promise<void>,
private readonly parent?: TestJobContext,
private readonly start = 0,
private readonly span = 1,
) {}
async updateProgress(progress: number, message?: string): Promise<void> {
const local = Math.max(0, Math.min(1, progress));
this.currentProgress = local;
if (this.parent) {
await this.parent.updateProgress(Math.max(0, Math.min(1, this.start + this.span * local)), message);
return;
}
await this.updateProgressFn(local, message);
}
startPhase(fraction: number): TestJobContext {
return new TestJobContext(
this.jobId,
this.userId,
this.checkCancellation,
this.updateProgressFn,
this,
this.currentProgress,
Math.max(0, Math.min(1, fraction)),
);
}
}
const deferred = <T>() => {
let resolve!: (v: T) => void;
const promise = new Promise<T>((r) => {
resolve = r;
});
return { promise, resolve };
};
function bundleReplayInput(): MemoryFlowReplayInput {
return {
runId: 'pending',
connectionId: 'c1',
adapter: 'fake',
status: 'running',
sourceDir: '/tmp/stage/upload-x',
syncId: 'pending',
errors: [],
events: [],
plannedWorkUnits: [],
details: { actions: [], provenance: [], transcripts: [] },
};
}
const makeDeps = () => {
const runsRepo = {
create: vi.fn().mockResolvedValue({ id: 'run-1' }),
findMostRecentCompleted: vi.fn().mockResolvedValue(null),
markFailed: vi.fn(),
markCompleted: vi.fn(),
};
const provenanceRepo = {
insertMany: vi.fn(),
findHashesBySync: vi.fn().mockResolvedValue(new Map()),
findLatestArtifactsForRawPaths: vi.fn().mockResolvedValue(new Map()),
};
const reportsRepo = {
create: vi.fn().mockResolvedValue({ id: 'report-1' }),
findByJobId: vi.fn().mockResolvedValue(null),
markSuperseded: vi.fn().mockResolvedValue(undefined),
};
const canonicalPins = {
listPins: vi.fn().mockResolvedValue([]),
};
const adapter = {
source: 'fake',
skillNames: [] as string[],
reconcileSkillNames: undefined as undefined | string[],
evidenceIndexing: undefined as undefined | 'documents',
triageSupported: undefined as undefined | boolean,
detect: vi.fn().mockResolvedValue(true),
listTargetConnectionIds: undefined as undefined | ((stagedDir: string) => Promise<string[]>),
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
finalize: undefined as any,
2026-05-10 23:12:26 +02:00
chunk: vi.fn().mockResolvedValue({
workUnits: [{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }],
}),
};
const registry = { get: vi.fn().mockReturnValue(adapter) };
const diffSetService = {
compute: vi.fn().mockResolvedValue({ added: ['a.yml'], modified: [], deleted: [], unchanged: [] }),
};
const contextEvidenceIndex = {
indexStagedDir: vi.fn().mockResolvedValue({
documentsIndexed: 1,
chunksIndexed: 1,
documentsDeleted: 0,
embeddingFailures: 0,
warnings: [],
}),
publishSync: vi.fn().mockResolvedValue(undefined),
};
const pageTriage = {
triageRun: vi.fn().mockResolvedValue({
enabled: true,
fullRawPaths: new Set(['a.yml']),
warnings: [],
}),
};
const scopedGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
2026-05-10 23:12:26 +02:00
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
changedPaths: vi.fn().mockResolvedValue([]),
2026-05-10 23:12:26 +02:00
};
const sessionWorktreeService = {
create: vi.fn().mockResolvedValue({
chatId: 'j1',
workdir: '/tmp/wt',
branch: 'session/j1',
baseSha: 'b',
createdAt: new Date(),
git: scopedGit,
config: {},
}),
cleanup: vi.fn(),
};
const agentRunner = { runLoop: vi.fn().mockResolvedValue({ stopReason: 'natural' }) };
const gitService = {
revParseHead: vi.fn().mockResolvedValue('base'),
listFilesAtHead: vi.fn().mockResolvedValue([]),
getFileAtCommit: vi.fn(),
squashMergeIntoMain: vi
.fn()
.mockResolvedValue({ ok: true, squashSha: 'sq', touchedPaths: ['raw-sources/c1/fake/s/a.yml'] }),
};
const lockingService = {
withLock: vi.fn().mockImplementation(async (_k: string, fn: () => Promise<unknown>) => fn()),
};
const appSettingsService = {
settings: {
ai: { slValidation: { probeRowCount: 1 } },
llm: { memoryIngestionModel: 'test-model' },
},
};
const skillsRegistry = {
listSkills: vi.fn().mockResolvedValue([]),
getSkill: vi.fn().mockResolvedValue(null),
buildSkillsPrompt: vi.fn().mockReturnValue(''),
stripFrontmatter: vi.fn().mockImplementation((s: string) => s),
};
const promptService = {
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
};
const wikiService = {
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
forWorktree: vi.fn(),
listPageKeys: vi.fn().mockResolvedValue([]),
2026-05-10 23:12:26 +02:00
readPage: vi.fn().mockResolvedValue(null),
syncFromCommit: vi.fn().mockResolvedValue(undefined),
};
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
wikiService.forWorktree.mockReturnValue(wikiService);
2026-05-10 23:12:26 +02:00
const knowledgeSlRefs = {
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
};
const knowledgeIndex = {
listPagesForUser: vi.fn().mockResolvedValue([]),
};
const semanticLayerService = {
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
forWorktree: vi.fn(),
2026-05-10 23:12:26 +02:00
listFilesForConnection: vi
.fn()
.mockImplementation((connectionId: string) =>
Promise.resolve(connectionId === 'warehouse-2' ? ['looker__orders.yaml'] : []),
),
loadAllSources: vi
.fn()
.mockImplementation((connectionId: string) =>
fix(context): merge overlay columns onto manifest columns by name (#94) * fix(context): merge overlay columns onto manifest columns by name composeOverlay was appending overlay columns to the manifest column list, producing duplicate entries when dbt/metabase overlays declared a column just to attach descriptions. The duplicates carried no `type`, so the pydantic SourceDefinition rejected them at semantic-query time and broke `ktx sl query` for every overlay-backed measure. Now overlay columns match base columns by name (case-insensitive): same-name entries merge onto the manifest (overlay fields win, type/role fall back to the base, descriptions merge per source key) and only new names append. * refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract Overlay sources now have two distinct collections: `columns:` for computed columns (requiring `expr` + `type`) and `column_overrides:` for metadata patches to inherited manifest columns. Composing or loading an overlay that mixes the two — or references an unknown column — fails with a typed error. Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` / `toResolvedWire` as the strict shape sent to the Python engine, and add a schema contract test that diffs Zod against the Pydantic JSON schema dumped by `python -m semantic_layer dump-schema`. `SourceDefinition` is now `extra="forbid"` on the Python side. `loadAllSources` surfaces per-file load errors instead of swallowing them, so validation/query paths can report manifest shard parse failures. * fix(context): make scan description generation resilient and quiet A transient sampleTable failure during ingest used to take out every table in a connection: generateTableDescription returned a hardcoded 'Table not found' string into descriptions.ai, and KtxDescriptionGenerator was constructed without a logger, so the failure left no trail anywhere. - sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff, honouring KtxScanContext.signal via a new KtxAbortedError. - On retry exhaustion or missing capability, table generation falls back to a metadata-only prompt built from column name / native type / comment / rawDescriptions. The column path follows the same rule -- call the LLM when any of samples or rawDescriptions are available; skip only when both are absent. - Logger is now threaded from KtxScanContext into the generator. Failures emit structured KtxScanWarning entries (new description_fallback_used code, plus existing sampling_failed / enrichment_failed / connector_capability_missing). ktx scan groups warnings by code so a batch of identical failures collapses to one summary line plus sample. - Returns null on failure instead of the 'Table not found' sentinel; the manifest writer's existing guard already skips empty descriptions, so schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS already strips stale 'ai' on merge, so existing YAML clears on next run. Also suppress AI SDK v6 'system in messages' warning: pull system messages out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages helper and pass them top-level to generateText (preserves cacheControl providerOptions on the SystemModelMessage). Agent-runner's local splitSystemPromptMessages dedupes onto the shared helper. * test(docs): align examples-docs assertions with revamped docs PR #103 (setup/guide doc revamp) reworded several CLI examples and connection labels; the assertions in scripts/examples-docs.test.mjs still referenced the pre-revamp wording and were failing in CI on main. Update the regexes to match the post-revamp content: - drop the `--json` flag from the sl-query example expectation - move the `Driver:` / `Status: ok` probe to the connection reference, which is where that output now lives (driver id is lowercase `postgres`, not the display name `PostgreSQL`) - drop the obsolete `Install \`uv\`...` troubleshooting line - accept `<connectionId>` everywhere; the docs no longer use the hyphenated `<connection-id>` form - match the `warehouse` connection id used in the quickstart instead of the `postgres-warehouse` id only used in the README and setup ref * fix(sl): skip TS/Python schema contract test when uv is unavailable The TypeScript checks CI job does not install uv or Python, so the module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw ENOENT and failed the suite. Wrap the schema dump in a try/catch and guard the describe block with `describe.skipIf` so the test skips in environments without uv. Local dev and any CI job that has uv on PATH still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
Promise.resolve({
sources: connectionId === 'warehouse-2' ? [{ name: 'looker__orders' }] : [],
loadErrors: [],
}),
),
2026-05-10 23:12:26 +02:00
};
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
2026-05-10 23:12:26 +02:00
const slSearchService = {
indexSources: vi.fn().mockResolvedValue(undefined),
};
const slSourcesRepository = {};
const slValidator = { validateSingleSource: vi.fn().mockResolvedValue({ errors: [], warnings: [] }) };
const toolsetFactory = {
createIngestWuToolset: vi.fn().mockReturnValue({
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({}),
2026-05-10 23:12:26 +02:00
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
}),
};
const configService = {
enqueueCommitMessageJobForExternalCommit: vi.fn().mockResolvedValue(undefined),
};
return {
runsRepo,
provenanceRepo,
reportsRepo,
canonicalPins,
adapter,
registry,
diffSetService,
contextEvidenceIndex,
pageTriage,
sessionWorktreeService,
agentRunner,
gitService,
lockingService,
slValidator,
appSettingsService,
skillsRegistry,
promptService,
wikiService,
knowledgeSlRefs,
knowledgeIndex,
semanticLayerService,
slSearchService,
slSourcesRepository,
toolsetFactory,
configService,
};
};
const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides: Partial<IngestBundleRunnerDeps> = {}) =>
new IngestBundleRunner({
runs: deps.runsRepo as any,
provenance: deps.provenanceRepo as any,
registry: deps.registry as any,
diffSetService: deps.diffSetService as any,
contextEvidenceIndex: deps.contextEvidenceIndex,
pageTriage: deps.pageTriage as any,
sessionWorktreeService: deps.sessionWorktreeService as any,
agentRunner: deps.agentRunner as any,
gitService: deps.gitService as any,
lockingService: deps.lockingService as any,
storage: {
2026-05-10 23:51:24 +02:00
homeDir: '/tmp/ktx-test',
systemGitAuthor: { name: 'KTX Test', email: 'system@ktx.local' },
autoCommit: true,
2026-05-10 23:51:24 +02:00
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
},
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
2026-05-10 23:12:26 +02:00
},
skillsRegistry: deps.skillsRegistry as any,
promptService: deps.promptService as any,
wikiService: deps.wikiService as any,
knowledgeSlRefs: deps.knowledgeSlRefs as any,
knowledgeIndex: deps.knowledgeIndex,
semanticLayerService: deps.semanticLayerService as any,
slSearchService: deps.slSearchService as any,
slSourcesRepository: deps.slSourcesRepository as any,
connections: {
listEnabledConnections: vi.fn().mockResolvedValue([]),
getConnectionById: vi.fn().mockResolvedValue({ id: 'c1', name: 'warehouse', connectionType: 'POSTGRES' }),
executeQuery: vi.fn().mockResolvedValue({ headers: [], rows: [] }),
},
reports: deps.reportsRepo as any,
canonicalPins: deps.canonicalPins,
slValidator: deps.slValidator as any,
toolsetFactory: deps.toolsetFactory as any,
commitMessages: {
enqueueForExternalCommit: deps.configService.enqueueCommitMessageJobForExternalCommit,
},
embedding: {
maxBatchSize: 10,
computeEmbedding: async () => [0],
computeEmbeddingsBulk: async (texts: string[]) => texts.map(() => [0]),
},
...overrides,
});
describe('IngestBundleRunner — FIFO-per-connection', () => {
let spy: any;
beforeEach(() => {
spy = vi.fn();
});
it('serializes two jobs on the same connectionId', async () => {
const runner = buildRunner();
(runner as any).runInner = async (job: any) => {
spy(job.jobId);
await new Promise((r) => setTimeout(r, 5));
spy(`done-${job.jobId}`);
return {
runId: 'r',
syncId: 's',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
workUnitCount: 0,
failedWorkUnits: [],
artifactsWritten: 0,
commitSha: null,
};
};
const p1 = runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'u1' },
});
const p2 = runner.run({
jobId: 'j2',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'u2' },
});
await Promise.all([p1, p2]);
expect(spy.mock.calls.map((c: unknown[]) => c[0])).toEqual(['j1', 'done-j1', 'j2', 'done-j2']);
});
it('runs jobs on different connections in parallel', async () => {
const runner = buildRunner();
const d1 = deferred<void>();
const d2 = deferred<void>();
(runner as any).runInner = async (job: any) => {
spy(`start-${job.jobId}`);
if (job.jobId === 'j1') {
await d1.promise;
}
if (job.jobId === 'j2') {
await d2.promise;
}
return {
runId: 'r',
syncId: 's',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
workUnitCount: 0,
failedWorkUnits: [],
artifactsWritten: 0,
commitSha: null,
};
};
const p1 = runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'u1' },
});
const p2 = runner.run({
jobId: 'j2',
connectionId: 'c2',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'u2' },
});
await new Promise((r) => setTimeout(r, 10));
expect(spy.mock.calls.map((c: unknown[]) => c[0]).sort()).toEqual(['start-j1', 'start-j2']);
d1.resolve();
d2.resolve();
await Promise.all([p1, p2]);
});
});
describe('IngestBundleRunner — Stages 1 → 7', () => {
it('runs the full pipeline, creates a run row, stages files, chunks, squashes, writes provenance', async () => {
const deps = makeDeps();
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.runsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({ jobId: 'j1', connectionId: 'c1', sourceKey: 'fake', trigger: 'upload' }),
);
expect(deps.adapter.detect).toHaveBeenCalled();
expect(deps.adapter.chunk).toHaveBeenCalled();
expect(result.workUnitCount).toBe(1);
expect(deps.diffSetService.compute).toHaveBeenCalled();
expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalledWith(
'session/j1',
expect.any(String),
expect.any(String),
expect.stringContaining('ingest(fake): j1'),
);
expect(deps.provenanceRepo.insertMany).toHaveBeenCalled();
expect(result.commitSha).toBe('sq');
expect(deps.runsRepo.markCompleted).toHaveBeenCalledWith('run-1', expect.any(Object), 'completed');
// Single touched path → path-scoped diff for the LLM commit-message note.
expect(deps.configService.enqueueCommitMessageJobForExternalCommit).toHaveBeenCalledWith(
{ commitHash: 'sq' },
expect.stringContaining('ingest(fake): j1'),
'raw-sources/c1/fake/s/a.yml',
);
});
feat(cli): add ingest LLM rate-limit governor with paced retries (#261) * feat(cli): add ingest rate limit governor * feat(cli): wire ingest rate-limit config * feat(cli): report provider rate-limit signals * feat(cli): show ingest rate-limit waits * fix(cli): complete rate-limit event coverage * fix(cli): abort ingest provider calls cleanly * fix(cli): propagate ingest cancellation * fix(cli): reject pre-aborted ingest rate-limit waits * fix(cli): honor Claude rate-limit reset waits * fix(cli): retry thrown Codex rate-limit failures * fix(cli): type Claude rate-limit result details * fix(cli): emit ingest rate-limit countdowns from rejected signals * fix(cli): report ai sdk rate-limit header utilization * fix(cli): gate LLM rate-limit retries on the governor budget The AI SDK and Codex runtimes retried 429 / opaque rate-limit failures up to 6-7 times with no backoff when constructed without a RateLimitGovernor (scan, memory, setup) or with pacing disabled, ignoring Retry-After and worsening the limit. The outer retry loop only cooperates with the governor's pause, so without active pacing there is no backoff to apply. Route the retry bound through a single source: RateLimitGovernor .maxRetryAttempts(), which returns retry.maxAttempts when enabled and 1 (no outer retry) when absent or disabled. All three runtimes (ai-sdk, codex, claude-code) now use it, so ingest.rateLimit.retry.maxAttempts genuinely controls attempts and the hard-coded 6 (plus Codex's off-by-one extra attempt) is gone. Backend-native retry (e.g. the AI SDK's maxRetries) still handles transient 429s. Also correct the ktx.yaml docs for maxWaitMs (caps each wait, not the whole run) and maxAttempts, and sync uv.lock ktx-sl/ktx-daemon to 0.9.0.
2026-06-05 12:10:27 +02:00
it('uses the rate-limit governor for work-unit start slots', async () => {
const deps = makeDeps();
const acquireWorkSlot = vi.fn(async () => vi.fn());
const runner = buildRunner(deps, {
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
workUnitMaxConcurrency: 2,
rateLimitGovernor: { acquireWorkSlot, subscribe: vi.fn(() => vi.fn()) } as never,
},
});
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] },
{ unitKey: 'u2', rawFiles: ['b.yml'], peerFileIndex: [], dependencyPaths: [] },
],
});
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([
['a.yml', 'h1'],
['b.yml', 'h2'],
]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(acquireWorkSlot).toHaveBeenCalledTimes(2);
});
it('passes the job abort signal into rate-limit work-unit slots', async () => {
const deps = makeDeps();
const controller = new AbortController();
const acquireWorkSlot = vi.fn(async () => vi.fn());
const runner = buildRunner(deps, {
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
workUnitMaxConcurrency: 1,
rateLimitGovernor: { acquireWorkSlot, subscribe: vi.fn(() => vi.fn()) } as never,
},
});
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
{ jobId: 'j1', abortSignal: controller.signal, startPhase: () => new TestJobContext('j1', null, async () => undefined, async () => undefined) } as any,
);
expect(acquireWorkSlot).toHaveBeenCalledWith(controller.signal);
});
it('does not convert aborted work-unit agent loops into failed work units', async () => {
const deps = makeDeps();
const controller = new AbortController();
deps.agentRunner.runLoop.mockImplementation(async () => {
controller.abort();
throw new DOMException('Aborted', 'AbortError');
});
const runner = buildRunner(deps, {
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
workUnitMaxConcurrency: 1,
},
});
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
{ jobId: 'j1', abortSignal: controller.signal, startPhase: () => new TestJobContext('j1', null, async () => undefined, async () => undefined) } as any,
),
).rejects.toThrow(/Aborted/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.reportsRepo.create).not.toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
failedWorkUnits: expect.arrayContaining(['u1']),
}),
}),
);
});
it('emits trace and memory-flow status for rate-limit waits', async () => {
const deps = makeDeps();
let subscriber: ((state: any) => void) | undefined;
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
const runner = buildRunner(deps, {
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
rateLimitGovernor: {
acquireWorkSlot: vi.fn(async () => vi.fn()),
subscribe: vi.fn((cb: (state: any) => void) => {
subscriber = cb;
return vi.fn();
}),
} as never,
},
});
(runner as any).runInner = async (_job: any, ctx: any) => {
subscriber?.({
kind: 'wait_tick',
provider: 'claude-subscription',
rateLimitType: 'five_hour',
resumeAtMs: 2_000,
remainingMs: 1_000,
});
ctx.memoryFlow.emit({ type: 'report_created', runId: 'run-1' });
return {
runId: 'run-1',
syncId: 'sync-1',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
workUnitCount: 0,
failedWorkUnits: [],
artifactsWritten: 0,
commitSha: null,
};
};
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
{ memoryFlow } as any,
);
expect(memoryFlow.snapshot().events).toContainEqual(
expect.objectContaining({
type: 'rate_limit_wait',
provider: 'claude-subscription',
rateLimitType: 'five_hour',
resumeAtMs: 2_000,
remainingMs: 1_000,
}),
);
});
it('fails before squash when reconciliation leaves a touched wiki page with dangling refs', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['missing-page'] },
content: 'See [[missing-page]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/wiki references target missing page\(s\): page-a -> missing-page/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
it('allows reconciliation to save circular wiki refs once both pages exist', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a', 'page-b']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['page-b'] },
content: 'See [[page-b]].',
});
}
if (key === 'page-b') {
return Promise.resolve({
pageKey: 'page-b',
frontmatter: { summary: 'Page B', usage_mode: 'auto', refs: ['page-a'] },
content: 'See [[page-a]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push(
{ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' },
{ target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(result.failedWorkUnits).toEqual([]);
expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalled();
expect(deps.runsRepo.markFailed).not.toHaveBeenCalled();
});
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => {
const deps = makeDeps();
const sessions: any[] = [];
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse']);
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
sessions.push(toolSession);
return {
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({}),
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' });
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/notion/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'notion',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect([...sessions[0].allowedConnectionNames].sort()).toEqual(['notion', 'warehouse']);
});
it('reuses document evidence indexing and page triage for document WorkUnits', async () => {
2026-05-10 23:12:26 +02:00
const deps = makeDeps();
deps.adapter.source = 'notion';
deps.adapter.skillNames = ['notion_synthesize'];
deps.adapter.reconcileSkillNames = [];
2026-05-10 23:12:26 +02:00
deps.adapter.evidenceIndexing = 'documents';
deps.adapter.triageSupported = true;
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{ unitKey: 'full', rawFiles: ['pages/full/metadata.json'], dependencyPaths: [], peerFileIndex: [] },
{ unitKey: 'skip', rawFiles: ['pages/skip/metadata.json'], dependencyPaths: [], peerFileIndex: [] },
2026-05-10 23:12:26 +02:00
],
});
deps.diffSetService.compute.mockResolvedValue({
added: ['pages/full/metadata.json', 'pages/skip/metadata.json'],
2026-05-10 23:12:26 +02:00
modified: [],
deleted: [],
unchanged: [],
});
deps.pageTriage.triageRun.mockResolvedValue({
enabled: true,
fullRawPaths: new Set(['pages/full/metadata.json']),
2026-05-10 23:12:26 +02:00
warnings: [],
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([
['pages/full/metadata.json', 'h-full'],
['pages/skip/metadata.json', 'h-skip'],
2026-05-10 23:12:26 +02:00
]),
rawDirInWorktree: 'raw-sources/c1/notion/s',
2026-05-10 23:12:26 +02:00
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'notion',
2026-05-10 23:12:26 +02:00
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const workUnitCalls = deps.agentRunner.runLoop.mock.calls.filter(
([params]) => params.telemetryTags?.operationName === 'ingest-bundle-wu',
);
expect(deps.contextEvidenceIndex.indexStagedDir).toHaveBeenCalled();
expect(deps.pageTriage.triageRun).toHaveBeenCalled();
expect(workUnitCalls).toHaveLength(1);
expect(workUnitCalls[0][0].telemetryTags.unitKey).toBe('full');
expect(result.workUnitCount).toBe(1);
});
it('emits memory-flow source and planning events for bundle ingest', async () => {
const deps = makeDeps();
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'u1',
rawFiles: ['a.yml'],
peerFileIndex: ['peer.yml'],
dependencyPaths: ['manifest.yml'],
},
],
eviction: { deletedRawPaths: ['old.yml'] },
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const snapshots: MemoryFlowReplayInput[] = [];
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput(), {
onChange: (snapshot) => snapshots.push(snapshot),
});
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
() => Promise.resolve(),
);
(ctx as any).memoryFlow = memoryFlow;
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
);
expect(memoryFlow.snapshot()).toMatchObject({
runId: 'run-1',
connectionId: 'c1',
adapter: 'fake',
sourceDir: '/tmp/stage/upload-x',
});
expect(memoryFlow.snapshot().plannedWorkUnits).toEqual([
{
unitKey: 'u1',
rawFiles: ['a.yml'],
peerFileCount: 1,
dependencyCount: 1,
},
]);
expect(memoryFlow.snapshot().events).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'source_acquired', adapter: 'fake', trigger: 'upload', fileCount: 1 }),
expect.objectContaining({ type: 'scope_detected', fingerprint: null }),
expect.objectContaining({ type: 'raw_snapshot_written', rawFileCount: 1 }),
expect.objectContaining({ type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }),
expect.objectContaining({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 1 }),
]),
);
expect(snapshots.length).toBeGreaterThan(4);
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
memoryFlow: expect.objectContaining({
metadata: expect.objectContaining({
schemaVersion: 1,
mode: 'full',
origin: 'captured',
timing: 'captured',
}),
events: expect.arrayContaining([
expect.objectContaining({
type: 'source_acquired',
emittedAt: expect.stringMatching(/^\d{4}-\d{2}-\d{2}T/),
}),
]),
}),
}),
}),
);
});
it('emits memory-flow WorkUnit step, candidate action, and finish events', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({}),
2026-05-10 23:12:26 +02:00
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
// A real tool call drives the live work_unit_step heartbeat.
await params.toolSet.record_verification_ledger.execute(
{ summary: 'Captured order context.', verifiedIdentifiers: [], unverifiedIdentifiers: [] },
{ toolCallId: 'ledger-1', messages: [] },
);
2026-05-10 23:12:26 +02:00
currentToolSession.actions.push({
target: 'wiki',
type: 'created',
key: 'wiki/orders.md',
2026-05-10 23:12:26 +02:00
detail: 'captured order context',
});
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
() => Promise.resolve(),
);
(ctx as any).memoryFlow = memoryFlow;
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
);
expect(memoryFlow.snapshot().events).toEqual(
expect.arrayContaining([
expect.objectContaining({
type: 'work_unit_started',
unitKey: 'u1',
skills: ['ingest_triage', 'sl_capture', 'wiki_capture'],
2026-05-10 23:12:26 +02:00
}),
expect.objectContaining({ type: 'work_unit_step', unitKey: 'u1', toolCalls: 1 }),
2026-05-10 23:12:26 +02:00
expect.objectContaining({
type: 'candidate_action',
unitKey: 'u1',
target: 'wiki',
action: 'created',
key: 'wiki/orders.md',
2026-05-10 23:12:26 +02:00
}),
expect.objectContaining({ type: 'work_unit_finished', unitKey: 'u1', status: 'success' }),
]),
);
});
it('emits memory-flow gate, saved, provenance, and report events', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({}),
2026-05-10 23:12:26 +02:00
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({
target: 'sl',
type: 'updated',
key: 'orders',
detail: 'captured gross revenue',
});
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
await params.toolSet.record_verification_ledger.execute(
{
summary: 'Reconciliation emits no warehouse identifiers before fallback recording.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'ledger-1', messages: [] },
);
2026-05-10 23:12:26 +02:00
await params.toolSet.emit_conflict_resolution.execute(
{
kind: 'near_duplicate',
artifactKey: 'sl:orders',
detail: 'orders retained as canonical',
flaggedForHuman: false,
},
{ toolCallId: 'conflict-1', messages: [] },
);
await params.toolSet.emit_unmapped_fallback.execute(
{
rawPath: 'a.yml',
reason: 'parse_error',
clarification: 'semantic_not_representable',
2026-05-10 23:12:26 +02:00
fallback: 'flagged',
},
{ toolCallId: 'fallback-1', messages: [] },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
() => Promise.resolve(),
);
(ctx as any).memoryFlow = memoryFlow;
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
);
expect(memoryFlow.snapshot()).toMatchObject({
reportId: 'report-1',
reportPath: 'report-1',
});
expect(memoryFlow.snapshot().events).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'reconciliation_finished', conflictCount: 1, fallbackCount: 1 }),
expect.objectContaining({ type: 'saved', commitSha: 'sq', wikiCount: 0, slCount: 1 }),
expect.objectContaining({ type: 'provenance_recorded', rowCount: 1 }),
expect.objectContaining({ type: 'report_created', runId: 'run-1', reportPath: 'report-1' }),
]),
);
});
it('finishes successful bundle memory-flow runs as done', async () => {
const deps = makeDeps();
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
() => Promise.resolve(),
);
(ctx as any).memoryFlow = memoryFlow;
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
);
expect(memoryFlow.snapshot().status).toBe('done');
});
it('finishes bundle memory-flow runs with sanitized errors when the runner fails', async () => {
const deps = makeDeps();
const sensitiveMessage = [
'failed to read postgres://user',
':password',
'@localhost:5432/db?api_key=abc',
' token=',
'secret',
].join('');
deps.adapter.detect.mockRejectedValue(new Error(sensitiveMessage));
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
() => Promise.resolve(),
);
(ctx as any).memoryFlow = memoryFlow;
await expect(
runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
),
).rejects.toThrow(/failed to read/);
expect(memoryFlow.snapshot()).toMatchObject({
status: 'error',
errors: ['failed to read postgres://[redacted] token=[redacted]'],
});
expect(memoryFlow.snapshot().events).toEqual(
expect.arrayContaining([
expect.objectContaining({ type: 'source_acquired', adapter: 'fake', trigger: 'upload', fileCount: 1 }),
]),
);
});
it('stores memory-flow provenance and transcript summaries in the ingest report body', async () => {
const deps = makeDeps();
deps.toolsetFactory.createIngestWuToolset.mockReturnValue({
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({
2026-05-10 23:12:26 +02:00
read_raw_span: {
description: 'read a raw span',
inputSchema: {},
execute: vi.fn().mockResolvedValue('safe excerpt'),
},
wiki_write: {
description: 'write wiki',
inputSchema: {},
execute: vi.fn().mockResolvedValue('written'),
},
}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
await params.toolSet.read_raw_span.execute(
{ path: 'a.yml', startLine: 1, endLine: 2 },
{ toolCallId: 'read-1', messages: [] },
);
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
await params.toolSet.record_verification_ledger.execute(
{
summary: 'Wiki write contains no warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'ledger-1', messages: [] },
);
2026-05-10 23:12:26 +02:00
await params.toolSet.wiki_write.execute(
{ key: 'wiki/a.md', content: 'safe summary' },
2026-05-10 23:12:26 +02:00
{ toolCallId: 'wiki-1', messages: [] },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
provenanceRows: [
expect.objectContaining({
rawPath: 'a.yml',
artifactKind: null,
artifactKey: null,
actionType: 'skipped',
targetConnectionId: null,
}),
],
toolTranscripts: [
{
unitKey: 'u1',
2026-05-10 23:51:24 +02:00
path: '/tmp/ktx-test/run/wu-transcripts/j1/u1.jsonl',
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
toolCallCount: 3,
2026-05-10 23:12:26 +02:00
errorCount: 0,
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
toolNames: ['read_raw_span', 'record_verification_ledger', 'wiki_write'],
2026-05-10 23:12:26 +02:00
},
],
}),
}),
);
});
it('persists WorkUnit unmapped fallback records in the report body', async () => {
const deps = makeDeps();
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
await params.toolSet.record_verification_ledger.execute(
{
summary: 'Unmapped fallback records an unsupported conversion metric without verified warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'ledger-1', messages: [] },
);
2026-05-10 23:12:26 +02:00
await params.toolSet.emit_unmapped_fallback.execute(
{
rawPath: 'a.yml',
reason: 'conversion_metric_unsupported',
fallback: 'flagged',
},
{ toolCallId: 'fallback-1', messages: [] },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
unmappedFallbacks: [
{
rawPath: 'a.yml',
reason: 'conversion_metric_unsupported',
detail: expect.stringContaining('conversion metric'),
2026-05-10 23:12:26 +02:00
fallback: 'flagged',
},
],
}),
}),
);
});
it('persists reconciliation conflict and eviction records in the report body', async () => {
const deps = makeDeps();
deps.diffSetService.compute.mockResolvedValue({
added: [],
modified: [],
deleted: ['views/old_orders.view.lkml'],
unchanged: [],
});
deps.adapter.chunk.mockResolvedValue({
workUnits: [],
eviction: { deletedRawPaths: ['views/old_orders.view.lkml'] },
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references
2026-05-13 13:43:23 +02:00
await params.toolSet.record_verification_ledger.execute(
{
summary: 'Reconciliation records conflict, eviction, and fallback decisions without warehouse identifiers.',
verifiedIdentifiers: [],
unverifiedIdentifiers: [],
},
{ toolCallId: 'ledger-1', messages: [] },
);
2026-05-10 23:12:26 +02:00
await params.toolSet.emit_conflict_resolution.execute(
{
kind: 'near_duplicate',
artifactKey: 'sl:orders',
detail: 'orders and old_orders overlapped; orders is retained as canonical',
flaggedForHuman: false,
},
{ toolCallId: 'conflict-1', messages: [] },
);
await params.toolSet.emit_eviction_decision.execute(
{
rawPath: 'views/old_orders.view.lkml',
artifactKind: 'sl',
artifactKey: 'old_orders',
action: 'removed',
reason: 'raw source disappeared in this sync',
},
{ toolCallId: 'eviction-1', messages: [] },
);
await params.toolSet.emit_unmapped_fallback.execute(
{
rawPath: 'cards/untranslated.json',
reason: 'parse_error',
clarification: 'metabase_sql_untranslated',
2026-05-10 23:12:26 +02:00
fallback: 'flagged',
},
{ toolCallId: 'fallback-1', messages: [] },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['cards/untranslated.json', 'h-card']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
conflictsResolved: [
{
kind: 'near_duplicate',
artifactKey: 'sl:orders',
detail: 'orders and old_orders overlapped; orders is retained as canonical',
flaggedForHuman: false,
},
],
evictionsApplied: [
{
rawPath: 'views/old_orders.view.lkml',
artifactKind: 'sl',
artifactKey: 'old_orders',
action: 'removed',
reason: 'raw source disappeared in this sync',
},
],
unmappedFallbacks: [
{
rawPath: 'cards/untranslated.json',
reason: 'parse_error',
detail: expect.stringContaining('metabase_sql_untranslated'),
2026-05-10 23:12:26 +02:00
fallback: 'flagged',
},
],
}),
}),
);
});
it('persists reconciliation artifact resolutions as provenance rows', async () => {
const deps = makeDeps();
deps.diffSetService.compute.mockResolvedValue({
added: [],
modified: [],
deleted: ['looks/20.json'],
unchanged: ['explores/b2b/sales_pipeline.json'],
});
deps.adapter.chunk.mockResolvedValue({
workUnits: [],
eviction: { deletedRawPaths: ['looks/20.json'] },
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
await params.toolSet.emit_artifact_resolution.execute(
{
rawPath: 'explores/b2b/sales_pipeline.json',
artifactKind: 'sl',
artifactKey: 'looker__b2b__sales_pipeline',
actionType: 'subsumed',
reason: 'File adapter source b2b__sales_pipeline is canonical.',
},
{ toolCallId: 'resolution-1', messages: [] },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h-explore']]),
rawDirInWorktree: 'raw-sources/c1/looker/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'looker',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.provenanceRepo.insertMany).toHaveBeenCalledWith(
expect.arrayContaining([
expect.objectContaining({
rawPath: 'explores/b2b/sales_pipeline.json',
artifactKind: 'sl',
artifactKey: 'looker__b2b__sales_pipeline',
actionType: 'subsumed',
}),
]),
);
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
artifactResolutions: [
{
rawPath: 'explores/b2b/sales_pipeline.json',
artifactKind: 'sl',
artifactKey: 'looker__b2b__sales_pipeline',
actionType: 'subsumed',
reason: 'File adapter source b2b__sales_pipeline is canonical.',
},
],
}),
}),
);
});
it('runs manual override reconciliation from the prior report snapshot and marks the prior report superseded', async () => {
2026-05-10 23:51:24 +02:00
const tempRoot = await mkdtemp(join(tmpdir(), 'ktx-override-'));
2026-05-10 23:12:26 +02:00
const deps = makeDeps();
deps.reportsRepo.findByJobId.mockResolvedValue({
id: 'report-old',
runId: 'run-old',
jobId: 'job-old',
connectionId: 'c1',
sourceKey: 'fake',
createdAt: '2026-04-27T10:00:00.000Z',
body: {
syncId: '2026-04-27-100000-job-old',
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
commitSha: 'old-sha',
workUnits: [
{
unitKey: 'wu-orders',
rawFiles: ['a.yml'],
status: 'success',
actions: [
{
target: 'sl',
type: 'updated',
key: 'orders',
detail: 'captured gross_revenue as orders.gross_revenue',
},
],
touchedSlSources: ['orders'],
},
],
failedWorkUnits: [],
reconciliationSkipped: false,
conflictsResolved: [
{
kind: 'definitional_contradiction',
contestedKey: 'gross_revenue',
artifactKey: 'orders.gross_revenue',
detail: 'billing and orders disagree',
flaggedForHuman: true,
},
],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
},
});
deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/2026-04-27-100000-job-old/a.yml']);
deps.gitService.getFileAtCommit.mockResolvedValue('name: orders\n');
deps.diffSetService.compute.mockResolvedValue({ added: [], modified: [], deleted: [], unchanged: ['a.yml'] });
deps.agentRunner.runLoop.mockImplementation(async (args: any) => {
await args.toolSet.emit_conflict_resolution.execute(
{
kind: 'definitional_contradiction',
contestedKey: 'gross_revenue',
artifactKey: 'orders.gross_revenue',
detail: 'canonical pin applied',
flaggedForHuman: false,
},
{ toolCallId: 'tc-1', messages: [] },
);
return { stopReason: 'natural' };
});
const runner = new IngestBundleRunner({
...(buildRunner(deps) as any).deps,
storage: {
homeDir: tempRoot,
2026-05-10 23:51:24 +02:00
systemGitAuthor: { name: 'KTX Test', email: 'system@ktx.local' },
autoCommit: true,
2026-05-10 23:12:26 +02:00
resolveUploadDir: (uploadId: string) => join(tempRoot, 'ingest-uploads', uploadId),
resolvePullDir: (jobId: string) => join(tempRoot, 'ingest-pulls', jobId),
resolveTranscriptDir: (jobId: string) => join(tempRoot, 'run', 'wu-transcripts', jobId),
},
});
await runner.run({
jobId: 'job-new',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'manual_override',
bundleRef: { kind: 'override', priorJobId: 'job-old' },
});
await expect(readFile(join(tempRoot, 'ingest-pulls/job-new/a.yml'), 'utf-8')).resolves.toBe('name: orders\n');
expect(deps.adapter.chunk).not.toHaveBeenCalled();
expect(deps.agentRunner.runLoop).toHaveBeenCalled();
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
jobId: 'job-new',
body: expect.objectContaining({
overrideOf: 'job-old',
supersededBy: null,
conflictsResolved: [
expect.objectContaining({
contestedKey: 'gross_revenue',
flaggedForHuman: false,
}),
],
}),
}),
);
expect(deps.reportsRepo.markSuperseded).toHaveBeenCalledWith('job-old', 'job-new');
await rm(tempRoot, { recursive: true, force: true });
});
it('passes connection canonical pins into each WorkUnit system prompt', async () => {
const deps = makeDeps();
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'wu-orders',
rawFiles: ['cards/orders.yml'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
deps.canonicalPins.listPins.mockResolvedValue([
{
contestedKey: 'gross_revenue',
canonicalArtifactKey: 'finance.gross_revenue',
pinnedAt: '2026-04-27T12:00:00.000Z',
pinnedBy: 'user-1',
reason: 'finance owns revenue definitions',
},
]);
deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' });
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['cards/orders.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
);
expect(workUnitCall?.[0].systemPrompt).toContain('<canonical_pins>');
expect(workUnitCall?.[0].systemPrompt).toContain('contestedKey: gross_revenue');
expect(workUnitCall?.[0].systemPrompt).toContain('canonicalArtifactKey: finance.gross_revenue');
expect(deps.canonicalPins.listPins).toHaveBeenCalledTimes(1);
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['c1']);
});
it('builds WorkUnit SL index and canonical pins across adapter target connections', async () => {
const deps = makeDeps();
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'looker-explore-b2b-orders',
rawFiles: ['explores/b2b/orders.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
deps.canonicalPins.listPins.mockResolvedValue([
{
contestedKey: 'gross_revenue',
canonicalArtifactKey: 'finance.gross_revenue',
pinnedAt: '2026-05-01T12:00:00.000Z',
pinnedBy: 'user-1',
reason: 'finance owns revenue definitions',
},
]);
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['explores/b2b/orders.json', 'h1']]),
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'looker-run',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
);
expect(deps.adapter.listTargetConnectionIds).toHaveBeenCalledWith('/tmp/stage/upload-x');
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('looker-run');
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
2026-05-10 23:12:26 +02:00
expect(workUnitCall?.[0].userPrompt).toContain('looker__orders');
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['looker-run', 'warehouse-2']);
});
it('syncs wiki refs, reindexes, and records provenance on SL target connections', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
deps.wikiService.readPage = vi.fn().mockResolvedValue({
frontmatter: { sl_refs: ['looker__b2b__sales_pipeline.arr'] },
});
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
fix(context): merge overlay columns onto manifest columns by name (#94) * fix(context): merge overlay columns onto manifest columns by name composeOverlay was appending overlay columns to the manifest column list, producing duplicate entries when dbt/metabase overlays declared a column just to attach descriptions. The duplicates carried no `type`, so the pydantic SourceDefinition rejected them at semantic-query time and broke `ktx sl query` for every overlay-backed measure. Now overlay columns match base columns by name (case-insensitive): same-name entries merge onto the manifest (overlay fields win, type/role fall back to the base, descriptions merge per source key) and only new names append. * refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract Overlay sources now have two distinct collections: `columns:` for computed columns (requiring `expr` + `type`) and `column_overrides:` for metadata patches to inherited manifest columns. Composing or loading an overlay that mixes the two — or references an unknown column — fails with a typed error. Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` / `toResolvedWire` as the strict shape sent to the Python engine, and add a schema contract test that diffs Zod against the Pydantic JSON schema dumped by `python -m semantic_layer dump-schema`. `SourceDefinition` is now `extra="forbid"` on the Python side. `loadAllSources` surfaces per-file load errors instead of swallowing them, so validation/query paths can report manifest shard parse failures. * fix(context): make scan description generation resilient and quiet A transient sampleTable failure during ingest used to take out every table in a connection: generateTableDescription returned a hardcoded 'Table not found' string into descriptions.ai, and KtxDescriptionGenerator was constructed without a logger, so the failure left no trail anywhere. - sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff, honouring KtxScanContext.signal via a new KtxAbortedError. - On retry exhaustion or missing capability, table generation falls back to a metadata-only prompt built from column name / native type / comment / rawDescriptions. The column path follows the same rule -- call the LLM when any of samples or rawDescriptions are available; skip only when both are absent. - Logger is now threaded from KtxScanContext into the generator. Failures emit structured KtxScanWarning entries (new description_fallback_used code, plus existing sampling_failed / enrichment_failed / connector_capability_missing). ktx scan groups warnings by code so a batch of identical failures collapses to one summary line plus sample. - Returns null on failure instead of the 'Table not found' sentinel; the manifest writer's existing guard already skips empty descriptions, so schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS already strips stale 'ai' on merge, so existing YAML clears on next run. Also suppress AI SDK v6 'system in messages' warning: pull system messages out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages helper and pass them top-level to generateText (preserves cacheControl providerOptions on the SystemModelMessage). Agent-runner's local splitSystemPromptMessages dedupes onto the shared helper. * test(docs): align examples-docs assertions with revamped docs PR #103 (setup/guide doc revamp) reworded several CLI examples and connection labels; the assertions in scripts/examples-docs.test.mjs still referenced the pre-revamp wording and were failing in CI on main. Update the regexes to match the post-revamp content: - drop the `--json` flag from the sl-query example expectation - move the `Driver:` / `Status: ok` probe to the connection reference, which is where that output now lives (driver id is lowercase `postgres`, not the display name `PostgreSQL`) - drop the obsolete `Install \`uv\`...` troubleshooting line - accept `<connectionId>` everywhere; the docs no longer use the hyphenated `<connection-id>` form - match the `warehouse` connection id used in the quickstart instead of the `postgres-warehouse` id only used in the README and setup ref * fix(sl): skip TS/Python schema contract test when uv is unavailable The TypeScript checks CI job does not install uv or Python, so the module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw ENOENT and failed the suite. Wrap the schema dump in a try/catch and guard the describe block with `describe.skipIf` so the test skips in environments without uv. Local dev and any CI job that has uv on PATH still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
2026-05-10 23:12:26 +02:00
);
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push(
{
target: 'wiki',
type: 'created',
key: 'wiki/global/pipeline.md',
2026-05-10 23:12:26 +02:00
detail: 'Pipeline article',
},
{
target: 'sl',
type: 'created',
key: 'looker__b2b__sales_pipeline',
detail: 'Created warehouse source',
targetConnectionId: 'warehouse-2',
},
);
addTouchedSlSource(currentToolSession.touchedSlSources, 'warehouse-2', 'looker__b2b__sales_pipeline');
}
return { stopReason: 'natural' };
});
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
feat: add claude-code llm backend with runtime port (#115) * docs: revise claude-code ingest backend spec * docs: keep claude-code spec focused on ingest * docs: expand claude-code spec to full llm parity * Refine claude-code backend spec after adversarial review iteration 1 * Refine claude-code backend spec after adversarial review iteration 2 * Refine claude-code backend spec after adversarial review iteration 3 * feat: recognize claude-code llm backend * feat: add ktx llm runtime port * feat: add claude-code llm runtime * feat: route non-agent llm calls through runtime * feat: run ingest agents through llm runtime * feat: support claude-code setup and status * test: verify claude-code backend runtime * docs: add claude-code backend v1 runtime plan * fix: close claude-code runtime isolation checks * fix: warn on claude-code prompt caching during setup * chore: verify claude-code v1 closure * docs: add claude-code backend v1 isolation closure plan * fix: update claude-code ingest setup guidance * docs: add claude-code backend v1 ingest guidance closure plan * docs: align claude-code isolation spec with sdk metadata * test: cover claude-code host discovery metadata * fix: tolerate claude-code host discovery metadata * docs: clarify claude-code host discovery metadata * docs: add claude-code auth-probe isolation fix plan * chore: prepare kaelio ktx rc1 release * chore: add semantic release workflow * fix: unblock ci checks * chore(release): 0.1.0-rc.1 * feat: add Claude Code model selection to setup * fix: keep git maintenance attached in local repos
2026-05-16 12:06:34 +02:00
toRuntimeTools: vi.fn().mockReturnValue({}),
2026-05-10 23:12:26 +02:00
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
currentHashes: new Map([['a.yml', 'h1']]),
2026-05-10 23:12:26 +02:00
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'looker-run',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.knowledgeSlRefs.syncFromWiki).toHaveBeenCalledWith({
wikiPageKey: 'wiki/global/pipeline.md',
2026-05-10 23:12:26 +02:00
wikiScope: 'GLOBAL',
wikiScopeId: null,
refs: [{ connectionId: 'warehouse-2', sourceName: 'looker__b2b__sales_pipeline' }],
});
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]);
expect(deps.provenanceRepo.insertMany).toHaveBeenCalledWith(
expect.arrayContaining([
expect.objectContaining({
connectionId: 'looker-run',
targetConnectionId: 'warehouse-2',
artifactKind: 'sl',
artifactKey: 'looker__b2b__sales_pipeline',
}),
expect.objectContaining({
connectionId: 'looker-run',
targetConnectionId: null,
artifactKind: 'wiki',
artifactKey: 'wiki/global/pipeline.md',
2026-05-10 23:12:26 +02:00
}),
]),
);
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
workUnits: [
expect.objectContaining({
touchedSlSources: [{ connectionId: 'warehouse-2', sourceName: 'looker__b2b__sales_pipeline' }],
}),
],
provenanceRows: expect.arrayContaining([
expect.objectContaining({
artifactKind: 'sl',
artifactKey: 'looker__b2b__sales_pipeline',
targetConnectionId: 'warehouse-2',
}),
]),
}),
}),
);
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
it('runs adapter finalization before squash, records the outcome, and reindexes touched sources', async () => {
2026-05-10 23:12:26 +02:00
const deps = makeDeps();
deps.adapter.source = 'metricflow';
deps.registry.get.mockReturnValue(deps.adapter);
deps.adapter.chunk.mockResolvedValue({
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
workUnits: [],
2026-05-10 23:12:26 +02:00
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
deps.adapter.finalize = vi.fn().mockResolvedValue({
result: { sourcesTouched: 1 },
warnings: ['kept going'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
changedWikiPageKeys: [],
actions: [
{
target: 'sl',
type: 'updated',
key: 'orders',
targetConnectionId: 'warehouse-2',
detail: 'Finalized orders usage',
rawPaths: ['semantic_models.yml'],
},
],
});
2026-05-10 23:12:26 +02:00
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
fix(context): merge overlay columns onto manifest columns by name (#94) * fix(context): merge overlay columns onto manifest columns by name composeOverlay was appending overlay columns to the manifest column list, producing duplicate entries when dbt/metabase overlays declared a column just to attach descriptions. The duplicates carried no `type`, so the pydantic SourceDefinition rejected them at semantic-query time and broke `ktx sl query` for every overlay-backed measure. Now overlay columns match base columns by name (case-insensitive): same-name entries merge onto the manifest (overlay fields win, type/role fall back to the base, descriptions merge per source key) and only new names append. * refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract Overlay sources now have two distinct collections: `columns:` for computed columns (requiring `expr` + `type`) and `column_overrides:` for metadata patches to inherited manifest columns. Composing or loading an overlay that mixes the two — or references an unknown column — fails with a typed error. Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` / `toResolvedWire` as the strict shape sent to the Python engine, and add a schema contract test that diffs Zod against the Pydantic JSON schema dumped by `python -m semantic_layer dump-schema`. `SourceDefinition` is now `extra="forbid"` on the Python side. `loadAllSources` surfaces per-file load errors instead of swallowing them, so validation/query paths can report manifest shard parse failures. * fix(context): make scan description generation resilient and quiet A transient sampleTable failure during ingest used to take out every table in a connection: generateTableDescription returned a hardcoded 'Table not found' string into descriptions.ai, and KtxDescriptionGenerator was constructed without a logger, so the failure left no trail anywhere. - sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff, honouring KtxScanContext.signal via a new KtxAbortedError. - On retry exhaustion or missing capability, table generation falls back to a metadata-only prompt built from column name / native type / comment / rawDescriptions. The column path follows the same rule -- call the LLM when any of samples or rawDescriptions are available; skip only when both are absent. - Logger is now threaded from KtxScanContext into the generator. Failures emit structured KtxScanWarning entries (new description_fallback_used code, plus existing sampling_failed / enrichment_failed / connector_capability_missing). ktx scan groups warnings by code so a batch of identical failures collapses to one summary line plus sample. - Returns null on failure instead of the 'Table not found' sentinel; the manifest writer's existing guard already skips empty descriptions, so schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS already strips stale 'ai' on merge, so existing YAML clears on next run. Also suppress AI SDK v6 'system in messages' warning: pull system messages out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages helper and pass them top-level to generateText (preserves cacheControl providerOptions on the SystemModelMessage). Agent-runner's local splitSystemPromptMessages dedupes onto the shared helper. * test(docs): align examples-docs assertions with revamped docs PR #103 (setup/guide doc revamp) reworded several CLI examples and connection labels; the assertions in scripts/examples-docs.test.mjs still referenced the pre-revamp wording and were failing in CI on main. Update the regexes to match the post-revamp content: - drop the `--json` flag from the sl-query example expectation - move the `Driver:` / `Status: ok` probe to the connection reference, which is where that output now lives (driver id is lowercase `postgres`, not the display name `PostgreSQL`) - drop the obsolete `Install \`uv\`...` troubleshooting line - accept `<connectionId>` everywhere; the docs no longer use the hyphenated `<connection-id>` form - match the `warehouse` connection id used in the quickstart instead of the `postgres-warehouse` id only used in the README and setup ref * fix(sl): skip TS/Python schema contract test when uv is unavailable The TypeScript checks CI job does not install uv or Python, so the module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw ENOENT and failed the suite. Wrap the schema dump in a try/catch and guard the describe block with `describe.skipIf` so the test skips in environments without uv. Local dev and any CI job that has uv on PATH still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
2026-05-10 23:12:26 +02:00
);
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
let head = 'pre-finalization';
const git = {
revParseHead: vi.fn(async () => head),
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
head = 'post-finalization';
return { created: true, commitHash: 'finalization-sha' };
}
return { created: true, commitHash: head };
}),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'post-finalization' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
2026-05-10 23:12:26 +02:00
}),
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockImplementation(async (from: string, to: string) =>
from === 'pre-finalization' && to === 'post-finalization'
? [{ status: 'M', path: 'semantic-layer/warehouse-2/orders.yaml' }]
: [],
),
changedPaths: vi.fn().mockResolvedValue(['semantic-layer/warehouse-2/orders.yaml']),
2026-05-10 23:12:26 +02:00
};
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
deps.sessionWorktreeService.create.mockResolvedValue({
chatId: 'j1',
workdir: '/tmp/wt',
branch: 'session/j1',
baseSha: 'b',
createdAt: new Date(),
git,
config: {},
});
const runner = buildRunner(deps);
2026-05-10 23:12:26 +02:00
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['semantic_models.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'metricflow',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
expect(deps.adapter.finalize).toHaveBeenCalledWith(
expect.objectContaining({
connectionId: 'c1',
sourceKey: 'metricflow',
syncId: expect.any(String),
jobId: 'j1',
runId: 'run-1',
workdir: '/tmp/wt',
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
}),
);
2026-05-10 23:12:26 +02:00
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
finalization: expect.objectContaining({
2026-05-10 23:12:26 +02:00
sourceKey: 'metricflow',
status: 'success',
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
commitSha: 'finalization-sha',
touchedPaths: ['semantic-layer/warehouse-2/orders.yaml'],
derivedTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
declaredTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
actions: [expect.objectContaining({ key: 'orders' })],
}),
2026-05-10 23:12:26 +02:00
}),
}),
);
expect(deps.semanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse-2');
expect(deps.slSearchService.indexSources).toHaveBeenCalledWith('warehouse-2', [{ name: 'warehouse-2_source' }]);
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
it('includes finalization actions in memory-flow saved counts', async () => {
const deps = makeDeps();
deps.adapter.source = 'historic-sql';
deps.registry.get.mockReturnValue(deps.adapter);
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'historic-sql-table-public-orders',
rawFiles: ['tables/public/orders.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Merged usage' },
{ target: 'sl', type: 'updated', key: 'customers', detail: 'Merged usage' },
{ target: 'wiki', type: 'created', key: 'historic-sql-orders', detail: 'Projected pattern' },
{ target: 'wiki', type: 'updated', key: 'historic-sql-customers', detail: 'Projected pattern' },
],
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['tables/public/orders.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/historic-sql/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput());
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'historic-sql',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
{
jobId: 'j1',
memoryFlow,
startPhase: () => new TestJobContext('j1', null, () => Promise.resolve(), () => Promise.resolve()),
},
);
expect(memoryFlow.snapshot().events).toContainEqual(
expect.objectContaining({
type: 'saved',
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
wikiCount: 2,
slCount: 2,
}),
);
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
it('marks finalization infrastructure failure as failed and preserves worktree cleanup state', async () => {
2026-05-10 23:12:26 +02:00
const deps = makeDeps();
deps.adapter.source = 'metricflow';
deps.registry.get.mockReturnValue(deps.adapter);
deps.adapter.chunk.mockResolvedValue({
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
deps.adapter.finalize = vi.fn().mockRejectedValue(new Error('worktree write failed'));
const runner = buildRunner(deps);
2026-05-10 23:12:26 +02:00
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['semantic_models.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'metricflow',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow('worktree write failed');
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash');
});
feat(ingest): adapter-owned finalization replaces post-processor escape hatch (#136) * Refine adapter-owned ingest finalization design after adversarial review iteration 1 * Refine adapter-owned ingest finalization design after adversarial review iteration 2 * Refine adapter-owned ingest finalization design after adversarial review iteration 3 * Implement adapter-owned ingest finalization v1 Moves finalization from runner-owned post-processors into typed SourceAdapter.finalize() contracts. Adds finalization report schema, scope derivation, override replay context, and migrates historic-SQL projection. Removes IngestBundlePostProcessorPort wiring and HistoricSqlProjectionPostProcessor. * feat(ingest): export finalization adapter contract types * test(ingest): exercise historic sql finalization locally * docs(plans): add adapter-owned finalization v1 closure plan * fix(setup): unblock clean Linux installs and add enabled_tables allowlist - Pin managed Python runtime to 3.13 via `uv venv --python 3.13` so installs don't pick the system 3.12 on Ubuntu 24.04 and fail at wheel install. - Sanitize NO_PROXY/no_proxy for the daemon child process — drop IPv6 CIDR entries that httpx rejects with InvalidURL (OrbStack injects these by default). - Add `enabled_tables` allowlist on warehouse connections (zod schema + live-database introspection filter) to scope ingest to specific tables. - Add `getting-started/troubleshooting-linux` docs page covering the Python 3.13 prerequisite, IPv6 proxy gotcha, and a minimal working recipe; link it from the quickstart troubleshooting table and the llms-docs map. - Make docs-site origin overridable via `KTX_DOCS_ORIGIN` so local builds can serve under host.docker.internal. * Move docs changes to specs repo * fix(cli): keep managed runtime python version private * Deduplicate enabled tables filtering
2026-05-20 14:17:10 +02:00
it('reports finalization actions excluded from provenance when raw paths are not defensible', async () => {
const deps = makeDeps();
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [
{ target: 'wiki', type: 'updated', key: 'historic-sql-pattern', detail: 'No raw path' },
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Invalid raw path', rawPaths: ['missing.json'] },
],
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['current.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
finalization: expect.objectContaining({
provenanceExclusions: [
expect.objectContaining({ reason: 'missing_raw_paths' }),
expect.objectContaining({ reason: 'raw_path_not_defensible', invalidRawPaths: ['missing.json'] }),
],
}),
}),
}),
);
expect(deps.provenanceRepo.insertMany).not.toHaveBeenCalledWith(
expect.arrayContaining([expect.objectContaining({ rawPath: 'missing.json' })]),
);
});
it('passes explicit override replay metadata and no current work unit outcomes', async () => {
const deps = makeDeps();
deps.reportsRepo.findByJobId.mockResolvedValue({
id: 'prior-report',
runId: 'prior-run',
jobId: 'prior-job',
connectionId: 'c1',
sourceKey: 'fake',
createdAt: '2026-05-18T00:00:00.000Z',
body: {
status: 'completed',
syncId: 'prior-sync',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
commitSha: 'prior-sha',
workUnits: [
{
unitKey: 'prior-unit',
rawFiles: ['prior.json'],
status: 'success',
actions: [{ target: 'wiki', type: 'created', key: 'prior', detail: 'prior' }],
touchedSlSources: [],
},
],
failedWorkUnits: [],
reconciliationSkipped: false,
conflictsResolved: [],
evictionsApplied: [
{
rawPath: 'do-not-replay.json',
artifactKind: 'wiki',
artifactKey: 'old',
action: 'removed',
reason: 'prior',
},
],
unmappedFallbacks: [],
artifactResolutions: [],
evictionInputs: ['evicted-from-prior-report.json'],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
});
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [],
});
deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/prior-sync/prior.json']);
deps.gitService.getFileAtCommit.mockResolvedValue('{"id":1}\n');
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['prior.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/prior-sync',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/prior');
await runner.run({
jobId: 'override-job',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'manual_override',
bundleRef: { kind: 'override', priorJobId: 'prior-job' },
});
expect(deps.adapter.finalize).toHaveBeenCalledWith(
expect.objectContaining({
workUnitOutcomes: [],
overrideReplay: {
priorJobId: 'prior-job',
priorRunId: 'prior-run',
priorSyncId: 'prior-sync',
evictionRawPaths: ['evicted-from-prior-report.json'],
},
}),
);
});
2026-05-10 23:12:26 +02:00
it('includes existing global wiki pages in WorkUnit prompts', async () => {
const deps = makeDeps();
deps.knowledgeIndex.listPagesForUser.mockResolvedValue([
{
page_key: 'revenue-recognition',
summary: 'Recognize revenue net of refunds after fulfillment.',
scope: 'GLOBAL',
scope_id: null,
},
]);
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['cards/orders.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
);
expect(workUnitCall?.[0].userPrompt).toContain('## Wiki Pages');
2026-05-10 23:12:26 +02:00
expect(workUnitCall?.[0].userPrompt).toContain(
'- revenue-recognition: Recognize revenue net of refunds after fulfillment.',
);
expect(deps.knowledgeIndex.listPagesForUser).toHaveBeenCalledWith('system');
});
it('includes manifest-backed target sources in WorkUnit prompts', async () => {
const deps = makeDeps();
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['postgres-warehouse']);
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
fix(context): merge overlay columns onto manifest columns by name (#94) * fix(context): merge overlay columns onto manifest columns by name composeOverlay was appending overlay columns to the manifest column list, producing duplicate entries when dbt/metabase overlays declared a column just to attach descriptions. The duplicates carried no `type`, so the pydantic SourceDefinition rejected them at semantic-query time and broke `ktx sl query` for every overlay-backed measure. Now overlay columns match base columns by name (case-insensitive): same-name entries merge onto the manifest (overlay fields win, type/role fall back to the base, descriptions merge per source key) and only new names append. * refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract Overlay sources now have two distinct collections: `columns:` for computed columns (requiring `expr` + `type`) and `column_overrides:` for metadata patches to inherited manifest columns. Composing or loading an overlay that mixes the two — or references an unknown column — fails with a typed error. Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` / `toResolvedWire` as the strict shape sent to the Python engine, and add a schema contract test that diffs Zod against the Pydantic JSON schema dumped by `python -m semantic_layer dump-schema`. `SourceDefinition` is now `extra="forbid"` on the Python side. `loadAllSources` surfaces per-file load errors instead of swallowing them, so validation/query paths can report manifest shard parse failures. * fix(context): make scan description generation resilient and quiet A transient sampleTable failure during ingest used to take out every table in a connection: generateTableDescription returned a hardcoded 'Table not found' string into descriptions.ai, and KtxDescriptionGenerator was constructed without a logger, so the failure left no trail anywhere. - sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff, honouring KtxScanContext.signal via a new KtxAbortedError. - On retry exhaustion or missing capability, table generation falls back to a metadata-only prompt built from column name / native type / comment / rawDescriptions. The column path follows the same rule -- call the LLM when any of samples or rawDescriptions are available; skip only when both are absent. - Logger is now threaded from KtxScanContext into the generator. Failures emit structured KtxScanWarning entries (new description_fallback_used code, plus existing sampling_failed / enrichment_failed / connector_capability_missing). ktx scan groups warnings by code so a batch of identical failures collapses to one summary line plus sample. - Returns null on failure instead of the 'Table not found' sentinel; the manifest writer's existing guard already skips empty descriptions, so schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS already strips stale 'ai' on merge, so existing YAML clears on next run. Also suppress AI SDK v6 'system in messages' warning: pull system messages out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages helper and pass them top-level to generateText (preserves cacheControl providerOptions on the SystemModelMessage). Agent-runner's local splitSystemPromptMessages dedupes onto the shared helper. * test(docs): align examples-docs assertions with revamped docs PR #103 (setup/guide doc revamp) reworded several CLI examples and connection labels; the assertions in scripts/examples-docs.test.mjs still referenced the pre-revamp wording and were failing in CI on main. Update the regexes to match the post-revamp content: - drop the `--json` flag from the sl-query example expectation - move the `Driver:` / `Status: ok` probe to the connection reference, which is where that output now lives (driver id is lowercase `postgres`, not the display name `PostgreSQL`) - drop the obsolete `Install \`uv\`...` troubleshooting line - accept `<connectionId>` everywhere; the docs no longer use the hyphenated `<connection-id>` form - match the `warehouse` connection id used in the quickstart instead of the `postgres-warehouse` id only used in the README and setup ref * fix(sl): skip TS/Python schema contract test when uv is unavailable The TypeScript checks CI job does not install uv or Python, so the module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw ENOENT and failed the suite. Wrap the schema dump in a try/catch and guard the describe block with `describe.skipIf` so the test skips in environments without uv. Local dev and any CI job that has uv on PATH still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
Promise.resolve({
sources: connectionId === 'postgres-warehouse' ? [{ name: 'stg_accounts' }] : [],
loadErrors: [],
}),
);
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['models/schema.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/dbt-main/dbt/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'dbt-main',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const workUnitCall = deps.agentRunner.runLoop.mock.calls.find(
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu',
);
expect(workUnitCall?.[0].userPrompt).toContain('## postgres-warehouse');
expect(workUnitCall?.[0].userPrompt).toContain('stg_accounts');
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['dbt-main', 'postgres-warehouse']);
});
it('does not resolve qualified fallback table refs by source name alone', async () => {
const deps = makeDeps();
fix(context): merge overlay columns onto manifest columns by name (#94) * fix(context): merge overlay columns onto manifest columns by name composeOverlay was appending overlay columns to the manifest column list, producing duplicate entries when dbt/metabase overlays declared a column just to attach descriptions. The duplicates carried no `type`, so the pydantic SourceDefinition rejected them at semantic-query time and broke `ktx sl query` for every overlay-backed measure. Now overlay columns match base columns by name (case-insensitive): same-name entries merge onto the manifest (overlay fields win, type/role fall back to the base, descriptions merge per source key) and only new names append. * refactor(sl): split overlay columns from column_overrides and enforce TS/Python wire contract Overlay sources now have two distinct collections: `columns:` for computed columns (requiring `expr` + `type`) and `column_overrides:` for metadata patches to inherited manifest columns. Composing or loading an overlay that mixes the two — or references an unknown column — fails with a typed error. Introduce `ResolvedSemanticLayerSource` / `resolvedSourceSchema` / `toResolvedWire` as the strict shape sent to the Python engine, and add a schema contract test that diffs Zod against the Pydantic JSON schema dumped by `python -m semantic_layer dump-schema`. `SourceDefinition` is now `extra="forbid"` on the Python side. `loadAllSources` surfaces per-file load errors instead of swallowing them, so validation/query paths can report manifest shard parse failures. * fix(context): make scan description generation resilient and quiet A transient sampleTable failure during ingest used to take out every table in a connection: generateTableDescription returned a hardcoded 'Table not found' string into descriptions.ai, and KtxDescriptionGenerator was constructed without a logger, so the failure left no trail anywhere. - sampleTable / sampleColumn calls retry 3x with 200/400/800ms backoff, honouring KtxScanContext.signal via a new KtxAbortedError. - On retry exhaustion or missing capability, table generation falls back to a metadata-only prompt built from column name / native type / comment / rawDescriptions. The column path follows the same rule -- call the LLM when any of samples or rawDescriptions are available; skip only when both are absent. - Logger is now threaded from KtxScanContext into the generator. Failures emit structured KtxScanWarning entries (new description_fallback_used code, plus existing sampling_failed / enrichment_failed / connector_capability_missing). ktx scan groups warnings by code so a batch of identical failures collapses to one summary line plus sample. - Returns null on failure instead of the 'Table not found' sentinel; the manifest writer's existing guard already skips empty descriptions, so schema YAML no longer carries misleading text. SCAN_MANAGED_DESCRIPTION_KEYS already strips stale 'ai' on merge, so existing YAML clears on next run. Also suppress AI SDK v6 'system in messages' warning: pull system messages out of KtxMessageBuilder.wrapSimple's output via a new splitKtxSystemMessages helper and pass them top-level to generateText (preserves cacheControl providerOptions on the SystemModelMessage). Agent-runner's local splitSystemPromptMessages dedupes onto the shared helper. * test(docs): align examples-docs assertions with revamped docs PR #103 (setup/guide doc revamp) reworded several CLI examples and connection labels; the assertions in scripts/examples-docs.test.mjs still referenced the pre-revamp wording and were failing in CI on main. Update the regexes to match the post-revamp content: - drop the `--json` flag from the sl-query example expectation - move the `Driver:` / `Status: ok` probe to the connection reference, which is where that output now lives (driver id is lowercase `postgres`, not the display name `PostgreSQL`) - drop the obsolete `Install \`uv\`...` troubleshooting line - accept `<connectionId>` everywhere; the docs no longer use the hyphenated `<connection-id>` form - match the `warehouse` connection id used in the quickstart instead of the `postgres-warehouse` id only used in the README and setup ref * fix(sl): skip TS/Python schema contract test when uv is unavailable The TypeScript checks CI job does not install uv or Python, so the module-level `execFileSync('uv', ...)` in schemas.contract.test.ts threw ENOENT and failed the suite. Wrap the schema dump in a try/catch and guard the describe block with `describe.skipIf` so the test skips in environments without uv. Local dev and any CI job that has uv on PATH still runs the cross-language contract assertion.
2026-05-15 02:11:04 +02:00
deps.semanticLayerService.loadAllSources.mockResolvedValue({
sources: [{ name: 'orders', table: 'sales.orders' }],
loadErrors: [],
});
const runner = buildRunner(deps);
await expect(
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'finance.orders'),
).resolves.toBe(false);
await expect(
(runner as any).tableRefExistsInSemanticLayer(deps.semanticLayerService, ['warehouse'], 'sales.orders'),
).resolves.toBe(true);
});
2026-05-10 23:12:26 +02:00
it('passes relevant canonical pins into the reconciliation system prompt', async () => {
const deps = makeDeps();
deps.diffSetService.compute.mockResolvedValue({
added: [],
modified: [],
deleted: ['metrics/old.yml'],
unchanged: [],
});
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'wu-billing',
rawFiles: ['metrics/churn_risk_score.yml'],
peerFileIndex: [],
dependencyPaths: [],
},
],
eviction: { deletedRawPaths: ['metrics/old.yml'] },
});
deps.canonicalPins.listPins.mockResolvedValue([
{
contestedKey: 'churn_risk_score',
canonicalArtifactKey: 'billing.churn_risk_score',
pinnedAt: '2026-04-27T12:00:00.000Z',
pinnedBy: 'user-1',
reason: 'billing owns the contractual definition',
},
{
contestedKey: 'gross_margin',
canonicalArtifactKey: 'finance.gross_margin',
pinnedAt: '2026-04-27T12:01:00.000Z',
pinnedBy: 'user-2',
reason: null,
},
]);
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
return { stopReason: 'natural' };
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([
['metrics/churn_risk_score.yml', 'h1'],
['metrics/old.yml', 'h2'],
]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
const reconcileCall = deps.agentRunner.runLoop.mock.calls.find(
([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-reconcile',
);
expect(reconcileCall?.[0].systemPrompt).toContain('<canonical_pins>');
expect(reconcileCall?.[0].systemPrompt).toContain('contestedKey: churn_risk_score');
expect(reconcileCall?.[0].systemPrompt).not.toContain('gross_margin');
expect(deps.canonicalPins.listPins).toHaveBeenCalledWith(['c1']);
});
it('emits a monotonically non-decreasing progress sequence reaching 1.0, covering all 7 stages', async () => {
const deps = makeDeps();
deps.agentRunner.runLoop.mockImplementation(async () => ({ stopReason: 'natural' }));
2026-05-10 23:12:26 +02:00
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const observed: Array<{ p: number; m?: string }> = [];
const ctx = new TestJobContext(
'j1',
null,
() => Promise.resolve(),
(p, m) => {
observed.push({ p, m });
return Promise.resolve();
},
);
await runner.run(
{
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
},
ctx,
);
// Monotonic.
for (let i = 1; i < observed.length; i++) {
expect(observed[i].p).toBeGreaterThanOrEqual(observed[i - 1].p);
}
// Reaches completion.
expect(observed.at(-1)?.p).toBeCloseTo(1.0, 3);
// Every stage surfaces a user-facing message.
const phaseLabels = [
'Fetching source files',
'Planning updates',
'Processing',
/Reconcil|reconcil/,
'Saving changes',
'Recording history',
'Wrapping up',
];
for (const label of phaseLabels) {
expect(observed.some((o) => (typeof label === 'string' ? o.m?.includes(label) : label.test(o.m ?? '')))).toBe(
true,
);
}
});
it('a Stage 3 failure leaves the shared knowledge table untouched', async () => {
const deps = makeDeps();
// Agent runner returns a successful result but the adapter emits a WU whose
// outcome still produces no actions — the point is that the scoped wiki service
// must not touch indexRepository during Stage 3, and syncFromCommit is what
// drives the shared table. If we cancel the run before squash, syncFromCommit
// must not be called.
deps.gitService.squashMergeIntoMain.mockRejectedValue(new Error('simulated squash failure'));
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/simulated squash failure/);
expect(deps.wikiService.syncFromCommit).not.toHaveBeenCalled();
});
it('refuses to squash-merge when the session worktree has an in-progress sequencer op', async () => {
const deps = makeDeps();
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
const sessionGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
2026-05-10 23:12:26 +02:00
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
feat(ingest): default local ingest to isolated diffs (#128) * docs: add isolated-diff ingestion design * Refine isolated-diff ingestion design after adversarial review iteration 1 * Refine isolated-diff ingestion design after adversarial review iteration 2 * Refine isolated-diff ingestion design after adversarial review iteration 3 * feat: persist ingest trace events * feat: add isolated ingest patch helpers * feat: validate wiki body semantic references * feat: add final ingest artifact gates * feat: execute ingest work units in child worktrees * feat: integrate isolated work unit patches * feat: route selected ingest sources through isolated diffs * test: cover isolated diff ingestion regressions * feat: add isolated diff ingestion v1 core * docs: document ingest trace inspection * docs: add isolated diff ingestion v1 core plan * fix(ingest): tighten final artifact gates * fix(ingest): gate isolated final integration tree * fix(ingest): persist postmortem failure traces * fix(ingest): trace policy conflicts and cleanup child worktrees * test(ingest): verify isolated diff postmortem coverage * docs: add isolated diff ingestion gates and trace closure plan * fix(ingest): gate provenance before isolated diff squash * docs: add isolated diff ingestion provenance gate closure plan * fix(ingest): gate final wiki references * fix(ingest): enforce SL target connection scope * fix(ingest): trace isolated SL target policy gates * test(ingest): cover isolated diff reference and target gates * chore(ingest): verify isolated diff gate closure * docs: add isolated diff ingestion reference and target gate closure plan * fix(ingest): gate global wiki references * docs: add isolated diff ingestion global wiki reference gate closure plan * fix(ingest): validate scan sources and wiki refs * test(ingest): cover isolated diff textual conflict resolver * test(ingest): cover isolated diff resolver integration * feat(ingest): repair isolated diff textual conflicts * feat(ingest): report isolated diff resolver outcomes * test(ingest): verify isolated diff textual conflict repair * test(ingest): align textual conflict failure coverage * docs: add isolated diff textual conflict resolver plan * test(ingest): cover isolated diff gate repair * feat(ingest): add isolated diff gate repair agent * feat(ingest): repair isolated diff semantic gate failures * feat(ingest): wire isolated diff gate repair * test(ingest): verify isolated diff final gate repair * chore(ingest): verify isolated diff gate repair * docs: add isolated diff gate repair plan * Improve ingest progress updates * feat(ingest): route direct-write connectors through isolated diffs * test(ingest): cover non-metabase isolated diff routing * feat(ingest): project metricflow semantic models before work units * test(ingest): verify metricflow isolated projection path * chore(ingest): verify isolated diff connector migration * docs: add isolated diff connector migration plan * feat(ingest): make isolated diff routing the private default * feat(ingest): promote isolated diff to default runner path * feat(ingest): default local ingest to isolated diffs * chore(ingest): remove isolated diff allowlist references * fix(ingest): preserve transient evidence for isolated work units * docs: add isolated diff default promotion plan * refactor(ingest): remove shared worktree WorkUnit path * docs(ingest): align WorkUnit prompts with isolated diffs * test(ingest): drop unused runner import * docs: add isolated diff shared worktree removal plan * docs: add isolated diff gate repair classification plan * fix: restrict claude-code mcp servers * docs: align ingest trace guidance with public CLI --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com>
2026-05-18 13:38:06 +02:00
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
2026-05-10 23:12:26 +02:00
};
deps.sessionWorktreeService.create.mockResolvedValue({
chatId: 'j1',
workdir: '/tmp/wt',
branch: 'session/j1',
baseSha: 'b',
createdAt: new Date(),
git: sessionGit,
config: {},
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/in-progress git operation/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
const deps = makeDeps();
deps.adapter.detect.mockResolvedValue(false);
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/did not recognize/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
});
});