Merge commit 'c513d61dca' into next

This commit is contained in:
github-actions[bot] 2026-05-18 21:32:16 +00:00
commit 69535f86c9
158 changed files with 29523 additions and 1053 deletions

View file

@ -70,6 +70,7 @@ Agent integration ready: yes (codex:project)
| `ktx sl search "revenue"` | Search semantic-layer sources |
| `ktx sl validate <source> --connection-id <id>` | Validate a semantic source |
| `ktx sl query --measure <measure> --format sql` | Compile semantic-layer SQL |
| `ktx sql --connection <id> "select 1"` | Execute read-only SQL |
| `ktx wiki search "revenue definition"` | Search local wiki context |
| `ktx mcp start` | Start the local MCP server for agent clients |

View file

@ -25,6 +25,7 @@ ktx
search <query>
validate <sourceName>
query
sql
status
mcp
start
@ -79,6 +80,9 @@ ktx ingest --all
ktx sl search "revenue"
ktx wiki search "revenue recognition"
# Execute read-only SQL
ktx sql --connection warehouse "select count(*) from public.orders"
# Start the local MCP server for agent clients
ktx mcp start
```

View file

@ -31,6 +31,9 @@ ktx dev <subcommand> [options]
## `dev schema`
`ktx dev schema` does not require a `ktx.yaml` file or a configured project
directory. Use it from any directory to generate editor or agent schema files.
| Flag | Description | Default |
|------|-------------|---------|
| `--output <file>` | Write the schema to a file instead of stdout | — |

View file

@ -32,6 +32,7 @@ connections when you use `--all`.
| `--query-history-window-days <days>` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default |
| `--plain` | Print plain text output | `true` |
| `--json` | Print JSON output | `false` |
| `--yes` | Install required managed runtime features without prompting | `false` |
| `--no-input` | Disable interactive terminal input | — |
`--fast` and `--deep` are mutually exclusive. Depth flags apply only to
@ -44,6 +45,12 @@ requires deep ingest readiness.
When `--all` selects both databases and context sources, database ingest runs
first, then source ingest and memory updates run for source connections.
Some ingest paths use the managed KTX Python runtime. Query-history ingest uses
it for SQL analysis, and Looker source ingest uses it for Looker identifier
parsing. In an interactive terminal, `ktx ingest` prompts before installing the
required runtime features. Use `--yes` to install them without prompting, or
use `--no-input` to fail fast with install guidance.
## `ktx ingest text` Options
Use `ktx ingest text` to capture free-form text artifacts into KTX memory.
@ -104,6 +111,41 @@ notion skipped skipped done done
Use `--json` when a script or agent needs the selected plan and per-target
results.
## Inspect source ingest traces
Source ingest writes persistent JSONL traces for postmortem debugging. Plain
ingest output prints the trace path near the report, run, and job identifiers
when a trace is available:
```text
Report: report-abc123
Run: run-abc123
Job: job-abc123
Trace: .ktx/ingest-traces/job-abc123/trace.jsonl
```
The trace file lives under the project directory at
`.ktx/ingest-traces/<jobId>/trace.jsonl`. Each line is a JSON event with the
job id, run id, sync id, connection id, source key, phase, event name, timing,
state snapshot, decision context, and error details. Failed runs also write a
stored ingest report with `status: "failed"`, `failure.phase`,
`failure.message`, and the same trace path.
Use `jq` or line-oriented tools to inspect a trace:
```bash
jq -c '. | {at, level, phase, event, durationMs, data, error}' \
.ktx/ingest-traces/<jobId>/trace.jsonl
```
KTX writes `debug` trace events by default. Set `KTX_INGEST_TRACE_LEVEL` to
`error`, `info`, `debug`, or `trace` before running ingest to change the trace
verbosity:
```bash
KTX_INGEST_TRACE_LEVEL=trace ktx ingest metabase
```
## Common errors
| Error | Cause | Recovery |
@ -111,6 +153,7 @@ results.
| Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` |
| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` |
| Query history is unsupported | The selected database driver does not support query history | Run schema ingest without query-history flags |
| Python runtime is missing | The selected ingest target needs runtime-backed SQL analysis or source parsing | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx dev runtime install` command |
| No ingest target was selected | No connection id was provided and `--all` was omitted | Run `ktx ingest <connectionId>` or `ktx ingest --all` |
| Source options were ignored | Depth and query-history flags were supplied for a non-database source | Omit database-only flags when ingesting source connections |
| Text ingest stops early | `--fail-fast` was used and one item failed | Fix the failed item or rerun without `--fail-fast` to collect all failures |

View file

@ -5,8 +5,8 @@ description: "Set up or resume a local KTX project."
`ktx setup` is the guided configuration flow for a local KTX project. It can
create or resume `ktx.yaml`, configure LLM and embedding providers, add
database and context-source connections, build initial context, and install
agent integrations.
database and context-source connections, prepare required runtime features,
build initial context, and install agent integrations.
When you run bare `ktx` in an interactive terminal outside any KTX project, the
CLI starts this same setup flow. Inside an existing project, `ktx setup`
@ -79,6 +79,23 @@ of Anthropic API key or Vertex flags. For Claude Code, `--llm-model` accepts
`sentence-transformers` uses the KTX-managed Python runtime. Choose only one
embedding credential source.
### Runtime
Setup prepares the managed Python runtime when your selected configuration
needs it. The runtime step runs after database and source setup and before the
initial context build.
KTX prepares the `core` runtime feature when agent integration, query-history
ingest, Looker source ingest, or daemon-backed context build paths need it. KTX
prepares the `local-embeddings` runtime feature when you choose managed local
`sentence-transformers` embeddings. Existing external daemon URLs, such as
`KTX_DAEMON_URL` or `KTX_SQL_ANALYSIS_URL`, satisfy the matching dependency and
skip managed runtime installation for that dependency.
Interactive setup prompts before installing runtime features. Use `--yes` to
install them without prompting. Use `--no-input` to fail fast when required
runtime features are missing.
### Databases
| Flag | Description |
@ -197,6 +214,7 @@ LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Databases configured: yes (postgres-warehouse)
Context sources configured: yes (dbt-main)
Runtime ready: yes (core)
KTX context built: yes
Agent integration ready: yes (codex:project)
```
@ -210,6 +228,7 @@ Use `ktx status` for repeatable readiness checks after setup exits.
| Setup resumes an unexpected project | `KTX_PROJECT_DIR` or nearest `ktx.yaml` points to another directory | Pass `--project-dir <path>` explicitly |
| Setup cannot run in CI | Required values are missing and `--no-input` disables prompts | Provide the relevant automation flags or create a fixture `ktx.yaml` |
| Provider health check fails | Provider key, model id, Vertex project, or Vertex location is invalid | Fix the `env:` or `file:` reference and rerun setup |
| Python runtime is missing | The selected setup needs runtime-backed agent, query-history, Looker, or local embedding features | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx dev runtime install` command |
| `--enable-query-history` is rejected | The selected database driver does not support query history | Use Postgres, BigQuery, or Snowflake, or rerun without query-history flags |
| Source setup rejects location flags | Both `--source-path` and `--source-git-url` were supplied | Choose the local path or the Git URL, not both |
| Agent integration missing | Setup skipped the agents step | Run `ktx setup --agents --target <target>` |

View file

@ -141,6 +141,9 @@ Semantic-layer list and search commands return human-readable output by
default. Use `--json` on `list` or `search` when an agent needs structured
output. Use `--format sql` on `query` to inspect generated SQL before
execution, or leave `--format json` for the compiled query and optional rows.
Pretty `sl search` output labels the displayed results with rank badges (`#1`,
`#2`, and so on). Plain and JSON output keep the raw `score` value, which is a
ranking score rather than a percentage.
```json
{

View file

@ -0,0 +1,103 @@
---
title: "ktx sql"
description: "Execute parser-validated read-only SQL against a configured connection."
---
Run read-only SQL against a database connection in your KTX project. The command
validates the statement before execution and only accepts a single `SELECT` or
`WITH` query.
## Command signature
Use `ktx sql` with a required connection id and positional SQL text.
```bash
ktx sql --connection <id> [options] <sql...>
```
## Options
Use output flags to choose between terminal display, TSV rows, and structured
JSON.
| Flag | Description | Default |
|------|-------------|---------|
| `-c`, `--connection <id>` | KTX database connection id. Required. | - |
| `--max-rows <n>` | Maximum rows to return. Must be between `1` and `10000`. | `1000` |
| `--output <mode>` | Output mode: `pretty`, `plain` (TSV), or `json`. | `pretty` |
| `--json` | Shortcut for `--output=json` (overrides `--output`). | `false` |
## Examples
Quote SQL in shell scripts and when the query contains spaces or punctuation.
```bash
# Count rows in a table
ktx sql --connection warehouse "select count(*) from public.orders"
# Return a small result set
ktx sql \
--connection warehouse \
--max-rows 25 \
"select id, status from public.orders order by created_at desc"
# Print JSON for agents or scripts
ktx sql \
--connection warehouse \
--json \
"select status, count(*) from public.orders group by status"
# Print TSV rows
ktx sql \
-c warehouse \
--output plain \
"select id, status from public.orders"
```
## Output
Pretty output prints aligned columns and a final row count.
```text
status count
------ -----
paid 42
open 7
2 rows
```
Plain output prints a TSV header row followed by TSV data rows.
```text
status count
paid 42
open 7
```
JSON output preserves connection id, headers, optional header types, rows, and
row count.
```json
{
"connectionId": "warehouse",
"headers": ["status", "count"],
"headerTypes": ["text", "bigint"],
"rows": [
["paid", 42],
["open", 7]
],
"rowCount": 2
}
```
## Common errors
Use the error text to distinguish validation failures from connection failures.
| Error | Cause | Recovery |
|-------|-------|----------|
| `Only one SQL statement can be executed.` | The SQL text contains multiple statements. | Run one query at a time. |
| `SQL contains read/write operation` | The statement is not read-only. | Use a single `SELECT` or `WITH` query. |
| `Connection "<id>" is not configured in ktx.yaml` | The connection id is wrong or missing from the project. | Run `ktx connection list` and retry with an exact id. |
| `does not support read-only SQL execution` | The connection type has no local SQL executor. | Use a supported database connection or query through MCP where available. |

View file

@ -43,6 +43,12 @@ need to add or update wiki knowledge.
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
`wiki search` uses hybrid search when `storage.search` is `sqlite-fts5`. KTX
combines lexical SQLite FTS5 matches, token matches, and semantic matches from
wiki page embeddings stored in `.ktx/db.sqlite`. If embeddings are not
configured or the embedding backend is unavailable, KTX skips the semantic lane
and keeps lexical and token results.
## Examples
```bash
@ -60,14 +66,21 @@ ktx wiki search "monthly recurring revenue" --json --limit 10
# Print search results as TSV
ktx wiki search "monthly recurring revenue" --output plain
# Inspect which search lanes were used
ktx --debug wiki search "monthly recurring revenue" --json
```
## Output
Wiki commands print clack-style pretty output in a TTY and TSV-style plain
output when requested. JSON output wraps the items with a command metadata
envelope. Open the matching Markdown files directly when you need the full page
contents.
envelope. Search results include `matchReasons` and `lanes` metadata so you can
see whether lexical, token, or semantic search contributed to the ranking. Open
the matching Markdown files directly when you need the full page contents.
Pretty search output labels the displayed results with rank badges (`#1`, `#2`,
and so on). Plain and JSON output keep the raw `score` value, which is a ranking
score rather than a percentage.
```json
{
@ -77,16 +90,49 @@ contents.
{
"key": "revenue-definitions",
"summary": "Canonical revenue metric definitions",
"score": 0.92
"score": 0.92,
"matchReasons": ["lexical", "semantic"],
"lanes": [
{
"lane": "lexical",
"status": "available",
"requestedCandidatePoolLimit": 25,
"effectiveCandidatePoolLimit": 25,
"returnedCandidateCount": 3,
"weight": 1.5
},
{
"lane": "semantic",
"status": "available",
"requestedCandidatePoolLimit": 25,
"effectiveCandidatePoolLimit": 25,
"returnedCandidateCount": 8,
"weight": 3
}
]
}
]
},
"meta": {
"command": "wiki search"
}
}
```
When you pass the global `--debug` flag, KTX writes search diagnostics to
stderr and leaves stdout unchanged. This is useful with `--json` because stdout
stays machine-readable:
```text
[debug] wiki search mode=sqlite-fts5 embedding=configured results=2
[debug] wiki search lane=lexical status=available returned=1 weight=1.5
[debug] wiki search lane=token status=available returned=1 weight=0.75
[debug] wiki search lane=semantic status=available returned=2 weight=3
```
## Common errors
| Error | Cause | Recovery |
|-------|-------|----------|
| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing |
| Search returns no results | The query terms do not match summaries, tags, or content, and the semantic lane is unavailable or has no positive matches | Run with `--debug`, check the semantic lane status, retry with business synonyms, then create a page if the knowledge is missing |
| A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` |

View file

@ -7,6 +7,7 @@
"ktx-connection",
"ktx-ingest",
"ktx-sl",
"ktx-sql",
"ktx-wiki",
"ktx-status",
"ktx-mcp",

View file

@ -63,9 +63,9 @@ Setup supports three LLM provider paths:
| Provider | Use when | Credential model |
|----------|----------|------------------|
| Anthropic API | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
| Claude subscription (Pro/Max) | You want KTX to use your local Claude Code session | Claude Code local authentication |
| Anthropic API key | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
| Google Vertex AI for Anthropic Claude | Your organization runs Claude through Google Cloud | Application Default Credentials plus Vertex project and location |
| Claude Code | You want KTX to use your local Claude Code session | Claude Code local authentication |
For Anthropic API, setup can read the key from the environment or save a pasted
key to `.ktx/secrets/anthropic-api-key`. `ktx.yaml` stores an `env:` or `file:`

View file

@ -24,10 +24,11 @@ other branches.
Before you publish, confirm these requirements:
- The repository has an Actions secret named `NPM_TOKEN`.
- `NPM_TOKEN` is a granular npm token that can publish `@kaelio/ktx`.
- The token can publish non-interactively if the npm account or package uses
two-factor authentication for writes.
- npm Trusted Publishing is configured for `@kaelio/ktx`.
- The trusted publisher points at the `Kaelio/ktx` repository and the
`.github/workflows/release.yml` workflow.
- The workflow keeps `id-token: write` permission so npm can verify the
GitHub Actions run through OpenID Connect.
- The repository has a baseline semantic-release tag for the latest published
package version, such as `v0.1.0-rc.1`.
@ -99,8 +100,17 @@ The artifact packaging and readiness scripts read `publicNpmPackageVersion`
from `release-policy.json`, so manual version edits in build scripts aren't
needed for rc releases.
## Trusted Publishing follow-up
The bundled Python runtime wheel also derives its version from
`publicNpmPackageVersion`. Stable npm versions are reused as-is, and rc
versions are normalized to Python's version format. For example,
`0.1.0-rc.2` becomes `0.1.0rc2` in the `kaelio-ktx` wheel filename and wheel
metadata.
This workflow uses `NPM_TOKEN` today. Move to npm Trusted Publishing after the
final publish command path is verified for the package manager and workflow
filename configured in npm package settings.
## npm authentication
The release workflow publishes through npm Trusted Publishing. It doesn't use
an `NPM_TOKEN` secret, and the publish step doesn't set `NODE_AUTH_TOKEN`.
If npm returns an authentication error, check the Trusted Publishing settings
for the `@kaelio/ktx` package before adding token-based authentication back to
the workflow.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,493 @@
# Isolated Diff Ingestion V1 Global Wiki Reference Gate Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Reject final trees where an isolated-diff run changes semantic-layer
sources or deletes wiki pages and leaves pre-existing wiki pages with stale
body, `sl_refs`, frontmatter `refs`, or inline `[[page-key]]` references.
**Architecture:** Keep `artifact-gates.ts` validation-only. The runner expands
the final wiki gate scope before the existing final artifact gate: changed pages
are always validated, and all global wiki pages are validated when the run
changes any semantic-layer source or removes any wiki page. The final-gate trace
records the expanded scope and why it was expanded.
**Tech Stack:** TypeScript, Vitest, pnpm workspace commands, existing
`IngestBundleRunner`, `KnowledgeWikiService`, and isolated-diff test fixtures.
---
## Audit Summary
The implemented isolated-diff plans cover the core v1 flow: child worktrees,
binary no-rename patch proposals, `git apply --3way --index`, policy rejection,
final gates after reconciliation and repair, pre-squash provenance raw-path
validation, target-connection enforcement, failed reports, and persistent JSONL
traces.
One v1-blocking correctness gap remains. Final wiki gates currently validate
only the wiki pages changed by the run. They do not validate unchanged pages
that become invalid because the run changes a semantic-layer source or deletes
a referenced wiki page. Two concrete failures can therefore squash into main:
- A pre-existing wiki page body contains
`` `mart_account_segments.total_contract_arr_cents` `` while the run updates
`semantic-layer/warehouse/mart_account_segments.yaml` to define only
`total_contract_arr`.
- A pre-existing wiki page has `refs: [source-page]` or `[[source-page]]` while
the run deletes `wiki/global/source-page.md`.
This plan does not expand connector rollout, promote isolated diffs to the
default, add interactive resolution, add semantic auto-merge, remove the old
path, expand transitive semantic-layer dependencies, or move provenance into
files.
## File Structure
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
Adds two failing end-to-end regressions for unchanged wiki pages made stale by
semantic-layer changes and wiki-page deletion.
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
Adds a final wiki gate scope helper, expands validation to all global wiki
pages when final state changes can invalidate unchanged references, and records
scope details in the final-gate trace and failed report.
---
### Task 1: Add failing unchanged wiki regressions
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Add the stale existing wiki body regression**
Insert this test inside `describe('IngestBundleRunner isolated diff path', ...)`
after the existing Metabase stale-measure regression:
```ts
// Regression for the global wiki reference gate: an isolated-diff work unit renames a
// semantic-layer measure while a pre-existing wiki page, which the run never touches,
// still names the old measure in its body. The run must reject with an error that
// mentions the stale measure, leave HEAD where it was, and record the failure in the trace.
it('rejects unchanged wiki body refs made stale by isolated semantic-layer changes', async () => {
  // Builds the `mart_account_segments` fixture with explicit YAML indentation. `expr` has
  // to align with `name` inside the sequence item; with both at the dash column the
  // document is not valid YAML and the run would presumably fail on parsing instead of on
  // the stale wiki reference this regression is about.
  const accountSegmentsSourceYaml = (measureName: string): string =>
    [
      'name: mart_account_segments',
      'grain: [account_id]',
      'columns: [{name: account_id, type: string}]',
      'joins: []',
      'measures:',
      `  - name: ${measureName}`,
      '    expr: sum(contract_arr)',
      '',
    ].join('\n');

  const runtime = await makeRealGitRuntime();
  try {
    // Seed the repository: a source that defines `total_contract_arr_cents` and a wiki
    // page whose body references that measure.
    await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true });
    await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true });
    await writeFile(
      join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'),
      accountSegmentsSourceYaml('total_contract_arr_cents'),
    );
    await writeFile(
      join(runtime.configDir, 'wiki/global/account-segments.md'),
      '---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n',
    );
    await runtime.git.commitFiles(
      ['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'],
      'seed existing wiki body ref',
      'KTX Test',
      'system@ktx.local',
    );
    const preRunHead = await runtime.git.revParseHead();

    const { deps, adapter } = makeDeps(runtime);
    adapter.chunk.mockResolvedValue({
      workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }],
    });

    // Capture the tool session handed to the toolset factory so the fake agent loop
    // below can write into that session's config root.
    let currentSession: any = null;
    deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
      currentSession = toolSession;
      return { toRuntimeTools: vi.fn(() => ({})) };
    });

    // Fake agent loop: rename the measure to `total_contract_arr`, record the touched
    // source and the action, and commit only the semantic-layer file. The wiki page is
    // deliberately left unchanged.
    deps.agentRunner.runLoop = vi.fn(async () => {
      const root = rootOfConfig(currentSession.configService, runtime.configDir);
      await writeFile(
        join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'),
        accountSegmentsSourceYaml('total_contract_arr'),
      );
      addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments');
      currentSession.actions.push({
        target: 'sl',
        type: 'updated',
        key: 'mart_account_segments',
        detail: 'Rename ARR measure',
        targetConnectionId: 'warehouse',
        rawPaths: ['cards/source.json'],
      });
      await currentSession.gitService.commitFiles(
        ['semantic-layer/warehouse/mart_account_segments.yaml'],
        'wu source rename',
        'KTX Test',
        'system@ktx.local',
      );
      return { stopReason: 'natural' };
    }) as never;

    const runner = new IngestBundleRunner(deps);
    await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]);

    // The rejection message must name the measure the wiki body still references.
    await expect(
      runner.run({
        jobId: 'job-existing-body-stale',
        connectionId: 'warehouse',
        sourceKey: 'metabase',
        trigger: 'upload',
        bundleRef: { kind: 'upload', uploadId: 'upload' },
      }),
    ).rejects.toThrow(/total_contract_arr_cents/);

    // HEAD is unchanged after the rejected run.
    expect(await runtime.git.revParseHead()).toBe(preRunHead);

    // The trace names the failing gate, the offending page, the scope-expansion reason,
    // and the failure bookkeeping, and contains no squash event.
    const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-body-stale/trace.jsonl'), 'utf-8');
    expect(trace).toContain('final_artifact_gates_failed');
    expect(trace).toContain('account-segments');
    expect(trace).toContain('semantic_layer_changed');
    expect(trace).toContain('ingest_failed');
    expect(trace).toContain('failure_report_created');
    expect(trace).not.toContain('squash_finished');
  } finally {
    await rm(runtime.homeDir, { recursive: true, force: true });
  }
});
```
- [ ] **Step 2: Add the stale existing wiki page-reference regression**
Insert this test near the existing final wiki reference regression:
```ts
// Regression for the global wiki reference gate: an isolated-diff work unit deletes
// `source-page` while an untouched page still points at it through frontmatter `refs`
// and an inline `[[source-page]]` link. The run must reject, leave HEAD where it was,
// and record the failure in the trace.
it('rejects unchanged inbound wiki refs broken by an isolated wiki deletion', async () => {
  const commitAuthor = 'KTX Test';
  const commitEmail = 'system@ktx.local';

  const runtime = await makeRealGitRuntime();
  try {
    // Seed two wiki pages: the page that will be deleted and the page that references it.
    const wikiDir = join(runtime.configDir, 'wiki/global');
    await mkdir(wikiDir, { recursive: true });
    await writeFile(
      join(runtime.configDir, 'wiki/global/source-page.md'),
      '---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n',
    );
    await writeFile(
      join(runtime.configDir, 'wiki/global/account-segments.md'),
      '---\nsummary: Account segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n',
    );
    await runtime.git.commitFiles(
      ['wiki/global/source-page.md', 'wiki/global/account-segments.md'],
      'seed inbound wiki refs',
      commitAuthor,
      commitEmail,
    );
    const headBeforeRun = await runtime.git.revParseHead();

    const { deps, adapter } = makeDeps(runtime);
    adapter.chunk.mockResolvedValue({
      workUnits: [{ unitKey: 'delete-target-page', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }],
    });

    // Remember the tool session passed to the toolset factory so the fake agent loop
    // can operate on that session's config root.
    let activeSession: any = null;
    deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
      activeSession = toolSession;
      return { toRuntimeTools: vi.fn(() => ({})) };
    });

    // Fake agent loop: delete the referenced page, record the removal action, and
    // commit the deletion. The referencing page is not modified.
    deps.agentRunner.runLoop = vi.fn(async () => {
      const worktreeRoot = rootOfConfig(activeSession.configService, runtime.configDir);
      await rm(join(worktreeRoot, 'wiki/global/source-page.md'), { force: true });
      activeSession.actions.push({
        target: 'wiki',
        type: 'removed',
        key: 'source-page',
        detail: 'Delete referenced page',
        rawPaths: ['pages/delete.json'],
      });
      await activeSession.gitService.commitFiles(
        ['wiki/global/source-page.md'],
        'wu delete target page',
        commitAuthor,
        commitEmail,
      );
      return { stopReason: 'natural' };
    }) as never;

    const runner = new IngestBundleRunner(deps);
    await mockStageRawFiles(runner, runtime, [['pages/delete.json', 'h1']]);

    const runPromise = runner.run({
      jobId: 'job-existing-wiki-ref-stale',
      connectionId: 'warehouse',
      sourceKey: 'metabase',
      trigger: 'upload',
      bundleRef: { kind: 'upload', uploadId: 'upload' },
    });
    await expect(runPromise).rejects.toThrow(
      /wiki references target missing page\(s\): account-segments -> source-page/,
    );

    // HEAD is unchanged after the rejected run.
    expect(await runtime.git.revParseHead()).toBe(headBeforeRun);

    // The trace names the failing gate, the broken reference, the scope-expansion
    // reason, and the failure bookkeeping, and contains no squash event.
    const traceLog = await readFile(
      join(runtime.configDir, '.ktx/ingest-traces/job-existing-wiki-ref-stale/trace.jsonl'),
      'utf-8',
    );
    const expectedMarkers = [
      'final_artifact_gates_failed',
      'account-segments -> source-page',
      'wiki_page_removed',
      'ingest_failed',
      'failure_report_created',
    ];
    for (const marker of expectedMarkers) {
      expect(traceLog).toContain(marker);
    }
    expect(traceLog).not.toContain('squash_finished');
  } finally {
    await rm(runtime.homeDir, { recursive: true, force: true });
  }
});
```
- [ ] **Step 3: Run the focused regressions and verify they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs"
```
Expected: FAIL. The stale body test currently squashes successfully because the
unchanged `account-segments` page is not in `finalChangedWikiPageKeys`. The
inbound wiki ref test currently squashes successfully because the deleted
`source-page` is validated as a missing changed page and skipped, while the
unchanged page that references it is never validated.
---
### Task 2: Expand the final wiki validation scope
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- [ ] **Step 1: Add final wiki gate scope helpers**
Add these private methods after `uniqueTouchedSlSources()`:
```ts
/**
 * Collects the keys of wiki pages that the given actions removed.
 *
 * @param actions Memory actions recorded during the run.
 * @returns Keys of actions whose target is `'wiki'` and whose type is `'removed'`,
 *   passed through `uniqueWikiPageKeys()`.
 */
private removedWikiPageKeysFromActions(actions: MemoryAction[]): string[] {
  const removedKeys: string[] = [];
  for (const action of actions) {
    const isWikiRemoval = action.target === 'wiki' && action.type === 'removed';
    if (isWikiRemoval) {
      removedKeys.push(action.key);
    }
  }
  return this.uniqueWikiPageKeys(removedKeys);
}
/**
 * Decides which wiki page keys the final gates validate and records why.
 *
 * Changed pages are always included. When the run touched at least one semantic-layer
 * source or removed at least one wiki page, every key returned by
 * `wikiService.listPageKeys('GLOBAL', null)` is added as well.
 *
 * @param input.wikiService Wiki service bound to the worktree being validated.
 * @param input.changedWikiPageKeys Wiki page keys changed by the run.
 * @param input.touchedSlSources Semantic-layer sources touched by the run.
 * @param input.actions Memory actions used to detect removed wiki pages.
 * @returns The page keys to validate plus a trace payload describing the scope:
 *   whether it was widened (`global`), the `reasons`, and the key lists involved.
 */
private async wikiPageKeysForFinalGates(input: {
  wikiService: ReturnType<KnowledgeWikiService['forWorktree']>;
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
  actions: MemoryAction[];
}): Promise<{
  pageKeys: string[];
  trace: {
    global: boolean;
    reasons: string[];
    changedWikiPageKeys: string[];
    removedWikiPageKeys: string[];
    pageKeysValidated: string[];
  };
}> {
  const { wikiService, touchedSlSources, actions } = input;
  const changedWikiPageKeys = this.uniqueWikiPageKeys(input.changedWikiPageKeys);
  const removedWikiPageKeys = this.removedWikiPageKeysFromActions(actions);

  // Each reason names a kind of change that can invalidate pages the run did not touch.
  const reasons: string[] = [
    ...(touchedSlSources.length > 0 ? ['semantic_layer_changed'] : []),
    ...(removedWikiPageKeys.length > 0 ? ['wiki_page_removed'] : []),
  ];
  const validateAllGlobalPages = reasons.length > 0;

  // Without a widening reason the changed-page list is used as-is.
  const pageKeys = validateAllGlobalPages
    ? this.uniqueWikiPageKeys([...changedWikiPageKeys, ...(await wikiService.listPageKeys('GLOBAL', null))])
    : changedWikiPageKeys;

  return {
    pageKeys,
    trace: {
      global: validateAllGlobalPages,
      reasons,
      changedWikiPageKeys,
      removedWikiPageKeys,
      pageKeysValidated: pageKeys,
    },
  };
}
```
- [ ] **Step 2: Use the expanded scope before final gates**
In `runInner()`, replace the current `finalChangedWikiPageKeys` and
`finalTouchedSlSources` block with this code:
```ts
// Wiki page keys changed by the run itself, merged from: the projection output (only when
// isolated diffs are enabled), work-unit patch paths, reconciliation actions,
// post-reconciliation paths, and GLOBAL-scope sl_ref repairs.
const baseFinalChangedWikiPageKeys = this.uniqueWikiPageKeys([
...(isolatedDiffEnabled ? projectionChangedWikiPageKeys : []),
...workUnitOutcomes
.flatMap((outcome) => outcome.patchTouchedPaths ?? [])
.flatMap((path) => this.wikiPageKeysFromPaths([path])),
...this.wikiPageKeysFromActions(reconcileActions),
...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])),
...wikiSlRefRepairResult.repairs.filter((repair) => repair.scope === 'GLOBAL').map((repair) => repair.pageKey),
]);
// Semantic-layer sources touched by the run, merged from the projection (isolated-diff
// only), work-unit outcomes, reconciliation actions and paths, and the post-processor.
const finalTouchedSlSources = this.uniqueTouchedSlSources([
...(isolatedDiffEnabled ? projectionTouchedSources : []),
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
...(postProcessorOutcome?.touchedSources ?? []),
]);
// Compute the wiki gate scope against the session worktree, passing both work-unit and
// reconciliation actions so removals from either are seen.
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
changedWikiPageKeys: baseFinalChangedWikiPageKeys,
touchedSlSources: finalTouchedSlSources,
actions: [...stageIndex.workUnits.flatMap((wu) => wu.actions), ...reconcileActions],
});
// Name kept for downstream use; the value is now the scope's page keys rather than
// only the pages the run changed.
const finalChangedWikiPageKeys = finalWikiGateScope.pageKeys;
```
This keeps the existing variable name used by `validateFinalIngestArtifacts()`,
but the value now means "wiki page keys to validate in final gates."
- [ ] **Step 3: Add scope details to final-gate trace data**
In the `finalArtifactGateTraceData` object, add the
`wikiReferenceGateScope` field:
```ts
const finalArtifactGateTraceData = {
changedWikiPageKeys: finalChangedWikiPageKeys,
wikiReferenceGateScope: finalWikiGateScope.trace,
touchedSlSources: finalTouchedSlSources,
projectionTouchedPaths,
workUnitPatchTouchedPaths: workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []),
preReconciliationSha,
postReconciliationSha,
postReconciliationPaths,
reconciliationActionCount: reconcileActions.length,
wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length,
};
```
The failure report already stores `activeFailureDetails`, so this trace data
also becomes persistent failed-report context when final gates fail.
- [ ] **Step 4: Run the focused regressions and verify they pass**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs"
```
Expected: PASS. Both traces include `final_artifact_gates_failed` and
`failure_report_created`, contain no `squash_finished` event, and record a
`wikiReferenceGateScope` whose `reasons` include either
`semantic_layer_changed` or `wiki_page_removed`.
---
### Task 3: Verification and commit
**Files:**
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Run the isolated-diff focused suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
src/ingest/artifact-gates.test.ts \
src/ingest/wiki-body-refs.test.ts \
src/ingest/semantic-layer-target-policy.test.ts \
src/ingest/isolated-diff/git-patch.test.ts \
src/ingest/isolated-diff/patch-integrator.test.ts \
src/ingest/isolated-diff/work-unit-executor.test.ts \
src/core/git.service.patch.test.ts
```
Expected: PASS.
- [ ] **Step 2: Type-check the context package**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Run dead-code analysis**
Run:
```bash
pnpm run dead-code
```
Expected: PASS, or only pre-existing findings unrelated to
`packages/context/src/ingest/ingest-bundle.runner.ts` and
`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
Investigate any new finding before committing.
- [ ] **Step 4: Verify trace acceptance criteria**
Open the traces produced by the two new failing-run tests and confirm these
events and fields exist:
```text
job-existing-body-stale:
- final_artifact_gates_started
- final_artifact_gates_failed
- ingest_failed
- failure_report_created
- no squash_finished
- wikiReferenceGateScope.global is true
- wikiReferenceGateScope.reasons includes semantic_layer_changed
- wikiReferenceGateScope.pageKeysValidated includes account-segments
- error.message includes total_contract_arr_cents
job-existing-wiki-ref-stale:
- final_artifact_gates_started
- final_artifact_gates_failed
- ingest_failed
- failure_report_created
- no squash_finished
- wikiReferenceGateScope.global is true
- wikiReferenceGateScope.reasons includes wiki_page_removed
- wikiReferenceGateScope.removedWikiPageKeys includes source-page
- error.message includes account-segments -> source-page
```
- [ ] **Step 5: Commit**
Run:
```bash
git add packages/context/src/ingest/ingest-bundle.runner.ts \
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
git commit -m "fix(ingest): gate global wiki references"
```
Expected: one commit containing only the runner and isolated-diff runner test
changes.
---
## Self-Review
Spec coverage:
- Final global wiki body reference validation now covers unchanged wiki pages
when a run changes semantic-layer sources.
- Final global wiki page reference validation now covers unchanged inbound
references when a run deletes wiki pages.
- The plan keeps resolver behavior fail-fast and stops before squash.
- Persistent trace and failed-report acceptance criteria are explicit and tied
to the concrete failure modes.
Non-blocking gaps unchanged:
- Broader connector rollout.
- Isolated-diff default promotion.
- Old shared-worktree path removal.
- Interactive conflict resolution.
- Semantic auto-merge.
- Transitive semantic-layer dependency expansion.
- Provenance-as-files.

View file

@ -0,0 +1,494 @@
# Isolated Diff Ingestion V1 Provenance Gate Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Ensure invalid provenance raw paths are rejected before isolated-diff
ingestion squashes any integration worktree changes into the main project
worktree.
**Architecture:** Keep provenance insertion after squash, but derive and
validate the planned provenance rows immediately after final artifact gates and
before the squash stage. This makes provenance validation part of the final
pre-main safety boundary while preserving the existing report and database
write shape.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, existing
`IngestBundleRunner`, `validateProvenanceRawPaths`, ingest reports, and
persistent ingest traces.
---
## Audit Summary
The implemented isolated-diff path now covers the core v1 safety surface:
child worktrees, binary no-rename patches, `git apply --3way --index`, patch
policy rejection, final wiki and semantic-layer gates after reconciliation and
post-processing, failure reports, and persistent JSONL traces. The focused
isolated-diff test suite passes:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-trace.test.ts \
src/ingest/wiki-body-refs.test.ts \
src/ingest/artifact-gates.test.ts \
src/ingest/isolated-diff/git-patch.test.ts \
src/ingest/isolated-diff/work-unit-executor.test.ts \
src/ingest/isolated-diff/patch-integrator.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
```
Current result: `7 passed` test files, `28 passed` tests.
One v1-blocking gap remains. `validateProvenanceRawPaths()` is called in
`packages/context/src/ingest/ingest-bundle.runner.ts` after
`squashMergeIntoMain()`. A work unit or reconciliation action can emit an
otherwise valid wiki or semantic-layer artifact whose `rawPaths` contain a path
outside the current raw snapshot and eviction set. Today the run fails during
provenance recording, but only after the invalidly-attributed artifacts have
already reached the main project worktree. That violates the spec requirement
that final global gates run before any changes reach main.
Observability for the already-implemented phases is sufficient for postmortem
reconstruction: traces include input snapshots, routing, child worktree
creation and cleanup, patch collection and application, conflict
classification, reconciliation, final gates, failure reports, and run outcome.
This plan adds only the missing provenance validation failure trace because it
corresponds to a concrete pre-main failure mode, not cosmetic trace expansion.
Non-blocking gaps that remain after this plan:
- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct
durable writes to the isolated path.
- Promoting isolated diffs as the default for all connectors.
- Removing the old shared-worktree WorkUnit execution path.
- Interactive, CLI, or agent-driven conflict resolution.
- Auto-merging semantic conflicts that cannot be proven correct.
- Transitive SQL-projection dependency expansion beyond direct declared joins.
- Moving provenance rows to worktree files.
- Adding failure reports for failures that happen before an ingest run row
exists. The trace file is still written at the deterministic job path.
## File Structure
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
Add a regression proving invalid provenance raw paths fail before squash,
leave main unchanged, skip SQLite provenance insertion, and emit a
postmortem-grade trace event.
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
Extract provenance row construction into private helpers, run provenance
raw-path validation before squash, trace validation success and failure, and
reuse the prevalidated rows for insertion and reports after squash.
---
### Task 1: Add the pre-squash provenance regression
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Write the failing runner test**
Append this test inside the existing
`describe('IngestBundleRunner isolated diff path', ...)` block in
`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`:
```ts
it('rejects invalid provenance raw paths before squash reaches main', async () => {
const runtime = await makeRealGitRuntime();
try {
const { deps, adapter } = makeDeps(runtime);
adapter.chunk.mockResolvedValue({
workUnits: [{ unitKey: 'card-valid-artifacts', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }],
});
let currentSession: any = null;
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
currentSession = toolSession;
return { toRuntimeTools: vi.fn(() => ({})) };
});
deps.agentRunner.runLoop = vi.fn(async () => {
const root = rootOfConfig(currentSession.configService, runtime.configDir);
await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true });
await mkdir(join(root, 'wiki/global'), { recursive: true });
await writeFile(
join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'),
'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n',
);
await writeFile(
join(root, 'wiki/global/account-segments.md'),
'---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr`.\n',
);
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments');
currentSession.actions.push({
target: 'sl',
type: 'created',
key: 'mart_account_segments',
detail: 'Valid source',
targetConnectionId: 'warehouse',
rawPaths: ['cards/source.json'],
});
currentSession.actions.push({
target: 'wiki',
type: 'created',
key: 'account-segments',
detail: 'Valid wiki with invalid provenance raw path',
rawPaths: ['cards/missing.json'],
});
await currentSession.gitService.commitFiles(
['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'],
'valid artifacts with invalid provenance',
'KTX Test',
'system@ktx.local',
);
return { stopReason: 'natural' };
}) as never;
const runner = new IngestBundleRunner(deps);
await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]);
const preRunHead = await runtime.git.revParseHead();
await expect(
runner.run({
jobId: 'job-invalid-provenance',
connectionId: 'warehouse',
sourceKey: 'metabase',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload' },
}),
).rejects.toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
expect(await runtime.git.revParseHead()).toBe(preRunHead);
expect(deps.provenance.insertMany).not.toHaveBeenCalled();
const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-invalid-provenance/trace.jsonl'), 'utf-8');
expect(trace).toContain('final_artifact_gates_finished');
expect(trace).toContain('provenance_rows_validation_failed');
expect(trace).toContain('cards/missing.json');
expect(trace).toContain('ingest_failed');
expect(trace).not.toContain('squash_finished');
} finally {
await rm(runtime.homeDir, { recursive: true, force: true });
}
});
```
- [ ] **Step 2: Run the failing regression**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths"
```
Expected: FAIL because the current runner validates provenance after
`squashMergeIntoMain()`, so `runtime.git.revParseHead()` changes and the trace
does not contain `provenance_rows_validation_failed`.
### Task 2: Move provenance validation into the pre-squash gate boundary
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- [ ] **Step 1: Import the provenance report and insert types**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, update the imports.
Replace this import block:
```ts
import type {
ContextEvidenceIndexSummary,
IngestBundleRunnerDeps,
IngestProvenanceRow,
IngestRunsPort,
IngestSessionWorktree,
PageTriageRunResult,
} from './ports.js';
```
With:
```ts
import type {
ContextEvidenceIndexSummary,
IngestBundleRunnerDeps,
IngestProvenanceInsert,
IngestProvenanceRow,
IngestRunsPort,
IngestSessionWorktree,
PageTriageRunResult,
} from './ports.js';
```
Replace this import block:
```ts
import {
buildStageIndexFromReportBody,
postProcessorSavedMemoryCounts,
type IngestReportPostProcessorOutcome,
type IngestReportSnapshot,
} from './reports.js';
```
With:
```ts
import {
buildStageIndexFromReportBody,
postProcessorSavedMemoryCounts,
type IngestReportPostProcessorOutcome,
type IngestReportProvenanceDetail,
type IngestReportSnapshot,
} from './reports.js';
```
- [ ] **Step 2: Add provenance row helpers**
Add these private methods after `private errorMessage(error: unknown): string`
in `packages/context/src/ingest/ingest-bundle.runner.ts`:
```ts
private buildProvenanceRows(input: {
job: IngestBundleJob;
syncId: string;
currentHashes: Map<string, string>;
stageIndex: StageIndex;
reconcileActions: MemoryAction[];
eviction?: EvictionUnit;
}): IngestProvenanceInsert[] {
const provenanceRows: IngestProvenanceInsert[] = [];
const actionToType = (action: MemoryAction): IngestProvenanceInsert['actionType'] => {
if (action.target === 'wiki') {
return 'wiki_written';
}
return action.type === 'created' ? 'source_created' : 'measure_added';
};
const producedPaths = new Set<string>();
const pushActionProvenance = (rawPath: string, action: MemoryAction): void => {
const hash = input.currentHashes.get(rawPath) ?? '';
provenanceRows.push({
connectionId: input.job.connectionId,
sourceKey: input.job.sourceKey,
syncId: input.syncId,
rawPath,
rawContentHash: hash,
artifactKind: action.target,
artifactKey: action.key,
targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, input.job.connectionId) : null,
artifactContentHash: null,
actionType: actionToType(action),
});
producedPaths.add(rawPath);
};
for (const wu of input.stageIndex.workUnits) {
for (const action of wu.actions) {
for (const rawPath of rawPathsForAction(action, wu.rawFiles)) {
pushActionProvenance(rawPath, action);
}
}
}
for (const action of input.reconcileActions) {
for (const rawPath of action.rawPaths ?? []) {
pushActionProvenance(rawPath, action);
}
}
for (const resolution of input.stageIndex.artifactResolutions ?? []) {
const hash = input.currentHashes.get(resolution.rawPath) ?? '';
provenanceRows.push({
connectionId: input.job.connectionId,
sourceKey: input.job.sourceKey,
syncId: input.syncId,
rawPath: resolution.rawPath,
rawContentHash: hash,
artifactKind: resolution.artifactKind,
artifactKey: resolution.artifactKey,
targetConnectionId: null,
artifactContentHash: null,
actionType: resolution.actionType,
});
producedPaths.add(resolution.rawPath);
}
for (const [rawPath, hash] of input.currentHashes) {
if (producedPaths.has(rawPath)) {
continue;
}
provenanceRows.push({
connectionId: input.job.connectionId,
sourceKey: input.job.sourceKey,
syncId: input.syncId,
rawPath,
rawContentHash: hash,
artifactKind: null,
artifactKey: null,
targetConnectionId: null,
artifactContentHash: null,
actionType: 'skipped',
});
}
return provenanceRows;
}
private toReportProvenanceRows(rows: IngestProvenanceInsert[]): IngestReportProvenanceDetail[] {
return rows.map(({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({
rawPath,
artifactKind,
artifactKey,
targetConnectionId: targetConnectionId ?? null,
actionType,
}));
}
```
- [ ] **Step 3: Validate planned provenance rows before squash**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, find the code that
sets `activePhase = 'final_gates';` and runs `traceTimed(...,
'final_artifact_gates', ...)`. Immediately after that `await traceTimed(...)`
block and before the `// Stage 6 — squash commit` comment, insert:
```ts
activePhase = 'provenance_validation';
const provenanceRows = this.buildProvenanceRows({
job,
syncId,
currentHashes,
stageIndex,
reconcileActions,
eviction,
});
await traceTimed(
runTrace,
'provenance',
'provenance_rows_validation',
{
rowCount: provenanceRows.length,
currentRawPathCount: currentHashes.size,
deletedRawPathCount: eviction?.deletedRawPaths.length ?? 0,
},
async () => {
validateProvenanceRawPaths({
rows: provenanceRows,
currentRawPaths: new Set(currentHashes.keys()),
deletedRawPaths: new Set(eviction?.deletedRawPaths ?? []),
});
},
);
const reportProvenanceRows = this.toReportProvenanceRows(provenanceRows);
```
- [ ] **Step 4: Replace the post-squash provenance construction block**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, in the
`activePhase = 'provenance';` section after squash, delete the current block
that starts with:
```ts
// Provenance rows: per-artifact when the WU emitted actions, plus a `skipped`
// fallback for raw files that produced nothing so the next DiffSet still sees
// them.
const provenanceRows: Parameters<typeof this.deps.provenance.insertMany>[0] = [];
```
And ends with:
```ts
await runTrace.event('debug', 'provenance', 'provenance_rows_validated', {
rowCount: provenanceRows.length,
});
```
Do not delete the existing call to `await this.deps.provenance.insertMany(provenanceRows);`.
Immediately after that insertion call, add:
```ts
await runTrace.event('debug', 'provenance', 'provenance_rows_inserted', {
rowCount: provenanceRows.length,
});
```
Then delete the later `const reportProvenanceRows = provenanceRows.map(...)`
block because `reportProvenanceRows` is now created before squash from the
prevalidated rows.
- [ ] **Step 5: Run the provenance regression**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths"
```
Expected: PASS. The trace contains `provenance_rows_validation_failed`, main
HEAD remains unchanged, and `provenance.insertMany` is not called.
- [ ] **Step 6: Run the focused isolated-diff suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-trace.test.ts \
src/ingest/wiki-body-refs.test.ts \
src/ingest/artifact-gates.test.ts \
src/ingest/isolated-diff/git-patch.test.ts \
src/ingest/isolated-diff/work-unit-executor.test.ts \
src/ingest/isolated-diff/patch-integrator.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
```
Expected: PASS.
### Task 3: Type-check, dead-code check, and commit
**Files:**
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Run the context package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 2: Run the workspace dead-code check**
Run:
```bash
pnpm run dead-code
```
Expected: PASS, or only existing unrelated Knip/Biome findings. Investigate
any new findings in the two modified files before continuing.
- [ ] **Step 3: Commit the provenance gate closure**
Run:
```bash
git add packages/context/src/ingest/ingest-bundle.runner.ts \
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
git commit -m "fix(ingest): gate provenance before isolated diff squash"
```
Expected: one commit containing only the runner and isolated-diff runner test
changes.
## Self-Review
Spec coverage: this plan closes the remaining violation of the design's final
global gate invariant by proving invalid provenance raw paths fail before
squash and by moving provenance validation into the pre-main gate boundary.
Placeholder scan: no placeholder steps remain. Every implementation step names
the exact files, code, commands, and expected results.
Type consistency: the plan uses existing `IngestProvenanceInsert`,
`IngestReportProvenanceDetail`, `MemoryAction`, `EvictionUnit`, `StageIndex`,
`rawPathsForAction()`, and `validateProvenanceRawPaths()` names.

View file

@ -0,0 +1,320 @@
# Adapter-Owned Finalization V1 Closure Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Close the remaining adapter-owned finalization v1 verification gaps so the finalization contract is publicly typed and the historic-SQL local acceptance path passes through `SourceAdapter.finalize()`.
**Architecture:** The production runner already owns finalization execution, commits, target policy, final gates, reports, traces, and provenance. This plan keeps production behavior intact, exports the finalization adapter types through the ingest barrel, and updates the local historic-SQL acceptance fixture to model the real adapter-owned finalization path instead of the removed post-processor path.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, pnpm workspace commands, existing `SourceAdapter`, `projectHistoricSqlEvidence()`, and package export coverage.
---
## Audit summary
The audit compared
`docs/superpowers/specs/2026-05-18-adapter-owned-ingest-finalization-design.md`
against the implemented source, plan, and targeted tests.
Implemented v1 coverage:
- `SourceAdapter.finalize()` exists with typed context and result objects in
`packages/context/src/ingest/types.ts`.
- `IngestBundleRunnerDeps.postProcessors`, `IngestBundlePostProcessorPort`,
`HistoricSqlProjectionPostProcessor`, `post_processor` trace phases, and
`postProcessor` report fields are absent from production source.
- The runner invokes finalization after reconciliation and before
`wiki_sl_ref_repair`, target-policy checks, final artifact gates,
provenance validation, and squash.
- The runner derives finalization touched paths from the integration-worktree
diff, resolves semantic-layer scope including `_schema/*.yaml`, cross-checks
adapter declarations, commits finalization, records reports/traces, rejects
path overlap, and partitions finalization actions for provenance exclusions.
- Override replay passes explicit `overrideReplay` metadata, omits
`parseArtifacts`, and leaves current-run `workUnitOutcomes` empty.
- Historic SQL implements adapter-owned `finalize()` and uses
`projectHistoricSqlEvidence()` for aggregate projection maintenance.
V1-blocking gaps:
- `packages/context/src/ingest/index.ts` exports `SourceAdapter` and projection
types, but not `DeterministicFinalizationContext`,
`FinalizationOverrideReplay`, or `FinalizationResult`. The adapter contract is
less usable from the public ingest barrel than the spec requires.
- The targeted verification command currently fails because
`HistoricSqlEvidenceTestAdapter` in
`packages/context/src/ingest/local-bundle-ingest.test.ts` lacks
`finalize()`, so `result.report.body.finalization` is `undefined` in the
local historic-SQL projection acceptance test.
Non-blocking gaps:
- Older historical plan documents still mention post-processors. They are
archived implementation history and do not affect runtime behavior.
- The runner has helper-level declaration mismatch coverage, but no dedicated
local-bundle integration test for a finalization declaration mismatch. The
implementation path exists; adding a higher-level regression test can be a
later hardening pass.
- Finalization wiki page deletion could use a future global wiki-reference gate
regression test. Historic-SQL v1 finalization updates or archives pages in
place, so this is not required for the current v1 acceptance path.
## File structure
- Modify `packages/context/src/ingest/index.ts`.
Re-export the typed finalization adapter contract next to the existing
projection contract.
- Modify `packages/context/src/package-exports.test.ts`.
Add compile-time coverage proving finalization adapter types are exported
from the ingest barrel.
- Modify `packages/context/src/ingest/local-bundle-ingest.test.ts`.
Make the historic-SQL local acceptance test adapter implement
`finalize()` by delegating to `projectHistoricSqlEvidence()`, and rename the
stale test label from post-processor to finalization.
---
### Task 1: Export finalization adapter contract types
**Files:**
- Modify: `packages/context/src/package-exports.test.ts`
- Modify: `packages/context/src/ingest/index.ts`
- [ ] **Step 1: Write failing type export coverage**
In `packages/context/src/package-exports.test.ts`, add this import after the
existing Vitest import:
```ts
import type {
DeterministicFinalizationContext,
FinalizationOverrideReplay,
FinalizationResult,
} from './ingest/index.js';
```
Then add this constant after `scanTypeExportCoverage`:
```ts
const ingestFinalizationTypeExportCoverage: Partial<{
context: DeterministicFinalizationContext;
overrideReplay: FinalizationOverrideReplay;
result: FinalizationResult;
}> = {};
```
Inside the existing package export test, place this assertion immediately after
`expect(scanTypeExportCoverage).toEqual({});`:
```ts
expect(ingestFinalizationTypeExportCoverage).toEqual({});
```
- [ ] **Step 2: Run type-check to verify the coverage fails**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: FAIL with TypeScript errors like:
```text
Module '"./ingest/index.js"' has no exported member 'DeterministicFinalizationContext'.
Module '"./ingest/index.js"' has no exported member 'FinalizationOverrideReplay'.
Module '"./ingest/index.js"' has no exported member 'FinalizationResult'.
```
- [ ] **Step 3: Export the finalization types**
In `packages/context/src/ingest/index.ts`, update the existing export block
from `./types.js` so the final lines read:
```ts
WorkUnit,
DeterministicProjectionContext,
ProjectionResult,
DeterministicFinalizationContext,
FinalizationOverrideReplay,
FinalizationResult,
} from './types.js';
```
- [ ] **Step 4: Run type-check and package export coverage**
Run:
```bash
pnpm --filter @ktx/context run type-check
pnpm --filter @ktx/context exec vitest run src/package-exports.test.ts
```
Expected: both commands PASS.
- [ ] **Step 5: Commit the type export closure**
Run:
```bash
git add packages/context/src/ingest/index.ts packages/context/src/package-exports.test.ts
git commit -m "feat(ingest): export finalization adapter contract types"
```
### Task 2: Repair the local historic-SQL finalization acceptance fixture
**Files:**
- Modify: `packages/context/src/ingest/local-bundle-ingest.test.ts`
- [ ] **Step 1: Import the projection helper and finalization types**
In `packages/context/src/ingest/local-bundle-ingest.test.ts`, add this import
after the fake adapter import:
```ts
import { projectHistoricSqlEvidence } from './adapters/historic-sql/projection.js';
```
Replace the existing type import from `./types.js` with:
```ts
import type {
ChunkResult,
DeterministicFinalizationContext,
DiffSet,
FinalizationResult,
SourceAdapter,
} from './types.js';
```
- [ ] **Step 2: Add adapter-owned finalization to the test adapter**
In `HistoricSqlEvidenceTestAdapter`, add this method after `chunk()`:
```ts
async finalize(ctx: DeterministicFinalizationContext): Promise<FinalizationResult> {
const projection = await projectHistoricSqlEvidence({
workdir: ctx.workdir,
connectionId: ctx.connectionId,
syncId: ctx.syncId,
runId: ctx.runId,
overrideReplay: ctx.overrideReplay,
});
return {
result: projection,
warnings: projection.warnings,
errors: [],
touchedSources: projection.touchedSources,
changedWikiPageKeys: projection.changedWikiPageKeys,
actions: projection.actions,
};
}
```
- [ ] **Step 3: Rename the stale test label**
Change the test name:
```ts
it('runs historic-SQL evidence projection through the local bundle post-processor', async () => {
```
to:
```ts
it('runs historic-SQL evidence projection through local bundle finalization', async () => {
```
- [ ] **Step 4: Run the previously failing focused test and verify it passes**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "historic-SQL evidence projection"
```
Expected: PASS, and the `result.report.body.finalization` assertion in
`packages/context/src/ingest/local-bundle-ingest.test.ts` (near line 551; the
line number shifts as this task adds code) receives an object with
`status: "success"`.
- [ ] **Step 5: Commit the local acceptance fixture**
Run:
```bash
git add packages/context/src/ingest/local-bundle-ingest.test.ts
git commit -m "test(ingest): exercise historic sql finalization locally"
```
### Task 3: Run final verification
**Files:**
- Verify: `packages/context/src/ingest/finalization-scope.test.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
- Verify: `packages/context/src/ingest/local-bundle-ingest.test.ts`
- Verify: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
- Verify: workspace TypeScript and dead-code checks
- [ ] **Step 1: Run the adapter-owned finalization targeted suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/finalization-scope.test.ts src/ingest/ingest-bundle.runner.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/adapters/historic-sql/projection.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
```
Expected: PASS with all six test files passing.
- [ ] **Step 2: Run TypeScript validation**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 3: Run dead-code validation**
Run:
```bash
pnpm run dead-code
```
Expected: PASS.
- [ ] **Step 4: Inspect final status**
Run:
```bash
git status --short
```
Expected: the worktree is clean after the two commits. If any entries remain,
they are limited to the files this plan intentionally modifies.
## Docs impact
No `docs-site/content/docs/` update is required. The remaining v1 work is an
adapter contract type export and test acceptance closure; it does not change
CLI behavior, user configuration, setup flow, connector behavior, or public
documentation examples.
## Self-review
- Spec coverage: The plan covers the remaining adapter API usability gap and
the failing historic-SQL local finalization acceptance path. The main
runner, reports, traces, provenance, override replay, and historic-SQL
production finalization behavior already exist.
- Placeholder scan: The plan contains no placeholder tasks or unspecified
implementation steps.
- Type consistency: `DeterministicFinalizationContext`,
`FinalizationOverrideReplay`, and `FinalizationResult` match the existing
names in `packages/context/src/ingest/types.ts`; the test adapter delegates
to the existing `projectHistoricSqlEvidence()` result shape.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,754 @@
# Isolated Diff Ingestion V1 Default Promotion Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Promote isolated-diff WorkUnit execution to the default ingest runner
path while keeping the old shared-worktree branch reachable by an explicit
private fallback setting for the final cleanup rollout.
**Architecture:** The runner stops asking whether a source is on an
isolated-diff allowlist. Instead, non-override bundle ingests use isolated
diffs unless the private settings object lists the source in
`sharedWorktreeSourceKeys`. The local runtime defaults that fallback list to
empty, and tests keep the old shared-worktree path covered through an explicit
legacy source setting so rollout step 11 can delete that path safely.
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, pnpm workspace commands,
existing `IngestBundleRunner`, `IngestSettingsPort`, local ingest runtime, and
isolated-diff runner tests.
---
## Audit summary
This audit read the original spec at
`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, all
plans matching
`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the
current ingest runner code under `packages/context/src/ingest/`.
Implemented v1 rollout coverage:
- Rollout steps 1 and 2 are implemented by the core plan: child worktrees,
binary no-rename patch proposals, and `git apply --3way --index`
integration exist.
- Rollout step 3 is implemented by the textual conflict resolver plan:
`textual-conflict-resolver.ts` is wired through `patch-integrator.ts`.
- Rollout steps 4, 5, and 6 are implemented by the gates, provenance,
reference, global wiki, and gate-repair plans: final gates, persistent traces,
failure reports, provenance validation, target policy, and repair counters
exist.
- Rollout step 7 is implemented by the core and follow-up plans: Metabase has
isolated-diff stale-reference regression coverage.
- Rollout step 8 is implemented by
`2026-05-18-isolated-diff-ingestion-v1-connector-migration.md` and the
follow-up commits: Notion, LookML, Looker, dbt, and MetricFlow route through
isolated child worktrees, and MetricFlow projection runs before WorkUnits.
Current v1-blocking gaps:
- Rollout step 10 is not complete. `IngestBundleRunner.isIsolatedDiffEnabled()`
still checks `settings.isolatedDiffSourceKeys`, and
`local-bundle-runtime.ts` still installs the internal allowlist returned by
`defaultIsolatedDiffSourceKeys()`.
- Rollout step 11 remains blocked until step 10 lands. The old
shared-worktree WorkUnit branch is still present and must stay reachable in
this plan for final cleanup validation.
Non-blocking gaps:
- Rollout step 9 deterministic semantic merge helpers remain intentionally
deferred until v1 resolver metrics show frequent mechanical repairs.
- Transitive SQL-projection dependency expansion remains outside v1; current
gates cover direct declared join neighbors.
- Moving provenance into worktree files remains outside v1; the implemented
source of truth is the ingest provenance store and report body.
- Public connector knobs such as `executionMode`, `planningStrategy`, and
`conflictPolicy` remain non-goals and must not be added.
- Richer resolver context, such as full transcript excerpts for every
overlapping patch, can be evaluated after the default path has production
traces.
## File structure
- Modify `packages/context/src/ingest/isolated-diff/source-routing.ts`.
Replace the isolated-diff direct-write allowlist with an empty default
shared-worktree fallback list.
- Modify `packages/context/src/ingest/isolated-diff/source-routing.test.ts`.
Lock the fallback list semantics and remove direct-write allowlist
assertions.
- Modify `packages/context/src/ingest/ports.ts`.
Replace `isolatedDiffSourceKeys?: string[]` with
`sharedWorktreeSourceKeys?: string[]` on the private runner settings port.
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
Make isolated diff the default for non-override runs and route to the old
shared branch only when `sharedWorktreeSourceKeys` contains the source.
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
Prove an unlisted source uses isolated diffs by default and prove an
explicit fallback source can still reach the shared-worktree branch.
- Modify `packages/context/src/ingest/local-bundle-runtime.ts`.
Install the new empty fallback list instead of the old isolated-diff
allowlist.
- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`.
Assert local runtime settings do not expose `isolatedDiffSourceKeys` and do
default `sharedWorktreeSourceKeys` to `[]`.
---
### Task 1: Replace source routing semantics
**Files:**
- Modify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
- Modify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
- Modify: `packages/context/src/ingest/ports.ts`
- [ ] **Step 1: Write the failing source-routing tests**
Replace `packages/context/src/ingest/isolated-diff/source-routing.test.ts` with:
```ts
import { describe, expect, it } from 'vitest';
import { defaultSharedWorktreeSourceKeys, isSharedWorktreeFallbackSourceKey } from './source-routing.js';
describe('isolated-diff source routing', () => {
it('defaults every non-override source to isolated diffs', () => {
expect(defaultSharedWorktreeSourceKeys()).toEqual([]);
});
it('returns a mutable copy for runtime settings', () => {
const keys = defaultSharedWorktreeSourceKeys();
keys.push('legacy-source');
expect(defaultSharedWorktreeSourceKeys()).toEqual([]);
});
it('recognizes only explicitly configured shared-worktree fallback sources', () => {
expect(isSharedWorktreeFallbackSourceKey('notion', [])).toBe(false);
expect(isSharedWorktreeFallbackSourceKey('metricflow', [])).toBe(false);
expect(isSharedWorktreeFallbackSourceKey('legacy-source', ['legacy-source'])).toBe(true);
expect(isSharedWorktreeFallbackSourceKey('other-source', ['legacy-source'])).toBe(false);
});
});
```
- [ ] **Step 2: Run the source-routing tests to verify they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts
```
Expected: FAIL because `defaultSharedWorktreeSourceKeys()` and
`isSharedWorktreeFallbackSourceKey()` are not exported yet.
- [ ] **Step 3: Rewrite the routing helper**
Replace `packages/context/src/ingest/isolated-diff/source-routing.ts` with:
```ts
const DEFAULT_SHARED_WORKTREE_SOURCE_KEYS: readonly string[] = [];
export function defaultSharedWorktreeSourceKeys(): string[] {
return [...DEFAULT_SHARED_WORKTREE_SOURCE_KEYS];
}
export function isSharedWorktreeFallbackSourceKey(
sourceKey: string,
sharedWorktreeSourceKeys: readonly string[] = DEFAULT_SHARED_WORKTREE_SOURCE_KEYS,
): boolean {
return sharedWorktreeSourceKeys.includes(sourceKey);
}
```
- [ ] **Step 4: Rename the private settings field**
In `packages/context/src/ingest/ports.ts`, replace the
`IngestSettingsPort` interface with:
```ts
export interface IngestSettingsPort {
memoryIngestionModel: string;
probeRowCount: number;
workUnitMaxConcurrency?: number;
workUnitStepBudget?: number;
workUnitFailureMode?: 'abort' | 'continue';
sharedWorktreeSourceKeys?: string[];
ingestTraceLevel?: IngestTraceLevel;
}
```
- [ ] **Step 5: Run the source-routing tests again**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts
```
Expected: PASS.
- [ ] **Step 6: Commit routing semantics**
Run:
```bash
git add packages/context/src/ingest/isolated-diff/source-routing.ts \
packages/context/src/ingest/isolated-diff/source-routing.test.ts \
packages/context/src/ingest/ports.ts
git commit -m "feat(ingest): make isolated diff routing the private default"
```
### Task 2: Promote the runner default
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- [ ] **Step 1: Update the isolated runner test imports and harness**
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
replace the source-routing import with:
```ts
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
```
Then change the `makeDeps()` signature and `settings` block to:
```ts
function makeDeps(
runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>,
sourceKey = 'metabase',
settings: Partial<IngestBundleRunnerDeps['settings']> = {},
) {
```
```ts
settings: {
memoryIngestionModel: 'test',
probeRowCount: 1,
sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(),
ingestTraceLevel: 'trace',
...settings,
},
```
- [ ] **Step 2: Add the default-promotion regression tests**
Insert these tests inside
`describe('IngestBundleRunner isolated diff path', ...)`, before the existing
non-Metabase routing matrix:
```ts
it('routes an unlisted direct-writing source through isolated diffs by default', async () => {
const runtime = await makeRealGitRuntime();
try {
const sourceKey = 'custom-direct-source';
const { deps, adapter } = makeDeps(runtime, sourceKey);
adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'custom-wiki',
rawFiles: ['custom/page.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
let currentSession: any = null;
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
currentSession = toolSession;
return { toRuntimeTools: vi.fn(() => ({})) };
});
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' };
}
const root = rootOfConfig(currentSession.configService, runtime.configDir);
await mkdir(join(root, 'wiki/global'), { recursive: true });
await writeFile(
join(root, 'wiki/global/custom-isolated.md'),
'---\nsummary: Custom isolated write\nusage_mode: auto\n---\n\nCustom isolated write.\n',
'utf-8',
);
currentSession.actions.push({
target: 'wiki',
type: 'created',
key: 'custom-isolated',
detail: 'Custom isolated write',
rawPaths: ['custom/page.json'],
});
await currentSession.gitService.commitFiles(
['wiki/global/custom-isolated.md'],
'custom wiki',
'KTX Test',
'system@ktx.local',
);
return { stopReason: 'natural' };
}) as never;
const runner = new IngestBundleRunner(deps);
await mockStageRawFiles(runner, runtime, [['custom/page.json', 'h1']], sourceKey);
await expect(
runner.run({
jobId: 'job-custom-default',
connectionId: 'warehouse',
sourceKey,
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload' },
}),
).resolves.toMatchObject({
jobId: 'job-custom-default',
failedWorkUnits: [],
workUnitCount: 1,
});
const trace = await readFile(
join(runtime.configDir, '.ktx/ingest-traces/job-custom-default/trace.jsonl'),
'utf-8',
);
expect(trace).toContain('isolated_diff_enabled');
expect(trace).toContain('work_unit_child_created');
expect(trace).not.toContain('shared_worktree_path_enabled');
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
expect(reportBody?.isolatedDiff).toMatchObject({
enabled: true,
acceptedPatches: 1,
});
} finally {
await rm(runtime.homeDir, { recursive: true, force: true });
}
});
it('keeps the shared-worktree path reachable through explicit private fallback settings', async () => {
const runtime = await makeRealGitRuntime();
try {
const sourceKey = 'legacy-source';
const { deps, adapter } = makeDeps(runtime, sourceKey, {
sharedWorktreeSourceKeys: ['legacy-source'],
});
adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'legacy-wiki',
rawFiles: ['legacy/page.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
let currentSession: any = null;
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
currentSession = toolSession;
return { toRuntimeTools: vi.fn(() => ({})) };
});
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' };
}
const root = rootOfConfig(currentSession.configService, runtime.configDir);
await mkdir(join(root, 'wiki/global'), { recursive: true });
await writeFile(
join(root, 'wiki/global/legacy-shared.md'),
'---\nsummary: Legacy shared write\nusage_mode: auto\n---\n\nLegacy shared write.\n',
'utf-8',
);
currentSession.actions.push({
target: 'wiki',
type: 'created',
key: 'legacy-shared',
detail: 'Legacy shared write',
rawPaths: ['legacy/page.json'],
});
await currentSession.gitService.commitFiles(
['wiki/global/legacy-shared.md'],
'legacy wiki',
'KTX Test',
'system@ktx.local',
);
return { stopReason: 'natural' };
}) as never;
const runner = new IngestBundleRunner(deps);
await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey);
await expect(
runner.run({
jobId: 'job-legacy-shared',
connectionId: 'warehouse',
sourceKey,
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload' },
}),
).resolves.toMatchObject({
jobId: 'job-legacy-shared',
failedWorkUnits: [],
workUnitCount: 1,
});
const trace = await readFile(
join(runtime.configDir, '.ktx/ingest-traces/job-legacy-shared/trace.jsonl'),
'utf-8',
);
expect(trace).toContain('shared_worktree_path_enabled');
expect(trace).not.toContain('work_unit_child_created');
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
expect(reportBody?.isolatedDiff).toMatchObject({
enabled: false,
});
} finally {
await rm(runtime.homeDir, { recursive: true, force: true });
}
});
```
- [ ] **Step 3: Run the new runner tests to verify the default test fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable"
```
Expected: FAIL. The unlisted source still enters the old shared-worktree path
because the runner checks `isolatedDiffSourceKeys`.
- [ ] **Step 4: Change the runner routing decision**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace
`isIsolatedDiffEnabled()` with:
```ts
private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean {
return (this.deps.settings.sharedWorktreeSourceKeys ?? []).includes(sourceKey);
}
```
Then replace the isolated-diff routing line with:
```ts
const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey);
```
Finally, replace the shared-path trace event with:
```ts
await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', {
sourceKey: job.sourceKey,
reason: 'explicit_private_fallback',
});
```
- [ ] **Step 5: Run the new runner tests again**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable"
```
Expected: PASS.
- [ ] **Step 6: Commit runner default promotion**
Run:
```bash
git add packages/context/src/ingest/ingest-bundle.runner.ts \
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
git commit -m "feat(ingest): promote isolated diff to default runner path"
```
### Task 3: Update local runtime defaults
**Files:**
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
- [ ] **Step 1: Update the local runtime settings test type**
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace
`RuntimeWithSettingsDeps` with:
```ts
type RuntimeWithSettingsDeps = {
deps: {
settings: {
sharedWorktreeSourceKeys?: string[];
isolatedDiffSourceKeys?: string[];
};
};
};
```
- [ ] **Step 2: Replace the local runtime settings assertion**
Replace the test named
`enables isolated-diff routing for direct durable-write connectors` with:
```ts
it('defaults local bundle ingest to isolated diffs without an allowlist', () => {
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner: testAgentRunner(),
});
const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
expect(settings.sharedWorktreeSourceKeys).toEqual([]);
expect('isolatedDiffSourceKeys' in settings).toBe(false);
});
```
- [ ] **Step 3: Run the local runtime settings test to verify it fails**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest"
```
Expected: FAIL because `local-bundle-runtime.ts` still sets
`isolatedDiffSourceKeys`.
- [ ] **Step 4: Update local runtime imports and settings**
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the
source-routing import with:
```ts
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
```
Then replace the settings field:
```ts
isolatedDiffSourceKeys: defaultIsolatedDiffSourceKeys(),
```
with:
```ts
sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(),
```
- [ ] **Step 5: Run the local runtime settings test again**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest"
```
Expected: PASS.
- [ ] **Step 6: Commit local runtime defaults**
Run:
```bash
git add packages/context/src/ingest/local-bundle-runtime.ts \
packages/context/src/ingest/local-bundle-runtime.test.ts
git commit -m "feat(ingest): default local ingest to isolated diffs"
```
### Task 4: Remove stale allowlist references
**Files:**
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- Verify: `packages/context/src/ingest/ports.ts`
- Verify: `packages/context/src/ingest/**/*.test.ts`
- [ ] **Step 1: Search for old allowlist names**
Run:
```bash
rg -n "isolatedDiffSourceKeys|defaultIsolatedDiffSourceKeys|ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS|isIsolatedDiffDirectWriteSourceKey" packages/context/src
```
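Expected: matches only in
`packages/context/src/ingest/local-bundle-runtime.test.ts`, where the Task 3
`RuntimeWithSettingsDeps` type and the negative
`'isolatedDiffSourceKeys' in settings` assertion intentionally name the removed
field. Any other match is a stale reference that must be removed.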
- [ ] **Step 2: Search for the new fallback setting**
Run:
```bash
rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey" packages/context/src
```
Expected: matches only in these files:
```text
packages/context/src/ingest/ports.ts
packages/context/src/ingest/ingest-bundle.runner.ts
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
packages/context/src/ingest/isolated-diff/source-routing.ts
packages/context/src/ingest/isolated-diff/source-routing.test.ts
packages/context/src/ingest/local-bundle-runtime.ts
packages/context/src/ingest/local-bundle-runtime.test.ts
```
- [ ] **Step 3: Run a focused no-allowlist regression suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/isolated-diff/source-routing.test.ts \
src/ingest/local-bundle-runtime.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
-t "source routing|defaults local bundle ingest|unlisted direct-writing source|shared-worktree path reachable|routes notion|routes lookml|routes looker|routes dbt|routes metricflow"
```
Expected: PASS.
- [ ] **Step 4: Commit stale-reference cleanup if needed**
If Step 1 or Step 2 required any edits, run:
```bash
git add packages/context/src/ingest
git commit -m "chore(ingest): remove isolated diff allowlist references"
```
If no files changed, record that no cleanup commit was needed in the execution
notes for this task.
### Task 5: Final verification
**Files:**
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
- Verify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
- Verify: `packages/context/src/ingest/ports.ts`
- Verify: `docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md`
- [ ] **Step 1: Run the full isolated-diff focused suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-trace.test.ts \
src/ingest/wiki-body-refs.test.ts \
src/ingest/artifact-gates.test.ts \
src/ingest/semantic-layer-target-policy.test.ts \
src/ingest/isolated-diff/source-routing.test.ts \
src/ingest/isolated-diff/git-patch.test.ts \
src/ingest/isolated-diff/work-unit-executor.test.ts \
src/ingest/isolated-diff/patch-integrator.test.ts \
src/ingest/isolated-diff/textual-conflict-resolver.test.ts \
src/ingest/final-gate-repair.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
src/ingest/report-snapshot.test.ts \
src/ingest/local-bundle-runtime.test.ts
```
Expected: PASS.
- [ ] **Step 2: Run the MetricFlow local ingest regression**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "runs full MetricFlow local ingest"
```
Expected: PASS. The report body includes `isolatedDiff.enabled: true`,
`acceptedPatches: 0`, and a string `projectionSha`.
- [ ] **Step 3: Run package type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS.
- [ ] **Step 4: Run package tests**
Run:
```bash
pnpm --filter @ktx/context run test
```
Expected: PASS.
- [ ] **Step 5: Run TypeScript dead-code checks**
Run:
```bash
pnpm run dead-code
```
Expected: PASS, or only pre-existing findings unrelated to the files changed
by this plan. Investigate any finding that names `source-routing.ts`,
`ports.ts`, `local-bundle-runtime.ts`, or `ingest-bundle.runner.ts`.
- [ ] **Step 6: Decide whether docs-site needs an update**
No `docs-site/content/docs/` change is expected for this plan because the
change is an internal runner rollout switch and does not add or remove public
CLI commands, flags, config fields, connector setup steps, or user-facing
documentation concepts.
- [ ] **Step 7: Commit final verification notes**
Run:
```bash
git status --short
git add docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md
git commit -m "docs: add isolated diff default promotion plan"
```
Include only the plan file in this commit, and only after `git status --short`
confirms that the implementation commits already captured every code change.
## Completion criteria
This plan is complete when:
- `packages/context/src/ingest/ports.ts` has
`sharedWorktreeSourceKeys?: string[]` and no `isolatedDiffSourceKeys` field.
- `IngestBundleRunner` uses isolated diffs for every non-override source unless
`sharedWorktreeSourceKeys` explicitly contains that source.
- The trace for a default-routed source contains `isolated_diff_enabled` and
not `shared_worktree_path_enabled`.
- The trace for an explicitly fallback-routed source contains
`shared_worktree_path_enabled` and not `work_unit_child_created`.
- Local runtime settings default `sharedWorktreeSourceKeys` to `[]`.
- No production or test code under `packages/context/src` references the old
isolated-diff allowlist names, apart from the negative
`isolatedDiffSourceKeys` assertion in `local-bundle-runtime.test.ts`.
- The focused isolated-diff suite, MetricFlow local ingest regression,
`@ktx/context` type-check, `@ktx/context` tests, and dead-code checks pass.
## Next rollout step
After this plan is implemented and verified, the only remaining v1-blocking
rollout item from the spec is step 11: remove the old shared-worktree WorkUnit
execution path and delete the private `sharedWorktreeSourceKeys` fallback
setting.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,980 @@
# Isolated Diff Ingestion V1 Shared Worktree Removal Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use
> superpowers:subagent-driven-development (recommended) or
> superpowers:executing-plans to implement this plan task-by-task. Steps use
> checkbox (`- [ ]`) syntax for tracking.
**Goal:** Remove the old shared-worktree WorkUnit execution path so every
non-override bundle ingest uses isolated WorkUnit diffs.
**Architecture:** Keep `IngestBundleRunner` with one non-override execution
path: raw snapshot, optional deterministic projection, child WorkUnit
worktrees, patch integration, reconciliation, final gates, provenance
validation, and squash. Delete the private fallback routing setting and all
legacy tests, traces, and agent instructions that existed only for shared
WorkUnit state.
**Tech Stack:** TypeScript, Vitest, pnpm, KTX ingest runner, Git worktrees.
---
## Audit summary
This audit read the original design in
`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, every
implemented plan matching
`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the
current implementation under `packages/context/src/ingest/`,
`packages/context/prompts/`, and `packages/context/skills/`.
Implemented v1 rollout coverage:
- Rollout steps 1 and 2 exist in code: isolated child worktrees, binary
no-rename patch collection, and `git apply --3way --index` patch integration.
- Rollout step 3 exists in code:
`packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts` is
wired through the patch integrator and runner.
- Rollout steps 4, 5, and 6 exist in code: final wiki and semantic-layer gates,
provenance validation before squash, target policy checks, bounded gate
repair, failed reports, and trace counters.
- Rollout step 7 exists in code: the Metabase stale body-reference regression
is covered in `ingest-bundle.runner.isolated-diff.test.ts`.
- Rollout step 8 is committed: Notion, LookML, Looker, dbt, and MetricFlow
route through isolated child worktrees, and MetricFlow projection runs before
WorkUnits.
- Rollout step 10 is committed: non-override ingests default to isolated diffs,
and the old branch is reachable only through the private
`sharedWorktreeSourceKeys` fallback setting.
## Remaining gaps
The remaining v1-blocking gaps are all part of rollout step 11:
- `packages/context/src/ingest/ports.ts` still exposes the private
`sharedWorktreeSourceKeys?: string[]` setting.
- `packages/context/src/ingest/isolated-diff/source-routing.ts` and its test
exist only to support the fallback setting.
- `packages/context/src/ingest/local-bundle-runtime.ts` still installs
`sharedWorktreeSourceKeys: []`.
- `packages/context/src/ingest/ingest-bundle.runner.ts` still checks
`isSharedWorktreeFallbackEnabled()` and contains the
`shared_worktree_path_enabled` branch that runs WorkUnits against the mutable
integration worktree.
- `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
still has a regression proving the shared-worktree fallback is reachable.
- `packages/context/src/ingest/ingest-bundle.runner.test.ts` keeps broad runner
tests on the legacy path through `sharedWorktreeSourceKeys`; those tests must
either use the isolated mock harness or move coverage into the real-git
isolated suite.
- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` and
`packages/context/skills/ingest_triage/SKILL.md` still tell WorkUnit agents
that prior WorkUnit writes in the same job are visible in the current working
branch. That instruction is false after isolated diffs and must be removed
with the shared path.
Non-blocking gaps after this plan:
- Rollout step 9 deterministic semantic merge helpers remain intentionally
deferred until resolver metrics show frequent mechanical repairs.
- Semantic-layer dependency expansion remains direct declared joins only; the
spec explicitly defers transitive SQL-projection closure.
- Provenance remains in the ingest provenance store and report body; moving it
to worktree files is a separate schema migration.
- Resolver context can later include richer transcript excerpts and explicit
overlap summaries for every previously applied patch.
- Failures before an ingest run row exists still have deterministic trace files
but no stored ingest report.
## File structure
- Modify `packages/context/src/ingest/ports.ts`. Remove the private fallback
setting from `IngestSettingsPort`.
- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. Stop importing
and installing default shared-worktree fallback settings.
- Delete `packages/context/src/ingest/isolated-diff/source-routing.ts`. This
helper has no responsibility once fallback routing is removed.
- Delete `packages/context/src/ingest/isolated-diff/source-routing.test.ts`.
Its assertions exist only for the fallback helper.
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. Delete
`isSharedWorktreeFallbackEnabled()`, the old shared-worktree WorkUnit branch,
and helper methods that only served that branch.
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
Remove fallback reachability coverage and add a stale-setting regression that
proves a runtime object cannot opt out of isolated diffs.
- Modify `packages/context/src/ingest/ingest-bundle.runner.test.ts`. Remove
the fallback setting from the broad test harness and make its mocked Git
session support no-op isolated patch collection.
- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`. Assert
local runtime settings do not contain the fallback key.
- Modify `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`.
Replace shared-branch WorkUnit visibility instructions with isolated-diff
instructions.
- Modify `packages/context/skills/ingest_triage/SKILL.md`. Remove Stage 3
prior-WorkUnit visibility language and keep cross-WorkUnit sweep guidance in
Stage 4 reconciliation.
---
### Task 1: Add removal-contract regressions
**Files:**
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Update the local runtime settings type**
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace
`RuntimeWithSettingsDeps` with:
```ts
type RuntimeWithSettingsDeps = {
deps: {
settings: Record<string, unknown>;
};
};
```
- [ ] **Step 2: Replace the local runtime fallback-setting assertion**
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace the test
named `defaults local bundle ingest to isolated diffs without an allowlist` with:
```ts
it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => {
const runtime = createLocalBundleIngestRuntime({
project,
adapters: [new FakeSourceAdapter()],
agentRunner: testAgentRunner(),
});
const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
expect(settings).not.toHaveProperty('sharedWorktreeSourceKeys');
expect(Object.keys(settings).sort()).toEqual([
'ingestTraceLevel',
'memoryIngestionModel',
'probeRowCount',
'workUnitFailureMode',
'workUnitMaxConcurrency',
'workUnitStepBudget',
]);
});
```
- [ ] **Step 3: Remove the source-routing import from the isolated runner test**
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
delete this import:
```ts
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
```
Then remove the `sharedWorktreeSourceKeys` line from the `settings` object in
`makeDeps()`:
```ts
settings: {
memoryIngestionModel: 'test',
probeRowCount: 1,
ingestTraceLevel: 'trace',
...settings,
},
```
- [ ] **Step 4: Replace the shared fallback reachability test**
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
replace the test named
`keeps the shared-worktree path reachable through explicit private fallback settings`
with this stale-setting regression:
```ts
it('does not support shared-worktree fallback settings', async () => {
const runtime = await makeRealGitRuntime();
try {
const sourceKey = 'legacy-source';
const staleSettings = {
sharedWorktreeSourceKeys: ['legacy-source'],
} as Partial<IngestBundleRunnerDeps['settings']> & Record<string, unknown>;
const { deps, adapter } = makeDeps(runtime, sourceKey, staleSettings);
adapter.chunk.mockResolvedValue({
workUnits: [
{
unitKey: 'legacy-wiki',
rawFiles: ['legacy/page.json'],
peerFileIndex: [],
dependencyPaths: [],
},
],
});
let currentSession: any = null;
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
currentSession = toolSession;
return { toRuntimeTools: vi.fn(() => ({})) };
});
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' };
}
const root = rootOfConfig(currentSession.configService, runtime.configDir);
await mkdir(join(root, 'wiki/global'), { recursive: true });
await writeFile(
join(root, 'wiki/global/legacy-isolated.md'),
'---\nsummary: Legacy isolated write\nusage_mode: auto\n---\n\nLegacy isolated write.\n',
'utf-8',
);
currentSession.actions.push({
target: 'wiki',
type: 'created',
key: 'legacy-isolated',
detail: 'Legacy isolated write',
rawPaths: ['legacy/page.json'],
});
await currentSession.gitService.commitFiles(
['wiki/global/legacy-isolated.md'],
'legacy isolated wiki',
'KTX Test',
'system@ktx.local',
);
return { stopReason: 'natural' };
}) as never;
const runner = new IngestBundleRunner(deps);
await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey);
await expect(
runner.run({
jobId: 'job-legacy-isolated',
connectionId: 'warehouse',
sourceKey,
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload' },
}),
).resolves.toMatchObject({
jobId: 'job-legacy-isolated',
failedWorkUnits: [],
workUnitCount: 1,
});
const trace = await readFile(
join(runtime.configDir, '.ktx/ingest-traces/job-legacy-isolated/trace.jsonl'),
'utf-8',
);
expect(trace).toContain('isolated_diff_enabled');
expect(trace).toContain('work_unit_child_created');
expect(trace).not.toContain('shared_worktree_path_enabled');
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
expect(reportBody?.isolatedDiff).toMatchObject({
enabled: true,
acceptedPatches: 1,
});
} finally {
await rm(runtime.homeDir, { recursive: true, force: true });
}
});
```
- [ ] **Step 5: Run the removal regressions and confirm they fail**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/local-bundle-runtime.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
-t "shared-worktree fallback|stale|defaults local bundle ingest|unlisted direct-writing source"
```
Expected: FAIL. The local runtime still exposes `sharedWorktreeSourceKeys`, and
the stale-setting runner test still reaches `shared_worktree_path_enabled`.
---
### Task 2: Remove the fallback setting and routing module
**Files:**
- Modify: `packages/context/src/ingest/ports.ts`
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
- Delete: `packages/context/src/ingest/isolated-diff/source-routing.ts`
- Delete: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
- [ ] **Step 1: Remove the fallback setting from the runner settings port**
In `packages/context/src/ingest/ports.ts`, replace `IngestSettingsPort` with:
```ts
export interface IngestSettingsPort {
memoryIngestionModel: string;
probeRowCount: number;
workUnitMaxConcurrency?: number;
workUnitStepBudget?: number;
workUnitFailureMode?: 'abort' | 'continue';
ingestTraceLevel?: IngestTraceLevel;
}
```
- [ ] **Step 2: Remove the local runtime source-routing import**
In `packages/context/src/ingest/local-bundle-runtime.ts`, delete this import:
```ts
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
```
- [ ] **Step 3: Remove the local runtime fallback setting**
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the settings
object with:
```ts
settings: {
memoryIngestionModel: options.project.config.llm.models.default ?? 'local-ingest-model',
probeRowCount: 0,
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
ingestTraceLevel: ingestTraceLevelFromEnv(),
},
```
- [ ] **Step 4: Delete the fallback routing helper files**
Delete:
```bash
git rm packages/context/src/ingest/isolated-diff/source-routing.ts
git rm packages/context/src/ingest/isolated-diff/source-routing.test.ts
```
- [ ] **Step 5: Confirm no fallback helper imports remain**
Run:
```bash
rg -n "defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|source-routing" packages/context/src
```
Expected: FAIL with no matches. `rg` exits with status 1 when the cleanup is
complete.
---
### Task 3: Delete the shared-worktree runner branch
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- [ ] **Step 1: Remove helper methods used only by the shared branch**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete these private
methods:
```ts
private buildFailedWorkUnitOutcome(wu: WorkUnit, error: unknown): WorkUnitOutcome {
return {
unitKey: wu.unitKey,
status: 'failed',
reason: error instanceof Error ? error.message : String(error),
preSha: '',
postSha: '',
actions: [],
touchedSlSources: [],
slDisallowed: wu.slDisallowed,
slDisallowedReason: wu.slDisallowedReason,
};
}
private formatWorkUnitFailure(outcome: WorkUnitOutcome): string {
return `WorkUnit ${outcome.unitKey} failed: ${outcome.reason ?? 'unknown failure'}`;
}
private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean {
return (this.deps.settings.sharedWorktreeSourceKeys ?? []).includes(sourceKey);
}
```
- [ ] **Step 2: Make non-override isolated routing unconditional**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace:
```ts
const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey);
```
with:
```ts
const isolatedDiffEnabled = !overrideReport;
```
Then replace:
```ts
if (!overrideReport && isolatedDiffEnabled) {
```
with:
```ts
if (!overrideReport) {
```
- [ ] **Step 3: Delete the old shared-worktree branch**
In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete the whole
branch that starts with:
```ts
} else if (!overrideReport) {
await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', {
sourceKey: job.sourceKey,
reason: 'explicit_private_fallback',
});
```
and ends with:
```ts
latestReportWorkUnits = this.toReportWorkUnits(stageIndex);
}
```
After the deletion, the surrounding code must read:
```ts
}
}
const carryForwardResult =
contextReport && this.deps.contextCandidateCarryforward
? await this.deps.contextCandidateCarryforward.carryForward({
runId: runRow.id,
connectionId: job.connectionId,
sourceKey: job.sourceKey,
})
: null;
```
- [ ] **Step 4: Confirm the branch trace event is gone**
Run:
```bash
rg -n "shared_worktree_path_enabled|explicit_private_fallback|isSharedWorktreeFallbackEnabled|sharedWorktreeSourceKeys" packages/context/src/ingest/ingest-bundle.runner.ts
```
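Expected: no matches. `rg` exits with status 1 (reported as FAIL) once the
shared-worktree branch and its helper are fully removed from the runner.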
---
### Task 4: Update runner tests for isolated-only execution
**Files:**
- Modify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- [ ] **Step 1: Remove the fallback setting from the broad runner test harness**
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the
`settings` block in `buildRunner()` with:
```ts
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
},
```
- [ ] **Step 2: Add no-op isolated patch support to the broad mock Git**
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the
`scopedGit` object in `makeDeps()` with:
```ts
const scopedGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
};
```
- [ ] **Step 3: Update the custom sequencer test Git mock**
In the test named
`refuses to squash-merge when the session worktree has an in-progress sequencer op`,
replace the `sessionGit` object with:
```ts
const sessionGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
};
```
- [ ] **Step 4: Move the failed-WorkUnit integration regression to the isolated suite**
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, delete the test
named `squash-merges only successful WUs into main when one WU fails sl_validate`.
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
add this test near the other real-git isolated runner regressions:
```ts
it('does not integrate failed isolated WorkUnit patches', async () => {
const runtime = await makeRealGitRuntime();
try {
const { deps, adapter } = makeDeps(runtime, 'fake');
adapter.chunk.mockResolvedValue({
workUnits: [
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
],
});
deps.diffSetService.compute = vi.fn().mockResolvedValue({
added: ['good.raw', 'bad.raw'],
modified: [],
deleted: [],
unchanged: [],
});
deps.slValidator.validateSingleSource = vi.fn(
async (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
warnings: [],
}),
) as never;
let currentSession: any = null;
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
currentSession = toolSession;
return { toRuntimeTools: vi.fn(() => ({})) };
});
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
return { stopReason: 'natural' };
}
const unitKey = params.telemetryTags.unitKey;
const root = rootOfConfig(currentSession.configService, runtime.configDir);
await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true });
if (unitKey === 'wu-good') {
await writeFile(join(root, 'semantic-layer/warehouse/good.yaml'), 'name: good\n', 'utf-8');
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'good');
currentSession.actions.push({
target: 'sl',
type: 'created',
key: 'good',
detail: 'good source',
targetConnectionId: 'warehouse',
rawPaths: ['good.raw'],
});
await currentSession.gitService.commitFiles(
['semantic-layer/warehouse/good.yaml'],
'test: add good source',
'KTX Test',
'system@ktx.local',
);
}
if (unitKey === 'wu-bad') {
await writeFile(join(root, 'semantic-layer/warehouse/bad.yaml'), 'name: bad\n', 'utf-8');
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'bad');
currentSession.actions.push({
target: 'sl',
type: 'created',
key: 'bad',
detail: 'bad source',
targetConnectionId: 'warehouse',
rawPaths: ['bad.raw'],
});
await currentSession.gitService.commitFiles(
['semantic-layer/warehouse/bad.yaml'],
'test: add bad source',
'KTX Test',
'system@ktx.local',
);
}
return { stopReason: 'natural' };
}) as never;
const runner = new IngestBundleRunner(deps);
await mockStageRawFiles(
runner,
runtime,
[
['good.raw', 'good-hash'],
['bad.raw', 'bad-hash'],
],
'fake',
);
const result = await runner.run({
jobId: 'job-failed-wu-isolated',
connectionId: 'warehouse',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload' },
});
expect(result.failedWorkUnits).toEqual(['wu-bad']);
await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/good.yaml'), 'utf-8')).resolves.toContain(
'good',
);
await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/bad.yaml'), 'utf-8')).rejects.toThrow();
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
const reportBody = reportCreate?.body as { isolatedDiff?: { acceptedPatches?: number }; failedWorkUnits?: string[] };
expect(reportBody.failedWorkUnits).toEqual(['wu-bad']);
expect(reportBody.isolatedDiff).toMatchObject({ enabled: true, acceptedPatches: 1 });
const trace = await readFile(
join(runtime.configDir, '.ktx/ingest-traces/job-failed-wu-isolated/trace.jsonl'),
'utf-8',
);
expect(trace).toContain('work_unit_failed_before_patch');
expect(trace).toContain('patch_accepted');
expect(trace).not.toContain('shared_worktree_path_enabled');
} finally {
await rm(runtime.homeDir, { recursive: true, force: true });
}
});
```
- [ ] **Step 5: Run the updated focused runner tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
src/ingest/local-bundle-runtime.test.ts \
-t "does not support shared-worktree|does not integrate failed isolated|defaults local bundle ingest|unlisted direct-writing source"
```
Expected: PASS. The traces contain `isolated_diff_enabled`, child worktree
events, and no `shared_worktree_path_enabled`.
- [ ] **Step 6: Run the broad runner suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.test.ts
```
Expected: PASS. Broad runner coverage no longer depends on
`sharedWorktreeSourceKeys`.
- [ ] **Step 7: Commit the runner removal**
Run:
```bash
git add \
packages/context/src/ingest/ports.ts \
packages/context/src/ingest/local-bundle-runtime.ts \
packages/context/src/ingest/local-bundle-runtime.test.ts \
packages/context/src/ingest/ingest-bundle.runner.ts \
packages/context/src/ingest/ingest-bundle.runner.test.ts \
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
git commit -m "refactor(ingest): remove shared worktree WorkUnit path"
```
Expected: commit succeeds. The `source-routing.ts` and `source-routing.test.ts`
deletions were already staged by `git rm` in Task 2, Step 4, so they are not
passed to `git add` (Git rejects a pathspec that no longer matches any file).
The commit still includes both files as deletions.
---
### Task 5: Remove shared-branch agent instructions
**Files:**
- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
- Modify: `packages/context/skills/ingest_triage/SKILL.md`
- Test: `packages/context/src/ingest/ingest-prompts.test.ts`
- Test: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
- [ ] **Step 1: Update the WorkUnit role text**
In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace
the `<role>` block with:
```md
<role>
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit
gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs,
Metabase card JSONs, Notion pages, or similar) and you must translate that
slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass.
You run in an isolated WorkUnit worktree. Deterministic projection output,
existing project memory, and listed dependency paths are visible; sibling
WorkUnit edits from this same job are not visible until the runner integrates
accepted patches.
</role>
```
- [ ] **Step 2: Update the WorkUnit workflow text**
In the same prompt, replace workflow steps 2 and 4 with:
```md
2. Load the per-source review skill first (for example `lookml_ingest`,
`metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and
`wiki_capture`, and `ingest_triage` last. The triage skill tells you how to
react when existing project memory, deterministic projection output, or
prior provenance overlaps with what this WorkUnit is about to write.
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large
files) to load content. Before writing a new SL source or wiki page, call
`discover_data` for each candidate source, table, metric, or topic name to
find existing wiki pages, SL sources, deterministic projection output, prior
sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit
one, and apply any matching canonical pin before deciding whether to edit,
rename, or skip.
```
- [ ] **Step 3: Update the WorkUnit do-not rule**
In the same prompt, replace:
```md
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
```
with:
```md
- Do not silently accept a name collision with visible existing memory,
deterministic projection output, or prior provenance when the formula differs.
Trigger `ingest_triage`.
```
- [ ] **Step 4: Update ingest triage caller guidance**
In `packages/context/skills/ingest_triage/SKILL.md`, replace:
```md
This skill is loaded in two contexts:
- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write.
- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions.
```
with:
```md
This skill is loaded in two contexts:
- By a Stage 3 WorkUnit agent when `sl_discover` results, deterministic
projection output, existing project memory, or prior provenance overlaps with
what the current WorkUnit is about to write.
- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch
overlap, and eviction decisions.
```
- [ ] **Step 5: Update same-ingest wording in ingest triage**
In `packages/context/skills/ingest_triage/SKILL.md`, replace:
```md
4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:**
```
with:
```md
4. **If reconciliation sees accepted patches from this same job with no
prior-sync row, check for same-ingest contradictions:**
```
- [ ] **Step 6: Search for stale shared-state prompt language**
Run:
```bash
rg -n "prior WU|prior-WU|Prior WorkUnits|same job may have already written|visible on the working branch|shared_worktree_path_enabled|shared-worktree path reachable" packages/context/prompts packages/context/skills packages/context/src/ingest
```
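Expected: no matches under `packages/context/prompts` or
`packages/context/skills`. Under `packages/context/src/ingest`, the only
remaining matches are the `expect(trace).not.toContain('shared_worktree_path_enabled')`
assertions in `ingest-bundle.runner.isolated-diff.test.ts` added in Tasks 1 and 4.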
- [ ] **Step 7: Run prompt asset tests**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-prompts.test.ts \
src/ingest/ingest-runtime-assets.test.ts
```
Expected: PASS. Prompt assets still load from packaged KTX assets.
- [ ] **Step 8: Commit the prompt cleanup**
Run:
```bash
git add \
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
packages/context/skills/ingest_triage/SKILL.md
git commit -m "docs(ingest): align WorkUnit prompts with isolated diffs"
```
Expected: commit succeeds.
---
### Task 6: Final verification
**Files:**
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
- Verify: `packages/context/src/ingest/ports.ts`
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
- Verify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
- Verify: `packages/context/skills/ingest_triage/SKILL.md`
- [ ] **Step 1: Run the isolated-diff focused suite**
Run:
```bash
pnpm --filter @ktx/context exec vitest run \
src/ingest/ingest-trace.test.ts \
src/ingest/wiki-body-refs.test.ts \
src/ingest/artifact-gates.test.ts \
src/ingest/semantic-layer-target-policy.test.ts \
src/ingest/isolated-diff/git-patch.test.ts \
src/ingest/isolated-diff/work-unit-executor.test.ts \
src/ingest/isolated-diff/patch-integrator.test.ts \
src/ingest/isolated-diff/textual-conflict-resolver.test.ts \
src/ingest/final-gate-repair.test.ts \
src/ingest/report-snapshot.test.ts \
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
```
Expected: PASS. The output includes the isolated-diff runner tests and no
`source-routing.test.ts`.
- [ ] **Step 2: Run the full context test suite**
Run:
```bash
pnpm --filter @ktx/context run test
```
Expected: PASS.
- [ ] **Step 3: Run context type-check**
Run:
```bash
pnpm --filter @ktx/context run type-check
```
Expected: PASS. There are no `sharedWorktreeSourceKeys` type errors because the
setting no longer exists.
- [ ] **Step 4: Run dead-code checks**
Run:
```bash
pnpm run dead-code
```
Expected: PASS. Knip does not report deleted source-routing exports, and Biome
does not report stale imports.
- [ ] **Step 5: Search for removed legacy path names**
Run:
```bash
rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|shared_worktree_path_enabled|explicit_private_fallback|source-routing" packages docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md
```
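Expected: matches only in this plan file and in the regression tests this plan
adds: the stale `sharedWorktreeSourceKeys` setting and the `not.toHaveProperty` /
`not.toContain` assertions in `local-bundle-runtime.test.ts` and
`ingest-bundle.runner.isolated-diff.test.ts`. There must be no other matches
under `packages/`.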
- [ ] **Step 6: Confirm docs-site does not need an update**
Run:
```bash
rg -n "sharedWorktree|isolatedDiffSourceKeys|sharedWorktreeSourceKeys|executionMode|planningStrategy|conflictPolicy" docs-site README.md packages/*/README.md
```
Expected: either no matches or matches unrelated to a public user-facing knob.
This change removes an internal runner fallback and does not add, remove, or
rename public CLI behavior, configuration, or docs-site content.
- [ ] **Step 7: Confirm the worktree is clean after final verification**
Run:
```bash
git status --short
```
Expected: clean after the two implementation commits. If this command reports
new changes, stop and inspect them before finishing; final verification should
not create extra source changes.
## Self-review
Spec coverage:
- Rollout step 11 is covered by Tasks 1 through 4: the private fallback setting,
helper module, old runner branch, trace event, and fallback tests are deleted.
- The isolated-diff WorkUnit flow remains covered by existing real-git tests and
the new failed-WorkUnit regression in Task 4.
- Agent-facing instructions are aligned with the spec's worktree invariant in
Task 5: sibling WorkUnit edits are not visible inside a child worktree.
- Override ingestion remains outside the WorkUnit execution branch and still
uses prior report materialization plus serial reconciliation.
Placeholder scan:
- This plan contains exact file paths, test names, replacement snippets,
commands, and expected results.
- There are no deferred implementation markers or unspecified edge-case
instructions.
Type consistency:
- `IngestSettingsPort` no longer includes `sharedWorktreeSourceKeys`.
- `isolatedDiffEnabled` remains the runner's internal summary flag and is
equivalent to `!overrideReport`.
- The removed trace event is `shared_worktree_path_enabled`; retained isolated
events include `isolated_diff_enabled`, `work_unit_child_created`, and
`work_unit_patch_collected`.
Execution handoff:
Plan complete and saved to
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md`.
Two execution options:
1. **Subagent-Driven (recommended)** - Dispatch a fresh subagent per task,
review between tasks, and keep iteration fast.
2. **Inline Execution** - Execute tasks in this session using
`superpowers:executing-plans`, with batch execution and checkpoints.

View file

@ -0,0 +1,612 @@
# Isolated-diff ingestion design
**Date:** 2026-05-17
**Author:** Andrey Avtomonov
**Status:** Design - pending implementation plan
## Background
KTX ingests third-party context sources into durable project memory: raw source
snapshots, wiki pages, semantic-layer sources, evidence documents, candidates,
and fallback records. The current bundle runner stages raw source data in one
ingestion session worktree, then runs work units against that same mutable
worktree.
A Metabase ingestion run exposed the failure mode this design addresses. One
work unit inferred and wrote the semantic-layer measure
`mart_account_segments.total_contract_arr_cents`, a later work unit overwrote
the same source with `total_contract_arr`, and the generated wiki page kept
referencing the stale non-existent measure. The local per-work-unit checks did
not catch the final cross-artifact inconsistency because durable writes were
accepted into shared state before final integration.
The fix is not a Metabase-only validation patch. The same class of risk exists
any time LLM-authored work units mutate durable wiki or semantic-layer files:
Metabase cards, Notion pages and clusters, dbt YAML, MetricFlow YAML, Looker
dashboards and explores, and LookML models and views can all produce overlapping
or contested memory artifacts. KTX needs one ingestion execution model that
isolates agent-authored changes, integrates them deliberately, and validates
the final project state globally.
## Goals
This design creates one opinionated ingestion algorithm for all context sources.
Connector-specific code stays responsible for source-shaped work: fetching raw
data, normalizing raw files, planning work units, and optionally projecting
deterministic facts. The shared runner owns execution correctness.
The design has these goals:
- Run all agent-authored durable writes in isolated per-work-unit worktrees.
- Treat each work unit's git diff as its proposal artifact.
- Integrate accepted diffs through a shared artifact-aware merge path.
- Resolve expected cross-work-unit overlap with bounded agent repair before
failing the run.
- Run final global semantic gates before any changes reach the main project
worktree.
- Keep connector variance minimal and source-shaped, not pipeline-shaped.
- Avoid proposal manifests, typed candidates, and extra reporting entities for
the first implementation.
- Preserve deterministic projections for source systems with authoritative
structured metadata.
## Non-goals
This design does not change the wiki frontmatter schema, wiki page file layout,
the semantic-layer YAML format, or the raw source snapshot layouts. It does add
a narrow author-facing inline-code grammar for explicit wiki body references to
semantic-layer entities and raw tables, because body text is part of the
stale-reference failure class. It also does not remove source adapters' current
fetch and chunk logic in one large rewrite.
This design does not introduce public connector knobs such as
`executionMode`, `planningStrategy`, or `conflictPolicy`. The core runner
becomes more opinionated instead.
This design does not require all connectors to stop using candidates. Candidate
storage remains valid for flows that intentionally defer wiki curation. The
isolation model applies when a work unit writes durable project files.
## Locked design direction
The ingestion runner uses one flow for every source that can produce durable
changes.
```text
fetch raw
-> optional deterministic project
-> adapter plans WorkUnit[]
-> isolated WU diffs
-> artifact-aware integration
-> global semantic gates
-> squash
```
The important invariant is that the core runner does not know why a work unit
exists. A dbt adapter may plan by model, Notion may plan by page or cluster,
MetricFlow may plan by graph component, and Looker may plan by dashboard or
explore. Those differences describe the source system. They are not ingestion
execution modes.
## Architecture
The design splits ingestion into two layers with explicit responsibility
boundaries.
### Source adapter layer
The adapter owns source semantics. It fetches raw evidence, normalizes that
evidence into staged files, and plans work units from the staged snapshot and
diff scope.
The adapter may also provide deterministic projectors. A projector is code that
converts authoritative source facts into KTX artifacts without an agent. Good
examples are live database schema introspection and straightforward MetricFlow
semantic-model import.
The isolation-relevant adapter surface remains small:
```ts
interface SourceAdapter {
source: string;
skillNames: string[];
fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void>;
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
resolveSlTargets?(ctx: SlTargetResolutionContext): Promise<string[]>;
}
```
This is the subset the isolated-diff runner needs to understand source-shaped
planning and deterministic projection. It is not a proposal to delete existing
`SourceAdapter` fields. Existing lifecycle and source-support fields such as
`detect`, `readFetchReport`, `listTargetConnectionIds`, `clusterWorkUnits`,
`describeScope`, `onPullSucceeded`, `evidenceIndexing`, `triageSupported`,
`getTriageSignals`, and `reconcileSkillNames` stay part of the adapter contract
until a separate cleanup intentionally removes them with migration impact
called out.
`chunk()` returns ordinary `WorkUnit[]`. The runner does not need a
`planningStrategy` enum because the source adapter can plan by any domain shape
that makes sense.
### Ingestion execution layer
The runner owns correctness, isolation, and integration. After `WorkUnit[]`
exists, all connectors follow the same execution path.
The runner is responsible for:
- creating the ingestion integration worktree from the project base commit;
- committing deterministic projection in the integration worktree before child
worktree creation;
- creating one child worktree per work unit from the post-projection ingestion
base commit;
- scoping tools to the work unit's raw files and allowed target connections;
- running the agent loop inside the work unit worktree;
- validating touched artifacts before accepting the work unit diff;
- collecting the work unit git diff;
- applying accepted diffs into the integration worktree;
- resolving textual and artifact-level conflicts;
- running final global gates; and
- squashing the integration worktree back to the project main worktree.
## Worktree model
The design uses three levels of git state.
```text
project main worktree
ingest integration worktree
per-work-unit worktree(s)
```
The project main worktree is the durable KTX project state. The ingestion
integration worktree stages raw snapshots, deterministic projections, accepted
work-unit diffs, reconciliation changes, and final gate repairs before one
squash merge back to main.
Deterministic projection runs first in the integration worktree, after the raw
snapshot is staged and before any per-work-unit worktree is created. The runner
commits those projector changes as a single projection commit. The integration
worktree's post-projection HEAD is the ingestion base commit referenced in this
design. If the adapter has no projector, the raw-snapshot commit is the
ingestion base commit.
Each per-work-unit worktree starts from the same ingestion base commit. A work
unit never observes another concurrent work unit's transient edits. This makes
the work unit diff a clean proposal against a stable base. Work units observe
deterministic projection outputs, including through `dependencyPaths` context,
and do not re-derive authoritative projected facts.
The integration worktree and each per-work-unit worktree must share one Git
object database, created through `git worktree add` from the same repository.
This is required so `git apply --3way` can resolve the base blobs recorded in
each work-unit patch during integration.
The runner creates and runs child worktrees under the existing
`workUnitMaxConcurrency` setting. A run may have many planned work units, but no
more than that bound may be active or left on disk at once. The default remains
serial execution. Child worktrees must be cleaned up after the diff, transcript,
and outcome metadata are persisted, including failure paths. Adapters with
large fan-out, such as Notion, may use `clusterWorkUnits` before execution to
keep work-unit count tractable, but clustering remains source-shaped planning
rather than a separate execution mode.
## Work-unit lifecycle
Each work unit follows a fixed lifecycle.
1. Create a child worktree at the ingestion base commit.
2. Build a scoped tool session for the child worktree.
3. Run the source skill and agent loop.
4. Run work-unit-local gates against touched artifacts.
5. If gates pass, record the work-unit patch with
`git diff --binary --no-renames <base>..HEAD`, from the ingestion base commit
to the child HEAD.
6. If gates fail, mark the work unit failed and discard the child worktree.
7. Clean up the child worktree after the diff and transcript are persisted.
The work unit outcome stores the existing operational metadata KTX already
records: unit key, status, actions, touched semantic-layer sources, failure
reason, raw files, and transcript path. It does not add a proposal manifest.
The diff is the proposal.
For `slDisallowed` work units, isolation is defense in depth. The scoped
work-unit tools must withhold semantic-layer write and edit tools, and the
integration layer must reject any otherwise accepted diff from that work unit
that touches `semantic-layer/**`. This catches buggy or bypassed tool behavior
before an invalid LookML connection-mismatch write can reach the integration
worktree.
### Diff proposal contract
The proposal artifact is a Git patch with binary-safe content, not the existing
hash-based raw-source `DiffSet`.
The first implementation must use one pinned patch contract:
- collect `git diff --binary --no-renames <base>..HEAD`;
- disable rename and copy detection so renames are represented as delete plus
create in version one;
- preserve mode changes from the patch metadata, but reject unexpected
executable-mode or binary changes under known text artifact roots such as
`wiki/**` and `semantic-layer/**`;
- apply each accepted patch to the integration worktree with
`git apply --3way --index`;
- do not use `git apply --reject`, because partial hunk application is not an
accepted integration state; and
- if patch application fails, leaves conflicts, or touches a path disallowed for
that work unit, roll back the integration worktree to its pre-apply HEAD and
classify the outcome as a textual conflict.
Delete-versus-edit, recreate-versus-edit, and delete-versus-create races are
therefore textual conflicts when Git cannot apply the patch cleanly. If Git
applies the patch but known artifact validators reject the resulting tree, the
outcome is a semantic conflict.
## Integration lifecycle
The integration worktree applies accepted work-unit diffs after local gates
pass. The runner applies diffs in a deterministic order, using the original
work-unit index unless a future implementation introduces explicit dependency
ordering.
Integration has three conflict classes:
- Clean patch application: the diff applies without conflict.
- Textual conflict: git cannot apply the patch cleanly.
- Semantic conflict: the patch applies textually but creates an invalid or
inconsistent artifact.
Textual conflicts are resolved before semantic gates run when a bounded
resolver agent can produce a valid result. Overlapping work-unit writes are
normal, especially for Metabase cards that target the same semantic-layer marts
from different collections. The runner must treat overlap as an integration
case, not as a reason to fail immediately.
Version one is agent-first. If `git apply --3way --index` leaves conflicts,
the runner starts a resolver agent in the integration worktree. The resolver
receives only the failed patch, already-applied patches, conflicted files,
relevant work-unit transcripts, raw evidence paths, and the final-gate rules.
The resolver must preserve all non-conflicting accepted content, resolve
duplicate or competing artifact entries from evidence, and edit only files
touched by the failed patch or already-applied overlapping patches.
The runner then reruns artifact gates for the changed files and continues with
the remaining patches if validation passes. Resolver attempts are capped to
avoid an unbounded repair loop. A run fails only after the bounded resolver
attempts cannot produce a valid integration tree.
Deterministic semantic merge is a later optimization, not a version-one
requirement. After measuring resolver latency, cost, and failure modes, KTX can
add merge helpers for common semantic-layer YAML cases, such as additive
`measures`, `segments`, `columns`, `joins`, and `descriptions` updates keyed by
their stable logical identifiers. Those helpers can replace agent calls for
mechanical merges once the measured v1 behavior justifies the added complexity.
On failure, the runner preserves the integration worktree, including any
conflict markers or resolver edits, together with the work-unit patches,
transcripts, trace events, and the failure report. The runner never squashes a
failed or partially repaired integration tree back to the project main worktree.
### Gate repair stage
The gate repair stage handles cases where patches apply cleanly but the
combined tree fails final semantic or wiki gates. This is distinct from textual
conflict resolution: the tree is textually valid, but the artifacts violate KTX
contracts.
After each patch integration and after reconciliation, the runner runs final
artifact gates for the affected scope. If gates fail, the runner classifies the
errors before deciding whether to repair or fail.
Repairable gate errors include:
- stale wiki body references to renamed semantic-layer entities;
- invalid `sl_refs` entries that point to entities instead of sources;
- inline prose that accidentally uses explicit SL reference syntax;
- duplicate measures, segments, or joins with equivalent definitions;
- missing or stale wiki references created by accepted patches; and
- join or source references that can be corrected from the composed manifest
and work-unit evidence.
High-risk gate errors fail without automatic repair unless a later
implementation adds a stronger evidence contract:
- two work units define the same measure with different business meaning;
- a required warehouse table or column does not exist;
- a SQL source fails execution and no obvious localized rewrite exists; or
- the repair would require choosing between conflicting facts without evidence.
For repairable errors, the runner starts a gate repair agent with the exact
gate errors, changed files, relevant work-unit transcripts, raw evidence paths,
and final-gate rules. The agent may edit only the files involved in the gate
failure. The runner reruns gates after each repair attempt and caps attempts to
one or two passes per integration stage. If the tree still fails, the run stops
with the final gate report and preserved integration worktree.
### Reconciliation in the new flow
Reconciliation remains a shared runner stage, but it runs as a serial
integration-stage pass instead of a parallel work unit.
The runner applies all accepted work-unit diffs to the integration worktree,
resolves textual conflicts that can be resolved, and then runs reconciliation in
that integration worktree before final global gates and before squash.
Reconciliation must see the integrated state because its job is to resolve
cross-work-unit duplicates, evictions, fallbacks, and source-specific
reconcile guidance.
Reconciliation runs exactly once per integration pass, serially against the
integration worktree, after all accepted work-unit diffs have been applied and
after textual conflicts are resolved. It never runs inside a child worktree and
never overlaps with work-unit execution. This is the safety carve-out from the
isolation goal: concurrent agent writes are the failure mode being avoided, and
reconciliation is non-concurrent by construction.
Reconciliation is not allowed to mutate project main directly. Its changes are
captured as a reconciliation diff against the pre-reconciliation integration
HEAD and recorded in the existing stage/report metadata. Reconciliation gates
validate the artifacts touched by the reconciliation diff plus any wiki page or
semantic-layer source referenced by changed frontmatter or body references,
using the same artifact-class validators as work-unit gates. Reconciliation may
write only to target connections authorized by the adapter for the ingest run,
but it is not subject to any single work unit's `slDisallowed` scope. The final
global gates validate the combined tree after reconciliation. If reconciliation
introduces an invalid wiki or semantic-layer reference, touches an unauthorized
target, or records an unresolvable artifact conflict, the runner sends
repairable failures through the gate repair stage and stops before squash only
when bounded repair cannot produce a valid tree.
## Artifact-aware integration
KTX durable artifacts are structured enough that git-only merge is not a strong
correctness boundary. Artifact-aware integration must parse and validate known
file classes after diffs are applied.
The first implementation must cover these worktree file classes:
- semantic-layer source YAML;
- wiki markdown frontmatter; and
- wiki body references to semantic-layer sources, measures, dimensions, and raw
warehouse tables.
Unmapped fallback records are not worktree files in version one. They remain
typed stage-index and report records emitted by `emit_unmapped_fallback`; the
integration layer validates their raw paths and structured reason codes as
report metadata, not as mergeable artifacts.
Provenance also stays out of the worktree in version one. The source of truth is
the ingest provenance store and report body. Before inserting provenance rows,
the global gate derives the planned rows from accepted work-unit actions,
reconciliation actions, artifact-resolution records, and skipped raw files, then
checks those rows against the integrated worktree and staged raw hashes. Moving
provenance to on-disk files would be a separate schema migration, not part of
this design.
Artifact-resolution records are the existing merged or subsumed reconciliation
outputs emitted through `emit_artifact_resolution` as
`ArtifactResolutionRecord` stage-index records. They are in-memory stage
records, not worktree files, and they feed the provenance gate.
Artifact-aware integration starts with validation plus bounded agent repair.
It does not need semantic-layer YAML merge helpers in version one. If two diffs
contest the same source YAML or wiki page and bounded agent repair cannot prove
correctness, the runner must stop rather than silently accepting stale
references. Deterministic semantic merge helpers can be added after v1 metrics
show which conflicts are frequent, mechanical, and worth optimizing.
## Global semantic gates
Final gates run after every accepted diff, deterministic projection, and
reconciliation change has landed in the integration worktree. These gates are
global because some failures emerge only after independently valid diffs
combine.
The final gates must include:
- semantic-layer validation for touched and dependency sources;
- wiki `wiki_refs` validation;
- wiki frontmatter `sl_refs` validation, including source-level and
measure-level references;
- wiki body validation for explicit semantic-layer source, measure, dimension,
and table references; and
- provenance validation for raw paths referenced by new or changed artifacts
before those rows are inserted into SQLite.
For semantic-layer validation, touched sources are sources changed by accepted
work-unit diffs, deterministic projection, or reconciliation. Dependency sources
are their direct declared-join neighbors in the composed semantic-layer graph,
including sources they join to and sources that join to them. Version one runs
the existing whole-connection structural checks and source-scoped checks with
the touched-and-dependency source set; it does not expand dependency scope to a
transitive SQL-projection closure.
The wiki body gate needs a narrow grammar so ordinary prose does not become a
semantic-layer reference. In version one, an explicit body reference is one of
these Markdown forms outside fenced code blocks:
- an inline code token in the form `source.entity`, where both parts are plain
identifier tokens, `source` matches a visible semantic-layer source, and
`entity` must match one of that source's measures, dimensions, or segments;
- an inline code token in the form `connectionId/source.entity`, where
`source.entity` follows the same plain-identifier rule and validates against
that specific target connection;
- an inline code token in the form `source:source_name`, which validates a
source-level semantic-layer reference; or
- an inline code token in the form `table:qualified_table_name`, which validates
a raw warehouse table reference against the visible raw table/catalog sources.
The parser ignores unformatted prose, fenced SQL examples, wildcard patterns
such as `mart_nrr_quarterly.*_arr_cents`, inline SQL predicates such as
`users.is_internal = false`, and unprefixed single-token inline code. Two-part
inline code that does not name a visible semantic-layer source is not treated
as an SL entity reference; use the `table:` prefix for raw warehouse table
references.
The `total_contract_arr_cents` incident is the regression case for this gate:
the integrated tree must fail if a wiki page references
`mart_account_segments.total_contract_arr_cents` as an inline-code body token
while the final semantic-layer source defines only `total_contract_arr`.
## Deterministic projection
Some connectors have authoritative structured inputs that do not need an LLM to
write KTX artifacts. Those connectors can provide deterministic projectors that
run in the integration worktree.
Projection is different from work-unit execution:
- projectors are code, not agents;
- projectors run against the integration worktree;
- projectors produce ordinary durable file changes; and
- projector outputs still pass final global gates.
The runner infers hybrid behavior from the adapter. If an adapter has both
projectors and work units, it is hybrid. If it has only projectors, it is
deterministic. If it has only work units, it uses isolated diffs. No public
`executionMode` knob is needed.
## Connector migration notes
Each connector keeps its source-shaped planning logic. The migration changes
where durable writes happen and how they are integrated.
### Metabase
Metabase must move first because it produced the observed stale-measure wiki
reference. Collection and card chunking can remain adapter-specific, but direct
wiki and semantic-layer writes must happen in per-work-unit worktrees.
The regression test must reproduce two work units that touch
`mart_account_segments`: one writes a wiki reference to an inferred measure and
the other leaves the final source with a different measure name. The final global
gate must reject the integrated tree.
### dbt
dbt uses source-shaped planning by model or schema file. Deterministic
projection is appropriate for straightforward model, source, column, and
description facts when dbt artifacts are authoritative. Agent work units remain
useful for business wiki synthesis, ambiguous relationship interpretation, and
enrichment that is not directly represented in dbt YAML.
### MetricFlow
MetricFlow uses source-shaped planning by graph component. Existing
deterministic semantic-model import code becomes a projector in the ingestion
flow. Agent work units handle unsupported constructs, cross-model explanations,
and wiki synthesis.
### Looker
Looker already defers some dashboard and look knowledge through candidates.
That can continue. Any direct semantic-layer writes from explores or query
translation must run through isolated work-unit diffs.
Looker-specific API and file-adapter collisions remain connector domain logic,
but final correctness still belongs to the shared integration gates.
### LookML
LookML already has useful source-shaped ownership rules: models, views, orphan
views, dashboards, and connection-mismatch guards. Those rules stay in the
adapter. Direct semantic-layer writes move into isolated work-unit diffs.
Connection-mismatch work units can keep their existing write restrictions. The
runner enforces those restrictions through scoped tools and target connection
resolution.
### Notion
Notion pages and clusters can create overlapping durable wiki knowledge and can
write semantic-layer overlays after warehouse verification. Notion therefore
uses the same isolated-diff execution model for direct durable writes.
Large Notion workspaces still need source-shaped clustering to control context
size and cost. Clustering remains adapter logic; correctness comes from isolated
diffs and final global gates.
## Minimal connector variance
New connectors must not choose from a menu of ingestion architectures. They
must provide the small amount of source-specific behavior the shared runner
needs.
Every connector answers these questions:
- How does KTX fetch or receive raw evidence?
- How does KTX normalize that evidence into staged files?
- How does KTX split the staged evidence into `WorkUnit[]`?
- Are any source facts authoritative enough for deterministic projection?
- Which target semantic-layer connections can the connector write to?
Everything else is shared runner behavior.
## Regression tests
The implementation plan must start with narrow tests that prove the new
execution model prevents the known failure class.
The first test creates a fake or Metabase-like adapter with two work units
starting from the same base:
1. Work unit A writes a wiki page that references
`mart_account_segments.total_contract_arr_cents` as an inline-code body
token.
2. Work unit B writes or overwrites the final semantic-layer source with only
`total_contract_arr`.
3. Both work units pass their local gates in isolation.
4. Integration applies both diffs.
5. The final global gate fails the run before squash.
Additional tests cover:
- two work units editing different wiki pages without conflict;
- two work units editing the same semantic-layer overlay with additive changes,
where the resolver agent preserves both changes and gates the repaired file;
- two work units editing the same semantic-layer overlay with incompatible
definitions, where the resolver agent receives the conflict context and the
run fails only after bounded repair attempts cannot prove a result;
- a textual conflict in a wiki page where the resolver agent preserves
non-conflicting accepted content and gates the repaired page before squash;
- a cleanly merged tree that fails final gates, where the gate repair agent
fixes a stale wiki reference and the run continues;
- an unrepairable final-gate failure, such as a missing warehouse column, where
the runner stops with a preserved integration worktree and report;
- a hybrid adapter case where deterministic projector outputs are visible in a
child worktree before work-unit wiki synthesis, and the final global gate
catches any stale reference to a non-existent projected semantic-layer entity;
- Notion-style direct wiki writes with invalid `sl_refs`; and
- LookML-style `slDisallowed` work units where write tools are unavailable and
integration rejects any diff that still touches `semantic-layer/**`.
## Rollout
The rollout must be incremental because the current runner is shared by all
adapters.
The rollout switch is runner-owned. During migration it may be a private
per-source allowlist, or an internal `IngestSettingsPort` map keyed by
`sourceKey`, but it must not become a `SourceAdapter` field or public connector
configuration knob.
1. Add the per-work-unit worktree executor behind that internal runner setting.
2. Add diff collection and deterministic integration in the existing runner.
3. Add bounded resolver-agent handling for textual conflicts.
4. Add final global wiki and semantic-layer reference gates, including the wiki
body reference parser defined above.
5. Add bounded gate-repair-agent handling for repairable final-gate failures.
6. Instrument resolver latency, attempts, repaired files, and failure classes.
7. Migrate Metabase to the new execution path first.
8. Migrate Notion, LookML, Looker, dbt, and MetricFlow.
9. Add deterministic semantic merge helpers only after v1 metrics show which
agent repairs are frequent and mechanical enough to justify optimization.
10. Promote the new path to the default after the Metabase regression test and
at least one non-Metabase connector pass.
11. Remove the old shared-worktree work-unit execution path.
The rollout is complete when every connector that permits agent-authored durable
writes uses isolated diffs and all integrations pass the same final global
gates.

View file

@ -0,0 +1,443 @@
# Adapter-owned ingest finalization design
**Date:** 2026-05-18
**Author:** Andrey Avtomonov
**Status:** Design - pending implementation plan
## Background
The isolated-diff ingestion migration made KTX's shared bundle runner
responsible for one durable execution model: stage raw source data, run
source-planned work units in isolated child worktrees, integrate their diffs,
reconcile, run final gates, and squash the accepted integration tree back into
the project worktree.
That direction is correct, but the current code still has a runner-level
post-processing extension point. `IngestBundleRunnerDeps.postProcessors` maps a
source key to an arbitrary `IngestBundlePostProcessorPort`, and local runtime
wires `historic-sql` to `HistoricSqlProjectionPostProcessor`. That path can
write durable semantic-layer and wiki artifacts after work-unit integration and
reconciliation, outside the source adapter contract.
Historic SQL exposed why the extra path exists. Its table and pattern work units
emit typed evidence, then a deterministic projection step merges the evidence
into `_schema` usage and historic-SQL wiki pages. Some of that work is local to
one work unit, but other behavior is whole-run maintenance: marking stale table
usage, reusing existing pattern pages, and archiving old pattern pages. Those
aggregate decisions do not fit cleanly inside independent per-work-unit writes.
The design goal is to preserve legitimate adapter-owned deterministic
maintenance without keeping a generic runner-level escape hatch.
## Goals
This design tightens the isolated-diff architecture around a stable boundary:
the generic runner owns execution mechanics, and adapters own source semantics.
The design has these goals:
- Remove runner-level `postProcessors` as an alternate durable-write pipeline.
- Add a first-class `SourceAdapter.finalize?()` hook for deterministic
post-work-unit source maintenance.
- Keep `finalize?()` constrained, observable, and subject to the same final
validation gates as work-unit and reconciliation changes.
- Preserve historic-SQL aggregate projection behavior without treating it as a
hidden fallback ingestion path.
- Keep public execution knobs out of the adapter API.
## Non-goals
This design does not rework source-specific chunking, fetch formats, wiki page
frontmatter, semantic-layer YAML, or raw source layouts. It does not replace
agent-authored work units with deterministic projectors. It also does not add a
public `executionMode`, `planningStrategy`, `conflictPolicy`, or source-key
allowlist.
Override ingest remains a special correction operation that reuses a prior raw
snapshot and forces reconciliation. It should be documented and tested as
override replay, not as a fallback pipeline. This design does not require
override ingest to run source work units.
## Locked design direction
The shared ingestion runner keeps one ordered pipeline for sources that can
write durable project artifacts.
```text
fetch raw
-> adapter plans WorkUnit[]
-> optional adapter project
-> isolated WU diffs
-> artifact-aware integration
-> reconciliation
-> optional adapter finalize
-> runner wiki-SL-ref repair
-> final target policy and artifact gates
-> squash
```
The exact implementation may continue to call `chunk()` before `project()` so a
projector can consume `parseArtifacts`. The architectural invariant is that
`project()` runs in the integration worktree before child worktrees start, while
`finalize()` runs in the integration worktree after accepted work-unit and
reconciliation changes are present.
Adapters decide what source-specific work belongs in `project()`, work units,
or `finalize()`. The runner decides when those phases run, captures their git
effects, enforces target scope, runs gates, writes traces and reports, and
squashes the final tree.
## Adapter API
The source adapter contract should make deterministic source phases explicit.
```ts
interface SourceAdapter {
readonly source: string;
readonly skillNames: string[];
readonly reconcileSkillNames?: string[];
readonly evidenceIndexing?: 'documents';
readonly triageSupported?: boolean;
getTriageSignals?(stagedDir: string, externalId: string): Promise<TriageSignals>;
detect(stagedDir: string): Promise<boolean>;
fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void>;
readFetchReport?(stagedDir: string): Promise<SourceFetchReport | null>;
listTargetConnectionIds?(stagedDir: string): Promise<string[]>;
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]>;
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
finalize?(ctx: DeterministicFinalizationContext): Promise<FinalizationResult>;
describeScope?(stagedDir: string): Promise<ScopeDescriptor>;
onPullSucceeded?(ctx: PullSucceededContext): Promise<void>;
}
```
`finalize?()` is not a compatibility wrapper for old post-processors. It is a
source-adapter method with a fixed location in the runner lifecycle.
```ts
interface DeterministicFinalizationContext {
connectionId: string;
sourceKey: string;
syncId: string;
jobId: string;
runId: string;
stagedDir: string;
workdir: string;
parseArtifacts?: unknown;
stageIndex: StageIndex;
workUnitOutcomes: WorkUnitOutcome[];
reconciliationActions: MemoryAction[];
overrideReplay?: FinalizationOverrideReplay;
}
interface FinalizationResult {
warnings: string[];
errors: string[];
touchedSources: TouchedSlSource[];
changedWikiPageKeys: string[];
actions?: MemoryAction[];
result?: unknown;
}
interface FinalizationOverrideReplay {
priorJobId: string;
priorRunId: string;
priorSyncId: string;
evictionRawPaths: string[];
}
```
The implementation plan can adjust exact type names to match the existing
module layout, but the contract must preserve these semantics:
- `finalize?()` is deterministic TypeScript code, not an agent loop.
- It runs only in the ingestion integration worktree.
- It may write ordinary durable project files.
- It must report the semantic-layer sources and wiki page keys it believes it
touched so the runner can verify that declaration against the worktree diff.
- Outside override replay, `stageIndex` is the canonical runner index for
accepted work-unit actions, touched sources, evictions, reconciliation records,
and artifact resolutions visible to the current run.
- In override replay, `stageIndex` is a prior-run replay index for work-unit
facts. It may contain prior-run work-unit actions, touched sources, and
artifact records, and adapters must not treat those entries as current-run
evidence. The runner must not replay prior-report `evictionsApplied` as
current-run eviction evidence. If override reconciliation records eviction
decisions, those records are fresh current-run `stageIndex.evictionsApplied`
entries.
- `workUnitOutcomes` contains only work units executed in the current run. It
is empty when override replay skips source work units.
- `reconciliationActions` contains only accepted reconciliation writes emitted
through the reconciliation tool session in the current run. These actions have
already mutated the integration worktree.
- `overrideReplay` being present is the canonical signal that source work units
did not produce current-run evidence unless another context field explicitly
carries fresh current-run deterministic input.
- `overrideReplay.evictionRawPaths` contains the deleted raw paths loaded from
the prior report's `evictionInputs` for the reused raw snapshot. It is the
only override-replay raw-path allowlist for removed-from-snapshot provenance.
It is not, by itself, proof that a particular durable artifact is stale or was
observed by current-run work units.
- `actions` in `FinalizationResult` are descriptive records for finalization
writes that the adapter already performed. The runner must not re-apply them.
When finalization actions are intended to create provenance rows, they must
carry defensible `rawPaths`: current-snapshot paths from the current raw
snapshot, removed-from-snapshot paths from current-run
`stageIndex.evictionsApplied`, or removed-from-snapshot paths from
`overrideReplay.evictionRawPaths` when override replay is present.
Finalization actions without defensible raw-path attribution are still
reported, but the runner must exclude them from provenance and surface that
exclusion explicitly.
- `finalize?()` cannot mutate the main project worktree directly.
- The finalization context must not pass a root-scoped service that can bypass
the integration worktree. `workdir` is the durable write boundary. If a future
helper is added to the context, the contract must name it as worktree-scoped
and state whether it is read-only or allowed to write.
The existing adapter API fields unrelated to deterministic projection and
finalization remain part of the contract. Adding `finalize?()` must not remove
triage or evidence-indexing support.
## Override replay
Override ingest remains a replay of a prior raw snapshot with forced
reconciliation. It does not execute source work units or call `adapter.chunk()`
in this design, so finalization must not silently assume fresh work-unit
evidence exists.
The runner should still enter the finalization phase for adapters that
implement `finalize?()`, but it must pass explicit override metadata. In that
mode, `workUnitOutcomes` is empty, `parseArtifacts` is absent,
`overrideReplay.evictionRawPaths` is populated from the prior report's
`evictionInputs`, `stageIndex` comes from the prior report with prior
`evictionsApplied` excluded, and `reconciliationActions` contains only new
override reconciliation actions.
If a future implementation intentionally re-parses the materialized override
raw snapshot, it must expose that fact through an explicit override-safe context
field instead of relying on `parseArtifacts` alone. `parseArtifacts` by itself
is never current work-unit evidence in override replay and never authorizes
historic-SQL whole-run cleanup.
Adapters must treat missing current-run deterministic inputs as a no-op, not as
negative evidence. For historic SQL, override replay must not mark tables stale,
mark pattern pages stale, or archive pattern pages from an empty current-run
evidence directory. Whole-run cleanup can run only when `overrideReplay` is
absent and current-run work-unit evidence exists, or when a future explicit
override-safe context field names equivalent facts. Any override-safe
finalization must be derived from the materialized raw snapshot or explicit
prior-report data. In particular, prior-run
`stageIndex.workUnits[*].actions`, prior-run touched sources, and prior-run
artifact records are not proof that the current override run observed or failed
to observe those artifacts.
## Runner responsibilities
The runner owns all reusable mechanics around `finalize?()`.
After reconciliation completes, the runner calls `adapter.finalize?()` if it
exists. The runner captures the pre-finalization commit, derives the
finalization changed paths from the integration-worktree git diff, commits those
changes, records the commit SHA and touched paths in the run trace/report,
includes finalization actions in saved-memory counts, and runs wiki-SL-ref
repair before final target-policy and artifact gates.
The integration-worktree diff is the source of truth for finalization touched
paths, changed wiki page keys, and semantic-layer paths. The adapter's
`touchedSources` and `changedWikiPageKeys` declaration is a verification input,
not the downstream authority. The runner must derive the final repair and gate
scope from the diff, cross-check the adapter declaration against that diff, and
fail the run on under-reporting or over-reporting that would make wiki-SL-ref
repair, target-policy checks, final gates, reports, traces, or provenance use a
different artifact set from the actual finalization commit.
The runner-derived semantic-layer scope must include logical
`TouchedSlSource` tuples, not only file paths. Standalone semantic-layer files
under `semantic-layer/<connectionId>/<sourceName>.yaml` can map structurally to
`{ connectionId, sourceName }`. Aggregate semantic-layer files, including
`semantic-layer/<connectionId>/_schema/*.yaml`, must be resolved by comparing
the pre-finalization and post-finalization materialized semantic-layer sources
with the worktree-scoped semantic-layer parser/loader. Wiki page keys continue
to map structurally from `wiki/global/<pageKey>.md`. If the runner cannot
resolve a changed semantic-layer path to logical touched sources with its own
resolver, the run must fail; it must not fall back to the adapter declaration as
the downstream scope.
`wiki_sl_ref_repair` remains a runner mechanic, not an adapter method. It runs
after finalization and before final gates, and it uses the normal target
connection set plus the runner-derived finalization touched sources to decide
which semantic-layer references are visible. Its writes are part of the same
integration worktree diff as finalization/reconciliation, so target-policy
checks, final artifact gates, reports, traces, and squash behavior cover those
writes before changes reach the main project worktree.
The runner must treat finalization like deterministic projection and
reconciliation, not like a free-form source-key plug-in. It must enforce the
same target-connection policy used for work-unit and reconciliation changes.
If finalization writes an unauthorized semantic-layer target, modifies artifacts
outside the authorized target set, references a missing semantic-layer entity, or
returns errors, the run fails before changes reach the main project worktree.
The runner should expose one trace phase named `finalization`. It should not
keep a `post_processor` stage, `IngestBundlePostProcessorPort`,
`deps.postProcessors`, or report fields that imply a parallel post-processor
pipeline.
## Adapter application
Each adapter continues to use the same generic runner mechanics, while keeping
source-specific choices inside the adapter.
- `metabase` fetches cards and dashboards, computes scope, plans
card/dashboard work units, and usually does not need `project()` or
`finalize()`.
- `notion` fetches pages, extracts triage signals, clusters page work units,
and usually does not need deterministic finalization.
- `dbt` fetches the repository, parses dbt project metadata, plans model work
units, and may later add `project()` if dbt YAML import becomes deterministic.
- `lookml` fetches LookML, produces validation artifacts, plans model and
explore work units, and may later add `project()` for deterministic
LookML-to-semantic-layer import.
- `looker` fetches runtime bundles, fetch reports, target connections, and
triage signals. It continues to rely on work-unit diffs and shared gates.
- `metricflow` is the current strong `project()` example. It imports
authoritative semantic models before child worktrees start, then lets any
work units observe those projected files.
- `live-database` can remain work-unit based, but database schema introspection
is a good future `project()` candidate because the schema is authoritative
structured metadata.
- `historic-sql` should move current post-processor behavior into the adapter.
Local table-usage and pattern-page writes may move into work-unit tools where
they are genuinely per-unit. Whole-run maintenance such as stale table usage,
pattern-page reuse, and stale/archive page decisions belongs in
`HistoricSqlSourceAdapter.finalize()`.
- `fake` remains a test adapter and does not need deterministic phases.
## Historic-SQL migration
Historic SQL should stop using evidence-only tool output plus runner-level
post-processing as its durable projection path.
The preferred migration is:
1. Keep historic-SQL work units responsible for source-shaped analysis.
2. Use source-specific tools for per-unit durable writes when the output is
local to that unit, such as a table's usage metadata or one pattern page.
3. Move whole-run deterministic cleanup into
`HistoricSqlSourceAdapter.finalize()`.
4. Delete `HistoricSqlProjectionPostProcessor`, `IngestBundlePostProcessorPort`,
`deps.postProcessors`, and `post_processor` memory-flow/report stages.
If the implementation keeps typed evidence as an internal handoff between
historic-SQL work units and `finalize()`, that evidence must be framed as
source-specific input to the adapter's deterministic finalization, not as a
generic runner post-processing mechanism. The evidence files must not become a
public compatibility surface.
Historic-SQL finalization must distinguish "no current-run evidence exists"
from "the current snapshot proves this artifact is stale." Whole-run cleanup
such as stale table usage, pattern-page staleness, and archive decisions can
run only when finalization has current-run historic-SQL evidence or an explicit
override-safe source of equivalent facts.
## Reports and observability
Reports should describe first-class pipeline phases, not historical extension
points. The isolated-diff summary should include finalization metadata when the
adapter implements `finalize?()`: whether it ran, finalization commit SHA,
touched paths, touched semantic-layer sources, changed wiki page keys,
warnings, descriptive finalization actions, and source-specific result payload.
Saved-memory counts should come from work-unit, reconciliation, and
finalization memory actions plus touched artifact reporting. Finalization
actions are reporting/provenance records for writes that already happened in
the integration worktree; they are not a second write channel. There should be
no special `postProcessorSavedMemoryCounts` or `postProcessor` report body.
Memory-flow phases should use `finalization` instead of `post_processor`.
The runner owns provenance for finalization. Adapters return touched artifacts
and optional descriptive actions, but they do not call the provenance port.
When finalization actions include valid `rawPaths`, the runner folds them into
the normal provenance plan using the current `sourceKey`, `syncId`, raw content
hashes, artifact kind, artifact key, target connection, and action type. The
finalization phase and commit SHA belong in trace/report metadata; they should
not be fabricated inside adapter-written files.
Finalization reports must show both the adapter-declared touched artifacts and
the runner-derived touched artifacts from the finalization git diff. When those
sets differ, the report and trace must include the mismatch and the run must
fail before wiki-SL-ref repair or final gates rely on the wrong scope. When a
finalization action is excluded from provenance because no defensible raw path
exists, the report must name the action and reason instead of silently dropping
it.
Traces must make finalization useful for postmortems. At minimum, record
`finalization_started`, `finalization_committed`, `finalization_skipped`, and
`finalization_failed` events with source key, touched paths, warnings, and
error summaries.
## Failure handling
Finalization failures are ingestion failures. If `finalize?()` returns errors,
throws, writes unauthorized targets, or causes final gates to fail, the runner
marks the run failed and leaves the main project worktree unchanged.
Finalization should run after reconciliation because it may need to inspect the
accepted work-unit and reconciliation result. Final gates should run after
finalization because finalization writes durable project artifacts.
Finalization must not be used to repair arbitrary integration conflicts or
rerun agent work. Conflict repair remains part of artifact-aware integration and
reconciliation.
Finalization must also preserve reconciliation and accepted work-unit writes
from the same run. The runner must remember the paths changed before
finalization and fail if `finalize?()` modifies the same path after
reconciliation. If a source needs deterministic maintenance for an artifact
created or edited by a work unit in the same run, that behavior belongs in the
source-specific work-unit tool or in a later run, not in post-reconciliation
finalization.
## Acceptance criteria
The implementation is complete when these conditions are true:
- No production runtime wiring references `deps.postProcessors`.
- `IngestBundlePostProcessorPort` and `HistoricSqlProjectionPostProcessor` are
removed from source exports and package export tests.
- `SourceAdapter.finalize?()` exists with typed context and result objects.
- The runner invokes `finalize?()` after reconciliation and before final gates.
- Finalization changes are committed in the integration worktree and included
in target-policy checks, final gates, reports, traces, and provenance inputs.
- Override replay passes explicit override metadata to finalization, including
`overrideReplay.evictionRawPaths`; leaves `workUnitOutcomes` empty when work
units are skipped; omits `parseArtifacts` unless a future explicit
override-safe input is added; and demonstrates that historic-SQL finalization
neither uses prior-run `stageIndex` records as current-run evidence nor marks
artifacts stale or archived when current-run evidence is missing.
- Finalization provenance uses current raw paths, current-run
`stageIndex.evictionsApplied`, or `overrideReplay.evictionRawPaths`, and
actions without defensible raw-path attribution are reported as excluded from
provenance.
- The runner derives finalization touched paths, wiki page keys, and
semantic-layer scope from the integration-worktree git diff, resolves
aggregate semantic-layer files such as `_schema/*.yaml` to logical touched
sources with the runner's own semantic-layer parser/loader, cross-checks the
adapter's touched-artifact declaration, and fails on mismatches or
unresolvable changed semantic-layer paths.
- The runner fails when finalization modifies a path already changed by accepted
work-unit or reconciliation writes in the same run.
- `wiki_sl_ref_repair` remains a runner-owned step after finalization and
before final gates, consumes runner-derived finalization touched sources, and
has its writes covered by target-policy checks and final gates.
- Finalization `actions` are not re-applied by the runner; they are included
only in reporting, saved-memory counts, and provenance planning when their
raw-path attribution is valid.
- Historic SQL uses adapter-owned finalization for whole-run projection
maintenance.
- Tests cover a successful finalization, a finalization failure, unauthorized
finalization target rejection, override replay finalization behavior,
wiki-SL-ref repair placement, and historic-SQL projection behavior without
runner-level post-processors.

View file

@ -11,6 +11,7 @@
},
"scripts": {
"artifacts:build": "node scripts/package-artifacts.mjs build",
"artifacts:build-runtime": "node scripts/package-artifacts.mjs build-runtime",
"artifacts:check": "node scripts/package-artifacts.mjs check",
"artifacts:live-db-smoke": "node scripts/installed-live-database-smoke.mjs",
"artifacts:verify": "node scripts/package-artifacts.mjs verify",

View file

@ -1,5 +1,7 @@
import { cancel, confirm, isCancel, log, spinner } from '@clack/prompts';
const ESC = String.fromCharCode(0x1b);
export interface KtxCliSpinner {
start(message: string): void;
message(message: string): void;
@ -7,6 +9,10 @@ export interface KtxCliSpinner {
error(message: string): void;
}
/**
 * Output sink accepted by `createStaticCliSpinner`.
 * Only a `stderr` object with a `write(chunk)` method is required.
 */
export interface KtxCliSpinnerIo {
stderr: { write(chunk: string): void };
}
export interface KtxCliPromptAdapter {
confirm(options: { message: string; initialValue?: boolean }): Promise<boolean>;
cancel(message: string): void;
@ -31,6 +37,31 @@ export function createClackSpinner(): KtxCliSpinner {
return spinner();
}
/**
 * Wraps `text` in the ANSI escape sequences `ESC[35m` (magenta foreground)
 * and `ESC[39m` (default foreground).
 */
function magenta(text: string): string {
  const open = `${ESC}[35m`;
  const close = `${ESC}[39m`;
  return open + text + close;
}
/**
 * Wraps `text` in the ANSI escape sequences `ESC[31m` (red foreground)
 * and `ESC[39m` (default foreground).
 */
function red(text: string): string {
  const open = `${ESC}[31m`;
  const close = `${ESC}[39m`;
  return open + text + close;
}
/**
 * Creates a spinner implementation that does not animate: every lifecycle call
 * writes exactly one newline-terminated line to `io.stderr`, prefixed with a
 * colored glyph (magenta for start/message/stop, red for error).
 *
 * @param io - Sink whose `stderr.write` receives each rendered line.
 * @returns A `KtxCliSpinner` whose methods only write to `io.stderr`.
 */
export function createStaticCliSpinner(io: KtxCliSpinnerIo): KtxCliSpinner {
  // Single place that defines the "<glyph> <message>\n" line format.
  const writeLine = (glyph: string, message: string | undefined): void => {
    io.stderr.write(`${glyph} ${message}\n`);
  };

  return {
    start: (message) => writeLine(magenta('◐'), message),
    message: (message) => writeLine(magenta('│'), message),
    stop: (message) => writeLine(magenta('◇'), message),
    error: (message) => writeLine(red('■'), message),
  };
}
export function createClackPromptAdapter(): KtxCliPromptAdapter {
return {
async confirm(options) {

View file

@ -11,7 +11,13 @@ function stubIo(): KtxCliIo {
}
function stubPackageInfo(): KtxCliPackageInfo {
return { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' };
return {
name: '@ktx/cli',
version: '0.0.0-test',
packageVersion: '0.0.0-private',
runtimeVersion: '0.0.0-test',
contextPackageName: '@ktx/context',
};
}
describe('buildKtxProgram', () => {

View file

@ -8,6 +8,7 @@ import { registerWikiCommands } from './commands/knowledge-commands.js';
import { registerMcpCommands } from './commands/mcp-commands.js';
import { registerSetupCommands } from './commands/setup-commands.js';
import { registerSlCommands } from './commands/sl-commands.js';
import { registerSqlCommands } from './commands/sql-commands.js';
import { registerStatusCommands } from './commands/status-commands.js';
import { registerDevCommands } from './dev.js';
import { renderMissingProjectMessage } from './doctor.js';
@ -56,7 +57,8 @@ type CommandPathNode = CommandWithGlobalOptions & {
parent?: CommandPathNode | null;
};
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'mcp']);
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'sql', 'status', 'mcp']);
const PROJECT_INDEPENDENT_DEV_COMMANDS = new Set(['runtime', 'schema']);
const COMMANDS_THAT_CREATE_PROJECT = new Set(['setup', 'ktx dev init']);
const COMMANDS_WITH_OWN_MISSING_PROJECT_HANDLING = new Set(['status']);
const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']);
@ -171,7 +173,7 @@ function isProjectAwareCommand(path: string[]): boolean {
const rootCommand = path[1];
if (rootCommand === 'dev') {
return path[2] !== undefined && path[2] !== 'runtime';
return path[2] !== undefined && !PROJECT_INDEPENDENT_DEV_COMMANDS.has(path[2]);
}
return rootCommand !== undefined && PROJECT_AWARE_ROOT_COMMANDS.has(rootCommand);
}
@ -416,6 +418,7 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
});
registerWikiCommands(program, context);
registerSlCommands(program, context);
registerSqlCommands(program, context);
registerStatusCommands(program, context);
registerMcpCommands(program, context);
registerDevCommands(program, context);

View file

@ -7,8 +7,10 @@ import type { KtxPublicIngestArgs } from './public-ingest.js';
import type { KtxRuntimeArgs } from './runtime.js';
import type { KtxSetupArgs } from './setup.js';
import type { KtxSlArgs } from './sl.js';
import type { KtxSqlArgs } from './sql.js';
import { profileMark, profileSpan } from './startup-profile.js';
import type { KtxTextIngestArgs } from './text-ingest.js';
import { resolveKtxRuntimeVersion } from './release-version.js';
profileMark('module:cli-runtime');
@ -17,6 +19,8 @@ const requirePackageJson = createRequire(import.meta.url);
export interface KtxCliPackageInfo {
name: string;
version: string;
packageVersion: string;
runtimeVersion: string;
contextPackageName: '@ktx/context';
}
@ -34,6 +38,7 @@ export interface KtxCliDeps {
runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise<number>;
knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise<number>;
sl?: (args: KtxSlArgs, io: KtxCliIo) => Promise<number>;
sql?: (args: KtxSqlArgs, io: KtxCliIo) => Promise<number>;
mcp?: {
startDaemon?: typeof import('./managed-mcp-daemon.js').startKtxMcpDaemon;
stopDaemon?: typeof import('./managed-mcp-daemon.js').stopKtxMcpDaemon;
@ -59,9 +64,16 @@ export function packageInfoFromJson(packageJson: unknown): KtxCliPackageInfo {
throw new Error('Invalid KTX CLI package metadata');
}
const runtimeVersion = resolveKtxRuntimeVersion({
packageName: packageJson.name,
packageVersion: packageJson.version,
});
return {
name: packageJson.name,
version: packageJson.version,
version: runtimeVersion,
packageVersion: packageJson.version,
runtimeVersion,
contextPackageName: '@ktx/context',
};
}

View file

@ -35,6 +35,7 @@ export function registerIngestCommands(
.option('--query-history-window-days <days>', 'Query-history lookback window for this run', parsePositiveIntegerOption)
.addOption(new Option('--plain', 'Print plain text output').conflicts(['json']))
.addOption(new Option('--json', 'Print JSON output').conflicts(['plain']))
.option('--yes', 'Install required managed runtime features without prompting')
.option('--no-input', 'Disable interactive terminal input')
.showHelpAfterError();

View file

@ -1,5 +1,6 @@
import { type Command, Option } from '@commander-js/extra-typings';
import {
type CommandWithGlobalOptions,
type KtxCliCommandContext,
parsePositiveIntegerOption,
resolveCommandProjectDir,
@ -14,6 +15,11 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg
context.setExitCode(await runner(args, context.io));
}
/**
 * Reports whether the `debug` option resolved for `command` is exactly `true`.
 * Uses `optsWithGlobals()` when the command provides it and falls back to
 * `opts()` otherwise.
 */
function isDebugEnabled(command: CommandWithGlobalOptions): boolean {
  let resolved: { debug?: unknown };
  if (command.optsWithGlobals) {
    resolved = command.optsWithGlobals() as { debug?: unknown };
  } else {
    resolved = command.opts() as { debug?: unknown };
  }
  return resolved.debug === true;
}
export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void {
const wiki = program
.command('wiki')
@ -83,6 +89,7 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
userId: options.userId,
output: options.output,
json: options.json,
...(isDebugEnabled(command) ? { debug: true } : {}),
...(options.limit !== undefined ? { limit: options.limit } : {}),
});
},

View file

@ -0,0 +1,99 @@
import { Command } from '@commander-js/extra-typings';
import { describe, expect, it, vi } from 'vitest';
import type { KtxCliCommandContext } from '../cli-program.js';
import { registerSqlCommands } from './sql-commands.js';
/**
 * Builds a minimal command context for exercising `registerSqlCommands`.
 *
 * `io` streams, `runInit` and `writeDebug` are vitest mocks; `setExitCode`
 * stores the code in a closure that the `exitCode` getter exposes.
 *
 * @param overrides - Fields that replace the defaults (for example `deps`).
 * @returns A context object cast to `KtxCliCommandContext`.
 */
function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
  let exitCode = 0;
  return {
    io: {
      stdout: { write: vi.fn() },
      stderr: { write: vi.fn() },
    },
    deps: {},
    // Keep this stub complete: KtxCliPackageInfo requires packageVersion and
    // runtimeVersion, and the `as` cast below would otherwise hide the gap.
    packageInfo: {
      name: '@ktx/cli',
      version: '0.0.0-test',
      packageVersion: '0.0.0-private',
      runtimeVersion: '0.0.0-test',
      contextPackageName: '@ktx/context',
    },
    setExitCode: (code) => {
      exitCode = code;
    },
    runInit: vi.fn(),
    writeDebug: vi.fn(),
    ...overrides,
    // Declared after the spread so overrides cannot replace the getter.
    get exitCode() {
      return exitCode;
    },
  } as KtxCliCommandContext;
}
describe('registerSqlCommands', () => {
  // Global flag that every case passes ahead of the `sql` subcommand.
  const projectDirArgs = ['--project-dir', '/tmp/ktx-sql'];

  /** Registers the sql command on a fresh program wired to a mocked runner. */
  function setup() {
    const sql = vi.fn(async () => 0);
    const context = makeContext({ deps: { sql } });
    const program = new Command().exitOverride().option('--project-dir <path>');
    registerSqlCommands(program, context);
    return { program, sql, context };
  }

  it('routes positional SQL through the sql runner', async () => {
    const { program, sql, context } = setup();
    // The SQL is split across two argv entries to exercise the variadic join.
    const argv = [...projectDirArgs, 'sql', '--connection', 'warehouse', 'select', '1'];

    await expect(program.parseAsync(argv, { from: 'user' })).resolves.toBe(program);

    const expectedArgs = {
      command: 'execute',
      projectDir: '/tmp/ktx-sql',
      connectionId: 'warehouse',
      sql: 'select 1',
      maxRows: 1000,
      output: undefined,
      json: false,
      cliVersion: '0.0.0-test',
    };
    expect(sql).toHaveBeenCalledWith(expectedArgs, context.io);
  });

  it('supports the short connection flag', async () => {
    const { program, sql, context } = setup();
    const argv = [...projectDirArgs, 'sql', '-c', 'warehouse', 'select 1'];

    await expect(program.parseAsync(argv, { from: 'user' })).resolves.toBe(program);

    const expectedArgs = expect.objectContaining({ connectionId: 'warehouse', sql: 'select 1' });
    expect(sql).toHaveBeenCalledWith(expectedArgs, context.io);
  });

  it('rejects missing SQL before invoking the runner', async () => {
    const { program, sql } = setup();
    const argv = [...projectDirArgs, 'sql', '--connection', 'warehouse'];

    await expect(program.parseAsync(argv, { from: 'user' })).rejects.toThrow('missing required argument');

    expect(sql).not.toHaveBeenCalled();
  });

  it('rejects maxRows above the CLI cap', async () => {
    const { program, sql } = setup();
    const argv = [...projectDirArgs, 'sql', '--connection', 'warehouse', '--max-rows', '10001', 'select 1'];

    await expect(program.parseAsync(argv, { from: 'user' })).rejects.toThrow(
      'must be an integer between 1 and 10000',
    );

    expect(sql).not.toHaveBeenCalled();
  });
});

View file

@ -0,0 +1,62 @@
import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-typings';
import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js';
import type { KtxSqlArgs } from '../sql.js';
import { profileMark } from '../startup-profile.js';
profileMark('module:commands/sql-commands');
// Default value for `--max-rows` when the flag is not supplied.
const DEFAULT_MAX_ROWS = 1000;
// Largest `--max-rows` value that parseSqlMaxRowsOption accepts.
const MAX_ROWS_CAP = 10_000;
/**
 * Commander argument parser for `--max-rows`.
 *
 * @param value - Raw option text from the command line.
 * @returns The value converted with `Number()` when it is an integer in
 *   `[1, MAX_ROWS_CAP]`.
 * @throws InvalidArgumentError when the converted value is not an integer or
 *   falls outside that range.
 */
function parseSqlMaxRowsOption(value: string): number {
  const rows = Number(value);
  const withinRange = Number.isInteger(rows) && rows >= 1 && rows <= MAX_ROWS_CAP;
  if (withinRange) {
    return rows;
  }
  throw new InvalidArgumentError(`must be an integer between 1 and ${MAX_ROWS_CAP}`);
}
/**
 * Runs the SQL runner and records its result as the CLI exit code.
 *
 * Uses `context.deps.sql` when one is injected; otherwise `../sql.js` is
 * imported on demand and its `runKtxSql` export is used.
 */
async function runSqlArgs(context: KtxCliCommandContext, args: KtxSqlArgs): Promise<void> {
  let runner = context.deps.sql;
  if (runner == null) {
    const sqlModule = await import('../sql.js');
    runner = sqlModule.runKtxSql;
  }
  const exitCode = await runner(args, context.io);
  context.setExitCode(exitCode);
}
/**
 * Registers the `sql` subcommand on `program`.
 *
 * The variadic `<sql...>` argument is joined with single spaces into one
 * query string. The resolved options are forwarded to the SQL runner as an
 * `execute` request, together with the project directory and
 * `context.packageInfo.version` as `cliVersion`.
 *
 * @param program - Root command that receives the `sql` subcommand.
 * @param context - Shared CLI context providing io, deps and package metadata.
 */
export function registerSqlCommands(program: Command, context: KtxCliCommandContext): void {
  const outputOption = new Option('--output <mode>', 'Output mode: pretty (default), plain (TSV), or json').choices([
    'pretty',
    'plain',
    'json',
  ]);

  const sqlCommand = program
    .command('sql')
    .description('Execute parser-validated read-only SQL against a configured connection')
    .argument('<sql...>', 'SQL query to execute')
    .requiredOption('-c, --connection <id>', 'KTX connection id')
    .option('--max-rows <n>', 'Maximum rows to return', parseSqlMaxRowsOption, DEFAULT_MAX_ROWS)
    .addOption(outputOption)
    .option('--json', 'Shortcut for --output=json (overrides --output)', false);

  sqlCommand.action(
    async (
      sqlParts: string[],
      options: {
        connection: string;
        maxRows: number;
        output?: 'pretty' | 'plain' | 'json';
        json?: boolean;
      },
      command,
    ) => {
      const { connection, maxRows, output, json } = options;
      const args: KtxSqlArgs = {
        command: 'execute',
        projectDir: resolveCommandProjectDir(command),
        connectionId: connection,
        sql: sqlParts.join(' '),
        maxRows,
        output,
        json: json === true,
        cliVersion: context.packageInfo.version,
      };
      await runSqlArgs(context, args);
    },
  );
}

View file

@ -708,6 +708,10 @@ const INTERNAL_FAILURE_LINE_RE =
const ACTIONABLE_FAILURE_LINE_RE =
/^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX managed daemon|Error:|Failed\b|Could not\b|Cannot\b)/;
/**
 * Removes one leading `Error:` label and the whitespace after it.
 * Lines that do not start with exactly `Error:` are returned unchanged.
 */
function trimErrorPrefix(line: string): string {
  const label = 'Error:';
  if (!line.startsWith(label)) {
    return line;
  }
  // trimStart() strips the same whitespace set that `\s*` matches.
  return line.slice(label.length).trimStart();
}
function firstCapturedFailureLine(output: string | undefined): string | null {
const lines = (output ?? '')
.split(/\r?\n/)
@ -715,7 +719,8 @@ function firstCapturedFailureLine(output: string | undefined): string | null {
.filter((candidate) => candidate.length > 0)
.filter((candidate) => !candidate.startsWith('KTX scan completed'))
.filter((candidate) => !INTERNAL_FAILURE_LINE_RE.test(candidate));
return lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null;
const line = lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null;
return line ? trimErrorPrefix(line) : null;
}
function isGenericFailedAtDetail(target: KtxPublicIngestPlanTarget, detail: string | null | undefined): boolean {

View file

@ -102,6 +102,35 @@ describe('dev Commander tree', () => {
}
});
it('prints config schema without requiring a KTX project directory', async () => {
  const fs = await import('node:fs/promises');
  const os = await import('node:os');
  const path = await import('node:path');

  const scratchDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ktx-dev-schema-'));
  // Path that is never created, so no project exists at KTX_PROJECT_DIR.
  const absentProjectDir = path.join(scratchDir, 'missing-project');
  const savedProjectDir = process.env.KTX_PROJECT_DIR;
  const testIo = makeIo();

  try {
    process.env.KTX_PROJECT_DIR = absentProjectDir;

    await expect(runKtxCli(['dev', 'schema'], testIo.io)).resolves.toBe(0);

    const schema = JSON.parse(testIo.stdout());
    expect(schema).toMatchObject({
      title: 'ktx.yaml',
      type: 'object',
    });
    expect(testIo.stderr()).toBe('');
  } finally {
    // Restore the environment exactly, including the "was unset" case.
    if (savedProjectDir !== undefined) {
      process.env.KTX_PROJECT_DIR = savedProjectDir;
    } else {
      delete process.env.KTX_PROJECT_DIR;
    }
    await fs.rm(scratchDir, { recursive: true, force: true });
  }
});
it('rejects removed dev command groups', async () => {
for (const argv of [
['dev', 'doctor', 'setup'],

View file

@ -45,7 +45,9 @@ describe('getKtxCliPackageInfo', () => {
it('identifies the CLI package and its context dependency', () => {
expect(getKtxCliPackageInfo()).toEqual({
name: '@ktx/cli',
version: '0.0.0-private',
version: '0.1.0-rc.1',
packageVersion: '0.0.0-private',
runtimeVersion: '0.1.0-rc.1',
contextPackageName: '@ktx/context',
});
});
@ -68,6 +70,8 @@ describe('getKtxCliPackageInfo', () => {
).toEqual({
name: '@kaelio/ktx',
version: '0.1.0',
packageVersion: '0.1.0',
runtimeVersion: '0.1.0',
contextPackageName: '@ktx/context',
});
});
@ -114,7 +118,7 @@ describe('runKtxCli', () => {
await expect(runKtxCli(['--version'], testIo.io)).resolves.toBe(0);
expect(testIo.stdout()).toBe('@ktx/cli 0.0.0-private\n');
expect(testIo.stdout()).toBe('@ktx/cli 0.1.0-rc.1\n');
expect(testIo.stderr()).toBe('');
});
@ -171,6 +175,22 @@ describe('runKtxCli', () => {
},
searchIo.io,
);
const debugSearchIo = makeIo();
await expect(
runKtxCli(['--project-dir', tempDir, '--debug', 'wiki', 'search', 'revenue'], debugSearchIo.io, { knowledge }),
).resolves.toBe(0);
expect(knowledge).toHaveBeenLastCalledWith(
{
command: 'search',
projectDir: tempDir,
query: 'revenue',
userId: 'local',
json: false,
debug: true,
},
debugSearchIo.io,
);
});
it('rejects removed public wiki read and write commands', async () => {
@ -236,7 +256,7 @@ describe('runKtxCli', () => {
expect(listIo.stderr()).toContain("unknown option '--query'");
});
it('routes runtime management commands with the CLI package version', async () => {
it('routes runtime management commands with the release runtime version', async () => {
const runtime = vi.fn(async () => 0);
const installIo = makeIo();
const startIo = makeIo();
@ -262,7 +282,7 @@ describe('runKtxCli', () => {
1,
{
command: 'install',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
feature: 'local-embeddings',
force: true,
},
@ -272,7 +292,7 @@ describe('runKtxCli', () => {
2,
{
command: 'start',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
projectDir: expect.any(String),
feature: 'local-embeddings',
force: true,
@ -283,7 +303,7 @@ describe('runKtxCli', () => {
3,
{
command: 'stop',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
projectDir: expect.any(String),
all: false,
},
@ -293,7 +313,7 @@ describe('runKtxCli', () => {
4,
{
command: 'stop',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
projectDir: expect.any(String),
all: true,
},
@ -303,7 +323,7 @@ describe('runKtxCli', () => {
5,
{
command: 'status',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
json: true,
},
statusIo.io,
@ -376,7 +396,7 @@ describe('runKtxCli', () => {
expect.objectContaining({
command: 'query',
projectDir: tempDir,
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'prompt',
query: expect.objectContaining({ measures: ['orders.order_count'], dimensions: [] }),
}),
@ -391,7 +411,7 @@ describe('runKtxCli', () => {
).resolves.toBe(0);
expect(sl).toHaveBeenLastCalledWith(
expect.objectContaining({
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'auto',
}),
autoIo.io,
@ -407,7 +427,7 @@ describe('runKtxCli', () => {
).resolves.toBe(0);
expect(sl).toHaveBeenLastCalledWith(
expect.objectContaining({
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'never',
}),
noInputIo.io,
@ -546,7 +566,7 @@ describe('runKtxCli', () => {
skipAgents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
skipLlm: false,
skipEmbeddings: false,
databaseSchemas: [],
@ -676,7 +696,7 @@ describe('runKtxCli', () => {
inputMode: 'disabled',
depth: 'fast',
queryHistory: 'default',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'never',
},
testIo.io,
@ -703,7 +723,7 @@ describe('runKtxCli', () => {
inputMode: 'auto',
depth: 'deep',
queryHistory: 'default',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'prompt',
},
testIo.io,
@ -711,6 +731,40 @@ describe('runKtxCli', () => {
expect(testIo.stderr()).toBe('');
});
it('routes public ingest --yes as automatic runtime installation', async () => {
  const publicIngest = vi.fn().mockResolvedValue(0);
  const testIo = makeIo();
  const argv = ['--project-dir', tempDir, 'ingest', 'warehouse', '--yes'];

  await expect(runKtxCli(argv, testIo.io, { publicIngest })).resolves.toBe(0);

  // --yes must reach the ingest runner as the 'auto' install policy.
  const expectedArgs = expect.objectContaining({
    projectDir: tempDir,
    targetConnectionId: 'warehouse',
    runtimeInstallPolicy: 'auto',
  });
  expect(publicIngest).toHaveBeenCalledWith(expectedArgs, testIo.io);
});
it('rejects conflicting public ingest runtime install modes', async () => {
  const publicIngest = vi.fn().mockResolvedValue(0);
  const testIo = makeIo();
  const argv = ['--project-dir', tempDir, 'ingest', 'warehouse', '--yes', '--no-input'];

  await expect(runKtxCli(argv, testIo.io, { publicIngest })).resolves.toBe(1);

  // The conflict is reported on stderr and the runner is never reached.
  expect(testIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input');
  expect(publicIngest).not.toHaveBeenCalled();
});
it('rejects mutually exclusive public ingest depth flags before dispatch', async () => {
const testIo = makeIo();
const publicIngest = vi.fn().mockResolvedValue(0);
@ -746,7 +800,7 @@ describe('runKtxCli', () => {
json: false,
inputMode: 'disabled',
queryHistory: 'default',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'never',
},
testIo.io,
@ -1024,7 +1078,7 @@ describe('runKtxCli', () => {
command: 'run',
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
anthropicModel: 'claude-sonnet-4-6',
skipLlm: false,
@ -1063,7 +1117,7 @@ describe('runKtxCli', () => {
command: 'run',
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
llmBackend: 'vertex',
vertexProject: 'local-gcp-project',
vertexLocation: 'us-east5',
@ -1100,7 +1154,7 @@ describe('runKtxCli', () => {
command: 'run',
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
llmBackend: 'claude-code',
llmModel: 'opus',
skipLlm: false,
@ -1208,7 +1262,7 @@ describe('runKtxCli', () => {
projectDir: '/tmp/project',
inputMode: 'disabled',
yes: true,
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
skipLlm: true,
skipEmbeddings: true,
databaseDrivers: ['postgres'],
@ -1526,7 +1580,7 @@ describe('runKtxCli', () => {
queryFile: '/tmp/query.json',
execute: false,
format: 'json',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'auto',
},
autoIo.io,
@ -1540,7 +1594,7 @@ describe('runKtxCli', () => {
queryFile: '/tmp/query.json',
execute: false,
format: 'json',
cliVersion: '0.0.0-private',
cliVersion: '0.1.0-rc.1',
runtimeInstallPolicy: 'never',
},
neverIo.io,

View file

@ -33,6 +33,7 @@ export type {
} from './setup-sources.js';
export { runKtxSetupSourcesStep } from './setup-sources.js';
export { runKtxRuntime, type KtxRuntimeArgs, type KtxRuntimeDeps } from './runtime.js';
export { runKtxSql, type KtxSqlArgs, type KtxSqlDeps } from './sql.js';
export {
allocateDaemonPort,
readManagedPythonDaemonStatus,

View file

@ -35,6 +35,11 @@ describe('runKtxIngest', () => {
let tempDir: string;
let originalTerm: string | undefined;
const interactiveEnv = (): NodeJS.ProcessEnv => ({ ...process.env, CI: 'false' });
/** Builds a stubbed "ready" runtime result for `projectDir` that lists only the `core` feature. */
const runtimeReady = (projectDir: string) => {
  const requirements = { features: ['core' as const], requirements: [] };
  return {
    status: 'ready' as const,
    projectDir,
    requirements,
  };
};
beforeEach(async () => {
resetVizFallbackWarningsForTest();
@ -285,6 +290,7 @@ describe('runKtxIngest', () => {
historicSqlProbe: async () => ({ ok: true, lines: ['PASS Historic SQL probe skipped in test'] }),
},
context: async () => ({ status: 'skipped', projectDir }),
runtime: async () => runtimeReady(projectDir),
},
),
).resolves.toBe(0);
@ -629,6 +635,117 @@ describe('runKtxIngest', () => {
expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase');
});
// Drives runKtxIngest with a stubbed Metabase runner that replays fan-out and
// memory-flow events, then checks which events reach the injected `progress`
// callback while the JSON result is still written to stdout.
it('emits structured child ingest progress during Metabase fan-out', async () => {
const projectDir = join(tempDir, 'project');
await writeMetabaseConfig(projectDir);
const io = makeIo();
// Collects every event passed to the injected `progress` dependency.
const progressEvents: Array<{ percent: number; message: string; transient?: boolean }> = [];
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'prod-metabase',
adapter: 'metabase',
outputMode: 'json',
},
io.io,
{
progress: (event) => progressEvents.push(event),
// Stub runner: one fan-out child, then two work units planned one after the other.
// The order of the calls below matters because the assertions depend on it.
runLocalMetabaseIngest: async (input) => {
input.progress?.onMetabaseFanoutPlanned?.({
metabaseConnectionId: 'prod-metabase',
children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }],
});
input.progress?.onMetabaseChildStarted?.({
metabaseConnectionId: 'prod-metabase',
metabaseDatabaseId: 1,
targetConnectionId: 'warehouse_a',
jobId: 'metabase-child-1',
});
// First work unit: planned, started, stepped, integration progress, finished.
input.memoryFlow?.update({
plannedWorkUnits: [
{
unitKey: 'metabase-col-6',
rawFiles: ['cards/40.json'],
peerFileCount: 0,
dependencyCount: 0,
},
],
});
input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
input.memoryFlow?.emit({
type: 'work_unit_started',
unitKey: 'metabase-col-6',
skills: ['sl_capture'],
stepBudget: 40,
});
input.memoryFlow?.emit({
type: 'work_unit_step',
unitKey: 'metabase-col-6',
stepIndex: 7,
stepBudget: 40,
});
input.memoryFlow?.emit({
type: 'stage_progress',
stage: 'integration',
percent: 81,
message: 'Resolving text conflict for metabase-col-6',
});
input.memoryFlow?.emit({ type: 'work_unit_finished', unitKey: 'metabase-col-6', status: 'success' });
// Second work unit is planned and started after the first reached 81%.
input.memoryFlow?.update({
plannedWorkUnits: [
{
unitKey: 'metabase-col-7',
rawFiles: ['cards/48.json'],
peerFileCount: 0,
dependencyCount: 0,
},
],
});
input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
input.memoryFlow?.emit({
type: 'work_unit_started',
unitKey: 'metabase-col-7',
skills: ['sl_capture'],
stepBudget: 40,
});
input.progress?.onMetabaseChildCompleted?.({
metabaseConnectionId: 'prod-metabase',
metabaseDatabaseId: 1,
targetConnectionId: 'warehouse_a',
jobId: 'metabase-child-1',
status: 'done',
});
return {
metabaseConnectionId: 'prod-metabase',
status: 'all_succeeded',
totals: { workUnits: 1, failedWorkUnits: 0 },
children: [],
};
},
},
),
).resolves.toBe(0);
// arrayContaining: the listed events must be present; other events and ordering are not asserted.
// NOTE(review): the second work unit is expected at 81% rather than 55% — presumably the
// reported percent never drops below the last stage_progress value; confirm in the progress mapper.
expect(progressEvents).toEqual(
expect.arrayContaining([
{ percent: 45, message: 'Planned 1 task' },
{ percent: 55, message: 'Processing 1/1 tasks: metabase-col-6' },
{
percent: 60,
message: 'Processing tasks: 0/1 complete, 1 active; latest metabase-col-6 step 7/40',
transient: true,
},
{ percent: 81, message: 'Resolving text conflict for metabase-col-6' },
{ percent: 81, message: 'Processing 1/1 tasks: metabase-col-7' },
]),
);
// JSON mode: the result goes to stdout and the plain-text Metabase header is absent from stderr.
expect(io.stdout()).toContain('"status": "all_succeeded"');
expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase');
});
it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => {
const projectDir = join(tempDir, 'metabase-cli-project');
await writeWarehouseConfig(projectDir);
@ -903,9 +1020,16 @@ describe('runKtxIngest', () => {
sourceKey: 'historic-sql',
body: {
workUnits: [],
postProcessor: {
finalization: {
sourceKey: 'historic-sql',
status: 'success',
commitSha: 'finalization-sha',
touchedPaths: ['semantic-layer/warehouse/_schema/public.yaml', 'wiki/global/historic-sql-orders.md'],
declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
declaredChangedWikiPageKeys: ['historic-sql-orders'],
derivedChangedWikiPageKeys: ['historic-sql-orders'],
mismatches: [],
result: {
tableUsageMerged: 56,
staleTablesMarked: 1,
@ -915,7 +1039,24 @@ describe('runKtxIngest', () => {
},
errors: [],
warnings: [],
touchedSources: [],
actions: [
...Array.from({ length: 57 }, (_, index) => ({
target: 'sl' as const,
type: 'updated' as const,
key: `orders-${index}`,
detail: 'Merged usage',
targetConnectionId: 'warehouse',
rawPaths: ['tables/public/orders.json'],
})),
...Array.from({ length: 35 }, (_, index) => ({
target: 'wiki' as const,
type: 'updated' as const,
key: `historic-sql-orders-${index}`,
detail: 'Projected pattern',
rawPaths: ['patterns/orders.json'],
})),
],
provenanceExclusions: [],
},
},
}),
@ -979,6 +1120,125 @@ describe('runKtxIngest', () => {
expect(io.stdout()).toContain('Status: error\n');
});
// Regression test: a stored report whose body.status is 'failed' must be rendered as an
// error even though failedWorkUnits is empty, and its trace path and failure message must
// be printed to stdout.
it('prints trace path and error status for stored failed ingest reports', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
const io = makeIo();
// Report fixture: run-level failure (final_gates) with no work units at all.
const report = {
id: 'report-failed',
runId: 'run-failed',
jobId: 'job-failed',
connectionId: 'warehouse',
sourceKey: 'metabase',
createdAt: '2026-05-17T12:00:00.000Z',
body: {
status: 'failed',
syncId: 'sync-failed',
diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
commitSha: null,
tracePath: '/project/.ktx/ingest-traces/job-failed/trace.jsonl',
failure: { phase: 'final_gates', message: 'final artifact gates failed' },
workUnits: [],
failedWorkUnits: [],
reconciliationSkipped: true,
conflictsResolved: [],
evictionsApplied: [],
unmappedFallbacks: [],
evictionInputs: [],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
};
// The report is supplied through the injected readReportFile dependency (a mock).
await runKtxIngest(
{
command: 'status',
projectDir,
reportFile: '/project/report-failed.json',
runId: 'run-failed',
outputMode: 'plain',
inputMode: 'disabled',
},
io.io,
{
readReportFile: vi.fn().mockResolvedValue(report),
},
);
expect(io.stdout()).toContain('Trace: /project/.ktx/ingest-traces/job-failed/trace.jsonl');
expect(io.stdout()).toContain('Status: error');
expect(io.stdout()).toContain('Error: final artifact gates failed');
});
// Regression test: when a historic-sql (query history) work unit fails with a raw Google
// OAuth JSON payload, the plain status output must show a readable, actionable message
// and must not leak raw payload fields such as error_uri.
it('prints a clear first failure reason when query-history work units fail', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);
// Raw provider error as it is stored in the failed work unit's reason.
const rawReason =
'{"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)","error_uri":"https://support.google.com/a/answer/9368756","error_subtype":"invalid_rapt"}';
// Fake local ingest runner that returns a report with exactly one failed work unit.
const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
const failedWorkUnit = {
...localFakeBundleReport('query-history-failed').body.workUnits[0],
unitKey: 'historic-sql-table-orders',
rawFiles: ['tables/orders.json'],
status: 'failed' as const,
reason: rawReason,
actions: [],
touchedSlSources: [],
};
const report = localFakeBundleReport('query-history-failed', {
id: 'report-query-history-failed',
runId: 'run-query-history-failed',
connectionId: input.connectionId,
sourceKey: 'historic-sql',
body: {
workUnits: [failedWorkUnit],
failedWorkUnits: [failedWorkUnit.unitKey],
},
});
return {
result: {
jobId: 'query-history-failed',
runId: report.runId,
syncId: report.body.syncId,
diffSummary: report.body.diffSummary,
workUnitCount: report.body.workUnits.length,
failedWorkUnits: report.body.failedWorkUnits,
artifactsWritten: report.body.provenanceRows.length,
commitSha: report.body.commitSha,
},
report,
};
});
const io = makeIo();
// The run is expected to resolve with exit code 1 because a work unit failed.
await expect(
runKtxIngest(
{
command: 'run',
projectDir,
connectionId: 'warehouse',
adapter: 'historic-sql',
outputMode: 'plain',
},
io.io,
{
runLocalIngest: runLocal,
jobIdFactory: () => 'query-history-failed',
},
),
).resolves.toBe(1);
expect(io.stdout()).toContain('Status: error\n');
expect(io.stdout()).toContain('Failed tasks: 1\n');
expect(io.stdout()).toContain(
'Error: Query history failed for 1 task. First failure: Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.',
);
// The raw JSON payload must not be echoed to the user.
expect(io.stdout()).not.toContain('error_uri');
});
it('passes the debug LLM request file to local ingest runs', async () => {
const projectDir = join(tempDir, 'project');
await writeWarehouseConfig(projectDir);

View file

@ -15,6 +15,7 @@ import {
runLocalIngest,
runLocalMetabaseIngest,
savedMemoryCountsForReport,
sanitizeMemoryFlowError,
} from '@ktx/context/ingest';
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
@ -101,7 +102,7 @@ export interface KtxIngestDeps {
}
/**
 * Collapses an ingest report into the two-state status printed by the CLI.
 *
 * A report counts as an error when the run itself was recorded as failed
 * (`body.status === 'failed'`, e.g. a run-level gate failure with no failed
 * work units) or when at least one work unit failed.
 *
 * @param report - Stored or freshly produced ingest report.
 * @returns `'error'` for any failure, otherwise `'done'`.
 */
function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
  // The run-level status must be consulted first-class: a failed run may have
  // an empty failedWorkUnits list and would otherwise be reported as 'done'.
  const runFailed = report.body.status === 'failed';
  const hasFailedWorkUnits = report.body.failedWorkUnits.length > 0;
  return runFailed || hasFailedWorkUnits ? 'error' : 'done';
}
const REPORT_SOURCE_LABELS = new Map<string, string>([
@ -127,11 +128,79 @@ function reportSourceLabel(sourceKey: string): string {
.join(' ');
}
/**
 * Extracts a JSON object embedded in a failure reason string.
 *
 * The candidate text is everything from the first `{` to the last `}` of the
 * trimmed reason, so surrounding log prefixes/suffixes are tolerated.
 *
 * @param reason - Raw failure reason text.
 * @returns The parsed plain object, or `null` when no braces are found, the
 *   span is not valid JSON, or the parsed value is not a non-array object.
 */
function jsonObjectFromFailureReason(reason: string): Record<string, unknown> | null {
  const text = reason.trim();
  const openBrace = text.indexOf('{');
  const closeBrace = text.lastIndexOf('}');
  if (openBrace === -1 || closeBrace < openBrace) {
    return null;
  }

  let candidate: unknown;
  try {
    candidate = JSON.parse(text.slice(openBrace, closeBrace + 1));
  } catch {
    // Not valid JSON: treat the reason as opaque text.
    return null;
  }

  if (!candidate || typeof candidate !== 'object' || Array.isArray(candidate)) {
    return null;
  }
  return candidate as Record<string, unknown>;
}
/**
 * Reads a string property from a loosely typed record.
 *
 * @param record - Object to read from.
 * @param key - Property name.
 * @returns The trimmed value, or `null` when the property is not a string or
 *   is empty/whitespace-only.
 */
function stringField(record: Record<string, unknown>, key: string): string | null {
  const raw = record[key];
  if (typeof raw !== 'string') {
    return null;
  }
  const cleaned = raw.trim();
  return cleaned.length > 0 ? cleaned : null;
}
/**
 * Detects the parsed error payload shape that the caller reports as a Google
 * re-authentication failure.
 *
 * Matches when `error` is `invalid_grant` (case-insensitive) and either the
 * description mentions "reauth" or the subtype is `invalid_rapt`.
 *
 * @param record - Parsed JSON error payload.
 * @returns `true` when the payload matches the reauthentication pattern.
 */
function isGoogleReauthFailure(record: Record<string, unknown>): boolean {
  // Missing or non-string fields are treated as empty strings.
  const lowered = (key: string): string => stringField(record, key)?.toLowerCase() ?? '';

  if (lowered('error') !== 'invalid_grant') {
    return false;
  }
  return lowered('error_description').includes('reauth') || lowered('error_subtype') === 'invalid_rapt';
}
/**
 * Turns a raw work-unit failure reason into a message suitable for CLI output.
 *
 * - Reasons without an embedded JSON object are passed through
 *   `sanitizeMemoryFlowError`.
 * - For the `historic-sql` source, a Google reauthentication payload is
 *   replaced with a fixed, actionable message.
 * - Other JSON payloads are summarised as `error: error_description (error_subtype)`
 *   using whichever of those fields are present.
 * - JSON payloads with none of those fields fall back to the sanitized raw
 *   reason, so they get the same treatment as non-JSON reasons instead of
 *   being echoed verbatim.
 *
 * @param sourceKey - Source key of the report the failure belongs to.
 * @param reason - Raw failure reason recorded on the work unit.
 * @returns A single-line, user-facing failure description.
 */
function formatFailureReason(sourceKey: string, reason: string): string {
  const parsed = jsonObjectFromFailureReason(reason);
  if (!parsed) {
    return sanitizeMemoryFlowError(reason);
  }
  if (sourceKey === 'historic-sql' && isGoogleReauthFailure(parsed)) {
    return 'Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.';
  }

  const error = stringField(parsed, 'error');
  const description = stringField(parsed, 'error_description');
  const subtype = stringField(parsed, 'error_subtype');
  const parts = [error, description].filter((part): part is string => Boolean(part));
  // Without recognised fields there is nothing structured to show; sanitize the
  // raw payload rather than printing it as-is.
  const message = parts.length > 0 ? parts.join(': ') : sanitizeMemoryFlowError(reason);
  return subtype ? `${message} (${subtype})` : message;
}
/**
 * Builds the one-line error message shown for a failed ingest report.
 *
 * Precedence:
 * 1. A run-level failure (`body.status === 'failed'` with a failure message)
 *    is returned, sanitized.
 * 2. Otherwise, if any work units failed, a "<source> failed for N task(s)."
 *    headline is returned, followed by the formatted reason of the first
 *    failed work unit that carries a non-blank string reason.
 *
 * @param report - Ingest report to summarise.
 * @returns The message, or `null` when the report has no failure to describe.
 */
function failedReportMessage(report: IngestReportSnapshot): string | null {
  const { body, sourceKey } = report;

  const runFailureMessage = body.status === 'failed' ? body.failure?.message : undefined;
  if (runFailureMessage) {
    return sanitizeMemoryFlowError(runFailureMessage);
  }

  const failedTotal = body.failedWorkUnits.length;
  if (failedTotal === 0) {
    return null;
  }

  const headline = `${reportSourceLabel(sourceKey)} failed for ${pluralize(failedTotal, 'task')}.`;
  const firstReason = body.workUnits.find(
    (unit) => unit.status === 'failed' && typeof unit.reason === 'string' && unit.reason.trim().length > 0,
  )?.reason;

  return firstReason ? `${headline} First failure: ${formatFailureReason(sourceKey, firstReason)}` : headline;
}
function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void {
const counts = savedMemoryCountsForReport(report);
const failedMessage = failedReportMessage(report);
io.stdout.write(`Report: ${report.id}\n`);
io.stdout.write(`Run: ${report.runId}\n`);
io.stdout.write(`Job: ${report.jobId}\n`);
if (report.body.tracePath) {
io.stdout.write(`Trace: ${report.body.tracePath}\n`);
}
io.stdout.write(`Status: ${reportStatus(report)}\n`);
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
io.stdout.write(`Connection: ${report.connectionId}\n`);
@ -140,6 +209,12 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
`Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`,
);
io.stdout.write(`Tasks: ${report.body.workUnits.length}\n`);
if (report.body.failedWorkUnits.length > 0) {
io.stdout.write(`Failed tasks: ${report.body.failedWorkUnits.length}\n`);
}
if (failedMessage) {
io.stdout.write(`Error: ${failedMessage}\n`);
}
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
io.stdout.write(`Provenance rows: ${report.body.provenanceRows.length}\n`);
}
@ -220,7 +295,11 @@ function formatDiffProgress(event: Extract<MemoryFlowEvent, { type: 'diff_comput
}
/**
 * Returns the events that belong to the current planning round, up to and
 * including `eventIndex`.
 *
 * Only events after the most recent `chunks_planned` event (within the first
 * `eventIndex + 1` events) are returned, so work-unit counters derived from
 * the result restart whenever a new plan is emitted. When no plan event has
 * been seen yet, all events through `eventIndex` are returned.
 *
 * @param snapshot - Memory-flow replay snapshot holding the event log.
 * @param eventIndex - Index of the last event to consider (inclusive).
 * @returns Events after the latest plan event, through `eventIndex`.
 */
function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] {
  const eventsThrough = snapshot.events.slice(0, eventIndex + 1);
  const latestPlanIndex = eventsThrough.findLastIndex((event) => event.type === 'chunks_planned');
  // findLastIndex yields -1 when no plan exists, which makes the start index 0.
  return eventsThrough.slice(latestPlanIndex + 1);
}
function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number {
@ -244,7 +323,8 @@ function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex
if (snapshot.plannedWorkUnits.length > 0) {
return snapshot.plannedWorkUnits.length;
}
const planEvent = workUnitEventsThrough(snapshot, eventIndex)
const planEvent = snapshot.events
.slice(0, eventIndex + 1)
.filter((event) => event.type === 'chunks_planned')
.at(-1);
return planEvent?.workUnitCount ?? completedWorkUnitCountThrough(snapshot, eventIndex);
@ -290,6 +370,12 @@ function plainIngestEventProgress(
};
case 'stage_skipped':
return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` };
case 'stage_progress':
return {
percent: event.percent,
message: event.message,
...(event.transient !== undefined ? { transient: event.transient } : {}),
};
case 'work_unit_started': {
const total = plannedWorkUnitCountThrough(snapshot, eventIndex);
const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey);
@ -636,6 +722,25 @@ export async function runKtxIngest(
}
if (args.adapter === 'metabase') {
const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest;
const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, {
requireInput: (args.inputMode ?? 'auto') === 'auto',
});
const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env)
? createPlainIngestProgressRenderer(args, io)
: null;
const structuredProgress = deps.progress
? createPlainIngestProgressObserver(args, deps.progress)
: null;
const initialMemoryFlow =
plainProgress || structuredProgress ? initialRunMemoryFlowInput(args, 'pending') : undefined;
const memoryFlow = initialMemoryFlow
? createMemoryFlowLiveBuffer(initialMemoryFlow, {
onChange: (snapshot) => {
plainProgress?.update(snapshot);
structuredProgress?.update(snapshot);
},
})
: undefined;
const progress =
args.outputMode === 'json' && !deps.progress
? undefined
@ -646,20 +751,29 @@ export async function runKtxIngest(
: io,
deps.progress,
);
const result = await executeMetabaseFanout({
project: ingestProject,
adapters: createAdapters(ingestProject, adapterOptions),
metabaseConnectionId: args.connectionId,
...localIngestOptions,
queryExecutor,
trigger: 'manual_resync',
jobIdFactory: deps.jobIdFactory,
...(progress ? { progress } : {}),
});
if (args.outputMode === 'json') {
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
} else {
writeMetabaseFanoutStatus(result, io);
plainProgress?.start();
structuredProgress?.start();
let result: LocalMetabaseFanoutResult;
try {
result = await executeMetabaseFanout({
project: ingestProject,
adapters: createAdapters(ingestProject, adapterOptions),
metabaseConnectionId: args.connectionId,
...localIngestOptions,
queryExecutor,
trigger: 'manual_resync',
jobIdFactory: deps.jobIdFactory,
...(memoryFlow ? { memoryFlow } : {}),
...(progress ? { progress } : {}),
});
plainProgress?.flush();
if (args.outputMode === 'json') {
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
} else {
writeMetabaseFanoutStatus(result, io);
}
} finally {
plainProgress?.flush();
}
return result.status === 'all_succeeded' ? 0 : 1;
}

View file

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import type { KtxCliIo } from '../cli-runtime.js';
import { printList, type PrintListColumn } from './print-list.js';
import { createRankBadgeFormatter, printList, type PrintListColumn } from './print-list.js';
import { SYMBOLS } from './symbols.js';
function recorder(): { io: KtxCliIo; out: () => string; err: () => string } {
@ -239,26 +239,26 @@ describe('printList — pretty mode', () => {
expect(out).toContain('2 pages');
});
it('renders a leading badge column with prettyFormat in pretty mode', () => {
it('renders a leading rank badge column in pretty mode', () => {
const r = recorder();
interface SearchRow { score: number; scope: string; key: string; summary: string }
const rows: SearchRow[] = [
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
];
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
{
key: 'score',
label: 'SCORE',
plain: 'score=',
role: 'badge',
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
prettyFormat: createRankBadgeFormatter(rows),
dim: true,
},
{ key: 'scope', label: 'SCOPE', plain: '' },
{ key: 'key', label: 'KEY', plain: '' },
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
];
const rows: SearchRow[] = [
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
];
printList<SearchRow>({
rows,
columns: SEARCH_COLUMNS,
@ -270,20 +270,22 @@ describe('printList — pretty mode', () => {
io: r.io,
});
const out = stripAnsi(r.out());
expect(out).toMatch(/87%\s+alpha\s+/);
expect(out).toMatch(/4%\s+beta\s+/);
expect(out).toMatch(/#1\s+alpha\s+/);
expect(out).toMatch(/#2\s+beta\s+/);
expect(out).not.toContain('%');
});
it('emits the badge column in plain mode using its plain prefix', () => {
const r = recorder();
interface SearchRow { score: number; scope: string; key: string; summary: string }
const rows: SearchRow[] = [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }];
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
{
key: 'score',
label: 'SCORE',
plain: 'score=',
role: 'badge',
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
prettyFormat: createRankBadgeFormatter(rows),
dim: true,
},
{ key: 'scope', label: 'SCOPE', plain: '' },
@ -291,7 +293,7 @@ describe('printList — pretty mode', () => {
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
];
printList<SearchRow>({
rows: [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }],
rows,
columns: SEARCH_COLUMNS,
groupBy: 'scope',
mode: 'plain',

View file

@ -24,7 +24,7 @@ export interface PrintListColumn<Row> {
* - `'suffix'` trailing em-dash optional value. Default: any column with `optional: true`.
*/
role?: 'name' | 'metric' | 'badge' | 'suffix';
/** Custom pretty-mode value formatter (e.g. score → "87%"). Plain/JSON unaffected. */
/** Custom pretty-mode value formatter (for example, score -> "#1"). Plain/JSON unaffected. */
prettyFormat?: (value: Row[keyof Row & string], row: Row) => string;
}
@ -67,6 +67,16 @@ export function printList<Row extends object>(args: PrintListArgs<Row>): void {
}
}
/**
 * Creates a `prettyFormat` callback that renders a row's 1-based position in
 * `rows` as a rank badge such as `#1`.
 *
 * Positions are captured by object identity when the formatter is created.
 * Rows not captured at that time are looked up in `rows` on demand, which
 * yields `#0` for a row that is not in the array.
 *
 * @param rows - Rows in display order.
 * @returns Formatter that ignores the cell value and returns `#<rank>`.
 */
export function createRankBadgeFormatter<Row extends object>(
  rows: ReadonlyArray<Row>,
): (_value: Row[keyof Row & string], row: Row) => string {
  const rankByRow = rows.reduce(
    (ranks, row, position) => ranks.set(row, position + 1),
    new WeakMap<Row, number>(),
  );
  return (_value, row) => {
    const rank = rankByRow.get(row) ?? rows.indexOf(row) + 1;
    return `#${rank}`;
  };
}
/**
 * Reports whether a cell value has nothing to display.
 *
 * @param value - Any cell value.
 * @returns `true` for `undefined`, `null`, or the empty string; `false` otherwise
 *   (including `0`, `false`, and whitespace-only strings).
 */
function isEmpty(value: unknown): boolean {
  // `== null` matches both null and undefined.
  if (value == null) {
    return true;
  }
  return value === '';
}

View file

@ -1,6 +1,7 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { stripVTControlCharacters } from 'node:util';
import { initKtxProject, loadKtxProject } from '@ktx/context/project';
import type { KtxEmbeddingPort } from '@ktx/context';
import { writeLocalKnowledgePage } from '@ktx/context/wiki';
@ -90,6 +91,24 @@ describe('runKtxKnowledge', () => {
expect(searchIo.stdout()).toContain('metrics-revenue');
});
// Verifies that pretty-mode wiki search output shows a rank badge ("#1") next to the
// matching page key and no longer contains a percentage score.
it('prints wiki search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await initKtxProject({ projectDir });
await seedWikiPage(projectDir);
const searchIo = makeIo();
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'paid order', userId: 'local', output: 'pretty' },
searchIo.io,
),
).resolves.toBe(0);
// Strip terminal control sequences so the assertions match visible text only.
const stdout = stripVTControlCharacters(searchIo.stdout());
expect(stdout).toMatch(/#1\s+metrics-revenue/);
expect(stdout).not.toContain('%');
});
it('prints wiki list and search as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
@ -156,4 +175,29 @@ describe('runKtxKnowledge', () => {
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
expect(searchIo.stderr()).toBe('');
});
// Verifies that `debug: true` keeps the JSON envelope on stdout intact while search
// mode, embedding configuration, and per-lane diagnostics are written to stderr.
it('writes wiki search lane diagnostics to stderr when debug is enabled', async () => {
const projectDir = join(tempDir, 'debug-project');
await initKtxProject({ projectDir });
await seedWikiPage(projectDir);
const searchIo = makeIo();
// A fake embedding port is injected so the embedding is reported as configured.
await expect(
runKtxKnowledge(
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, debug: true },
searchIo.io,
{ embeddingService: new FakeEmbeddingPort() },
),
).resolves.toBe(0);
// stdout must still be parseable JSON: debug output may not be mixed into it.
expect(JSON.parse(searchIo.stdout())).toMatchObject({
kind: 'list',
data: { items: [expect.objectContaining({ key: 'metrics-revenue' })] },
meta: { command: 'wiki search' },
});
expect(searchIo.stderr()).toContain('[debug] wiki search mode=sqlite-fts5');
expect(searchIo.stderr()).toContain('embedding=configured');
expect(searchIo.stderr()).toContain('lane=lexical status=available');
expect(searchIo.stderr()).toContain('lane=semantic status=available');
});
});

View file

@ -11,7 +11,7 @@ import {
searchLocalKnowledgePages,
} from '@ktx/context/wiki';
import { resolveOutputMode } from './io/mode.js';
import { printList, type PrintListColumn } from './io/print-list.js';
import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js';
export type KtxKnowledgeArgs =
| { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean }
@ -23,6 +23,7 @@ export type KtxKnowledgeArgs =
output?: string;
json?: boolean;
limit?: number;
debug?: boolean;
};
type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo;
@ -33,19 +34,23 @@ const WIKI_LIST_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSummary>> =
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
];
const WIKI_SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> = [
{
key: 'score',
label: 'SCORE',
plain: 'score=',
role: 'badge',
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
dim: true,
},
{ key: 'scope', label: 'SCOPE', plain: '' },
{ key: 'key', label: 'KEY', plain: '' },
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
];
/**
 * Builds the column layout for `wiki search` results.
 *
 * The columns are created per result set because the leading badge column's
 * pretty formatter is derived from the rows being printed.
 *
 * @param rows - Search results in the order they will be printed.
 * @returns Column definitions: score badge, scope, key, optional summary.
 */
function wikiSearchColumns(
  rows: ReadonlyArray<LocalKnowledgeSearchResult>,
): ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> {
  const scoreBadge: PrintListColumn<LocalKnowledgeSearchResult> = {
    key: 'score',
    label: 'SCORE',
    plain: 'score=',
    role: 'badge',
    prettyFormat: createRankBadgeFormatter(rows),
    dim: true,
  };
  const scope: PrintListColumn<LocalKnowledgeSearchResult> = { key: 'scope', label: 'SCOPE', plain: '' };
  const pageKey: PrintListColumn<LocalKnowledgeSearchResult> = { key: 'key', label: 'KEY', plain: '' };
  const summary: PrintListColumn<LocalKnowledgeSearchResult> = {
    key: 'summary',
    label: 'SUMMARY',
    plain: '',
    optional: true,
    dim: true,
  };
  return [scoreBadge, scope, pageKey, summary];
}
interface KtxKnowledgeDeps {
embeddingService?: KtxEmbeddingPort | null;
@ -65,6 +70,26 @@ function wikiSearchEmbeddingService(
return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null;
}
/**
 * Writes wiki search diagnostics to stderr.
 *
 * Emits one summary line (search mode, whether an embedding service is
 * configured, result count) followed by one line per lane. Lane details are
 * read from the first result only; nothing lane-related is printed when there
 * are no results or the first result carries no lanes.
 *
 * @param io - CLI io whose `stderr` receives the diagnostics.
 * @param input - Search mode, embedding configuration flag, and the results.
 */
function writeWikiSearchDebug(
  io: KtxKnowledgeIo,
  input: {
    mode: string;
    embeddingConfigured: boolean;
    results: LocalKnowledgeSearchResult[];
  },
): void {
  const { mode, embeddingConfigured, results } = input;
  const embeddingState = embeddingConfigured ? 'configured' : 'unconfigured';
  io.stderr.write(`[debug] wiki search mode=${mode} embedding=${embeddingState} results=${results.length}\n`);

  const firstResultLanes = results[0]?.lanes ?? [];
  firstResultLanes.forEach((entry) => {
    // The reason is appended only when the lane reports one.
    const reasonSuffix = entry.reason ? ` reason=${entry.reason}` : '';
    io.stderr.write(
      `[debug] wiki search lane=${entry.lane} status=${entry.status} returned=${entry.returnedCandidateCount} weight=${entry.weight}${reasonSuffix}\n`,
    );
  });
}
export async function runKtxKnowledge(
args: KtxKnowledgeArgs,
io: KtxKnowledgeIo = process,
@ -89,12 +114,20 @@ export async function runKtxKnowledge(
return 0;
}
if (args.command === 'search') {
const embeddingService = wikiSearchEmbeddingService(project, deps);
const results = await searchLocalKnowledgePages(project, {
query: args.query,
userId: args.userId,
embeddingService: wikiSearchEmbeddingService(project, deps),
embeddingService,
limit: args.limit,
});
if (args.debug) {
writeWikiSearchDebug(io, {
mode: project.config.storage.search,
embeddingConfigured: embeddingService !== null,
results,
});
}
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
let emptyMessage = `No local wiki pages matched "${args.query}"`;
let emptyHint = 'Run `ktx wiki list` to inspect available pages.';
@ -107,7 +140,7 @@ export async function runKtxKnowledge(
}
printList<LocalKnowledgeSearchResult>({
rows: results,
columns: WIKI_SEARCH_COLUMNS,
columns: wikiSearchColumns(results),
groupBy: 'scope',
emptyMessage,
emptyHint,

View file

@ -1,6 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import {
createManagedPythonSemanticLayerComputePort,
ensureManagedPythonCommandRuntime,
managedRuntimeInstallCommand,
runtimeInstallPolicyFromFlags,
} from './managed-python-command.js';
@ -103,6 +104,17 @@ function installResult(features: KtxRuntimeFeature[] = ['core']): ManagedPythonR
};
}
/**
 * Test helper: creates a mock spinner factory that records every spinner call.
 *
 * Each call is appended to `events` as `"<method>:<message>"`, so tests can
 * assert the exact sequence of start/message/stop/error calls.
 *
 * @returns The shared `events` log and the `spinner` factory mock.
 */
function makeSpinnerEvents() {
  const events: string[] = [];
  const record = (kind: string) => (msg: string) => events.push(`${kind}:${msg}`);
  const spinner = vi.fn(() => ({
    start: record('start'),
    message: record('message'),
    stop: record('stop'),
    error: record('error'),
  }));
  return { events, spinner };
}
describe('managedRuntimeInstallCommand', () => {
it('prints the exact command for each managed runtime feature', () => {
expect(managedRuntimeInstallCommand('core')).toBe('ktx dev runtime install --yes');
@ -128,6 +140,51 @@ describe('runtimeInstallPolicyFromFlags', () => {
});
describe('createManagedPythonSemanticLayerComputePort', () => {
// Verifies the default (no injected spinner) path: install progress is written to stderr
// as plain lines, and the "Installing" line appears exactly once.
it('uses non-animated runtime setup status by default', async () => {
const io = makeIo();
await expect(
ensureManagedPythonCommandRuntime({
cliVersion: '0.2.0',
installPolicy: 'auto',
io: io.io,
readStatus: vi.fn(async () => missingStatus()),
installRuntime: vi.fn(async () => installResult(['local-embeddings'])),
feature: 'local-embeddings',
}),
).resolves.toMatchObject({
layout: { versionDir: '/runtime/0.2.0' },
});
expect(io.stderr()).toContain('Installing KTX Python runtime (local-embeddings) with uv...');
expect(io.stderr()).toContain('KTX Python runtime ready: /runtime/0.2.0');
// Guards against the status line being emitted more than once.
expect(io.stderr().match(/Installing KTX Python runtime/g)).toHaveLength(1);
});
// Verifies that an injected spinner receives exactly one start and one stop call, with
// the install and ready messages, when the runtime is installed successfully.
it('shows runtime installation progress with the CLI spinner', async () => {
const io = makeIo();
const { events, spinner } = makeSpinnerEvents();
const options = {
cliVersion: '0.2.0',
installPolicy: 'auto' as const,
io: io.io,
readStatus: vi.fn(async () => missingStatus()),
installRuntime: vi.fn(async () => installResult(['local-embeddings'])),
feature: 'local-embeddings' as const,
spinner,
};
await expect(ensureManagedPythonCommandRuntime(options)).resolves.toMatchObject({
layout: { versionDir: '/runtime/0.2.0' },
});
// Exact sequence: no intermediate message or error events are expected.
expect(events).toEqual([
'start:Installing KTX Python runtime (local-embeddings) with uv...',
'stop:KTX Python runtime ready: /runtime/0.2.0',
]);
});
it('uses the managed ktx-daemon executable when the runtime is ready', async () => {
const io = makeIo();
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
@ -170,6 +227,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
it('installs the core runtime without prompting when policy is auto', async () => {
const io = makeIo();
const { events, spinner } = makeSpinnerEvents();
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
const createPythonCompute = vi.fn(() => compute);
const installRuntime = vi.fn(async () => installResult());
@ -182,6 +240,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
readStatus: vi.fn(async () => missingStatus()),
installRuntime,
createPythonCompute,
spinner,
}),
).resolves.toBe(compute);
@ -190,12 +249,15 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
features: ['core'],
force: false,
});
expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv');
expect(io.stderr()).toContain('KTX Python runtime ready: /runtime/0.2.0');
expect(events).toEqual([
'start:Installing KTX Python runtime (core) with uv...',
'stop:KTX Python runtime ready: /runtime/0.2.0',
]);
});
it('prompts before installing when policy is prompt', async () => {
const io = makeIo();
const { events, spinner } = makeSpinnerEvents();
const confirmInstall = vi.fn(async () => true);
const installRuntime = vi.fn(async () => installResult());
@ -207,6 +269,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
installRuntime,
createPythonCompute: vi.fn(() => ({ query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() })),
confirmInstall,
spinner,
});
expect(confirmInstall).toHaveBeenCalledWith(
@ -218,10 +281,12 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
features: ['core'],
force: false,
});
expect(events).toContainEqual('start:Installing KTX Python runtime (core) with uv...');
});
it('uses injected runtime confirmation instead of reading process TTY directly', async () => {
const io = makeIo();
const { events, spinner } = makeSpinnerEvents();
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
const installRuntime = vi.fn(async (): Promise<ManagedPythonRuntimeInstallResult> => installResult());
const confirmInstall = vi.fn(async () => true);
@ -235,6 +300,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
installRuntime,
confirmInstall,
createPythonCompute: () => compute,
spinner,
}),
).resolves.toBe(compute);
@ -242,7 +308,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
'KTX needs to install the core Python runtime. This downloads Python dependencies with uv. Continue?',
io.io,
);
expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv...');
expect(events).toContainEqual('start:Installing KTX Python runtime (core) with uv...');
});
it('can decide default runtime prompting from injected io capabilities', async () => {

View file

@ -1,6 +1,6 @@
import { createPythonSemanticLayerComputePort, type KtxSemanticLayerComputePort } from '@ktx/context/daemon';
import type { KtxCliIo } from './cli-runtime.js';
import { createClackPromptAdapter } from './clack.js';
import { createClackPromptAdapter, createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
import {
installManagedPythonRuntime,
readManagedPythonRuntimeStatus,
@ -37,6 +37,7 @@ export interface ManagedPythonCommandDeps {
readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise<ManagedPythonRuntimeStatus>;
installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise<ManagedPythonRuntimeInstallResult>;
confirmInstall?: (message: string, io: KtxCliIo) => Promise<boolean>;
spinner?: () => KtxCliSpinner;
}
export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps {
@ -101,14 +102,20 @@ export async function ensureManagedPythonCommandRuntime(
}
}
options.io.stderr.write(`Installing KTX Python runtime (${feature}) with uv...\n`);
const installed = await installRuntime({
cliVersion: options.cliVersion,
features: [feature],
force: false,
});
options.io.stderr.write(`KTX Python runtime ready: ${installed.layout.versionDir}\n`);
return { layout: installed.layout, manifest: installed.manifest };
const progress = (options.spinner ?? (() => createStaticCliSpinner(options.io)))();
progress.start(`Installing KTX Python runtime (${feature}) with uv...`);
try {
const installed = await installRuntime({
cliVersion: options.cliVersion,
features: [feature],
force: false,
});
progress.stop(`KTX Python runtime ready: ${installed.layout.versionDir}`);
return { layout: installed.layout, manifest: installed.manifest };
} catch (error) {
progress.error(`KTX Python runtime install failed: ${error instanceof Error ? error.message : String(error)}`);
throw error;
}
}
export async function createManagedPythonSemanticLayerComputePort(
@ -122,6 +129,7 @@ export async function createManagedPythonSemanticLayerComputePort(
...(options.readStatus ? { readStatus: options.readStatus } : {}),
...(options.installRuntime ? { installRuntime: options.installRuntime } : {}),
...(options.confirmInstall ? { confirmInstall: options.confirmInstall } : {}),
...(options.spinner ? { spinner: options.spinner } : {}),
});
const createPythonCompute = options.createPythonCompute ?? createPythonSemanticLayerComputePort;
return createPythonCompute({

View file

@ -11,7 +11,13 @@ function silentIo(): KtxCliIo {
}
/**
 * Returns fixed, placeholder package metadata for rendering CLI documentation.
 *
 * The values are static stubs (not read from any package manifest) and include
 * the separate `packageVersion` and `runtimeVersion` fields alongside `version`.
 *
 * @returns Stub package info for the docs renderer.
 */
function stubPackageInfo(): KtxCliPackageInfo {
  return {
    name: '@ktx/cli',
    version: '0.0.0-docs',
    packageVersion: '0.0.0-private',
    runtimeVersion: '0.0.0-docs',
    contextPackageName: '@ktx/context',
  };
}
export function renderKtxCommandTree(): string {

View file

@ -6,6 +6,7 @@ import {
type KtxPublicIngestProject,
runKtxPublicIngest,
} from './public-ingest.js';
import type { ManagedPythonCommandRuntime } from './managed-python-command.js';
function makeIo(options: { isTTY?: boolean; interactive?: boolean } = {}) {
let stdout = '';
@ -750,6 +751,53 @@ describe('runKtxPublicIngest', () => {
expect(runScan).not.toHaveBeenCalled();
});
// Verifies ordering for an interactive foreground run with query history enabled: the
// managed runtime check must happen before the context-build view is started, and it must
// receive the CLI version, install policy, and the 'core' feature.
it('preflights foreground query-history runtime before starting the context-build view', async () => {
const io = makeIo({ isTTY: true, interactive: true });
// Records the order in which the injected dependencies are invoked.
const calls: string[] = [];
const project = projectWithConnections({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const ensureRuntime = vi.fn(async (): Promise<ManagedPythonCommandRuntime> => {
calls.push('runtime');
return {} as ManagedPythonCommandRuntime;
});
const runContextBuild = vi.fn(async () => {
calls.push('context-build');
return { exitCode: 0 };
});
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'auto',
queryHistory: 'enabled',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'prompt',
},
io.io,
{
loadProject: vi.fn(async () => project),
ensureRuntime,
runContextBuild,
},
),
).resolves.toBe(0);
// Runtime preflight first, context build second.
expect(calls).toEqual(['runtime', 'context-build']);
expect(ensureRuntime).toHaveBeenCalledWith(
expect.objectContaining({
cliVersion: '0.2.0',
installPolicy: 'prompt',
feature: 'core',
}),
);
});
it('runs all independent targets and reports partial failures', async () => {
const io = makeIo();
const project = projectWithConnections({
@ -806,7 +854,12 @@ describe('runKtxPublicIngest', () => {
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async () => 1);
const runIngest = vi.fn(async (_args, ingestIo) => {
ingestIo.stdout.write(
'Error: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.\n',
);
return 1;
});
await expect(
runKtxPublicIngest(
@ -824,11 +877,52 @@ describe('runKtxPublicIngest', () => {
),
).resolves.toBe(1);
expect(io.stdout()).toContain('warehouse failed at query-history.');
expect(io.stdout()).toMatch(/warehouse\s+done\s+failed\s+skipped\s+skipped/);
expect(io.stdout()).toContain(
'warehouse failed: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history',
);
expect(io.stdout()).not.toContain('warehouse failed: Error:');
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
expect(io.stdout()).not.toContain('historic-sql');
});
it('prints the runtime artifact build hint for missing query-history runtime assets', async () => {
const io = makeIo();
const project = deepReadyProject({
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
});
const runScan = vi.fn(async () => 0);
const runIngest = vi.fn(async (_args, ingestIo) => {
ingestIo.stderr.write('Missing bundled Python runtime manifest: /repo/packages/cli/assets/python/manifest.json\n');
ingestIo.stderr.write('In a source checkout, build the local runtime assets with: pnpm run artifacts:build\n');
ingestIo.stderr.write('Then retry the runtime-backed KTX command.\n');
return 1;
});
await expect(
runKtxPublicIngest(
{
command: 'run',
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
json: false,
inputMode: 'disabled',
queryHistory: 'enabled',
},
io.io,
{ loadProject: vi.fn(async () => project), runScan, runIngest },
),
).resolves.toBe(1);
expect(io.stdout()).toContain('Missing bundled Python runtime manifest');
expect(io.stdout()).toContain(
'In a source checkout, build the local runtime assets with: pnpm run artifacts:build',
);
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command');
});
it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => {
const io = makeIo();
const project = projectWithConnections({

View file

@ -9,8 +9,14 @@ import {
isDatabaseDriver,
normalizeConnectionDriver,
} from './ingest-depth.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import {
ensureManagedPythonCommandRuntime,
type KtxManagedPythonInstallPolicy,
type ManagedPythonCommandRuntime,
} from './managed-python-command.js';
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
import { publicIngestOutputLine } from './public-ingest-copy.js';
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
import { profileMark } from './startup-profile.js';
@ -94,6 +100,13 @@ export interface KtxPublicIngestDeps {
) => Promise<{ exitCode: number }>;
scanProgress?: KtxProgressPort;
ingestProgress?: (update: KtxIngestProgressUpdate) => void;
ensureRuntime?: (options: {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxCliIo;
feature: KtxRuntimeFeature;
}) => Promise<ManagedPythonCommandRuntime>;
env?: NodeJS.ProcessEnv;
runtimeIo?: KtxCliIo;
onPhaseStart?: (phaseKey: KtxPublicIngestPhaseKey) => void;
onPhaseEnd?: (phaseKey: KtxPublicIngestPhaseKey, status: 'done' | 'failed' | 'skipped', summary?: string) => void;
@ -555,6 +568,7 @@ function markTargetResult(
): KtxPublicIngestTargetResult {
const selectedFailedOperation =
failedOperation ?? (target.operation === 'database-ingest' ? 'database-schema' : 'source-ingest');
const selectedFailedOperationIndex = target.steps.indexOf(selectedFailedOperation);
return {
connectionId: target.connectionId,
driver: target.driver,
@ -565,6 +579,10 @@ function markTargetResult(
if (status === 'done') {
return { ...step, status: 'done' };
}
const stepIndex = target.steps.indexOf(step.operation);
if (selectedFailedOperationIndex >= 0 && stepIndex >= 0 && stepIndex < selectedFailedOperationIndex) {
return { ...step, status: 'done' };
}
if (step.operation === selectedFailedOperation) {
return {
...step,
@ -663,16 +681,40 @@ function createCapturedPublicIngestIo(): CapturedPublicIngestIo {
const INTERNAL_STATUS_LINE_RE =
/^(Report|Run|Job|Status|Adapter|Connection|Sync|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/;
const ACTIONABLE_FAILURE_LINE_RE =
/^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX managed daemon|Error:|Failed\b|Could not\b|Cannot\b)/;
const RUNTIME_BACKED_RETRY_LINE_RE = /^Then retry the runtime-backed KTX command\.?$/;
function firstCapturedFailureLine(output: string): string | undefined {
return output
/**
 * Strips a leading `Error:` label (and any whitespace after it) from a line.
 * Lines that do not start with the label are returned unchanged.
 */
function trimErrorPrefix(line: string): string {
  const prefix = /^Error:\s*/.exec(line);
  if (prefix === null) {
    return line;
  }
  return line.slice(prefix[0].length);
}
function capturedFailureMessage(output: string): string | undefined {
const lines = output
.split(/\r?\n/)
.map((line) => line.trim())
.filter((line) => line.length > 0)
.filter((line) => !line.startsWith('KTX scan completed'))
.filter((line) => !INTERNAL_STATUS_LINE_RE.test(line))
.map(publicIngestOutputLine)
.find((line) => line.length > 0);
.map(publicIngestOutputLine);
const actionableIndex = lines.findIndex((line) => ACTIONABLE_FAILURE_LINE_RE.test(line));
if (actionableIndex < 0) {
const line = lines.find((candidate) => candidate.length > 0);
return line ? trimErrorPrefix(line) : undefined;
}
const firstLine = lines[actionableIndex];
if (!firstLine?.startsWith('Missing bundled Python runtime manifest')) {
return trimErrorPrefix(firstLine);
}
const followupLines = lines
.slice(actionableIndex + 1)
.filter((line) => !RUNTIME_BACKED_RETRY_LINE_RE.test(line))
.filter((line) => !/\bRetry:\s/.test(line))
.filter((line) => line.startsWith('In a source checkout, build the local runtime assets with:'));
return [firstLine, ...followupLines].join('\n');
}
export async function executePublicIngestTarget(
@ -737,7 +779,7 @@ export async function executePublicIngestTarget(
args,
'failed',
'database-schema',
capturedScanIo ? firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined,
capturedScanIo ? capturedFailureMessage(capturedScanIo.capturedOutput()) : undefined,
);
}
deps.onPhaseEnd?.('database-schema', 'done');
@ -779,7 +821,7 @@ export async function executePublicIngestTarget(
args,
'failed',
'query-history',
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
capturedIngestIo ? capturedFailureMessage(capturedIngestIo.capturedOutput()) : undefined,
);
}
deps.onPhaseEnd?.('query-history', 'done');
@ -819,7 +861,7 @@ export async function executePublicIngestTarget(
args,
exitCode === 0 ? 'done' : 'failed',
'source-ingest',
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
capturedIngestIo ? capturedFailureMessage(capturedIngestIo.capturedOutput()) : undefined,
);
}
@ -831,6 +873,22 @@ export async function runKtxPublicIngest(
const loadProject = deps.loadProject ?? loadKtxProject;
const project = await loadProject({ projectDir: args.projectDir });
if (shouldUseForegroundContextBuildView(args, io)) {
const plan = buildPublicIngestPlan(project, args);
const requirements = resolvePublicIngestRuntimeRequirements(plan, { env: deps.env ?? process.env });
const ensureRuntime = deps.ensureRuntime ?? ensureManagedPythonCommandRuntime;
for (const feature of requirements.features) {
try {
await ensureRuntime({
cliVersion: args.cliVersion ?? '0.0.0-private',
installPolicy: args.runtimeInstallPolicy ?? 'prompt',
io,
feature,
});
} catch (error) {
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
return 1;
}
}
const { runContextBuild } = await import('./context-build-view.js');
const contextBuild = deps.runContextBuild ?? runContextBuild;
const result = await contextBuild(

View file

@ -0,0 +1,55 @@
import { existsSync, readFileSync } from 'node:fs';
import { dirname, join, parse } from 'node:path';
import { fileURLToPath } from 'node:url';
// Matches a full `major.minor.patch` version with no leading zeros in the
// numeric parts, optionally followed by a `-prerelease` suffix and/or
// `+build` metadata (dot-separated alphanumeric/hyphen identifiers).
const semverPattern =
/^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?$/;
/**
 * Type guard for "object-like, but neither `null` nor an array".
 * Note that any non-array object (e.g. a `Date`) passes this check.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Returns `value` when it is a string matching `semverPattern`.
 *
 * @param value - Candidate version read from a package or policy file.
 * @param source - Where the value came from; included in the error message.
 * @throws Error when `value` is not a string or does not match the pattern.
 */
function assertReleaseVersion(value: unknown, source: string): string {
  if (typeof value === 'string' && semverPattern.test(value)) {
    return value;
  }
  throw new Error(`Invalid KTX release version in ${source}`);
}
/**
 * Walks from `startDir` up through its ancestors looking for a
 * `release-policy.json` file.
 *
 * @param startDir - Directory to start searching from.
 * @returns The path of the first `release-policy.json` found, or `undefined`
 *   when no ancestor contains one.
 */
function findReleasePolicyPath(startDir: string): string | undefined {
  const root = parse(startDir).root;
  let current = startDir;
  for (;;) {
    const candidate = join(current, 'release-policy.json');
    if (existsSync(candidate)) {
      return candidate;
    }
    const parent = dirname(current);
    // For a relative start directory `parse().root` is '' while `dirname`
    // bottoms out at '.', so comparing against the root alone would never
    // terminate. Also stop once `dirname` no longer makes progress.
    if (current === root || parent === current) {
      return undefined;
    }
    current = parent;
  }
}
/**
 * Reads `publicNpmPackageVersion` from the nearest `release-policy.json`
 * at or above `startDir` (defaults to this module's directory).
 *
 * @returns The validated version, or `undefined` when no policy file is found.
 * @throws Error when the policy file is not a JSON object or the version is
 *   not a valid release version; JSON syntax errors propagate from `JSON.parse`.
 */
function readSourceReleaseVersion(startDir = dirname(fileURLToPath(import.meta.url))): string | undefined {
  const policyPath = findReleasePolicyPath(startDir);
  if (!policyPath) {
    return undefined;
  }
  const parsed: unknown = JSON.parse(readFileSync(policyPath, 'utf8'));
  if (isPlainObject(parsed)) {
    return assertReleaseVersion(parsed.publicNpmPackageVersion, policyPath);
  }
  throw new Error(`Invalid KTX release policy: ${policyPath}`);
}
/**
 * Resolves the version string to use for the KTX runtime.
 *
 * For the `@kaelio/ktx` package the package's own version is validated and
 * returned. For any other package name the version comes from the nearest
 * `release-policy.json` (searched from `startDir`), falling back to
 * `packageVersion` when no policy file is found.
 *
 * @throws Error when the selected version is not a valid release version.
 */
export function resolveKtxRuntimeVersion(input: {
  packageName: string;
  packageVersion: string;
  startDir?: string;
}): string {
  const { packageName, packageVersion, startDir } = input;
  if (packageName !== '@kaelio/ktx') {
    return readSourceReleaseVersion(startDir) ?? packageVersion;
  }
  return assertReleaseVersion(packageVersion, `${packageName}/package.json`);
}

View file

@ -0,0 +1,81 @@
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project';
import { describe, expect, it } from 'vitest';
import {
resolveProjectRuntimeRequirements,
resolvePublicIngestRuntimeRequirements,
} from './runtime-requirements.js';
// Unit tests for the runtime-requirement resolvers: each case builds a project
// config (or ingest plan) and asserts which runtime features are reported.
describe('runtime requirement detection', () => {
// The `agents` option alone is enough to require the core runtime.
it('requires core for agent/MCP setup', () => {
const config = buildDefaultKtxProjectConfig();
expect(resolveProjectRuntimeRequirements(config, { agents: true }).features).toEqual(['core']);
});
// NOTE(review): the first assertion passes no `env`, so the resolver reads
// process.env; an ambient KTX_DAEMON_URL would presumably change the result.
it('requires core for Looker source ingest unless an external daemon is configured', () => {
const config: KtxProjectConfig = {
...buildDefaultKtxProjectConfig(),
connections: {
looker: { driver: 'looker', base_url: 'https://looker.example.com', client_id: 'client-id' },
},
};
expect(resolveProjectRuntimeRequirements(config).features).toEqual(['core']);
expect(resolveProjectRuntimeRequirements(config, { env: { KTX_DAEMON_URL: 'http://127.0.0.1:8765' } }).features).toEqual(
[],
);
});
// NOTE(review): same ambient-environment caveat as above for the first
// assertion (KTX_SQL_ANALYSIS_URL / KTX_DAEMON_URL).
it('requires core for query-history ingest unless SQL analysis is externally configured', () => {
const config: KtxProjectConfig = {
...buildDefaultKtxProjectConfig(),
connections: {
warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } },
},
};
expect(resolveProjectRuntimeRequirements(config).features).toEqual(['core']);
expect(
resolveProjectRuntimeRequirements(config, { env: { KTX_SQL_ANALYSIS_URL: 'http://127.0.0.1:8765' } }).features,
).toEqual([]);
});
// sentence-transformers pointed at the managed base URL maps to the
// `local-embeddings` feature rather than `core`.
it('requires local-embeddings for managed sentence-transformers embeddings', () => {
const config: KtxProjectConfig = {
...buildDefaultKtxProjectConfig(),
ingest: {
...buildDefaultKtxProjectConfig().ingest,
embeddings: {
backend: 'sentence-transformers' as const,
model: 'all-MiniLM-L6-v2',
dimensions: 384,
sentenceTransformers: {
base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL,
},
},
},
};
expect(resolveProjectRuntimeRequirements(config).features).toEqual(['local-embeddings']);
});
// The ingest-plan resolver looks at the selected targets, not the project config.
it('detects foreground ingest runtime needs from selected query-history targets', () => {
expect(
resolvePublicIngestRuntimeRequirements({
projectDir: '/tmp/project',
warnings: [],
targets: [
{
connectionId: 'warehouse',
driver: 'postgres',
operation: 'database-ingest',
debugCommand: 'ktx ingest warehouse --debug',
steps: ['database-schema', 'query-history'],
queryHistory: { enabled: true },
},
],
}).features,
).toEqual(['core']);
});
});

View file

@ -0,0 +1,168 @@
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
import type {
KtxProjectConfig,
KtxProjectConnectionConfig,
KtxProjectEmbeddingConfig,
} from '@ktx/context/project';
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
import type { KtxPublicIngestPlan } from './public-ingest.js';
/** Identifies which detection rule caused a runtime feature to be required. */
type KtxRuntimeRequirementReason =
| 'agent-mcp'
| 'query-history'
| 'looker-source'
| 'database-introspection'
| 'local-embeddings';
/** A single reason why a runtime feature is needed. */
interface KtxRuntimeRequirement {
// Runtime feature that must be available.
feature: KtxRuntimeFeature;
// Detection rule that produced this requirement.
reason: KtxRuntimeRequirementReason;
// Human-readable sentence explaining the requirement.
detail: string;
}
/**
 * Result of requirement detection. As built by `uniqueRequirements`,
 * `features` is the deduplicated, sorted list of features named by
 * `requirements`, and `requirements` has exact duplicates removed.
 */
export interface KtxRuntimeRequirements {
features: KtxRuntimeFeature[];
requirements: KtxRuntimeRequirement[];
}
/** Options for `resolveProjectRuntimeRequirements`. */
export interface KtxProjectRuntimeRequirementOptions {
// When true, the core runtime is required for agent MCP setup.
agents?: boolean;
// When true, the core runtime is required unless KTX_DAEMON_URL is set.
databaseIntrospectionFallback?: boolean;
// Environment to inspect for override URLs; defaults to process.env.
env?: NodeJS.ProcessEnv | Record<string, string | undefined>;
}
/** Options for `resolvePublicIngestRuntimeRequirements`. */
export interface KtxPublicIngestRuntimeRequirementOptions {
// Environment to inspect for override URLs; defaults to process.env.
env?: NodeJS.ProcessEnv | Record<string, string | undefined>;
}
/**
 * Converts a driver value to a trimmed, lower-case string.
 * `null` and `undefined` become the empty string.
 */
function normalizeDriver(driver: unknown): string {
  const raw = driver ?? '';
  const text = String(raw);
  return text.trim().toLowerCase();
}
/**
 * Views `value` as a string-keyed record so properties can be probed safely.
 * Anything that is not a non-null object yields a fresh empty object.
 */
function recordValue(value: unknown): Record<string, unknown> {
  if (typeof value !== 'object' || value === null) {
    return {};
  }
  return value as Record<string, unknown>;
}
/**
 * True only when the connection's `context.queryHistory.enabled` is exactly
 * `true`; missing or non-object intermediate values count as disabled.
 */
function hasEnabledQueryHistory(connection: KtxProjectConnectionConfig): boolean {
  const { context } = recordValue(connection);
  const { queryHistory } = recordValue(context);
  return recordValue(queryHistory).enabled === true;
}
/** True when `KTX_DAEMON_URL` is set to a non-blank string in `env`. */
function hasDaemonOverride(env: NodeJS.ProcessEnv | Record<string, string | undefined>): boolean {
  const daemonUrl = env.KTX_DAEMON_URL;
  if (typeof daemonUrl !== 'string') {
    return false;
  }
  return daemonUrl.trim().length > 0;
}
/**
 * True when SQL analysis is provided externally: either
 * `KTX_SQL_ANALYSIS_URL` is a non-blank string, or a daemon override is set.
 */
function hasSqlAnalysisOverride(env: NodeJS.ProcessEnv | Record<string, string | undefined>): boolean {
  const sqlAnalysisUrl = env.KTX_SQL_ANALYSIS_URL;
  if (typeof sqlAnalysisUrl === 'string' && sqlAnalysisUrl.trim().length > 0) {
    return true;
  }
  return hasDaemonOverride(env);
}
/**
 * True when the embeddings backend is `sentence-transformers` and its base
 * URL is unset, empty, or the managed sentinel URL.
 */
function requiresManagedLocalEmbeddings(embeddings: KtxProjectEmbeddingConfig): boolean {
  if (embeddings.backend !== 'sentence-transformers') {
    return false;
  }
  switch (embeddings.sentenceTransformers?.base_url) {
    case undefined:
    case '':
    case MANAGED_SENTENCE_TRANSFORMERS_BASE_URL:
      return true;
    default:
      return false;
  }
}
/**
 * Removes requirements that repeat the same feature, reason and detail
 * (keeping the first occurrence, in input order) and derives the sorted list
 * of distinct features they mention.
 */
function uniqueRequirements(requirements: KtxRuntimeRequirement[]): KtxRuntimeRequirements {
  const firstByKey = new Map<string, KtxRuntimeRequirement>();
  for (const entry of requirements) {
    const key = `${entry.feature}:${entry.reason}:${entry.detail}`;
    if (!firstByKey.has(key)) {
      firstByKey.set(key, entry);
    }
  }
  const deduped = Array.from(firstByKey.values());
  const features = Array.from(new Set(deduped.map((entry) => entry.feature)));
  features.sort((left, right) => left.localeCompare(right));
  return { features, requirements: deduped };
}
/**
 * Determines which runtime features a project configuration needs.
 *
 * Requirements are collected in this order: agent MCP setup, database
 * introspection fallback, then per connection (Looker parsing, query
 * history), and finally managed local embeddings. Requirements that an
 * environment override makes unnecessary are omitted.
 *
 * @param config - Project configuration to inspect.
 * @param options - Extra triggers and the environment to read (defaults to `process.env`).
 * @returns Deduplicated requirements plus the sorted list of features.
 */
export function resolveProjectRuntimeRequirements(
  config: KtxProjectConfig,
  options: KtxProjectRuntimeRequirementOptions = {},
): KtxRuntimeRequirements {
  const env = options.env ?? process.env;
  const daemonOverridden = hasDaemonOverride(env);
  const sqlAnalysisOverridden = hasSqlAnalysisOverride(env);
  const collected: KtxRuntimeRequirement[] = [];

  if (options.agents === true) {
    collected.push({
      feature: 'core',
      reason: 'agent-mcp',
      detail: 'Agent MCP setup uses semantic-layer query tools and SQL validation.',
    });
  }

  if (!daemonOverridden && options.databaseIntrospectionFallback === true) {
    collected.push({
      feature: 'core',
      reason: 'database-introspection',
      detail: 'Database introspection fallback uses the Python daemon.',
    });
  }

  Object.entries(config.connections).forEach(([connectionId, connection]) => {
    const driver = normalizeDriver(connection.driver);
    const isLooker = driver === 'looker' || driver === 'local_looker';
    if (isLooker && !daemonOverridden) {
      collected.push({
        feature: 'core',
        reason: 'looker-source',
        detail: `${connectionId} uses Looker identifier parsing.`,
      });
    }
    if (!sqlAnalysisOverridden && hasEnabledQueryHistory(connection)) {
      collected.push({
        feature: 'core',
        reason: 'query-history',
        detail: `${connectionId} has query history enabled.`,
      });
    }
  });

  if (requiresManagedLocalEmbeddings(config.ingest.embeddings)) {
    collected.push({
      feature: 'local-embeddings',
      reason: 'local-embeddings',
      detail: 'Local sentence-transformers embeddings use the managed Python runtime.',
    });
  }

  return uniqueRequirements(collected);
}
/**
 * Determines which runtime features the targets of an ingest plan need.
 *
 * For each target, query-history ingest requires `core` unless SQL analysis
 * is overridden, and a Looker driver or adapter requires `core` unless a
 * daemon override is set.
 *
 * @param plan - Ingest plan whose targets are inspected.
 * @param options - Environment to read (defaults to `process.env`).
 * @returns Deduplicated requirements plus the sorted list of features.
 */
export function resolvePublicIngestRuntimeRequirements(
  plan: KtxPublicIngestPlan,
  options: KtxPublicIngestRuntimeRequirementOptions = {},
): KtxRuntimeRequirements {
  const env = options.env ?? process.env;
  const daemonOverridden = hasDaemonOverride(env);
  const sqlAnalysisOverridden = hasSqlAnalysisOverride(env);

  const collected = plan.targets.flatMap((target) => {
    const found: KtxRuntimeRequirement[] = [];
    const driver = normalizeDriver(target.driver);
    const adapter = normalizeDriver(target.adapter);
    const usesLooker = driver === 'looker' || driver === 'local_looker' || adapter === 'looker';

    if (!sqlAnalysisOverridden && target.queryHistory?.enabled === true) {
      found.push({
        feature: 'core',
        reason: 'query-history',
        detail: `${target.connectionId} query-history ingest uses SQL analysis.`,
      });
    }
    if (usesLooker && !daemonOverridden) {
      found.push({
        feature: 'core',
        reason: 'looker-source',
        detail: `${target.connectionId} uses Looker identifier parsing.`,
      });
    }
    return found;
  });

  return uniqueRequirements(collected);
}

View file

@ -176,12 +176,33 @@ describe('setup embeddings step', () => {
expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings);
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings');
expect(spinnerEvents).toContainEqual(
'start:Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.',
);
expect(spinnerEvents).toContainEqual('start:Testing local embeddings (all-MiniLM-L6-v2)');
expect(io.stdout()).toContain('Embeddings ready: yes');
});
it('uses a short non-animated local embeddings health-check status by default', async () => {
const io = makeIo();
const healthCheck = vi.fn(async () => ({ ok: true as const }));
const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] });
const result = await runKtxSetupEmbeddingsStep(
{
projectDir: tempDir,
inputMode: 'auto',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
skipEmbeddings: false,
},
io.io,
{ prompts, env: {}, healthCheck, ensureLocalEmbeddings: vi.fn(async () => managedDaemon()) },
);
expect(result.status).toBe('ready');
expect(io.stderr()).toContain('Testing local embeddings (all-MiniLM-L6-v2)');
expect(io.stderr()).not.toContain('First run may take up to 60 seconds');
expect(io.stderr().match(/Testing local embeddings/g)).toHaveLength(1);
});
it('shows live progress while local sentence-transformers embeddings are being tested', async () => {
const io = makeIo();
const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] });
@ -213,9 +234,7 @@ describe('setup embeddings step', () => {
);
await vi.waitFor(() => {
expect(spinnerEvents).toContainEqual(
'start:Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.',
);
expect(spinnerEvents).toContainEqual('start:Testing local embeddings (all-MiniLM-L6-v2)');
});
expect(resolveHealthCheck).toBeDefined();

View file

@ -10,7 +10,7 @@ import {
} from '@ktx/context/project';
import { type KtxEmbeddingConfig, type KtxEmbeddingHealthCheckResult, runKtxEmbeddingHealthCheck } from '@ktx/llm';
import type { KtxCliIo } from './cli-runtime.js';
import { createClackSpinner, type KtxCliSpinner } from './clack.js';
import { createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
import {
ensureManagedLocalEmbeddingsDaemon,
managedLocalEmbeddingHealthConfig,
@ -316,10 +316,7 @@ async function promptAfterLocalEmbeddingFailure(
function healthCheckStartText(backend: KtxSetupEmbeddingBackend, model: string, dimensions: number): string {
if (backend === LOCAL_EMBEDDING_BACKEND) {
return [
`Testing local sentence-transformers embeddings (${model}, ${dimensions} dimensions).`,
'First run may take up to 60 seconds.',
].join(' ');
return `Testing local embeddings (${model})`;
}
return `Checking ${backend} embeddings (${model}, ${dimensions} dimensions).`;
}
@ -424,7 +421,7 @@ export async function runKtxSetupEmbeddingsStep(
dimensions,
credentialValue,
});
const healthSpinner = (deps.spinner ?? createClackSpinner)();
const healthSpinner = (deps.spinner ?? (() => createStaticCliSpinner(io)))();
const progress = startHealthCheckProgress(healthSpinner, healthCheckStartText(selectedBackend, model, dimensions));
let health: KtxEmbeddingHealthCheckResult;
try {

View file

@ -164,7 +164,7 @@ describe('setup Anthropic model step', () => {
);
});
it('offers Vertex AI as an Anthropic model provider option', async () => {
it('offers Anthropic provider paths in the preferred order', async () => {
const prompts = makePromptAdapter({ providerChoice: 'back' });
const result = await runKtxSetupAnthropicModelStep(
@ -177,10 +177,12 @@ describe('setup Anthropic model step', () => {
expect(prompts.select).toHaveBeenCalledWith(
expect.objectContaining({
message: expect.stringContaining('Which LLM provider should KTX use?'),
options: expect.arrayContaining([
options: [
{ value: 'claude-code', label: 'Claude subscription (Pro/Max)' },
{ value: 'anthropic', label: 'Anthropic API key' },
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
{ value: 'back', label: 'Back' },
]),
],
}),
);
});

View file

@ -509,12 +509,12 @@ async function chooseBackend(
}
const choice = await prompts.select({
message: 'Which LLM provider should KTX use?',
options: [
{ value: 'anthropic', label: 'Anthropic API' },
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
{ value: 'claude-code', label: 'Local Claude Code session' },
{ value: 'back', label: 'Back' },
],
options: [
{ value: 'claude-code', label: 'Claude subscription (Pro/Max)' },
{ value: 'anthropic', label: 'Anthropic API key' },
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
{ value: 'back', label: 'Back' },
],
});
if (choice === 'back') {
return { status: 'back' };

View file

@ -8,6 +8,7 @@ const readyStatus: KtxSetupStatus = {
embeddings: { backend: 'openai', ready: true, model: 'text-embedding-3-small', dimensions: 1536 },
databases: [{ connectionId: 'warehouse', ready: true }],
sources: [],
runtime: { required: false, ready: true, features: [] },
context: { ready: true, status: 'completed' },
agents: [{ target: 'codex', scope: 'project', ready: true }],
};
@ -16,6 +17,7 @@ describe('setup ready menu', () => {
it('recognizes a ready setup only when required sections are ready', () => {
expect(isKtxSetupReady(readyStatus)).toBe(true);
expect(isKtxSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false);
expect(isKtxSetupReady({ ...readyStatus, runtime: { required: true, ready: false, features: ['core'] } })).toBe(false);
expect(isKtxSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false);
expect(isKtxSetupReady({ ...readyStatus, agents: [] })).toBe(false);
});
@ -24,6 +26,9 @@ describe('setup ready menu', () => {
expect(isKtxPreAgentSetupReady(readyStatus)).toBe(true);
expect(isKtxPreAgentSetupReady({ ...readyStatus, agents: [] })).toBe(true);
expect(isKtxPreAgentSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false);
expect(isKtxPreAgentSetupReady({ ...readyStatus, runtime: { required: true, ready: false, features: ['core'] } })).toBe(
false,
);
expect(isKtxPreAgentSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false);
});

View file

@ -4,7 +4,15 @@ import {
} from './setup-prompts.js';
import type { KtxSetupStatus } from './setup.js';
export type KtxSetupReadyAction = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents' | 'exit';
export type KtxSetupReadyAction =
| 'models'
| 'embeddings'
| 'databases'
| 'sources'
| 'runtime'
| 'context'
| 'agents'
| 'exit';
export interface KtxSetupReadyMenuPromptAdapter {
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
@ -22,6 +30,7 @@ export function isKtxPreAgentSetupReady(status: KtxSetupStatus): boolean {
status.embeddings.ready &&
status.databases.every((database) => database.ready) &&
status.sources.every((source) => source.ready) &&
status.runtime.ready &&
status.context.ready
);
}
@ -46,6 +55,7 @@ export async function runKtxSetupReadyChangeMenu(
{ value: 'embeddings', label: 'Embeddings' },
{ value: 'databases', label: 'Databases' },
{ value: 'sources', label: 'Context sources' },
...(status.runtime.required ? [{ value: 'runtime', label: 'Runtime' }] : []),
{ value: 'context', label: 'Rebuild KTX context' },
{ value: 'agents', label: 'Agent integration' },
{ value: 'exit', label: 'Exit' },

View file

@ -0,0 +1,153 @@
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
import { buildDefaultKtxProjectConfig, readKtxSetupState, type KtxProjectConfig } from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { ManagedPythonCommandRuntime } from './managed-python-command.js';
import { runKtxSetupRuntimeStep } from './setup-runtime.js';
/**
 * Builds an in-memory IO double: everything written to `io.stdout` /
 * `io.stderr` is appended to a buffer readable through `stdout()` / `stderr()`.
 */
function makeIo() {
  const captured = { stdout: '', stderr: '' };
  const sinkFor = (stream: 'stdout' | 'stderr') => ({
    write: (chunk: string) => {
      captured[stream] += chunk;
    },
  });
  return {
    io: {
      stdout: sinkFor('stdout'),
      stderr: sinkFor('stderr'),
    },
    stdout: () => captured.stdout,
    stderr: () => captured.stderr,
  };
}
/** Creates a mock project loader that always resolves to `{ config }`. */
function projectConfig(config: KtxProjectConfig) {
  const loadProject = async () => {
    return { config };
  };
  return vi.fn(loadProject);
}
// Tests for the setup "runtime" step. Each case runs against a fresh temporary
// project directory and injects mock loaders/installers through `deps`.
describe('runKtxSetupRuntimeStep', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-runtime-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Happy path: `agents: true` requires core, the installer is invoked with the
// CLI version and install policy, and the step is recorded in setup state.
it('ensures core runtime for agent setup and records the runtime step', async () => {
const io = makeIo();
const ensureRuntime = vi.fn(async (): Promise<ManagedPythonCommandRuntime> => ({} as ManagedPythonCommandRuntime));
await expect(
runKtxSetupRuntimeStep(
{
projectDir: tempDir,
inputMode: 'auto',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'prompt',
agents: true,
},
io.io,
{
loadProject: projectConfig(buildDefaultKtxProjectConfig()),
ensureRuntime,
env: {},
},
),
).resolves.toMatchObject({ status: 'ready' });
expect(ensureRuntime).toHaveBeenCalledWith(
expect.objectContaining({
cliVersion: '0.2.0',
installPolicy: 'prompt',
feature: 'core',
}),
);
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('runtime');
expect(io.stdout()).toContain('Runtime ready: yes (core)');
});
// Failure path: an installer error becomes a `failed` result, the message is
// written to stderr, and the step is not marked complete.
it('fails fast when required runtime features cannot be installed in no-input mode', async () => {
const io = makeIo();
const ensureRuntime = vi.fn(async () => {
throw new Error('KTX Python runtime is required for this command. Run: ktx dev runtime install --yes');
});
await expect(
runKtxSetupRuntimeStep(
{
projectDir: tempDir,
inputMode: 'disabled',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'never',
agents: true,
},
io.io,
{
loadProject: projectConfig(buildDefaultKtxProjectConfig()),
ensureRuntime,
env: {},
},
),
).resolves.toMatchObject({ status: 'failed' });
expect(ensureRuntime).toHaveBeenCalledWith(expect.objectContaining({ installPolicy: 'never' }));
expect((await readKtxSetupState(tempDir)).completed_steps).not.toContain('runtime');
expect(io.stderr()).toContain('ktx dev runtime install --yes');
});
// With managed sentence-transformers embeddings configured and `agents: false`,
// the local-embeddings daemon hook is invoked.
it('starts the managed local embeddings daemon for configured sentence-transformers embeddings', async () => {
const io = makeIo();
const ensureLocalEmbeddings = vi.fn(async () => ({
baseUrl: 'http://127.0.0.1:61234',
env: { KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL: 'http://127.0.0.1:61234' },
}));
const config: KtxProjectConfig = {
...buildDefaultKtxProjectConfig(),
ingest: {
...buildDefaultKtxProjectConfig().ingest,
embeddings: {
backend: 'sentence-transformers',
model: 'all-MiniLM-L6-v2',
dimensions: 384,
sentenceTransformers: { base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL },
},
},
};
await expect(
runKtxSetupRuntimeStep(
{
projectDir: tempDir,
inputMode: 'auto',
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
agents: false,
},
io.io,
{
loadProject: projectConfig(config),
ensureLocalEmbeddings,
env: {},
},
),
).resolves.toMatchObject({ status: 'ready' });
expect(ensureLocalEmbeddings).toHaveBeenCalledWith(
expect.objectContaining({
projectDir: tempDir,
installPolicy: 'auto',
}),
);
expect(io.stdout()).toContain('Runtime ready: yes (local embeddings)');
});
});

View file

@ -0,0 +1,103 @@
import {
loadKtxProject,
markKtxSetupStateStepComplete,
type KtxLocalProject,
} from '@ktx/context/project';
import type { KtxCliIo } from './cli-runtime.js';
import {
ensureManagedLocalEmbeddingsDaemon,
type ManagedLocalEmbeddingsDaemon,
} from './managed-local-embeddings.js';
import {
ensureManagedPythonCommandRuntime,
type KtxManagedPythonInstallPolicy,
type ManagedPythonCommandRuntime,
} from './managed-python-command.js';
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
import {
resolveProjectRuntimeRequirements,
type KtxRuntimeRequirements,
} from './runtime-requirements.js';
/** Arguments for `runKtxSetupRuntimeStep`. */
export interface KtxSetupRuntimeArgs {
// Project directory to load and whose setup state is updated.
projectDir: string;
// NOTE(review): not read by runKtxSetupRuntimeStep in this file — confirm whether callers rely on it.
inputMode: 'auto' | 'disabled';
// Forwarded to the runtime / local-embeddings installers.
cliVersion: string;
// Forwarded to the installers as `installPolicy`.
runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
// Passed through to requirement detection as the `agents` option.
agents: boolean;
// Passed through to requirement detection.
databaseIntrospectionFallback?: boolean;
}
/**
 * Outcome of the runtime setup step: `ready` after all required features were
 * ensured, `skipped` when no features are required, `failed` when an
 * installer threw. Every variant carries the detected requirements.
 */
export type KtxSetupRuntimeResult =
| { status: 'ready'; projectDir: string; requirements: KtxRuntimeRequirements }
| { status: 'skipped'; projectDir: string; requirements: KtxRuntimeRequirements }
| { status: 'failed'; projectDir: string; requirements: KtxRuntimeRequirements };
/**
 * Injectable collaborators for `runKtxSetupRuntimeStep`. Each one falls back
 * to the real implementation (or `process.env`) when omitted.
 */
export interface KtxSetupRuntimeDeps {
// Environment used for requirement detection.
env?: NodeJS.ProcessEnv;
// Loads the project; only its `config` is used.
loadProject?: (options: { projectDir: string }) => Promise<Pick<KtxLocalProject, 'config'>>;
// Ensures a runtime feature other than `local-embeddings` is available.
ensureRuntime?: (options: {
cliVersion: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxCliIo;
feature: KtxRuntimeFeature;
}) => Promise<ManagedPythonCommandRuntime>;
// Ensures the `local-embeddings` feature for the given project.
ensureLocalEmbeddings?: (options: {
cliVersion: string;
projectDir: string;
installPolicy: KtxManagedPythonInstallPolicy;
io: KtxCliIo;
}) => Promise<ManagedLocalEmbeddingsDaemon>;
}
/**
 * Display label for a runtime feature: `local embeddings` for
 * `local-embeddings`, and `core` for anything else.
 */
function formatRuntimeFeature(feature: KtxRuntimeFeature): string {
  if (feature === 'local-embeddings') {
    return 'local embeddings';
  }
  return 'core';
}
/**
 * Setup step that makes sure every runtime feature the project needs is
 * available.
 *
 * Loads the project, detects required features, and ensures each one in
 * order (`local-embeddings` via the local-embeddings hook, everything else
 * via the runtime hook). On success the `runtime` step is marked complete in
 * the project's setup state.
 *
 * @param args - Project location, CLI version, install policy and detection flags.
 * @param io - Output streams; status goes to stdout, installer errors to stderr.
 * @param deps - Optional overrides for the loader, installers and environment.
 * @returns `skipped` when nothing is required, `failed` when an installer
 *   throws (the error message is written to stderr), otherwise `ready`.
 */
export async function runKtxSetupRuntimeStep(
  args: KtxSetupRuntimeArgs,
  io: KtxCliIo,
  deps: KtxSetupRuntimeDeps = {},
): Promise<KtxSetupRuntimeResult> {
  const { projectDir, cliVersion, runtimeInstallPolicy: installPolicy } = args;
  const loadProjectForRuntime = deps.loadProject ?? loadKtxProject;
  const { config } = await loadProjectForRuntime({ projectDir });
  const requirements = resolveProjectRuntimeRequirements(config, {
    agents: args.agents,
    databaseIntrospectionFallback: args.databaseIntrospectionFallback,
    env: deps.env ?? process.env,
  });

  const { features } = requirements;
  if (features.length === 0) {
    io.stdout.write('│ Runtime setup skipped.\n');
    return { status: 'skipped', projectDir, requirements };
  }

  const ensureRuntime = deps.ensureRuntime ?? ensureManagedPythonCommandRuntime;
  const ensureLocalEmbeddings = deps.ensureLocalEmbeddings ?? ensureManagedLocalEmbeddingsDaemon;
  try {
    for (const feature of features) {
      if (feature !== 'local-embeddings') {
        await ensureRuntime({ cliVersion, installPolicy, io, feature });
      } else {
        await ensureLocalEmbeddings({ cliVersion, projectDir, installPolicy, io });
      }
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    io.stderr.write(`${message}\n`);
    return { status: 'failed', projectDir, requirements };
  }

  await markKtxSetupStateStepComplete(projectDir, 'runtime');
  const labels = features.map(formatRuntimeFeature).join(', ');
  io.stdout.write(`│ Runtime ready: yes (${labels})\n`);
  return { status: 'ready', projectDir, requirements };
}

View file

@ -0,0 +1,137 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
initKtxProject,
type KtxProjectConnectionConfig,
parseKtxProjectConfig,
serializeKtxProjectConfig,
} from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import {
runKtxSetupSourcesStep,
type KtxSetupSourcesPromptAdapter,
} from './setup-sources.js';
// Shared mock state for the Notion client. Created through vi.hoisted so it
// already exists when the (hoisted) vi.mock factory below runs.
const notionMocks = vi.hoisted(() => ({
// Every auth token handed to the mocked NotionClient constructor, in call order.
tokens: [] as string[],
retrieveBotUser: vi.fn(async () => ({ name: 'Docs Bot' })),
retrievePage: vi.fn(async () => ({ id: 'page-1' })),
}));
// Replace only NotionClient from '@ktx/context/ingest'; every other export keeps
// its real implementation via importOriginal.
vi.mock('@ktx/context/ingest', async (importOriginal) => {
const actual = await importOriginal<typeof import('@ktx/context/ingest')>();
return {
...actual,
NotionClient: vi.fn().mockImplementation(function NotionClient(token: string) {
// Record the token so tests can assert which credential was used.
notionMocks.tokens.push(token);
return {
retrieveBotUser: notionMocks.retrieveBotUser,
retrievePage: notionMocks.retrievePage,
};
}),
};
});
/**
 * Builds an in-memory CLI io pair for tests. stdout reports itself as a TTY;
 * everything written to either stream is accumulated and exposed through the
 * `stdout()` / `stderr()` accessors.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  const io = {
    stdout: {
      isTTY: true,
      write(chunk: string) {
        captured.out += chunk;
      },
    },
    stderr: {
      write(chunk: string) {
        captured.err += chunk;
      },
    },
  };
  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/**
 * Scripted prompt adapter for tests. `multiselect` and `select` replay the
 * queued answers in order (working on copies of the input arrays); once a
 * queue is exhausted they answer `[]` and `'back'` respectively. `text`
 * always answers `''` and `password` answers `undefined`.
 */
function prompts(values: { multiselect?: string[][]; select?: string[] }): KtxSetupSourcesPromptAdapter {
  const pendingMultiselect = (values.multiselect ?? []).slice();
  const pendingSelect = (values.select ?? []).slice();

  const adapter: KtxSetupSourcesPromptAdapter = {
    multiselect: vi.fn(async () => {
      const answer = pendingMultiselect.shift();
      return answer ?? [];
    }),
    select: vi.fn(async () => {
      const answer = pendingSelect.shift();
      return answer ?? 'back';
    }),
    text: vi.fn(async () => ''),
    password: vi.fn(async () => undefined),
    cancel: vi.fn(),
    log: vi.fn(),
  };
  return adapter;
}
// Exercises the Notion validation path of runKtxSetupSourcesStep against a
// temporary project, with NotionClient replaced by the module mock above.
describe('setup sources Notion validation', () => {
let tempDir: string;
let projectDir: string;
beforeEach(async () => {
// Reset mock state so each test only sees its own tokens and calls.
notionMocks.tokens.length = 0;
notionMocks.retrieveBotUser.mockClear();
notionMocks.retrievePage.mockClear();
tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-sources-notion-'));
projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Parses the project's current ktx.yaml.
async function readConfig() {
return parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'));
}
// Rewrites ktx.yaml with the given connection added next to a postgres
// `warehouse` connection, and lists `warehouse` in setup.database_connection_ids.
async function writeConfigConnection(connectionId: string, connection: KtxProjectConnectionConfig) {
const config = await readConfig();
await writeFile(
join(projectDir, 'ktx.yaml'),
serializeKtxProjectConfig({
...config,
connections: {
...config.connections,
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
[connectionId]: connection,
},
setup: {
...config.setup,
database_connection_ids: ['warehouse'],
},
}),
'utf-8',
);
}
it('validates an existing Notion source that uses an inline auth token', async () => {
await writeConfigConnection('notion', {
driver: 'notion',
auth_token: 'ntn_inline_token',
crawl_mode: 'all_accessible',
});
const io = makeIo();
await expect(
runKtxSetupSourcesStep(
{ projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
io.io,
{
// Scripted answers: pick the Notion source type, then the existing connection.
prompts: prompts({
multiselect: [['notion']],
select: ['existing:notion'],
}),
},
),
).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion'] });
// The inline token (not a token reference) must reach the client constructor.
expect(notionMocks.tokens).toEqual(['ntn_inline_token']);
expect(notionMocks.retrieveBotUser).toHaveBeenCalledOnce();
expect(io.stderr()).toBe('');
});
});

View file

@ -2,7 +2,10 @@ import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join, relative, resolve } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections';
import {
localConnectionTypeForConfig,
resolveNotionConnectionAuthToken,
} from '@ktx/context/connections';
import { resolveKtxConfigReference } from '@ktx/context/core';
import {
cloneOrPull,
@ -620,7 +623,10 @@ async function defaultValidateLookml(connection: KtxProjectConnectionConfig): Pr
}
async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Promise<SourceValidationResult> {
const token = await resolveNotionAuthToken(String(connection.auth_token_ref));
const token = await resolveNotionConnectionAuthToken({
auth_token: stringField(connection.auth_token) ?? null,
auth_token_ref: stringField(connection.auth_token_ref) ?? null,
});
const client: NotionApi = new NotionClient(token);
await client.retrieveBotUser();
const roots = Array.isArray(connection.root_page_ids)

View file

@ -38,6 +38,51 @@ function makeIo() {
};
}
/**
 * Canned "ready" result for a stubbed runtime setup step: reports the `core`
 * feature as required and carries an empty `requirements` list.
 */
function runtimeReady(projectDir: string) {
  const requirements = { features: ['core' as const], requirements: [] };
  return { status: 'ready' as const, projectDir, requirements };
}
/**
 * Writes a fake, already-installed managed runtime under `<rootDir>/.runtime`
 * for the given CLI version: empty `python` and `ktx-daemon` files in
 * `.venv/bin` plus a `manifest.json` that lists the `core` feature.
 *
 * @param rootDir - Directory that will contain the `.runtime` tree.
 * @param cliVersion - Version directory name and manifest `cliVersion`.
 * @returns The runtime root path (`<rootDir>/.runtime`).
 */
async function writeReadyRuntime(rootDir: string, cliVersion = '0.2.0') {
  const runtimeRoot = join(rootDir, '.runtime');
  const versionDir = join(runtimeRoot, cliVersion);
  const binDir = join(versionDir, '.venv', 'bin');
  const pythonPath = join(binDir, 'python');
  const daemonPath = join(binDir, 'ktx-daemon');

  await mkdir(binDir, { recursive: true });
  // Only the existence of the executables matters here, so they stay empty.
  for (const executable of [pythonPath, daemonPath]) {
    await writeFile(executable, '', 'utf-8');
  }

  const manifest = {
    schemaVersion: 1,
    cliVersion,
    installedAt: '2026-05-09T10:02:00.000Z',
    asset: {
      schemaVersion: 1,
      distributionName: 'kaelio-ktx',
      normalizedName: 'kaelio_ktx',
      version: '0.1.0',
      wheel: {
        file: 'kaelio_ktx-0.1.0-py3-none-any.whl',
        sha256: '0'.repeat(64),
        bytes: 0,
      },
    },
    features: ['core'],
    python: {
      executable: pythonPath,
      daemonExecutable: daemonPath,
    },
    installLog: join(versionDir, 'install.log'),
  };
  const manifestText = JSON.stringify(manifest, null, 2);
  await writeFile(join(versionDir, 'manifest.json'), `${manifestText}\n`, 'utf-8');

  return runtimeRoot;
}
describe('setup status', () => {
let tempDir: string;
@ -1054,7 +1099,7 @@ describe('setup status', () => {
);
});
it('auto-installs the managed runtime by default during setup', async () => {
it('prompts before installing the managed runtime by default during setup', async () => {
const io = makeIo();
const embeddings = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir }));
const context = vi.fn(async () => ({ status: 'failed' as const, projectDir: tempDir }));
@ -1088,14 +1133,14 @@ describe('setup status', () => {
expect(embeddings).toHaveBeenCalledWith(
expect.objectContaining({
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
runtimeInstallPolicy: 'prompt',
}),
io.io,
);
expect(context).toHaveBeenCalledWith(
expect.objectContaining({
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
runtimeInstallPolicy: 'prompt',
}),
io.io,
);
@ -1508,6 +1553,10 @@ describe('setup status', () => {
calls.push('sources');
return { status: 'skipped', projectDir: tempDir };
},
runtime: async () => {
calls.push('runtime');
return runtimeReady(tempDir);
},
context: async () => {
calls.push('context');
return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' };
@ -1524,7 +1573,7 @@ describe('setup status', () => {
),
).resolves.toBe(0);
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'context', 'agents']);
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'runtime', 'context', 'agents']);
});
it('commits setup config changes written by later setup steps', async () => {
@ -1565,6 +1614,7 @@ describe('setup status', () => {
return { status: 'skipped', projectDir: tempDir };
},
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
runtime: async () => runtimeReady(tempDir),
context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }),
agents: async () => ({
status: 'ready',
@ -1611,6 +1661,10 @@ describe('setup status', () => {
embeddings: async () => ({ status: 'skipped', projectDir: tempDir }),
databases: async () => ({ status: 'skipped', projectDir: tempDir }),
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
runtime: async () => {
calls.push('runtime');
return runtimeReady(tempDir);
},
context: async () => {
calls.push('context');
return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' };
@ -1627,7 +1681,7 @@ describe('setup status', () => {
),
).resolves.toBe(0);
expect(calls).toEqual(['context', 'agents']);
expect(calls).toEqual(['runtime', 'context', 'agents']);
});
it('does not install agents when non-interactive --agents finds context incomplete', async () => {
@ -1660,6 +1714,7 @@ describe('setup status', () => {
},
io.io,
{
runtime: async () => runtimeReady(tempDir),
context: async () => ({ status: 'skipped', projectDir: tempDir }),
agents,
},
@ -1695,7 +1750,7 @@ describe('setup status', () => {
'utf-8',
);
await writeKtxSetupState(tempDir, {
completed_steps: ['project', 'llm', 'embeddings', 'sources', 'context', 'agents'],
completed_steps: ['project', 'llm', 'embeddings', 'sources', 'runtime', 'context', 'agents'],
});
await writeFile(
join(tempDir, '.ktx/agents/install-manifest.json'),
@ -1726,55 +1781,69 @@ describe('setup status', () => {
commands: contextBuildCommands(tempDir, 'setup-context-local-ready'),
});
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'existing',
agents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.2.0',
skipLlm: false,
skipEmbeddings: false,
skipDatabases: false,
skipSources: false,
skipAgents: false,
databaseSchemas: [],
},
io.io,
{
readyMenuDeps: { prompts: { select: vi.fn(async () => 'agents'), cancel: vi.fn() } },
model: async (args) => {
expect(args.skipLlm).toBe(true);
return { status: 'skipped', projectDir: tempDir };
const previousRuntimeRoot = process.env.KTX_RUNTIME_ROOT;
process.env.KTX_RUNTIME_ROOT = await writeReadyRuntime(tempDir);
try {
await expect(
runKtxSetup(
{
command: 'run',
projectDir: tempDir,
mode: 'existing',
agents: false,
inputMode: 'auto',
yes: false,
cliVersion: '0.2.0',
skipLlm: false,
skipEmbeddings: false,
skipDatabases: false,
skipSources: false,
skipAgents: false,
databaseSchemas: [],
},
embeddings: async (args) => {
expect(args.skipEmbeddings).toBe(true);
return { status: 'skipped', projectDir: tempDir };
io.io,
{
readyMenuDeps: { prompts: { select: vi.fn(async () => 'agents'), cancel: vi.fn() } },
model: async (args) => {
expect(args.skipLlm).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
embeddings: async (args) => {
expect(args.skipEmbeddings).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
databases: async (args) => {
expect(args.skipDatabases).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
sources: async (args) => {
expect(args.skipSources).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
runtime: async () => {
calls.push('runtime');
return runtimeReady(tempDir);
},
agents: async () => {
calls.push('agents');
return {
status: 'ready',
projectDir: tempDir,
installs: [{ target: 'codex', scope: 'project', mode: 'mcp-cli' }],
};
},
},
databases: async (args) => {
expect(args.skipDatabases).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
sources: async (args) => {
expect(args.skipSources).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
agents: async () => {
calls.push('agents');
return {
status: 'ready',
projectDir: tempDir,
installs: [{ target: 'codex', scope: 'project', mode: 'mcp-cli' }],
};
},
},
),
).resolves.toBe(0);
),
).resolves.toBe(0);
} finally {
if (previousRuntimeRoot === undefined) {
delete process.env.KTX_RUNTIME_ROOT;
} else {
process.env.KTX_RUNTIME_ROOT = previousRuntimeRoot;
}
}
expect(calls).toEqual(['agents']);
expect(calls).toEqual(['runtime', 'agents']);
});
it('skips to agent setup when context is ready but agents are not configured', async () => {
@ -1854,6 +1923,10 @@ describe('setup status', () => {
expect(args.skipSources).toBe(true);
return { status: 'skipped', projectDir: tempDir };
},
runtime: async () => {
calls.push('runtime');
return runtimeReady(tempDir);
},
agents: async () => {
calls.push('agents');
return {
@ -1867,11 +1940,12 @@ describe('setup status', () => {
).resolves.toBe(0);
expect(readyMenuSelect).not.toHaveBeenCalled();
expect(calls).toEqual(['agents']);
expect(calls).toEqual(['runtime', 'agents']);
});
it('runs only project resolution, context gate, and agent setup in --agents mode', async () => {
it('runs only project resolution, runtime, context gate, and agent setup in --agents mode', async () => {
const io = makeIo();
const runtime = vi.fn(async () => runtimeReady(tempDir));
const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' }));
const agents = vi.fn(async () => ({
status: 'ready' as const,
@ -1903,12 +1977,14 @@ describe('setup status', () => {
model: async () => {
throw new Error('model should not run');
},
runtime,
context,
agents,
},
),
).resolves.toBe(0);
expect(runtime).toHaveBeenCalledTimes(1);
expect(context).toHaveBeenCalledTimes(1);
expect(agents).toHaveBeenCalledTimes(1);
});

View file

@ -9,6 +9,9 @@ import {
} from '@ktx/context/project';
import type { KtxCliIo } from './cli-runtime.js';
import { formatSetupNextStepLines } from './next-steps.js';
import { runtimeInstallPolicyFromFlags } from './managed-python-command.js';
import { readManagedPythonRuntimeStatus } from './managed-python-runtime.js';
import { resolveProjectRuntimeRequirements } from './runtime-requirements.js';
import { isKtxSetupExitError } from './setup-interrupt.js';
import {
type KtxAgentScope,
@ -37,6 +40,11 @@ import {
runKtxSetupReadyChangeMenu,
} from './setup-ready-menu.js';
import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js';
import {
type KtxSetupRuntimeDeps,
type KtxSetupRuntimeResult,
runKtxSetupRuntimeStep,
} from './setup-runtime.js';
import {
createKtxSetupPromptAdapter,
createKtxSetupUiAdapter,
@ -58,6 +66,7 @@ export interface KtxSetupStatus {
embeddings: { backend?: string; ready: boolean; model?: string; dimensions?: number };
databases: Array<{ connectionId: string; ready: boolean }>;
sources: Array<{ connectionId: string; type: string; ready: boolean }>;
runtime: { required: boolean; ready: boolean; features: string[]; detail?: string };
context: KtxSetupContextStatusSummary;
agents: Array<{ target: string; scope: string; ready: boolean }>;
}
@ -143,6 +152,8 @@ export interface KtxSetupDeps {
io: KtxCliIo,
) => Promise<Awaited<ReturnType<typeof runKtxSetupSourcesStep>>>;
sourcesDeps?: KtxSetupSourcesDeps;
runtime?: (args: Parameters<typeof runKtxSetupRuntimeStep>[0], io: KtxCliIo) => Promise<KtxSetupRuntimeResult>;
runtimeDeps?: KtxSetupRuntimeDeps;
agents?: (
args: Parameters<typeof runKtxSetupAgentsStep>[0],
io: KtxCliIo,
@ -158,7 +169,7 @@ export interface KtxSetupDeps {
const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']);
type KtxSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit';
type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents';
type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'runtime' | 'context' | 'agents';
type KtxSetupFlowStatus =
| 'ready'
| 'skipped'
@ -269,7 +280,16 @@ async function readIngestContextStatus(project: KtxLocalProject): Promise<KtxSet
};
}
export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupStatus> {
/** Optional inputs for `readKtxSetupStatus`. */
export interface ReadKtxSetupStatusOptions {
/**
 * CLI version whose managed runtime should be inspected. When set and the project
 * requires runtime features, runtime readiness comes from the installed runtime's
 * status and manifest rather than from the recorded `runtime` setup step.
 */
cliVersion?: string;
/** Environment used for requirement resolution and the runtime status lookup; defaults to `process.env`. */
env?: NodeJS.ProcessEnv;
/** Runtime status reader override; defaults to `readManagedPythonRuntimeStatus`. */
readRuntimeStatus?: typeof readManagedPythonRuntimeStatus;
}
export async function readKtxSetupStatus(
projectDir: string,
options: ReadKtxSetupStatusOptions = {},
): Promise<KtxSetupStatus> {
const resolvedProjectDir = resolve(projectDir);
if (!existsSync(join(resolvedProjectDir, 'ktx.yaml'))) {
return {
@ -278,6 +298,7 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
embeddings: { ready: false },
databases: [],
sources: [],
runtime: { required: false, ready: true, features: [] },
context: setupContextStatusFromState(await readKtxSetupContextState(resolvedProjectDir)),
agents: [],
};
@ -316,6 +337,21 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
});
}
const agents = [...agentMap.values()];
const runtimeRequirements = resolveProjectRuntimeRequirements(project.config, {
agents: agents.length > 0,
env: options.env ?? process.env,
});
let runtimeReady = runtimeRequirements.features.length === 0 || completedSteps.includes('runtime');
let runtimeDetail: string | undefined;
if (runtimeRequirements.features.length > 0 && options.cliVersion) {
const readRuntimeStatus = options.readRuntimeStatus ?? readManagedPythonRuntimeStatus;
const runtimeStatus = await readRuntimeStatus({ cliVersion: options.cliVersion, env: options.env ?? process.env });
runtimeDetail = runtimeStatus.detail;
runtimeReady =
runtimeStatus.kind === 'ready' &&
runtimeStatus.manifest !== undefined &&
runtimeRequirements.features.every((feature) => runtimeStatus.manifest?.features.includes(feature));
}
return {
project: { path: resolvedProjectDir, ready: true, name: basename(project.projectDir) || project.projectDir },
@ -329,6 +365,12 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
...source,
ready: completedSteps.includes('sources'),
})),
runtime: {
required: runtimeRequirements.features.length > 0,
ready: runtimeReady,
features: runtimeRequirements.features,
...(runtimeDetail ? { detail: runtimeDetail } : {}),
},
context: ingestContextStatus ?? setupContextStatus,
agents,
};
@ -374,6 +416,13 @@ export function formatKtxSetupStatus(status: KtxSetupStatus): string {
}`,
`Databases configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`,
`Context sources configured: ${formatConnectionList(status.sources.map((source) => source.connectionId))}`,
...(status.runtime.required
? [
`Runtime ready: ${formatReady(status.runtime.ready)}${
status.runtime.features.length > 0 ? ` (${status.runtime.features.join(', ')})` : ''
}`,
]
: []),
`KTX context built: ${formatContextBuilt(status.context)}`,
`Agent integration ready: ${formatReady(status.agents.some((agent) => agent.ready))}${
status.agents.length > 0 ? ` (${status.agents.map((agent) => `${agent.target}:${agent.scope}`).join(', ')})` : ''
@ -397,7 +446,8 @@ function setupStatusReady(status: KtxSetupStatus): boolean {
status.llm.ready &&
embeddingsReady(status.embeddings) &&
status.databases.every((database) => database.ready) &&
status.sources.every((source) => source.ready)
status.sources.every((source) => source.ready) &&
status.runtime.ready
);
}
@ -416,7 +466,10 @@ function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void {
}
function setupRuntimeInstallPolicy(args: Extract<KtxSetupArgs, { command: 'run' }>): 'prompt' | 'auto' | 'never' {
return args.inputMode === 'disabled' && !args.yes ? 'never' : 'auto';
if (args.yes) {
return 'auto';
}
return runtimeInstallPolicyFromFlags({ input: args.inputMode === 'disabled' ? false : true });
}
async function commitSetupConfigChanges(projectDir: string): Promise<void> {
@ -449,7 +502,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
setupLoop: while (true) {
entryAction = undefined;
if (canShowEntryMenu) {
const status = await readKtxSetupStatus(args.projectDir);
const status = await readKtxSetupStatus(args.projectDir, { cliVersion: args.cliVersion });
entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
if (entryAction === 'exit') {
(deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
@ -486,7 +539,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
}
const agentsRequested = args.agents || entryAction === 'agents';
const currentStatus = await readKtxSetupStatus(projectResult.projectDir);
const currentStatus = await readKtxSetupStatus(projectResult.projectDir, { cliVersion: args.cliVersion });
let readyAction: string | undefined;
if (args.inputMode !== 'disabled' && !agentsRequested) {
@ -503,13 +556,15 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
const shouldRunEmbeddings = !runOnly || runOnly === 'embeddings';
const shouldRunDatabases = !runOnly || runOnly === 'databases';
const shouldRunSources = !runOnly || runOnly === 'sources';
const shouldRunRuntime =
agentsRequested || !runOnly || runOnly === 'runtime' || runOnly === 'context' || runOnly === 'agents';
const shouldRunContext = agentsRequested || !runOnly || runOnly === 'context';
const shouldRunAgents = agentsRequested || !runOnly || runOnly === 'agents';
const showPromptInstructions = projectResult.confirmedCreation !== true;
const setupSteps: KtxSetupFlowStep[] = agentsRequested
? ['context']
: ['models', 'embeddings', 'databases', 'sources', 'context'];
? ['runtime', 'context']
: ['models', 'embeddings', 'databases', 'sources', 'runtime', 'context'];
if (shouldRunAgents && args.skipAgents !== true) {
setupSteps.push('agents');
}
@ -520,6 +575,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
if (step === 'embeddings') return !args.skipEmbeddings && shouldRunEmbeddings;
if (step === 'databases') return !args.skipDatabases && shouldRunDatabases;
if (step === 'sources') return args.skipSources !== true && shouldRunSources;
if (step === 'runtime') return shouldRunRuntime;
if (step === 'context') return shouldRunContext;
return shouldRunAgents && args.skipAgents !== true;
};
@ -636,6 +692,20 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
},
io,
);
} else if (step === 'runtime') {
const runtimeRunner =
deps.runtime ??
((runtimeArgs, runtimeIo) => runKtxSetupRuntimeStep(runtimeArgs, runtimeIo, deps.runtimeDeps));
stepResult = await runtimeRunner(
{
projectDir: projectResult.projectDir,
inputMode: args.inputMode,
cliVersion: args.cliVersion,
runtimeInstallPolicy: setupRuntimeInstallPolicy(args),
agents: shouldRunAgents && args.skipAgents !== true,
},
io,
);
} else if (step === 'context') {
const contextRunner =
deps.context ??
@ -706,7 +776,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
await commitSetupConfigChanges(projectResult.projectDir);
const status = await readKtxSetupStatus(projectResult.projectDir);
const status = await readKtxSetupStatus(projectResult.projectDir, { cliVersion: args.cliVersion });
const focusedOnAgents = args.agents || entryAction === 'agents';
if (!focusedOnAgents) {
setupUi.note(formatKtxSetupStatus(status).trimEnd(), 'Project status', io, {

View file

@ -1,6 +1,7 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { stripVTControlCharacters } from 'node:util';
import Database from 'better-sqlite3';
import { initKtxProject } from '@ktx/context/project';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
@ -98,6 +99,23 @@ describe('runKtxSl', () => {
});
});
it('prints semantic-layer search rank badges in pretty output', async () => {
const projectDir = join(tempDir, 'rank-project');
await seedSlSource({ projectDir });
const searchIo = makeIo();
await expect(
runKtxSl(
{ command: 'search', projectDir, connectionId: 'warehouse', query: 'order', output: 'pretty' },
searchIo.io,
),
).resolves.toBe(0);
const stdout = stripVTControlCharacters(searchIo.stdout());
expect(stdout).toMatch(/#1\s+orders/);
expect(stdout).not.toContain('%');
});
it('prints semantic-layer list and search as public JSON envelopes', async () => {
const projectDir = join(tempDir, 'project');
await seedSlSource({

View file

@ -109,7 +109,7 @@ async function printSlSources(input: {
emptyHint?: string;
}): Promise<void> {
const { resolveOutputMode } = await import('./io/mode.js');
const { printList } = await import('./io/print-list.js');
const { createRankBadgeFormatter, printList } = await import('./io/print-list.js');
const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io });
if (input.command === 'sl search') {
@ -119,7 +119,7 @@ async function printSlSources(input: {
label: 'SCORE',
plain: 'score=',
role: 'badge',
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
prettyFormat: createRankBadgeFormatter(input.rows as ReadonlyArray<LocalSlSourceSearchResult>),
dim: true,
},
{ key: 'connectionId', label: 'CONNECTION', plain: '' },

View file

@ -0,0 +1,85 @@
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import type { KtxCliIo } from './cli-runtime.js';
import { runKtxSourceMapping } from './source-mapping.js';
/**
 * Builds an in-memory `KtxCliIo` for tests. Everything written to either
 * stream is accumulated and exposed through the `stdout()` / `stderr()` accessors.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  const io = {
    stdout: {
      write(chunk: string) {
        captured.out += chunk;
      },
    },
    stderr: {
      write(chunk: string) {
        captured.err += chunk;
      },
    },
  } satisfies KtxCliIo;
  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
// Covers `runKtxSourceMapping` validate for a Metabase connection, driven by a
// hand-written ktx.yaml in a temporary project directory.
describe('source mapping commands', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-source-mapping-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Writes ktx.yaml with a postgres `warehouse` connection and a `metabase`
// connection; `metabaseMappings` are extra, pre-indented YAML lines appended
// under the metabase connection.
async function writeConfig(metabaseMappings: string[]): Promise<void> {
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: postgres',
' url: env:DATABASE_URL',
' metabase:',
' driver: metabase',
' api_url: https://metabase.example.com',
...metabaseMappings,
'',
].join('\n'),
'utf-8',
);
}
it('fails Metabase validation when no sync-enabled target mapping exists', async () => {
// No mappings at all: validation is expected to exit 1 with an explanatory message.
await writeConfig([]);
const io = makeIo();
await expect(
runKtxSourceMapping({ command: 'validate', projectDir: tempDir, connectionId: 'metabase' }, io.io),
).resolves.toBe(1);
expect(io.stderr()).toContain('no sync-enabled mappings with a target connection for Metabase connection metabase');
});
it('passes Metabase validation when a sync-enabled target mapping exists', async () => {
// Metabase database "3" is mapped to `warehouse` and has sync enabled.
await writeConfig([
' mappings:',
' databaseMappings:',
' "3": warehouse',
' syncEnabled:',
' "3": true',
]);
const io = makeIo();
await expect(
runKtxSourceMapping({ command: 'validate', projectDir: tempDir, connectionId: 'metabase' }, io.io),
).resolves.toBe(0);
expect(io.stdout()).toContain('Mapping validation passed: metabase');
});
});

View file

@ -12,6 +12,7 @@ import {
discoverMetabaseDatabases,
lookerCredentialsFromLocalConnection,
metabaseRuntimeConfigFromLocalConnection,
planMetabaseFanoutChildren,
seedLocalMappingStateFromKtxYaml,
validateLookerMappings,
validateMappingPhysicalMatch,
@ -198,6 +199,14 @@ export async function runKtxSourceMapping(
}
const rows = await store.listDatabaseMappings(args.connectionId);
planMetabaseFanoutChildren({
metabaseConnectionId: args.connectionId,
mappings: rows.map((row) => ({
metabaseDatabaseId: row.metabaseDatabaseId,
targetConnectionId: row.targetConnectionId,
syncEnabled: row.syncEnabled,
})),
});
const failures = rows.flatMap((row) => {
if (!row.targetConnectionId) {
return [];

View file

@ -0,0 +1,295 @@
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project';
import type { KtxScanConnector } from '@ktx/context/scan';
import type { SqlAnalysisPort } from '@ktx/context/sql-analysis';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { runKtxSql } from './sql.js';
/**
 * Builds an in-memory CLI io pair for tests. Everything written to either
 * stream is accumulated and exposed through the `stdout()` / `stderr()` accessors.
 */
function makeIo() {
  const captured = { out: '', err: '' };
  const io = {
    stdout: {
      write(chunk: string) {
        captured.out += chunk;
      },
    },
    stderr: {
      write(chunk: string) {
        captured.err += chunk;
      },
    },
  };
  return {
    io,
    stdout: () => captured.out,
    stderr: () => captured.err,
  };
}
/**
 * Stub `SqlAnalysisPort` whose `validateReadOnly` always resolves to `result`.
 * The two analysis methods are bare mocks with no implementation.
 */
function makeSqlAnalysis(result: Awaited<ReturnType<SqlAnalysisPort['validateReadOnly']>>): SqlAnalysisPort {
  const validateReadOnly = vi.fn(async () => result);
  const port: SqlAnalysisPort = {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly,
  };
  return port;
}
/**
 * Stub sqlite scan connector for tests. `executeReadOnly` resolves to a fixed
 * two-row `id`/`status` result and `cleanup` resolves to `undefined`; any
 * member can be replaced through `overrides`.
 */
function makeConnector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
  const defaults: KtxScanConnector = {
    id: 'sqlite:warehouse',
    driver: 'sqlite',
    capabilities: {
      structuralIntrospection: true,
      tableSampling: true,
      columnSampling: true,
      columnStats: true,
      readOnlySql: true,
      nestedAnalysis: false,
      eventStreamDiscovery: false,
      formalForeignKeys: true,
      estimatedRowCounts: true,
    },
    introspect: vi.fn(),
    executeReadOnly: vi.fn(async () => {
      const rows = [
        [1, 'paid'],
        [2, 'open'],
      ];
      return {
        headers: ['id', 'status'],
        headerTypes: ['integer', 'text'],
        rows,
        totalRows: 2,
        rowCount: 2,
      };
    }),
    cleanup: vi.fn(async () => undefined),
  };
  // Caller-supplied members take precedence over the defaults.
  return { ...defaults, ...overrides };
}
describe('runKtxSql', () => {
let tempDir: string;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sql-'));
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
async function writeConnections(
projectDir: string,
connections: ReturnType<typeof parseKtxProjectConfig>['connections'],
): Promise<void> {
const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'));
await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig({ ...config, connections }), 'utf-8');
}
it('validates SQL, executes through the scan connector, and prints a pretty table', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
const sqlAnalysis = makeSqlAnalysis({ ok: true, error: null });
const connector = makeConnector();
const createScanConnector = vi.fn(async () => connector);
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select id, status from orders',
maxRows: 1000,
output: 'pretty',
json: false,
cliVersion: '0.0.0-test',
},
io.io,
{
createSqlAnalysis: () => sqlAnalysis,
createScanConnector,
},
),
).resolves.toBe(0);
expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select id, status from orders', 'sqlite');
expect(createScanConnector).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'warehouse');
expect(connector.executeReadOnly).toHaveBeenCalledWith(
{ connectionId: 'warehouse', sql: 'select id, status from orders', maxRows: 1000 },
{ runId: 'cli-sql' },
);
expect(connector.cleanup).toHaveBeenCalledTimes(1);
expect(io.stdout()).toContain('id status');
expect(io.stdout()).toContain('1 paid');
expect(io.stdout()).toContain('2 open');
expect(io.stdout()).toContain('2 rows');
expect(io.stderr()).toBe('');
});
// `json: true` selects JSON output even though `output` is left undefined.
it('prints JSON output', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select id from orders',
maxRows: 10,
output: undefined,
json: true,
cliVersion: '0.0.0-test',
},
io.io,
{
createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
createScanConnector: vi.fn(async () => makeConnector()),
},
),
).resolves.toBe(0);
// stdout must be a single parseable JSON document. The expected headers and rows are
// presumably the makeConnector() stub's canned result rather than a function of the SQL text.
expect(JSON.parse(io.stdout())).toEqual({
connectionId: 'warehouse',
headers: ['id', 'status'],
headerTypes: ['integer', 'text'],
rows: [
[1, 'paid'],
[2, 'open'],
],
rowCount: 2,
});
});
// `output: 'plain'` prints a tab-separated header line followed by one line per row.
it('prints plain TSV output', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select id from orders',
maxRows: 10,
output: 'plain',
json: false,
cliVersion: '0.0.0-test',
},
io.io,
{
createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
createScanConnector: vi.fn(async () => makeConnector()),
},
),
).resolves.toBe(0);
// Exact match: plain mode prints no separator row and no trailing row-count line.
expect(io.stdout()).toBe('id\tstatus\n1\tpaid\n2\topen\n');
expect(io.stderr()).toBe('');
});
// A failed read-only validation must stop the command before any connector work happens.
it('rejects non-read-only SQL before executing connector SQL', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
const connector = makeConnector();
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'delete from orders',
maxRows: 1000,
output: 'pretty',
json: false,
cliVersion: '0.0.0-test',
},
io.io,
{
// The stubbed validator reports the statement as not read-only.
createSqlAnalysis: () => makeSqlAnalysis({ ok: false, error: 'SQL contains read/write operation: Delete' }),
createScanConnector: vi.fn(async () => connector),
},
),
).resolves.toBe(1);
// Neither execution nor cleanup is expected on the connector stub after a rejected statement.
expect(connector.executeReadOnly).not.toHaveBeenCalled();
expect(connector.cleanup).not.toHaveBeenCalled();
// The validator's message is surfaced on stderr.
expect(io.stderr()).toContain('SQL contains read/write operation: Delete');
});
// No connection is written here, so the requested `warehouse` id is expected to be
// unknown (assumes the freshly initialised project does not define it).
it('rejects missing connections', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select 1',
maxRows: 1000,
output: 'pretty',
json: false,
cliVersion: '0.0.0-test',
},
io.io,
{
// No createScanConnector override is supplied for this case.
createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
},
),
).resolves.toBe(1);
expect(io.stderr()).toContain('Connection "warehouse" is not configured in ktx.yaml');
});
// A connector that does not advertise read-only SQL is rejected, but because it was
// already created it must still be cleaned up.
it('rejects connectors without read-only SQL support and still cleans up', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });
await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
// Start from the default stub's capabilities and switch off only `readOnlySql`.
const connector = makeConnector({
capabilities: {
...makeConnector().capabilities,
readOnlySql: false,
},
});
const io = makeIo();
await expect(
runKtxSql(
{
command: 'execute',
projectDir,
connectionId: 'warehouse',
sql: 'select 1',
maxRows: 1000,
output: 'pretty',
json: false,
cliVersion: '0.0.0-test',
},
io.io,
{
createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
createScanConnector: vi.fn(async () => connector),
},
),
).resolves.toBe(1);
expect(connector.executeReadOnly).not.toHaveBeenCalled();
expect(connector.cleanup).toHaveBeenCalledTimes(1);
expect(io.stderr()).toContain('Connection "warehouse" does not support read-only SQL execution.');
});
});

171
packages/cli/src/sql.ts Normal file
View file

@ -0,0 +1,171 @@
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
import type { KtxQueryResult, KtxScanConnector } from '@ktx/context/scan';
import type { SqlAnalysisDialect, SqlAnalysisPort } from '@ktx/context/sql-analysis';
import type { KtxCliIo } from './cli-runtime.js';
import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js';
import { profileMark } from './startup-profile.js';
profileMark('module:sql');
/** Renderings supported for a query result: aligned table, tab-separated text, or JSON. */
type KtxSqlOutputMode = 'pretty' | 'plain' | 'json';
/** Arguments for one SQL execution, consumed by `runKtxSql`. */
export type KtxSqlArgs = {
/** Only the `execute` command is modelled by this type. */
command: 'execute';
/** Project directory handed to the project loader. */
projectDir: string;
/** Key looked up in the loaded project's `connections` config. */
connectionId: string;
/** SQL text; it is validated as read-only before it is executed. */
sql: string;
/** Row limit forwarded to the connector's `executeReadOnly` call. */
maxRows: number;
/** Rendering mode; `pretty` is used when omitted. */
output?: KtxSqlOutputMode;
/** When `true`, JSON output is used regardless of `output`. */
json?: boolean;
/** CLI version forwarded to the default SQL-analysis port factory. */
cliVersion: string;
};
/**
 * Optional overrides for the collaborators `runKtxSql` uses. Each one falls back to
 * the real implementation when omitted.
 */
export interface KtxSqlDeps {
/** Replaces `loadKtxProject`. */
loadProject?: typeof loadKtxProject;
/** Replaces the managed-daemon SQL-analysis port factory. */
createSqlAnalysis?: () => SqlAnalysisPort;
/** Replaces `createKtxCliScanConnector`. */
createScanConnector?: typeof createKtxCliScanConnector;
}
/** Query result in the shape handed to the printers (and serialised as-is in JSON mode). */
interface SqlExecutionOutput {
/** Id of the connection the query ran against. */
connectionId: string;
/** Column names, in result order. */
headers: string[];
/** Column types; present only when the connector result supplies them. */
headerTypes?: string[];
/** Result rows; each row is an array of cell values. */
rows: unknown[][];
/** Row count reported by the connector, or `rows.length` when it reports none. */
rowCount: number;
}
/**
 * Lower-cased connection driver names and the SQL-analysis dialect each one is
 * validated against. A `Map` is used so that only these exact keys match; a plain
 * object lookup would also resolve inherited names such as `constructor`.
 */
const SQL_ANALYSIS_DIALECT_BY_DRIVER: ReadonlyMap<string, SqlAnalysisDialect> = new Map<string, SqlAnalysisDialect>([
  ['postgres', 'postgres'],
  ['postgresql', 'postgres'],
  ['bigquery', 'bigquery'],
  ['snowflake', 'snowflake'],
  ['mysql', 'mysql'],
  ['sqlserver', 'tsql'],
  ['mssql', 'tsql'],
  ['sqlite', 'sqlite'],
  ['sqlite3', 'sqlite'],
  ['clickhouse', 'clickhouse'],
  ['redshift', 'redshift'],
]);

/**
 * Pick the SQL-analysis dialect for a connection's `driver` setting.
 *
 * @param driver - Driver name from the connection config; matched case-insensitively
 *   after trimming surrounding whitespace.
 * @returns The mapped dialect, or `'postgres'` when the driver is missing or unknown.
 */
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
  const normalized = String(driver ?? '').trim().toLowerCase();
  return SQL_ANALYSIS_DIALECT_BY_DRIVER.get(normalized) ?? 'postgres';
}
/**
 * Decide how the result is rendered: an explicit `json: true` wins, otherwise the
 * requested `output` mode is used, and `pretty` is the fallback.
 */
function resolveOutputMode(args: KtxSqlArgs): KtxSqlOutputMode {
  const { json, output } = args;
  return json === true ? 'json' : (output ?? 'pretty');
}
/**
 * Render one result cell as text for the table and TSV printers.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned unchanged; numbers, booleans and bigints use `String()`.
 * - Everything else is JSON-encoded. Bigints nested inside such values are written
 *   as decimal strings, because `JSON.stringify` throws on them otherwise.
 *
 * @returns Always a string. Values `JSON.stringify` cannot represent (it yields
 *   `undefined` for symbols and functions) fall back to `String(value)`.
 */
function formatValue(value: unknown): string {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') return String(value);
  const encoded: string | undefined = JSON.stringify(value, (_key, nested: unknown) =>
    typeof nested === 'bigint' ? nested.toString() : nested,
  );
  return encoded ?? String(value);
}
/**
 * Write the result to stdout as pretty-printed JSON followed by a newline.
 *
 * Bigint cells are emitted as decimal strings: `JSON.stringify` throws a `TypeError`
 * on bigint values, and a string keeps the full precision.
 */
function printJson(output: SqlExecutionOutput, io: KtxCliIo): void {
  const json = JSON.stringify(
    output,
    (_key, value: unknown) => (typeof value === 'bigint' ? value.toString() : value),
    2,
  );
  io.stdout.write(`${json}\n`);
}
/**
 * Write the result as tab-separated text: one header line, then one line per row
 * with every cell rendered by `formatValue`.
 */
function printPlain(output: SqlExecutionOutput, io: KtxCliIo): void {
  const writeLine = (cells: string[]): void => {
    io.stdout.write(`${cells.join('\t')}\n`);
  };
  writeLine(output.headers);
  output.rows.forEach((row) => {
    writeLine(row.map(formatValue));
  });
}
/**
 * Write the result as an aligned text table followed by a blank line and a row count.
 *
 * Each column is as wide as its longest cell or header. The header and a dashed
 * separator are printed only when there is at least one header, and trailing padding
 * is trimmed from every line.
 */
function printPretty(output: SqlExecutionOutput, io: KtxCliIo): void {
  const rows = output.rows.map((row) => row.map(formatValue));
  // Fold over the rows instead of spreading them into Math.max: a spread passes one
  // argument per row and can exceed the engine's argument limit on large results.
  const widths = output.headers.map((header, index) =>
    rows.reduce((widest, row) => Math.max(widest, row[index]?.length ?? 0), header.length),
  );
  // Cells beyond the known headers have no computed width and are printed unpadded.
  const renderRow = (cells: string[]): string =>
    cells.map((cell, index) => cell.padEnd(widths[index] ?? cell.length)).join(' ').trimEnd();
  if (output.headers.length > 0) {
    io.stdout.write(`${renderRow(output.headers)}\n`);
    io.stdout.write(`${renderRow(widths.map((width) => '-'.repeat(width)))}\n`);
  }
  for (const row of rows) {
    io.stdout.write(`${renderRow(row)}\n`);
  }
  io.stdout.write(`\n${output.rowCount} ${output.rowCount === 1 ? 'row' : 'rows'}\n`);
}
/** Send the result to the printer for the chosen mode; anything but `json` or `plain` prints the table. */
function printSqlResult(output: SqlExecutionOutput, mode: KtxSqlOutputMode, io: KtxCliIo): void {
  switch (mode) {
    case 'json':
      printJson(output, io);
      break;
    case 'plain':
      printPlain(output, io);
      break;
    default:
      printPretty(output, io);
  }
}
/** Run the connector's `cleanup` hook when there is a connector and it defines one. */
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
  if (!connector?.cleanup) {
    return;
  }
  await connector.cleanup();
}
/**
 * Convert a connector query result into the shape the printers consume.
 * `headerTypes` is carried over only when the result has it, and `rowCount` falls
 * back to the number of returned rows when the result does not report one.
 */
function resultOutput(connectionId: string, result: KtxQueryResult): SqlExecutionOutput {
  const { headers, headerTypes, rows } = result;
  const rowCount = result.rowCount ?? rows.length;
  // Two literals keep the key order (and so the JSON output order) stable.
  if (headerTypes) {
    return { connectionId, headers, headerTypes, rows, rowCount };
  }
  return { connectionId, headers, rows, rowCount };
}
/**
 * Execute one read-only SQL statement against a configured connection and print
 * the result.
 *
 * The statement is validated as read-only before a scan connector is created, so a
 * rejected statement never reaches the connector. Once a connector exists it is
 * always cleaned up, whether the query succeeds or fails.
 *
 * @param args - Parsed command arguments.
 * @param io - Output streams; defaults to the current process.
 * @param deps - Optional overrides for project loading, SQL analysis and connector creation.
 * @returns `0` on success; `1` after writing the failure message to `io.stderr`.
 */
export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: KtxSqlDeps = {}): Promise<number> {
  try {
    const { connectionId, projectDir, sql, maxRows, cliVersion } = args;

    const loadProject = deps.loadProject ?? loadKtxProject;
    const project = await loadProject({ projectDir });
    const connection = project.config.connections[connectionId];
    if (!connection) {
      throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
    }

    // Validate first: nothing is opened against the data source for rejected SQL.
    const createSqlAnalysis =
      deps.createSqlAnalysis ??
      (() => createManagedDaemonSqlAnalysisPort({ cliVersion, projectDir, installPolicy: 'auto', io }));
    const verdict = await createSqlAnalysis().validateReadOnly(sql, sqlAnalysisDialectForDriver(connection.driver));
    if (!verdict.ok) {
      throw new Error(verdict.error ?? 'SQL is not read-only.');
    }

    const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector;
    let connector: KtxScanConnector | null = null;
    try {
      connector = await createScanConnector(project as KtxLocalProject, connectionId);
      if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) {
        throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
      }
      const queryResult = await connector.executeReadOnly({ connectionId, sql, maxRows }, { runId: 'cli-sql' });
      printSqlResult(resultOutput(connectionId, queryResult), resolveOutputMode(args), io);
      return 0;
    } finally {
      // `connector` is still null here when connector creation itself failed.
      await cleanupConnector(connector);
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    io.stderr.write(`${message}\n`);
    return 1;
  }
}

View file

@ -1,5 +1,12 @@
<role>
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit
gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs,
Metabase card JSONs, Notion pages, or similar) and you must translate that
slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass.
You run in an isolated WorkUnit worktree. Deterministic projection output,
existing project memory, and listed dependency paths are visible; sibling
WorkUnit edits from this same job are not visible until the runner integrates
accepted patches.
</role>
<stance>
@ -8,9 +15,19 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
<workflow>
1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files.
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
2. Load the per-source review skill first (for example `lookml_ingest`,
`metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and
`wiki_capture`, and `ingest_triage` last. The triage skill tells you how to
react when existing project memory, deterministic projection output, or
prior provenance overlaps with what this WorkUnit is about to write.
3. If the system prompt includes `<canonical_pins>`, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain.
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large
files) to load content. Before writing a new SL source or wiki page, call
`discover_data` for each candidate source, table, metric, or topic name to
find existing wiki pages, SL sources, deterministic projection output, prior
sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit
one, and apply any matching canonical pin before deciding whether to edit,
rename, or skip.
5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit.
6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit_source` over `sl_write_source` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag.
7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
@ -28,5 +45,7 @@ Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths li
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
- Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture.
- Do not duplicate an artifact that prior provenance says you already produced; update it.
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
- Do not silently accept a name collision with visible existing memory,
deterministic projection output, or prior provenance when the formula differs.
Trigger `ingest_triage`.
</do_not>

View file

@ -7,8 +7,11 @@ callers: [memory_agent]
# Ingest Triage - conflict classification and resolution
This skill is loaded in two contexts:
- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write.
- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions.
- By a Stage 3 WorkUnit agent when `sl_discover` results, deterministic projection
output, existing project memory, or prior provenance overlap with what the
current WorkUnit is about to write.
- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch
overlap, and eviction decisions.
Apply the rules below before every write that could collide with an existing artifact.
@ -23,7 +26,8 @@ Apply the rules below before every write that could collide with an existing art
3. **If the difference is structural - grain, columns, filter, join shape - is the current bundle the re-ingest of a previously-ingested bundle (i.e. `priorProvenance` has a row for this raw file and artifact)?**
Re-ingest change (semantic break): replace + flag. Record in the IngestReport's `conflicts_resolved` list with `flagged_for_human: true`.
4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:**
4. **If reconciliation sees accepted patches from this same job with no
prior-sync row, check for same-ingest contradictions:**
| Kind | Detection | Resolution |
|---|---|---|

View file

@ -0,0 +1,45 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it } from 'vitest';
import { GitService } from './git.service.js';
/**
 * Create a `GitService` rooted in a fresh temporary home directory, with its config
 * directory at `<home>/config`, and initialise it via `onModuleInit()`.
 */
async function makeGit() {
  const systemUser = 'System User';
  const systemEmail = 'system@example.com';
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');
  const gitSettings = {
    userName: systemUser,
    userEmail: systemEmail,
    bootstrapMessage: 'init',
    bootstrapAuthor: 'system',
    bootstrapAuthorEmail: systemEmail,
  };
  const git = new GitService({ storage: { configDir, homeDir }, git: gitSettings });
  await git.onModuleInit();
  return { homeDir, configDir, git };
}
// Round-trip test for the patch helpers: a patch written from one commit range is
// applied in a separate worktree and must reproduce the edited file there.
describe('GitService patch helpers', () => {
it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
const { homeDir, configDir, git } = await makeGit();
// First commit: the page with its original content. `base` marks the pre-edit state.
await mkdir(join(configDir, 'wiki/global'), { recursive: true });
await writeFile(join(configDir, 'wiki/global/page.md'), 'old\n');
await git.commitFiles(['wiki/global/page.md'], 'add page', 'System User', 'system@example.com');
const base = await git.revParseHead();
// Second commit: the edit that the patch should capture.
await writeFile(join(configDir, 'wiki/global/page.md'), 'new\n');
await git.commitFiles(['wiki/global/page.md'], 'edit page', 'System User', 'system@example.com');
const patchPath = join(homeDir, 'proposal.patch');
await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);
// Apply the patch in a worktree created at `base`, i.e. without the edit.
const targetDir = join(homeDir, 'target');
await git.addWorktree(targetDir, 'target', base);
const targetGit = git.forWorktree(targetDir);
await targetGit.applyPatchFile3WayIndex(patchPath);
await targetGit.commitStaged('apply proposal', 'System User', 'system@example.com');
await expect(readFile(join(targetDir, 'wiki/global/page.md'), 'utf-8')).resolves.toBe('new\n');
});
});

View file

@ -1,5 +1,5 @@
import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { dirname, join } from 'node:path';
import type { SimpleGit } from 'simple-git';
import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js';
import { createSimpleGit } from './git-env.js';
@ -533,6 +533,19 @@ export class GitService {
return out;
}
/**
 * List the paths `git status` reports as changed, de-duplicated and sorted.
 *
 * Parses `git status --porcelain=v1 -z`. Each entry is `XY <path>` terminated by
 * NUL. For renamed or copied entries the original path follows as its own
 * NUL-terminated field WITHOUT the `XY ` prefix, so it must be consumed explicitly
 * rather than parsed as another entry (which would cut off its first three characters).
 *
 * @returns Destination paths of all entries, plus the original path of each rename
 *   (that path no longer exists, so it counts as changed). The source of a copy is
 *   not reported because the copy leaves it untouched.
 */
async changedPaths(): Promise<string[]> {
  const raw = await this.git.raw(['status', '--porcelain=v1', '-z']);
  const fields = raw.split('\0');
  const paths = new Set<string>();
  for (let index = 0; index < fields.length; index += 1) {
    const entry = fields[index];
    // Skip the empty trailing field and anything too short to hold "XY " plus a path.
    if (!entry || entry.length <= 3) {
      continue;
    }
    const status = entry.slice(0, 2);
    paths.add(entry.slice(3));
    const isRename = status.includes('R');
    if (isRename || status.includes('C')) {
      // The next field is the original path of this entry, not a new entry.
      const originalPath = fields[index + 1];
      if (isRename && originalPath) {
        paths.add(originalPath);
      }
      index += 1;
    }
  }
  return [...paths].sort();
}
/**
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
* Used for the reconciler's first-ever run when there's no watermark to diff from.
@ -747,6 +760,55 @@ export class GitService {
}
}
/**
 * Write the diff between two revisions to `patchPath`, creating the parent
 * directory if needed. The diff is produced with `--binary --no-renames` over the
 * range `from..to`, and the work runs inside the mutation queue.
 */
async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise<void> {
  const diffArgs = ['diff', '--binary', '--no-renames', `${from}..${to}`];
  await this.withMutationQueue(async () => {
    const patchText = await this.git.raw(diffArgs);
    const patchDir = dirname(patchPath);
    await fs.mkdir(patchDir, { recursive: true });
    await fs.writeFile(patchPath, patchText, 'utf-8');
  });
}
/** Apply a patch file with `git apply --3way --index`, inside the mutation queue. */
async applyPatchFile3WayIndex(patchPath: string): Promise<void> {
  const applyArgs = ['apply', '--3way', '--index', patchPath];
  await this.withMutationQueue(async () => {
    await this.git.raw(applyArgs);
  });
}
/**
 * Commit whatever is currently staged and describe the resulting HEAD commit.
 *
 * When nothing is staged no commit is made: the current HEAD is described instead
 * and `created` is `false`.
 *
 * @param commitMessage - Message for the new commit.
 * @param author - Author name used in the `--author` option.
 * @param authorEmail - Author email used in the `--author` option.
 * @returns Details of HEAD after the call. Fields missing from the log entry fall
 *   back to the supplied arguments and the current time when a commit was created,
 *   or to empty strings and the epoch when it was not.
 */
async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise<GitCommitInfo> {
  return this.withMutationQueue(async () => {
    const stagedNames = await this.git.diff(['--cached', '--name-only']);
    const created = stagedNames.trim().length > 0;
    if (created) {
      await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
    }

    const commitHash = (await this.git.revparse(['HEAD'])).trim();
    const { latest } = await this.git.log({ maxCount: 1 });
    // Used only when the log entry carries no date.
    const fallbackDate = (): string => (created ? new Date() : new Date(0)).toISOString();

    return {
      commitHash,
      shortHash: commitHash.substring(0, 8),
      message: latest?.message ?? (created ? commitMessage : ''),
      author: latest?.author_name ?? (created ? author : ''),
      authorEmail: latest?.author_email ?? (created ? authorEmail : ''),
      timestamp: latest?.date ?? fallbackDate(),
      committedDate: latest?.date ? new Date(latest.date).toISOString() : fallbackDate(),
      created,
    };
  });
}
private async fileExists(path: string): Promise<boolean> {
try {
await fs.access(path);

View file

@ -1,6 +1,15 @@
import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js';
import type {
ChunkResult,
DeterministicFinalizationContext,
DiffSet,
FetchContext,
FinalizationResult,
ScopeDescriptor,
SourceAdapter,
} from '../../types.js';
import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js';
import { detectHistoricSqlStagedDir } from './detect.js';
import { projectHistoricSqlEvidence } from './projection.js';
import { stageHistoricSqlAggregatedSnapshot } from './stage-unified.js';
import { type HistoricSqlSourceAdapterDeps } from './types.js';
@ -35,4 +44,22 @@ export class HistoricSqlSourceAdapter implements SourceAdapter {
/** Describe the scope of a staged directory by delegating to `describeHistoricSqlUnifiedScope`. */
describeScope(stagedDir: string): Promise<ScopeDescriptor> {
return describeHistoricSqlUnifiedScope(stagedDir);
}
/**
 * Run the historic-SQL evidence projection for this finalization context and report
 * its outcome: the projection itself as `result`, plus its warnings, touched
 * sources, changed wiki page keys and actions. `errors` is always empty here.
 */
async finalize(ctx: DeterministicFinalizationContext): Promise<FinalizationResult> {
  const { workdir, connectionId, syncId, runId, overrideReplay } = ctx;
  const projection = await projectHistoricSqlEvidence({ workdir, connectionId, syncId, runId, overrideReplay });
  const { warnings, touchedSources, changedWikiPageKeys, actions } = projection;
  return {
    result: projection,
    warnings,
    errors: [],
    touchedSources,
    changedWikiPageKeys,
    actions,
  };
}
}

View file

@ -242,12 +242,12 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
expect(result.result.failedWorkUnits).toEqual([]);
expect(result.result.workUnitCount).toBe(3);
expect(agentRunner.runLoop).toHaveBeenCalledTimes(3);
const postProcessor = result.report.body.postProcessor;
expect(postProcessor).toBeDefined();
if (!postProcessor) {
throw new Error('Expected historic-SQL post-processor result');
const finalization = result.report.body.finalization;
expect(finalization).toBeDefined();
if (!finalization) {
throw new Error('Expected historic-SQL finalization result');
}
expect(postProcessor).toMatchObject({
expect(finalization).toMatchObject({
sourceKey: 'historic-sql',
status: 'success',
result: {
@ -255,7 +255,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
patternPagesWritten: 1,
},
});
expect(postProcessor.touchedSources).toEqual(
expect(finalization.declaredTouchedSources).toEqual(
expect.arrayContaining([
{ connectionId: 'warehouse', sourceName: 'customers' },
{ connectionId: 'warehouse', sourceName: 'orders' },

View file

@ -1,74 +0,0 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import YAML from 'yaml';
import { describe, expect, it } from 'vitest';
import { HistoricSqlProjectionPostProcessor } from './post-processor.js';
/** Create a fresh temporary working directory for one test and return its path. */
async function tempWorkdir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-post-processor-');
  return mkdtemp(prefix);
}
/**
 * Write `value` as pretty-printed JSON (with a trailing newline) to `relPath` under
 * `root`, creating the containing directory first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const filePath = join(root, relPath);
  const parentDir = join(filePath, '..');
  const body = `${JSON.stringify(value, null, 2)}\n`;
  await mkdir(parentDir, { recursive: true });
  await writeFile(filePath, body, 'utf-8');
}
// End-to-end check of the post-processor: with a schema shard, a staged manifest and
// one table-usage evidence file on disk, running it should merge the usage narrative
// into the shard and report the touched source.
describe('HistoricSqlProjectionPostProcessor', () => {
it('projects current run evidence before the ingest squash commit', async () => {
const workdir = await tempWorkdir();
// Existing schema shard containing the `orders` table the evidence refers to.
await mkdir(join(workdir, 'semantic-layer/warehouse/_schema'), { recursive: true });
await writeFile(
join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'),
YAML.stringify({ tables: { orders: { table: 'public.orders', columns: [{ name: 'id', type: 'string' }] } } }),
'utf-8',
);
// Staged raw-source manifest for sync `sync-1`.
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 1,
touchedTableCount: 1,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.orders.json', { table: 'public.orders' });
// Evidence emitted for run `run-1`: one table-usage record for `public.orders`.
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/orders.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.orders',
rawPath: 'tables/public.orders.json',
usage: {
narrative: 'Orders are repeatedly queried by lifecycle status.',
frequencyTier: 'high',
commonFilters: ['status'],
commonJoins: [],
staleSince: null,
},
});
const result = await new HistoricSqlProjectionPostProcessor().run({
connectionId: 'warehouse',
sourceKey: 'historic-sql',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
workdir,
parseArtifacts: null,
});
expect(result.errors).toEqual([]);
expect(result.warnings).toEqual([]);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
expect(result.result).toMatchObject({ tableUsageMerged: 1 });
// The narrative from the evidence file must now appear in the schema shard on disk.
await expect(readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves.toContain(
'Orders are repeatedly queried by lifecycle status.',
);
});
});

View file

@ -1,41 +0,0 @@
import type { IngestBundlePostProcessorInput, IngestBundlePostProcessorPort, IngestBundlePostProcessorResult } from '../../ports.js';
import { createSimpleGit } from '../../../core/git-env.js';
import { projectHistoricSqlEvidence } from './projection.js';
/**
 * Commit projection output in `workdir`, if there is any.
 *
 * Does nothing when `workdir` is not a git repository (a failing repository check
 * counts as "not a repository"), when no changed path lies under `semantic-layer/`
 * or starts with `wiki/global/historic-sql`, or when staging those paths leaves
 * nothing staged.
 */
async function commitProjectionChanges(workdir: string): Promise<void> {
  const git = createSimpleGit(workdir);
  const insideRepo = await git.checkIsRepo().catch(() => false);
  if (!insideRepo) {
    return;
  }

  const { files } = await git.status();
  const projectionPaths: string[] = [];
  for (const { path } of files) {
    if (path.startsWith('semantic-layer/') || path.startsWith('wiki/global/historic-sql')) {
      projectionPaths.push(path);
    }
  }
  if (projectionPaths.length === 0) {
    return;
  }

  await git.add(projectionPaths);
  const stagedNames = await git.diff(['--cached', '--name-only']);
  if (stagedNames.trim() === '') {
    return;
  }
  await git.commit('Project historic SQL evidence', { '--author': 'System User <system@example.com>' });
}
/**
 * Post-processor that projects historic-SQL evidence into the working directory
 * and then commits the projected files.
 */
export class HistoricSqlProjectionPostProcessor implements IngestBundlePostProcessorPort {
  /**
   * Project the evidence for this run, commit the resulting changes, and report the
   * projection together with its warnings and touched sources. `errors` is always empty.
   */
  async run(input: IngestBundlePostProcessorInput): Promise<IngestBundlePostProcessorResult> {
    const { workdir, connectionId, syncId, runId } = input;
    const projection = await projectHistoricSqlEvidence({ workdir, connectionId, syncId, runId });
    await commitProjectionChanges(workdir);
    const { warnings, touchedSources } = projection;
    return { result: projection, warnings, errors: [], touchedSources };
  }
}

View file

@ -74,6 +74,15 @@ describe('projectHistoricSqlEvidence', () => {
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'sl',
key: 'orders',
rawPaths: ['tables/public.orders.json'],
}),
]),
);
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep me',
@ -164,6 +173,16 @@ describe('projectHistoricSqlEvidence', () => {
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
expect(result.patternPagesWritten).toBe(1);
expect(result.changedWikiPageKeys).toContain('historic-sql-old-order-lifecycle');
expect(result.actions).toEqual(
expect.arrayContaining([
expect.objectContaining({
target: 'wiki',
key: 'historic-sql-old-order-lifecycle',
rawPaths: ['patterns-input.json'],
}),
]),
);
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
'Order Lifecycle Analysis',
);
@ -320,6 +339,19 @@ describe('projectHistoricSqlEvidence', () => {
probeWarnings: [],
staleArchiveAfterDays: 90,
});
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/customers.json', {
kind: 'table_usage',
connectionId: 'warehouse',
table: 'public.customers',
rawPath: 'tables/public.customers.json',
usage: {
narrative: 'Customers were queried.',
frequencyTier: 'low',
commonFilters: [],
commonJoins: [],
staleSince: null,
},
});
await writeText(
workdir,
'wiki/global/historic-sql-old-template.md',
@ -346,6 +378,9 @@ describe('projectHistoricSqlEvidence', () => {
expect(result.staleTablesMarked).toBe(1);
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
const staleAction = result.actions.find((action) => action.target === 'sl' && action.key === 'orders');
expect(staleAction).toEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
expect(staleAction?.rawPaths).toBeUndefined();
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
expect(shard.tables.orders.usage).toEqual({
ownerNote: 'keep analyst annotation',
@ -360,4 +395,63 @@ describe('projectHistoricSqlEvidence', () => {
'Old body',
);
});
// Guard for override replays: when the replayed run emitted no evidence, the
// projection must leave existing usage data and wiki pages alone instead of treating
// everything as stale.
it('does not mark stale or archive pages when override replay has no current-run evidence', async () => {
const workdir = await tempWorkdir();
// Existing shard with usage recorded by an earlier run.
await writeText(
workdir,
'semantic-layer/warehouse/_schema/public.yaml',
YAML.stringify({
tables: {
orders: {
table: 'public.orders',
usage: {
narrative: 'Orders were active before.',
frequencyTier: 'high',
commonFilters: ['status'],
commonGroupBys: ['status'],
commonJoins: [],
},
columns: [{ name: 'id', type: 'string' }],
},
},
}),
);
// Manifest for the replay sync; it reports no snapshot rows and no touched tables.
await writeJson(workdir, 'raw-sources/warehouse/historic-sql/override-sync/manifest.json', {
source: 'historic-sql',
connectionId: 'warehouse',
dialect: 'postgres',
fetchedAt: '2026-05-11T00:00:00.000Z',
windowStart: '2026-02-10T00:00:00.000Z',
windowEnd: '2026-05-11T00:00:00.000Z',
snapshotRowCount: 0,
touchedTableCount: 0,
parseFailures: 0,
warnings: [],
probeWarnings: [],
staleArchiveAfterDays: 90,
});
// No evidence files are written for `override-run`, and `overrideReplay` is set.
const result = await projectHistoricSqlEvidence({
workdir,
connectionId: 'warehouse',
syncId: 'override-sync',
runId: 'override-run',
overrideReplay: {
priorJobId: 'prior-job',
priorRunId: 'prior-run',
priorSyncId: 'prior-sync',
evictionRawPaths: ['tables/public/orders.json'],
},
});
// Every counter and change list must be empty: nothing was merged, marked or archived.
expect(result.tableUsageMerged).toBe(0);
expect(result.staleTablesMarked).toBe(0);
expect(result.patternPagesWritten).toBe(0);
expect(result.stalePatternPagesMarked).toBe(0);
expect(result.archivedPatternPages).toBe(0);
expect(result.touchedSources).toEqual([]);
expect(result.changedWikiPageKeys).toEqual([]);
expect(result.actions).toEqual([]);
});
});

View file

@ -1,7 +1,9 @@
import { access, mkdir, readdir, readFile, rename, writeFile } from 'node:fs/promises';
import { dirname, join, relative } from 'node:path';
import YAML from 'yaml';
import type { MemoryAction } from '../../../memory/index.js';
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
import type { FinalizationOverrideReplay } from '../../types.js';
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
import type { TableUsageOutput } from './skill-schemas.js';
@ -12,6 +14,7 @@ export interface HistoricSqlProjectionInput {
connectionId: string;
syncId: string;
runId: string;
overrideReplay?: FinalizationOverrideReplay;
}
export interface HistoricSqlProjectionResult {
@ -21,6 +24,8 @@ export interface HistoricSqlProjectionResult {
stalePatternPagesMarked: number;
archivedPatternPages: number;
touchedSources: Array<{ connectionId: string; sourceName: string }>;
changedWikiPageKeys: string[];
actions: MemoryAction[];
warnings: string[];
}
@ -223,6 +228,8 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
stalePatternPagesMarked: 0,
archivedPatternPages: 0,
touchedSources: [],
changedWikiPageKeys: [],
actions: [],
warnings: [],
};
const touchedKeys = new Set<string>();
@ -230,6 +237,16 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
const manifest = stagedManifestSchema.parse(await readJson(join(rawDir, 'manifest.json')));
const currentTables = await currentStagedTables(rawDir);
const evidence = await loadEvidence(input.workdir, input.runId);
if (input.overrideReplay && evidence.length === 0) {
result.warnings.push(
'historic-sql finalization skipped stale/archive cleanup during override replay without current-run evidence',
);
return result;
}
if (evidence.length === 0) {
result.warnings.push('historic-sql finalization skipped because no current-run evidence was emitted');
return result;
}
const tableEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'table_usage' } => entry.kind === 'table_usage');
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
@ -255,6 +272,14 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
touchedKeys.add(key);
result.touchedSources.push({ connectionId: input.connectionId, sourceName });
}
result.actions.push({
target: 'sl',
type: 'updated',
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Merged historic-SQL usage for ${matchingEvidence.table}`,
rawPaths: [matchingEvidence.rawPath],
});
}
} else if (entry.usage && !currentTables.has(tableRef)) {
const merged = mergeUsagePreservingExternal(entry.usage as TableUsageOutput | undefined, staleUsage(manifest.fetchedAt));
@ -267,6 +292,13 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
touchedKeys.add(key);
result.touchedSources.push({ connectionId: input.connectionId, sourceName });
}
result.actions.push({
target: 'sl',
type: 'updated',
key: sourceName,
targetConnectionId: input.connectionId,
detail: `Marked historic-SQL usage stale for ${tableRef}`,
});
}
}
}
@ -303,6 +335,14 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
await writeFile(pagePath, renderMarkdownPage(frontmatter, renderPatternMarkdown(pattern)), 'utf-8');
writtenKeys.add(key);
result.patternPagesWritten += 1;
result.changedWikiPageKeys.push(key);
result.actions.push({
target: 'wiki',
type: reusable ? 'updated' : 'created',
key,
detail: `Projected historic-SQL pattern ${pattern.pattern.title}`,
rawPaths: [pattern.rawPath],
});
}
for (const page of patternPages) {
@ -315,6 +355,13 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
'utf-8',
);
result.archivedPatternPages += 1;
result.changedWikiPageKeys.push(page.key);
result.actions.push({
target: 'wiki',
type: 'updated',
key: page.key,
detail: `Archived stale historic-SQL pattern page ${page.key}`,
});
continue;
}
const tags = [...new Set([...stringArray(page.frontmatter.tags), 'stale'])];
@ -324,7 +371,15 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
'utf-8',
);
result.stalePatternPagesMarked += 1;
result.changedWikiPageKeys.push(page.key);
result.actions.push({
target: 'wiki',
type: 'updated',
key: page.key,
detail: `Marked historic-SQL pattern page ${page.key} stale`,
});
}
result.changedWikiPageKeys = [...new Set(result.changedWikiPageKeys)].sort();
return result;
}

View file

@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => {
expect(warn).not.toHaveBeenCalled();
});
// Verifies that fetchMetabaseBundle reports its progress through ctx.memoryFlow.emit.
// Every emitted event is captured in `events`; the assertion uses arrayContaining +
// objectContaining, so it is insensitive to event order, to extra events, and to
// extra fields (such as `percent`) on each event.
it('emits memory-flow progress while fetching Metabase cards', async () => {
const events: unknown[] = [];
await fetchMetabaseBundle({
pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
stagedDir,
ctx: {
...makeFetchContext(),
// Only `emit` records anything; the other memory-flow hooks are inert spies.
memoryFlow: {
emit: (event) => events.push(event),
update: vi.fn(),
finish: vi.fn(),
snapshot: vi.fn(),
},
},
clientFactory,
sourceStateReader,
});
expect(events).toEqual(
expect.arrayContaining([
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetching Metabase database 42 metadata',
}),
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetching 1 Metabase card for database 42',
}),
// The per-card "Checked …" update is the only one expected to be flagged transient.
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Checked 1/1 Metabase cards for database 42; wrote 1',
transient: true,
}),
expect.objectContaining({
type: 'stage_progress',
stage: 'source',
message: 'Fetched Metabase database 42: 1 cards, 0 unresolved',
}),
]),
);
});
it('routes Metabase fetch warnings through the injected logger', async () => {
const logger = {
log: vi.fn(),

View file

@ -83,6 +83,15 @@ function resolvePath(index: Map<number | 'root', CollectionNode>, collectionId:
export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise<void> {
const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig);
const logger = params.logger ?? noopMetabaseFetchLogger;
const emitFetchProgress = (percent: number, message: string, transient = false): void => {
params.ctx.memoryFlow?.emit({
type: 'stage_progress',
stage: 'source',
percent,
message,
...(transient ? { transient } : {}),
});
};
const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId);
const mapping = syncState.mappings.find(
(m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled,
@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
const client = await params.clientFactory.createClient(pullConfig, params.ctx);
try {
emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`);
let mappingDatabaseName = mapping.metabaseDatabaseName;
let mappingEngine = mapping.metabaseEngine;
if (mappingDatabaseName === null) {
@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true });
const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger);
emitFetchProgress(
28,
`Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? '' : 's'} for database ${
pullConfig.metabaseDatabaseId
}`,
);
const referencedCollectionIds = new Set<number>();
let writtenCards = 0;
@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
}
}
}
const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length);
if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) {
emitFetchProgress(
30,
`Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`,
true,
);
}
}
emitFetchProgress(
32,
`Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`,
);
for (const colId of referencedCollectionIds) {
const node = collectionIndex.get(colId);

View file

@ -1,10 +1,12 @@
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
import type { SourceAdapter } from '../../types.js';
import type { MetricFlowParseResult } from './deep-parse.js';
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
function compileOnlyRequiredDepsCheck(): void {
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record<string, string>) {
return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin'));
}
/**
 * Test fixture: a MetricFlow parse result holding a single `orders` semantic
 * model (one dimension, one measure, one foreign entity), no cross-model
 * metrics or relationships, and one parser warning.
 */
function metricflowParseResult(): MetricFlowParseResult {
  // Annotated so the literal members (`'simple'`, `'foreign'`, …) keep their
  // contextual types instead of widening to `string`.
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };

  const fixture: MetricFlowParseResult = {
    semanticModels: [ordersModel],
    crossModelMetrics: [],
    relationships: [],
    warnings: ['parser warning'],
  };
  return fixture;
}
describe('MetricflowSourceAdapter', () => {
let tmpRoot: string;
let stagedDir: string;
@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => {
await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
expect(await adapter.detect(stagedDir)).toBe(true);
});
// Fetches from a local git fixture with `parsedTargetTables` in the pull config and
// checks that the same mapping can be read back from the staged directory through
// readMetricflowProjectionConfig.
it('persists parsed target tables for deterministic projection during fetch', async () => {
const repo = await makeRepo(tmpRoot, {
'dbt_project.yml': 'name: analytics\n',
'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
});
await adapter.fetch?.(
{
repoUrl: repo.repoUrl,
branch: 'main',
path: null,
authToken: null,
parsedTargetTables: {
orders: {
ok: true,
catalog: null,
schema: 'analytics',
name: 'orders',
canonicalTable: 'analytics.orders',
},
},
},
stagedDir,
{ connectionId: 'warehouse-1', sourceKey: 'metricflow' },
);
// toMatchObject: only the listed fields are compared, extra persisted fields are allowed.
await expect(readMetricflowProjectionConfig(stagedDir)).resolves.toMatchObject({
parsedTargetTables: {
orders: {
ok: true,
schema: 'analytics',
name: 'orders',
},
},
});
});
// Drives adapter.project() with a pre-written projection config and a mocked
// semantic-layer service, then checks that writes go through the worktree-scoped
// service returned by forWorktree(workdir) and that the projection result reports
// the parser warning and the touched `orders` source.
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
await writeMetricflowProjectionConfig(stagedDir, {
parsedTargetTables: {
orders: {
ok: true,
catalog: null,
schema: 'analytics',
name: 'orders',
canonicalTable: 'analytics.orders',
},
},
});
// Worktree-scoped service: reports no existing sources, so `orders` is treated as new.
const scoped = {
getManifestEntry: vi.fn().mockResolvedValue(null),
isManifestBacked: vi.fn().mockResolvedValue(false),
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
loadSource: vi.fn().mockResolvedValue(null),
writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
};
// Root service: only forWorktree returns something; the remaining methods are bare spies.
const semanticLayerService = {
forWorktree: vi.fn().mockReturnValue(scoped),
getManifestEntry: vi.fn(),
isManifestBacked: vi.fn(),
loadAllSources: vi.fn(),
loadSource: vi.fn(),
writeSource: vi.fn(),
};
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
parseArtifacts: metricflowParseResult(),
semanticLayerService: semanticLayerService as never,
});
expect(semanticLayerService.forWorktree).toHaveBeenCalledWith('/tmp/metricflow-integration');
expect(scoped.writeSource).toHaveBeenCalledWith(
'warehouse-1',
expect.objectContaining({ name: 'orders' }),
'dbt MetricFlow',
expect.any(String),
'dbt MetricFlow sync: create source orders',
{ skipValidation: true },
);
expect(result).toMatchObject({
warnings: ['parser warning'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-1', sourceName: 'orders' }],
changedWikiPageKeys: [],
});
});
// With parseArtifacts undefined, project() must return an error result rather than
// throw; the empty `{}` semantic-layer service stand-in shows the service is not
// needed on this path.
it('returns a projection error when parse artifacts are missing', async () => {
const result = await adapter.project?.({
connectionId: 'warehouse-1',
sourceKey: 'metricflow',
syncId: 'sync-1',
jobId: 'job-1',
runId: 'run-1',
stagedDir,
workdir: '/tmp/metricflow-integration',
parseArtifacts: undefined,
semanticLayerService: {} as never,
});
expect(result).toMatchObject({
warnings: [],
errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
touchedSources: [],
changedWikiPageKeys: [],
});
});
});

View file

@ -1,10 +1,23 @@
import { join } from 'node:path';
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
import type {
ChunkResult,
DeterministicProjectionContext,
DiffSet,
FetchContext,
ProjectionResult,
SourceAdapter,
} from '../../types.js';
import { chunkMetricFlowProject } from './chunk.js';
import { detectMetricFlowStagedDir } from './detect.js';
import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js';
import { fetchMetricflowRepo } from './fetch.js';
import { importMetricflowSemanticModels } from './import-semantic-models.js';
import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js';
import {
metricflowHostTablesFromParsedTargets,
readMetricflowProjectionConfig,
writeMetricflowProjectionConfig,
} from './projection-config.js';
import { parseMetricflowPullConfig } from './pull-config.js';
export interface MetricflowSourceAdapterDeps {
@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter {
cacheDir: this.resolveCacheDir(ctx.connectionId),
stagedDir,
});
await writeMetricflowProjectionConfig(stagedDir, {
parsedTargetTables: config.parsedTargetTables,
});
}
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter {
return { ...chunk, parseArtifacts };
}
/**
 * Projects the MetricFlow parse result carried on `ctx.parseArtifacts` by
 * handing it to `importMetricflowSemanticModels`.
 *
 * When `ctx.parseArtifacts` does not look like a MetricFlow parse result the
 * method returns a result with a single error and no touched sources instead
 * of throwing. Host tables are derived from the projection config stored in
 * `ctx.stagedDir`. This projection never reports changed wiki pages.
 *
 * @param ctx Projection context (connection, staged dir, workdir, parse artifacts, semantic-layer service).
 * @returns The import result plus its warnings, errors and touched sources.
 */
async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> {
  const { parseArtifacts, stagedDir, connectionId, workdir, semanticLayerService } = ctx;

  if (!isMetricFlowParseResult(parseArtifacts)) {
    return {
      warnings: [],
      errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
      touchedSources: [],
      changedWikiPageKeys: [],
    };
  }

  const { parsedTargetTables } = await readMetricflowProjectionConfig(stagedDir);
  const hostTables = metricflowHostTablesFromParsedTargets(parsedTargetTables);

  const importResult = await importMetricflowSemanticModels(
    { semanticLayerService },
    {
      connectionId,
      parseResult: parseArtifacts,
      targetSchema: null,
      hostTables,
      workdir,
    },
  );

  const { warnings, errors, touchedSources } = importResult;
  return {
    result: importResult,
    warnings,
    errors,
    touchedSources,
    changedWikiPageKeys: [],
  };
}
/** Returns the per-connection cache directory: `<homeDir>/ingest-metricflow-repos/<connectionId>`. */
private resolveCacheDir(connectionId: string): string {
  const { homeDir } = this.deps;
  return join(homeDir, 'ingest-metricflow-repos', connectionId);
}
@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter {
/** Runs the deep MetricFlow parser over the files of an already-parsed staged project. */
function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult {
  const { files } = project;
  return parseMetricflowFiles(files);
}
/**
 * Structural type guard for `MetricFlowParseResult`.
 *
 * Only checks that `value` is a non-null object whose `semanticModels`,
 * `crossModelMetrics`, `relationships` and `warnings` properties are arrays;
 * the array contents are not inspected.
 */
function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const record = value as Record<string, unknown>;
  const requiredArrayFields = ['semanticModels', 'crossModelMetrics', 'relationships', 'warnings'];
  return requiredArrayFields.every((field) => Array.isArray(record[field]));
}

View file

@ -0,0 +1,54 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { z } from 'zod';
import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
import type { MetricflowHostTable } from './semantic-models.js';
/** File name, relative to the staged directory, where the projection config is stored. */
const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json';
/**
 * Shape of the persisted projection config. `parsedTargetTables` is a string-keyed
 * record of parsed target tables and defaults to an empty record when absent.
 */
const metricflowProjectionConfigSchema = z.object({
parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}),
});
/** Parsed (post-default) form of the projection config. */
export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>;
/**
 * Validates `config` against the projection-config schema and writes it as
 * pretty-printed JSON (2-space indent, trailing newline) into `stagedDir`,
 * creating the directory first if needed.
 *
 * @param stagedDir Directory that receives the config file.
 * @param config Projection config to persist.
 * @throws If schema validation fails or the filesystem calls reject.
 */
export async function writeMetricflowProjectionConfig(
  stagedDir: string,
  config: MetricflowProjectionConfig,
): Promise<void> {
  const validated = metricflowProjectionConfigSchema.parse(config);
  const targetPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);

  await mkdir(stagedDir, { recursive: true });
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(targetPath, serialized, 'utf-8');
}
/**
 * Reads and validates the projection config stored in `stagedDir`.
 *
 * A missing file (error code `ENOENT`) is not an error: an empty config is
 * returned. Any other failure — unreadable file, malformed JSON, schema
 * violation — is rethrown unchanged.
 *
 * @param stagedDir Directory expected to contain the config file.
 * @returns The validated projection config, or `{ parsedTargetTables: {} }` when the file is absent.
 */
export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> {
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  try {
    const rawJson = await readFile(configPath, 'utf-8');
    return metricflowProjectionConfigSchema.parse(JSON.parse(rawJson));
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { parsedTargetTables: {} };
  }
}
/**
 * Converts the successfully parsed entries of `parsedTargetTables` into host
 * tables, sorted by id.
 *
 * Entries whose `ok` flag is false are dropped. Each host table takes its `id`
 * from the record key, `db` from the parsed `schema`, and an empty `columns`
 * list.
 *
 * @param parsedTargetTables Parsed target tables keyed by id.
 * @returns Host tables ordered by `id` using `localeCompare`.
 */
export function metricflowHostTablesFromParsedTargets(
  parsedTargetTables: Record<string, ParsedTargetTable>,
): MetricflowHostTable[] {
  const hostTables: MetricflowHostTable[] = [];
  for (const [id, table] of Object.entries(parsedTargetTables)) {
    if (!table.ok) {
      continue;
    }
    hostTables.push({
      id,
      name: table.name,
      catalog: table.catalog,
      db: table.schema,
      columns: [],
    });
  }
  hostTables.sort((a, b) => a.id.localeCompare(b.id));
  return hostTables;
}

View file

@ -0,0 +1,190 @@
import { describe, expect, it, vi } from 'vitest';
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
/**
 * Builds a mock wiki service backed by an in-memory page map.
 *
 * `listPageKeys` resolves to the map's keys; `readPage` ignores scope and scope
 * id and resolves to the page for `pageKey` (summary set to the key,
 * `usage_mode` fixed to `'auto'`), or to `null` for an unknown key.
 */
function wikiServiceWithPages(
  pages: Record<string, { refs?: string[]; content?: string; slRefs?: string[] }>,
) {
  const readPage = vi.fn().mockImplementation(async (_scope: string, _scopeId: string | null, pageKey: string) => {
    const entry = pages[pageKey];
    if (!entry) {
      return null;
    }
    const frontmatter = {
      summary: pageKey,
      usage_mode: 'auto',
      refs: entry.refs,
      sl_refs: entry.slRefs,
    };
    return { pageKey, frontmatter, content: entry.content ?? '' };
  });
  const listPageKeys = vi.fn().mockResolvedValue(Object.keys(pages));

  return { listPageKeys, readPage };
}
// Unit tests for the final ingest artifact gates. Wiki and semantic-layer services
// are replaced with in-memory mocks; each case feeds one kind of broken reference
// and asserts on the resulting error text (or on the validator call arguments).
describe('artifact gates', () => {
// The page body mentions `total_contract_arr_cents`, but the source only defines the
// measure `total_contract_arr`; the body-reference check must reject it.
it('fails the final tree when wiki body references a stale semantic-layer measure', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
slRefs: ['mart_account_segments'],
content: 'ARR is `mart_account_segments.total_contract_arr_cents`.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
table: 'analytics.mart_account_segments',
},
],
loadErrors: [],
}),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }),
tableExists: async () => true,
}),
).rejects.toThrow(/unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/);
});
// A provenance row whose raw path is in neither the current nor the deleted set must throw.
it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => {
expect(() =>
validateProvenanceRawPaths({
rows: [{ rawPath: 'cards/missing.json' }],
currentRawPaths: new Set(['cards/present.json']),
deletedRawPaths: new Set(['cards/deleted.json']),
}),
).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
});
// Same stale measure as the first case, but referenced from frontmatter `sl_refs`
// (source.entity form) instead of the page body.
it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
slRefs: ['mart_account_segments.total_contract_arr_cents'],
content: 'ARR uses a renamed measure.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'mart_account_segments',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
table: 'analytics.mart_account_segments',
},
],
loadErrors: [],
}),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: ['warehouse:mart_account_segments'] }),
tableExists: async () => true,
}),
).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/);
});
// Only `accounts` is touched; `orders` and `segments` both declare a join to it, so
// all three must be handed to validateTouchedSources, sorted by source name.
it('validates direct declared-join neighbors of touched semantic-layer sources', async () => {
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({
sources: [
{
name: 'orders',
grain: ['order_id'],
columns: [
{ name: 'order_id', type: 'string' },
{ name: 'account_id', type: 'string' },
],
joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }],
measures: [{ name: 'order_count', expr: 'count(*)' }],
},
{
name: 'accounts',
grain: ['account_id'],
columns: [{ name: 'account_id', type: 'string' }],
joins: [],
measures: [{ name: 'account_count', expr: 'count(*)' }],
},
{
name: 'segments',
grain: ['segment_id'],
columns: [
{ name: 'segment_id', type: 'string' },
{ name: 'account_id', type: 'string' },
],
joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }],
measures: [],
},
],
loadErrors: [],
}),
};
const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] });
await validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: [],
touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }],
wikiService: { readPage: vi.fn() } as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources,
tableExists: async () => true,
});
expect(validateTouchedSources).toHaveBeenCalledWith([
{ connectionId: 'warehouse', sourceName: 'accounts' },
{ connectionId: 'warehouse', sourceName: 'orders' },
{ connectionId: 'warehouse', sourceName: 'segments' },
]);
});
// Both a frontmatter `refs` entry and an inline [[...]] link point at pages that do
// not exist; both must appear in the single aggregated error line.
it('fails final gates when a changed wiki page references a missing wiki page', async () => {
const wikiService = wikiServiceWithPages({
'account-segments': {
refs: ['missing-frontmatter-page'],
content: 'See [[missing-inline-page]] for the related process.',
},
});
const semanticLayerService = {
loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
};
await expect(
validateFinalIngestArtifacts({
connectionIds: ['warehouse'],
changedWikiPageKeys: ['account-segments'],
touchedSlSources: [],
wikiService: wikiService as never,
semanticLayerService: semanticLayerService as never,
validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }),
tableExists: async () => true,
}),
).rejects.toThrow(
/wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/,
);
});
});

View file

@ -0,0 +1,188 @@
import type { SemanticLayerService } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { KnowledgeWikiService } from '../wiki/index.js';
import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js';
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';
/** Result returned by the `validateTouchedSources` callback of the final artifact gates. */
export interface TouchedValidationResult {
/** Sources that failed validation; each entry becomes a `semantic-layer validation failed for …` gate error. */
invalidSources: string[];
/** Sources that passed validation. Not read by the gates in this module. */
validSources: string[];
}
/** Everything `validateFinalIngestArtifacts` needs to check the final wiki and semantic-layer artifacts. */
export interface FinalArtifactGateInput {
/** Connections whose semantic-layer sources are loaded for reference checks and join expansion. */
connectionIds: string[];
/** Keys of the wiki pages to check; each is read with scope `'GLOBAL'` and a null scope id. */
changedWikiPageKeys: string[];
/** Sources touched by the ingest; expanded with direct join neighbors before validation. */
touchedSlSources: TouchedSlSource[];
/** Wiki service used to read the changed pages and to resolve page references. */
wikiService: KnowledgeWikiService;
/** Semantic-layer service; only `loadAllSources(connectionId)` is called by this module. */
semanticLayerService: SemanticLayerService;
/** Validates the expanded, de-duplicated and sorted list of touched sources. */
validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
/** Table-existence probe; forwarded unchanged to the wiki body reference check. */
tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
/** Input for `validateProvenanceRawPaths`. */
export interface ProvenanceRawPathValidationInput {
/** Provenance rows to check; only `rawPath` is inspected. */
rows: Array<{ rawPath: string }>;
/** Raw paths considered part of the current snapshot. */
currentRawPaths: Set<string>;
/** Raw paths considered deleted; a row may reference these as well. */
deletedRawPaths: Set<string>;
}
/**
 * Splits a semantic-layer reference of the form `[connection/]source[.entity]`.
 *
 * Everything before the first `/` is the connection id (null when there is no
 * `/`). The remainder is split on `.` with a limit of two parts, so any text
 * after a second `.` is discarded.
 *
 * @param ref Raw reference string from wiki frontmatter.
 * @returns The connection id (or null), the source name (`''` for an empty ref) and the entity name (or null).
 */
function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } {
  const slashIndex = ref.indexOf('/');
  const hasConnection = slashIndex !== -1;

  const connectionId = hasConnection ? ref.slice(0, slashIndex) : null;
  const qualifiedName = hasConnection ? ref.slice(slashIndex + 1) : ref;

  const segments = qualifiedName.split('.', 2);
  return {
    connectionId,
    sourceName: segments[0] ?? '',
    entityName: segments[1] ?? null,
  };
}
/**
 * Collects the names a `source.<entity>` reference may resolve to: the names
 * of the source's measures, columns and segments. Missing lists are treated
 * as empty.
 */
function slEntityNames(source: Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'][number]): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
/**
 * De-duplicates touched sources by (connectionId, sourceName) and returns them
 * sorted by connection id, then by source name (both via `localeCompare`).
 *
 * The first occurrence of each pair is the one kept. The input array is not
 * modified.
 *
 * @param sources Touched sources, possibly containing duplicates.
 * @returns A new, sorted array with one entry per distinct pair.
 */
function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] {
  const firstByIdentity = new Map<string, TouchedSlSource>();
  for (const source of sources) {
    // JSON-encode the pair instead of joining with ':' so that ids or names that
    // themselves contain ':' (e.g. 'a:b' + 'c' vs 'a' + 'b:c') cannot collide and
    // cause a distinct source to be dropped.
    const identity = JSON.stringify([source.connectionId, source.sourceName]);
    if (!firstByIdentity.has(identity)) {
      firstByIdentity.set(identity, source);
    }
  }
  return [...firstByIdentity.values()].sort((left, right) => {
    const byConnection = left.connectionId.localeCompare(right.connectionId);
    return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection;
  });
}
/**
 * Extends the touched sources with their direct join neighbors, in both
 * directions: sources a touched source joins to, and sources that declare a
 * join to a touched source.
 *
 * Only connections listed in `input.connectionIds` that have at least one
 * touched source are loaded; touched sources of other connections are kept
 * but not expanded. Join targets are added by name without checking that a
 * source with that name was loaded.
 *
 * @returns The de-duplicated, sorted union of touched sources and neighbors.
 */
async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> {
  const touchedNamesByConnection = new Map<string, Set<string>>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    let names = touchedNamesByConnection.get(connectionId);
    if (names === undefined) {
      names = new Set<string>();
      touchedNamesByConnection.set(connectionId, names);
    }
    names.add(sourceName);
  }

  const candidates: TouchedSlSource[] = input.touchedSlSources.slice();
  for (const connectionId of input.connectionIds) {
    const touchedNames = touchedNamesByConnection.get(connectionId);
    if (touchedNames === undefined || touchedNames.size === 0) {
      continue;
    }
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    for (const source of loaded.sources) {
      const joins = source.joins ?? [];
      // Outgoing edges: a touched source pulls in everything it joins to.
      if (touchedNames.has(source.name)) {
        for (const join of joins) {
          candidates.push({ connectionId, sourceName: join.to });
        }
      }
      // Incoming edges: any source joining to a touched source is pulled in too.
      const joinsToTouched = joins.some((join) => touchedNames.has(join.to));
      if (joinsToTouched) {
        candidates.push({ connectionId, sourceName: source.name });
      }
    }
  }
  return uniqueTouchedSources(candidates);
}
/**
 * Checks the frontmatter `sl_refs` of every changed wiki page against the
 * loaded semantic-layer sources.
 *
 * A ref qualified with a connection is looked up in that connection only; an
 * unqualified ref is looked up in `input.connectionIds` in order and bound to
 * the first source with a matching name. Pages that cannot be read are
 * skipped.
 *
 * NOTE(review): for an unqualified `source.entity` ref, only the first
 * matching source is checked for the entity, even if a same-named source in a
 * later connection defines it — confirm this is the intended resolution rule.
 *
 * @returns One message per unresolved source (`unknown sl_refs entry`) or
 * unresolved entity (`unknown sl_refs entity`), each prefixed with the page key.
 */
async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> {
  type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

  const loadedByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    loadedByConnection.set(connectionId, loaded.sources);
  }

  const problems: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    for (const ref of page.frontmatter.sl_refs ?? []) {
      const { connectionId, sourceName, entityName } = parseSlRef(ref);
      const searchOrder = connectionId ? [connectionId] : input.connectionIds;

      let match: LoadedSources[number] | undefined;
      for (const candidateId of searchOrder) {
        match = loadedByConnection.get(candidateId)?.find((candidate) => candidate.name === sourceName);
        if (match) {
          break;
        }
      }

      if (!match) {
        problems.push(`${pageKey}: unknown sl_refs entry ${ref}`);
      } else if (entityName && !slEntityNames(match).has(entityName)) {
        problems.push(`${pageKey}: unknown sl_refs entity ${ref}`);
      }
    }
  }
  return problems;
}
/**
 * Finds references from changed wiki pages to wiki pages that do not exist.
 *
 * Each readable changed page is passed, with its frontmatter `refs` and its
 * content, to `findMissingWikiRefs`; unreadable pages are skipped.
 *
 * @returns One `"<pageKey> -> <missingRef>"` string per dangling reference.
 */
async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> {
  const { wikiService, changedWikiPageKeys } = input;
  const danglingRefs: string[] = [];

  for (const pageKey of changedWikiPageKeys) {
    const page = await wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const missingTargets = await findMissingWikiRefs({
        wikiService,
        scope: 'GLOBAL',
        scopeId: null,
        pageKey,
        refs: page.frontmatter.refs,
        content: page.content,
      });
      for (const target of missingTargets) {
        danglingRefs.push(`${pageKey} -> ${target}`);
      }
    }
  }
  return danglingRefs;
}
/**
 * Runs every final artifact gate and throws one aggregated error if any fails.
 *
 * Gates run in this order, and the error lists them in the same order:
 *  1. semantic-layer validation of touched sources plus direct join neighbors,
 *  2. frontmatter `sl_refs` of changed wiki pages,
 *  3. wiki-to-wiki references of changed pages (reported as a single line),
 *  4. semantic-layer references found in the body of each changed page.
 *
 * @param input Services, callbacks and the sets of changed pages / touched sources.
 * @throws Error with message `final artifact gates failed:` followed by one failure per line.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const sourcesToValidate = await expandTouchedSlSourcesWithDirectJoinNeighbors(input);
  const { invalidSources } = await input.validateTouchedSources(sourcesToValidate);
  const failures: string[] = invalidSources.map((source) => `semantic-layer validation failed for ${source}`);

  const slRefFailures = await validateWikiSlRefs(input);
  failures.push(...slRefFailures);

  const missingPageRefs = await validateWikiRefs(input);
  if (missingPageRefs.length !== 0) {
    failures.push(`wiki references target missing page(s): ${missingPageRefs.join(', ')}`);
  }

  const loadSources = async (connectionId: string) => {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    return loaded.sources;
  };
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    const bodyFailures = await findInvalidWikiBodyRefs({
      pageKey,
      body: page.content,
      visibleConnectionIds: input.connectionIds,
      loadSources,
      tableExists: input.tableExists,
    });
    failures.push(...bodyFailures);
  }

  if (failures.length === 0) {
    return;
  }
  throw new Error(`final artifact gates failed:\n${failures.join('\n')}`);
}
/**
 * Ensures every provenance row points at a raw path that is either in the
 * current snapshot or in the deleted set.
 *
 * @param input Rows to check plus the two sets of acceptable raw paths.
 * @throws Error naming the first row whose raw path is in neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  const { rows, currentRawPaths, deletedRawPaths } = input;
  const orphan = rows.find(({ rawPath }) => !(currentRawPaths.has(rawPath) || deletedRawPaths.has(rawPath)));
  if (orphan !== undefined) {
    throw new Error(`provenance row references raw path outside this snapshot: ${orphan.rawPath}`);
  }
}

View file

@ -0,0 +1,136 @@
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
import { FileIngestTraceWriter } from './ingest-trace.js';
/**
 * Creates a throwaway workdir for the gate-repair tests.
 *
 * The workdir contains one wiki page that references the non-existent measure
 * `total_contract_arr_cents` and one semantic-layer source that defines
 * `total_contract_arr`. A trace writer targeting `<root>/trace.jsonl` is
 * returned alongside the paths.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-'));
  const workdir = join(root, 'workdir');
  const writeFixture = (relativePath: string, content: string): Promise<void> =>
    writeFile(join(workdir, relativePath), content, 'utf-8');

  // Directories first, then the files that live in them.
  for (const dir of ['wiki/global', 'semantic-layer/warehouse']) {
    await mkdir(join(workdir, dir), { recursive: true });
  }
  await writeFixture(
    'wiki/global/account-segments.md',
    '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n',
  );
  await writeFixture(
    'semantic-layer/warehouse/mart_account_segments.yaml',
    'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n',
  );

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });
  return { root, workdir, trace };
}
// finalGateRepairPaths maps changed wiki page keys and touched semantic-layer sources
// to repo-relative file paths. The input deliberately contains duplicates; the
// expected output is de-duplicated and sorted.
describe('finalGateRepairPaths', () => {
it('derives sorted wiki and semantic-layer file paths', () => {
expect(
finalGateRepairPaths({
changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
touchedSlSources: [
{ connectionId: 'warehouse', sourceName: 'mart_account_segments' },
{ connectionId: 'warehouse', sourceName: 'orders' },
{ connectionId: 'warehouse', sourceName: 'orders' },
],
}),
).toEqual([
'semantic-layer/warehouse/mart_account_segments.yaml',
'semantic-layer/warehouse/orders.yaml',
'wiki/global/account-segments.md',
'wiki/global/overview.md',
]);
});
});
// End-to-end checks of repairFinalGateFailure against a stubbed agent runner
// and the on-disk fixture produced by makeHarness().
describe('repairFinalGateFailure', () => {
it('lets the repair agent read gate errors and edit only allowed files', async () => {
const { workdir, trace } = await makeHarness();
// The stub plays the role of the agent: it drives the tools handed to runLoop
// in the order a real repair would (read error, read file, write file).
const agentRunner = {
runLoop: vi.fn(async (params: any) => {
const error = await params.toolSet.read_gate_error.execute({});
expect(error.markdown).toContain('total_contract_arr_cents');
const page = await params.toolSet.read_repair_file.execute({
path: 'wiki/global/account-segments.md',
});
expect(page.markdown).toContain('total_contract_arr_cents');
// Writing outside the allow-list must be rejected by the tool itself.
await expect(
params.toolSet.write_repair_file.execute({
path: 'wiki/global/other.md',
content: 'not allowed',
}),
).rejects.toThrow(/gate repair path not allowed/);
// The actual repair: point the page at the measure name the source defines.
await params.toolSet.write_repair_file.execute({
path: 'wiki/global/account-segments.md',
content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'),
});
return { stopReason: 'natural' as const };
}),
};
const result = await repairFinalGateFailure({
agentRunner,
workdir,
gateError:
'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
allowedPaths: ['wiki/global/account-segments.md'],
trace,
repairKind: 'final_artifact_gate',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'repaired',
attempts: 1,
changedPaths: ['wiki/global/account-segments.md'],
});
// The edit must have reached disk and the success event must be in the trace file.
await expect(readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8')).resolves.toContain(
'total_contract_arr',
);
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired');
// The runner must be invoked with the repair model role, the caller's step budget and telemetry tags.
expect(agentRunner.runLoop).toHaveBeenCalledWith(
expect.objectContaining({
modelRole: 'repair',
stepBudget: 8,
telemetryTags: expect.objectContaining({
operationName: 'ingest-isolated-diff-gate-repair',
repairKind: 'final_artifact_gate',
}),
}),
);
});
it('returns failed when the repair agent edits no allowed file', async () => {
const { workdir, trace } = await makeHarness();
// The stub agent stops naturally without calling any tool, so no path is recorded as edited.
const result = await repairFinalGateFailure({
agentRunner: { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) },
workdir,
gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity',
allowedPaths: ['wiki/global/account-segments.md'],
trace,
repairKind: 'final_artifact_gate',
maxAttempts: 1,
stepBudget: 8,
});
expect(result).toEqual({
status: 'failed',
attempts: 1,
reason: 'gate repair completed without editing an allowed path',
});
await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed');
});
});

View file

@ -0,0 +1,230 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises';
import { dirname, join } from 'node:path';
import { z } from 'zod';
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { IngestTraceWriter } from './ingest-trace.js';
import { traceTimed } from './ingest-trace.js';
/**
 * Label for the kind of gate failure being repaired. It is forwarded verbatim
 * into the user prompt, the trace data and the telemetry tags.
 */
type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate';
/**
 * Outcome of a gate-repair run.
 *
 * - `repaired`: an attempt ended with at least one allowed file written;
 *   `changedPaths` lists the written paths and `attempts` the attempt number.
 * - `failed`: no attempt produced an edit; `reason` carries the last failure.
 */
export type FinalGateRepairResult =
| { status: 'repaired'; attempts: number; changedPaths: string[] }
| { status: 'failed'; attempts: number; reason: string };
/** Arguments accepted by `repairFinalGateFailure`. */
export interface RepairFinalGateFailureInput {
/** Runner whose `runLoop` executes the repair agent. */
agentRunner: AgentRunnerPort;
/** Directory against which allowed paths are resolved for reads and writes. */
workdir: string;
/** Gate failure text shown in the prompt and returned by the `read_gate_error` tool. */
gateError: string;
/** Repository-relative paths the agent may read and overwrite; normalized before use. */
allowedPaths: string[];
/** Trace writer that receives the `gate_repair` events. */
trace: IngestTraceWriter;
repairKind: FinalGateRepairKind;
/** Number of agent runs to try; defaults to 1 when omitted. */
maxAttempts?: number;
/** Step budget passed to each agent run; defaults to 16 when omitted. */
stepBudget?: number;
}
/** Input schema of the `read_repair_file` tool. */
const readRepairFileSchema = z.object({
path: z.string().min(1),
});
/** Input schema of the `write_repair_file` tool; `content` may be the empty string. */
const writeRepairFileSchema = z.object({
path: z.string().min(1),
content: z.string(),
});
/**
 * Canonicalises a tool-supplied path into a forward-slash, repository-relative form.
 *
 * Backslashes become slashes, leading slashes are dropped and empty segments
 * (from repeated or trailing slashes) are removed.
 *
 * @throws Error when nothing remains or when any segment is `.` or `..`.
 */
function normalizeRepoPath(path: string): string {
  const slashed = path.replace(/\\/g, '/').replace(/^\/+/, '');
  const segments: string[] = [];
  for (const segment of slashed.split('/')) {
    if (segment.length > 0) {
      segments.push(segment);
    }
  }

  // Dot segments could escape or alias the worktree, so they are rejected rather than resolved.
  const hasDotSegment = segments.includes('.') || segments.includes('..');
  if (segments.length === 0 || hasDotSegment) {
    throw new Error(`gate repair path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
/**
 * Normalizes `path` and confirms it is a member of `allowedPaths`.
 *
 * @returns the normalized path when it is allowed.
 * @throws Error when the path is malformed or not in the allow-list.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const repoPath = normalizeRepoPath(path);
  if (allowedPaths.has(repoPath)) {
    return repoPath;
  }
  throw new Error(`gate repair path not allowed: ${repoPath}`);
}
/**
 * Reads a UTF-8 file, treating a missing file as an empty, non-existent result.
 *
 * @returns `{ exists: true, content }` on success, or `{ exists: false, content: '' }`
 *   when the read fails with `ENOENT`.
 * @throws any other read error, unchanged.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { exists: false, content: '' };
  }
  return { exists: true, content };
}
/**
 * Returns the fixed system prompt for the gate-repair agent.
 *
 * The prompt takes no inputs: it names the three tools the agent is given
 * (`read_gate_error`, `read_repair_file`, `write_repair_file`) and instructs
 * the agent to make the smallest edit possible, or to stop without editing
 * when the fix would require unsupported facts.
 */
function buildGateRepairSystemPrompt(): string {
return `<role>
You repair one KTX isolated-diff artifact gate failure inside the integration worktree.
</role>
<rules>
- Use read_gate_error first.
- Read only files exposed by read_repair_file.
- Edit only paths exposed by write_repair_file.
- Prefer the smallest text edit that makes the gate pass.
- Preserve accepted work-unit, reconciliation, and deterministic projection content.
- Do not invent warehouse facts, business definitions, or semantic-layer entities.
- If the gate error requires choosing between conflicting facts without evidence, stop without editing.
</rules>`;
}
/**
 * Renders the per-attempt user prompt for the gate-repair agent.
 *
 * @param input.gateError    gate failure text, embedded verbatim.
 * @param input.allowedPaths paths rendered as a `- path` bullet list, in the given order.
 * @param input.repairKind   repair kind label, embedded verbatim.
 * @param input.attempt      current attempt number.
 * @param input.maxAttempts  total number of attempts.
 * @returns the newline-joined prompt text.
 */
function buildGateRepairUserPrompt(input: {
  gateError: string;
  allowedPaths: string[];
  repairKind: FinalGateRepairKind;
  attempt: number;
  maxAttempts: number;
}): string {
  const allowedFileBullets = input.allowedPaths.map((path) => `- ${path}`).join('\n');
  const promptLines = [
    'Repair isolated-diff artifact gates.',
    `Repair kind: ${input.repairKind}`,
    `Attempt: ${input.attempt} of ${input.maxAttempts}`,
    'Allowed files:',
    allowedFileBullets,
    'Gate error:',
    input.gateError,
    'Use read_gate_error first. Then inspect only the allowed files, write the',
    'minimal repaired content, and stop.',
  ];
  return promptLines.join('\n');
}
/**
 * Builds the three tools handed to the gate-repair agent.
 *
 * - `read_gate_error` returns the gate failure text.
 * - `read_repair_file` reads one allow-listed file (a missing file yields a placeholder).
 * - `write_repair_file` overwrites one allow-listed file and records the path
 *   in `input.editedPaths` so the caller can tell whether anything was written.
 *
 * Both file tools reject paths outside `input.allowedPaths`.
 */
function buildToolSet(input: {
  workdir: string;
  gateError: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const readGateError = async () => ({
    markdown: input.gateError,
    structured: { gateError: input.gateError },
  });

  const readRepairFile = async ({ path }: z.infer<typeof readRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, input.allowedPaths);
    const { exists, content } = await readOptionalFile(join(input.workdir, repoPath));
    const markdown = exists ? content : `(missing file: ${repoPath})`;
    return { markdown, structured: { path: repoPath, exists } };
  };

  const writeRepairFile = async ({ path, content }: z.infer<typeof writeRepairFileSchema>) => {
    const repoPath = assertAllowedPath(path, input.allowedPaths);
    const target = join(input.workdir, repoPath);
    // An allowed file may live in a directory that does not exist yet.
    await mkdir(dirname(target), { recursive: true });
    await writeFile(target, content, 'utf-8');
    // Recorded only after the write succeeded.
    input.editedPaths.add(repoPath);
    return {
      markdown: `Wrote ${repoPath}`,
      structured: { path: repoPath, bytes: Buffer.byteLength(content) },
    };
  };

  return {
    read_gate_error: {
      name: 'read_gate_error',
      description: 'Read the artifact gate failure that must be repaired.',
      inputSchema: z.object({}),
      execute: readGateError,
    },
    read_repair_file: {
      name: 'read_repair_file',
      description: 'Read one allowed file from the integration worktree.',
      inputSchema: readRepairFileSchema,
      execute: readRepairFile,
    },
    write_repair_file: {
      name: 'write_repair_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeRepairFileSchema,
      execute: writeRepairFile,
    },
  };
}
/**
 * Derives the worktree-relative files a final-gate repair may touch.
 *
 * Each touched semantic-layer source maps to
 * `semantic-layer/<connectionId>/<sourceName>.yaml` and each wiki page key to
 * `wiki/global/<pageKey>.md`.
 *
 * @returns the de-duplicated paths in default string sort order.
 */
export function finalGateRepairPaths(input: {
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
}): string[] {
  const paths = new Set<string>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    paths.add(`semantic-layer/${connectionId}/${sourceName}.yaml`);
  }
  for (const pageKey of input.changedWikiPageKeys) {
    paths.add(`wiki/global/${pageKey}.md`);
  }
  return Array.from(paths).sort();
}
/**
 * Runs the repair agent against a failed artifact gate, up to `maxAttempts` times.
 *
 * Each attempt gets a fresh tool set restricted to the normalized allow-list.
 * An attempt succeeds when the agent loop does not stop with `'error'` and at
 * least one allowed file was written through `write_repair_file`.
 *
 * @param input see {@link RepairFinalGateFailureInput}; `maxAttempts` defaults
 *   to 1 and `stepBudget` to 16.
 * @returns `repaired` with the attempt number and sorted written paths, or
 *   `failed` with `maxAttempts` and the reason of the last failed attempt
 *   (`'gate repair did not run'` when no attempt was made).
 * @throws Error when an entry of `input.allowedPaths` is not a valid repository-relative path.
 */
export async function repairFinalGateFailure(
  input: RepairFinalGateFailureInput,
): Promise<FinalGateRepairResult> {
  const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath));
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 16;
  let failureReason = 'gate repair did not run';

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    // Fresh per attempt so a previous attempt's writes never count for this one.
    const writtenPaths = new Set<string>();
    const allowedPathList = [...allowedPaths].sort();
    const traceData = {
      repairKind: input.repairKind,
      attempt,
      maxAttempts,
      allowedPaths: allowedPathList,
      gateError: input.gateError,
    };

    const runAgent = async () =>
      input.agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildGateRepairSystemPrompt(),
        userPrompt: buildGateRepairUserPrompt({
          gateError: input.gateError,
          allowedPaths: allowedPathList,
          repairKind: input.repairKind,
          attempt,
          maxAttempts,
        }),
        toolSet: buildToolSet({
          workdir: input.workdir,
          gateError: input.gateError,
          allowedPaths,
          editedPaths: writtenPaths,
        }),
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-gate-repair',
          source: input.trace.context.sourceKey,
          jobId: input.trace.context.jobId,
          repairKind: input.repairKind,
        },
      });
    const outcome = await traceTimed(input.trace, 'gate_repair', 'gate_repair', traceData, runAgent);

    if (outcome.stopReason === 'error') {
      failureReason = outcome.error?.message ?? 'gate repair agent loop errored';
      await input.trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, outcome.error);
      continue;
    }

    const changedPaths = [...writtenPaths].sort();
    if (changedPaths.length > 0) {
      await input.trace.event('debug', 'gate_repair', 'gate_repair_repaired', {
        ...traceData,
        changedPaths,
      });
      return { status: 'repaired', attempts: attempt, changedPaths };
    }

    // The loop ended cleanly but nothing was written: treat as a failed attempt.
    failureReason = 'gate repair completed without editing an allowed path';
    await input.trace.event('error', 'gate_repair', 'gate_repair_failed', {
      ...traceData,
      reason: failureReason,
    });
  }

  return { status: 'failed', attempts: maxAttempts, reason: failureReason };
}

View file

@ -0,0 +1,131 @@
import { describe, expect, it } from 'vitest';
import {
compareFinalizationDeclarations,
deriveFinalizationTouchedSources,
deriveFinalizationWikiPageKeys,
} from './finalization-scope.js';
// Only markdown files directly under wiki/global/ count as wiki pages.
describe('deriveFinalizationWikiPageKeys', () => {
it('maps changed global wiki markdown paths to page keys', () => {
expect(
deriveFinalizationWikiPageKeys([
'wiki/global/historic-sql-orders.md',
// Nested page and non-wiki file: neither appears in the expected result.
'wiki/global/nested/page.md',
'README.md',
]),
).toEqual(['historic-sql-orders']);
});
});
// Covers the two resolution strategies: direct mapping of standalone source
// files, and snapshot comparison for aggregate `_schema` files.
describe('deriveFinalizationTouchedSources', () => {
it('maps standalone semantic-layer files directly', async () => {
// No snapshots are needed: the source name comes from the file name.
const result = await deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/orders.yaml'],
beforeSourcesByConnection: new Map(),
afterSourcesByConnection: new Map(),
});
expect(result).toEqual({
touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
unresolvedPaths: [],
});
});
it('resolves aggregate _schema changes by comparing loaded source snapshots', async () => {
// Before and after differ only in the `usage` block of the `orders` source.
const beforeSourcesByConnection = new Map([
[
'warehouse',
[
{
name: 'orders',
grain: ['order_id'],
columns: [{ name: 'order_id', type: 'string' }],
joins: [],
measures: [],
usage: {
narrative: 'old',
frequencyTier: 'low' as const,
commonFilters: [],
commonJoins: [],
},
},
],
],
]);
const afterSourcesByConnection = new Map([
[
'warehouse',
[
{
name: 'orders',
grain: ['order_id'],
columns: [{ name: 'order_id', type: 'string' }],
joins: [],
measures: [],
usage: {
narrative: 'new',
frequencyTier: 'high' as const,
commonFilters: [],
commonJoins: [],
},
},
],
],
]);
const result = await deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
beforeSourcesByConnection,
afterSourcesByConnection,
});
expect(result).toEqual({
touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
unresolvedPaths: [],
});
});
it('flags aggregate _schema changes that cannot be resolved to logical sources', async () => {
// Identical (empty) snapshots: the changed _schema file maps to no source.
const beforeSourcesByConnection = new Map([['warehouse', []]]);
const afterSourcesByConnection = new Map([['warehouse', []]]);
const result = await deriveFinalizationTouchedSources({
changedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
beforeSourcesByConnection,
afterSourcesByConnection,
});
expect(result).toEqual({
touchedSources: [],
unresolvedPaths: ['semantic-layer/warehouse/_schema/public.yaml'],
});
});
});
// Declared (adapter-reported) and derived (diff-based) artifacts are compared in both directions.
describe('compareFinalizationDeclarations', () => {
it('reports missing and extra adapter declarations', () => {
expect(
compareFinalizationDeclarations({
declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'customers' }],
declaredChangedWikiPageKeys: ['orders'],
derivedChangedWikiPageKeys: ['orders', 'patterns'],
}),
).toEqual([
// Expected order: semantic-layer mismatches first (missing, then extra), then wiki mismatches.
{
artifactKind: 'sl',
key: 'warehouse:customers',
direction: 'missing_from_adapter_declaration',
},
{
artifactKind: 'sl',
key: 'warehouse:orders',
direction: 'extra_in_adapter_declaration',
},
{
artifactKind: 'wiki',
key: 'patterns',
direction: 'missing_from_adapter_declaration',
},
]);
});
});

View file

@ -0,0 +1,145 @@
import type { SemanticLayerSource } from '../sl/index.js';
import type { TouchedSlSource } from '../tools/index.js';
import type { IngestReportFinalizationMismatch } from './reports.js';
/** Arguments of `deriveFinalizationTouchedSources`. */
interface DeriveTouchedSourcesInput {
/** Changed paths; only `semantic-layer/**` YAML files are considered. */
changedPaths: string[];
/** Sources per connection id before the change; consulted for `_schema` paths. */
beforeSourcesByConnection: Map<string, SemanticLayerSource[]>;
/** Sources per connection id after the change; consulted for `_schema` paths. */
afterSourcesByConnection: Map<string, SemanticLayerSource[]>;
}
/** Result of `deriveFinalizationTouchedSources`. */
interface DeriveTouchedSourcesResult {
/** Touched sources, sorted by `connectionId:sourceName`. */
touchedSources: TouchedSlSource[];
/** Semantic-layer paths that could not be mapped to any source. */
unresolvedPaths: string[];
}
/** Arguments of `compareFinalizationDeclarations`: declared versus derived artifacts. */
interface CompareFinalizationDeclarationsInput {
declaredTouchedSources: TouchedSlSource[];
derivedTouchedSources: TouchedSlSource[];
declaredChangedWikiPageKeys: string[];
derivedChangedWikiPageKeys: string[];
}
/** Returns the distinct non-empty strings of `values` in default sort order. */
function uniqueSorted(values: string[]): string[] {
  const distinct = new Set<string>();
  for (const value of values) {
    if (value.length > 0) {
      distinct.add(value);
    }
  }
  return Array.from(distinct).sort();
}
/** Builds the `connectionId:sourceName` key used to identify and order touched sources. */
function touchedKey(source: TouchedSlSource): string {
  const { connectionId, sourceName } = source;
  return connectionId + ':' + sourceName;
}
/**
 * Serializes a value to JSON text with object keys in sorted order, so that two
 * structurally equal values always produce the same string.
 *
 * `undefined` is handled the way `JSON.stringify` handles it inside containers:
 * object members whose value is `undefined` are omitted and `undefined` array
 * entries are written as `null`. This keeps `{ a: undefined }` equal to `{}`
 * and keeps `[undefined]` distinct from `[]`.
 *
 * @param value any JSON-like value.
 * @returns the canonical JSON text; always a string (`'null'` when `value`
 *   itself has no JSON representation, e.g. `undefined`).
 */
function stableJson(value: unknown): string {
  if (Array.isArray(value)) {
    const entries = value.map((entry) => (entry === undefined ? 'null' : stableJson(entry)));
    return `[${entries.join(',')}]`;
  }
  if (value && typeof value === 'object') {
    const record = value as Record<string, unknown>;
    const members = Object.keys(record)
      .sort()
      // An explicitly-undefined member carries no JSON content; skip it like JSON.stringify does.
      .filter((key) => record[key] !== undefined)
      .map((key) => `${JSON.stringify(key)}:${stableJson(record[key])}`);
    return `{${members.join(',')}}`;
  }
  // JSON.stringify yields undefined (not a string) for values without a JSON form.
  return JSON.stringify(value) ?? 'null';
}
/**
 * Lists the names of sources that were added, removed or modified between two
 * snapshots of one connection.
 *
 * Sources are matched by `name` and compared through their key-order-independent
 * serialization; a name present on only one side counts as changed.
 *
 * @returns the changed source names, de-duplicated and sorted.
 */
function changedSourceNames(
  beforeSources: SemanticLayerSource[],
  afterSources: SemanticLayerSource[],
): string[] {
  const beforeByName = new Map<string, string>();
  for (const source of beforeSources) {
    beforeByName.set(source.name, stableJson(source));
  }
  const afterByName = new Map<string, string>();
  for (const source of afterSources) {
    afterByName.set(source.name, stableJson(source));
  }

  const allNames = uniqueSorted([...beforeByName.keys(), ...afterByName.keys()]);
  // Filtering keeps the unique, sorted, non-empty order of `allNames`, so no second pass is needed.
  return allNames.filter((name) => beforeByName.get(name) !== afterByName.get(name));
}
/**
 * Extracts wiki page keys from a list of changed paths.
 *
 * Only markdown files located directly in `wiki/global/` qualify; files in
 * nested directories and all other paths are ignored. The page key is the
 * file name without the `.md` extension.
 *
 * @returns the distinct, non-empty page keys in default sort order.
 */
export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
  const prefix = 'wiki/global/';
  const suffix = '.md';
  const pageKeys = new Set<string>();

  for (const path of paths) {
    if (!path.startsWith(prefix) || !path.endsWith(suffix)) {
      continue;
    }
    const pageKey = path.slice(prefix.length, -suffix.length);
    if (pageKey.length === 0 || pageKey.includes('/')) {
      continue;
    }
    pageKeys.add(pageKey);
  }

  return [...pageKeys].sort();
}
/**
 * Maps changed semantic-layer file paths to the logical sources they touch.
 *
 * Only `semantic-layer/**` paths ending in `.yaml` or `.yml` are considered.
 * - `semantic-layer/<connectionId>/.../<source>.yaml` touches `<source>` directly.
 * - `semantic-layer/<connectionId>/_schema/...` is an aggregate file: the touched
 *   sources are those whose before/after snapshots differ for that connection.
 *
 * A path is reported as unresolved when it has no connection directory (for
 * example a YAML file sitting directly under `semantic-layer/`), when no source
 * name can be derived from the file name, or when an aggregate change matches
 * no differing source.
 *
 * @param input changed paths plus per-connection source snapshots.
 * @returns touched sources sorted by `connectionId:sourceName`, and the
 *   de-duplicated, sorted unresolved paths.
 */
export async function deriveFinalizationTouchedSources(
  input: DeriveTouchedSourcesInput,
): Promise<DeriveTouchedSourcesResult> {
  const touched = new Map<string, TouchedSlSource>();
  const unresolved = new Set<string>();

  for (const path of input.changedPaths) {
    if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
      continue;
    }
    const parts = path.split('/');
    const connectionId = parts[1] ?? '';
    // With fewer than three segments the second segment is the file itself, not a
    // connection directory; treating it as a connection id would invent a bogus source.
    if (!connectionId || parts.length < 3) {
      unresolved.add(path);
      continue;
    }

    if (parts[2] !== '_schema') {
      const fileName = parts[parts.length - 1] ?? '';
      const sourceName = fileName.replace(/\.ya?ml$/, '');
      if (!sourceName) {
        unresolved.add(path);
        continue;
      }
      touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
      continue;
    }

    // Aggregate file: resolve through the loaded snapshots for this connection.
    const changedNames = changedSourceNames(
      input.beforeSourcesByConnection.get(connectionId) ?? [],
      input.afterSourcesByConnection.get(connectionId) ?? [],
    );
    if (changedNames.length === 0) {
      unresolved.add(path);
      continue;
    }
    for (const sourceName of changedNames) {
      touched.set(`${connectionId}:${sourceName}`, { connectionId, sourceName });
    }
  }

  // Map keys are exactly `connectionId:sourceName`, so ordering by key orders the sources.
  const touchedSources = [...touched.entries()]
    .sort(([leftKey], [rightKey]) => leftKey.localeCompare(rightKey))
    .map(([, source]) => source);

  return {
    touchedSources,
    unresolvedPaths: [...unresolved].sort(),
  };
}
/**
 * Compares what an adapter declared it touched with what was derived from the
 * actual changes, for semantic-layer sources and wiki pages.
 *
 * Mismatches are reported semantic-layer first, then wiki; within each kind,
 * derived-but-undeclared keys (`missing_from_adapter_declaration`) precede
 * declared-but-underived keys (`extra_in_adapter_declaration`), each group in
 * sorted key order. Semantic-layer keys have the form `connectionId:sourceName`.
 *
 * @returns the list of mismatches; empty when declaration and derivation agree.
 */
export function compareFinalizationDeclarations(
  input: CompareFinalizationDeclarationsInput,
): IngestReportFinalizationMismatch[] {
  const sourceKey = (source: TouchedSlSource): string => `${source.connectionId}:${source.sourceName}`;
  const mismatches: IngestReportFinalizationMismatch[] = [];

  const collect = (artifactKind: 'sl' | 'wiki', declared: Set<string>, derived: Set<string>): void => {
    for (const key of [...derived].sort()) {
      if (!declared.has(key)) {
        mismatches.push({ artifactKind, key, direction: 'missing_from_adapter_declaration' });
      }
    }
    for (const key of [...declared].sort()) {
      if (!derived.has(key)) {
        mismatches.push({ artifactKind, key, direction: 'extra_in_adapter_declaration' });
      }
    }
  };

  collect(
    'sl',
    new Set(input.declaredTouchedSources.map(sourceKey)),
    new Set(input.derivedTouchedSources.map(sourceKey)),
  );
  collect('wiki', new Set(input.declaredChangedWikiPageKeys), new Set(input.derivedChangedWikiPageKeys));

  return mismatches;
}

View file

@ -17,6 +17,11 @@ export {
buildLiveDatabaseTableNaturalKey,
ktxSchemaSnapshotToExtractedSchema,
} from './adapters/live-database/extracted-schema.js';
export {
assertSemanticLayerTargetPathsAllowed,
findDisallowedSemanticLayerTargetPaths,
semanticLayerConnectionIdFromPath,
} from './semantic-layer-target-policy.js';
export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js';
export type {
BuildLiveDatabaseManifestShardsInput,
@ -344,7 +349,6 @@ export type {
HistoricSqlTableUsageEvidence,
} from './adapters/historic-sql/evidence.js';
export { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js';
export { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
export { projectHistoricSqlEvidence } from './adapters/historic-sql/projection.js';
export type { HistoricSqlProjectionInput, HistoricSqlProjectionResult } from './adapters/historic-sql/projection.js';
export {
@ -609,6 +613,11 @@ export {
} from './raw-sources-paths.js';
export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js';
export type { IngestReportBody, IngestReportSnapshot } from './reports.js';
export * from './artifact-gates.js';
export * from './ingest-trace.js';
export * from './isolated-diff/git-patch.js';
export * from './isolated-diff/patch-integrator.js';
export * from './isolated-diff/work-unit-executor.js';
export * from './reports.js';
export { SourceAdapterRegistry } from './source-adapter-registry.js';
export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js';
@ -652,4 +661,10 @@ export type {
TriageSignals,
UnresolvedCardInfo,
WorkUnit,
DeterministicProjectionContext,
ProjectionResult,
DeterministicFinalizationContext,
FinalizationOverrideReplay,
FinalizationResult,
} from './types.js';
export * from './wiki-body-refs.js';

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,7 @@
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { beforeEach, describe, expect, it, vi } from 'vitest';
import { GitService } from '../core/index.js';
import { addTouchedSlSource } from '../tools/index.js';
import { IngestBundleRunner } from './ingest-bundle.runner.js';
import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
@ -96,6 +95,7 @@ const makeDeps = () => {
triageSupported: undefined as undefined | boolean,
detect: vi.fn().mockResolvedValue(true),
listTargetConnectionIds: undefined as undefined | ((stagedDir: string) => Promise<string[]>),
finalize: undefined as any,
chunk: vi.fn().mockResolvedValue({
workUnits: [{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }],
}),
@ -123,9 +123,16 @@ const makeDeps = () => {
};
const scopedGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn(),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
changedPaths: vi.fn().mockResolvedValue([]),
};
const sessionWorktreeService = {
create: vi.fn().mockResolvedValue({
@ -167,10 +174,12 @@ const makeDeps = () => {
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
};
const wikiService = {
forWorktree: vi.fn().mockReturnValue({}),
forWorktree: vi.fn(),
listPageKeys: vi.fn().mockResolvedValue([]),
readPage: vi.fn().mockResolvedValue(null),
syncFromCommit: vi.fn().mockResolvedValue(undefined),
};
wikiService.forWorktree.mockReturnValue(wikiService);
const knowledgeSlRefs = {
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
};
@ -178,7 +187,7 @@ const makeDeps = () => {
listPagesForUser: vi.fn().mockResolvedValue([]),
};
const semanticLayerService = {
forWorktree: vi.fn().mockReturnValue({}),
forWorktree: vi.fn(),
listFilesForConnection: vi
.fn()
.mockImplementation((connectionId: string) =>
@ -193,6 +202,7 @@ const makeDeps = () => {
}),
),
};
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
const slSearchService = {
indexSources: vi.fn().mockResolvedValue(undefined),
};
@ -255,8 +265,12 @@ const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides:
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
},
settings: {
probeRowCount: 1,
memoryIngestionModel: 'test-model',
},
settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' },
skillsRegistry: deps.skillsRegistry as any,
promptService: deps.promptService as any,
wikiService: deps.wikiService as any,
@ -412,6 +426,127 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
);
});
it('fails before squash when reconciliation leaves a touched wiki page with dangling refs', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['missing-page'] },
content: 'See [[missing-page]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await expect(
runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
}),
).rejects.toThrow(/wiki references target missing page\(s\): page-a -> missing-page/);
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
it('allows reconciliation to save circular wiki refs once both pages exist', async () => {
const deps = makeDeps();
let currentToolSession: any = null;
const scopedWiki = {
listPageKeys: vi.fn().mockResolvedValue(['page-a', 'page-b']),
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
if (key === 'page-a') {
return Promise.resolve({
pageKey: 'page-a',
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['page-b'] },
content: 'See [[page-b]].',
});
}
if (key === 'page-b') {
return Promise.resolve({
pageKey: 'page-b',
frontmatter: { summary: 'Page B', usage_mode: 'auto', refs: ['page-a'] },
content: 'See [[page-a]].',
});
}
return Promise.resolve(null);
}),
};
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
}
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
currentToolSession.actions.push(
{ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' },
{ target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' },
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(result.failedWorkUnits).toEqual([]);
expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalled();
expect(deps.runsRepo.markFailed).not.toHaveBeenCalled();
});
it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => {
const deps = makeDeps();
const sessions: any[] = [];
@ -1384,7 +1519,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]),
currentHashes: new Map([['a.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
@ -1441,26 +1576,69 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
);
});
it('runs a registered post-processor before squash, records the outcome, and reindexes touched sources after squash', async () => {
it('runs adapter finalization before squash, records the outcome, and reindexes touched sources', async () => {
const deps = makeDeps();
deps.adapter.source = 'metricflow';
deps.registry.get.mockReturnValue(deps.adapter);
deps.adapter.chunk.mockResolvedValue({
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
workUnits: [],
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
deps.adapter.finalize = vi.fn().mockResolvedValue({
result: { sourcesTouched: 1 },
warnings: ['kept going'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
changedWikiPageKeys: [],
actions: [
{
target: 'sl',
type: 'updated',
key: 'orders',
targetConnectionId: 'warehouse-2',
detail: 'Finalized orders usage',
rawPaths: ['semantic_models.yml'],
},
],
});
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
);
const postProcessor = {
run: vi.fn().mockResolvedValue({
result: { sourcesCreated: 1 },
warnings: ['kept going'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
let head = 'pre-finalization';
const git = {
revParseHead: vi.fn(async () => head),
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
head = 'post-finalization';
return { created: true, commitHash: 'finalization-sha' };
}
return { created: true, commitHash: head };
}),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'post-finalization' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockImplementation(async (from: string, to: string) =>
from === 'pre-finalization' && to === 'post-finalization'
? [{ status: 'M', path: 'semantic-layer/warehouse-2/orders.yaml' }]
: [],
),
changedPaths: vi.fn().mockResolvedValue(['semantic-layer/warehouse-2/orders.yaml']),
};
const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
deps.sessionWorktreeService.create.mockResolvedValue({
chatId: 'j1',
workdir: '/tmp/wt',
branch: 'session/j1',
baseSha: 'b',
createdAt: new Date(),
git,
config: {},
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['semantic_models.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
@ -1475,26 +1653,29 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(postProcessor.run).toHaveBeenCalledWith({
connectionId: 'c1',
sourceKey: 'metricflow',
syncId: expect.any(String),
jobId: 'j1',
runId: 'run-1',
workdir: '/tmp/wt',
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
expect(deps.adapter.finalize).toHaveBeenCalledWith(
expect.objectContaining({
connectionId: 'c1',
sourceKey: 'metricflow',
syncId: expect.any(String),
jobId: 'j1',
runId: 'run-1',
workdir: '/tmp/wt',
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
}),
);
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
postProcessor: {
finalization: expect.objectContaining({
sourceKey: 'metricflow',
status: 'success',
result: { sourcesCreated: 1 },
warnings: ['kept going'],
errors: [],
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
},
commitSha: 'finalization-sha',
touchedPaths: ['semantic-layer/warehouse-2/orders.yaml'],
derivedTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
declaredTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
actions: [expect.objectContaining({ key: 'orders' })],
}),
}),
}),
);
@ -1503,7 +1684,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
});
it('includes historic-sql post-processor output in memory-flow saved counts', async () => {
it('includes finalization actions in memory-flow saved counts', async () => {
const deps = makeDeps();
deps.adapter.source = 'historic-sql';
deps.registry.get.mockReturnValue(deps.adapter);
@ -1517,21 +1698,19 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
},
],
});
const postProcessor = {
run: vi.fn().mockResolvedValue({
result: {
tableUsageMerged: 2,
staleTablesMarked: 1,
patternPagesWritten: 3,
stalePatternPagesMarked: 1,
archivedPatternPages: 1,
},
warnings: [],
errors: [],
touchedSources: [{ connectionId: 'c1', sourceName: 'orders' }],
}),
};
const runner = buildRunner(deps, { postProcessors: { 'historic-sql': postProcessor } });
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Merged usage' },
{ target: 'sl', type: 'updated', key: 'customers', detail: 'Merged usage' },
{ target: 'wiki', type: 'created', key: 'historic-sql-orders', detail: 'Projected pattern' },
{ target: 'wiki', type: 'updated', key: 'historic-sql-customers', detail: 'Projected pattern' },
],
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['tables/public/orders.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/historic-sql/s',
@ -1557,13 +1736,13 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(memoryFlow.snapshot().events).toContainEqual(
expect.objectContaining({
type: 'saved',
wikiCount: 5,
slCount: 3,
wikiCount: 2,
slCount: 2,
}),
);
});
it('marks post-processor infrastructure failure as failed and preserves worktree cleanup state', async () => {
it('marks finalization infrastructure failure as failed and preserves worktree cleanup state', async () => {
const deps = makeDeps();
deps.adapter.source = 'metricflow';
deps.registry.get.mockReturnValue(deps.adapter);
@ -1571,8 +1750,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
});
const postProcessor = { run: vi.fn().mockRejectedValue(new Error('worktree write failed')) };
const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
deps.adapter.finalize = vi.fn().mockRejectedValue(new Error('worktree write failed'));
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['semantic_models.yml', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
@ -1594,6 +1773,132 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash');
});
// Checks that finalization actions which cannot be tied to a staged raw file are
// listed in the report as provenance exclusions and are not written as provenance rows.
it('reports finalization actions excluded from provenance when raw paths are not defensible', async () => {
const deps = makeDeps();
// Two actions: the first carries no rawPaths at all; the second names 'missing.json',
// which is not among the staged hashes mocked below (only 'current.json' is staged).
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [
{ target: 'wiki', type: 'updated', key: 'historic-sql-pattern', detail: 'No raw path' },
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Invalid raw path', rawPaths: ['missing.json'] },
],
});
const runner = buildRunner(deps);
// Stub staging so the run sees exactly one raw file without touching the filesystem.
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['current.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
// One exclusion per action, in action order, each with its own reason code.
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
finalization: expect.objectContaining({
provenanceExclusions: [
expect.objectContaining({ reason: 'missing_raw_paths' }),
expect.objectContaining({ reason: 'raw_path_not_defensible', invalidRawPaths: ['missing.json'] }),
],
}),
}),
}),
);
// The undefensible raw path must never reach the provenance repository.
expect(deps.provenanceRepo.insertMany).not.toHaveBeenCalledWith(
expect.arrayContaining([expect.objectContaining({ rawPath: 'missing.json' })]),
);
});
// Checks what the adapter's finalize() receives on a manual override run: replay
// metadata taken from the prior job's report, and an empty list of work unit outcomes.
it('passes explicit override replay metadata and no current work unit outcomes', async () => {
const deps = makeDeps();
// Prior report returned for the overridden job. Note the two different raw paths:
// 'do-not-replay.json' sits in evictionsApplied, 'evicted-from-prior-report.json' in evictionInputs.
deps.reportsRepo.findByJobId.mockResolvedValue({
id: 'prior-report',
runId: 'prior-run',
jobId: 'prior-job',
connectionId: 'c1',
sourceKey: 'fake',
createdAt: '2026-05-18T00:00:00.000Z',
body: {
status: 'completed',
syncId: 'prior-sync',
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
commitSha: 'prior-sha',
workUnits: [
{
unitKey: 'prior-unit',
rawFiles: ['prior.json'],
status: 'success',
actions: [{ target: 'wiki', type: 'created', key: 'prior', detail: 'prior' }],
touchedSlSources: [],
},
],
failedWorkUnits: [],
reconciliationSkipped: false,
conflictsResolved: [],
evictionsApplied: [
{
rawPath: 'do-not-replay.json',
artifactKind: 'wiki',
artifactKey: 'old',
action: 'removed',
reason: 'prior',
},
],
unmappedFallbacks: [],
artifactResolutions: [],
evictionInputs: ['evicted-from-prior-report.json'],
unresolvedCards: [],
supersededBy: null,
overrideOf: null,
provenanceRows: [],
toolTranscripts: [],
},
});
deps.adapter.finalize = vi.fn().mockResolvedValue({
warnings: [],
errors: [],
touchedSources: [],
changedWikiPageKeys: [],
actions: [],
});
// Git mocks expose the prior sync's raw file at HEAD with fixed content.
deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/prior-sync/prior.json']);
deps.gitService.getFileAtCommit.mockResolvedValue('{"id":1}\n');
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
currentHashes: new Map([['prior.json', 'h1']]),
rawDirInWorktree: 'raw-sources/c1/fake/prior-sync',
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/prior');
await runner.run({
jobId: 'override-job',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'manual_override',
bundleRef: { kind: 'override', priorJobId: 'prior-job' },
});
// evictionRawPaths is expected to equal the prior report's evictionInputs only;
// the rawPath listed under evictionsApplied ('do-not-replay.json') must not appear.
expect(deps.adapter.finalize).toHaveBeenCalledWith(
expect.objectContaining({
workUnitOutcomes: [],
overrideReplay: {
priorJobId: 'prior-job',
priorRunId: 'prior-run',
priorSyncId: 'prior-sync',
evictionRawPaths: ['evicted-from-prior-report.json'],
},
}),
);
});
it('includes existing global wiki pages in WorkUnit prompts', async () => {
const deps = makeDeps();
deps.knowledgeIndex.listPagesForUser.mockResolvedValue([
@ -1851,9 +2156,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
const sessionGit = {
revParseHead: vi.fn().mockResolvedValue('h'),
commitFiles: vi.fn(),
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
resetHardTo: vi.fn(),
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
await writeFile(patchPath, '', 'utf-8');
}),
applyPatchFile3WayIndex: vi.fn(),
diffNameStatus: vi.fn().mockResolvedValue([]),
};
deps.sessionWorktreeService.create.mockResolvedValue({
chatId: 'j1',
@ -1884,135 +2195,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
});
it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => {
const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-'));
try {
const configDir = join(homeDir, 'config');
const mainGit = new GitService({
storage: { configDir, homeDir },
git: {
userName: 'System User',
userEmail: 'system@example.com',
bootstrapMessage: 'Initialize test config repo',
bootstrapAuthor: 'test-system',
bootstrapAuthorEmail: 'system@example.com',
},
});
await mainGit.onModuleInit();
const baseSha = await mainGit.revParseHead();
if (!baseSha) {
throw new Error('no base sha');
}
const deps = makeDeps();
const sessionDir = join(homeDir, '.worktrees', 'session-j1');
const sessionBranch = 'session/j1';
let currentToolSession: any = null;
deps.gitService = mainGit as any;
deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => {
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
await mainGit.addWorktree(sessionDir, sessionBranch, startSha);
return {
chatId: 'j1',
workdir: sessionDir,
branch: sessionBranch,
baseSha: startSha,
createdAt: new Date(),
git: mainGit.forWorktree(sessionDir),
config: {},
};
});
deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined);
deps.adapter.chunk.mockResolvedValue({
workUnits: [
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
],
});
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
currentToolSession = toolSession;
return {
toRuntimeTools: vi.fn().mockReturnValue({}),
getAllTools: vi.fn().mockReturnValue([]),
getToolNames: vi.fn().mockReturnValue([]),
};
});
deps.slValidator.validateSingleSource.mockImplementation(
(_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
warnings: [],
}),
);
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
const unitKey = params.telemetryTags?.unitKey;
if (unitKey === 'wu-good') {
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n');
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good');
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' });
await currentToolSession.gitService.commitFiles(
['semantic-layer/c1/good.yaml'],
'test: add good source',
'KTX Test',
'system@ktx.local',
);
}
if (unitKey === 'wu-bad') {
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n');
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad');
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' });
await currentToolSession.gitService.commitFiles(
['semantic-layer/c1/bad.yaml'],
'test: add bad source',
'KTX Test',
'system@ktx.local',
);
}
return { stopReason: 'natural' };
});
const runner = buildRunner(deps);
(runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => {
const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's');
await mkdir(rawDir, { recursive: true });
await writeFile(join(rawDir, 'good.raw'), 'good raw');
await writeFile(join(rawDir, 'bad.raw'), 'bad raw');
return {
currentHashes: new Map([
['good.raw', 'good-hash'],
['bad.raw', 'bad-hash'],
]),
rawDirInWorktree: 'raw-sources/c1/fake/s',
};
});
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
const result = await runner.run({
jobId: 'j1',
connectionId: 'c1',
sourceKey: 'fake',
trigger: 'upload',
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
});
expect(result.failedWorkUnits).toEqual(['wu-bad']);
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good');
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull();
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
expect.objectContaining({
body: expect.objectContaining({
failedWorkUnits: ['wu-bad'],
}),
}),
);
await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow();
} finally {
await rm(homeDir, { recursive: true, force: true });
}
});
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
const deps = makeDeps();
deps.adapter.detect.mockResolvedValue(false);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,85 @@
import { mkdtemp, readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { describe, expect, it, vi } from 'vitest';
import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js';
// Tests for the JSONL trace writer, the traceTimed helper, and the trace path layout.
describe('FileIngestTraceWriter', () => {
  // A single debug event must round-trip through the file as one JSON line.
  it('persists structured trace events as JSONL', async () => {
    const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
    const tracePath = ingestTracePathForJob(root, 'job-1');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
    });

    await trace.event('debug', 'snapshot', 'input_snapshot', {
      baseSha: 'abc123',
      rawFileCount: 2,
      diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
    });

    const lines = (await readFile(tracePath, 'utf-8'))
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));
    expect(lines).toHaveLength(1);
    expect(lines[0]).toMatchObject({
      schemaVersion: 1,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
      phase: 'snapshot',
      event: 'input_snapshot',
      data: {
        baseSha: 'abc123',
        rawFileCount: 2,
        diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
      },
    });
    expect(typeof lines[0].at).toBe('string');
  });

  // traceTimed must emit *_started / *_failed with the elapsed time and the serialized error.
  it('records timing and error context for postmortem inspection', async () => {
    vi.useFakeTimers();
    try {
      vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z'));
      const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
      const tracePath = ingestTracePathForJob(root, 'job-2');
      const trace = new FileIngestTraceWriter({
        tracePath,
        jobId: 'job-2',
        connectionId: 'c1',
        sourceKey: 'fake',
        level: 'trace',
      });

      await expect(
        traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => {
          // Advance the fake clock so the recorded duration is deterministic.
          vi.advanceTimersByTime(17);
          throw new Error('patch conflict');
        }),
      ).rejects.toThrow('patch conflict');

      const lines = (await readFile(tracePath, 'utf-8'))
        .trim()
        .split('\n')
        .map((line) => JSON.parse(line));
      expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']);
      expect(lines[1]).toMatchObject({
        level: 'error',
        phase: 'integration',
        data: { unitKey: 'wu-1' },
        error: { name: 'Error', message: 'patch conflict' },
      });
      expect(lines[1].durationMs).toBe(17);
    } finally {
      // Restore real timers even when an assertion above fails, so fake timers
      // cannot leak into tests that run afterwards.
      vi.useRealTimers();
    }
  });

  it('uses the documented trace path layout', () => {
    expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl');
  });
});

View file

@ -0,0 +1,158 @@
import { appendFile, mkdir } from 'node:fs/promises';
import { dirname, join } from 'node:path';
/** Levels an ingest trace event can be written at, and a writer can be configured to. */
export type IngestTraceLevel = 'info' | 'debug' | 'trace' | 'error';
/**
 * Numeric rank of each level. `shouldWrite` keeps an event when its rank is less
 * than or equal to the configured level's rank, so `error` (0) is always written
 * and `trace` (3) only when the writer is configured at `trace`.
 */
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
error: 0,
info: 1,
debug: 2,
trace: 3,
};
/** Identity and configuration carried by a trace writer and copied onto each event it writes. */
export interface IngestTraceContext {
/** File the writer appends JSONL events to. */
tracePath: string;
jobId: string;
connectionId: string;
sourceKey: string;
/** Optional; only included in written events when set (see `withContext`). */
runId?: string;
/** Optional; only included in written events when set (see `withContext`). */
syncId?: string;
/** Configured verbosity; `FileIngestTraceWriter` falls back to `'debug'` when omitted. */
level?: IngestTraceLevel;
}
/** Shape of one serialized trace record (one JSON object per line in the trace file). */
export interface IngestTraceEvent {
schemaVersion: 1;
/** ISO-8601 timestamp produced with `new Date().toISOString()` when the event is written. */
at: string;
level: IngestTraceLevel;
jobId: string;
connectionId: string;
sourceKey: string;
runId?: string;
syncId?: string;
phase: string;
event: string;
/** Elapsed milliseconds supplied by the caller; `traceTimed` passes a `Date.now()` difference. */
durationMs?: number;
data?: Record<string, unknown>;
/** Serialized form of the error passed to `event`; `stack` is present only when the error had one. */
error?: {
name: string;
message: string;
stack?: string;
};
}
/** Contract shared by the file-backed and no-op trace writers. */
export interface IngestTraceWriter {
readonly tracePath: string;
readonly context: IngestTraceContext;
/** Returns a writer whose context additionally carries the given `runId` / `syncId`. */
withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter;
/**
 * Records one trace event.
 *
 * @param level - Level of this event.
 * @param phase - Caller-chosen phase label stored on the record.
 * @param event - Caller-chosen event name stored on the record.
 * @param data - Optional structured payload.
 * @param error - Optional error value to serialize onto the record.
 * @param durationMs - Optional elapsed time in milliseconds.
 */
event(
level: IngestTraceLevel,
phase: string,
event: string,
data?: Record<string, unknown>,
error?: unknown,
durationMs?: number,
): Promise<void>;
}
/**
 * Builds the trace file location for a job:
 * `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 *
 * @param homeDir - Directory under which the `ingest-traces` folder lives.
 * @param jobId - Job identifier used as the per-job folder name.
 * @returns The joined path to the job's `trace.jsonl` file.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  const relativeSegments = ['ingest-traces', jobId, 'trace.jsonl'];
  return join(homeDir, ...relativeSegments);
}
/**
 * Converts an arbitrary thrown value into the plain `error` shape stored on a trace record.
 *
 * @param error - Value passed by the caller; may be anything.
 * @returns `undefined` for `null`/`undefined`; `{ name, message[, stack] }` for `Error`
 *   instances (stack only when truthy); otherwise `{ name: 'Error', message: String(error) }`.
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error == null) {
    return undefined;
  }
  if (!(error instanceof Error)) {
    return { name: 'Error', message: String(error) };
  }
  const { name, message, stack } = error;
  // Keep key order name → message → stack so the serialized JSON is unchanged.
  return stack ? { name, message, stack } : { name, message };
}
/**
 * Decides whether an event at `incoming` level passes a writer configured at `configured`.
 *
 * @returns `true` when the incoming level's rank does not exceed the configured level's rank.
 */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  const threshold = TRACE_LEVEL_RANK[configured];
  const incomingRank = TRACE_LEVEL_RANK[incoming];
  return incomingRank <= threshold;
}
/**
 * Trace writer that appends each accepted event as one JSON line to `tracePath`,
 * creating the parent directory on demand.
 */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /** Stores the context, defaulting `level` to `'debug'` when the caller omits it. */
  constructor(context: IngestTraceContext) {
    const level = context.level ?? 'debug';
    this.context = { ...context, level };
    this.tracePath = context.tracePath;
  }

  /** Returns a new writer for the same file whose context is overlaid with `runId` / `syncId`. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter {
    const merged = { ...this.context, ...context, tracePath: this.tracePath };
    return new FileIngestTraceWriter(merged);
  }

  /**
   * Appends one event to the trace file unless its level is filtered out by the
   * configured level.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    const configuredLevel = this.context.level ?? 'debug';
    if (shouldWrite(configuredLevel, level)) {
      const failure = serializeError(error);
      const { jobId, connectionId, sourceKey, runId, syncId } = this.context;
      // Optional fields are spread in conditionally so absent values leave no key behind.
      const record: IngestTraceEvent = {
        schemaVersion: 1,
        at: new Date().toISOString(),
        level,
        jobId,
        connectionId,
        sourceKey,
        ...(runId ? { runId } : {}),
        ...(syncId ? { syncId } : {}),
        phase,
        event,
        ...(durationMs === undefined ? {} : { durationMs }),
        ...(data ? { data } : {}),
        ...(failure ? { error: failure } : {}),
      };
      const directory = dirname(this.tracePath);
      await mkdir(directory, { recursive: true });
      await appendFile(this.tracePath, `${JSON.stringify(record)}\n`, 'utf-8');
    }
  }
}
/**
 * Trace writer that records nothing: `event` resolves immediately and
 * `withContext` hands back the same instance.
 */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';

  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  /** Context overrides are ignored; the same writer is returned. */
  withContext(): IngestTraceWriter {
    return this;
  }

  /** Accepts and discards the event. */
  event(): Promise<void> {
    return Promise.resolve();
  }
}
/**
 * Runs `fn` and brackets it with `<event>_started` and `<event>_finished` /
 * `<event>_failed` trace events, recording the elapsed milliseconds.
 *
 * Only a failure of `fn` itself produces the `_failed` event, and `fn`'s error is
 * always the one rethrown: a failing `_failed` trace write is ignored so it cannot
 * replace the operation's error. A failing `_started` or `_finished` trace write
 * still rejects, but no longer causes a spurious `_failed` event for an operation
 * that actually succeeded.
 *
 * @param trace - Writer that receives the events.
 * @param phase - Phase label stored on every emitted event.
 * @param event - Base event name; suffixes `_started`, `_finished`, `_failed` are appended.
 * @param data - Payload attached to every emitted event.
 * @param fn - The operation to time.
 * @returns Whatever `fn` resolves to.
 * @throws Rethrows the error thrown by `fn`.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  const started = Date.now();
  let result: T;
  try {
    result = await fn();
  } catch (error) {
    try {
      await trace.event('error', phase, `${event}_failed`, data, error, Date.now() - started);
    } catch {
      // Best effort: losing the trace line is preferable to masking the real failure.
    }
    throw error;
  }
  // Outside the try block so a trace write failure is not mistaken for a failure of fn.
  await trace.event('debug', phase, `${event}_finished`, data, undefined, Date.now() - started);
  return result;
}

View file

@ -0,0 +1,97 @@
import { describe, expect, it } from 'vitest';
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths, textArtifactRoots } from './git-patch.js';
// Contract tests for the patch parser and the per-work-unit patch policy.
describe('isolated diff patch contract', () => {
// A modified file and a newly added file must both be reported with their mode.
it('parses touched paths from no-rename git patches', () => {
const patch = [
'diff --git a/wiki/global/a.md b/wiki/global/a.md',
'index 1111111..2222222 100644',
'--- a/wiki/global/a.md',
'+++ b/wiki/global/a.md',
'@@ -1 +1 @@',
'-old',
'+new',
'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
'new file mode 100644',
'--- /dev/null',
'+++ b/semantic-layer/c1/orders.yaml',
'@@ -0,0 +1 @@',
'+name: orders',
'',
].join('\n');
expect(parsePatchTouchedPaths(patch)).toEqual([
{
path: 'wiki/global/a.md',
oldPath: 'wiki/global/a.md',
newPath: 'wiki/global/a.md',
mode: '100644',
binary: false,
},
{
path: 'semantic-layer/c1/orders.yaml',
oldPath: 'semantic-layer/c1/orders.yaml',
newPath: 'semantic-layer/c1/orders.yaml',
mode: '100644',
binary: false,
},
]);
});
// With slDisallowed set, any path under semantic-layer/ must be rejected.
it('rejects semantic-layer paths for slDisallowed work units', () => {
const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'lookml-mismatch',
patch,
slDisallowed: true,
}),
).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/);
});
// The patch targets connection 'finance' while only 'warehouse' is allowed.
it('rejects semantic-layer paths outside allowed target connections', () => {
const patch =
'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-finance',
patch,
slDisallowed: false,
allowedTargetConnectionIds: new Set(['warehouse']),
}),
).toThrow(
/semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/,
);
});
// Files under the text artifact roots may be neither executable nor binary.
it('rejects executable and binary changes under known text artifact roots', () => {
expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']);
// Mode change 100644 -> 100755 on a wiki page.
const executablePatch =
'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n';
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-1',
patch: executablePatch,
slDisallowed: false,
}),
).toThrow(/unexpected executable mode under wiki\/global\/a.md/);
// A 'GIT binary patch' section on a semantic-layer file.
const binaryPatch = [
'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
'index 1111111..2222222 100644',
'GIT binary patch',
'literal 0',
'',
].join('\n');
expect(() =>
assertPatchAllowedForWorkUnit({
unitKey: 'wu-2',
patch: binaryPatch,
slDisallowed: false,
}),
).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders.yaml/);
});
});

View file

@ -0,0 +1,101 @@
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';
/** Path prefixes whose files are checked for binary content and non-`100644` modes. */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;

/** One file section (`diff --git …`) parsed out of a patch. */
export interface PatchTouchedPath {
  /** Path used for policy checks: `newPath`, or `oldPath` when `newPath` is `/dev/null`. */
  path: string;
  oldPath: string;
  newPath: string;
  /** Last mode seen on an `index`, `new mode` or `new file mode` line; `null` when none appeared. */
  mode: string | null;
  /** `true` when the section contains `GIT binary patch` or a `Binary files …` line. */
  binary: boolean;
}

/** Input to the per-work-unit patch policy check. */
export interface PatchPolicyInput {
  unitKey: string;
  patch: string;
  slDisallowed: boolean;
  allowedTargetConnectionIds?: ReadonlySet<string>;
}

const DIFF_GIT_HEADER_PATTERN = /^diff --git (.+)$/;
const INDEX_MODE_PATTERN = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/;
const NEW_MODE_PATTERN = /^new mode ([0-7]{6})$/;
const NEW_FILE_MODE_PATTERN = /^new file mode ([0-7]{6})$/;
const OCTAL_ESCAPE_PATTERN = /^[0-7]{1,3}/;

/** Byte values of the single-character escapes git uses inside C-quoted paths. */
const GIT_QUOTED_ESCAPES: Readonly<Record<string, number>> = {
  a: 0x07,
  b: 0x08,
  f: 0x0c,
  n: 0x0a,
  r: 0x0d,
  t: 0x09,
  v: 0x0b,
  '"': 0x22,
  '\\': 0x5c,
};

/** Removes a leading `a/` or `b/` diff prefix. */
function stripPrefix(path: string): string {
  return path.replace(/^[ab]\//, '');
}

/** Whether `path` lives under one of `textArtifactRoots`. */
function isTextArtifactPath(path: string): boolean {
  return textArtifactRoots.some((root) => path.startsWith(root));
}

function percentEncodeByte(byte: number): string {
  return `%${byte.toString(16).padStart(2, '0')}`;
}

/**
 * Decodes a git C-quoted path token (`"a/na\303\257ve.md"`), including octal byte
 * escapes that together form UTF-8 sequences. Tokens that are not quoted are
 * returned unchanged; undecodable content falls back to the text between the quotes.
 */
function unquoteGitPath(token: string): string {
  if (token.length < 2 || !token.startsWith('"') || !token.endsWith('"')) {
    return token;
  }
  const inner = token.slice(1, -1);
  try {
    // Build a percent-encoded string so escaped bytes and literal text decode as one UTF-8 stream.
    let encoded = '';
    let literalStart = 0;
    let index = 0;
    while (index < inner.length) {
      if (inner[index] !== '\\') {
        index += 1;
        continue;
      }
      encoded += encodeURIComponent(inner.slice(literalStart, index));
      const octal = OCTAL_ESCAPE_PATTERN.exec(inner.slice(index + 1, index + 4));
      if (octal) {
        encoded += percentEncodeByte(Number.parseInt(octal[0], 8) & 0xff);
        index += 1 + octal[0].length;
      } else {
        const escaped = inner[index + 1] ?? '';
        const byte = GIT_QUOTED_ESCAPES[escaped];
        encoded += byte !== undefined ? percentEncodeByte(byte) : encodeURIComponent(escaped);
        index += 2;
      }
      literalStart = index;
    }
    encoded += encodeURIComponent(inner.slice(literalStart));
    return decodeURIComponent(encoded);
  } catch {
    return inner;
  }
}

/** Index of the quote closing the token that opens at `text[0]`, or -1 when unterminated. */
function quotedTokenEnd(text: string): number {
  for (let index = 1; index < text.length; index += 1) {
    if (text[index] === '\\') {
      index += 1; // skip the escaped character
      continue;
    }
    if (text[index] === '"') {
      return index;
    }
  }
  return -1;
}

/**
 * Splits the text after `diff --git ` into its old and new path tokens
 * (prefixes still attached, quotes already decoded).
 *
 * Handles, in order: C-quoted tokens; identical old/new paths that may contain
 * spaces (the no-rename case, split at the midpoint); and finally the last-space
 * split the previous greedy regex performed. Returns `null` when no split exists.
 */
function splitDiffHeaderPaths(rest: string): [string, string] | null {
  if (rest.startsWith('"')) {
    const close = quotedTokenEnd(rest);
    if (close > 0 && rest[close + 1] === ' ' && close + 2 < rest.length) {
      return [unquoteGitPath(rest.slice(0, close + 1)), unquoteGitPath(rest.slice(close + 2))];
    }
  } else if (rest.endsWith('"')) {
    // An unquoted first path cannot contain `"`, so the first ` "` opens the second token.
    const open = rest.indexOf(' "');
    if (open > 0) {
      return [rest.slice(0, open), unquoteGitPath(rest.slice(open + 1))];
    }
  }
  if (rest.length % 2 === 1) {
    const middle = (rest.length - 1) / 2;
    if (middle > 0 && rest[middle] === ' ') {
      const left = rest.slice(0, middle);
      const right = rest.slice(middle + 1);
      if (stripPrefix(left) === stripPrefix(right)) {
        return [left, right];
      }
    }
  }
  const lastSpace = rest.lastIndexOf(' ', rest.length - 2);
  if (lastSpace < 1) {
    return null;
  }
  return [rest.slice(0, lastSpace), rest.slice(lastSpace + 1)];
}

/**
 * Lists the files touched by a git patch, one entry per `diff --git` section,
 * with the file mode and binary flag seen inside that section.
 *
 * Paths containing spaces and git's C-quoted paths are resolved to the real
 * repository path, so callers that test `path.startsWith('wiki/')` or
 * `'semantic-layer/'` cannot be sidestepped by unusual file names.
 *
 * @param patch - Patch text as produced by `git diff` / `git format-patch`.
 * @returns Entries in the order their sections appear in the patch.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const header = DIFF_GIT_HEADER_PATTERN.exec(line);
    const headerPaths = header ? splitDiffHeaderPaths(header[1] ?? '') : null;
    if (headerPaths) {
      if (current) {
        entries.push(current);
      }
      const oldPath = stripPrefix(headerPaths[0]);
      const newPath = stripPrefix(headerPaths[1]);
      current = {
        path: newPath === '/dev/null' ? oldPath : newPath,
        oldPath,
        newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }

    const indexMode = INDEX_MODE_PATTERN.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = NEW_MODE_PATTERN.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = NEW_FILE_MODE_PATTERN.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}
/**
 * Validates a work unit's patch against the ingest policy and returns the parsed entries.
 *
 * Checks, in order: semantic-layer target connections (only when
 * `allowedTargetConnectionIds` is provided); then, per touched path, the
 * `slDisallowed` rule, and for paths under `textArtifactRoots` that the change
 * is neither binary nor carries a mode other than `100644`.
 *
 * @param input - Work unit key, patch text and policy switches.
 * @returns The touched paths parsed from the patch.
 * @throws Error on the first violation found.
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const touchedPaths = parsePatchTouchedPaths(input.patch);
  const allowedConnectionIds = input.allowedTargetConnectionIds;

  if (allowedConnectionIds) {
    const paths = touchedPaths.map((entry) => entry.path);
    assertSemanticLayerTargetPathsAllowed({ paths, allowedConnectionIds });
  }

  for (const { path, binary, mode } of touchedPaths) {
    const touchesSemanticLayer = path.startsWith('semantic-layer/');
    if (touchesSemanticLayer && input.slDisallowed) {
      throw new Error(`slDisallowed WorkUnit ${input.unitKey} touched ${path}`);
    }
    // Binary and mode rules apply only beneath the known text artifact roots.
    if (isTextArtifactPath(path)) {
      if (binary) {
        throw new Error(`unexpected binary patch under ${path}`);
      }
      if (mode && mode !== '100644') {
        throw new Error(`unexpected executable mode under ${path}: ${mode}`);
      }
    }
  }

  return touchedPaths;
}

Some files were not shown because too many files have changed in this diff Show more