mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
Merge commit 'c513d61dca' into next
This commit is contained in:
commit
69535f86c9
158 changed files with 29523 additions and 1053 deletions
|
|
@ -70,6 +70,7 @@ Agent integration ready: yes (codex:project)
|
|||
| `ktx sl search "revenue"` | Search semantic-layer sources |
|
||||
| `ktx sl validate <source> --connection-id <id>` | Validate a semantic source |
|
||||
| `ktx sl query --measure <measure> --format sql` | Compile semantic-layer SQL |
|
||||
| `ktx sql --connection <id> "select 1"` | Execute read-only SQL |
|
||||
| `ktx wiki search "revenue definition"` | Search local wiki context |
|
||||
| `ktx mcp start` | Start the local MCP server for agent clients |
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ ktx
|
|||
search <query>
|
||||
validate <sourceName>
|
||||
query
|
||||
sql
|
||||
status
|
||||
mcp
|
||||
start
|
||||
|
|
@ -79,6 +80,9 @@ ktx ingest --all
|
|||
ktx sl search "revenue"
|
||||
ktx wiki search "revenue recognition"
|
||||
|
||||
# Execute read-only SQL
|
||||
ktx sql --connection warehouse "select count(*) from public.orders"
|
||||
|
||||
# Start the local MCP server for agent clients
|
||||
ktx mcp start
|
||||
```
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ ktx dev <subcommand> [options]
|
|||
|
||||
## `dev schema`
|
||||
|
||||
`ktx dev schema` does not require a `ktx.yaml` file or a configured project
|
||||
directory. Use it from any directory to generate editor or agent schema files.
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--output <file>` | Write the schema to a file instead of stdout | — |
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ connections when you use `--all`.
|
|||
| `--query-history-window-days <days>` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default |
|
||||
| `--plain` | Print plain text output | `true` |
|
||||
| `--json` | Print JSON output | `false` |
|
||||
| `--yes` | Install required managed runtime features without prompting | `false` |
|
||||
| `--no-input` | Disable interactive terminal input | — |
|
||||
|
||||
`--fast` and `--deep` are mutually exclusive. Depth flags apply only to
|
||||
|
|
@ -44,6 +45,12 @@ requires deep ingest readiness.
|
|||
When `--all` selects both databases and context sources, database ingest runs
|
||||
first, then source ingest and memory updates run for source connections.
|
||||
|
||||
Some ingest paths use the managed KTX Python runtime. Query-history ingest uses
|
||||
it for SQL analysis, and Looker source ingest uses it for Looker identifier
|
||||
parsing. In an interactive terminal, `ktx ingest` prompts before installing the
|
||||
required runtime features. Use `--yes` to install them without prompting, or
|
||||
use `--no-input` to fail fast with install guidance.
|
||||
|
||||
## `ktx ingest text` Options
|
||||
|
||||
Use `ktx ingest text` to capture free-form text artifacts into KTX memory.
|
||||
|
|
@ -104,6 +111,41 @@ notion skipped skipped done done
|
|||
Use `--json` when a script or agent needs the selected plan and per-target
|
||||
results.
|
||||
|
||||
## Inspect source ingest traces
|
||||
|
||||
Source ingest writes persistent JSONL traces for postmortem debugging. Plain
|
||||
ingest output prints the trace path near the report, run, and job identifiers
|
||||
when a trace is available:
|
||||
|
||||
```text
|
||||
Report: report-abc123
|
||||
Run: run-abc123
|
||||
Job: job-abc123
|
||||
Trace: .ktx/ingest-traces/job-abc123/trace.jsonl
|
||||
```
|
||||
|
||||
The trace file lives under the project directory at
|
||||
`.ktx/ingest-traces/<jobId>/trace.jsonl`. Each line is a JSON event with the
|
||||
job id, run id, sync id, connection id, source key, phase, event name, timing,
|
||||
state snapshot, decision context, and error details. Failed runs also write a
|
||||
stored ingest report with `status: "failed"`, `failure.phase`,
|
||||
`failure.message`, and the same trace path.
|
||||
|
||||
Use `jq` or line-oriented tools to inspect a trace:
|
||||
|
||||
```bash
|
||||
jq -c '. | {at, level, phase, event, durationMs, data, error}' \
|
||||
.ktx/ingest-traces/<jobId>/trace.jsonl
|
||||
```
|
||||
|
||||
KTX writes `debug` trace events by default. Set `KTX_INGEST_TRACE_LEVEL` to
|
||||
`error`, `info`, `debug`, or `trace` before running ingest to change the trace
|
||||
verbosity:
|
||||
|
||||
```bash
|
||||
KTX_INGEST_TRACE_LEVEL=trace ktx ingest metabase
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
||||
| Error | Cause | Recovery |
|
||||
|
|
@ -111,6 +153,7 @@ results.
|
|||
| Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` |
|
||||
| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` |
|
||||
| Query history is unsupported | The selected database driver does not support query history | Run schema ingest without query-history flags |
|
||||
| Python runtime is missing | The selected ingest target needs runtime-backed SQL analysis or source parsing | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx dev runtime install` command |
|
||||
| No ingest target was selected | No connection id was provided and `--all` was omitted | Run `ktx ingest <connectionId>` or `ktx ingest --all` |
|
||||
| Source options were ignored | Depth and query-history flags were supplied for a non-database source | Omit database-only flags when ingesting source connections |
|
||||
| Text ingest stops early | `--fail-fast` was used and one item failed | Fix the failed item or rerun without `--fail-fast` to collect all failures |
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ description: "Set up or resume a local KTX project."
|
|||
|
||||
`ktx setup` is the guided configuration flow for a local KTX project. It can
|
||||
create or resume `ktx.yaml`, configure LLM and embedding providers, add
|
||||
database and context-source connections, build initial context, and install
|
||||
agent integrations.
|
||||
database and context-source connections, prepare required runtime features,
|
||||
build initial context, and install agent integrations.
|
||||
|
||||
When you run bare `ktx` in an interactive terminal outside any KTX project, the
|
||||
CLI starts this same setup flow. Inside an existing project, `ktx setup`
|
||||
|
|
@ -79,6 +79,23 @@ of Anthropic API key or Vertex flags. For Claude Code, `--llm-model` accepts
|
|||
`sentence-transformers` uses the KTX-managed Python runtime. Choose only one
|
||||
embedding credential source.
|
||||
|
||||
### Runtime
|
||||
|
||||
Setup prepares the managed Python runtime when your selected configuration
|
||||
needs it. The runtime step runs after database and source setup and before the
|
||||
initial context build.
|
||||
|
||||
KTX prepares the `core` runtime feature when agent integration, query-history
|
||||
ingest, Looker source ingest, or daemon-backed context build paths need it. KTX
|
||||
prepares the `local-embeddings` runtime feature when you choose managed local
|
||||
`sentence-transformers` embeddings. Existing external daemon URLs, such as
|
||||
`KTX_DAEMON_URL` or `KTX_SQL_ANALYSIS_URL`, satisfy the matching dependency and
|
||||
skip managed runtime installation for that dependency.
|
||||
|
||||
Interactive setup prompts before installing runtime features. Use `--yes` to
|
||||
install them without prompting. Use `--no-input` to fail fast when required
|
||||
runtime features are missing.
|
||||
|
||||
### Databases
|
||||
|
||||
| Flag | Description |
|
||||
|
|
@ -197,6 +214,7 @@ LLM ready: yes (claude-sonnet-4-6)
|
|||
Embeddings ready: yes (text-embedding-3-small)
|
||||
Databases configured: yes (postgres-warehouse)
|
||||
Context sources configured: yes (dbt-main)
|
||||
Runtime ready: yes (core)
|
||||
KTX context built: yes
|
||||
Agent integration ready: yes (codex:project)
|
||||
```
|
||||
|
|
@ -210,6 +228,7 @@ Use `ktx status` for repeatable readiness checks after setup exits.
|
|||
| Setup resumes an unexpected project | `KTX_PROJECT_DIR` or nearest `ktx.yaml` points to another directory | Pass `--project-dir <path>` explicitly |
|
||||
| Setup cannot run in CI | Required values are missing and `--no-input` disables prompts | Provide the relevant automation flags or create a fixture `ktx.yaml` |
|
||||
| Provider health check fails | Provider key, model id, Vertex project, or Vertex location is invalid | Fix the `env:` or `file:` reference and rerun setup |
|
||||
| Python runtime is missing | The selected setup needs runtime-backed agent, query-history, Looker, or local embedding features | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx dev runtime install` command |
|
||||
| `--enable-query-history` is rejected | The selected database driver does not support query history | Use Postgres, BigQuery, or Snowflake, or rerun without query-history flags |
|
||||
| Source setup rejects location flags | Both `--source-path` and `--source-git-url` were supplied | Choose the local path or the Git URL, not both |
|
||||
| Agent integration missing | Setup skipped the agents step | Run `ktx setup --agents --target <target>` |
|
||||
|
|
|
|||
|
|
@ -141,6 +141,9 @@ Semantic-layer list and search commands return human-readable output by
|
|||
default. Use `--json` on `list` or `search` when an agent needs structured
|
||||
output. Use `--format sql` on `query` to inspect generated SQL before
|
||||
execution, or leave `--format json` for the compiled query and optional rows.
|
||||
Pretty `sl search` output shows `#1`, `#2`, and subsequent rank badges for the
|
||||
displayed results. Plain and JSON output keep the raw `score` value, which is a
|
||||
ranking score rather than a percentage.
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
|
|||
103
docs-site/content/docs/cli-reference/ktx-sql.mdx
Normal file
103
docs-site/content/docs/cli-reference/ktx-sql.mdx
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
---
|
||||
title: "ktx sql"
|
||||
description: "Execute parser-validated read-only SQL against a configured connection."
|
||||
---
|
||||
|
||||
Run read-only SQL against a database connection in your KTX project. The command
|
||||
validates the statement before execution and only accepts a single `SELECT` or
|
||||
`WITH` query.
|
||||
|
||||
## Command signature
|
||||
|
||||
Use `ktx sql` with a required connection id and positional SQL text.
|
||||
|
||||
```bash
|
||||
ktx sql --connection <id> [options] <sql...>
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
Use output flags to choose between terminal display, TSV rows, and structured
|
||||
JSON.
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `-c`, `--connection <id>` | KTX database connection id. Required. | - |
|
||||
| `--max-rows <n>` | Maximum rows to return. Must be between `1` and `10000`. | `1000` |
|
||||
| `--output <mode>` | Output mode: `pretty`, `plain` (TSV), or `json`. | `pretty` |
|
||||
| `--json` | Shortcut for `--output=json` (overrides `--output`). | `false` |
|
||||
|
||||
## Examples
|
||||
|
||||
Quote the SQL text in shell scripts and whenever the query contains spaces or punctuation.
|
||||
|
||||
```bash
|
||||
# Count rows in a table
|
||||
ktx sql --connection warehouse "select count(*) from public.orders"
|
||||
|
||||
# Return a small result set
|
||||
ktx sql \
|
||||
--connection warehouse \
|
||||
--max-rows 25 \
|
||||
"select id, status from public.orders order by created_at desc"
|
||||
|
||||
# Print JSON for agents or scripts
|
||||
ktx sql \
|
||||
--connection warehouse \
|
||||
--json \
|
||||
"select status, count(*) from public.orders group by status"
|
||||
|
||||
# Print TSV rows
|
||||
ktx sql \
|
||||
-c warehouse \
|
||||
--output plain \
|
||||
"select id, status from public.orders"
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Pretty output prints aligned columns and a final row count.
|
||||
|
||||
```text
|
||||
status count
|
||||
------ -----
|
||||
paid 42
|
||||
open 7
|
||||
|
||||
2 rows
|
||||
```
|
||||
|
||||
Plain output prints a TSV header row followed by TSV data rows.
|
||||
|
||||
```text
|
||||
status count
|
||||
paid 42
|
||||
open 7
|
||||
```
|
||||
|
||||
JSON output preserves connection id, headers, optional header types, rows, and
|
||||
row count.
|
||||
|
||||
```json
|
||||
{
|
||||
"connectionId": "warehouse",
|
||||
"headers": ["status", "count"],
|
||||
"headerTypes": ["text", "bigint"],
|
||||
"rows": [
|
||||
["paid", 42],
|
||||
["open", 7]
|
||||
],
|
||||
"rowCount": 2
|
||||
}
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
||||
Use the error text to distinguish validation failures from connection failures.
|
||||
|
||||
| Error | Cause | Recovery |
|
||||
|-------|-------|----------|
|
||||
| `Only one SQL statement can be executed.` | The SQL text contains multiple statements. | Run one query at a time. |
|
||||
| `SQL contains read/write operation` | The statement is not read-only. | Use a single `SELECT` or `WITH` query. |
|
||||
| `Connection "<id>" is not configured in ktx.yaml` | The connection id is wrong or missing from the project. | Run `ktx connection list` and retry with an exact id. |
|
||||
| `does not support read-only SQL execution` | The connection type has no local SQL executor. | Use a supported database connection or query through MCP where available. |
|
||||
|
|
@ -43,6 +43,12 @@ need to add or update wiki knowledge.
|
|||
| `--output <mode>` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` |
|
||||
| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` |
|
||||
|
||||
`wiki search` uses hybrid search when `storage.search` is `sqlite-fts5`. KTX
|
||||
combines lexical SQLite FTS5 matches, token matches, and semantic matches from
|
||||
wiki page embeddings stored in `.ktx/db.sqlite`. If embeddings are not
|
||||
configured or the embedding backend is unavailable, KTX skips the semantic lane
|
||||
and keeps lexical and token results.
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
|
|
@ -60,14 +66,21 @@ ktx wiki search "monthly recurring revenue" --json --limit 10
|
|||
|
||||
# Print search results as TSV
|
||||
ktx wiki search "monthly recurring revenue" --output plain
|
||||
|
||||
# Inspect which search lanes were used
|
||||
ktx --debug wiki search "monthly recurring revenue" --json
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Wiki commands print clack-style pretty output in a TTY and TSV-style plain
|
||||
output when requested. JSON output wraps the items with a command metadata
|
||||
envelope. Open the matching Markdown files directly when you need the full page
|
||||
contents.
|
||||
envelope. Search results include `matchReasons` and `lanes` metadata so you can
|
||||
see whether lexical, token, or semantic search contributed to the ranking. Open
|
||||
the matching Markdown files directly when you need the full page contents.
|
||||
Pretty search output shows `#1`, `#2`, and subsequent rank badges for the displayed
|
||||
results. Plain and JSON output keep the raw `score` value, which is a ranking
|
||||
score rather than a percentage.
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
@ -77,16 +90,49 @@ contents.
|
|||
{
|
||||
"key": "revenue-definitions",
|
||||
"summary": "Canonical revenue metric definitions",
|
||||
"score": 0.92
|
||||
"score": 0.92,
|
||||
"matchReasons": ["lexical", "semantic"],
|
||||
"lanes": [
|
||||
{
|
||||
"lane": "lexical",
|
||||
"status": "available",
|
||||
"requestedCandidatePoolLimit": 25,
|
||||
"effectiveCandidatePoolLimit": 25,
|
||||
"returnedCandidateCount": 3,
|
||||
"weight": 1.5
|
||||
},
|
||||
{
|
||||
"lane": "semantic",
|
||||
"status": "available",
|
||||
"requestedCandidatePoolLimit": 25,
|
||||
"effectiveCandidatePoolLimit": 25,
|
||||
"returnedCandidateCount": 8,
|
||||
"weight": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"meta": {
|
||||
"command": "wiki search"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When you pass the global `--debug` flag, KTX writes search diagnostics to
|
||||
stderr and leaves stdout unchanged. This is useful with `--json` because stdout
|
||||
stays machine-readable:
|
||||
|
||||
```text
|
||||
[debug] wiki search mode=sqlite-fts5 embedding=configured results=2
|
||||
[debug] wiki search lane=lexical status=available returned=1 weight=1.5
|
||||
[debug] wiki search lane=token status=available returned=1 weight=0.75
|
||||
[debug] wiki search lane=semantic status=available returned=2 weight=3
|
||||
```
|
||||
|
||||
## Common errors
|
||||
|
||||
| Error | Cause | Recovery |
|
||||
|-------|-------|----------|
|
||||
| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing |
|
||||
| Search returns no results | The query terms do not match summaries, tags, or content, and the semantic lane is unavailable or has no positive matches | Run with `--debug`, check the semantic lane status, retry with business synonyms, then create a page if the knowledge is missing |
|
||||
| A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` |
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
"ktx-connection",
|
||||
"ktx-ingest",
|
||||
"ktx-sl",
|
||||
"ktx-sql",
|
||||
"ktx-wiki",
|
||||
"ktx-status",
|
||||
"ktx-mcp",
|
||||
|
|
|
|||
|
|
@ -63,9 +63,9 @@ Setup supports three LLM provider paths:
|
|||
|
||||
| Provider | Use when | Credential model |
|
||||
|----------|----------|------------------|
|
||||
| Anthropic API | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
|
||||
| Claude subscription (Pro/Max) | You want KTX to use your local Claude Code session | Claude Code local authentication |
|
||||
| Anthropic API key | You have an Anthropic API key | `ANTHROPIC_API_KEY` or a local `file:` secret |
|
||||
| Google Vertex AI for Anthropic Claude | Your organization runs Claude through Google Cloud | Application Default Credentials plus Vertex project and location |
|
||||
| Claude Code | You want KTX to use your local Claude Code session | Claude Code local authentication |
|
||||
|
||||
For the Anthropic API key provider, setup can read the key from the environment or save a pasted
|
||||
key to `.ktx/secrets/anthropic-api-key`. `ktx.yaml` stores an `env:` or `file:`
|
||||
|
|
|
|||
|
|
@ -24,10 +24,11 @@ other branches.
|
|||
|
||||
Before you publish, confirm these requirements:
|
||||
|
||||
- The repository has an Actions secret named `NPM_TOKEN`.
|
||||
- `NPM_TOKEN` is a granular npm token that can publish `@kaelio/ktx`.
|
||||
- The token can publish non-interactively if the npm account or package uses
|
||||
two-factor authentication for writes.
|
||||
- npm Trusted Publishing is configured for `@kaelio/ktx`.
|
||||
- The trusted publisher points at the `Kaelio/ktx` repository and the
|
||||
`.github/workflows/release.yml` workflow.
|
||||
- The workflow keeps `id-token: write` permission so npm can verify the
|
||||
GitHub Actions run through OpenID Connect.
|
||||
- The repository has a baseline semantic-release tag for the latest published
|
||||
package version, such as `v0.1.0-rc.1`.
|
||||
|
||||
|
|
@ -99,8 +100,17 @@ The artifact packaging and readiness scripts read `publicNpmPackageVersion`
|
|||
from `release-policy.json`, so manual version edits in build scripts aren't
|
||||
needed for rc releases.
|
||||
|
||||
## Trusted Publishing follow-up
|
||||
The bundled Python runtime wheel also derives its version from
|
||||
`publicNpmPackageVersion`. Stable npm versions are reused as-is, and rc
|
||||
versions are normalized to Python's PEP 440 version format. For example,
|
||||
`0.1.0-rc.2` becomes `0.1.0rc2` in the `kaelio-ktx` wheel filename and wheel
|
||||
metadata.
|
||||
|
||||
This workflow uses `NPM_TOKEN` today. Move to npm Trusted Publishing after the
|
||||
final publish command path is verified for the package manager and workflow
|
||||
filename configured in npm package settings.
|
||||
## npm authentication
|
||||
|
||||
The release workflow publishes through npm Trusted Publishing. It doesn't use
|
||||
an `NPM_TOKEN` secret, and the publish step doesn't set `NODE_AUTH_TOKEN`.
|
||||
|
||||
If npm returns an authentication error, check the Trusted Publishing settings
|
||||
for the `@kaelio/ktx` package before adding token-based authentication back to
|
||||
the workflow.
|
||||
|
|
|
|||
2938
docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md
Normal file
2938
docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-v1-core.md
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,493 @@
|
|||
# Isolated Diff Ingestion V1 Global Wiki Reference Gate Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or
|
||||
> superpowers:executing-plans to implement this plan task-by-task. Steps use
|
||||
> checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Reject final trees where an isolated-diff run changes semantic-layer
|
||||
sources or deletes wiki pages and leaves pre-existing wiki pages with stale
|
||||
body, `sl_refs`, frontmatter `refs`, or inline `[[page-key]]` references.
|
||||
|
||||
**Architecture:** Keep `artifact-gates.ts` validation-only. The runner expands
|
||||
the final wiki gate scope before the existing final artifact gate: changed pages
|
||||
are always validated, and all global wiki pages are validated when the run
|
||||
changes any semantic-layer source or removes any wiki page. The final-gate trace
|
||||
records the expanded scope and why it was expanded.
|
||||
|
||||
**Tech Stack:** TypeScript, Vitest, pnpm workspace commands, existing
|
||||
`IngestBundleRunner`, `KnowledgeWikiService`, and isolated-diff test fixtures.
|
||||
|
||||
---
|
||||
|
||||
## Audit Summary
|
||||
|
||||
The implemented isolated-diff plans cover the core v1 flow: child worktrees,
|
||||
binary no-rename patch proposals, `git apply --3way --index`, policy rejection,
|
||||
final gates after reconciliation and repair, pre-squash provenance raw-path
|
||||
validation, target-connection enforcement, failed reports, and persistent JSONL
|
||||
traces.
|
||||
|
||||
One v1-blocking correctness gap remains. Final wiki gates currently validate only
|
||||
wiki pages changed by the run. They do not validate unchanged pages that become
|
||||
invalid because the run changes a semantic-layer source or deletes a referenced
|
||||
wiki page. Two concrete failures can therefore squash into main:
|
||||
|
||||
- A pre-existing wiki page body contains
|
||||
`` `mart_account_segments.total_contract_arr_cents` `` while the run updates
|
||||
`semantic-layer/warehouse/mart_account_segments.yaml` to define only
|
||||
`total_contract_arr`.
|
||||
- A pre-existing wiki page has `refs: [source-page]` or `[[source-page]]` while
|
||||
the run deletes `wiki/global/source-page.md`.
|
||||
|
||||
This plan does not expand connector rollout, promote isolated diffs to the
|
||||
default, add interactive resolution, add semantic auto-merge, remove the old
|
||||
path, expand transitive semantic-layer dependencies, or move provenance into
|
||||
files.
|
||||
|
||||
## File Structure
|
||||
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
Adds two failing end-to-end regressions for unchanged wiki pages made stale by
|
||||
semantic-layer changes and wiki-page deletion.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
|
||||
Adds a final wiki gate scope helper, expands validation to all global wiki
|
||||
pages when final state changes can invalidate unchanged references, and records
|
||||
scope details in the final-gate trace and failed report.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add failing unchanged wiki regressions
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Add the stale existing wiki body regression**
|
||||
|
||||
Insert this test inside `describe('IngestBundleRunner isolated diff path', ...)`
|
||||
after the existing Metabase stale-measure regression:
|
||||
|
||||
```ts
|
||||
it('rejects unchanged wiki body refs made stale by isolated semantic-layer changes', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
await mkdir(join(runtime.configDir, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(runtime.configDir, 'semantic-layer/warehouse/mart_account_segments.yaml'),
|
||||
'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr_cents\n expr: sum(contract_arr)\n',
|
||||
);
|
||||
await writeFile(
|
||||
join(runtime.configDir, 'wiki/global/account-segments.md'),
|
||||
'---\nsummary: Account segments\nusage_mode: auto\n---\n\nExisting ARR uses `mart_account_segments.total_contract_arr_cents`.\n',
|
||||
);
|
||||
await runtime.git.commitFiles(
|
||||
['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'],
|
||||
'seed existing wiki body ref',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
const preRunHead = await runtime.git.revParseHead();
|
||||
|
||||
const { deps, adapter } = makeDeps(runtime);
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [{ unitKey: 'source-only', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async () => {
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await writeFile(
|
||||
join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'),
|
||||
'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n',
|
||||
);
|
||||
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments');
|
||||
currentSession.actions.push({
|
||||
target: 'sl',
|
||||
type: 'updated',
|
||||
key: 'mart_account_segments',
|
||||
detail: 'Rename ARR measure',
|
||||
targetConnectionId: 'warehouse',
|
||||
rawPaths: ['cards/source.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['semantic-layer/warehouse/mart_account_segments.yaml'],
|
||||
'wu source rename',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]);
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-existing-body-stale',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'metabase',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).rejects.toThrow(/total_contract_arr_cents/);
|
||||
|
||||
expect(await runtime.git.revParseHead()).toBe(preRunHead);
|
||||
const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-body-stale/trace.jsonl'), 'utf-8');
|
||||
expect(trace).toContain('final_artifact_gates_failed');
|
||||
expect(trace).toContain('account-segments');
|
||||
expect(trace).toContain('semantic_layer_changed');
|
||||
expect(trace).toContain('ingest_failed');
|
||||
expect(trace).toContain('failure_report_created');
|
||||
expect(trace).not.toContain('squash_finished');
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add the stale existing wiki page-reference regression**
|
||||
|
||||
Insert this test near the existing final wiki reference regression:
|
||||
|
||||
```ts
|
||||
it('rejects unchanged inbound wiki refs broken by an isolated wiki deletion', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
await mkdir(join(runtime.configDir, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(runtime.configDir, 'wiki/global/source-page.md'),
|
||||
'---\nsummary: Source page\nusage_mode: auto\n---\n\nSource page\n',
|
||||
);
|
||||
await writeFile(
|
||||
join(runtime.configDir, 'wiki/global/account-segments.md'),
|
||||
'---\nsummary: Account segments\nusage_mode: auto\nrefs:\n - source-page\n---\n\nSee [[source-page]].\n',
|
||||
);
|
||||
await runtime.git.commitFiles(
|
||||
['wiki/global/source-page.md', 'wiki/global/account-segments.md'],
|
||||
'seed inbound wiki refs',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
const preRunHead = await runtime.git.revParseHead();
|
||||
|
||||
const { deps, adapter } = makeDeps(runtime);
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [{ unitKey: 'delete-target-page', rawFiles: ['pages/delete.json'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async () => {
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await rm(join(root, 'wiki/global/source-page.md'), { force: true });
|
||||
currentSession.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'removed',
|
||||
key: 'source-page',
|
||||
detail: 'Delete referenced page',
|
||||
rawPaths: ['pages/delete.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['wiki/global/source-page.md'],
|
||||
'wu delete target page',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['pages/delete.json', 'h1']]);
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-existing-wiki-ref-stale',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'metabase',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).rejects.toThrow(/wiki references target missing page\(s\): account-segments -> source-page/);
|
||||
|
||||
expect(await runtime.git.revParseHead()).toBe(preRunHead);
|
||||
const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-existing-wiki-ref-stale/trace.jsonl'), 'utf-8');
|
||||
expect(trace).toContain('final_artifact_gates_failed');
|
||||
expect(trace).toContain('account-segments -> source-page');
|
||||
expect(trace).toContain('wiki_page_removed');
|
||||
expect(trace).toContain('ingest_failed');
|
||||
expect(trace).toContain('failure_report_created');
|
||||
expect(trace).not.toContain('squash_finished');
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the focused regressions and verify they fail**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs"
|
||||
```
|
||||
|
||||
Expected: FAIL. The stale body test currently squashes successfully because the
|
||||
unchanged `account-segments` page is not in `finalChangedWikiPageKeys`. The
|
||||
inbound wiki ref test currently squashes successfully because the deleted
|
||||
`source-page` is validated as a missing changed page and skipped, while the
|
||||
unchanged page that references it is never validated.
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Expand the final wiki validation scope
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
|
||||
- [ ] **Step 1: Add final wiki gate scope helpers**
|
||||
|
||||
Add these private methods after `uniqueTouchedSlSources()`:
|
||||
|
||||
```ts
|
||||
private removedWikiPageKeysFromActions(actions: MemoryAction[]): string[] {
|
||||
return this.uniqueWikiPageKeys(
|
||||
actions.filter((action) => action.target === 'wiki' && action.type === 'removed').map((action) => action.key),
|
||||
);
|
||||
}
|
||||
|
||||
private async wikiPageKeysForFinalGates(input: {
|
||||
wikiService: ReturnType<KnowledgeWikiService['forWorktree']>;
|
||||
changedWikiPageKeys: string[];
|
||||
touchedSlSources: TouchedSlSource[];
|
||||
actions: MemoryAction[];
|
||||
}): Promise<{
|
||||
pageKeys: string[];
|
||||
trace: {
|
||||
global: boolean;
|
||||
reasons: string[];
|
||||
changedWikiPageKeys: string[];
|
||||
removedWikiPageKeys: string[];
|
||||
pageKeysValidated: string[];
|
||||
};
|
||||
}> {
|
||||
const changedWikiPageKeys = this.uniqueWikiPageKeys(input.changedWikiPageKeys);
|
||||
const removedWikiPageKeys = this.removedWikiPageKeysFromActions(input.actions);
|
||||
const reasons: string[] = [];
|
||||
if (input.touchedSlSources.length > 0) {
|
||||
reasons.push('semantic_layer_changed');
|
||||
}
|
||||
if (removedWikiPageKeys.length > 0) {
|
||||
reasons.push('wiki_page_removed');
|
||||
}
|
||||
|
||||
let pageKeys = changedWikiPageKeys;
|
||||
if (reasons.length > 0) {
|
||||
pageKeys = this.uniqueWikiPageKeys([
|
||||
...changedWikiPageKeys,
|
||||
...(await input.wikiService.listPageKeys('GLOBAL', null)),
|
||||
]);
|
||||
}
|
||||
|
||||
return {
|
||||
pageKeys,
|
||||
trace: {
|
||||
global: reasons.length > 0,
|
||||
reasons,
|
||||
changedWikiPageKeys,
|
||||
removedWikiPageKeys,
|
||||
pageKeysValidated: pageKeys,
|
||||
},
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Use the expanded scope before final gates**
|
||||
|
||||
In `runInner()`, replace the current `finalChangedWikiPageKeys` and
|
||||
`finalTouchedSlSources` block with this code:
|
||||
|
||||
```ts
|
||||
const baseFinalChangedWikiPageKeys = this.uniqueWikiPageKeys([
|
||||
...(isolatedDiffEnabled ? projectionChangedWikiPageKeys : []),
|
||||
...workUnitOutcomes
|
||||
.flatMap((outcome) => outcome.patchTouchedPaths ?? [])
|
||||
.flatMap((path) => this.wikiPageKeysFromPaths([path])),
|
||||
...this.wikiPageKeysFromActions(reconcileActions),
|
||||
...postReconciliationPaths.flatMap((path) => this.wikiPageKeysFromPaths([path])),
|
||||
...wikiSlRefRepairResult.repairs.filter((repair) => repair.scope === 'GLOBAL').map((repair) => repair.pageKey),
|
||||
]);
|
||||
const finalTouchedSlSources = this.uniqueTouchedSlSources([
|
||||
...(isolatedDiffEnabled ? projectionTouchedSources : []),
|
||||
...workUnitOutcomes.flatMap((outcome) => outcome.touchedSlSources),
|
||||
...this.touchedSlSourcesFromActions(reconcileActions, job.connectionId),
|
||||
...this.touchedSlSourcesFromPaths(postReconciliationPaths),
|
||||
...(postProcessorOutcome?.touchedSources ?? []),
|
||||
]);
|
||||
const finalWikiGateScope = await this.wikiPageKeysForFinalGates({
|
||||
wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir),
|
||||
changedWikiPageKeys: baseFinalChangedWikiPageKeys,
|
||||
touchedSlSources: finalTouchedSlSources,
|
||||
actions: [...stageIndex.workUnits.flatMap((wu) => wu.actions), ...reconcileActions],
|
||||
});
|
||||
const finalChangedWikiPageKeys = finalWikiGateScope.pageKeys;
|
||||
```
|
||||
|
||||
This keeps the existing variable name used by `validateFinalIngestArtifacts()`,
|
||||
but the value now means "wiki page keys to validate in final gates."
|
||||
|
||||
- [ ] **Step 3: Add scope details to final-gate trace data**
|
||||
|
||||
In the `finalArtifactGateTraceData` object, add the
|
||||
`wikiReferenceGateScope` field:
|
||||
|
||||
```ts
|
||||
const finalArtifactGateTraceData = {
|
||||
changedWikiPageKeys: finalChangedWikiPageKeys,
|
||||
wikiReferenceGateScope: finalWikiGateScope.trace,
|
||||
touchedSlSources: finalTouchedSlSources,
|
||||
projectionTouchedPaths,
|
||||
workUnitPatchTouchedPaths: workUnitOutcomes.flatMap((outcome) => outcome.patchTouchedPaths ?? []),
|
||||
preReconciliationSha,
|
||||
postReconciliationSha,
|
||||
postReconciliationPaths,
|
||||
reconciliationActionCount: reconcileActions.length,
|
||||
wikiSlRefRepairCount: wikiSlRefRepairResult.repairs.length,
|
||||
};
|
||||
```
|
||||
|
||||
The failure report already stores `activeFailureDetails`, so this trace data
|
||||
also becomes persistent failed-report context when final gates fail.
|
||||
|
||||
- [ ] **Step 4: Run the focused regressions and verify they pass**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unchanged wiki body refs|unchanged inbound wiki refs"
|
||||
```
|
||||
|
||||
Expected: PASS. Both traces include `final_artifact_gates_failed`,
|
||||
`failure_report_created`, no `squash_finished`, and
|
||||
`wikiReferenceGateScope` with either `semantic_layer_changed` or
|
||||
`wiki_page_removed`.
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Verification and commit
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Run the focused isolated-diff suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
src/ingest/artifact-gates.test.ts \
|
||||
src/ingest/wiki-body-refs.test.ts \
|
||||
src/ingest/semantic-layer-target-policy.test.ts \
|
||||
src/ingest/isolated-diff/git-patch.test.ts \
|
||||
src/ingest/isolated-diff/patch-integrator.test.ts \
|
||||
src/ingest/isolated-diff/work-unit-executor.test.ts \
|
||||
src/core/git.service.patch.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Type-check the context package**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run dead-code analysis**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS, or only pre-existing findings unrelated to
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts` and
|
||||
`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
Investigate any new finding before committing.
|
||||
|
||||
- [ ] **Step 4: Verify trace acceptance criteria**
|
||||
|
||||
Open the traces produced by the two new failing-run tests and confirm these
|
||||
events and fields exist:
|
||||
|
||||
```text
|
||||
job-existing-body-stale:
|
||||
- final_artifact_gates_started
|
||||
- final_artifact_gates_failed
|
||||
- ingest_failed
|
||||
- failure_report_created
|
||||
- no squash_finished
|
||||
- wikiReferenceGateScope.global is true
|
||||
- wikiReferenceGateScope.reasons includes semantic_layer_changed
|
||||
- wikiReferenceGateScope.pageKeysValidated includes account-segments
|
||||
- error.message includes total_contract_arr_cents
|
||||
|
||||
job-existing-wiki-ref-stale:
|
||||
- final_artifact_gates_started
|
||||
- final_artifact_gates_failed
|
||||
- ingest_failed
|
||||
- failure_report_created
|
||||
- no squash_finished
|
||||
- wikiReferenceGateScope.global is true
|
||||
- wikiReferenceGateScope.reasons includes wiki_page_removed
|
||||
- wikiReferenceGateScope.removedWikiPageKeys includes source-page
|
||||
- error.message includes account-segments -> source-page
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Commit**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/ingest-bundle.runner.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
git commit -m "fix(ingest): gate global wiki references"
|
||||
```
|
||||
|
||||
Expected: one commit containing only the runner and isolated-diff runner test
|
||||
changes.
|
||||
|
||||
---
|
||||
|
||||
## Self-Review
|
||||
|
||||
Spec coverage:
|
||||
- Final global wiki body reference validation now covers unchanged wiki pages
|
||||
when a run changes semantic-layer sources.
|
||||
- Final global wiki page reference validation now covers unchanged inbound
|
||||
references when a run deletes wiki pages.
|
||||
- The plan keeps resolver behavior fail-fast and stops before squash.
|
||||
- Persistent trace and failed-report acceptance criteria are explicit and tied
|
||||
to the concrete failure modes.
|
||||
|
||||
Non-blocking gaps unchanged:
|
||||
- Broader connector rollout.
|
||||
- Isolated-diff default promotion.
|
||||
- Old shared-worktree path removal.
|
||||
- Interactive conflict resolution.
|
||||
- Semantic auto-merge.
|
||||
- Transitive semantic-layer dependency expansion.
|
||||
- Provenance-as-files.
|
||||
|
|
@ -0,0 +1,494 @@
|
|||
# Isolated Diff Ingestion V1 Provenance Gate Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Ensure invalid provenance raw paths are rejected before isolated-diff
|
||||
ingestion squashes any integration worktree changes into the main project
|
||||
worktree.
|
||||
|
||||
**Architecture:** Keep provenance insertion after squash, but derive and
|
||||
validate the planned provenance rows immediately after final artifact gates and
|
||||
before the squash stage. This makes provenance validation part of the final
|
||||
pre-main safety boundary while preserving the existing report and database
|
||||
write shape.
|
||||
|
||||
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, existing
|
||||
`IngestBundleRunner`, `validateProvenanceRawPaths`, ingest reports, and
|
||||
persistent ingest traces.
|
||||
|
||||
---
|
||||
|
||||
## Audit Summary
|
||||
|
||||
The implemented isolated-diff path now covers the core v1 safety surface:
|
||||
child worktrees, binary no-rename patches, `git apply --3way --index`, patch
|
||||
policy rejection, final wiki and semantic-layer gates after reconciliation and
|
||||
post-processing, failure reports, and persistent JSONL traces. The focused
|
||||
isolated-diff test suite passes:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-trace.test.ts \
|
||||
src/ingest/wiki-body-refs.test.ts \
|
||||
src/ingest/artifact-gates.test.ts \
|
||||
src/ingest/isolated-diff/git-patch.test.ts \
|
||||
src/ingest/isolated-diff/work-unit-executor.test.ts \
|
||||
src/ingest/isolated-diff/patch-integrator.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
```
|
||||
|
||||
Current result: `7 passed` (test files), `28 passed` (tests).
|
||||
|
||||
One v1-blocking gap remains. `validateProvenanceRawPaths()` is called in
|
||||
`packages/context/src/ingest/ingest-bundle.runner.ts` after
|
||||
`squashMergeIntoMain()`. A work unit or reconciliation action can emit an
|
||||
otherwise valid wiki or semantic-layer artifact whose `rawPaths` contain a path
|
||||
outside the current raw snapshot and eviction set. Today the run fails during
|
||||
provenance recording, but only after the invalidly attributed artifacts have
|
||||
already reached the main project worktree. That violates the spec requirement
|
||||
that final global gates run before any changes reach main.
|
||||
|
||||
Observability for the already-implemented phases is sufficient for postmortem
|
||||
reconstruction: traces include input snapshots, routing, child worktree
|
||||
creation and cleanup, patch collection and application, conflict
|
||||
classification, reconciliation, final gates, failure reports, and run outcome.
|
||||
This plan adds only the missing provenance validation failure trace because it
|
||||
corresponds to a concrete pre-main failure mode, not cosmetic trace expansion.
|
||||
|
||||
Non-blocking gaps that remain after this plan:
|
||||
|
||||
- Migrating Notion, LookML, Looker, dbt, MetricFlow, and historic-SQL direct
|
||||
durable writes to the isolated path.
|
||||
- Promoting isolated diffs as the default for all connectors.
|
||||
- Removing the old shared-worktree WorkUnit execution path.
|
||||
- Interactive, CLI, or agent-driven conflict resolution.
|
||||
- Auto-merging semantic conflicts that cannot be proven correct.
|
||||
- Transitive SQL-projection dependency expansion beyond direct declared joins.
|
||||
- Moving provenance rows to worktree files.
|
||||
- Adding failure reports for failures that happen before an ingest run row
|
||||
exists. The trace file is still written at the deterministic job path.
|
||||
|
||||
## File Structure
|
||||
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
Add a regression proving invalid provenance raw paths fail before squash,
|
||||
leave main unchanged, skip SQLite provenance insertion, and emit a
|
||||
postmortem-grade trace event.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
|
||||
Extract provenance row construction into private helpers, run provenance
|
||||
raw-path validation before squash, trace validation success and failure, and
|
||||
reuse the prevalidated rows for insertion and reports after squash.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add the pre-squash provenance regression
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing runner test**
|
||||
|
||||
Append this test inside the existing
|
||||
`describe('IngestBundleRunner isolated diff path', ...)` block in
|
||||
`packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`:
|
||||
|
||||
```ts
|
||||
it('rejects invalid provenance raw paths before squash reaches main', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
const { deps, adapter } = makeDeps(runtime);
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [{ unitKey: 'card-valid-artifacts', rawFiles: ['cards/source.json'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async () => {
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true });
|
||||
await mkdir(join(root, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(root, 'semantic-layer/warehouse/mart_account_segments.yaml'),
|
||||
'name: mart_account_segments\ngrain: [account_id]\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n - name: total_contract_arr\n expr: sum(contract_arr)\n',
|
||||
);
|
||||
await writeFile(
|
||||
join(root, 'wiki/global/account-segments.md'),
|
||||
'---\nsummary: Account segments\nusage_mode: auto\nsl_refs:\n - mart_account_segments\n---\n\nARR is `mart_account_segments.total_contract_arr`.\n',
|
||||
);
|
||||
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'mart_account_segments');
|
||||
currentSession.actions.push({
|
||||
target: 'sl',
|
||||
type: 'created',
|
||||
key: 'mart_account_segments',
|
||||
detail: 'Valid source',
|
||||
targetConnectionId: 'warehouse',
|
||||
rawPaths: ['cards/source.json'],
|
||||
});
|
||||
currentSession.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'account-segments',
|
||||
detail: 'Valid wiki with invalid provenance raw path',
|
||||
rawPaths: ['cards/missing.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['semantic-layer/warehouse/mart_account_segments.yaml', 'wiki/global/account-segments.md'],
|
||||
'valid artifacts with invalid provenance',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['cards/source.json', 'h1']]);
|
||||
const preRunHead = await runtime.git.revParseHead();
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-invalid-provenance',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'metabase',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).rejects.toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
|
||||
|
||||
expect(await runtime.git.revParseHead()).toBe(preRunHead);
|
||||
expect(deps.provenance.insertMany).not.toHaveBeenCalled();
|
||||
const trace = await readFile(join(runtime.configDir, '.ktx/ingest-traces/job-invalid-provenance/trace.jsonl'), 'utf-8');
|
||||
expect(trace).toContain('final_artifact_gates_finished');
|
||||
expect(trace).toContain('provenance_rows_validation_failed');
|
||||
expect(trace).toContain('cards/missing.json');
|
||||
expect(trace).toContain('ingest_failed');
|
||||
expect(trace).not.toContain('squash_finished');
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the failing regression**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths"
|
||||
```
|
||||
|
||||
Expected: FAIL because the current runner validates provenance after
|
||||
`squashMergeIntoMain()`, so `runtime.git.revParseHead()` changes and the trace
|
||||
does not contain `provenance_rows_validation_failed`.
|
||||
|
||||
### Task 2: Move provenance validation into the pre-squash gate boundary
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
|
||||
- [ ] **Step 1: Import the provenance report and insert types**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, update the imports.
|
||||
|
||||
Replace this import block:
|
||||
|
||||
```ts
|
||||
import type {
|
||||
ContextEvidenceIndexSummary,
|
||||
IngestBundleRunnerDeps,
|
||||
IngestProvenanceRow,
|
||||
IngestRunsPort,
|
||||
IngestSessionWorktree,
|
||||
PageTriageRunResult,
|
||||
} from './ports.js';
|
||||
```
|
||||
|
||||
With:
|
||||
|
||||
```ts
|
||||
import type {
|
||||
ContextEvidenceIndexSummary,
|
||||
IngestBundleRunnerDeps,
|
||||
IngestProvenanceInsert,
|
||||
IngestProvenanceRow,
|
||||
IngestRunsPort,
|
||||
IngestSessionWorktree,
|
||||
PageTriageRunResult,
|
||||
} from './ports.js';
|
||||
```
|
||||
|
||||
Replace this import block:
|
||||
|
||||
```ts
|
||||
import {
|
||||
buildStageIndexFromReportBody,
|
||||
postProcessorSavedMemoryCounts,
|
||||
type IngestReportPostProcessorOutcome,
|
||||
type IngestReportSnapshot,
|
||||
} from './reports.js';
|
||||
```
|
||||
|
||||
With:
|
||||
|
||||
```ts
|
||||
import {
|
||||
buildStageIndexFromReportBody,
|
||||
postProcessorSavedMemoryCounts,
|
||||
type IngestReportPostProcessorOutcome,
|
||||
type IngestReportProvenanceDetail,
|
||||
type IngestReportSnapshot,
|
||||
} from './reports.js';
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add provenance row helpers**
|
||||
|
||||
Add these private methods after `private errorMessage(error: unknown): string`
|
||||
in `packages/context/src/ingest/ingest-bundle.runner.ts`:
|
||||
|
||||
```ts
|
||||
private buildProvenanceRows(input: {
|
||||
job: IngestBundleJob;
|
||||
syncId: string;
|
||||
currentHashes: Map<string, string>;
|
||||
stageIndex: StageIndex;
|
||||
reconcileActions: MemoryAction[];
|
||||
eviction?: EvictionUnit;
|
||||
}): IngestProvenanceInsert[] {
|
||||
const provenanceRows: IngestProvenanceInsert[] = [];
|
||||
const actionToType = (action: MemoryAction): IngestProvenanceInsert['actionType'] => {
|
||||
if (action.target === 'wiki') {
|
||||
return 'wiki_written';
|
||||
}
|
||||
return action.type === 'created' ? 'source_created' : 'measure_added';
|
||||
};
|
||||
const producedPaths = new Set<string>();
|
||||
const pushActionProvenance = (rawPath: string, action: MemoryAction): void => {
|
||||
const hash = input.currentHashes.get(rawPath) ?? '';
|
||||
provenanceRows.push({
|
||||
connectionId: input.job.connectionId,
|
||||
sourceKey: input.job.sourceKey,
|
||||
syncId: input.syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: action.target,
|
||||
artifactKey: action.key,
|
||||
targetConnectionId: action.target === 'sl' ? actionTargetConnectionId(action, input.job.connectionId) : null,
|
||||
artifactContentHash: null,
|
||||
actionType: actionToType(action),
|
||||
});
|
||||
producedPaths.add(rawPath);
|
||||
};
|
||||
|
||||
for (const wu of input.stageIndex.workUnits) {
|
||||
for (const action of wu.actions) {
|
||||
for (const rawPath of rawPathsForAction(action, wu.rawFiles)) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const action of input.reconcileActions) {
|
||||
for (const rawPath of action.rawPaths ?? []) {
|
||||
pushActionProvenance(rawPath, action);
|
||||
}
|
||||
}
|
||||
for (const resolution of input.stageIndex.artifactResolutions ?? []) {
|
||||
const hash = input.currentHashes.get(resolution.rawPath) ?? '';
|
||||
provenanceRows.push({
|
||||
connectionId: input.job.connectionId,
|
||||
sourceKey: input.job.sourceKey,
|
||||
syncId: input.syncId,
|
||||
rawPath: resolution.rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: resolution.artifactKind,
|
||||
artifactKey: resolution.artifactKey,
|
||||
targetConnectionId: null,
|
||||
artifactContentHash: null,
|
||||
actionType: resolution.actionType,
|
||||
});
|
||||
producedPaths.add(resolution.rawPath);
|
||||
}
|
||||
for (const [rawPath, hash] of input.currentHashes) {
|
||||
if (producedPaths.has(rawPath)) {
|
||||
continue;
|
||||
}
|
||||
provenanceRows.push({
|
||||
connectionId: input.job.connectionId,
|
||||
sourceKey: input.job.sourceKey,
|
||||
syncId: input.syncId,
|
||||
rawPath,
|
||||
rawContentHash: hash,
|
||||
artifactKind: null,
|
||||
artifactKey: null,
|
||||
targetConnectionId: null,
|
||||
artifactContentHash: null,
|
||||
actionType: 'skipped',
|
||||
});
|
||||
}
|
||||
|
||||
return provenanceRows;
|
||||
}
|
||||
|
||||
private toReportProvenanceRows(rows: IngestProvenanceInsert[]): IngestReportProvenanceDetail[] {
|
||||
return rows.map(({ rawPath, artifactKind, artifactKey, actionType, targetConnectionId }) => ({
|
||||
rawPath,
|
||||
artifactKind,
|
||||
artifactKey,
|
||||
targetConnectionId: targetConnectionId ?? null,
|
||||
actionType,
|
||||
}));
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Validate planned provenance rows before squash**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, find the code that
|
||||
sets `activePhase = 'final_gates';` and runs `traceTimed(...,
|
||||
'final_artifact_gates', ...)`. Immediately after that `await traceTimed(...)`
|
||||
block and before the `// Stage 6 — squash commit` comment, insert:
|
||||
|
||||
```ts
|
||||
activePhase = 'provenance_validation';
|
||||
const provenanceRows = this.buildProvenanceRows({
|
||||
job,
|
||||
syncId,
|
||||
currentHashes,
|
||||
stageIndex,
|
||||
reconcileActions,
|
||||
eviction,
|
||||
});
|
||||
await traceTimed(
|
||||
runTrace,
|
||||
'provenance',
|
||||
'provenance_rows_validation',
|
||||
{
|
||||
rowCount: provenanceRows.length,
|
||||
currentRawPathCount: currentHashes.size,
|
||||
deletedRawPathCount: eviction?.deletedRawPaths.length ?? 0,
|
||||
},
|
||||
async () => {
|
||||
validateProvenanceRawPaths({
|
||||
rows: provenanceRows,
|
||||
currentRawPaths: new Set(currentHashes.keys()),
|
||||
deletedRawPaths: new Set(eviction?.deletedRawPaths ?? []),
|
||||
});
|
||||
},
|
||||
);
|
||||
const reportProvenanceRows = this.toReportProvenanceRows(provenanceRows);
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Replace the post-squash provenance construction block**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, in the
|
||||
`activePhase = 'provenance';` section after squash, delete the current block
|
||||
that starts with:
|
||||
|
||||
```ts
|
||||
// Provenance rows: per-artifact when the WU emitted actions, plus a `skipped`
|
||||
// fallback for raw files that produced nothing so the next DiffSet still sees
|
||||
// them.
|
||||
const provenanceRows: Parameters<typeof this.deps.provenance.insertMany>[0] = [];
|
||||
```
|
||||
|
||||
And ends with:
|
||||
|
||||
```ts
|
||||
await runTrace.event('debug', 'provenance', 'provenance_rows_validated', {
|
||||
rowCount: provenanceRows.length,
|
||||
});
|
||||
```
|
||||
|
||||
Do not delete the existing call to `await this.deps.provenance.insertMany(provenanceRows);`; it now inserts the rows built and validated before squash.
|
||||
Immediately after that insertion call, add:
|
||||
|
||||
```ts
|
||||
await runTrace.event('debug', 'provenance', 'provenance_rows_inserted', {
|
||||
rowCount: provenanceRows.length,
|
||||
});
|
||||
```
|
||||
|
||||
Then delete the later `const reportProvenanceRows = provenanceRows.map(...)`
|
||||
block because `reportProvenanceRows` is now created before squash from the
|
||||
prevalidated rows.
|
||||
|
||||
- [ ] **Step 5: Run the provenance regression**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "invalid provenance raw paths"
|
||||
```
|
||||
|
||||
Expected: PASS. The trace contains `provenance_rows_validation_failed`, main
|
||||
HEAD remains unchanged, and `provenance.insertMany` is not called.
|
||||
|
||||
- [ ] **Step 6: Run the focused isolated-diff suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-trace.test.ts \
|
||||
src/ingest/wiki-body-refs.test.ts \
|
||||
src/ingest/artifact-gates.test.ts \
|
||||
src/ingest/isolated-diff/git-patch.test.ts \
|
||||
src/ingest/isolated-diff/work-unit-executor.test.ts \
|
||||
src/ingest/isolated-diff/patch-integrator.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
### Task 3: Type-check, dead-code check, and commit
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Run the context package type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run the workspace dead-code check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS, or only existing unrelated Knip/Biome findings. Investigate
|
||||
any new findings in the two modified files before continuing.
|
||||
|
||||
- [ ] **Step 3: Commit the provenance gate closure**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/ingest-bundle.runner.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
git commit -m "fix(ingest): gate provenance before isolated diff squash"
|
||||
```
|
||||
|
||||
Expected: one commit containing only the runner and isolated-diff runner test
|
||||
changes.
|
||||
|
||||
## Self-Review
|
||||
|
||||
Spec coverage: this plan closes the remaining violation of the design's final
|
||||
global gate invariant by proving invalid provenance raw paths fail before
|
||||
squash and by moving provenance validation into the pre-main gate boundary.
|
||||
|
||||
Placeholder scan: no placeholder steps remain. Every implementation step names
|
||||
the exact files, code, commands, and expected results.
|
||||
|
||||
Type consistency: the plan uses existing `IngestProvenanceInsert`,
|
||||
`IngestReportProvenanceDetail`, `MemoryAction`, `EvictionUnit`, `StageIndex`,
|
||||
`rawPathsForAction()`, and `validateProvenanceRawPaths()` names.
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,320 @@
|
|||
# Adapter-Owned Finalization V1 Closure Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Close the remaining adapter-owned finalization v1 verification gaps so the finalization contract is publicly typed and the historic-SQL local acceptance path passes through `SourceAdapter.finalize()`.
|
||||
|
||||
**Architecture:** The production runner already owns finalization execution, commits, target policy, final gates, reports, traces, and provenance. This plan keeps production behavior intact, exports the finalization adapter types through the ingest barrel, and updates the local historic-SQL acceptance fixture to model the real adapter-owned finalization path instead of the removed post-processor path.
|
||||
|
||||
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, pnpm workspace commands, existing `SourceAdapter`, `projectHistoricSqlEvidence()`, and package export coverage.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
The audit compared
|
||||
`docs/superpowers/specs/2026-05-18-adapter-owned-ingest-finalization-design.md`
|
||||
against the implemented source, plan, and targeted tests.
|
||||
|
||||
Implemented v1 coverage:
|
||||
|
||||
- `SourceAdapter.finalize()` exists with typed context and result objects in
|
||||
`packages/context/src/ingest/types.ts`.
|
||||
- `IngestBundleRunnerDeps.postProcessors`, `IngestBundlePostProcessorPort`,
|
||||
`HistoricSqlProjectionPostProcessor`, `post_processor` trace phases, and
|
||||
`postProcessor` report fields are absent from production source.
|
||||
- The runner invokes finalization after reconciliation and before
|
||||
`wiki_sl_ref_repair`, target-policy checks, final artifact gates,
|
||||
provenance validation, and squash.
|
||||
- The runner derives finalization touched paths from the integration-worktree
|
||||
diff, resolves semantic-layer scope including `_schema/*.yaml`, cross-checks
|
||||
adapter declarations, commits finalization, records reports/traces, rejects
|
||||
path overlap, and partitions finalization actions for provenance exclusions.
|
||||
- Override replay passes explicit `overrideReplay` metadata, omits
|
||||
`parseArtifacts`, and leaves current-run `workUnitOutcomes` empty.
|
||||
- Historic SQL implements adapter-owned `finalize()` and uses
|
||||
`projectHistoricSqlEvidence()` for aggregate projection maintenance.
|
||||
|
||||
V1-blocking gaps:
|
||||
|
||||
- `packages/context/src/ingest/index.ts` exports `SourceAdapter` and projection
|
||||
types, but not `DeterministicFinalizationContext`,
|
||||
`FinalizationOverrideReplay`, or `FinalizationResult`. The adapter contract is
|
||||
less usable from the public ingest barrel than the spec requires.
|
||||
- The targeted verification command currently fails because
|
||||
`HistoricSqlEvidenceTestAdapter` in
|
||||
`packages/context/src/ingest/local-bundle-ingest.test.ts` lacks
|
||||
`finalize()`, so `result.report.body.finalization` is `undefined` in the
|
||||
local historic-SQL projection acceptance test.
|
||||
|
||||
Non-blocking gaps:
|
||||
|
||||
- Older historical plan documents still mention post-processors. They are
|
||||
archived implementation history and do not affect runtime behavior.
|
||||
- The runner has helper-level declaration mismatch coverage, but no dedicated
|
||||
local-bundle integration test for a finalization declaration mismatch. The
|
||||
implementation path exists; adding a higher-level regression test can be a
|
||||
later hardening pass.
|
||||
- Finalization wiki page deletion could use a future global wiki-reference gate
|
||||
regression test. Historic-SQL v1 finalization updates or archives pages in place,
|
||||
so this is not required for the current v1 acceptance path.
|
||||
|
||||
## File structure
|
||||
|
||||
- Modify `packages/context/src/ingest/index.ts`.
|
||||
Re-export the typed finalization adapter contract next to the existing
|
||||
projection contract.
|
||||
- Modify `packages/context/src/package-exports.test.ts`.
|
||||
Add compile-time coverage proving finalization adapter types are exported
|
||||
from the ingest barrel.
|
||||
- Modify `packages/context/src/ingest/local-bundle-ingest.test.ts`.
|
||||
Make the historic-SQL local acceptance test adapter implement
|
||||
`finalize()` by delegating to `projectHistoricSqlEvidence()`, and rename the
|
||||
stale test label from post-processor to finalization.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Export finalization adapter contract types
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/package-exports.test.ts`
|
||||
- Modify: `packages/context/src/ingest/index.ts`
|
||||
|
||||
- [ ] **Step 1: Write failing type export coverage**
|
||||
|
||||
In `packages/context/src/package-exports.test.ts`, add this import after the
|
||||
existing Vitest import:
|
||||
|
||||
```ts
|
||||
import type {
|
||||
DeterministicFinalizationContext,
|
||||
FinalizationOverrideReplay,
|
||||
FinalizationResult,
|
||||
} from './ingest/index.js';
|
||||
```
|
||||
|
||||
Then add this constant after `scanTypeExportCoverage`:
|
||||
|
||||
```ts
|
||||
const ingestFinalizationTypeExportCoverage: Partial<{
|
||||
context: DeterministicFinalizationContext;
|
||||
overrideReplay: FinalizationOverrideReplay;
|
||||
result: FinalizationResult;
|
||||
}> = {};
|
||||
```
|
||||
|
||||
Inside the existing package export test, place this assertion immediately after
|
||||
`expect(scanTypeExportCoverage).toEqual({});`:
|
||||
|
||||
```ts
|
||||
expect(ingestFinalizationTypeExportCoverage).toEqual({});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run type-check to verify the coverage fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: FAIL with TypeScript errors like:
|
||||
|
||||
```text
|
||||
Module '"./ingest/index.js"' has no exported member 'DeterministicFinalizationContext'.
|
||||
Module '"./ingest/index.js"' has no exported member 'FinalizationOverrideReplay'.
|
||||
Module '"./ingest/index.js"' has no exported member 'FinalizationResult'.
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Export the finalization types**
|
||||
|
||||
In `packages/context/src/ingest/index.ts`, update the existing export block
|
||||
from `./types.js` so the final lines read:
|
||||
|
||||
```ts
|
||||
WorkUnit,
|
||||
DeterministicProjectionContext,
|
||||
ProjectionResult,
|
||||
DeterministicFinalizationContext,
|
||||
FinalizationOverrideReplay,
|
||||
FinalizationResult,
|
||||
} from './types.js';
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run type-check and package export coverage**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
pnpm --filter @ktx/context exec vitest run src/package-exports.test.ts
|
||||
```
|
||||
|
||||
Expected: both commands PASS.
|
||||
|
||||
- [ ] **Step 5: Commit the type export closure**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/index.ts packages/context/src/package-exports.test.ts
|
||||
git commit -m "feat(ingest): export finalization adapter contract types"
|
||||
```
|
||||
|
||||
### Task 2: Repair the local historic-SQL finalization acceptance fixture
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/local-bundle-ingest.test.ts`
|
||||
|
||||
- [ ] **Step 1: Import the projection helper and finalization types**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-ingest.test.ts`, add this import
|
||||
after the fake adapter import:
|
||||
|
||||
```ts
|
||||
import { projectHistoricSqlEvidence } from './adapters/historic-sql/projection.js';
|
||||
```
|
||||
|
||||
Replace the existing type import from `./types.js` with:
|
||||
|
||||
```ts
|
||||
import type {
|
||||
ChunkResult,
|
||||
DeterministicFinalizationContext,
|
||||
DiffSet,
|
||||
FinalizationResult,
|
||||
SourceAdapter,
|
||||
} from './types.js';
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add adapter-owned finalization to the test adapter**
|
||||
|
||||
In `HistoricSqlEvidenceTestAdapter`, add this method after `chunk()`:
|
||||
|
||||
```ts
|
||||
async finalize(ctx: DeterministicFinalizationContext): Promise<FinalizationResult> {
|
||||
const projection = await projectHistoricSqlEvidence({
|
||||
workdir: ctx.workdir,
|
||||
connectionId: ctx.connectionId,
|
||||
syncId: ctx.syncId,
|
||||
runId: ctx.runId,
|
||||
overrideReplay: ctx.overrideReplay,
|
||||
});
|
||||
|
||||
return {
|
||||
result: projection,
|
||||
warnings: projection.warnings,
|
||||
errors: [],
|
||||
touchedSources: projection.touchedSources,
|
||||
changedWikiPageKeys: projection.changedWikiPageKeys,
|
||||
actions: projection.actions,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Rename the stale test label**
|
||||
|
||||
Change the test name:
|
||||
|
||||
```ts
|
||||
it('runs historic-SQL evidence projection through the local bundle post-processor', async () => {
|
||||
```
|
||||
|
||||
to:
|
||||
|
||||
```ts
|
||||
it('runs historic-SQL evidence projection through local bundle finalization', async () => {
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run the focused test that previously failed**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "historic-SQL evidence projection"
|
||||
```
|
||||
|
||||
Expected: PASS, and the assertion at
|
||||
`packages/context/src/ingest/local-bundle-ingest.test.ts:551` receives a
|
||||
`result.report.body.finalization` object with `status: "success"`.
|
||||
|
||||
- [ ] **Step 5: Commit the local acceptance fixture**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/local-bundle-ingest.test.ts
|
||||
git commit -m "test(ingest): exercise historic sql finalization locally"
|
||||
```
|
||||
|
||||
### Task 3: Run final verification
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/finalization-scope.test.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
- Verify: `packages/context/src/ingest/adapters/historic-sql/projection.test.ts`
|
||||
- Verify: `packages/context/src/ingest/local-bundle-ingest.test.ts`
|
||||
- Verify: `packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts`
|
||||
- Verify: workspace TypeScript and dead-code checks
|
||||
|
||||
- [ ] **Step 1: Run the adapter-owned finalization targeted suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/finalization-scope.test.ts src/ingest/ingest-bundle.runner.test.ts src/ingest/ingest-bundle.runner.isolated-diff.test.ts src/ingest/adapters/historic-sql/projection.test.ts src/ingest/local-bundle-ingest.test.ts src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS with all six test files passing.
|
||||
|
||||
- [ ] **Step 2: Run TypeScript validation**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run dead-code validation**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Inspect final status**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git status --short
|
||||
```
|
||||
|
||||
Expected: the worktree is clean after the two commits; if anything is listed,
|
||||
it is limited to the intended changes from this plan.
|
||||
|
||||
## Docs impact
|
||||
|
||||
No `docs-site/content/docs/` update is required. The remaining v1 work is an
|
||||
adapter contract type export and test acceptance closure; it does not change
|
||||
CLI behavior, user configuration, setup flow, connector behavior, or public
|
||||
documentation examples.
|
||||
|
||||
## Self-review
|
||||
|
||||
- Spec coverage: The plan covers the remaining adapter API usability gap and
|
||||
the failing historic-SQL local finalization acceptance path. The main
|
||||
runner, reports, traces, provenance, override replay, and historic-SQL
|
||||
production finalization behavior already exist.
|
||||
- Placeholder scan: The plan contains no placeholder tasks or unspecified
|
||||
implementation steps.
|
||||
- Type consistency: `DeterministicFinalizationContext`,
|
||||
`FinalizationOverrideReplay`, and `FinalizationResult` match the existing
|
||||
names in `packages/context/src/ingest/types.ts`; the test adapter delegates
|
||||
to the existing `projectHistoricSqlEvidence()` result shape.
|
||||
1862
docs/superpowers/plans/2026-05-18-adapter-owned-finalization-v1.md
Normal file
1862
docs/superpowers/plans/2026-05-18-adapter-owned-finalization-v1.md
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,754 @@
|
|||
# Isolated Diff Ingestion V1 Default Promotion Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or
|
||||
> superpowers:executing-plans to implement this plan task-by-task. Steps use
|
||||
> checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Promote isolated-diff WorkUnit execution to the default ingest runner
|
||||
path while keeping the old shared-worktree branch reachable by an explicit
|
||||
private fallback setting for the final cleanup rollout.
|
||||
|
||||
**Architecture:** The runner stops asking whether a source is on an
|
||||
isolated-diff allowlist. Instead, non-override bundle ingests use isolated
|
||||
diffs unless the private settings object lists the source in
|
||||
`sharedWorktreeSourceKeys`. Local runtime defaults that fallback list to empty,
|
||||
and tests keep the old path covered with an explicit legacy source setting so
|
||||
rollout step 11 can delete it safely.
|
||||
|
||||
**Tech Stack:** TypeScript ESM/NodeNext, Vitest, pnpm workspace commands,
|
||||
existing `IngestBundleRunner`, `IngestSettingsPort`, local ingest runtime, and
|
||||
isolated-diff runner tests.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
This audit read the original spec at
|
||||
`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, all
|
||||
plans matching
|
||||
`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and
|
||||
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the
|
||||
current ingest runner code under `packages/context/src/ingest/`.
|
||||
|
||||
Implemented v1 rollout coverage:
|
||||
|
||||
- Rollout steps 1 and 2 are implemented by the core plan: child worktrees,
|
||||
binary no-rename patch proposals, and `git apply --3way --index`
|
||||
integration exist.
|
||||
- Rollout step 3 is implemented by the textual conflict resolver plan:
|
||||
`textual-conflict-resolver.ts` is wired through `patch-integrator.ts`.
|
||||
- Rollout steps 4, 5, and 6 are implemented by the gates, provenance,
|
||||
reference, global wiki, and gate-repair plans: final gates, persistent traces,
|
||||
failure reports, provenance validation, target policy, and repair counters
|
||||
exist.
|
||||
- Rollout step 7 is implemented by the core and follow-up plans: Metabase has
|
||||
isolated-diff stale-reference regression coverage.
|
||||
- Rollout step 8 is implemented by
|
||||
`2026-05-18-isolated-diff-ingestion-v1-connector-migration.md` and the
|
||||
follow-up commits: Notion, LookML, Looker, dbt, and MetricFlow route through
|
||||
isolated child worktrees, and MetricFlow projection runs before WorkUnits.
|
||||
|
||||
Current v1-blocking gaps:
|
||||
|
||||
- Rollout step 10 is not complete. `IngestBundleRunner.isIsolatedDiffEnabled()`
|
||||
still checks `settings.isolatedDiffSourceKeys`, and
|
||||
`local-bundle-runtime.ts` still installs the internal allowlist returned by
|
||||
`defaultIsolatedDiffSourceKeys()`.
|
||||
- Rollout step 11 remains blocked until step 10 lands. The old
|
||||
shared-worktree WorkUnit branch is still present and must stay reachable in
|
||||
this plan for final cleanup validation.
|
||||
|
||||
Non-blocking gaps:
|
||||
|
||||
- Rollout step 9 deterministic semantic merge helpers remain intentionally
|
||||
deferred until v1 resolver metrics show frequent mechanical repairs.
|
||||
- Transitive SQL-projection dependency expansion remains outside v1; current
|
||||
gates cover direct declared join neighbors.
|
||||
- Moving provenance into worktree files remains outside v1; the implemented
|
||||
source of truth is the ingest provenance store and report body.
|
||||
- Public connector knobs such as `executionMode`, `planningStrategy`, and
|
||||
`conflictPolicy` remain non-goals and must not be added.
|
||||
- Richer resolver context, such as full transcript excerpts for every
|
||||
overlapping patch, can be evaluated after the default path has production
|
||||
traces.
|
||||
|
||||
## File structure
|
||||
|
||||
- Modify `packages/context/src/ingest/isolated-diff/source-routing.ts`.
|
||||
Replace the isolated-diff direct-write allowlist with an empty default
|
||||
shared-worktree fallback list.
|
||||
- Modify `packages/context/src/ingest/isolated-diff/source-routing.test.ts`.
|
||||
Lock the fallback list semantics and remove direct-write allowlist
|
||||
assertions.
|
||||
- Modify `packages/context/src/ingest/ports.ts`.
|
||||
Replace `isolatedDiffSourceKeys?: string[]` with
|
||||
`sharedWorktreeSourceKeys?: string[]` on the private runner settings port.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`.
|
||||
Make isolated diff the default for non-override runs and route to the old
|
||||
shared branch only when `sharedWorktreeSourceKeys` contains the source.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
Prove an unlisted source uses isolated diffs by default and prove an
|
||||
explicit fallback source can still reach the shared-worktree branch.
|
||||
- Modify `packages/context/src/ingest/local-bundle-runtime.ts`.
|
||||
Install the new empty fallback list instead of the old isolated-diff
|
||||
allowlist.
|
||||
- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`.
|
||||
Assert local runtime settings do not expose `isolatedDiffSourceKeys` and that
|
||||
`sharedWorktreeSourceKeys` defaults to `[]`.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Replace source routing semantics
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
|
||||
- Modify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
|
||||
- Modify: `packages/context/src/ingest/ports.ts`
|
||||
|
||||
- [ ] **Step 1: Write the failing source-routing tests**
|
||||
|
||||
Replace `packages/context/src/ingest/isolated-diff/source-routing.test.ts` with:
|
||||
|
||||
```ts
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { defaultSharedWorktreeSourceKeys, isSharedWorktreeFallbackSourceKey } from './source-routing.js';
|
||||
|
||||
describe('isolated-diff source routing', () => {
|
||||
it('defaults every non-override source to isolated diffs', () => {
|
||||
expect(defaultSharedWorktreeSourceKeys()).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns a mutable copy for runtime settings', () => {
|
||||
const keys = defaultSharedWorktreeSourceKeys();
|
||||
keys.push('legacy-source');
|
||||
|
||||
expect(defaultSharedWorktreeSourceKeys()).toEqual([]);
|
||||
});
|
||||
|
||||
it('recognizes only explicitly configured shared-worktree fallback sources', () => {
|
||||
expect(isSharedWorktreeFallbackSourceKey('notion', [])).toBe(false);
|
||||
expect(isSharedWorktreeFallbackSourceKey('metricflow', [])).toBe(false);
|
||||
expect(isSharedWorktreeFallbackSourceKey('legacy-source', ['legacy-source'])).toBe(true);
|
||||
expect(isSharedWorktreeFallbackSourceKey('other-source', ['legacy-source'])).toBe(false);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run the source-routing tests to verify they fail**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts
|
||||
```
|
||||
|
||||
Expected: FAIL because `defaultSharedWorktreeSourceKeys()` and
|
||||
`isSharedWorktreeFallbackSourceKey()` are not exported yet.
|
||||
|
||||
- [ ] **Step 3: Rewrite the routing helper**
|
||||
|
||||
Replace `packages/context/src/ingest/isolated-diff/source-routing.ts` with:
|
||||
|
||||
```ts
|
||||
const DEFAULT_SHARED_WORKTREE_SOURCE_KEYS: readonly string[] = [];
|
||||
|
||||
export function defaultSharedWorktreeSourceKeys(): string[] {
|
||||
return [...DEFAULT_SHARED_WORKTREE_SOURCE_KEYS];
|
||||
}
|
||||
|
||||
export function isSharedWorktreeFallbackSourceKey(
|
||||
sourceKey: string,
|
||||
sharedWorktreeSourceKeys: readonly string[] = DEFAULT_SHARED_WORKTREE_SOURCE_KEYS,
|
||||
): boolean {
|
||||
return sharedWorktreeSourceKeys.includes(sourceKey);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Rename the private settings field**
|
||||
|
||||
In `packages/context/src/ingest/ports.ts`, replace the
|
||||
`IngestSettingsPort` interface with:
|
||||
|
||||
```ts
|
||||
export interface IngestSettingsPort {
|
||||
memoryIngestionModel: string;
|
||||
probeRowCount: number;
|
||||
workUnitMaxConcurrency?: number;
|
||||
workUnitStepBudget?: number;
|
||||
workUnitFailureMode?: 'abort' | 'continue';
|
||||
sharedWorktreeSourceKeys?: string[];
|
||||
ingestTraceLevel?: IngestTraceLevel;
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the source-routing tests again**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/isolated-diff/source-routing.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 6: Commit routing semantics**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/isolated-diff/source-routing.ts \
|
||||
packages/context/src/ingest/isolated-diff/source-routing.test.ts \
|
||||
packages/context/src/ingest/ports.ts
|
||||
git commit -m "feat(ingest): make isolated diff routing the private default"
|
||||
```
|
||||
|
||||
### Task 2: Promote the runner default
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
|
||||
- [ ] **Step 1: Update the isolated runner test imports and harness**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
|
||||
replace the source-routing import with:
|
||||
|
||||
```ts
|
||||
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
|
||||
```
|
||||
|
||||
Then change the `makeDeps()` signature and `settings` block to:
|
||||
|
||||
```ts
|
||||
function makeDeps(
|
||||
runtime: Awaited<ReturnType<typeof makeRealGitRuntime>>,
|
||||
sourceKey = 'metabase',
|
||||
settings: Partial<IngestBundleRunnerDeps['settings']> = {},
|
||||
) {
|
||||
```
|
||||
|
||||
```ts
|
||||
settings: {
|
||||
memoryIngestionModel: 'test',
|
||||
probeRowCount: 1,
|
||||
sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(),
|
||||
ingestTraceLevel: 'trace',
|
||||
...settings,
|
||||
},
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add the default-promotion regression tests**
|
||||
|
||||
Insert these tests inside
|
||||
`describe('IngestBundleRunner isolated diff path', ...)`, before the existing
|
||||
non-Metabase routing matrix:
|
||||
|
||||
```ts
|
||||
it('routes an unlisted direct-writing source through isolated diffs by default', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
const sourceKey = 'custom-direct-source';
|
||||
const { deps, adapter } = makeDeps(runtime, sourceKey);
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'custom-wiki',
|
||||
rawFiles: ['custom/page.json'],
|
||||
peerFileIndex: [],
|
||||
dependencyPaths: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
|
||||
return { stopReason: 'natural' };
|
||||
}
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await mkdir(join(root, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(root, 'wiki/global/custom-isolated.md'),
|
||||
'---\nsummary: Custom isolated write\nusage_mode: auto\n---\n\nCustom isolated write.\n',
|
||||
'utf-8',
|
||||
);
|
||||
currentSession.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'custom-isolated',
|
||||
detail: 'Custom isolated write',
|
||||
rawPaths: ['custom/page.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['wiki/global/custom-isolated.md'],
|
||||
'custom wiki',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['custom/page.json', 'h1']], sourceKey);
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-custom-default',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey,
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
jobId: 'job-custom-default',
|
||||
failedWorkUnits: [],
|
||||
workUnitCount: 1,
|
||||
});
|
||||
|
||||
const trace = await readFile(
|
||||
join(runtime.configDir, '.ktx/ingest-traces/job-custom-default/trace.jsonl'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(trace).toContain('isolated_diff_enabled');
|
||||
expect(trace).toContain('work_unit_child_created');
|
||||
expect(trace).not.toContain('shared_worktree_path_enabled');
|
||||
|
||||
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
|
||||
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
|
||||
expect(reportBody?.isolatedDiff).toMatchObject({
|
||||
enabled: true,
|
||||
acceptedPatches: 1,
|
||||
});
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('keeps the shared-worktree path reachable through explicit private fallback settings', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
const sourceKey = 'legacy-source';
|
||||
const { deps, adapter } = makeDeps(runtime, sourceKey, {
|
||||
sharedWorktreeSourceKeys: ['legacy-source'],
|
||||
});
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'legacy-wiki',
|
||||
rawFiles: ['legacy/page.json'],
|
||||
peerFileIndex: [],
|
||||
dependencyPaths: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
|
||||
return { stopReason: 'natural' };
|
||||
}
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await mkdir(join(root, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(root, 'wiki/global/legacy-shared.md'),
|
||||
'---\nsummary: Legacy shared write\nusage_mode: auto\n---\n\nLegacy shared write.\n',
|
||||
'utf-8',
|
||||
);
|
||||
currentSession.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'legacy-shared',
|
||||
detail: 'Legacy shared write',
|
||||
rawPaths: ['legacy/page.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['wiki/global/legacy-shared.md'],
|
||||
'legacy wiki',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey);
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-legacy-shared',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey,
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
jobId: 'job-legacy-shared',
|
||||
failedWorkUnits: [],
|
||||
workUnitCount: 1,
|
||||
});
|
||||
|
||||
const trace = await readFile(
|
||||
join(runtime.configDir, '.ktx/ingest-traces/job-legacy-shared/trace.jsonl'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(trace).toContain('shared_worktree_path_enabled');
|
||||
expect(trace).not.toContain('work_unit_child_created');
|
||||
|
||||
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
|
||||
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
|
||||
expect(reportBody?.isolatedDiff).toMatchObject({
|
||||
enabled: false,
|
||||
});
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the new runner tests to verify the default test fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable"
|
||||
```
|
||||
|
||||
Expected: FAIL. The unlisted source still enters the old shared-worktree path
|
||||
because the runner checks `isolatedDiffSourceKeys`.
|
||||
|
||||
- [ ] **Step 4: Change the runner routing decision**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace
|
||||
`isIsolatedDiffEnabled()` with:
|
||||
|
||||
```ts
|
||||
private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean {
|
||||
return (this.deps.settings.sharedWorktreeSourceKeys ?? []).includes(sourceKey);
|
||||
}
|
||||
```
|
||||
|
||||
Then replace the isolated-diff routing line with:
|
||||
|
||||
```ts
|
||||
const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey);
|
||||
```
|
||||
|
||||
Finally, replace the shared-path trace event with:
|
||||
|
||||
```ts
|
||||
await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', {
|
||||
sourceKey: job.sourceKey,
|
||||
reason: 'explicit_private_fallback',
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the new runner tests again**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.isolated-diff.test.ts -t "unlisted direct-writing source|shared-worktree path reachable"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 6: Commit runner default promotion**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/ingest-bundle.runner.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
git commit -m "feat(ingest): promote isolated diff to default runner path"
|
||||
```
|
||||
|
||||
### Task 3: Update local runtime defaults
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
|
||||
- [ ] **Step 1: Update the local runtime settings test type**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace
|
||||
`RuntimeWithSettingsDeps` with:
|
||||
|
||||
```ts
|
||||
type RuntimeWithSettingsDeps = {
|
||||
deps: {
|
||||
settings: {
|
||||
sharedWorktreeSourceKeys?: string[];
|
||||
isolatedDiffSourceKeys?: string[];
|
||||
};
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace the local runtime settings assertion**
|
||||
|
||||
Replace the test named
|
||||
`enables isolated-diff routing for direct durable-write connectors` with:
|
||||
|
||||
```ts
|
||||
it('defaults local bundle ingest to isolated diffs without an allowlist', () => {
|
||||
const runtime = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
agentRunner: testAgentRunner(),
|
||||
});
|
||||
|
||||
const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
|
||||
|
||||
expect(settings.sharedWorktreeSourceKeys).toEqual([]);
|
||||
expect('isolatedDiffSourceKeys' in settings).toBe(false);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run the local runtime settings test to verify it fails**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest"
|
||||
```
|
||||
|
||||
Expected: FAIL because `local-bundle-runtime.ts` still sets
|
||||
`isolatedDiffSourceKeys`.
|
||||
|
||||
- [ ] **Step 4: Update local runtime imports and settings**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the
|
||||
source-routing import with:
|
||||
|
||||
```ts
|
||||
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
|
||||
```
|
||||
|
||||
Then replace the settings field:
|
||||
|
||||
```ts
|
||||
isolatedDiffSourceKeys: defaultIsolatedDiffSourceKeys(),
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```ts
|
||||
sharedWorktreeSourceKeys: defaultSharedWorktreeSourceKeys(),
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the local runtime settings test again**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "defaults local bundle ingest"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 6: Commit local runtime defaults**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest/local-bundle-runtime.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.test.ts
|
||||
git commit -m "feat(ingest): default local ingest to isolated diffs"
|
||||
```
|
||||
|
||||
### Task 4: Remove stale allowlist references
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
|
||||
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
- Verify: `packages/context/src/ingest/ports.ts`
|
||||
- Verify: `packages/context/src/ingest/**/*.test.ts`
|
||||
|
||||
- [ ] **Step 1: Search for old allowlist names**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "isolatedDiffSourceKeys|defaultIsolatedDiffSourceKeys|ISOLATED_DIFF_DIRECT_WRITE_SOURCE_KEYS|isIsolatedDiffDirectWriteSourceKey" packages/context/src
|
||||
```
|
||||
|
||||
Expected: no matches.
|
||||
|
||||
- [ ] **Step 2: Search for the new fallback setting**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey" packages/context/src
|
||||
```
|
||||
|
||||
Expected: matches only in these files:
|
||||
|
||||
```text
|
||||
packages/context/src/ingest/ports.ts
|
||||
packages/context/src/ingest/ingest-bundle.runner.ts
|
||||
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
packages/context/src/ingest/isolated-diff/source-routing.ts
|
||||
packages/context/src/ingest/isolated-diff/source-routing.test.ts
|
||||
packages/context/src/ingest/local-bundle-runtime.ts
|
||||
packages/context/src/ingest/local-bundle-runtime.test.ts
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run a focused no-allowlist regression suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/isolated-diff/source-routing.test.ts \
|
||||
src/ingest/local-bundle-runtime.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
-t "source routing|defaults local bundle ingest|unlisted direct-writing source|shared-worktree path reachable|routes notion|routes lookml|routes looker|routes dbt|routes metricflow"
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Commit stale-reference cleanup if needed**
|
||||
|
||||
If Step 1 or Step 2 required any edits, run:
|
||||
|
||||
```bash
|
||||
git add packages/context/src/ingest
|
||||
git commit -m "chore(ingest): remove isolated diff allowlist references"
|
||||
```
|
||||
|
||||
If no files changed, record that no cleanup commit was needed in the execution
|
||||
notes for this task.
|
||||
|
||||
### Task 5: Final verification
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.ts`
|
||||
- Verify: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- Verify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
|
||||
- Verify: `packages/context/src/ingest/ports.ts`
|
||||
- Verify: `docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md`
|
||||
|
||||
- [ ] **Step 1: Run the full isolated-diff focused suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-trace.test.ts \
|
||||
src/ingest/wiki-body-refs.test.ts \
|
||||
src/ingest/artifact-gates.test.ts \
|
||||
src/ingest/semantic-layer-target-policy.test.ts \
|
||||
src/ingest/isolated-diff/source-routing.test.ts \
|
||||
src/ingest/isolated-diff/git-patch.test.ts \
|
||||
src/ingest/isolated-diff/work-unit-executor.test.ts \
|
||||
src/ingest/isolated-diff/patch-integrator.test.ts \
|
||||
src/ingest/isolated-diff/textual-conflict-resolver.test.ts \
|
||||
src/ingest/final-gate-repair.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
src/ingest/report-snapshot.test.ts \
|
||||
src/ingest/local-bundle-runtime.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run the MetricFlow local ingest regression**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-ingest.test.ts -t "runs full MetricFlow local ingest"
|
||||
```
|
||||
|
||||
Expected: PASS. The report body includes `isolatedDiff.enabled: true`,
|
||||
`acceptedPatches: 0`, and a string `projectionSha`.
|
||||
|
||||
- [ ] **Step 3: Run package type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 4: Run package tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run test
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 5: Run TypeScript dead-code checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS, or only pre-existing findings unrelated to the files changed
|
||||
by this plan. Investigate any finding that names `source-routing.ts`,
|
||||
`ports.ts`, `local-bundle-runtime.ts`, or `ingest-bundle.runner.ts`.
|
||||
|
||||
- [ ] **Step 6: Decide whether docs-site needs an update**
|
||||
|
||||
No `docs-site/content/docs/` change is expected for this plan because the
|
||||
change is an internal runner rollout switch and does not add or remove public
|
||||
CLI commands, flags, config fields, connector setup steps, or user-facing
|
||||
documentation concepts.
|
||||
|
||||
- [ ] **Step 7: Commit final verification notes**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git status --short
|
||||
git add docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-default-promotion.md
|
||||
git commit -m "docs: add isolated diff default promotion plan"
|
||||
```
|
||||
|
||||
Only include the plan file in this commit if all implementation commits have
|
||||
already captured their code changes.
|
||||
|
||||
## Completion criteria
|
||||
|
||||
This plan is complete when:
|
||||
|
||||
- `packages/context/src/ingest/ports.ts` has
|
||||
`sharedWorktreeSourceKeys?: string[]` and no `isolatedDiffSourceKeys` field.
|
||||
- `IngestBundleRunner` uses isolated diffs for every non-override source unless
|
||||
`sharedWorktreeSourceKeys` explicitly contains that source.
|
||||
- The trace for a default-routed source contains `isolated_diff_enabled` and
|
||||
not `shared_worktree_path_enabled`.
|
||||
- The trace for an explicitly fallback-routed source contains
|
||||
`shared_worktree_path_enabled` and not `work_unit_child_created`.
|
||||
- Local runtime settings default `sharedWorktreeSourceKeys` to `[]`.
|
||||
- No production or test code under `packages/context/src` references the old
|
||||
isolated-diff allowlist names.
|
||||
- The focused isolated-diff suite, MetricFlow local ingest regression,
|
||||
`@ktx/context` type-check, `@ktx/context` tests, and dead-code checks pass.
|
||||
|
||||
## Next rollout step
|
||||
|
||||
After this plan is implemented and verified, the only remaining v1-blocking
|
||||
rollout item from the spec is step 11: remove the old shared-worktree WorkUnit
|
||||
execution path and delete the private `sharedWorktreeSourceKeys` fallback
|
||||
setting.
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,980 @@
|
|||
# Isolated Diff Ingestion V1 Shared Worktree Removal Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use
|
||||
> superpowers:subagent-driven-development (recommended) or
|
||||
> superpowers:executing-plans to implement this plan task-by-task. Steps use
|
||||
> checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Remove the old shared-worktree WorkUnit execution path so every
|
||||
non-override bundle ingest uses isolated WorkUnit diffs.
|
||||
|
||||
**Architecture:** Keep `IngestBundleRunner` with one non-override execution
|
||||
path: raw snapshot, optional deterministic projection, child WorkUnit
|
||||
worktrees, patch integration, reconciliation, final gates, provenance
|
||||
validation, and squash. Delete the private fallback routing setting and all
|
||||
legacy tests, traces, and agent instructions that existed only for shared
|
||||
WorkUnit state.
|
||||
|
||||
**Tech Stack:** TypeScript, Vitest, pnpm, KTX ingest runner, Git worktrees.
|
||||
|
||||
---
|
||||
|
||||
## Audit summary
|
||||
|
||||
This audit read the original design in
|
||||
`docs/superpowers/specs/2026-05-17-isolated-diff-ingestion-design.md`, every
|
||||
implemented plan matching
|
||||
`docs/superpowers/plans/2026-05-17-isolated-diff-ingestion-*.md` and
|
||||
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-*.md`, and the
|
||||
current implementation under `packages/context/src/ingest/`,
|
||||
`packages/context/prompts/`, and `packages/context/skills/`.
|
||||
|
||||
Implemented v1 rollout coverage:
|
||||
|
||||
- Rollout steps 1 and 2 exist in code: isolated child worktrees, binary
|
||||
no-rename patch collection, and `git apply --3way --index` patch integration.
|
||||
- Rollout step 3 exists in code:
|
||||
`packages/context/src/ingest/isolated-diff/textual-conflict-resolver.ts` is
|
||||
wired through the patch integrator and runner.
|
||||
- Rollout steps 4, 5, and 6 exist in code: final wiki and semantic-layer gates,
|
||||
provenance validation before squash, target policy checks, bounded gate
|
||||
repair, failed reports, and trace counters.
|
||||
- Rollout step 7 exists in code: the Metabase stale body-reference regression
|
||||
is covered in `ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
- Rollout step 8 is committed: Notion, LookML, Looker, dbt, and MetricFlow
|
||||
route through isolated child worktrees, and MetricFlow projection runs before
|
||||
WorkUnits.
|
||||
- Rollout step 10 is committed: non-override ingests default to isolated diffs,
|
||||
and the old branch is reachable only through the private
|
||||
`sharedWorktreeSourceKeys` fallback setting.
|
||||
|
||||
## Remaining gaps
|
||||
|
||||
The remaining v1-blocking gaps are all part of rollout step 11:
|
||||
|
||||
- `packages/context/src/ingest/ports.ts` still exposes the private
|
||||
`sharedWorktreeSourceKeys?: string[]` setting.
|
||||
- `packages/context/src/ingest/isolated-diff/source-routing.ts` and its test
|
||||
exist only to support the fallback setting.
|
||||
- `packages/context/src/ingest/local-bundle-runtime.ts` still installs
|
||||
`sharedWorktreeSourceKeys: []`.
|
||||
- `packages/context/src/ingest/ingest-bundle.runner.ts` still checks
|
||||
`isSharedWorktreeFallbackEnabled()` and contains the
|
||||
`shared_worktree_path_enabled` branch that runs WorkUnits against the mutable
|
||||
integration worktree.
|
||||
- `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
still has a regression proving the shared-worktree fallback is reachable.
|
||||
- `packages/context/src/ingest/ingest-bundle.runner.test.ts` keeps broad runner
|
||||
tests on the legacy path through `sharedWorktreeSourceKeys`; those tests must
|
||||
either use the isolated mock harness or move coverage into the real-git
|
||||
isolated suite.
|
||||
- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` and
|
||||
`packages/context/skills/ingest_triage/SKILL.md` still tell WorkUnit agents
|
||||
that prior WorkUnit writes in the same job are visible in the current working
|
||||
branch. That instruction is false after isolated diffs and must be removed
|
||||
with the shared path.
|
||||
|
||||
Non-blocking gaps after this plan:
|
||||
|
||||
- Rollout step 9 deterministic semantic merge helpers remain intentionally
|
||||
deferred until resolver metrics show frequent mechanical repairs.
|
||||
- Semantic-layer dependency expansion remains direct declared joins only; the
|
||||
spec explicitly defers transitive SQL-projection closure.
|
||||
- Provenance remains in the ingest provenance store and report body; moving it
|
||||
to worktree files is a separate schema migration.
|
||||
- Resolver context can later include richer transcript excerpts and explicit
|
||||
overlap summaries for every previously applied patch.
|
||||
- Failures before an ingest run row exists still have deterministic trace files
|
||||
but no stored ingest report.
|
||||
|
||||
## File structure
|
||||
|
||||
- Modify `packages/context/src/ingest/ports.ts`. Remove the private fallback
|
||||
setting from `IngestSettingsPort`.
|
||||
- Modify `packages/context/src/ingest/local-bundle-runtime.ts`. Stop importing
|
||||
and installing default shared-worktree fallback settings.
|
||||
- Delete `packages/context/src/ingest/isolated-diff/source-routing.ts`. This
|
||||
helper has no responsibility once fallback routing is removed.
|
||||
- Delete `packages/context/src/ingest/isolated-diff/source-routing.test.ts`.
|
||||
Its assertions exist only for the fallback helper.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.ts`. Delete
|
||||
`isSharedWorktreeFallbackEnabled()`, the old shared-worktree WorkUnit branch,
|
||||
and helper methods that only served that branch.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`.
|
||||
Remove fallback reachability coverage and add a stale-setting regression that
|
||||
proves a runtime object cannot opt out of isolated diffs.
|
||||
- Modify `packages/context/src/ingest/ingest-bundle.runner.test.ts`. Remove
|
||||
the fallback setting from the broad test harness and make its mocked Git
|
||||
session support no-op isolated patch collection.
|
||||
- Modify `packages/context/src/ingest/local-bundle-runtime.test.ts`. Assert
|
||||
local runtime settings do not contain the fallback key.
|
||||
- Modify `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`.
|
||||
Replace shared-branch WorkUnit visibility instructions with isolated-diff
|
||||
instructions.
|
||||
- Modify `packages/context/skills/ingest_triage/SKILL.md`. Remove Stage 3
|
||||
prior-WorkUnit visibility language and keep cross-WorkUnit sweep guidance in
|
||||
Stage 4 reconciliation.
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Add removal-contract regressions
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts`
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Update the local runtime settings type**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace
|
||||
`RuntimeWithSettingsDeps` with:
|
||||
|
||||
```ts
|
||||
type RuntimeWithSettingsDeps = {
|
||||
deps: {
|
||||
settings: Record<string, unknown>;
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace the local runtime fallback-setting assertion**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.test.ts`, replace the test
|
||||
named `defaults local bundle ingest to isolated diffs without an allowlist` with:
|
||||
|
||||
```ts
|
||||
it('defaults local bundle ingest to isolated diffs without a shared-worktree fallback setting', () => {
|
||||
const runtime = createLocalBundleIngestRuntime({
|
||||
project,
|
||||
adapters: [new FakeSourceAdapter()],
|
||||
agentRunner: testAgentRunner(),
|
||||
});
|
||||
|
||||
const settings = (runtime.runner as unknown as RuntimeWithSettingsDeps).deps.settings;
|
||||
|
||||
expect(settings).not.toHaveProperty('sharedWorktreeSourceKeys');
|
||||
expect(Object.keys(settings).sort()).toEqual([
|
||||
'ingestTraceLevel',
|
||||
'memoryIngestionModel',
|
||||
'probeRowCount',
|
||||
'workUnitFailureMode',
|
||||
'workUnitMaxConcurrency',
|
||||
'workUnitStepBudget',
|
||||
]);
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Remove the source-routing import from the isolated runner test**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
|
||||
delete this import:
|
||||
|
||||
```ts
|
||||
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
|
||||
```
|
||||
|
||||
Then remove the `sharedWorktreeSourceKeys` line from the `settings` object in
|
||||
`makeDeps()`:
|
||||
|
||||
```ts
|
||||
settings: {
|
||||
memoryIngestionModel: 'test',
|
||||
probeRowCount: 1,
|
||||
ingestTraceLevel: 'trace',
|
||||
...settings,
|
||||
},
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Replace the shared fallback reachability test**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
|
||||
replace the test named
|
||||
`keeps the shared-worktree path reachable through explicit private fallback settings`
|
||||
with this stale-setting regression:
|
||||
|
||||
```ts
|
||||
it('does not support shared-worktree fallback settings', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
const sourceKey = 'legacy-source';
|
||||
const staleSettings = {
|
||||
sharedWorktreeSourceKeys: ['legacy-source'],
|
||||
} as Partial<IngestBundleRunnerDeps['settings']> & Record<string, unknown>;
|
||||
const { deps, adapter } = makeDeps(runtime, sourceKey, staleSettings);
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'legacy-wiki',
|
||||
rawFiles: ['legacy/page.json'],
|
||||
peerFileIndex: [],
|
||||
dependencyPaths: [],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
|
||||
return { stopReason: 'natural' };
|
||||
}
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await mkdir(join(root, 'wiki/global'), { recursive: true });
|
||||
await writeFile(
|
||||
join(root, 'wiki/global/legacy-isolated.md'),
|
||||
'---\nsummary: Legacy isolated write\nusage_mode: auto\n---\n\nLegacy isolated write.\n',
|
||||
'utf-8',
|
||||
);
|
||||
currentSession.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'created',
|
||||
key: 'legacy-isolated',
|
||||
detail: 'Legacy isolated write',
|
||||
rawPaths: ['legacy/page.json'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['wiki/global/legacy-isolated.md'],
|
||||
'legacy isolated wiki',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(runner, runtime, [['legacy/page.json', 'h1']], sourceKey);
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'job-legacy-isolated',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey,
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
jobId: 'job-legacy-isolated',
|
||||
failedWorkUnits: [],
|
||||
workUnitCount: 1,
|
||||
});
|
||||
|
||||
const trace = await readFile(
|
||||
join(runtime.configDir, '.ktx/ingest-traces/job-legacy-isolated/trace.jsonl'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(trace).toContain('isolated_diff_enabled');
|
||||
expect(trace).toContain('work_unit_child_created');
|
||||
expect(trace).not.toContain('shared_worktree_path_enabled');
|
||||
|
||||
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
|
||||
const reportBody = reportCreate?.body as { isolatedDiff?: unknown } | undefined;
|
||||
expect(reportBody?.isolatedDiff).toMatchObject({
|
||||
enabled: true,
|
||||
acceptedPatches: 1,
|
||||
});
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the removal regressions and confirm they fail**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/local-bundle-runtime.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
-t "shared-worktree fallback|stale|defaults local bundle ingest|unlisted direct-writing source"
|
||||
```
|
||||
|
||||
Expected: FAIL. The local runtime still exposes `sharedWorktreeSourceKeys`, and
|
||||
the stale-setting runner test still reaches `shared_worktree_path_enabled`.
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Remove the fallback setting and routing module
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ports.ts`
|
||||
- Modify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- Delete: `packages/context/src/ingest/isolated-diff/source-routing.ts`
|
||||
- Delete: `packages/context/src/ingest/isolated-diff/source-routing.test.ts`
|
||||
|
||||
- [ ] **Step 1: Remove the fallback setting from the runner settings port**
|
||||
|
||||
In `packages/context/src/ingest/ports.ts`, replace `IngestSettingsPort` with:
|
||||
|
||||
```ts
|
||||
export interface IngestSettingsPort {
|
||||
memoryIngestionModel: string;
|
||||
probeRowCount: number;
|
||||
workUnitMaxConcurrency?: number;
|
||||
workUnitStepBudget?: number;
|
||||
workUnitFailureMode?: 'abort' | 'continue';
|
||||
ingestTraceLevel?: IngestTraceLevel;
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Remove the local runtime source-routing import**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.ts`, delete this import:
|
||||
|
||||
```ts
|
||||
import { defaultSharedWorktreeSourceKeys } from './isolated-diff/source-routing.js';
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Remove the local runtime fallback setting**
|
||||
|
||||
In `packages/context/src/ingest/local-bundle-runtime.ts`, replace the settings
|
||||
object with:
|
||||
|
||||
```ts
|
||||
settings: {
|
||||
memoryIngestionModel: options.project.config.llm.models.default ?? 'local-ingest-model',
|
||||
probeRowCount: 0,
|
||||
workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency,
|
||||
workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget,
|
||||
workUnitFailureMode: options.project.config.ingest.workUnits.failureMode,
|
||||
ingestTraceLevel: ingestTraceLevelFromEnv(),
|
||||
},
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Delete the fallback routing helper files**
|
||||
|
||||
Delete:
|
||||
|
||||
```bash
|
||||
git rm packages/context/src/ingest/isolated-diff/source-routing.ts
|
||||
git rm packages/context/src/ingest/isolated-diff/source-routing.test.ts
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Confirm no fallback helper imports remain**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|source-routing" packages/context/src
|
||||
```
|
||||
|
||||
Expected: no matches. A non-zero exit is the success signal here: `rg` exits with status 1 when the cleanup is
|
||||
complete.
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Delete the shared-worktree runner branch
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
|
||||
- [ ] **Step 1: Remove helper methods used only by the shared branch**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete these private
|
||||
methods:
|
||||
|
||||
```ts
|
||||
private buildFailedWorkUnitOutcome(wu: WorkUnit, error: unknown): WorkUnitOutcome {
|
||||
return {
|
||||
unitKey: wu.unitKey,
|
||||
status: 'failed',
|
||||
reason: error instanceof Error ? error.message : String(error),
|
||||
preSha: '',
|
||||
postSha: '',
|
||||
actions: [],
|
||||
touchedSlSources: [],
|
||||
slDisallowed: wu.slDisallowed,
|
||||
slDisallowedReason: wu.slDisallowedReason,
|
||||
};
|
||||
}
|
||||
|
||||
private formatWorkUnitFailure(outcome: WorkUnitOutcome): string {
|
||||
return `WorkUnit ${outcome.unitKey} failed: ${outcome.reason ?? 'unknown failure'}`;
|
||||
}
|
||||
|
||||
private isSharedWorktreeFallbackEnabled(sourceKey: string): boolean {
|
||||
return (this.deps.settings.sharedWorktreeSourceKeys ?? []).includes(sourceKey);
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Make non-override isolated routing unconditional**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, replace:
|
||||
|
||||
```ts
|
||||
const isolatedDiffEnabled = !overrideReport && !this.isSharedWorktreeFallbackEnabled(job.sourceKey);
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```ts
|
||||
const isolatedDiffEnabled = !overrideReport;
|
||||
```
|
||||
|
||||
Then replace:
|
||||
|
||||
```ts
|
||||
if (!overrideReport && isolatedDiffEnabled) {
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```ts
|
||||
if (!overrideReport) {
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Delete the old shared-worktree branch**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.ts`, delete the whole
|
||||
branch that starts with:
|
||||
|
||||
```ts
|
||||
} else if (!overrideReport) {
|
||||
await runTrace.event('info', 'routing', 'shared_worktree_path_enabled', {
|
||||
sourceKey: job.sourceKey,
|
||||
reason: 'explicit_private_fallback',
|
||||
});
|
||||
```
|
||||
|
||||
and ends with:
|
||||
|
||||
```ts
|
||||
latestReportWorkUnits = this.toReportWorkUnits(stageIndex);
|
||||
}
|
||||
```
|
||||
|
||||
After the deletion, the surrounding code must read:
|
||||
|
||||
```ts
|
||||
}
|
||||
|
||||
}
|
||||
const carryForwardResult =
|
||||
contextReport && this.deps.contextCandidateCarryforward
|
||||
? await this.deps.contextCandidateCarryforward.carryForward({
|
||||
runId: runRow.id,
|
||||
connectionId: job.connectionId,
|
||||
sourceKey: job.sourceKey,
|
||||
})
|
||||
: null;
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Confirm the branch trace event is gone**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "shared_worktree_path_enabled|explicit_private_fallback|isSharedWorktreeFallbackEnabled|sharedWorktreeSourceKeys" packages/context/src/ingest/ingest-bundle.runner.ts
|
||||
```
|
||||
|
||||
Expected: no matches. `rg` exits with status 1 when nothing matches, which is the success signal for this step.
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Update runner tests for isolated-only execution
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
|
||||
- Modify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
|
||||
- [ ] **Step 1: Remove the fallback setting from the broad runner test harness**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the
|
||||
`settings` block in `buildRunner()` with:
|
||||
|
||||
```ts
|
||||
settings: {
|
||||
probeRowCount: 1,
|
||||
memoryIngestionModel: 'test-model',
|
||||
},
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Add no-op isolated patch support to the broad mock Git**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, replace the
|
||||
`scopedGit` object in `makeDeps()` with:
|
||||
|
||||
```ts
|
||||
const scopedGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Update the custom sequencer test Git mock**
|
||||
|
||||
In the test named
|
||||
`refuses to squash-merge when the session worktree has an in-progress sequencer op`,
|
||||
replace the `sessionGit` object with:
|
||||
|
||||
```ts
|
||||
const sessionGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Move the failed-WorkUnit integration regression to the isolated suite**
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.test.ts`, delete the test
|
||||
named `squash-merges only successful WUs into main when one WU fails sl_validate`.
|
||||
|
||||
In `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`,
|
||||
add this test near the other real-git isolated runner regressions:
|
||||
|
||||
```ts
|
||||
it('does not integrate failed isolated WorkUnit patches', async () => {
|
||||
const runtime = await makeRealGitRuntime();
|
||||
try {
|
||||
const { deps, adapter } = makeDeps(runtime, 'fake');
|
||||
adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
],
|
||||
});
|
||||
deps.diffSetService.compute = vi.fn().mockResolvedValue({
|
||||
added: ['good.raw', 'bad.raw'],
|
||||
modified: [],
|
||||
deleted: [],
|
||||
unchanged: [],
|
||||
});
|
||||
deps.slValidator.validateSingleSource = vi.fn(
|
||||
async (_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
|
||||
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
|
||||
warnings: [],
|
||||
}),
|
||||
) as never;
|
||||
|
||||
let currentSession: any = null;
|
||||
deps.toolsetFactory.createIngestWuToolset = vi.fn((toolSession: any) => {
|
||||
currentSession = toolSession;
|
||||
return { toRuntimeTools: vi.fn(() => ({})) };
|
||||
});
|
||||
deps.agentRunner.runLoop = vi.fn(async (params: any) => {
|
||||
if (params.telemetryTags.operationName !== 'ingest-bundle-wu') {
|
||||
return { stopReason: 'natural' };
|
||||
}
|
||||
const unitKey = params.telemetryTags.unitKey;
|
||||
const root = rootOfConfig(currentSession.configService, runtime.configDir);
|
||||
await mkdir(join(root, 'semantic-layer/warehouse'), { recursive: true });
|
||||
if (unitKey === 'wu-good') {
|
||||
await writeFile(join(root, 'semantic-layer/warehouse/good.yaml'), 'name: good\n', 'utf-8');
|
||||
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'good');
|
||||
currentSession.actions.push({
|
||||
target: 'sl',
|
||||
type: 'created',
|
||||
key: 'good',
|
||||
detail: 'good source',
|
||||
targetConnectionId: 'warehouse',
|
||||
rawPaths: ['good.raw'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['semantic-layer/warehouse/good.yaml'],
|
||||
'test: add good source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
if (unitKey === 'wu-bad') {
|
||||
await writeFile(join(root, 'semantic-layer/warehouse/bad.yaml'), 'name: bad\n', 'utf-8');
|
||||
addTouchedSlSource(currentSession.touchedSlSources, 'warehouse', 'bad');
|
||||
currentSession.actions.push({
|
||||
target: 'sl',
|
||||
type: 'created',
|
||||
key: 'bad',
|
||||
detail: 'bad source',
|
||||
targetConnectionId: 'warehouse',
|
||||
rawPaths: ['bad.raw'],
|
||||
});
|
||||
await currentSession.gitService.commitFiles(
|
||||
['semantic-layer/warehouse/bad.yaml'],
|
||||
'test: add bad source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
}) as never;
|
||||
|
||||
const runner = new IngestBundleRunner(deps);
|
||||
await mockStageRawFiles(
|
||||
runner,
|
||||
runtime,
|
||||
[
|
||||
['good.raw', 'good-hash'],
|
||||
['bad.raw', 'bad-hash'],
|
||||
],
|
||||
'fake',
|
||||
);
|
||||
|
||||
const result = await runner.run({
|
||||
jobId: 'job-failed-wu-isolated',
|
||||
connectionId: 'warehouse',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload' },
|
||||
});
|
||||
|
||||
expect(result.failedWorkUnits).toEqual(['wu-bad']);
|
||||
await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/good.yaml'), 'utf-8')).resolves.toContain(
|
||||
'good',
|
||||
);
|
||||
await expect(readFile(join(runtime.configDir, 'semantic-layer/warehouse/bad.yaml'), 'utf-8')).rejects.toThrow();
|
||||
|
||||
const reportCreate = vi.mocked(deps.reports.create).mock.calls.at(-1)?.[0];
|
||||
const reportBody = reportCreate?.body as { isolatedDiff?: { acceptedPatches?: number }; failedWorkUnits?: string[] };
|
||||
expect(reportBody.failedWorkUnits).toEqual(['wu-bad']);
|
||||
expect(reportBody.isolatedDiff).toMatchObject({ enabled: true, acceptedPatches: 1 });
|
||||
|
||||
const trace = await readFile(
|
||||
join(runtime.configDir, '.ktx/ingest-traces/job-failed-wu-isolated/trace.jsonl'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(trace).toContain('work_unit_failed_before_patch');
|
||||
expect(trace).toContain('patch_accepted');
|
||||
expect(trace).not.toContain('shared_worktree_path_enabled');
|
||||
} finally {
|
||||
await rm(runtime.homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run the updated focused runner tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
src/ingest/local-bundle-runtime.test.ts \
|
||||
-t "does not support shared-worktree|does not integrate failed isolated|defaults local bundle ingest|unlisted direct-writing source"
|
||||
```
|
||||
|
||||
Expected: PASS. The traces contain `isolated_diff_enabled`, child worktree
|
||||
events, and no `shared_worktree_path_enabled`.
|
||||
|
||||
- [ ] **Step 6: Run the broad runner suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS. Broad runner coverage no longer depends on
|
||||
`sharedWorktreeSourceKeys`.
|
||||
|
||||
- [ ] **Step 7: Commit the runner removal**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/src/ingest/ports.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.ts \
|
||||
packages/context/src/ingest/local-bundle-runtime.test.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.test.ts \
|
||||
packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts \
|
||||
packages/context/src/ingest/isolated-diff/source-routing.ts \
|
||||
packages/context/src/ingest/isolated-diff/source-routing.test.ts
|
||||
git commit -m "refactor(ingest): remove shared worktree WorkUnit path"
|
||||
```
|
||||
|
||||
Expected: commit succeeds. The deleted routing files are included as deletions.
|
||||
|
||||
---
|
||||
|
||||
### Task 5: Remove shared-branch agent instructions
|
||||
|
||||
**Files:**
|
||||
- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
|
||||
- Modify: `packages/context/skills/ingest_triage/SKILL.md`
|
||||
- Test: `packages/context/src/ingest/ingest-prompts.test.ts`
|
||||
- Test: `packages/context/src/ingest/ingest-runtime-assets.test.ts`
|
||||
|
||||
- [ ] **Step 1: Update the WorkUnit role text**
|
||||
|
||||
In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace
|
||||
the `<role>` block with:
|
||||
|
||||
```md
|
||||
<role>
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit
|
||||
gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs,
|
||||
Metabase card JSONs, Notion pages, or similar) and you must translate that
|
||||
slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass.
|
||||
You run in an isolated WorkUnit worktree. Deterministic projection output,
|
||||
existing project memory, and listed dependency paths are visible; sibling
|
||||
WorkUnit edits from this same job are not visible until the runner integrates
|
||||
accepted patches.
|
||||
</role>
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Update the WorkUnit workflow text**
|
||||
|
||||
In the same prompt, replace workflow steps 2 and 4 with:
|
||||
|
||||
```md
|
||||
2. Load the per-source review skill first (for example `lookml_ingest`,
|
||||
`metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and
|
||||
`wiki_capture`, and `ingest_triage` last. The triage skill tells you how to
|
||||
react when existing project memory, deterministic projection output, or
|
||||
prior provenance overlaps with what this WorkUnit is about to write.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large
|
||||
files) to load content. Before writing a new SL source or wiki page, call
|
||||
`discover_data` for each candidate source, table, metric, or topic name to
|
||||
find existing wiki pages, SL sources, deterministic projection output, prior
|
||||
sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit
|
||||
one, and apply any matching canonical pin before deciding whether to edit,
|
||||
rename, or skip.
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Update the WorkUnit do-not rule**
|
||||
|
||||
In the same prompt, replace:
|
||||
|
||||
```md
|
||||
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```md
|
||||
- Do not silently accept a name collision with visible existing memory,
|
||||
deterministic projection output, or prior provenance when the formula differs.
|
||||
Trigger `ingest_triage`.
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Update ingest triage caller guidance**
|
||||
|
||||
In `packages/context/skills/ingest_triage/SKILL.md`, replace:
|
||||
|
||||
```md
|
||||
This skill is loaded in two contexts:
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions.
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```md
|
||||
This skill is loaded in two contexts:
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover`, deterministic projection
|
||||
output, existing project memory, or prior provenance overlaps with what the
|
||||
current WorkUnit is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch
|
||||
overlap, and eviction decisions.
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Update same-ingest wording in ingest triage**
|
||||
|
||||
In `packages/context/skills/ingest_triage/SKILL.md`, replace:
|
||||
|
||||
```md
|
||||
4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:**
|
||||
```
|
||||
|
||||
with:
|
||||
|
||||
```md
|
||||
4. **If reconciliation sees accepted patches from this same job with no
|
||||
prior-sync row, check for same-ingest contradictions:**
|
||||
```
|
||||
|
||||
- [ ] **Step 6: Search for stale shared-state prompt language**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "prior WU|prior-WU|Prior WorkUnits|same job may have already written|visible on the working branch|shared_worktree_path_enabled|shared-worktree path reachable" packages/context/prompts packages/context/skills packages/context/src/ingest
|
||||
```
|
||||
|
||||
Expected: no matches. `rg` exits with status 1 when nothing matches, so a non-zero exit is the passing result for this step.
|
||||
|
||||
- [ ] **Step 7: Run prompt asset tests**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-prompts.test.ts \
|
||||
src/ingest/ingest-runtime-assets.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS. Prompt assets still load from packaged KTX assets.
|
||||
|
||||
- [ ] **Step 8: Commit the prompt cleanup**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git add \
|
||||
packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \
|
||||
packages/context/skills/ingest_triage/SKILL.md
|
||||
git commit -m "docs(ingest): align WorkUnit prompts with isolated diffs"
|
||||
```
|
||||
|
||||
Expected: commit succeeds.
|
||||
|
||||
---
|
||||
|
||||
### Task 6: Final verification
|
||||
|
||||
**Files:**
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.ts`
|
||||
- Verify: `packages/context/src/ingest/ports.ts`
|
||||
- Verify: `packages/context/src/ingest/local-bundle-runtime.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.test.ts`
|
||||
- Verify: `packages/context/src/ingest/ingest-bundle.runner.isolated-diff.test.ts`
|
||||
- Verify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`
|
||||
- Verify: `packages/context/skills/ingest_triage/SKILL.md`
|
||||
|
||||
- [ ] **Step 1: Run the isolated-diff focused suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context exec vitest run \
|
||||
src/ingest/ingest-trace.test.ts \
|
||||
src/ingest/wiki-body-refs.test.ts \
|
||||
src/ingest/artifact-gates.test.ts \
|
||||
src/ingest/semantic-layer-target-policy.test.ts \
|
||||
src/ingest/isolated-diff/git-patch.test.ts \
|
||||
src/ingest/isolated-diff/work-unit-executor.test.ts \
|
||||
src/ingest/isolated-diff/patch-integrator.test.ts \
|
||||
src/ingest/isolated-diff/textual-conflict-resolver.test.ts \
|
||||
src/ingest/final-gate-repair.test.ts \
|
||||
src/ingest/report-snapshot.test.ts \
|
||||
src/ingest/ingest-bundle.runner.isolated-diff.test.ts
|
||||
```
|
||||
|
||||
Expected: PASS. The output includes the isolated-diff runner tests and no
|
||||
`source-routing.test.ts`.
|
||||
|
||||
- [ ] **Step 2: Run the full context test suite**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run test
|
||||
```
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run context type-check**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm --filter @ktx/context run type-check
|
||||
```
|
||||
|
||||
Expected: PASS. There are no `sharedWorktreeSourceKeys` type errors because the
|
||||
setting no longer exists.
|
||||
|
||||
- [ ] **Step 4: Run dead-code checks**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
pnpm run dead-code
|
||||
```
|
||||
|
||||
Expected: PASS. Knip does not report deleted source-routing exports, and Biome
|
||||
does not report stale imports.
|
||||
|
||||
- [ ] **Step 5: Search for removed legacy path names**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "sharedWorktreeSourceKeys|defaultSharedWorktreeSourceKeys|isSharedWorktreeFallbackSourceKey|shared_worktree_path_enabled|explicit_private_fallback|source-routing" packages docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md
|
||||
```
|
||||
|
||||
Expected: matches only in this plan file. There must be no matches under
|
||||
`packages/`.
|
||||
|
||||
- [ ] **Step 6: Confirm docs-site does not need an update**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
rg -n "sharedWorktree|isolatedDiffSourceKeys|sharedWorktreeSourceKeys|executionMode|planningStrategy|conflictPolicy" docs-site README.md packages/*/README.md
|
||||
```
|
||||
|
||||
Expected: either no matches or matches unrelated to a public user-facing knob.
|
||||
This change removes an internal runner fallback and does not add, remove, or
|
||||
rename public CLI behavior, configuration, or docs-site content.
|
||||
|
||||
- [ ] **Step 7: Commit final verification notes if files changed**
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
git status --short
|
||||
```
|
||||
|
||||
Expected: clean after the two implementation commits. If this command reports
|
||||
new changes, stop and inspect them before finishing; final verification should
|
||||
not create extra source changes.
|
||||
|
||||
## Self-review
|
||||
|
||||
Spec coverage:
|
||||
|
||||
- Rollout step 11 is covered by Tasks 1 through 4: the private fallback setting,
|
||||
helper module, old runner branch, trace event, and fallback tests are deleted.
|
||||
- The isolated-diff WorkUnit flow remains covered by existing real-git tests and
|
||||
the new failed-WorkUnit regression in Task 4.
|
||||
- Agent-facing instructions are aligned with the spec's worktree invariant in
|
||||
Task 5: sibling WorkUnit edits are not visible inside a child worktree.
|
||||
- Override ingestion remains outside the WorkUnit execution branch and still
|
||||
uses prior report materialization plus serial reconciliation.
|
||||
|
||||
Placeholder scan:
|
||||
|
||||
- This plan contains exact file paths, test names, replacement snippets,
|
||||
commands, and expected results.
|
||||
- There are no deferred implementation markers or unspecified edge-case
|
||||
instructions.
|
||||
|
||||
Type consistency:
|
||||
|
||||
- `IngestSettingsPort` no longer includes `sharedWorktreeSourceKeys`.
|
||||
- `isolatedDiffEnabled` remains the runner's internal summary flag and is
|
||||
equivalent to `!overrideReport`.
|
||||
- The removed trace event is `shared_worktree_path_enabled`; retained isolated
|
||||
events include `isolated_diff_enabled`, `work_unit_child_created`, and
|
||||
`work_unit_patch_collected`.
|
||||
|
||||
Execution handoff:
|
||||
|
||||
Plan complete and saved to
|
||||
`docs/superpowers/plans/2026-05-18-isolated-diff-ingestion-v1-shared-worktree-removal.md`.
|
||||
|
||||
Two execution options:
|
||||
|
||||
1. **Subagent-Driven (recommended)** - Dispatch a fresh subagent per task,
|
||||
review between tasks, and keep iteration fast.
|
||||
2. **Inline Execution** - Execute tasks in this session using
|
||||
`superpowers:executing-plans`, with batch execution and checkpoints.
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,612 @@
|
|||
# Isolated-diff ingestion design
|
||||
|
||||
**Date:** 2026-05-17
|
||||
**Author:** Andrey Avtomonov
|
||||
**Status:** Design - pending implementation plan
|
||||
|
||||
## Background
|
||||
|
||||
KTX ingests third-party context sources into durable project memory: raw source
|
||||
snapshots, wiki pages, semantic-layer sources, evidence documents, candidates,
|
||||
and fallback records. The current bundle runner stages raw source data in one
|
||||
ingestion session worktree, then runs work units against that same mutable
|
||||
worktree.
|
||||
|
||||
A Metabase ingestion run exposed the failure mode this design addresses. One
|
||||
work unit inferred and wrote the semantic-layer measure
|
||||
`mart_account_segments.total_contract_arr_cents`, a later work unit overwrote
|
||||
the same source with `total_contract_arr`, and the generated wiki page kept
|
||||
referencing the stale non-existent measure. The local per-work-unit checks did
|
||||
not catch the final cross-artifact inconsistency because durable writes were
|
||||
accepted into shared state before final integration.
|
||||
|
||||
The fix is not a Metabase-only validation patch. The same class of risk exists
|
||||
any time LLM-authored work units mutate durable wiki or semantic-layer files:
|
||||
Metabase cards, Notion pages and clusters, dbt YAML, MetricFlow YAML, Looker
|
||||
dashboards and explores, and LookML models and views can all produce overlapping
|
||||
or contested memory artifacts. KTX needs one ingestion execution model that
|
||||
isolates agent-authored changes, integrates them deliberately, and validates
|
||||
the final project state globally.
|
||||
|
||||
## Goals
|
||||
|
||||
This design creates one opinionated ingestion algorithm for all context sources.
|
||||
Connector-specific code stays responsible for source-shaped work: fetching raw
|
||||
data, normalizing raw files, planning work units, and optionally projecting
|
||||
deterministic facts. The shared runner owns execution correctness.
|
||||
|
||||
The design has these goals:
|
||||
|
||||
- Run all agent-authored durable writes in isolated per-work-unit worktrees.
|
||||
- Treat each work unit's git diff as its proposal artifact.
|
||||
- Integrate accepted diffs through a shared artifact-aware merge path.
|
||||
- Resolve expected cross-work-unit overlap with bounded agent repair before
|
||||
failing the run.
|
||||
- Run final global semantic gates before any changes reach the main project
|
||||
worktree.
|
||||
- Keep connector variance minimal and source-shaped, not pipeline-shaped.
|
||||
- Avoid proposal manifests, typed candidates, and extra reporting entities for
|
||||
the first implementation.
|
||||
- Preserve deterministic projections for source systems with authoritative
|
||||
structured metadata.
|
||||
|
||||
## Non-goals
|
||||
|
||||
This design does not change the wiki frontmatter schema, wiki page file layout,
|
||||
the semantic-layer YAML format, or the raw source snapshot layouts. It does add
|
||||
a narrow author-facing inline-code grammar for explicit wiki body references to
|
||||
semantic-layer entities and raw tables, because body text is part of the
|
||||
stale-reference failure class. It also does not remove source adapters' current
|
||||
fetch and chunk logic in one large rewrite.
|
||||
|
||||
This design does not introduce public connector knobs such as
|
||||
`executionMode`, `planningStrategy`, or `conflictPolicy`. The core runner
|
||||
becomes more opinionated instead.
|
||||
|
||||
This design does not require all connectors to stop using candidates. Candidate
|
||||
storage remains valid for flows that intentionally defer wiki curation. The
|
||||
isolation model applies when a work unit writes durable project files.
|
||||
|
||||
## Locked design direction
|
||||
|
||||
The ingestion runner uses one flow for every source that can produce durable
|
||||
changes.
|
||||
|
||||
```text
|
||||
fetch raw
|
||||
-> optional deterministic project
|
||||
-> adapter plans WorkUnit[]
|
||||
-> isolated WU diffs
|
||||
-> artifact-aware integration
|
||||
-> global semantic gates
|
||||
-> squash
|
||||
```
|
||||
|
||||
The important invariant is that the core runner does not know why a work unit
|
||||
exists. A dbt adapter may plan by model, Notion may plan by page or cluster,
|
||||
MetricFlow may plan by graph component, and Looker may plan by dashboard or
|
||||
explore. Those differences describe the source system. They are not ingestion
|
||||
execution modes.
|
||||
|
||||
## Architecture
|
||||
|
||||
The design splits ingestion into two layers with explicit responsibility
|
||||
boundaries.
|
||||
|
||||
### Source adapter layer
|
||||
|
||||
The adapter owns source semantics. It fetches raw evidence, normalizes that
|
||||
evidence into staged files, and plans work units from the staged snapshot and
|
||||
diff scope.
|
||||
|
||||
The adapter may also provide deterministic projectors. A projector is code that
|
||||
converts authoritative source facts into KTX artifacts without an agent. Good
|
||||
examples are live database schema introspection and straightforward MetricFlow
|
||||
semantic-model import.
|
||||
|
||||
The isolation-relevant adapter surface remains small:
|
||||
|
||||
```ts
|
||||
interface SourceAdapter {
|
||||
source: string;
|
||||
skillNames: string[];
|
||||
|
||||
fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void>;
|
||||
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
|
||||
|
||||
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
|
||||
resolveSlTargets?(ctx: SlTargetResolutionContext): Promise<string[]>;
|
||||
}
|
||||
```
|
||||
|
||||
This is the subset the isolated-diff runner needs to understand source-shaped
|
||||
planning and deterministic projection. It is not a proposal to delete existing
|
||||
`SourceAdapter` fields. Existing lifecycle and source-support fields such as
|
||||
`detect`, `readFetchReport`, `listTargetConnectionIds`, `clusterWorkUnits`,
|
||||
`describeScope`, `onPullSucceeded`, `evidenceIndexing`, `triageSupported`,
|
||||
`getTriageSignals`, and `reconcileSkillNames` stay part of the adapter contract
|
||||
until a separate cleanup intentionally removes them with migration impact
|
||||
called out.
|
||||
|
||||
`chunk()` returns ordinary `WorkUnit[]`. The runner does not need a
|
||||
`planningStrategy` enum because the source adapter can plan by any domain shape
|
||||
that makes sense.
|
||||
|
||||
### Ingestion execution layer
|
||||
|
||||
The runner owns correctness, isolation, and integration. After `WorkUnit[]`
|
||||
exists, all connectors follow the same execution path.
|
||||
|
||||
The runner is responsible for:
|
||||
|
||||
- creating the ingestion integration worktree from the project base commit;
|
||||
- committing deterministic projection in the integration worktree before child
|
||||
worktree creation;
|
||||
- creating one child worktree per work unit from the post-projection ingestion
|
||||
base commit;
|
||||
- scoping tools to the work unit's raw files and allowed target connections;
|
||||
- running the agent loop inside the work unit worktree;
|
||||
- validating touched artifacts before accepting the work unit diff;
|
||||
- collecting the work unit git diff;
|
||||
- applying accepted diffs into the integration worktree;
|
||||
- resolving textual and artifact-level conflicts;
|
||||
- running final global gates; and
|
||||
- squashing the integration worktree back to the project main worktree.
|
||||
|
||||
## Worktree model
|
||||
|
||||
The design uses three levels of git state.
|
||||
|
||||
```text
|
||||
project main worktree
|
||||
ingest integration worktree
|
||||
per-work-unit worktree(s)
|
||||
```
|
||||
|
||||
The project main worktree is the durable KTX project state. The ingestion
|
||||
integration worktree stages raw snapshots, deterministic projections, accepted
|
||||
work-unit diffs, reconciliation changes, and final gate repairs before one
|
||||
squash merge back to main.
|
||||
|
||||
Deterministic projection runs first in the integration worktree, after the raw
|
||||
snapshot is staged and before any per-work-unit worktree is created. The runner
|
||||
commits those projector changes as a single projection commit. The integration
|
||||
worktree's post-projection HEAD is the ingestion base commit referenced in this
|
||||
design. If the adapter has no projector, the raw-snapshot commit is the
|
||||
ingestion base commit.
|
||||
|
||||
Each per-work-unit worktree starts from the same ingestion base commit. A work
|
||||
unit never observes another concurrent work unit's transient edits. This makes
|
||||
the work unit diff a clean proposal against a stable base. Work units observe
|
||||
deterministic projection outputs, including through `dependencyPaths` context,
|
||||
and do not re-derive authoritative projected facts.
|
||||
|
||||
The integration worktree and each per-work-unit worktree must share one Git
|
||||
object database, created through `git worktree add` from the same repository.
|
||||
This is required so `git apply --3way` can resolve the base blobs recorded in
|
||||
each work-unit patch during integration.
|
||||
|
||||
The runner creates and runs child worktrees under the existing
|
||||
`workUnitMaxConcurrency` setting. A run may have many planned work units, but no
|
||||
more than that bound may be active or left on disk at once. The default remains
|
||||
serial execution. Child worktrees must be cleaned up after the diff, transcript,
|
||||
and outcome metadata are persisted, including failure paths. Adapters with
|
||||
large fan-out, such as Notion, may use `clusterWorkUnits` before execution to
|
||||
keep work-unit count tractable, but clustering remains source-shaped planning
|
||||
rather than a separate execution mode.
|
||||
|
||||
## Work-unit lifecycle
|
||||
|
||||
Each work unit follows a fixed lifecycle.
|
||||
|
||||
1. Create a child worktree at the ingestion base commit.
|
||||
2. Build a scoped tool session for the child worktree.
|
||||
3. Run the source skill and agent loop.
|
||||
4. Run work-unit-local gates against touched artifacts.
|
||||
5. If gates pass, record `git diff --binary --no-renames` from base to child HEAD.
|
||||
6. If gates fail, mark the work unit failed and discard the child worktree.
|
||||
7. Clean up the child worktree after the diff and transcript are persisted.
|
||||
|
||||
The work unit outcome stores the existing operational metadata KTX already
|
||||
records: unit key, status, actions, touched semantic-layer sources, failure
|
||||
reason, raw files, and transcript path. It does not add a proposal manifest.
|
||||
The diff is the proposal.
|
||||
|
||||
For `slDisallowed` work units, isolation is defense in depth. The scoped
|
||||
work-unit tools must withhold semantic-layer write and edit tools, and the
|
||||
integration layer must reject any otherwise accepted diff from that work unit
|
||||
that touches `semantic-layer/**`. This catches buggy or bypassed tool behavior
|
||||
before an invalid LookML connection-mismatch write can reach the integration
|
||||
worktree.
|
||||
|
||||
### Diff proposal contract
|
||||
|
||||
The proposal artifact is a Git patch with binary-safe content, not the existing
|
||||
hash-based raw-source `DiffSet`.
|
||||
|
||||
The first implementation must use one pinned patch contract:
|
||||
|
||||
- collect `git diff --binary --no-renames <base>..HEAD`;
|
||||
- disable rename and copy detection so renames are represented as delete plus
|
||||
create in version one;
|
||||
- preserve mode changes from the patch metadata, but reject unexpected
|
||||
executable-mode or binary changes under known text artifact roots such as
|
||||
`wiki/**` and `semantic-layer/**`;
|
||||
- apply each accepted patch to the integration worktree with
|
||||
`git apply --3way --index`;
|
||||
- do not use `git apply --reject`, because partial hunk application is not an
|
||||
accepted integration state; and
|
||||
- if patch application fails, leaves conflicts, or touches a path disallowed for
|
||||
that work unit, roll back the integration worktree to its pre-apply HEAD and
|
||||
classify the outcome as a textual conflict.
|
||||
|
||||
Delete-versus-edit, recreate-versus-edit, and delete-versus-create races are
|
||||
therefore textual conflicts when Git cannot apply the patch cleanly. If Git
|
||||
applies the patch but known artifact validators reject the resulting tree, the
|
||||
outcome is a semantic conflict.
|
||||
|
||||
## Integration lifecycle
|
||||
|
||||
The integration worktree applies accepted work-unit diffs after local gates
|
||||
pass. The runner applies diffs in a deterministic order, using the original
|
||||
work-unit index unless a future implementation introduces explicit dependency
|
||||
ordering.
|
||||
|
||||
Integration has three outcome classes:
|
||||
|
||||
- Clean patch application: the diff applies without conflict.
|
||||
- Textual conflict: git cannot apply the patch cleanly.
|
||||
- Semantic conflict: the patch applies textually but creates an invalid or
|
||||
inconsistent artifact.
|
||||
|
||||
Textual conflicts are resolved before semantic gates run when a bounded
|
||||
resolver agent can produce a valid result. Overlapping work-unit writes are
|
||||
normal, especially for Metabase cards that target the same semantic-layer marts
|
||||
from different collections. The runner must treat overlap as an integration
|
||||
case, not as a reason to fail immediately.
|
||||
|
||||
Version one is agent-first. If `git apply --3way --index` leaves conflicts,
|
||||
the runner starts a resolver agent in the integration worktree. The resolver
|
||||
receives only the failed patch, already-applied patches, conflicted files,
|
||||
relevant work-unit transcripts, raw evidence paths, and the final-gate rules.
|
||||
The resolver must preserve all non-conflicting accepted content, resolve
|
||||
duplicate or competing artifact entries from evidence, and edit only files
|
||||
touched by the failed patch or already-applied overlapping patches.
|
||||
|
||||
The runner then reruns artifact gates for the changed files and continues with
|
||||
the remaining patches if validation passes. Resolver attempts are capped to
|
||||
avoid an unbounded repair loop. A run fails only after the bounded resolver
|
||||
attempts cannot produce a valid integration tree.
|
||||
|
||||
Deterministic semantic merge is a later optimization, not a version-one
|
||||
requirement. After measuring resolver latency, cost, and failure modes, KTX can
|
||||
add merge helpers for common semantic-layer YAML cases, such as additive
|
||||
`measures`, `segments`, `columns`, `joins`, and `descriptions` updates keyed by
|
||||
their stable logical identifiers. Those helpers can replace agent calls for
|
||||
mechanical merges once the measured v1 behavior justifies the added complexity.
|
||||
|
||||
The integration worktree is preserved on failure with conflict markers or
|
||||
resolver edits, work-unit patches, transcripts, trace events, and the failure
|
||||
report. The runner never squashes a failed or partially repaired integration
|
||||
tree back to the project main worktree.
|
||||
|
||||
### Gate repair stage
|
||||
|
||||
The gate repair stage handles cases where patches apply cleanly but the
|
||||
combined tree fails final semantic or wiki gates. This is distinct from textual
|
||||
conflict resolution: the tree is textually valid, but the artifacts violate KTX
|
||||
contracts.
|
||||
|
||||
After each patch integration and after reconciliation, the runner runs final
|
||||
artifact gates for the affected scope. If gates fail, the runner classifies the
|
||||
errors before deciding whether to repair or fail.
|
||||
|
||||
Repairable gate errors include:
|
||||
|
||||
- stale wiki body references to renamed semantic-layer entities;
|
||||
- invalid `sl_refs` entries that point to entities instead of sources;
|
||||
- inline prose that accidentally uses explicit SL reference syntax;
|
||||
- duplicate measures, segments, or joins with equivalent definitions;
|
||||
- missing or stale wiki references created by accepted patches; and
|
||||
- join or source references that can be corrected from the composed manifest
|
||||
and work-unit evidence.
|
||||
|
||||
High-risk gate errors fail without automatic repair unless a later
|
||||
implementation adds a stronger evidence contract:
|
||||
|
||||
- two work units define the same measure with different business meaning;
|
||||
- a required warehouse table or column does not exist;
|
||||
- a SQL source fails execution and no obvious localized rewrite exists; or
|
||||
- the repair would require choosing between conflicting facts without evidence.
|
||||
|
||||
For repairable errors, the runner starts a gate repair agent with the exact
|
||||
gate errors, changed files, relevant work-unit transcripts, raw evidence paths,
|
||||
and final-gate rules. The agent may edit only the files involved in the gate
|
||||
failure. The runner reruns gates after each repair attempt and caps attempts to
|
||||
one or two passes per integration stage. If the tree still fails, the run stops
|
||||
with the final gate report and preserved integration worktree.
|
||||
|
||||
### Reconciliation in the new flow
|
||||
|
||||
Reconciliation remains a shared runner stage, but it runs as a serial
|
||||
integration-stage pass instead of a parallel work unit.
|
||||
|
||||
The runner applies all accepted work-unit diffs to the integration worktree,
|
||||
resolves textual conflicts that can be resolved, and then runs reconciliation in
|
||||
that integration worktree before final global gates and before squash.
|
||||
Reconciliation must see the integrated state because its job is to resolve
|
||||
cross-work-unit duplicates, evictions, fallbacks, and source-specific
|
||||
reconcile guidance.
|
||||
|
||||
Reconciliation runs exactly once per integration pass, serially against the
|
||||
integration worktree, after all accepted work-unit diffs have been applied and
|
||||
after textual conflicts are resolved. It never runs inside a child worktree and
|
||||
never overlaps with work-unit execution. This is the safety carve-out from the
|
||||
isolation goal: concurrent agent writes are the failure mode being avoided, and
|
||||
reconciliation is non-concurrent by construction.
|
||||
|
||||
Reconciliation is not allowed to mutate project main directly. Its changes are
|
||||
captured as a reconciliation diff against the pre-reconciliation integration
|
||||
HEAD and recorded in the existing stage/report metadata. Reconciliation gates
|
||||
validate the artifacts touched by the reconciliation diff plus any wiki page or
|
||||
semantic-layer source referenced by changed frontmatter or body references,
|
||||
using the same artifact-class validators as work-unit gates. Reconciliation may
|
||||
write only to target connections authorized by the adapter for the ingest run,
|
||||
but it is not subject to any single work unit's `slDisallowed` scope. The final
|
||||
global gates validate the combined tree after reconciliation. If reconciliation
|
||||
introduces an invalid wiki or semantic-layer reference, touches an unauthorized
|
||||
target, or records an unresolvable artifact conflict, the runner sends
|
||||
repairable failures through the gate repair stage and stops before squash only
|
||||
when bounded repair cannot produce a valid tree.
|
||||
|
||||
## Artifact-aware integration
|
||||
|
||||
KTX durable artifacts are structured enough that git-only merge is not a strong
|
||||
correctness boundary. Artifact-aware integration must parse and validate known
|
||||
file classes after diffs are applied.
|
||||
|
||||
The first implementation must cover these worktree file classes:
|
||||
|
||||
- semantic-layer source YAML;
|
||||
- wiki markdown frontmatter; and
|
||||
- wiki body references to semantic-layer sources, measures, dimensions, and raw
|
||||
warehouse tables.
|
||||
|
||||
Unmapped fallback records are not worktree files in version one. They remain
|
||||
typed stage-index and report records emitted by `emit_unmapped_fallback`; the
|
||||
integration layer validates their raw paths and structured reason codes as
|
||||
report metadata, not as mergeable artifacts.
|
||||
|
||||
Provenance also stays out of the worktree in version one. The source of truth is
|
||||
the ingest provenance store and report body. Before inserting provenance rows,
|
||||
the global gate derives the planned rows from accepted work-unit actions,
|
||||
reconciliation actions, artifact-resolution records, and skipped raw files, then
|
||||
checks those rows against the integrated worktree and staged raw hashes. Moving
|
||||
provenance to on-disk files would be a separate schema migration, not part of
|
||||
this design.
|
||||
|
||||
Artifact-resolution records are the existing merged or subsumed reconciliation
|
||||
outputs emitted through `emit_artifact_resolution` as
|
||||
`ArtifactResolutionRecord` stage-index records. They are in-memory stage
|
||||
records, not worktree files, and they feed the provenance gate.
|
||||
|
||||
Artifact-aware integration starts with validation plus bounded agent repair.
|
||||
It does not need semantic-layer YAML merge helpers in version one. If two diffs
|
||||
contest the same source YAML or wiki page and bounded agent repair cannot prove
|
||||
correctness, the runner must stop rather than silently accepting stale
|
||||
references. Deterministic semantic merge helpers can be added after v1 metrics
|
||||
show which conflicts are frequent, mechanical, and worth optimizing.
|
||||
|
||||
## Global semantic gates
|
||||
|
||||
Final gates run after every accepted diff, deterministic projection, and
|
||||
reconciliation change has landed in the integration worktree. These gates are
|
||||
global because the final failure can emerge only after independent valid diffs
|
||||
combine.
|
||||
|
||||
The final gates must include:
|
||||
|
||||
- semantic-layer validation for touched and dependency sources;
|
||||
- wiki `wiki_refs` validation;
|
||||
- wiki frontmatter `sl_refs` validation, including source-level and
|
||||
measure-level references;
|
||||
- wiki body validation for explicit semantic-layer source, measure, dimension,
|
||||
and table references; and
|
||||
- provenance validation for raw paths referenced by new or changed artifacts
|
||||
before those rows are inserted into SQLite.
|
||||
|
||||
For semantic-layer validation, touched sources are sources changed by accepted
|
||||
work-unit diffs, deterministic projection, or reconciliation. Dependency sources
|
||||
are their direct declared-join neighbors in the composed semantic-layer graph,
|
||||
including sources they join to and sources that join to them. Version one runs
|
||||
the existing whole-connection structural checks and source-scoped checks with
|
||||
the touched-and-dependency source set; it does not expand dependency scope to a
|
||||
transitive SQL-projection closure.
|
||||
|
||||
The wiki body gate needs a narrow grammar so ordinary prose does not become a
|
||||
semantic-layer reference. In version one, an explicit body reference is one of
|
||||
these Markdown forms outside fenced code blocks:
|
||||
|
||||
- an inline code token in the form `source.entity`, where both parts are plain
|
||||
identifier tokens, `source` matches a visible semantic-layer source, and
|
||||
`entity` matches one of that source's measures, dimensions, or segments;
|
||||
- an inline code token in the form `connectionId/source.entity`, where
|
||||
`source.entity` follows the same plain-identifier rule and validates against
|
||||
that specific target connection;
|
||||
- an inline code token in the form `source:source_name`, which validates a
|
||||
source-level semantic-layer reference; or
|
||||
- an inline code token in the form `table:qualified_table_name`, which validates
|
||||
a raw warehouse table reference against the visible raw table/catalog sources.
|
||||
|
||||
The parser ignores unformatted prose, fenced SQL examples, wildcard patterns
|
||||
such as `mart_nrr_quarterly.*_arr_cents`, inline SQL predicates such as
|
||||
`users.is_internal = false`, and unprefixed single-token inline code. Two-part
|
||||
inline code that does not name a visible semantic-layer source is not treated
|
||||
as an SL entity reference; use the `table:` prefix for raw warehouse table
|
||||
references.
|
||||
|
||||
The `total_contract_arr_cents` incident is the regression case for this gate:
|
||||
the integrated tree must fail if a wiki page references
|
||||
`mart_account_segments.total_contract_arr_cents` as an inline-code body token
|
||||
while the final semantic-layer source defines only `total_contract_arr`.
|
||||
|
||||
## Deterministic projection
|
||||
|
||||
Some connectors have authoritative structured inputs that do not need an LLM to
|
||||
write KTX artifacts. Those connectors can provide deterministic projectors that
|
||||
run in the integration worktree.
|
||||
|
||||
Projection is different from work-unit execution:
|
||||
|
||||
- projectors are code, not agents;
|
||||
- projectors run against the integration worktree;
|
||||
- projectors produce ordinary durable file changes; and
|
||||
- projector outputs still pass final global gates.
|
||||
|
||||
The runner infers hybrid behavior from the adapter. If an adapter has both
|
||||
projectors and work units, it is hybrid. If it has only projectors, it is
|
||||
deterministic. If it has only work units, it uses isolated diffs. No public
|
||||
`executionMode` knob is needed.
|
||||
|
||||
## Connector migration notes
|
||||
|
||||
Each connector keeps its source-shaped planning logic. The migration changes
|
||||
where durable writes happen and how they are integrated.
|
||||
|
||||
### Metabase
|
||||
|
||||
Metabase must move first because it produced the observed stale-measure wiki
|
||||
reference. Collection and card chunking can remain adapter-specific, but direct
|
||||
wiki and semantic-layer writes must happen in per-work-unit worktrees.
|
||||
|
||||
The regression test must reproduce two work units that touch
|
||||
`mart_account_segments`: one writes a wiki reference to an inferred measure and
|
||||
the other leaves the final source with a different measure name. The final global
|
||||
gate must reject the integrated tree.
|
||||
|
||||
### dbt
|
||||
|
||||
dbt uses source-shaped planning by model or schema file. Deterministic
|
||||
projection is appropriate for straightforward model, source, column, and
|
||||
description facts when dbt artifacts are authoritative. Agent work units remain
|
||||
useful for business wiki synthesis, ambiguous relationship interpretation, and
|
||||
enrichment that is not directly represented in dbt YAML.
|
||||
|
||||
### MetricFlow
|
||||
|
||||
MetricFlow uses source-shaped planning by graph component. Existing
|
||||
deterministic semantic-model import code becomes a projector in the ingestion
|
||||
flow. Agent work units handle unsupported constructs, cross-model explanations,
|
||||
and wiki synthesis.
|
||||
|
||||
### Looker
|
||||
|
||||
Looker already defers some dashboard and look knowledge through candidates.
|
||||
That can continue. Any direct semantic-layer writes from explores or query
|
||||
translation must run through isolated work-unit diffs.
|
||||
|
||||
Looker-specific API and file-adapter collisions remain connector domain logic,
|
||||
but final correctness still belongs to the shared integration gates.
|
||||
|
||||
### LookML
|
||||
|
||||
LookML already has useful source-shaped ownership rules: models, views, orphan
|
||||
views, dashboards, and connection-mismatch guards. Those rules stay in the
|
||||
adapter. Direct semantic-layer writes move into isolated work-unit diffs.
|
||||
|
||||
Connection-mismatch work units can keep their existing write restrictions. The
|
||||
runner enforces those restrictions through scoped tools and target connection
|
||||
resolution.
|
||||
|
||||
### Notion
|
||||
|
||||
Notion pages and clusters can create overlapping durable wiki knowledge and can
|
||||
write semantic-layer overlays after warehouse verification. Notion therefore
|
||||
uses the same isolated-diff execution model for direct durable writes.
|
||||
|
||||
Large Notion workspaces still need source-shaped clustering to control context
|
||||
size and cost. Clustering remains adapter logic; correctness comes from isolated
|
||||
diffs and final global gates.
|
||||
|
||||
## Minimal connector variance
|
||||
|
||||
New connectors must not choose from a menu of ingestion architectures. They
|
||||
must provide the small amount of source-specific behavior the shared runner
|
||||
needs.
|
||||
|
||||
Every connector answers these questions:
|
||||
|
||||
- How does KTX fetch or receive raw evidence?
|
||||
- How does KTX normalize that evidence into staged files?
|
||||
- How does KTX split the staged evidence into `WorkUnit[]`?
|
||||
- Are any source facts authoritative enough for deterministic projection?
|
||||
- Which target semantic-layer connections can the connector write to?
|
||||
|
||||
Everything else is shared runner behavior.
|
||||
|
||||
## Regression tests
|
||||
|
||||
The implementation plan must start with narrow tests that prove the new
|
||||
execution model prevents the known failure class.
|
||||
|
||||
The first test creates a fake or Metabase-like adapter with two work units
|
||||
starting from the same base:
|
||||
|
||||
1. Work unit A writes a wiki page that references
|
||||
`mart_account_segments.total_contract_arr_cents` as an inline-code body
|
||||
token.
|
||||
2. Work unit B writes or overwrites the final semantic-layer source with only
|
||||
`total_contract_arr`.
|
||||
3. Both work units pass their local gates in isolation.
|
||||
4. Integration applies both diffs.
|
||||
5. The final global gate fails the run before squash.
|
||||
|
||||
Additional tests cover:
|
||||
|
||||
- two work units editing different wiki pages without conflict;
|
||||
- two work units editing the same semantic-layer overlay with additive changes,
|
||||
where the resolver agent preserves both changes and gates the repaired file;
|
||||
- two work units editing the same semantic-layer overlay with incompatible
|
||||
definitions, where the resolver agent receives the conflict context and the
|
||||
run fails only after bounded repair attempts cannot prove a result;
|
||||
- a textual conflict in a wiki page where the resolver agent preserves
|
||||
non-conflicting accepted content and gates the repaired page before squash;
|
||||
- a cleanly merged tree that fails final gates, where the gate repair agent
|
||||
fixes a stale wiki reference and the run continues;
|
||||
- an unrepairable final-gate failure, such as a missing warehouse column, where
|
||||
the runner stops with a preserved integration worktree and report;
|
||||
- a hybrid adapter case where deterministic projector outputs are visible in a
|
||||
child worktree before work-unit wiki synthesis, and the final global gate
|
||||
catches any stale reference to a non-existent projected semantic-layer entity;
|
||||
- Notion-style direct wiki writes with invalid `sl_refs`; and
|
||||
- LookML-style `slDisallowed` work units where write tools are unavailable and
|
||||
integration rejects any diff that still touches `semantic-layer/**`.
|
||||
|
||||
## Rollout
|
||||
|
||||
The rollout must be incremental because the current runner is shared by all
|
||||
adapters.
|
||||
|
||||
The rollout switch is runner-owned. During migration it may be a private
|
||||
per-source allowlist, or an internal `IngestSettingsPort` map keyed by
|
||||
`sourceKey`, but it must not become a `SourceAdapter` field or public connector
|
||||
configuration knob.
|
||||
|
||||
1. Add the per-work-unit worktree executor behind that internal runner setting.
|
||||
2. Add diff collection and deterministic integration in the existing runner.
|
||||
3. Add bounded resolver-agent handling for textual conflicts.
|
||||
4. Add final global wiki and semantic-layer reference gates, including the wiki
|
||||
body reference parser defined above.
|
||||
5. Add bounded gate-repair-agent handling for repairable final-gate failures.
|
||||
6. Instrument resolver latency, attempts, repaired files, and failure classes.
|
||||
7. Migrate Metabase to the new execution path first.
|
||||
8. Migrate Notion, LookML, Looker, dbt, and MetricFlow.
|
||||
9. Add deterministic semantic merge helpers only after v1 metrics show which
|
||||
agent repairs are frequent and mechanical enough to justify optimization.
|
||||
10. Promote the new path to the default after the Metabase regression test and
|
||||
at least one non-Metabase connector pass.
|
||||
11. Remove the old shared-worktree work-unit execution path.
|
||||
|
||||
The rollout is complete when every connector that permits agent-authored durable
|
||||
writes uses isolated diffs and all integrations pass the same final global
|
||||
gates.
|
||||
|
|
@ -0,0 +1,443 @@
|
|||
# Adapter-owned ingest finalization design
|
||||
|
||||
**Date:** 2026-05-18
|
||||
**Author:** Andrey Avtomonov
|
||||
**Status:** Design - pending implementation plan
|
||||
|
||||
## Background
|
||||
|
||||
The isolated-diff ingestion migration made KTX's shared bundle runner
|
||||
responsible for one durable execution model: stage raw source data, run
|
||||
source-planned work units in isolated child worktrees, integrate their diffs,
|
||||
reconcile, run final gates, and squash the accepted integration tree back into
|
||||
the project worktree.
|
||||
|
||||
That direction is correct, but the current code still has a runner-level
|
||||
post-processing extension point. `IngestBundleRunnerDeps.postProcessors` maps a
|
||||
source key to an arbitrary `IngestBundlePostProcessorPort`, and local runtime
|
||||
wires `historic-sql` to `HistoricSqlProjectionPostProcessor`. That path can
|
||||
write durable semantic-layer and wiki artifacts after work-unit integration and
|
||||
reconciliation, outside the source adapter contract.
|
||||
|
||||
Historic SQL exposed why the extra path exists. Its table and pattern work units
|
||||
emit typed evidence, then a deterministic projection step merges the evidence
|
||||
into `_schema` usage and historic-SQL wiki pages. Some of that work is local to
|
||||
one work unit, but other behavior is whole-run maintenance: marking stale table
|
||||
usage, reusing existing pattern pages, and archiving old pattern pages. Those
|
||||
aggregate decisions do not fit cleanly inside independent per-work-unit writes.
|
||||
|
||||
The design goal is to preserve legitimate adapter-owned deterministic
|
||||
maintenance without keeping a generic runner-level escape hatch.
|
||||
|
||||
## Goals
|
||||
|
||||
This design tightens the isolated-diff architecture around a stable boundary:
|
||||
the generic runner owns execution mechanics, and adapters own source semantics.
|
||||
|
||||
The design has these goals:
|
||||
|
||||
- Remove runner-level `postProcessors` as an alternate durable-write pipeline.
|
||||
- Add a first-class `SourceAdapter.finalize?()` hook for deterministic
|
||||
post-work-unit source maintenance.
|
||||
- Keep `finalize?()` constrained, observable, and subject to the same final
|
||||
validation gates as work-unit and reconciliation changes.
|
||||
- Preserve historic-SQL aggregate projection behavior without treating it as a
|
||||
hidden fallback ingestion path.
|
||||
- Keep public execution knobs out of the adapter API.
|
||||
|
||||
## Non-goals
|
||||
|
||||
This design does not rework source-specific chunking, fetch formats, wiki page
|
||||
frontmatter, semantic-layer YAML, or raw source layouts. It does not replace
|
||||
agent-authored work units with deterministic projectors. It also does not add a
|
||||
public `executionMode`, `planningStrategy`, `conflictPolicy`, or source-key
|
||||
allowlist.
|
||||
|
||||
Override ingest remains a special correction operation that reuses a prior raw
|
||||
snapshot and forces reconciliation. It should be documented and tested as
|
||||
override replay, not as a fallback pipeline. This design does not require
|
||||
override ingest to run source work units.
|
||||
|
||||
## Locked design direction
|
||||
|
||||
The shared ingestion runner keeps one ordered pipeline for sources that can
|
||||
write durable project artifacts.
|
||||
|
||||
```text
|
||||
fetch raw
|
||||
-> adapter plans WorkUnit[]
|
||||
-> optional adapter project
|
||||
-> isolated WU diffs
|
||||
-> artifact-aware integration
|
||||
-> reconciliation
|
||||
-> optional adapter finalize
|
||||
-> runner wiki-SL-ref repair
|
||||
-> final target policy and artifact gates
|
||||
-> squash
|
||||
```
|
||||
|
||||
The exact implementation may continue to call `chunk()` before `project()` so a
|
||||
projector can consume `parseArtifacts`. The architectural invariant is that
|
||||
`project()` runs in the integration worktree before child worktrees start, while
|
||||
`finalize()` runs in the integration worktree after accepted work-unit and
|
||||
reconciliation changes are present.
|
||||
|
||||
Adapters decide what source-specific work belongs in `project()`, work units,
|
||||
or `finalize()`. The runner decides when those phases run, captures their git
|
||||
effects, enforces target scope, runs gates, writes traces and reports, and
|
||||
squashes the final tree.
|
||||
|
||||
## Adapter API
|
||||
|
||||
The source adapter contract should make deterministic source phases explicit.
|
||||
|
||||
```ts
|
||||
interface SourceAdapter {
|
||||
readonly source: string;
|
||||
readonly skillNames: string[];
|
||||
readonly reconcileSkillNames?: string[];
|
||||
readonly evidenceIndexing?: 'documents';
|
||||
readonly triageSupported?: boolean;
|
||||
|
||||
getTriageSignals?(stagedDir: string, externalId: string): Promise<TriageSignals>;
|
||||
detect(stagedDir: string): Promise<boolean>;
|
||||
fetch?(pullConfig: unknown, stagedDir: string, ctx: FetchContext): Promise<void>;
|
||||
readFetchReport?(stagedDir: string): Promise<SourceFetchReport | null>;
|
||||
listTargetConnectionIds?(stagedDir: string): Promise<string[]>;
|
||||
chunk(stagedDir: string, diffSet?: DiffSet): Promise<ChunkResult>;
|
||||
clusterWorkUnits?(ctx: ClusterWorkUnitsContext): Promise<WorkUnit[]>;
|
||||
project?(ctx: DeterministicProjectionContext): Promise<ProjectionResult>;
|
||||
finalize?(ctx: DeterministicFinalizationContext): Promise<FinalizationResult>;
|
||||
describeScope?(stagedDir: string): Promise<ScopeDescriptor>;
|
||||
onPullSucceeded?(ctx: PullSucceededContext): Promise<void>;
|
||||
}
|
||||
```
|
||||
|
||||
`finalize?()` is not a compatibility wrapper for old post-processors. It is a
|
||||
source-adapter method with a fixed location in the runner lifecycle.
|
||||
|
||||
```ts
|
||||
interface DeterministicFinalizationContext {
|
||||
connectionId: string;
|
||||
sourceKey: string;
|
||||
syncId: string;
|
||||
jobId: string;
|
||||
runId: string;
|
||||
stagedDir: string;
|
||||
workdir: string;
|
||||
parseArtifacts?: unknown;
|
||||
stageIndex: StageIndex;
|
||||
workUnitOutcomes: WorkUnitOutcome[];
|
||||
reconciliationActions: MemoryAction[];
|
||||
overrideReplay?: FinalizationOverrideReplay;
|
||||
}
|
||||
|
||||
interface FinalizationResult {
|
||||
warnings: string[];
|
||||
errors: string[];
|
||||
touchedSources: TouchedSlSource[];
|
||||
changedWikiPageKeys: string[];
|
||||
actions?: MemoryAction[];
|
||||
result?: unknown;
|
||||
}
|
||||
|
||||
interface FinalizationOverrideReplay {
|
||||
priorJobId: string;
|
||||
priorRunId: string;
|
||||
priorSyncId: string;
|
||||
evictionRawPaths: string[];
|
||||
}
|
||||
```
|
||||
|
||||
The implementation plan can adjust exact type names to match the existing
|
||||
module layout, but the contract must preserve these semantics:
|
||||
|
||||
- `finalize?()` is deterministic TypeScript code, not an agent loop.
|
||||
- It runs only in the ingestion integration worktree.
|
||||
- It may write ordinary durable project files.
|
||||
- It must report the semantic-layer sources and wiki page keys it believes it
|
||||
touched so the runner can verify that declaration against the worktree diff.
|
||||
- Outside override replay, `stageIndex` is the canonical runner index for
|
||||
accepted work-unit actions, touched sources, evictions, reconciliation records,
|
||||
and artifact resolutions visible to the current run.
|
||||
- In override replay, `stageIndex` is a prior-run replay index for work-unit
|
||||
facts. It may contain prior-run work-unit actions, touched sources, and
|
||||
artifact records, and adapters must not treat those entries as current-run
|
||||
evidence. The runner must not replay prior-report `evictionsApplied` as
|
||||
current-run eviction evidence. If override reconciliation records eviction
|
||||
decisions, those records are fresh current-run `stageIndex.evictionsApplied`
|
||||
entries.
|
||||
- `workUnitOutcomes` contains only work units executed in the current run. It
|
||||
is empty when override replay skips source work units.
|
||||
- `reconciliationActions` contains only accepted reconciliation writes emitted
|
||||
through the reconciliation tool session in the current run. These actions have
|
||||
already mutated the integration worktree.
|
||||
- `overrideReplay` being present is the canonical signal that source work units
|
||||
did not produce current-run evidence unless another context field explicitly
|
||||
carries fresh current-run deterministic input.
|
||||
- `overrideReplay.evictionRawPaths` contains the deleted raw paths loaded from
|
||||
the prior report's `evictionInputs` for the reused raw snapshot. It is the
|
||||
only override-replay raw-path allowlist for removed-from-snapshot provenance.
|
||||
It is not, by itself, proof that a particular durable artifact is stale or was
|
||||
observed by current-run work units.
|
||||
- `actions` in `FinalizationResult` are descriptive records for finalization
|
||||
writes that the adapter already performed. The runner must not re-apply them.
|
||||
When finalization actions are intended to create provenance rows, they must
|
||||
carry defensible `rawPaths`: current-snapshot paths from the current raw
|
||||
snapshot, removed-from-snapshot paths from current-run
|
||||
`stageIndex.evictionsApplied`, or removed-from-snapshot paths from
|
||||
`overrideReplay.evictionRawPaths` when override replay is present.
|
||||
Finalization actions without defensible raw-path attribution are still
|
||||
reported, but the runner must exclude them from provenance and surface that
|
||||
exclusion explicitly.
|
||||
- It cannot mutate the main project worktree directly.
|
||||
- The finalization context must not pass a root-scoped service that can bypass
|
||||
the integration worktree. `workdir` is the durable write boundary. If a future
|
||||
helper is added to the context, the contract must name it as worktree-scoped
|
||||
and state whether it is read-only or allowed to write.
|
||||
|
||||
The existing adapter API fields unrelated to deterministic projection and
|
||||
finalization remain part of the contract. Adding `finalize?()` must not remove
|
||||
triage or evidence-indexing support.
|
||||
|
||||
## Override replay
|
||||
|
||||
Override ingest remains a replay of a prior raw snapshot with forced
|
||||
reconciliation. It does not execute source work units or call `adapter.chunk()`
|
||||
in this design, so finalization must not silently assume fresh work-unit
|
||||
evidence exists.
|
||||
|
||||
The runner should still enter the finalization phase for adapters that
|
||||
implement `finalize?()`, but it must pass explicit override metadata. In that
|
||||
mode, `workUnitOutcomes` is empty, `parseArtifacts` is absent,
|
||||
`overrideReplay.evictionRawPaths` is populated from the prior report's
|
||||
`evictionInputs`, `stageIndex` comes from the prior report with prior
|
||||
`evictionsApplied` excluded, and `reconciliationActions` contains only new
|
||||
override reconciliation actions.
|
||||
|
||||
If a future implementation intentionally re-parses the materialized override
|
||||
raw snapshot, it must expose that fact through an explicit override-safe context
|
||||
field instead of relying on `parseArtifacts` alone. `parseArtifacts` by itself
|
||||
is never current work-unit evidence in override replay and never authorizes
|
||||
historic-SQL whole-run cleanup.
|
||||
|
||||
Adapters must treat missing current-run deterministic inputs as a no-op, not as
|
||||
negative evidence. For historic SQL, override replay must not mark tables stale,
|
||||
mark pattern pages stale, or archive pattern pages from an empty current-run
|
||||
evidence directory. Whole-run cleanup can run only when `overrideReplay` is
|
||||
absent and current-run work-unit evidence exists, or when a future explicit
|
||||
override-safe context field names equivalent facts. Any override-safe
|
||||
finalization must be derived from the materialized raw snapshot or explicit
|
||||
prior-report data. In particular, prior-run
|
||||
`stageIndex.workUnits[*].actions`, prior-run touched sources, and prior-run
|
||||
artifact records are not proof that the current override run observed or failed
|
||||
to observe those artifacts.
|
||||
|
||||
## Runner responsibilities
|
||||
|
||||
The runner owns all reusable mechanics around `finalize?()`.
|
||||
|
||||
After reconciliation completes, the runner calls `adapter.finalize?()` if it
|
||||
exists. The runner captures the pre-finalization commit, derives the
|
||||
finalization changed paths from the integration-worktree git diff, commits those
|
||||
changes, records the commit SHA and touched paths in the run trace/report,
|
||||
includes finalization actions in saved-memory counts, and runs wiki-SL-ref
|
||||
repair before final target-policy and artifact gates.
|
||||
|
||||
The integration-worktree diff is the source of truth for finalization touched
|
||||
paths, changed wiki page keys, and semantic-layer paths. The adapter's
|
||||
`touchedSources` and `changedWikiPageKeys` declaration is a verification input,
|
||||
not the downstream authority. The runner must derive the final repair and gate
|
||||
scope from the diff, cross-check the adapter declaration against that diff, and
|
||||
fail the run on under-reporting or over-reporting that would make wiki-SL-ref
|
||||
repair, target-policy checks, final gates, reports, traces, or provenance use a
|
||||
different artifact set from the actual finalization commit.
|
||||
|
||||
The runner-derived semantic-layer scope must include logical
|
||||
`TouchedSlSource` tuples, not only file paths. Standalone semantic-layer files
|
||||
under `semantic-layer/<connectionId>/<sourceName>.yaml` can map structurally to
|
||||
`{ connectionId, sourceName }`. Aggregate semantic-layer files, including
|
||||
`semantic-layer/<connectionId>/_schema/*.yaml`, must be resolved by comparing
|
||||
the pre-finalization and post-finalization materialized semantic-layer sources
|
||||
with the worktree-scoped semantic-layer parser/loader. Wiki page keys continue
|
||||
to map structurally from `wiki/global/<pageKey>.md`. If the runner cannot
|
||||
resolve a changed semantic-layer path to logical touched sources with its own
|
||||
resolver, the run must fail; it must not fall back to the adapter declaration as
|
||||
the downstream scope.
|
||||
|
||||
`wiki_sl_ref_repair` remains a runner mechanic, not an adapter method. It runs
|
||||
after finalization and before final gates, and it uses the normal target
|
||||
connection set plus the runner-derived finalization touched sources to decide
|
||||
which semantic-layer references are visible. Its writes are part of the same
|
||||
integration worktree diff as finalization/reconciliation, so target-policy
|
||||
checks, final artifact gates, reports, traces, and squash behavior cover those
|
||||
writes before changes reach the main project worktree.
|
||||
|
||||
The runner must treat finalization like deterministic projection and
|
||||
reconciliation, not like a free-form source-key plug-in. It must enforce the
|
||||
same target-connection policy used for work-unit and reconciliation changes.
|
||||
If finalization writes an unauthorized semantic-layer target, modifies artifacts
|
||||
outside the authorized target set, references a missing semantic-layer entity, or
|
||||
returns errors, the run fails before changes reach the main project worktree.
|
||||
|
||||
The runner should expose one trace phase named `finalization`. It should not
|
||||
keep a `post_processor` stage, `IngestBundlePostProcessorPort`,
|
||||
`deps.postProcessors`, or report fields that imply a parallel post-processor
|
||||
pipeline.
|
||||
|
||||
## Adapter application
|
||||
|
||||
Each adapter continues to use the same generic runner mechanics, while keeping
|
||||
source-specific choices inside the adapter.
|
||||
|
||||
- `metabase` fetches cards and dashboards, computes scope, plans
|
||||
card/dashboard work units, and usually does not need `project()` or
|
||||
`finalize()`.
|
||||
- `notion` fetches pages, extracts triage signals, clusters page work units,
|
||||
and usually does not need deterministic finalization.
|
||||
- `dbt` fetches the repository, parses dbt project metadata, plans model work
|
||||
units, and may later add `project()` if dbt YAML import becomes deterministic.
|
||||
- `lookml` fetches LookML, produces validation artifacts, plans model and
|
||||
explore work units, and may later add `project()` for deterministic LookML to
|
||||
semantic-layer import.
|
||||
- `looker` fetches runtime bundles, fetch reports, target connections, and
|
||||
triage signals. It continues to rely on work-unit diffs and shared gates.
|
||||
- `metricflow` is the current strong `project()` example. It imports
|
||||
authoritative semantic models before child worktrees start, then lets any
|
||||
work units observe those projected files.
|
||||
- `live-database` can remain work-unit based, but database schema introspection
|
||||
is a good future `project()` candidate because the schema is authoritative
|
||||
structured metadata.
|
||||
- `historic-sql` should move current post-processor behavior into the adapter.
|
||||
Local table-usage and pattern-page writes may move into work-unit tools where
|
||||
they are genuinely per-unit. Whole-run maintenance such as stale table usage,
|
||||
pattern-page reuse, and stale/archive page decisions belongs in
|
||||
`HistoricSqlSourceAdapter.finalize()`.
|
||||
- `fake` remains a test adapter and does not need deterministic phases.
|
||||
|
||||
## Historic-SQL migration
|
||||
|
||||
Historic SQL should stop using evidence-only tool output plus runner-level
|
||||
post-processing as its durable projection path.
|
||||
|
||||
The preferred migration is:
|
||||
|
||||
1. Keep historic-SQL work units responsible for source-shaped analysis.
|
||||
2. Use source-specific tools for per-unit durable writes when the output is
|
||||
local to that unit, such as a table's usage metadata or one pattern page.
|
||||
3. Move whole-run deterministic cleanup into
|
||||
`HistoricSqlSourceAdapter.finalize()`.
|
||||
4. Delete `HistoricSqlProjectionPostProcessor`, `IngestBundlePostProcessorPort`,
|
||||
`deps.postProcessors`, and `post_processor` memory-flow/report stages.
|
||||
|
||||
If the implementation keeps typed evidence as an internal handoff between
|
||||
historic-SQL work units and `finalize()`, that evidence must be framed as
|
||||
source-specific input to the adapter's deterministic finalization, not as a
|
||||
generic runner post-processing mechanism. The evidence files must not become a
|
||||
public compatibility surface.
|
||||
|
||||
Historic-SQL finalization must distinguish "no current-run evidence exists"
|
||||
from "the current snapshot proves this artifact is stale." Whole-run cleanup
|
||||
such as stale table usage, pattern-page staleness, and archive decisions can
|
||||
run only when finalization has current-run historic-SQL evidence or an explicit
|
||||
override-safe source of equivalent facts.
|
||||
|
||||
## Reports and observability
|
||||
|
||||
Reports should describe first-class pipeline phases, not historical extension
|
||||
points. The isolated-diff summary should include finalization metadata when the
|
||||
adapter implements `finalize?()`: whether it ran, finalization commit SHA,
|
||||
touched paths, touched semantic-layer sources, changed wiki page keys,
|
||||
warnings, descriptive finalization actions, and source-specific result payload.
|
||||
|
||||
Saved-memory counts should come from work-unit, reconciliation, and
|
||||
finalization memory actions plus touched artifact reporting. Finalization
|
||||
actions are reporting/provenance records for writes that already happened in
|
||||
the integration worktree; they are not a second write channel. There should be
|
||||
no special `postProcessorSavedMemoryCounts` or `postProcessor` report body.
|
||||
Memory-flow phases should use `finalization` instead of `post_processor`.
|
||||
|
||||
The runner owns provenance for finalization. Adapters return touched artifacts
|
||||
and optional descriptive actions, but they do not call the provenance port.
|
||||
When finalization actions include valid `rawPaths`, the runner folds them into
|
||||
the normal provenance plan using the current `sourceKey`, `syncId`, raw content
|
||||
hashes, artifact kind, artifact key, target connection, and action type. The
|
||||
finalization phase and commit SHA belong in trace/report metadata; they should
|
||||
not be fabricated inside adapter-written files.
|
||||
|
||||
Finalization reports must show both the adapter-declared touched artifacts and
|
||||
the runner-derived touched artifacts from the finalization git diff. When those
|
||||
sets differ, the report and trace must include the mismatch and the run must
|
||||
fail before wiki-SL-ref repair or final gates rely on the wrong scope. When a
|
||||
finalization action is excluded from provenance because no defensible raw path
|
||||
exists, the report must name the action and reason instead of silently dropping
|
||||
it.
|
||||
|
||||
Traces must make finalization useful for postmortems. At minimum, record
|
||||
`finalization_started`, `finalization_committed`, `finalization_skipped`, and
|
||||
`finalization_failed` events with source key, touched paths, warnings, and
|
||||
error summaries.
|
||||
|
||||
## Failure handling
|
||||
|
||||
Finalization failures are ingestion failures. If `finalize?()` returns errors,
|
||||
throws, writes unauthorized targets, or causes final gates to fail, the runner
|
||||
marks the run failed and leaves the main project worktree unchanged.
|
||||
|
||||
Finalization should run after reconciliation because it may need to inspect the
|
||||
accepted work-unit and reconciliation result. Final gates should run after
|
||||
finalization because finalization writes durable project artifacts.
|
||||
|
||||
Finalization must not be used to repair arbitrary integration conflicts or
|
||||
rerun agent work. Conflict repair remains part of artifact-aware integration and
|
||||
reconciliation.
|
||||
|
||||
Finalization must also preserve reconciliation and accepted work-unit writes
|
||||
from the same run. The runner must remember the paths changed before
|
||||
finalization and fail the run if `finalize?()` modifies any of those paths
|
||||
after reconciliation. If a source needs deterministic maintenance for an artifact
|
||||
created or edited by a work unit in the same run, that behavior belongs in the
|
||||
source-specific work-unit tool or in a later run, not in post-reconciliation
|
||||
finalization.
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
The implementation is complete when these conditions are true:
|
||||
|
||||
- No production runtime wiring references `deps.postProcessors`.
|
||||
- `IngestBundlePostProcessorPort` and `HistoricSqlProjectionPostProcessor` are
|
||||
removed from source exports and package export tests.
|
||||
- `SourceAdapter.finalize?()` exists with typed context and result objects.
|
||||
- The runner invokes `finalize?()` after reconciliation and before final gates.
|
||||
- Finalization changes are committed in the integration worktree and included
|
||||
in target-policy checks, final gates, reports, traces, and provenance inputs.
|
||||
- Override replay passes explicit override metadata to finalization, including
|
||||
`overrideReplay.evictionRawPaths`; leaves `workUnitOutcomes` empty when work
|
||||
units are skipped; omits `parseArtifacts` unless a future explicit
|
||||
override-safe input is added; and proves historic-SQL finalization does not
|
||||
use prior-run `stageIndex` records as current-run evidence and does not mark
|
||||
artifacts stale or archive them when current-run evidence is missing.
|
||||
- Finalization provenance uses current raw paths, current-run
|
||||
`stageIndex.evictionsApplied`, or `overrideReplay.evictionRawPaths`, and
|
||||
actions without defensible raw-path attribution are reported as excluded from
|
||||
provenance.
|
||||
- The runner derives finalization touched paths, wiki page keys, and
|
||||
semantic-layer scope from the integration-worktree git diff, resolves
|
||||
aggregate semantic-layer files such as `_schema/*.yaml` to logical touched
|
||||
sources with the runner's own semantic-layer parser/loader, cross-checks the
|
||||
adapter's touched-artifact declaration, and fails on mismatches or
|
||||
unresolvable changed semantic-layer paths.
|
||||
- The runner fails when finalization modifies a path already changed by accepted
|
||||
work-unit or reconciliation writes in the same run.
|
||||
- `wiki_sl_ref_repair` remains a runner-owned step after finalization and
|
||||
before final gates, consumes runner-derived finalization touched sources, and
|
||||
has its writes covered by target-policy checks and final gates.
|
||||
- Finalization `actions` are not re-applied by the runner; they are included
|
||||
only in reporting, saved-memory counts, and provenance planning when their
|
||||
raw-path attribution is valid.
|
||||
- Historic SQL uses adapter-owned finalization for whole-run projection
|
||||
maintenance.
|
||||
- Tests cover a successful finalization, a finalization failure, unauthorized
|
||||
finalization target rejection, override replay finalization behavior,
|
||||
wiki-SL-ref repair placement, and historic-SQL projection behavior without
|
||||
runner-level post-processors.
|
||||
|
|
@ -11,6 +11,7 @@
|
|||
},
|
||||
"scripts": {
|
||||
"artifacts:build": "node scripts/package-artifacts.mjs build",
|
||||
"artifacts:build-runtime": "node scripts/package-artifacts.mjs build-runtime",
|
||||
"artifacts:check": "node scripts/package-artifacts.mjs check",
|
||||
"artifacts:live-db-smoke": "node scripts/installed-live-database-smoke.mjs",
|
||||
"artifacts:verify": "node scripts/package-artifacts.mjs verify",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
import { cancel, confirm, isCancel, log, spinner } from '@clack/prompts';
|
||||
|
||||
const ESC = String.fromCharCode(0x1b);
|
||||
|
||||
export interface KtxCliSpinner {
|
||||
start(message: string): void;
|
||||
message(message: string): void;
|
||||
|
|
@ -7,6 +9,10 @@ export interface KtxCliSpinner {
|
|||
error(message: string): void;
|
||||
}
|
||||
|
||||
/**
 * Output sink accepted by `createStaticCliSpinner`.
 * Only a `stderr` object with a string `write` method is required.
 */
export interface KtxCliSpinnerIo {
|
||||
stderr: { write(chunk: string): void };
|
||||
}
|
||||
|
||||
export interface KtxCliPromptAdapter {
|
||||
confirm(options: { message: string; initialValue?: boolean }): Promise<boolean>;
|
||||
cancel(message: string): void;
|
||||
|
|
@ -31,6 +37,31 @@ export function createClackSpinner(): KtxCliSpinner {
|
|||
return spinner();
|
||||
}
|
||||
|
||||
/**
 * Wraps `text` in the escape sequences `ESC[35m` … `ESC[39m`
 * (ANSI SGR 35 = magenta foreground, 39 = reset to default foreground).
 */
function magenta(text: string): string {
|
||||
return `${ESC}[35m${text}${ESC}[39m`;
|
||||
}
|
||||
|
||||
/**
 * Wraps `text` in the escape sequences `ESC[31m` … `ESC[39m`
 * (ANSI SGR 31 = red foreground, 39 = reset to default foreground).
 */
function red(text: string): string {
|
||||
return `${ESC}[31m${text}${ESC}[39m`;
|
||||
}
|
||||
|
||||
/**
 * Creates a `KtxCliSpinner` that does not animate: every call writes exactly
 * one newline-terminated line to `io.stderr`, prefixed with a colored glyph.
 *
 * @param io Sink whose `stderr.write` receives each rendered line.
 * @returns A spinner whose `start`, `message`, and `stop` lines use a magenta
 *   glyph and whose `error` line uses a red glyph.
 */
export function createStaticCliSpinner(io: KtxCliSpinnerIo): KtxCliSpinner {
|
||||
return {
|
||||
start(message) {
|
||||
io.stderr.write(`${magenta('◐')} ${message}\n`);
|
||||
},
|
||||
message(message) {
|
||||
io.stderr.write(`${magenta('│')} ${message}\n`);
|
||||
},
|
||||
stop(message) {
|
||||
io.stderr.write(`${magenta('◇')} ${message}\n`);
|
||||
},
|
||||
error(message) {
|
||||
io.stderr.write(`${red('■')} ${message}\n`);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function createClackPromptAdapter(): KtxCliPromptAdapter {
|
||||
return {
|
||||
async confirm(options) {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,13 @@ function stubIo(): KtxCliIo {
|
|||
}
|
||||
|
||||
function stubPackageInfo(): KtxCliPackageInfo {
|
||||
return { name: '@ktx/cli', version: '0.0.0-test', contextPackageName: '@ktx/context' };
|
||||
return {
|
||||
name: '@ktx/cli',
|
||||
version: '0.0.0-test',
|
||||
packageVersion: '0.0.0-private',
|
||||
runtimeVersion: '0.0.0-test',
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
||||
describe('buildKtxProgram', () => {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import { registerWikiCommands } from './commands/knowledge-commands.js';
|
|||
import { registerMcpCommands } from './commands/mcp-commands.js';
|
||||
import { registerSetupCommands } from './commands/setup-commands.js';
|
||||
import { registerSlCommands } from './commands/sl-commands.js';
|
||||
import { registerSqlCommands } from './commands/sql-commands.js';
|
||||
import { registerStatusCommands } from './commands/status-commands.js';
|
||||
import { registerDevCommands } from './dev.js';
|
||||
import { renderMissingProjectMessage } from './doctor.js';
|
||||
|
|
@ -56,7 +57,8 @@ type CommandPathNode = CommandWithGlobalOptions & {
|
|||
parent?: CommandPathNode | null;
|
||||
};
|
||||
|
||||
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'mcp']);
|
||||
const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'sql', 'status', 'mcp']);
|
||||
const PROJECT_INDEPENDENT_DEV_COMMANDS = new Set(['runtime', 'schema']);
|
||||
const COMMANDS_THAT_CREATE_PROJECT = new Set(['setup', 'ktx dev init']);
|
||||
const COMMANDS_WITH_OWN_MISSING_PROJECT_HANDLING = new Set(['status']);
|
||||
const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']);
|
||||
|
|
@ -171,7 +173,7 @@ function isProjectAwareCommand(path: string[]): boolean {
|
|||
|
||||
const rootCommand = path[1];
|
||||
if (rootCommand === 'dev') {
|
||||
return path[2] !== undefined && path[2] !== 'runtime';
|
||||
return path[2] !== undefined && !PROJECT_INDEPENDENT_DEV_COMMANDS.has(path[2]);
|
||||
}
|
||||
return rootCommand !== undefined && PROJECT_AWARE_ROOT_COMMANDS.has(rootCommand);
|
||||
}
|
||||
|
|
@ -416,6 +418,7 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command {
|
|||
});
|
||||
registerWikiCommands(program, context);
|
||||
registerSlCommands(program, context);
|
||||
registerSqlCommands(program, context);
|
||||
registerStatusCommands(program, context);
|
||||
registerMcpCommands(program, context);
|
||||
registerDevCommands(program, context);
|
||||
|
|
|
|||
|
|
@ -7,8 +7,10 @@ import type { KtxPublicIngestArgs } from './public-ingest.js';
|
|||
import type { KtxRuntimeArgs } from './runtime.js';
|
||||
import type { KtxSetupArgs } from './setup.js';
|
||||
import type { KtxSlArgs } from './sl.js';
|
||||
import type { KtxSqlArgs } from './sql.js';
|
||||
import { profileMark, profileSpan } from './startup-profile.js';
|
||||
import type { KtxTextIngestArgs } from './text-ingest.js';
|
||||
import { resolveKtxRuntimeVersion } from './release-version.js';
|
||||
|
||||
profileMark('module:cli-runtime');
|
||||
|
||||
|
|
@ -17,6 +19,8 @@ const requirePackageJson = createRequire(import.meta.url);
|
|||
/**
 * Package metadata exposed to CLI commands.
 *
 * NOTE(review): `packageInfoFromJson` appears to set `packageVersion` to the
 * raw `package.json` version and `runtimeVersion` to the result of
 * `resolveKtxRuntimeVersion`; `version` looks like it mirrors
 * `runtimeVersion` — confirm against that function.
 */
export interface KtxCliPackageInfo {
|
||||
name: string;
|
||||
version: string;
|
||||
packageVersion: string;
|
||||
runtimeVersion: string;
|
||||
contextPackageName: '@ktx/context';
|
||||
}
|
||||
|
||||
|
|
@ -34,6 +38,7 @@ export interface KtxCliDeps {
|
|||
runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise<number>;
|
||||
knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise<number>;
|
||||
sl?: (args: KtxSlArgs, io: KtxCliIo) => Promise<number>;
|
||||
sql?: (args: KtxSqlArgs, io: KtxCliIo) => Promise<number>;
|
||||
mcp?: {
|
||||
startDaemon?: typeof import('./managed-mcp-daemon.js').startKtxMcpDaemon;
|
||||
stopDaemon?: typeof import('./managed-mcp-daemon.js').stopKtxMcpDaemon;
|
||||
|
|
@ -59,9 +64,16 @@ export function packageInfoFromJson(packageJson: unknown): KtxCliPackageInfo {
|
|||
throw new Error('Invalid KTX CLI package metadata');
|
||||
}
|
||||
|
||||
const runtimeVersion = resolveKtxRuntimeVersion({
|
||||
packageName: packageJson.name,
|
||||
packageVersion: packageJson.version,
|
||||
});
|
||||
|
||||
return {
|
||||
name: packageJson.name,
|
||||
version: packageJson.version,
|
||||
version: runtimeVersion,
|
||||
packageVersion: packageJson.version,
|
||||
runtimeVersion,
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ export function registerIngestCommands(
|
|||
.option('--query-history-window-days <days>', 'Query-history lookback window for this run', parsePositiveIntegerOption)
|
||||
.addOption(new Option('--plain', 'Print plain text output').conflicts(['json']))
|
||||
.addOption(new Option('--json', 'Print JSON output').conflicts(['plain']))
|
||||
.option('--yes', 'Install required managed runtime features without prompting')
|
||||
.option('--no-input', 'Disable interactive terminal input')
|
||||
.showHelpAfterError();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { type Command, Option } from '@commander-js/extra-typings';
|
||||
import {
|
||||
type CommandWithGlobalOptions,
|
||||
type KtxCliCommandContext,
|
||||
parsePositiveIntegerOption,
|
||||
resolveCommandProjectDir,
|
||||
|
|
@ -14,6 +15,11 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg
|
|||
context.setExitCode(await runner(args, context.io));
|
||||
}
|
||||
|
||||
/**
 * Reports whether the `debug` option is set to exactly `true` for `command`.
 *
 * Reads options via `optsWithGlobals()` when the command provides it and
 * falls back to `opts()` otherwise. Any value other than the boolean `true`
 * (including `undefined`) yields `false`.
 */
function isDebugEnabled(command: CommandWithGlobalOptions): boolean {
|
||||
const options = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as { debug?: unknown };
|
||||
return options.debug === true;
|
||||
}
|
||||
|
||||
export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void {
|
||||
const wiki = program
|
||||
.command('wiki')
|
||||
|
|
@ -83,6 +89,7 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon
|
|||
userId: options.userId,
|
||||
output: options.output,
|
||||
json: options.json,
|
||||
...(isDebugEnabled(command) ? { debug: true } : {}),
|
||||
...(options.limit !== undefined ? { limit: options.limit } : {}),
|
||||
});
|
||||
},
|
||||
|
|
|
|||
99
packages/cli/src/commands/sql-commands.test.ts
Normal file
99
packages/cli/src/commands/sql-commands.test.ts
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
import { Command } from '@commander-js/extra-typings';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import type { KtxCliCommandContext } from '../cli-program.js';
|
||||
import { registerSqlCommands } from './sql-commands.js';
|
||||
|
||||
/**
 * Builds a minimal command context for exercising `registerSqlCommands`.
 *
 * `io` writers, `runInit`, and `writeDebug` are vitest mocks. `setExitCode`
 * stores the code in a closure that the `exitCode` getter exposes. Any
 * property in `overrides` replaces the default of the same name; the
 * `exitCode` getter is defined last so overrides cannot shadow it.
 *
 * `packageInfo` carries every field of `KtxCliPackageInfo`, including
 * `packageVersion` and `runtimeVersion`, so the fixture stays a valid value of
 * that interface rather than relying on the trailing type assertion to hide
 * missing fields.
 */
function makeContext(overrides: Partial<KtxCliCommandContext> = {}): KtxCliCommandContext {
  let exitCode = 0;
  return {
    io: {
      stdout: { write: vi.fn() },
      stderr: { write: vi.fn() },
    },
    deps: {},
    packageInfo: {
      name: '@ktx/cli',
      version: '0.0.0-test',
      packageVersion: '0.0.0-private',
      runtimeVersion: '0.0.0-test',
      contextPackageName: '@ktx/context',
    },
    setExitCode: (code) => {
      exitCode = code;
    },
    runInit: vi.fn(),
    writeDebug: vi.fn(),
    ...overrides,
    get exitCode() {
      return exitCode;
    },
  } as KtxCliCommandContext;
}
|
||||
|
||||
// Exercises the `sql` command registration against a bare Commander program.
// Each case calls `exitOverride()` so Commander throws instead of exiting the
// process, and injects a mock `sql` runner through `deps`.
describe('registerSqlCommands', () => {
|
||||
// The SQL is passed as two separate argv entries ('select', '1'); the runner
// is expected to receive them as the single string 'select 1'.
it('routes positional SQL through the sql runner', async () => {
|
||||
const program = new Command().exitOverride().option('--project-dir <path>');
|
||||
const sql = vi.fn(async () => 0);
|
||||
const context = makeContext({ deps: { sql } });
|
||||
registerSqlCommands(program, context);
|
||||
|
||||
await expect(
|
||||
program.parseAsync(
|
||||
['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse', 'select', '1'],
|
||||
{ from: 'user' },
|
||||
),
|
||||
).resolves.toBe(program);
|
||||
|
||||
// Pins the full argument object, including the default maxRows of 1000 and
// the cliVersion taken from the fixture's packageInfo.version.
expect(sql).toHaveBeenCalledWith(
|
||||
{
|
||||
command: 'execute',
|
||||
projectDir: '/tmp/ktx-sql',
|
||||
connectionId: 'warehouse',
|
||||
sql: 'select 1',
|
||||
maxRows: 1000,
|
||||
output: undefined,
|
||||
json: false,
|
||||
cliVersion: '0.0.0-test',
|
||||
},
|
||||
context.io,
|
||||
);
|
||||
});
|
||||
|
||||
it('supports the short connection flag', async () => {
|
||||
const program = new Command().exitOverride().option('--project-dir <path>');
|
||||
const sql = vi.fn(async () => 0);
|
||||
const context = makeContext({ deps: { sql } });
|
||||
registerSqlCommands(program, context);
|
||||
|
||||
await expect(
|
||||
program.parseAsync(['--project-dir', '/tmp/ktx-sql', 'sql', '-c', 'warehouse', 'select 1'], {
|
||||
from: 'user',
|
||||
}),
|
||||
).resolves.toBe(program);
|
||||
|
||||
expect(sql).toHaveBeenCalledWith(expect.objectContaining({ connectionId: 'warehouse', sql: 'select 1' }), context.io);
|
||||
});
|
||||
|
||||
// No positional SQL is supplied, so parsing must reject and the runner must
// never be called.
it('rejects missing SQL before invoking the runner', async () => {
|
||||
const program = new Command().exitOverride().option('--project-dir <path>');
|
||||
const sql = vi.fn(async () => 0);
|
||||
registerSqlCommands(program, makeContext({ deps: { sql } }));
|
||||
|
||||
await expect(
|
||||
program.parseAsync(['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse'], {
|
||||
from: 'user',
|
||||
}),
|
||||
).rejects.toThrow('missing required argument');
|
||||
|
||||
expect(sql).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// 10001 is one above the accepted maximum named in the expected error text.
it('rejects maxRows above the CLI cap', async () => {
|
||||
const program = new Command().exitOverride().option('--project-dir <path>');
|
||||
const sql = vi.fn(async () => 0);
|
||||
registerSqlCommands(program, makeContext({ deps: { sql } }));
|
||||
|
||||
await expect(
|
||||
program.parseAsync(
|
||||
['--project-dir', '/tmp/ktx-sql', 'sql', '--connection', 'warehouse', '--max-rows', '10001', 'select 1'],
|
||||
{ from: 'user' },
|
||||
),
|
||||
).rejects.toThrow('must be an integer between 1 and 10000');
|
||||
|
||||
expect(sql).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
62
packages/cli/src/commands/sql-commands.ts
Normal file
62
packages/cli/src/commands/sql-commands.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-typings';
|
||||
import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js';
|
||||
import type { KtxSqlArgs } from '../sql.js';
|
||||
import { profileMark } from '../startup-profile.js';
|
||||
|
||||
profileMark('module:commands/sql-commands');
|
||||
|
||||
const DEFAULT_MAX_ROWS = 1000;
|
||||
const MAX_ROWS_CAP = 10_000;
|
||||
|
||||
/**
 * Option parser for `--max-rows`.
 *
 * @param value Raw option text from the command line.
 * @returns The value as a number when it is an integer in `[1, MAX_ROWS_CAP]`.
 * @throws InvalidArgumentError when the value is not an integer or falls
 *   outside that range.
 */
function parseSqlMaxRowsOption(value: string): number {
|
||||
// Number() is lenient: besides plain decimals it also accepts forms such as
// '0x10' or '1e3', which pass the integer check below when in range.
const parsed = Number(value);
|
||||
if (!Number.isInteger(parsed) || parsed < 1 || parsed > MAX_ROWS_CAP) {
|
||||
throw new InvalidArgumentError(`must be an integer between 1 and ${MAX_ROWS_CAP}`);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
/**
 * Runs the SQL command and records its result as the CLI exit code.
 *
 * Uses the injected `context.deps.sql` runner when present; otherwise
 * dynamically imports `../sql.js` and uses its `runKtxSql` export.
 *
 * @param context Command context supplying `deps`, `io`, and `setExitCode`.
 * @param args Arguments forwarded unchanged to the runner.
 */
async function runSqlArgs(context: KtxCliCommandContext, args: KtxSqlArgs): Promise<void> {
|
||||
// The dynamic import is only evaluated when no runner was injected.
const runner = context.deps.sql ?? (await import('../sql.js')).runKtxSql;
|
||||
context.setExitCode(await runner(args, context.io));
|
||||
}
|
||||
|
||||
/**
 * Registers the `sql` subcommand on `program`.
 *
 * The command takes a variadic SQL argument (joined with single spaces), a
 * required `--connection`, a validated `--max-rows`, an `--output` mode limited
 * to `pretty | plain | json`, and a `--json` flag. The collected values are
 * forwarded to `runSqlArgs` as an `execute` request.
 */
export function registerSqlCommands(program: Command, context: KtxCliCommandContext): void {
  const outputOption = new Option('--output <mode>', 'Output mode: pretty (default), plain (TSV), or json').choices([
    'pretty',
    'plain',
    'json',
  ]);

  const sqlCommand = program
    .command('sql')
    .description('Execute parser-validated read-only SQL against a configured connection')
    .argument('<sql...>', 'SQL query to execute')
    .requiredOption('-c, --connection <id>', 'KTX connection id')
    .option('--max-rows <n>', 'Maximum rows to return', parseSqlMaxRowsOption, DEFAULT_MAX_ROWS)
    .addOption(outputOption)
    .option('--json', 'Shortcut for --output=json (overrides --output)', false);

  sqlCommand.action(
    async (
      sqlParts: string[],
      options: {
        connection: string;
        maxRows: number;
        output?: 'pretty' | 'plain' | 'json';
        json?: boolean;
      },
      command,
    ) => {
      const { connection, maxRows, output, json } = options;
      await runSqlArgs(context, {
        command: 'execute',
        projectDir: resolveCommandProjectDir(command),
        connectionId: connection,
        // The SQL arrives as separate argv words; rebuild the statement text.
        sql: sqlParts.join(' '),
        maxRows,
        output,
        json: json === true,
        cliVersion: context.packageInfo.version,
      });
    },
  );
}
|
||||
|
|
@ -708,6 +708,10 @@ const INTERNAL_FAILURE_LINE_RE =
|
|||
const ACTIONABLE_FAILURE_LINE_RE =
|
||||
/^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX managed daemon|Error:|Failed\b|Could not\b|Cannot\b)/;
|
||||
|
||||
/**
 * Removes a leading `Error:` label, and any whitespace right after it, from a line.
 * Lines that do not start with `Error:` are returned unchanged.
 */
function trimErrorPrefix(line: string): string {
  const prefix = /^Error:\s*/.exec(line);
  if (prefix === null) {
    return line;
  }
  return line.slice(prefix[0].length);
}
|
||||
|
||||
function firstCapturedFailureLine(output: string | undefined): string | null {
|
||||
const lines = (output ?? '')
|
||||
.split(/\r?\n/)
|
||||
|
|
@ -715,7 +719,8 @@ function firstCapturedFailureLine(output: string | undefined): string | null {
|
|||
.filter((candidate) => candidate.length > 0)
|
||||
.filter((candidate) => !candidate.startsWith('KTX scan completed'))
|
||||
.filter((candidate) => !INTERNAL_FAILURE_LINE_RE.test(candidate));
|
||||
return lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null;
|
||||
const line = lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null;
|
||||
return line ? trimErrorPrefix(line) : null;
|
||||
}
|
||||
|
||||
function isGenericFailedAtDetail(target: KtxPublicIngestPlanTarget, detail: string | null | undefined): boolean {
|
||||
|
|
|
|||
|
|
@ -102,6 +102,35 @@ describe('dev Commander tree', () => {
|
|||
}
|
||||
});
|
||||
|
||||
it('prints config schema without requiring a KTX project directory', async () => {
  const fs = await import('node:fs/promises');
  const os = await import('node:os');
  const path = await import('node:path');
  const scratchDir = await fs.mkdtemp(path.join(os.tmpdir(), 'ktx-dev-schema-'));
  // A path under the scratch directory that is never created.
  const absentProjectDir = path.join(scratchDir, 'missing-project');
  const savedProjectDir = process.env.KTX_PROJECT_DIR;
  const capture = makeIo();

  // Puts KTX_PROJECT_DIR back exactly as it was, including the "unset" state.
  const restoreProjectDirEnv = (): void => {
    if (savedProjectDir === undefined) {
      delete process.env.KTX_PROJECT_DIR;
    } else {
      process.env.KTX_PROJECT_DIR = savedProjectDir;
    }
  };

  try {
    process.env.KTX_PROJECT_DIR = absentProjectDir;

    const run = runKtxCli(['dev', 'schema'], capture.io);
    await expect(run).resolves.toBe(0);

    const schema = JSON.parse(capture.stdout());
    expect(schema).toMatchObject({
      title: 'ktx.yaml',
      type: 'object',
    });
    expect(capture.stderr()).toBe('');
  } finally {
    restoreProjectDirEnv();
    await fs.rm(scratchDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
it('rejects removed dev command groups', async () => {
|
||||
for (const argv of [
|
||||
['dev', 'doctor', 'setup'],
|
||||
|
|
|
|||
|
|
@ -45,7 +45,9 @@ describe('getKtxCliPackageInfo', () => {
|
|||
it('identifies the CLI package and its context dependency', () => {
|
||||
expect(getKtxCliPackageInfo()).toEqual({
|
||||
name: '@ktx/cli',
|
||||
version: '0.0.0-private',
|
||||
version: '0.1.0-rc.1',
|
||||
packageVersion: '0.0.0-private',
|
||||
runtimeVersion: '0.1.0-rc.1',
|
||||
contextPackageName: '@ktx/context',
|
||||
});
|
||||
});
|
||||
|
|
@ -68,6 +70,8 @@ describe('getKtxCliPackageInfo', () => {
|
|||
).toEqual({
|
||||
name: '@kaelio/ktx',
|
||||
version: '0.1.0',
|
||||
packageVersion: '0.1.0',
|
||||
runtimeVersion: '0.1.0',
|
||||
contextPackageName: '@ktx/context',
|
||||
});
|
||||
});
|
||||
|
|
@ -114,7 +118,7 @@ describe('runKtxCli', () => {
|
|||
|
||||
await expect(runKtxCli(['--version'], testIo.io)).resolves.toBe(0);
|
||||
|
||||
expect(testIo.stdout()).toBe('@ktx/cli 0.0.0-private\n');
|
||||
expect(testIo.stdout()).toBe('@ktx/cli 0.1.0-rc.1\n');
|
||||
expect(testIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
|
|
@ -171,6 +175,22 @@ describe('runKtxCli', () => {
|
|||
},
|
||||
searchIo.io,
|
||||
);
|
||||
|
||||
const debugSearchIo = makeIo();
|
||||
await expect(
|
||||
runKtxCli(['--project-dir', tempDir, '--debug', 'wiki', 'search', 'revenue'], debugSearchIo.io, { knowledge }),
|
||||
).resolves.toBe(0);
|
||||
expect(knowledge).toHaveBeenLastCalledWith(
|
||||
{
|
||||
command: 'search',
|
||||
projectDir: tempDir,
|
||||
query: 'revenue',
|
||||
userId: 'local',
|
||||
json: false,
|
||||
debug: true,
|
||||
},
|
||||
debugSearchIo.io,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects removed public wiki read and write commands', async () => {
|
||||
|
|
@ -236,7 +256,7 @@ describe('runKtxCli', () => {
|
|||
expect(listIo.stderr()).toContain("unknown option '--query'");
|
||||
});
|
||||
|
||||
it('routes runtime management commands with the CLI package version', async () => {
|
||||
it('routes runtime management commands with the release runtime version', async () => {
|
||||
const runtime = vi.fn(async () => 0);
|
||||
const installIo = makeIo();
|
||||
const startIo = makeIo();
|
||||
|
|
@ -262,7 +282,7 @@ describe('runKtxCli', () => {
|
|||
1,
|
||||
{
|
||||
command: 'install',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
feature: 'local-embeddings',
|
||||
force: true,
|
||||
},
|
||||
|
|
@ -272,7 +292,7 @@ describe('runKtxCli', () => {
|
|||
2,
|
||||
{
|
||||
command: 'start',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
projectDir: expect.any(String),
|
||||
feature: 'local-embeddings',
|
||||
force: true,
|
||||
|
|
@ -283,7 +303,7 @@ describe('runKtxCli', () => {
|
|||
3,
|
||||
{
|
||||
command: 'stop',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
projectDir: expect.any(String),
|
||||
all: false,
|
||||
},
|
||||
|
|
@ -293,7 +313,7 @@ describe('runKtxCli', () => {
|
|||
4,
|
||||
{
|
||||
command: 'stop',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
projectDir: expect.any(String),
|
||||
all: true,
|
||||
},
|
||||
|
|
@ -303,7 +323,7 @@ describe('runKtxCli', () => {
|
|||
5,
|
||||
{
|
||||
command: 'status',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
json: true,
|
||||
},
|
||||
statusIo.io,
|
||||
|
|
@ -376,7 +396,7 @@ describe('runKtxCli', () => {
|
|||
expect.objectContaining({
|
||||
command: 'query',
|
||||
projectDir: tempDir,
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
query: expect.objectContaining({ measures: ['orders.order_count'], dimensions: [] }),
|
||||
}),
|
||||
|
|
@ -391,7 +411,7 @@ describe('runKtxCli', () => {
|
|||
).resolves.toBe(0);
|
||||
expect(sl).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
}),
|
||||
autoIo.io,
|
||||
|
|
@ -407,7 +427,7 @@ describe('runKtxCli', () => {
|
|||
).resolves.toBe(0);
|
||||
expect(sl).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'never',
|
||||
}),
|
||||
noInputIo.io,
|
||||
|
|
@ -546,7 +566,7 @@ describe('runKtxCli', () => {
|
|||
skipAgents: false,
|
||||
inputMode: 'auto',
|
||||
yes: false,
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
skipLlm: false,
|
||||
skipEmbeddings: false,
|
||||
databaseSchemas: [],
|
||||
|
|
@ -676,7 +696,7 @@ describe('runKtxCli', () => {
|
|||
inputMode: 'disabled',
|
||||
depth: 'fast',
|
||||
queryHistory: 'default',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'never',
|
||||
},
|
||||
testIo.io,
|
||||
|
|
@ -703,7 +723,7 @@ describe('runKtxCli', () => {
|
|||
inputMode: 'auto',
|
||||
depth: 'deep',
|
||||
queryHistory: 'default',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
},
|
||||
testIo.io,
|
||||
|
|
@ -711,6 +731,40 @@ describe('runKtxCli', () => {
|
|||
expect(testIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('routes public ingest --yes as automatic runtime installation', async () => {
  const capture = makeIo();
  const ingestSpy = vi.fn().mockResolvedValue(0);
  const argv = ['--project-dir', tempDir, 'ingest', 'warehouse', '--yes'];

  const run = runKtxCli(argv, capture.io, { publicIngest: ingestSpy });
  await expect(run).resolves.toBe(0);

  // --yes should reach the ingest handler as the 'auto' runtime install policy.
  const expectedArgs = expect.objectContaining({
    projectDir: tempDir,
    targetConnectionId: 'warehouse',
    runtimeInstallPolicy: 'auto',
  });
  expect(ingestSpy).toHaveBeenCalledWith(expectedArgs, capture.io);
});
|
||||
|
||||
it('rejects conflicting public ingest runtime install modes', async () => {
  const capture = makeIo();
  const ingestSpy = vi.fn().mockResolvedValue(0);
  // --yes and --no-input are passed together on purpose.
  const argv = ['--project-dir', tempDir, 'ingest', 'warehouse', '--yes', '--no-input'];

  const run = runKtxCli(argv, capture.io, { publicIngest: ingestSpy });
  await expect(run).resolves.toBe(1);

  expect(ingestSpy).not.toHaveBeenCalled();
  expect(capture.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input');
});
|
||||
|
||||
it('rejects mutually exclusive public ingest depth flags before dispatch', async () => {
|
||||
const testIo = makeIo();
|
||||
const publicIngest = vi.fn().mockResolvedValue(0);
|
||||
|
|
@ -746,7 +800,7 @@ describe('runKtxCli', () => {
|
|||
json: false,
|
||||
inputMode: 'disabled',
|
||||
queryHistory: 'default',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'never',
|
||||
},
|
||||
testIo.io,
|
||||
|
|
@ -1024,7 +1078,7 @@ describe('runKtxCli', () => {
|
|||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret
|
||||
anthropicModel: 'claude-sonnet-4-6',
|
||||
skipLlm: false,
|
||||
|
|
@ -1063,7 +1117,7 @@ describe('runKtxCli', () => {
|
|||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
llmBackend: 'vertex',
|
||||
vertexProject: 'local-gcp-project',
|
||||
vertexLocation: 'us-east5',
|
||||
|
|
@ -1100,7 +1154,7 @@ describe('runKtxCli', () => {
|
|||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
inputMode: 'disabled',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
llmBackend: 'claude-code',
|
||||
llmModel: 'opus',
|
||||
skipLlm: false,
|
||||
|
|
@ -1208,7 +1262,7 @@ describe('runKtxCli', () => {
|
|||
projectDir: '/tmp/project',
|
||||
inputMode: 'disabled',
|
||||
yes: true,
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
skipLlm: true,
|
||||
skipEmbeddings: true,
|
||||
databaseDrivers: ['postgres'],
|
||||
|
|
@ -1526,7 +1580,7 @@ describe('runKtxCli', () => {
|
|||
queryFile: '/tmp/query.json',
|
||||
execute: false,
|
||||
format: 'json',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
},
|
||||
autoIo.io,
|
||||
|
|
@ -1540,7 +1594,7 @@ describe('runKtxCli', () => {
|
|||
queryFile: '/tmp/query.json',
|
||||
execute: false,
|
||||
format: 'json',
|
||||
cliVersion: '0.0.0-private',
|
||||
cliVersion: '0.1.0-rc.1',
|
||||
runtimeInstallPolicy: 'never',
|
||||
},
|
||||
neverIo.io,
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ export type {
|
|||
} from './setup-sources.js';
|
||||
export { runKtxSetupSourcesStep } from './setup-sources.js';
|
||||
export { runKtxRuntime, type KtxRuntimeArgs, type KtxRuntimeDeps } from './runtime.js';
|
||||
export { runKtxSql, type KtxSqlArgs, type KtxSqlDeps } from './sql.js';
|
||||
export {
|
||||
allocateDaemonPort,
|
||||
readManagedPythonDaemonStatus,
|
||||
|
|
|
|||
|
|
@ -35,6 +35,11 @@ describe('runKtxIngest', () => {
|
|||
let tempDir: string;
|
||||
let originalTerm: string | undefined;
|
||||
const interactiveEnv = (): NodeJS.ProcessEnv => ({ ...process.env, CI: 'false' });
|
||||
// Builds the stubbed "runtime is ready" result used by ingest tests for the given project.
const runtimeReady = (projectDir: string) => {
  return {
    status: 'ready' as const,
    projectDir,
    requirements: { features: ['core' as const], requirements: [] },
  };
};
|
||||
|
||||
beforeEach(async () => {
|
||||
resetVizFallbackWarningsForTest();
|
||||
|
|
@ -285,6 +290,7 @@ describe('runKtxIngest', () => {
|
|||
historicSqlProbe: async () => ({ ok: true, lines: ['PASS Historic SQL probe skipped in test'] }),
|
||||
},
|
||||
context: async () => ({ status: 'skipped', projectDir }),
|
||||
runtime: async () => runtimeReady(projectDir),
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
|
@ -629,6 +635,117 @@ describe('runKtxIngest', () => {
|
|||
expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase');
|
||||
});
|
||||
|
||||
it('emits structured child ingest progress during Metabase fan-out', async () => {
  const projectDir = join(tempDir, 'project');
  await writeMetabaseConfig(projectDir);
  const capture = makeIo();
  const progressEvents: Array<{ percent: number; message: string; transient?: boolean }> = [];

  // Identity of the single child ingest the fake fan-out reports on.
  const child = {
    metabaseConnectionId: 'prod-metabase',
    metabaseDatabaseId: 1,
    targetConnectionId: 'warehouse_a',
    jobId: 'metabase-child-1',
  };

  const run = runKtxIngest(
    {
      command: 'run',
      projectDir,
      connectionId: 'prod-metabase',
      adapter: 'metabase',
      outputMode: 'json',
    },
    capture.io,
    {
      progress: (event) => progressEvents.push(event),
      runLocalMetabaseIngest: async (input) => {
        const { progress, memoryFlow } = input;

        // Publishes a one-unit plan and immediately starts that unit.
        const planAndStart = (unitKey: string, rawFile: string): void => {
          memoryFlow?.update({
            plannedWorkUnits: [
              {
                unitKey,
                rawFiles: [rawFile],
                peerFileCount: 0,
                dependencyCount: 0,
              },
            ],
          });
          memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 });
          memoryFlow?.emit({
            type: 'work_unit_started',
            unitKey,
            skills: ['sl_capture'],
            stepBudget: 40,
          });
        };

        progress?.onMetabaseFanoutPlanned?.({
          metabaseConnectionId: 'prod-metabase',
          children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }],
        });
        progress?.onMetabaseChildStarted?.({ ...child });

        // First plan: one unit that steps, reports stage progress, and finishes.
        planAndStart('metabase-col-6', 'cards/40.json');
        memoryFlow?.emit({
          type: 'work_unit_step',
          unitKey: 'metabase-col-6',
          stepIndex: 7,
          stepBudget: 40,
        });
        memoryFlow?.emit({
          type: 'stage_progress',
          stage: 'integration',
          percent: 81,
          message: 'Resolving text conflict for metabase-col-6',
        });
        memoryFlow?.emit({ type: 'work_unit_finished', unitKey: 'metabase-col-6', status: 'success' });

        // Second plan: a new unit that only starts before the child completes.
        planAndStart('metabase-col-7', 'cards/48.json');

        progress?.onMetabaseChildCompleted?.({ ...child, status: 'done' });
        return {
          metabaseConnectionId: 'prod-metabase',
          status: 'all_succeeded',
          totals: { workUnits: 1, failedWorkUnits: 0 },
          children: [],
        };
      },
    },
  );
  await expect(run).resolves.toBe(0);

  const expectedProgress = [
    { percent: 45, message: 'Planned 1 task' },
    { percent: 55, message: 'Processing 1/1 tasks: metabase-col-6' },
    {
      percent: 60,
      message: 'Processing tasks: 0/1 complete, 1 active; latest metabase-col-6 step 7/40',
      transient: true,
    },
    { percent: 81, message: 'Resolving text conflict for metabase-col-6' },
    { percent: 81, message: 'Processing 1/1 tasks: metabase-col-7' },
  ];
  expect(progressEvents).toEqual(expect.arrayContaining(expectedProgress));
  expect(capture.stdout()).toContain('"status": "all_succeeded"');
  expect(capture.stderr()).not.toContain('Metabase ingest: prod-metabase');
});
|
||||
|
||||
it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => {
|
||||
const projectDir = join(tempDir, 'metabase-cli-project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
@ -903,9 +1020,16 @@ describe('runKtxIngest', () => {
|
|||
sourceKey: 'historic-sql',
|
||||
body: {
|
||||
workUnits: [],
|
||||
postProcessor: {
|
||||
finalization: {
|
||||
sourceKey: 'historic-sql',
|
||||
status: 'success',
|
||||
commitSha: 'finalization-sha',
|
||||
touchedPaths: ['semantic-layer/warehouse/_schema/public.yaml', 'wiki/global/historic-sql-orders.md'],
|
||||
declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
|
||||
derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
|
||||
declaredChangedWikiPageKeys: ['historic-sql-orders'],
|
||||
derivedChangedWikiPageKeys: ['historic-sql-orders'],
|
||||
mismatches: [],
|
||||
result: {
|
||||
tableUsageMerged: 56,
|
||||
staleTablesMarked: 1,
|
||||
|
|
@ -915,7 +1039,24 @@ describe('runKtxIngest', () => {
|
|||
},
|
||||
errors: [],
|
||||
warnings: [],
|
||||
touchedSources: [],
|
||||
actions: [
|
||||
...Array.from({ length: 57 }, (_, index) => ({
|
||||
target: 'sl' as const,
|
||||
type: 'updated' as const,
|
||||
key: `orders-${index}`,
|
||||
detail: 'Merged usage',
|
||||
targetConnectionId: 'warehouse',
|
||||
rawPaths: ['tables/public/orders.json'],
|
||||
})),
|
||||
...Array.from({ length: 35 }, (_, index) => ({
|
||||
target: 'wiki' as const,
|
||||
type: 'updated' as const,
|
||||
key: `historic-sql-orders-${index}`,
|
||||
detail: 'Projected pattern',
|
||||
rawPaths: ['patterns/orders.json'],
|
||||
})),
|
||||
],
|
||||
provenanceExclusions: [],
|
||||
},
|
||||
},
|
||||
}),
|
||||
|
|
@ -979,6 +1120,125 @@ describe('runKtxIngest', () => {
|
|||
expect(io.stdout()).toContain('Status: error\n');
|
||||
});
|
||||
|
||||
it('prints trace path and error status for stored failed ingest reports', async () => {
  const projectDir = join(tempDir, 'project');
  await writeWarehouseConfig(projectDir);
  const capture = makeIo();

  // Values that must reappear verbatim in the printed status output.
  const tracePath = '/project/.ktx/ingest-traces/job-failed/trace.jsonl';
  const failureMessage = 'final artifact gates failed';

  // Stored report for a run that failed without any failed work units.
  const storedReport = {
    id: 'report-failed',
    runId: 'run-failed',
    jobId: 'job-failed',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    createdAt: '2026-05-17T12:00:00.000Z',
    body: {
      status: 'failed',
      syncId: 'sync-failed',
      diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 },
      commitSha: null,
      tracePath,
      failure: { phase: 'final_gates', message: failureMessage },
      workUnits: [],
      failedWorkUnits: [],
      reconciliationSkipped: true,
      conflictsResolved: [],
      evictionsApplied: [],
      unmappedFallbacks: [],
      evictionInputs: [],
      unresolvedCards: [],
      supersededBy: null,
      overrideOf: null,
      provenanceRows: [],
      toolTranscripts: [],
    },
  };
  const readReportFile = vi.fn().mockResolvedValue(storedReport);

  await runKtxIngest(
    {
      command: 'status',
      projectDir,
      reportFile: '/project/report-failed.json',
      runId: 'run-failed',
      outputMode: 'plain',
      inputMode: 'disabled',
    },
    capture.io,
    { readReportFile },
  );

  const printed = capture.stdout();
  expect(printed).toContain(`Trace: ${tracePath}`);
  expect(printed).toContain('Status: error');
  expect(printed).toContain(`Error: ${failureMessage}`);
});
|
||||
|
||||
it('prints a clear first failure reason when query-history work units fail', async () => {
  const projectDir = join(tempDir, 'project');
  await writeWarehouseConfig(projectDir);
  // Raw JSON error payload stored as the failed work unit's reason.
  const oauthErrorPayload =
    '{"error":"invalid_grant","error_description":"reauth related error (invalid_rapt)","error_uri":"https://support.google.com/a/answer/9368756","error_subtype":"invalid_rapt"}';
  const failedJobId = 'query-history-failed';

  const localIngest = vi.fn(async (input: RunLocalIngestOptions): Promise<LocalIngestResult> => {
    // Start from the fake bundle's first work unit and turn it into a failure.
    const templateWorkUnit = localFakeBundleReport(failedJobId).body.workUnits[0];
    const failedWorkUnit = {
      ...templateWorkUnit,
      unitKey: 'historic-sql-table-orders',
      rawFiles: ['tables/orders.json'],
      status: 'failed' as const,
      reason: oauthErrorPayload,
      actions: [],
      touchedSlSources: [],
    };
    const report = localFakeBundleReport(failedJobId, {
      id: 'report-query-history-failed',
      runId: 'run-query-history-failed',
      connectionId: input.connectionId,
      sourceKey: 'historic-sql',
      body: {
        workUnits: [failedWorkUnit],
        failedWorkUnits: [failedWorkUnit.unitKey],
      },
    });
    const { body } = report;
    return {
      result: {
        jobId: failedJobId,
        runId: report.runId,
        syncId: body.syncId,
        diffSummary: body.diffSummary,
        workUnitCount: body.workUnits.length,
        failedWorkUnits: body.failedWorkUnits,
        artifactsWritten: body.provenanceRows.length,
        commitSha: body.commitSha,
      },
      report,
    };
  });

  const capture = makeIo();
  const run = runKtxIngest(
    {
      command: 'run',
      projectDir,
      connectionId: 'warehouse',
      adapter: 'historic-sql',
      outputMode: 'plain',
    },
    capture.io,
    {
      runLocalIngest: localIngest,
      jobIdFactory: () => 'query-history-failed',
    },
  );
  await expect(run).resolves.toBe(1);

  const printed = capture.stdout();
  expect(printed).toContain('Status: error\n');
  expect(printed).toContain('Failed tasks: 1\n');
  expect(printed).toContain(
    'Error: Query history failed for 1 task. First failure: Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.',
  );
  // The raw payload must be summarized, not echoed.
  expect(printed).not.toContain('error_uri');
});
|
||||
|
||||
it('passes the debug LLM request file to local ingest runs', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await writeWarehouseConfig(projectDir);
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import {
|
|||
runLocalIngest,
|
||||
runLocalMetabaseIngest,
|
||||
savedMemoryCountsForReport,
|
||||
sanitizeMemoryFlowError,
|
||||
} from '@ktx/context/ingest';
|
||||
import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections';
|
||||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
|
|
@ -101,7 +102,7 @@ export interface KtxIngestDeps {
|
|||
}
|
||||
|
||||
/**
 * Collapses an ingest report into the status shown by the CLI.
 *
 * @returns `'error'` when the report body is marked `failed` or lists at least
 *   one failed work unit; `'done'` otherwise.
 */
function reportStatus(report: IngestReportSnapshot): 'done' | 'error' {
  // A run can fail as a whole without any individual work unit failing,
  // so both signals are checked.
  const runFailed = report.body.status === 'failed';
  const hasFailedWorkUnits = report.body.failedWorkUnits.length > 0;
  return runFailed || hasFailedWorkUnits ? 'error' : 'done';
}
|
||||
|
||||
const REPORT_SOURCE_LABELS = new Map<string, string>([
|
||||
|
|
@ -127,11 +128,79 @@ function reportSourceLabel(sourceKey: string): string {
|
|||
.join(' ');
|
||||
}
|
||||
|
||||
/**
 * Extracts a JSON object embedded in a failure reason string.
 *
 * The candidate text runs from the first `{` to the last `}` of the trimmed
 * reason. Returns `null` when there is no such span, when it does not parse as
 * JSON, or when the parsed value is not a plain (non-array) object.
 */
function jsonObjectFromFailureReason(reason: string): Record<string, unknown> | null {
  const text = reason.trim();
  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open === -1 || close < open) {
    return null;
  }

  let candidate: unknown;
  try {
    candidate = JSON.parse(text.slice(open, close + 1));
  } catch {
    return null;
  }

  const isPlainObject = typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);
  return isPlainObject ? (candidate as Record<string, unknown>) : null;
}
|
||||
|
||||
/**
 * Reads `record[key]` as trimmed text.
 *
 * @returns The trimmed string, or `null` when the value is not a string or is
 *   empty after trimming.
 */
function stringField(record: Record<string, unknown>, key: string): string | null {
  const raw = record[key];
  if (typeof raw !== 'string') {
    return null;
  }
  const text = raw.trim();
  return text === '' ? null : text;
}
|
||||
|
||||
/**
 * Recognizes the error payload that signals a required Google reauthentication:
 * `error` is `invalid_grant` and either `error_description` mentions `reauth`
 * or `error_subtype` is `invalid_rapt`. Comparisons are case-insensitive.
 */
function isGoogleReauthFailure(record: Record<string, unknown>): boolean {
  // Missing or non-string fields compare as the empty string.
  const lowered = (key: string): string => (stringField(record, key) ?? '').toLowerCase();

  if (lowered('error') !== 'invalid_grant') {
    return false;
  }
  return lowered('error_description').includes('reauth') || lowered('error_subtype') === 'invalid_rapt';
}
|
||||
|
||||
/**
 * Turns a raw work-unit failure reason into a single readable message.
 *
 * - Reasons without an embedded JSON object are passed through
 *   `sanitizeMemoryFlowError`.
 * - For the `historic-sql` source, a Google reauthentication payload is
 *   replaced by a fixed remediation message.
 * - Other JSON payloads are summarized as `error: error_description`, with
 *   `error_subtype` appended in parentheses when present.
 * - JSON payloads with neither `error` nor `error_description` fall back to the
 *   sanitized reason, so this path never emits text that the non-JSON path
 *   would have sanitized.
 *
 * @param sourceKey Source key of the report the failure belongs to.
 * @param reason Raw failure reason recorded on the work unit.
 */
function formatFailureReason(sourceKey: string, reason: string): string {
  const parsed = jsonObjectFromFailureReason(reason);
  if (!parsed) {
    return sanitizeMemoryFlowError(reason);
  }

  if (sourceKey === 'historic-sql' && isGoogleReauthFailure(parsed)) {
    return 'Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.';
  }

  const error = stringField(parsed, 'error');
  const description = stringField(parsed, 'error_description');
  const subtype = stringField(parsed, 'error_subtype');
  const parts = [error, description].filter((part): part is string => Boolean(part));
  // No usable fields: treat the reason like any other non-structured failure text.
  const message = parts.length > 0 ? parts.join(': ') : sanitizeMemoryFlowError(reason);
  return subtype ? `${message} (${subtype})` : message;
}
|
||||
|
||||
/**
 * Builds the one-line error summary for a report, or `null` when nothing failed.
 *
 * A run-level failure message (body marked `failed` with a `failure.message`)
 * takes precedence and is returned sanitized. Otherwise the summary names the
 * source, the number of failed tasks, and the formatted reason of the first
 * failed work unit that has a non-blank reason, if any.
 */
function failedReportMessage(report: IngestReportSnapshot): string | null {
  const { body, sourceKey } = report;

  const runFailureMessage = body.status === 'failed' ? body.failure?.message : undefined;
  if (runFailureMessage) {
    return sanitizeMemoryFlowError(runFailureMessage);
  }

  const failedCount = body.failedWorkUnits.length;
  if (failedCount === 0) {
    return null;
  }

  const firstFailure = body.workUnits.find(
    (workUnit) => workUnit.status === 'failed' && typeof workUnit.reason === 'string' && workUnit.reason.trim(),
  );
  const headline = `${reportSourceLabel(sourceKey)} failed for ${pluralize(failedCount, 'task')}.`;

  const firstReason = firstFailure?.reason;
  return firstReason ? `${headline} First failure: ${formatFailureReason(sourceKey, firstReason)}` : headline;
}
|
||||
|
||||
function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void {
|
||||
const counts = savedMemoryCountsForReport(report);
|
||||
const failedMessage = failedReportMessage(report);
|
||||
io.stdout.write(`Report: ${report.id}\n`);
|
||||
io.stdout.write(`Run: ${report.runId}\n`);
|
||||
io.stdout.write(`Job: ${report.jobId}\n`);
|
||||
if (report.body.tracePath) {
|
||||
io.stdout.write(`Trace: ${report.body.tracePath}\n`);
|
||||
}
|
||||
io.stdout.write(`Status: ${reportStatus(report)}\n`);
|
||||
io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`);
|
||||
io.stdout.write(`Connection: ${report.connectionId}\n`);
|
||||
|
|
@ -140,6 +209,12 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void
|
|||
`Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`,
|
||||
);
|
||||
io.stdout.write(`Tasks: ${report.body.workUnits.length}\n`);
|
||||
if (report.body.failedWorkUnits.length > 0) {
|
||||
io.stdout.write(`Failed tasks: ${report.body.failedWorkUnits.length}\n`);
|
||||
}
|
||||
if (failedMessage) {
|
||||
io.stdout.write(`Error: ${failedMessage}\n`);
|
||||
}
|
||||
io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`);
|
||||
io.stdout.write(`Provenance rows: ${report.body.provenanceRows.length}\n`);
|
||||
}
|
||||
|
|
@ -220,7 +295,11 @@ function formatDiffProgress(event: Extract<MemoryFlowEvent, { type: 'diff_comput
|
|||
}
|
||||
|
||||
/**
 * Returns the events that belong to the current plan, up to and including
 * `eventIndex`.
 *
 * The window starts right after the most recent `chunks_planned` event at or
 * before `eventIndex`, so events from an earlier plan are excluded. When no
 * `chunks_planned` event has been seen yet, the window starts at the first
 * event.
 *
 * @param snapshot Replay input whose `events` list is sliced.
 * @param eventIndex Index of the last event to include.
 */
function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] {
  const latestPlanIndex = snapshot.events
    .slice(0, eventIndex + 1)
    .findLastIndex((event) => event.type === 'chunks_planned');
  // The plan event itself is not part of the window.
  const startIndex = latestPlanIndex >= 0 ? latestPlanIndex + 1 : 0;
  return snapshot.events.slice(startIndex, eventIndex + 1);
}
|
||||
|
||||
function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number {
|
||||
|
|
@ -244,7 +323,8 @@ function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex
|
|||
if (snapshot.plannedWorkUnits.length > 0) {
|
||||
return snapshot.plannedWorkUnits.length;
|
||||
}
|
||||
const planEvent = workUnitEventsThrough(snapshot, eventIndex)
|
||||
const planEvent = snapshot.events
|
||||
.slice(0, eventIndex + 1)
|
||||
.filter((event) => event.type === 'chunks_planned')
|
||||
.at(-1);
|
||||
return planEvent?.workUnitCount ?? completedWorkUnitCountThrough(snapshot, eventIndex);
|
||||
|
|
@ -290,6 +370,12 @@ function plainIngestEventProgress(
|
|||
};
|
||||
case 'stage_skipped':
|
||||
return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` };
|
||||
case 'stage_progress':
|
||||
return {
|
||||
percent: event.percent,
|
||||
message: event.message,
|
||||
...(event.transient !== undefined ? { transient: event.transient } : {}),
|
||||
};
|
||||
case 'work_unit_started': {
|
||||
const total = plannedWorkUnitCountThrough(snapshot, eventIndex);
|
||||
const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey);
|
||||
|
|
@ -636,6 +722,25 @@ export async function runKtxIngest(
|
|||
}
|
||||
if (args.adapter === 'metabase') {
|
||||
const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest;
|
||||
const runOutputMode = effectiveIngestOutputMode(args.outputMode, io, env, {
|
||||
requireInput: (args.inputMode ?? 'auto') === 'auto',
|
||||
});
|
||||
const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env)
|
||||
? createPlainIngestProgressRenderer(args, io)
|
||||
: null;
|
||||
const structuredProgress = deps.progress
|
||||
? createPlainIngestProgressObserver(args, deps.progress)
|
||||
: null;
|
||||
const initialMemoryFlow =
|
||||
plainProgress || structuredProgress ? initialRunMemoryFlowInput(args, 'pending') : undefined;
|
||||
const memoryFlow = initialMemoryFlow
|
||||
? createMemoryFlowLiveBuffer(initialMemoryFlow, {
|
||||
onChange: (snapshot) => {
|
||||
plainProgress?.update(snapshot);
|
||||
structuredProgress?.update(snapshot);
|
||||
},
|
||||
})
|
||||
: undefined;
|
||||
const progress =
|
||||
args.outputMode === 'json' && !deps.progress
|
||||
? undefined
|
||||
|
|
@ -646,20 +751,29 @@ export async function runKtxIngest(
|
|||
: io,
|
||||
deps.progress,
|
||||
);
|
||||
const result = await executeMetabaseFanout({
|
||||
project: ingestProject,
|
||||
adapters: createAdapters(ingestProject, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(progress ? { progress } : {}),
|
||||
});
|
||||
if (args.outputMode === 'json') {
|
||||
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
||||
} else {
|
||||
writeMetabaseFanoutStatus(result, io);
|
||||
plainProgress?.start();
|
||||
structuredProgress?.start();
|
||||
let result: LocalMetabaseFanoutResult;
|
||||
try {
|
||||
result = await executeMetabaseFanout({
|
||||
project: ingestProject,
|
||||
adapters: createAdapters(ingestProject, adapterOptions),
|
||||
metabaseConnectionId: args.connectionId,
|
||||
...localIngestOptions,
|
||||
queryExecutor,
|
||||
trigger: 'manual_resync',
|
||||
jobIdFactory: deps.jobIdFactory,
|
||||
...(memoryFlow ? { memoryFlow } : {}),
|
||||
...(progress ? { progress } : {}),
|
||||
});
|
||||
plainProgress?.flush();
|
||||
if (args.outputMode === 'json') {
|
||||
io.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
|
||||
} else {
|
||||
writeMetabaseFanoutStatus(result, io);
|
||||
}
|
||||
} finally {
|
||||
plainProgress?.flush();
|
||||
}
|
||||
return result.status === 'all_succeeded' ? 0 : 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import type { KtxCliIo } from '../cli-runtime.js';
|
||||
import { printList, type PrintListColumn } from './print-list.js';
|
||||
import { createRankBadgeFormatter, printList, type PrintListColumn } from './print-list.js';
|
||||
import { SYMBOLS } from './symbols.js';
|
||||
|
||||
function recorder(): { io: KtxCliIo; out: () => string; err: () => string } {
|
||||
|
|
@ -239,26 +239,26 @@ describe('printList — pretty mode', () => {
|
|||
expect(out).toContain('2 pages');
|
||||
});
|
||||
|
||||
it('renders a leading badge column with prettyFormat in pretty mode', () => {
|
||||
it('renders a leading rank badge column in pretty mode', () => {
|
||||
const r = recorder();
|
||||
interface SearchRow { score: number; scope: string; key: string; summary: string }
|
||||
const rows: SearchRow[] = [
|
||||
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
|
||||
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
|
||||
];
|
||||
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(rows),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
{ key: 'key', label: 'KEY', plain: '' },
|
||||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
const rows: SearchRow[] = [
|
||||
{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' },
|
||||
{ score: 0.04, scope: 'GLOBAL', key: 'beta', summary: 'second' },
|
||||
];
|
||||
printList<SearchRow>({
|
||||
rows,
|
||||
columns: SEARCH_COLUMNS,
|
||||
|
|
@ -270,20 +270,22 @@ describe('printList — pretty mode', () => {
|
|||
io: r.io,
|
||||
});
|
||||
const out = stripAnsi(r.out());
|
||||
expect(out).toMatch(/87%\s+alpha\s+/);
|
||||
expect(out).toMatch(/4%\s+beta\s+/);
|
||||
expect(out).toMatch(/#1\s+alpha\s+/);
|
||||
expect(out).toMatch(/#2\s+beta\s+/);
|
||||
expect(out).not.toContain('%');
|
||||
});
|
||||
|
||||
it('emits the badge column in plain mode using its plain prefix', () => {
|
||||
const r = recorder();
|
||||
interface SearchRow { score: number; scope: string; key: string; summary: string }
|
||||
const rows: SearchRow[] = [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }];
|
||||
const SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<SearchRow>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (v) => `${Math.round(Number(v) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(rows),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
|
|
@ -291,7 +293,7 @@ describe('printList — pretty mode', () => {
|
|||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
printList<SearchRow>({
|
||||
rows: [{ score: 0.87, scope: 'GLOBAL', key: 'alpha', summary: 'first' }],
|
||||
rows,
|
||||
columns: SEARCH_COLUMNS,
|
||||
groupBy: 'scope',
|
||||
mode: 'plain',
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ export interface PrintListColumn<Row> {
|
|||
* - `'suffix'` — trailing em-dash optional value. Default: any column with `optional: true`.
|
||||
*/
|
||||
role?: 'name' | 'metric' | 'badge' | 'suffix';
|
||||
/** Custom pretty-mode value formatter (e.g. score → "87%"). Plain/JSON unaffected. */
|
||||
/** Custom pretty-mode value formatter (for example, score -> "#1"). Plain/JSON unaffected. */
|
||||
prettyFormat?: (value: Row[keyof Row & string], row: Row) => string;
|
||||
}
|
||||
|
||||
|
|
@ -67,6 +67,16 @@ export function printList<Row extends object>(args: PrintListArgs<Row>): void {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds a `prettyFormat` callback that renders a row's 1-based position in
 * `rows` as a rank badge such as `#1`.
 *
 * Positions are captured by object identity when the formatter is created, so
 * the cell value passed to the callback is ignored.
 *
 * @param rows Rows in the order they will be ranked.
 * @returns A formatter `(value, row) => string`. Rows appended to `rows` after
 *   creation are ranked by their current index. A row that is not in `rows`
 *   at all yields an empty badge instead of the misleading `#0`.
 */
export function createRankBadgeFormatter<Row extends object>(
  rows: ReadonlyArray<Row>,
): (_value: Row[keyof Row & string], row: Row) => string {
  const ranks = new WeakMap<Row, number>();
  rows.forEach((row, index) => {
    ranks.set(row, index + 1);
  });
  return (_value, row) => {
    // Fall back to a live lookup for rows added after the snapshot was taken.
    // `indexOf` returns -1 for unknown rows, which makes the rank 0.
    const rank = ranks.get(row) ?? rows.indexOf(row) + 1;
    // A rank of 0 means "not in the list": emit no badge rather than "#0".
    return rank > 0 ? `#${rank}` : '';
  };
}
|
||||
|
||||
/**
 * Reports whether a value counts as "empty".
 *
 * @param value Any value.
 * @returns `true` only for `undefined`, `null`, or the empty string; every
 *   other value (including `0` and `false`) is non-empty.
 */
function isEmpty(value: unknown): boolean {
|
||||
return value === undefined || value === null || value === '';
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { stripVTControlCharacters } from 'node:util';
|
||||
import { initKtxProject, loadKtxProject } from '@ktx/context/project';
|
||||
import type { KtxEmbeddingPort } from '@ktx/context';
|
||||
import { writeLocalKnowledgePage } from '@ktx/context/wiki';
|
||||
|
|
@ -90,6 +91,24 @@ describe('runKtxKnowledge', () => {
|
|||
expect(searchIo.stdout()).toContain('metrics-revenue');
|
||||
});
|
||||
|
||||
it('prints wiki search rank badges in pretty output', async () => {
|
||||
const projectDir = join(tempDir, 'rank-project');
|
||||
await initKtxProject({ projectDir });
|
||||
await seedWikiPage(projectDir);
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{ command: 'search', projectDir, query: 'paid order', userId: 'local', output: 'pretty' },
|
||||
searchIo.io,
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const stdout = stripVTControlCharacters(searchIo.stdout());
|
||||
expect(stdout).toMatch(/#1\s+metrics-revenue/);
|
||||
expect(stdout).not.toContain('%');
|
||||
});
|
||||
|
||||
it('prints wiki list and search as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await initKtxProject({ projectDir });
|
||||
|
|
@ -156,4 +175,29 @@ describe('runKtxKnowledge', () => {
|
|||
expect(searchIo.stdout()).toContain('active-contract-arr-open-tickets');
|
||||
expect(searchIo.stderr()).toBe('');
|
||||
});
|
||||
|
||||
it('writes wiki search lane diagnostics to stderr when debug is enabled', async () => {
|
||||
const projectDir = join(tempDir, 'debug-project');
|
||||
await initKtxProject({ projectDir });
|
||||
await seedWikiPage(projectDir);
|
||||
|
||||
const searchIo = makeIo();
|
||||
await expect(
|
||||
runKtxKnowledge(
|
||||
{ command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, debug: true },
|
||||
searchIo.io,
|
||||
{ embeddingService: new FakeEmbeddingPort() },
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(searchIo.stdout())).toMatchObject({
|
||||
kind: 'list',
|
||||
data: { items: [expect.objectContaining({ key: 'metrics-revenue' })] },
|
||||
meta: { command: 'wiki search' },
|
||||
});
|
||||
expect(searchIo.stderr()).toContain('[debug] wiki search mode=sqlite-fts5');
|
||||
expect(searchIo.stderr()).toContain('embedding=configured');
|
||||
expect(searchIo.stderr()).toContain('lane=lexical status=available');
|
||||
expect(searchIo.stderr()).toContain('lane=semantic status=available');
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import {
|
|||
searchLocalKnowledgePages,
|
||||
} from '@ktx/context/wiki';
|
||||
import { resolveOutputMode } from './io/mode.js';
|
||||
import { printList, type PrintListColumn } from './io/print-list.js';
|
||||
import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js';
|
||||
|
||||
export type KtxKnowledgeArgs =
|
||||
| { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean }
|
||||
|
|
@ -23,6 +23,7 @@ export type KtxKnowledgeArgs =
|
|||
output?: string;
|
||||
json?: boolean;
|
||||
limit?: number;
|
||||
debug?: boolean;
|
||||
};
|
||||
|
||||
type KtxKnowledgeIo = import('./cli-runtime.js').KtxCliIo;
|
||||
|
|
@ -33,19 +34,23 @@ const WIKI_LIST_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSummary>> =
|
|||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
|
||||
const WIKI_SEARCH_COLUMNS: ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> = [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
{ key: 'key', label: 'KEY', plain: '' },
|
||||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
/**
 * Builds the column definitions used to print wiki search results.
 *
 * The columns are created per call because the `score` badge's pretty-mode
 * formatter comes from `createRankBadgeFormatter(rows)` and therefore needs
 * the exact result rows being printed.
 *
 * @param rows The search results that will be printed with these columns.
 * @returns Four columns: `score` (badge), `scope`, `key`, and the optional
 *   `summary`.
 */
function wikiSearchColumns(
|
||||
rows: ReadonlyArray<LocalKnowledgeSearchResult>,
|
||||
): ReadonlyArray<PrintListColumn<LocalKnowledgeSearchResult>> {
|
||||
return [
|
||||
{
|
||||
key: 'score',
|
||||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: createRankBadgeFormatter(rows),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'scope', label: 'SCOPE', plain: '' },
|
||||
{ key: 'key', label: 'KEY', plain: '' },
|
||||
{ key: 'summary', label: 'SUMMARY', plain: '', optional: true, dim: true },
|
||||
];
|
||||
}
|
||||
|
||||
interface KtxKnowledgeDeps {
|
||||
embeddingService?: KtxEmbeddingPort | null;
|
||||
|
|
@ -65,6 +70,26 @@ function wikiSearchEmbeddingService(
|
|||
return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null;
|
||||
}
|
||||
|
||||
/**
 * Writes `[debug] wiki search ...` diagnostics to `io.stderr`.
 *
 * First a summary line with the search mode, whether an embedding service is
 * configured, and the result count. Then one line per search lane with its
 * status, returned candidate count, weight, and (when present) reason.
 *
 * Lane information is read from the first result only, so when `results` is
 * empty just the summary line is written.
 *
 * @param io CLI io whose `stderr` receives the diagnostics.
 * @param input Search mode, embedding configuration flag, and the results.
 */
function writeWikiSearchDebug(
|
||||
io: KtxKnowledgeIo,
|
||||
input: {
|
||||
mode: string;
|
||||
embeddingConfigured: boolean;
|
||||
results: LocalKnowledgeSearchResult[];
|
||||
},
|
||||
): void {
|
||||
io.stderr.write(
|
||||
`[debug] wiki search mode=${input.mode} embedding=${input.embeddingConfigured ? 'configured' : 'unconfigured'} results=${input.results.length}\n`,
|
||||
);
|
||||
const lanes = input.results[0]?.lanes ?? [];
|
||||
for (const lane of lanes) {
|
||||
const reason = lane.reason ? ` reason=${lane.reason}` : '';
|
||||
io.stderr.write(
|
||||
`[debug] wiki search lane=${lane.lane} status=${lane.status} returned=${lane.returnedCandidateCount} weight=${lane.weight}${reason}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export async function runKtxKnowledge(
|
||||
args: KtxKnowledgeArgs,
|
||||
io: KtxKnowledgeIo = process,
|
||||
|
|
@ -89,12 +114,20 @@ export async function runKtxKnowledge(
|
|||
return 0;
|
||||
}
|
||||
if (args.command === 'search') {
|
||||
const embeddingService = wikiSearchEmbeddingService(project, deps);
|
||||
const results = await searchLocalKnowledgePages(project, {
|
||||
query: args.query,
|
||||
userId: args.userId,
|
||||
embeddingService: wikiSearchEmbeddingService(project, deps),
|
||||
embeddingService,
|
||||
limit: args.limit,
|
||||
});
|
||||
if (args.debug) {
|
||||
writeWikiSearchDebug(io, {
|
||||
mode: project.config.storage.search,
|
||||
embeddingConfigured: embeddingService !== null,
|
||||
results,
|
||||
});
|
||||
}
|
||||
const mode = resolveOutputMode({ explicit: args.output, json: args.json, io });
|
||||
let emptyMessage = `No local wiki pages matched "${args.query}"`;
|
||||
let emptyHint = 'Run `ktx wiki list` to inspect available pages.';
|
||||
|
|
@ -107,7 +140,7 @@ export async function runKtxKnowledge(
|
|||
}
|
||||
printList<LocalKnowledgeSearchResult>({
|
||||
rows: results,
|
||||
columns: WIKI_SEARCH_COLUMNS,
|
||||
columns: wikiSearchColumns(results),
|
||||
groupBy: 'scope',
|
||||
emptyMessage,
|
||||
emptyHint,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
createManagedPythonSemanticLayerComputePort,
|
||||
ensureManagedPythonCommandRuntime,
|
||||
managedRuntimeInstallCommand,
|
||||
runtimeInstallPolicyFromFlags,
|
||||
} from './managed-python-command.js';
|
||||
|
|
@ -103,6 +104,17 @@ function installResult(features: KtxRuntimeFeature[] = ['core']): ManagedPythonR
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Test helper that creates a recording spinner factory.
 *
 * @returns `spinner`, a `vi.fn` factory whose spinner objects append
 *   `start:<msg>`, `message:<msg>`, `stop:<msg>`, or `error:<msg>` to the
 *   shared `events` array, and `events` itself for assertions.
 */
function makeSpinnerEvents() {
|
||||
const events: string[] = [];
|
||||
const spinner = vi.fn(() => ({
|
||||
start: (msg: string) => events.push(`start:${msg}`),
|
||||
message: (msg: string) => events.push(`message:${msg}`),
|
||||
stop: (msg: string) => events.push(`stop:${msg}`),
|
||||
error: (msg: string) => events.push(`error:${msg}`),
|
||||
}));
|
||||
return { events, spinner };
|
||||
}
|
||||
|
||||
describe('managedRuntimeInstallCommand', () => {
|
||||
it('prints the exact command for each managed runtime feature', () => {
|
||||
expect(managedRuntimeInstallCommand('core')).toBe('ktx dev runtime install --yes');
|
||||
|
|
@ -128,6 +140,51 @@ describe('runtimeInstallPolicyFromFlags', () => {
|
|||
});
|
||||
|
||||
describe('createManagedPythonSemanticLayerComputePort', () => {
|
||||
it('uses non-animated runtime setup status by default', async () => {
|
||||
const io = makeIo();
|
||||
|
||||
await expect(
|
||||
ensureManagedPythonCommandRuntime({
|
||||
cliVersion: '0.2.0',
|
||||
installPolicy: 'auto',
|
||||
io: io.io,
|
||||
readStatus: vi.fn(async () => missingStatus()),
|
||||
installRuntime: vi.fn(async () => installResult(['local-embeddings'])),
|
||||
feature: 'local-embeddings',
|
||||
}),
|
||||
).resolves.toMatchObject({
|
||||
layout: { versionDir: '/runtime/0.2.0' },
|
||||
});
|
||||
|
||||
expect(io.stderr()).toContain('Installing KTX Python runtime (local-embeddings) with uv...');
|
||||
expect(io.stderr()).toContain('KTX Python runtime ready: /runtime/0.2.0');
|
||||
expect(io.stderr().match(/Installing KTX Python runtime/g)).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('shows runtime installation progress with the CLI spinner', async () => {
|
||||
const io = makeIo();
|
||||
const { events, spinner } = makeSpinnerEvents();
|
||||
|
||||
const options = {
|
||||
cliVersion: '0.2.0',
|
||||
installPolicy: 'auto' as const,
|
||||
io: io.io,
|
||||
readStatus: vi.fn(async () => missingStatus()),
|
||||
installRuntime: vi.fn(async () => installResult(['local-embeddings'])),
|
||||
feature: 'local-embeddings' as const,
|
||||
spinner,
|
||||
};
|
||||
|
||||
await expect(ensureManagedPythonCommandRuntime(options)).resolves.toMatchObject({
|
||||
layout: { versionDir: '/runtime/0.2.0' },
|
||||
});
|
||||
|
||||
expect(events).toEqual([
|
||||
'start:Installing KTX Python runtime (local-embeddings) with uv...',
|
||||
'stop:KTX Python runtime ready: /runtime/0.2.0',
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses the managed ktx-daemon executable when the runtime is ready', async () => {
|
||||
const io = makeIo();
|
||||
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
|
||||
|
|
@ -170,6 +227,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
|
||||
it('installs the core runtime without prompting when policy is auto', async () => {
|
||||
const io = makeIo();
|
||||
const { events, spinner } = makeSpinnerEvents();
|
||||
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
|
||||
const createPythonCompute = vi.fn(() => compute);
|
||||
const installRuntime = vi.fn(async () => installResult());
|
||||
|
|
@ -182,6 +240,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
readStatus: vi.fn(async () => missingStatus()),
|
||||
installRuntime,
|
||||
createPythonCompute,
|
||||
spinner,
|
||||
}),
|
||||
).resolves.toBe(compute);
|
||||
|
||||
|
|
@ -190,12 +249,15 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
features: ['core'],
|
||||
force: false,
|
||||
});
|
||||
expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv');
|
||||
expect(io.stderr()).toContain('KTX Python runtime ready: /runtime/0.2.0');
|
||||
expect(events).toEqual([
|
||||
'start:Installing KTX Python runtime (core) with uv...',
|
||||
'stop:KTX Python runtime ready: /runtime/0.2.0',
|
||||
]);
|
||||
});
|
||||
|
||||
it('prompts before installing when policy is prompt', async () => {
|
||||
const io = makeIo();
|
||||
const { events, spinner } = makeSpinnerEvents();
|
||||
const confirmInstall = vi.fn(async () => true);
|
||||
const installRuntime = vi.fn(async () => installResult());
|
||||
|
||||
|
|
@ -207,6 +269,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
installRuntime,
|
||||
createPythonCompute: vi.fn(() => ({ query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() })),
|
||||
confirmInstall,
|
||||
spinner,
|
||||
});
|
||||
|
||||
expect(confirmInstall).toHaveBeenCalledWith(
|
||||
|
|
@ -218,10 +281,12 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
features: ['core'],
|
||||
force: false,
|
||||
});
|
||||
expect(events).toContainEqual('start:Installing KTX Python runtime (core) with uv...');
|
||||
});
|
||||
|
||||
it('uses injected runtime confirmation instead of reading process TTY directly', async () => {
|
||||
const io = makeIo();
|
||||
const { events, spinner } = makeSpinnerEvents();
|
||||
const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() };
|
||||
const installRuntime = vi.fn(async (): Promise<ManagedPythonRuntimeInstallResult> => installResult());
|
||||
const confirmInstall = vi.fn(async () => true);
|
||||
|
|
@ -235,6 +300,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
installRuntime,
|
||||
confirmInstall,
|
||||
createPythonCompute: () => compute,
|
||||
spinner,
|
||||
}),
|
||||
).resolves.toBe(compute);
|
||||
|
||||
|
|
@ -242,7 +308,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => {
|
|||
'KTX needs to install the core Python runtime. This downloads Python dependencies with uv. Continue?',
|
||||
io.io,
|
||||
);
|
||||
expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv...');
|
||||
expect(events).toContainEqual('start:Installing KTX Python runtime (core) with uv...');
|
||||
});
|
||||
|
||||
it('can decide default runtime prompting from injected io capabilities', async () => {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { createPythonSemanticLayerComputePort, type KtxSemanticLayerComputePort } from '@ktx/context/daemon';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { createClackPromptAdapter } from './clack.js';
|
||||
import { createClackPromptAdapter, createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
|
||||
import {
|
||||
installManagedPythonRuntime,
|
||||
readManagedPythonRuntimeStatus,
|
||||
|
|
@ -37,6 +37,7 @@ export interface ManagedPythonCommandDeps {
|
|||
readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise<ManagedPythonRuntimeStatus>;
|
||||
installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise<ManagedPythonRuntimeInstallResult>;
|
||||
confirmInstall?: (message: string, io: KtxCliIo) => Promise<boolean>;
|
||||
spinner?: () => KtxCliSpinner;
|
||||
}
|
||||
|
||||
export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps {
|
||||
|
|
@ -101,14 +102,20 @@ export async function ensureManagedPythonCommandRuntime(
|
|||
}
|
||||
}
|
||||
|
||||
options.io.stderr.write(`Installing KTX Python runtime (${feature}) with uv...\n`);
|
||||
const installed = await installRuntime({
|
||||
cliVersion: options.cliVersion,
|
||||
features: [feature],
|
||||
force: false,
|
||||
});
|
||||
options.io.stderr.write(`KTX Python runtime ready: ${installed.layout.versionDir}\n`);
|
||||
return { layout: installed.layout, manifest: installed.manifest };
|
||||
const progress = (options.spinner ?? (() => createStaticCliSpinner(options.io)))();
|
||||
progress.start(`Installing KTX Python runtime (${feature}) with uv...`);
|
||||
try {
|
||||
const installed = await installRuntime({
|
||||
cliVersion: options.cliVersion,
|
||||
features: [feature],
|
||||
force: false,
|
||||
});
|
||||
progress.stop(`KTX Python runtime ready: ${installed.layout.versionDir}`);
|
||||
return { layout: installed.layout, manifest: installed.manifest };
|
||||
} catch (error) {
|
||||
progress.error(`KTX Python runtime install failed: ${error instanceof Error ? error.message : String(error)}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
export async function createManagedPythonSemanticLayerComputePort(
|
||||
|
|
@ -122,6 +129,7 @@ export async function createManagedPythonSemanticLayerComputePort(
|
|||
...(options.readStatus ? { readStatus: options.readStatus } : {}),
|
||||
...(options.installRuntime ? { installRuntime: options.installRuntime } : {}),
|
||||
...(options.confirmInstall ? { confirmInstall: options.confirmInstall } : {}),
|
||||
...(options.spinner ? { spinner: options.spinner } : {}),
|
||||
});
|
||||
const createPythonCompute = options.createPythonCompute ?? createPythonSemanticLayerComputePort;
|
||||
return createPythonCompute({
|
||||
|
|
|
|||
|
|
@ -11,7 +11,13 @@ function silentIo(): KtxCliIo {
|
|||
}
|
||||
|
||||
function stubPackageInfo(): KtxCliPackageInfo {
|
||||
return { name: '@ktx/cli', version: '0.0.0-docs', contextPackageName: '@ktx/context' };
|
||||
return {
|
||||
name: '@ktx/cli',
|
||||
version: '0.0.0-docs',
|
||||
packageVersion: '0.0.0-private',
|
||||
runtimeVersion: '0.0.0-docs',
|
||||
contextPackageName: '@ktx/context',
|
||||
};
|
||||
}
|
||||
|
||||
export function renderKtxCommandTree(): string {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import {
|
|||
type KtxPublicIngestProject,
|
||||
runKtxPublicIngest,
|
||||
} from './public-ingest.js';
|
||||
import type { ManagedPythonCommandRuntime } from './managed-python-command.js';
|
||||
|
||||
function makeIo(options: { isTTY?: boolean; interactive?: boolean } = {}) {
|
||||
let stdout = '';
|
||||
|
|
@ -750,6 +751,53 @@ describe('runKtxPublicIngest', () => {
|
|||
expect(runScan).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('preflights foreground query-history runtime before starting the context-build view', async () => {
|
||||
const io = makeIo({ isTTY: true, interactive: true });
|
||||
const calls: string[] = [];
|
||||
const project = projectWithConnections({
|
||||
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
|
||||
});
|
||||
const ensureRuntime = vi.fn(async (): Promise<ManagedPythonCommandRuntime> => {
|
||||
calls.push('runtime');
|
||||
return {} as ManagedPythonCommandRuntime;
|
||||
});
|
||||
const runContextBuild = vi.fn(async () => {
|
||||
calls.push('context-build');
|
||||
return { exitCode: 0 };
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxPublicIngest(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: '/tmp/project',
|
||||
targetConnectionId: 'warehouse',
|
||||
all: false,
|
||||
json: false,
|
||||
inputMode: 'auto',
|
||||
queryHistory: 'enabled',
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
loadProject: vi.fn(async () => project),
|
||||
ensureRuntime,
|
||||
runContextBuild,
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(calls).toEqual(['runtime', 'context-build']);
|
||||
expect(ensureRuntime).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
cliVersion: '0.2.0',
|
||||
installPolicy: 'prompt',
|
||||
feature: 'core',
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('runs all independent targets and reports partial failures', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
|
|
@ -806,7 +854,12 @@ describe('runKtxPublicIngest', () => {
|
|||
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
|
||||
});
|
||||
const runScan = vi.fn(async () => 0);
|
||||
const runIngest = vi.fn(async () => 1);
|
||||
const runIngest = vi.fn(async (_args, ingestIo) => {
|
||||
ingestIo.stdout.write(
|
||||
'Error: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.\n',
|
||||
);
|
||||
return 1;
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxPublicIngest(
|
||||
|
|
@ -824,11 +877,52 @@ describe('runKtxPublicIngest', () => {
|
|||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(io.stdout()).toContain('warehouse failed at query-history.');
|
||||
expect(io.stdout()).toMatch(/warehouse\s+done\s+failed\s+skipped\s+skipped/);
|
||||
expect(io.stdout()).toContain(
|
||||
'warehouse failed: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history',
|
||||
);
|
||||
expect(io.stdout()).not.toContain('warehouse failed: Error:');
|
||||
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
|
||||
expect(io.stdout()).not.toContain('historic-sql');
|
||||
});
|
||||
|
||||
it('prints the runtime artifact build hint for missing query-history runtime assets', async () => {
|
||||
const io = makeIo();
|
||||
const project = deepReadyProject({
|
||||
warehouse: { driver: 'postgres', context: { depth: 'deep' } },
|
||||
});
|
||||
const runScan = vi.fn(async () => 0);
|
||||
const runIngest = vi.fn(async (_args, ingestIo) => {
|
||||
ingestIo.stderr.write('Missing bundled Python runtime manifest: /repo/packages/cli/assets/python/manifest.json\n');
|
||||
ingestIo.stderr.write('In a source checkout, build the local runtime assets with: pnpm run artifacts:build\n');
|
||||
ingestIo.stderr.write('Then retry the runtime-backed KTX command.\n');
|
||||
return 1;
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxPublicIngest(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: '/tmp/project',
|
||||
targetConnectionId: 'warehouse',
|
||||
all: false,
|
||||
json: false,
|
||||
inputMode: 'disabled',
|
||||
queryHistory: 'enabled',
|
||||
},
|
||||
io.io,
|
||||
{ loadProject: vi.fn(async () => project), runScan, runIngest },
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(io.stdout()).toContain('Missing bundled Python runtime manifest');
|
||||
expect(io.stdout()).toContain(
|
||||
'In a source checkout, build the local runtime assets with: pnpm run artifacts:build',
|
||||
);
|
||||
expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history');
|
||||
expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command');
|
||||
});
|
||||
|
||||
it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => {
|
||||
const io = makeIo();
|
||||
const project = projectWithConnections({
|
||||
|
|
|
|||
|
|
@ -9,8 +9,14 @@ import {
|
|||
isDatabaseDriver,
|
||||
normalizeConnectionDriver,
|
||||
} from './ingest-depth.js';
|
||||
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
|
||||
import {
|
||||
ensureManagedPythonCommandRuntime,
|
||||
type KtxManagedPythonInstallPolicy,
|
||||
type ManagedPythonCommandRuntime,
|
||||
} from './managed-python-command.js';
|
||||
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
|
||||
import { publicIngestOutputLine } from './public-ingest-copy.js';
|
||||
import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js';
|
||||
import type { KtxScanArgs, KtxScanDeps } from './scan.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
|
||||
|
|
@ -94,6 +100,13 @@ export interface KtxPublicIngestDeps {
|
|||
) => Promise<{ exitCode: number }>;
|
||||
scanProgress?: KtxProgressPort;
|
||||
ingestProgress?: (update: KtxIngestProgressUpdate) => void;
|
||||
ensureRuntime?: (options: {
|
||||
cliVersion: string;
|
||||
installPolicy: KtxManagedPythonInstallPolicy;
|
||||
io: KtxCliIo;
|
||||
feature: KtxRuntimeFeature;
|
||||
}) => Promise<ManagedPythonCommandRuntime>;
|
||||
env?: NodeJS.ProcessEnv;
|
||||
runtimeIo?: KtxCliIo;
|
||||
onPhaseStart?: (phaseKey: KtxPublicIngestPhaseKey) => void;
|
||||
onPhaseEnd?: (phaseKey: KtxPublicIngestPhaseKey, status: 'done' | 'failed' | 'skipped', summary?: string) => void;
|
||||
|
|
@ -555,6 +568,7 @@ function markTargetResult(
|
|||
): KtxPublicIngestTargetResult {
|
||||
const selectedFailedOperation =
|
||||
failedOperation ?? (target.operation === 'database-ingest' ? 'database-schema' : 'source-ingest');
|
||||
const selectedFailedOperationIndex = target.steps.indexOf(selectedFailedOperation);
|
||||
return {
|
||||
connectionId: target.connectionId,
|
||||
driver: target.driver,
|
||||
|
|
@ -565,6 +579,10 @@ function markTargetResult(
|
|||
if (status === 'done') {
|
||||
return { ...step, status: 'done' };
|
||||
}
|
||||
const stepIndex = target.steps.indexOf(step.operation);
|
||||
if (selectedFailedOperationIndex >= 0 && stepIndex >= 0 && stepIndex < selectedFailedOperationIndex) {
|
||||
return { ...step, status: 'done' };
|
||||
}
|
||||
if (step.operation === selectedFailedOperation) {
|
||||
return {
|
||||
...step,
|
||||
|
|
@ -663,16 +681,40 @@ function createCapturedPublicIngestIo(): CapturedPublicIngestIo {
|
|||
|
||||
const INTERNAL_STATUS_LINE_RE =
|
||||
/^(Report|Run|Job|Status|Adapter|Connection|Sync|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/;
|
||||
const ACTIONABLE_FAILURE_LINE_RE =
|
||||
/^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX managed daemon|Error:|Failed\b|Could not\b|Cannot\b)/;
|
||||
const RUNTIME_BACKED_RETRY_LINE_RE = /^Then retry the runtime-backed KTX command\.?$/;
|
||||
|
||||
function firstCapturedFailureLine(output: string): string | undefined {
|
||||
return output
|
||||
/**
 * Removes a leading `Error:` label (and any whitespace after it) from a line.
 *
 * Only a prefix at the very start of the string is removed; lines without it
 * are returned unchanged.
 */
function trimErrorPrefix(line: string): string {
|
||||
return line.replace(/^Error:\s*/, '');
|
||||
}
|
||||
|
||||
/**
 * Picks the message to surface from captured command output after a failure.
 *
 * The output is split into trimmed, non-empty lines; the `KTX scan completed`
 * line and internal status lines are dropped, and each remaining line is passed
 * through `publicIngestOutputLine`.
 *
 * - With no actionable failure line, the first non-empty line is returned
 *   (without its `Error:` prefix), or `undefined` when nothing is left.
 * - An actionable line other than the missing-runtime-manifest one is returned
 *   on its own, without its `Error:` prefix.
 * - The missing-runtime-manifest line is returned together with any later
 *   "In a source checkout, build the local runtime assets with:" lines, joined
 *   by newlines; retry hints are left out.
 *
 * @param output Raw captured output, possibly multi-line.
 * @returns The message to show, or `undefined` when no usable line exists.
 */
function capturedFailureMessage(output: string): string | undefined {
  const lines = output
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .filter((line) => !line.startsWith('KTX scan completed'))
    .filter((line) => !INTERNAL_STATUS_LINE_RE.test(line))
    .map(publicIngestOutputLine);

  const actionableIndex = lines.findIndex((line) => ACTIONABLE_FAILURE_LINE_RE.test(line));
  if (actionableIndex < 0) {
    // Nothing actionable: fall back to the first line that survived filtering.
    const line = lines.find((candidate) => candidate.length > 0);
    return line ? trimErrorPrefix(line) : undefined;
  }

  const firstLine = lines[actionableIndex];
  if (firstLine === undefined) {
    // Not reachable for an index returned by findIndex; keeps the lookup type-safe.
    return undefined;
  }
  if (!firstLine.startsWith('Missing bundled Python runtime manifest')) {
    return trimErrorPrefix(firstLine);
  }

  // Keep only the source-checkout build hint after the manifest error.
  const followupLines = lines
    .slice(actionableIndex + 1)
    .filter((line) => !RUNTIME_BACKED_RETRY_LINE_RE.test(line))
    .filter((line) => !/\bRetry:\s/.test(line))
    .filter((line) => line.startsWith('In a source checkout, build the local runtime assets with:'));
  return [firstLine, ...followupLines].join('\n');
}
|
||||
|
||||
export async function executePublicIngestTarget(
|
||||
|
|
@ -737,7 +779,7 @@ export async function executePublicIngestTarget(
|
|||
args,
|
||||
'failed',
|
||||
'database-schema',
|
||||
capturedScanIo ? firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined,
|
||||
capturedScanIo ? capturedFailureMessage(capturedScanIo.capturedOutput()) : undefined,
|
||||
);
|
||||
}
|
||||
deps.onPhaseEnd?.('database-schema', 'done');
|
||||
|
|
@ -779,7 +821,7 @@ export async function executePublicIngestTarget(
|
|||
args,
|
||||
'failed',
|
||||
'query-history',
|
||||
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
|
||||
capturedIngestIo ? capturedFailureMessage(capturedIngestIo.capturedOutput()) : undefined,
|
||||
);
|
||||
}
|
||||
deps.onPhaseEnd?.('query-history', 'done');
|
||||
|
|
@ -819,7 +861,7 @@ export async function executePublicIngestTarget(
|
|||
args,
|
||||
exitCode === 0 ? 'done' : 'failed',
|
||||
'source-ingest',
|
||||
capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined,
|
||||
capturedIngestIo ? capturedFailureMessage(capturedIngestIo.capturedOutput()) : undefined,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -831,6 +873,22 @@ export async function runKtxPublicIngest(
|
|||
const loadProject = deps.loadProject ?? loadKtxProject;
|
||||
const project = await loadProject({ projectDir: args.projectDir });
|
||||
if (shouldUseForegroundContextBuildView(args, io)) {
|
||||
const plan = buildPublicIngestPlan(project, args);
|
||||
const requirements = resolvePublicIngestRuntimeRequirements(plan, { env: deps.env ?? process.env });
|
||||
const ensureRuntime = deps.ensureRuntime ?? ensureManagedPythonCommandRuntime;
|
||||
for (const feature of requirements.features) {
|
||||
try {
|
||||
await ensureRuntime({
|
||||
cliVersion: args.cliVersion ?? '0.0.0-private',
|
||||
installPolicy: args.runtimeInstallPolicy ?? 'prompt',
|
||||
io,
|
||||
feature,
|
||||
});
|
||||
} catch (error) {
|
||||
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
const { runContextBuild } = await import('./context-build-view.js');
|
||||
const contextBuild = deps.runContextBuild ?? runContextBuild;
|
||||
const result = await contextBuild(
|
||||
|
|
|
|||
55
packages/cli/src/release-version.ts
Normal file
55
packages/cli/src/release-version.ts
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import { dirname, join, parse } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
/**
 * Matches `MAJOR.MINOR.PATCH` with optional `-prerelease` and `+build` parts.
 * Numeric core components may not have leading zeros.
 */
const semverPattern =
  /^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-([0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?$/;

/** Type guard: true for non-null objects that are not arrays. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}

/**
 * Returns `value` when it is a string matching {@link semverPattern}.
 *
 * @param value Candidate version read from a package or policy file.
 * @param source Label of where the value came from, used in the error message.
 * @throws Error when `value` is not a string or does not match the pattern.
 */
function assertReleaseVersion(value: unknown, source: string): string {
  if (typeof value === 'string' && semverPattern.test(value)) {
    return value;
  }
  throw new Error(`Invalid KTX release version in ${source}`);
}
|
||||
|
||||
/**
 * Walks upward from `startDir` looking for a `release-policy.json` file.
 *
 * Each directory is checked in turn, starting with `startDir` itself. The walk
 * stops at the filesystem root of `startDir`, or as soon as `dirname` no longer
 * produces a new directory. The second condition matters for relative start
 * directories, whose parsed root is the empty string and would otherwise never
 * be reached.
 *
 * @param startDir Directory to start searching from.
 * @returns Path of the first `release-policy.json` found, or `undefined`.
 */
function findReleasePolicyPath(startDir: string): string | undefined {
  const root = parse(startDir).root;
  let current = startDir;
  while (true) {
    const candidate = join(current, 'release-policy.json');
    if (existsSync(candidate)) {
      return candidate;
    }
    const parent = dirname(current);
    // `parent === current` is the fixed point of dirname ('/', 'C:\\', or '.').
    if (current === root || parent === current) {
      return undefined;
    }
    current = parent;
  }
}
|
||||
|
||||
/**
 * Reads `publicNpmPackageVersion` from the nearest `release-policy.json` at or
 * above `startDir` (by default, the directory containing this module).
 *
 * @returns The validated version, or `undefined` when no policy file is found.
 * @throws Error when the policy file is not a JSON object or the version is
 *   invalid. JSON syntax errors from `JSON.parse` propagate unchanged.
 */
function readSourceReleaseVersion(startDir = dirname(fileURLToPath(import.meta.url))): string | undefined {
  const located = findReleasePolicyPath(startDir);
  if (located) {
    const parsed = JSON.parse(readFileSync(located, 'utf8')) as unknown;
    if (isPlainObject(parsed)) {
      return assertReleaseVersion(parsed.publicNpmPackageVersion, located);
    }
    throw new Error(`Invalid KTX release policy: ${located}`);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Resolves the runtime version to use for the running CLI package.
 *
 * For the `@kaelio/ktx` package the package's own version is validated and
 * returned. For any other package name the version comes from the nearest
 * `release-policy.json` (searched from `startDir`), falling back to
 * `packageVersion` when no policy file is found.
 *
 * @throws Error when the selected version is not a valid release version.
 */
export function resolveKtxRuntimeVersion(input: {
  packageName: string;
  packageVersion: string;
  startDir?: string;
}): string {
  const { packageName, packageVersion, startDir } = input;
  if (packageName !== '@kaelio/ktx') {
    return readSourceReleaseVersion(startDir) ?? packageVersion;
  }
  return assertReleaseVersion(packageVersion, `${packageName}/package.json`);
}
|
||||
81
packages/cli/src/runtime-requirements.test.ts
Normal file
81
packages/cli/src/runtime-requirements.test.ts
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
resolveProjectRuntimeRequirements,
|
||||
resolvePublicIngestRuntimeRequirements,
|
||||
} from './runtime-requirements.js';
|
||||
|
||||
describe('runtime requirement detection', () => {
  type ProjectOptions = Parameters<typeof resolveProjectRuntimeRequirements>[1];

  /** Runtime features required by `config` under the given options. */
  const projectFeatures = (config: KtxProjectConfig, options?: ProjectOptions) =>
    resolveProjectRuntimeRequirements(config, options).features;

  it('requires core for agent/MCP setup', () => {
    const defaults = buildDefaultKtxProjectConfig();

    expect(projectFeatures(defaults, { agents: true })).toEqual(['core']);
  });

  it('requires core for Looker source ingest unless an external daemon is configured', () => {
    const lookerProject: KtxProjectConfig = {
      ...buildDefaultKtxProjectConfig(),
      connections: {
        looker: { driver: 'looker', base_url: 'https://looker.example.com', client_id: 'client-id' },
      },
    };

    expect(projectFeatures(lookerProject)).toEqual(['core']);
    // An external daemon URL removes the need for the managed runtime.
    expect(projectFeatures(lookerProject, { env: { KTX_DAEMON_URL: 'http://127.0.0.1:8765' } })).toEqual([]);
  });

  it('requires core for query-history ingest unless SQL analysis is externally configured', () => {
    const queryHistoryProject: KtxProjectConfig = {
      ...buildDefaultKtxProjectConfig(),
      connections: {
        warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } },
      },
    };

    expect(projectFeatures(queryHistoryProject)).toEqual(['core']);
    expect(projectFeatures(queryHistoryProject, { env: { KTX_SQL_ANALYSIS_URL: 'http://127.0.0.1:8765' } })).toEqual(
      [],
    );
  });

  it('requires local-embeddings for managed sentence-transformers embeddings', () => {
    const localEmbeddingsProject: KtxProjectConfig = {
      ...buildDefaultKtxProjectConfig(),
      ingest: {
        ...buildDefaultKtxProjectConfig().ingest,
        embeddings: {
          backend: 'sentence-transformers' as const,
          model: 'all-MiniLM-L6-v2',
          dimensions: 384,
          sentenceTransformers: {
            base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL,
          },
        },
      },
    };

    expect(projectFeatures(localEmbeddingsProject)).toEqual(['local-embeddings']);
  });

  it('detects foreground ingest runtime needs from selected query-history targets', () => {
    const { features } = resolvePublicIngestRuntimeRequirements({
      projectDir: '/tmp/project',
      warnings: [],
      targets: [
        {
          connectionId: 'warehouse',
          driver: 'postgres',
          operation: 'database-ingest',
          debugCommand: 'ktx ingest warehouse --debug',
          steps: ['database-schema', 'query-history'],
          queryHistory: { enabled: true },
        },
      ],
    });

    expect(features).toEqual(['core']);
  });
});
|
||||
168
packages/cli/src/runtime-requirements.ts
Normal file
168
packages/cli/src/runtime-requirements.ts
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import type {
|
||||
KtxProjectConfig,
|
||||
KtxProjectConnectionConfig,
|
||||
KtxProjectEmbeddingConfig,
|
||||
} from '@ktx/context/project';
|
||||
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
|
||||
import type { KtxPublicIngestPlan } from './public-ingest.js';
|
||||
|
||||
/** Environment map consulted for external-service overrides. */
type KtxRuntimeEnv = NodeJS.ProcessEnv | Record<string, string | undefined>;

/** Why a managed runtime feature is needed. */
type KtxRuntimeRequirementReason =
  | 'agent-mcp'
  | 'query-history'
  | 'looker-source'
  | 'database-introspection'
  | 'local-embeddings';

/** A single reason for needing one managed runtime feature. */
interface KtxRuntimeRequirement {
  /** Runtime feature that must be available. */
  feature: KtxRuntimeFeature;
  /** Machine-readable category of the requirement. */
  reason: KtxRuntimeRequirementReason;
  /** Human-readable explanation of the requirement. */
  detail: string;
}

/** De-duplicated requirements together with the distinct features they need. */
export interface KtxRuntimeRequirements {
  features: KtxRuntimeFeature[];
  requirements: KtxRuntimeRequirement[];
}

/** Options for resolving requirements from a whole project config. */
export interface KtxProjectRuntimeRequirementOptions {
  /** When true, agent MCP setup is counted as a `core` requirement. */
  agents?: boolean;
  /** When true, database introspection fallback is counted unless a daemon override is set. */
  databaseIntrospectionFallback?: boolean;
  /** Environment to inspect for overrides; resolvers fall back to `process.env`. */
  env?: KtxRuntimeEnv;
}

/** Options for resolving requirements from a public ingest plan. */
export interface KtxPublicIngestRuntimeRequirementOptions {
  /** Environment to inspect for overrides; resolvers fall back to `process.env`. */
  env?: KtxRuntimeEnv;
}
|
||||
|
||||
/**
 * Converts a driver/adapter value to a trimmed, lower-case string.
 * `null` and `undefined` become the empty string.
 */
function normalizeDriver(driver: unknown): string {
  const raw = driver ?? '';
  const text = String(raw);
  return text.trim().toLowerCase();
}
|
||||
|
||||
/**
 * Views `value` as a string-keyed record.
 * Non-object values and `null` yield a fresh empty object; objects (including
 * arrays) are returned as-is.
 */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * True only when the connection has `context.queryHistory.enabled` set to
 * exactly `true`. Missing or non-object intermediate values count as disabled.
 */
function hasEnabledQueryHistory(connection: KtxProjectConnectionConfig): boolean {
  const { context } = recordValue(connection);
  const { queryHistory } = recordValue(context);
  return recordValue(queryHistory).enabled === true;
}
|
||||
|
||||
/** True when `KTX_DAEMON_URL` is set to a non-blank string in `env`. */
function hasDaemonOverride(env: NodeJS.ProcessEnv | Record<string, string | undefined>): boolean {
  const daemonUrl = env.KTX_DAEMON_URL;
  if (typeof daemonUrl !== 'string') {
    return false;
  }
  return daemonUrl.trim().length > 0;
}
|
||||
|
||||
/**
 * True when SQL analysis is provided externally: either `KTX_SQL_ANALYSIS_URL`
 * is a non-blank string, or a daemon override is configured.
 */
function hasSqlAnalysisOverride(env: NodeJS.ProcessEnv | Record<string, string | undefined>): boolean {
  const sqlAnalysisUrl = env.KTX_SQL_ANALYSIS_URL;
  if (typeof sqlAnalysisUrl === 'string' && sqlAnalysisUrl.trim().length > 0) {
    return true;
  }
  return hasDaemonOverride(env);
}
|
||||
|
||||
/**
 * True when embeddings use the `sentence-transformers` backend and the base URL
 * is unset, empty, or the managed sentinel URL. Any other base URL returns false.
 */
function requiresManagedLocalEmbeddings(embeddings: KtxProjectEmbeddingConfig): boolean {
  if (embeddings.backend === 'sentence-transformers') {
    const baseUrl = embeddings.sentenceTransformers?.base_url;
    switch (baseUrl) {
      case undefined:
      case '':
      case MANAGED_SENTENCE_TRANSFORMERS_BASE_URL:
        return true;
      default:
        return false;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Drops repeated requirements and derives the sorted list of distinct features.
 *
 * Two requirements are duplicates when feature, reason and detail all match;
 * the first occurrence is kept and input order is otherwise preserved.
 * Features are ordered with `localeCompare`.
 */
function uniqueRequirements(requirements: KtxRuntimeRequirement[]): KtxRuntimeRequirements {
  const seenKeys = new Set<string>();
  const kept = requirements.filter(({ feature, reason, detail }) => {
    const key = `${feature}:${reason}:${detail}`;
    if (seenKeys.has(key)) {
      return false;
    }
    seenKeys.add(key);
    return true;
  });

  const distinctFeatures = Array.from(new Set(kept.map(({ feature }) => feature)));
  distinctFeatures.sort((a, b) => a.localeCompare(b));
  return { features: distinctFeatures, requirements: kept };
}
|
||||
|
||||
/**
 * Works out which managed runtime features a project configuration needs.
 *
 * `core` is required when:
 * - `options.agents` is true (agent MCP setup);
 * - `options.databaseIntrospectionFallback` is true and no daemon override is set;
 * - a connection uses the `looker` or `local_looker` driver and no daemon
 *   override is set;
 * - a connection has query history enabled and no SQL-analysis override is set.
 *
 * `local-embeddings` is required when ingest embeddings use managed
 * sentence-transformers.
 *
 * @param config Project configuration to inspect.
 * @param options Extra switches and the environment to read overrides from
 *   (defaults to `process.env`).
 * @returns De-duplicated requirements and the sorted distinct features.
 */
export function resolveProjectRuntimeRequirements(
  config: KtxProjectConfig,
  options: KtxProjectRuntimeRequirementOptions = {},
): KtxRuntimeRequirements {
  const env = options.env ?? process.env;
  const daemonOverridden = hasDaemonOverride(env);
  const sqlAnalysisOverridden = hasSqlAnalysisOverride(env);
  const collected: KtxRuntimeRequirement[] = [];
  const needCore = (reason: KtxRuntimeRequirement['reason'], detail: string): void => {
    collected.push({ feature: 'core', reason, detail });
  };

  if (options.agents === true) {
    needCore('agent-mcp', 'Agent MCP setup uses semantic-layer query tools and SQL validation.');
  }

  if (options.databaseIntrospectionFallback === true && !daemonOverridden) {
    needCore('database-introspection', 'Database introspection fallback uses the Python daemon.');
  }

  for (const [connectionId, connection] of Object.entries(config.connections)) {
    const driver = normalizeDriver(connection.driver);
    const isLooker = driver === 'looker' || driver === 'local_looker';
    if (isLooker && !daemonOverridden) {
      needCore('looker-source', `${connectionId} uses Looker identifier parsing.`);
    }

    if (!sqlAnalysisOverridden && hasEnabledQueryHistory(connection)) {
      needCore('query-history', `${connectionId} has query history enabled.`);
    }
  }

  if (requiresManagedLocalEmbeddings(config.ingest.embeddings)) {
    collected.push({
      feature: 'local-embeddings',
      reason: 'local-embeddings',
      detail: 'Local sentence-transformers embeddings use the managed Python runtime.',
    });
  }

  return uniqueRequirements(collected);
}
|
||||
|
||||
/**
 * Works out which managed runtime features the targets of an ingest plan need.
 *
 * For each target, `core` is required when query history is enabled and no
 * SQL-analysis override is set, and when the target's driver is `looker` or
 * `local_looker` (or its adapter is `looker`) and no daemon override is set.
 *
 * @param plan Ingest plan whose targets are inspected.
 * @param options Environment to read overrides from (defaults to `process.env`).
 * @returns De-duplicated requirements and the sorted distinct features.
 */
export function resolvePublicIngestRuntimeRequirements(
  plan: KtxPublicIngestPlan,
  options: KtxPublicIngestRuntimeRequirementOptions = {},
): KtxRuntimeRequirements {
  const env = options.env ?? process.env;
  const daemonOverridden = hasDaemonOverride(env);
  const sqlAnalysisOverridden = hasSqlAnalysisOverride(env);
  const collected: KtxRuntimeRequirement[] = [];

  plan.targets.forEach((target) => {
    const driver = normalizeDriver(target.driver);
    const adapter = normalizeDriver(target.adapter);

    if (target.queryHistory?.enabled === true && !sqlAnalysisOverridden) {
      collected.push({
        feature: 'core',
        reason: 'query-history',
        detail: `${target.connectionId} query-history ingest uses SQL analysis.`,
      });
    }

    const usesLooker = driver === 'looker' || driver === 'local_looker' || adapter === 'looker';
    if (usesLooker && !daemonOverridden) {
      collected.push({
        feature: 'core',
        reason: 'looker-source',
        detail: `${target.connectionId} uses Looker identifier parsing.`,
      });
    }
  });

  return uniqueRequirements(collected);
}
|
||||
|
|
@ -176,12 +176,33 @@ describe('setup embeddings step', () => {
|
|||
expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings);
|
||||
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
|
||||
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings');
|
||||
expect(spinnerEvents).toContainEqual(
|
||||
'start:Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.',
|
||||
);
|
||||
expect(spinnerEvents).toContainEqual('start:Testing local embeddings (all-MiniLM-L6-v2)');
|
||||
expect(io.stdout()).toContain('Embeddings ready: yes');
|
||||
});
|
||||
|
||||
it('uses a short non-animated local embeddings health-check status by default', async () => {
|
||||
const io = makeIo();
|
||||
const healthCheck = vi.fn(async () => ({ ok: true as const }));
|
||||
const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] });
|
||||
|
||||
const result = await runKtxSetupEmbeddingsStep(
|
||||
{
|
||||
projectDir: tempDir,
|
||||
inputMode: 'auto',
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
skipEmbeddings: false,
|
||||
},
|
||||
io.io,
|
||||
{ prompts, env: {}, healthCheck, ensureLocalEmbeddings: vi.fn(async () => managedDaemon()) },
|
||||
);
|
||||
|
||||
expect(result.status).toBe('ready');
|
||||
expect(io.stderr()).toContain('Testing local embeddings (all-MiniLM-L6-v2)');
|
||||
expect(io.stderr()).not.toContain('First run may take up to 60 seconds');
|
||||
expect(io.stderr().match(/Testing local embeddings/g)).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('shows live progress while local sentence-transformers embeddings are being tested', async () => {
|
||||
const io = makeIo();
|
||||
const prompts = makePromptAdapter({ selectValues: ['sentence-transformers'] });
|
||||
|
|
@ -213,9 +234,7 @@ describe('setup embeddings step', () => {
|
|||
);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(spinnerEvents).toContainEqual(
|
||||
'start:Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.',
|
||||
);
|
||||
expect(spinnerEvents).toContainEqual('start:Testing local embeddings (all-MiniLM-L6-v2)');
|
||||
});
|
||||
|
||||
expect(resolveHealthCheck).toBeDefined();
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import {
|
|||
} from '@ktx/context/project';
|
||||
import { type KtxEmbeddingConfig, type KtxEmbeddingHealthCheckResult, runKtxEmbeddingHealthCheck } from '@ktx/llm';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { createClackSpinner, type KtxCliSpinner } from './clack.js';
|
||||
import { createStaticCliSpinner, type KtxCliSpinner } from './clack.js';
|
||||
import {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
managedLocalEmbeddingHealthConfig,
|
||||
|
|
@ -316,10 +316,7 @@ async function promptAfterLocalEmbeddingFailure(
|
|||
|
||||
/**
 * Builds the status line shown when an embeddings health check starts.
 *
 * The local backend gets a short message naming only the model; every other
 * backend gets a message with the backend name, model and dimensions.
 *
 * @param backend Selected embeddings backend.
 * @param model Embedding model name.
 * @param dimensions Embedding dimensions; only used for non-local backends.
 */
function healthCheckStartText(backend: KtxSetupEmbeddingBackend, model: string, dimensions: number): string {
  if (backend === LOCAL_EMBEDDING_BACKEND) {
    return `Testing local embeddings (${model})`;
  }
  return `Checking ${backend} embeddings (${model}, ${dimensions} dimensions).`;
}
|
||||
|
|
@ -424,7 +421,7 @@ export async function runKtxSetupEmbeddingsStep(
|
|||
dimensions,
|
||||
credentialValue,
|
||||
});
|
||||
const healthSpinner = (deps.spinner ?? createClackSpinner)();
|
||||
const healthSpinner = (deps.spinner ?? (() => createStaticCliSpinner(io)))();
|
||||
const progress = startHealthCheckProgress(healthSpinner, healthCheckStartText(selectedBackend, model, dimensions));
|
||||
let health: KtxEmbeddingHealthCheckResult;
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ describe('setup Anthropic model step', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('offers Vertex AI as an Anthropic model provider option', async () => {
|
||||
it('offers Anthropic provider paths in the preferred order', async () => {
|
||||
const prompts = makePromptAdapter({ providerChoice: 'back' });
|
||||
|
||||
const result = await runKtxSetupAnthropicModelStep(
|
||||
|
|
@ -177,10 +177,12 @@ describe('setup Anthropic model step', () => {
|
|||
expect(prompts.select).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
message: expect.stringContaining('Which LLM provider should KTX use?'),
|
||||
options: expect.arrayContaining([
|
||||
options: [
|
||||
{ value: 'claude-code', label: 'Claude subscription (Pro/Max)' },
|
||||
{ value: 'anthropic', label: 'Anthropic API key' },
|
||||
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
]),
|
||||
],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -509,12 +509,12 @@ async function chooseBackend(
|
|||
}
|
||||
const choice = await prompts.select({
|
||||
message: 'Which LLM provider should KTX use?',
|
||||
options: [
|
||||
{ value: 'anthropic', label: 'Anthropic API' },
|
||||
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
|
||||
{ value: 'claude-code', label: 'Local Claude Code session' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
options: [
|
||||
{ value: 'claude-code', label: 'Claude subscription (Pro/Max)' },
|
||||
{ value: 'anthropic', label: 'Anthropic API key' },
|
||||
{ value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' },
|
||||
{ value: 'back', label: 'Back' },
|
||||
],
|
||||
});
|
||||
if (choice === 'back') {
|
||||
return { status: 'back' };
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ const readyStatus: KtxSetupStatus = {
|
|||
embeddings: { backend: 'openai', ready: true, model: 'text-embedding-3-small', dimensions: 1536 },
|
||||
databases: [{ connectionId: 'warehouse', ready: true }],
|
||||
sources: [],
|
||||
runtime: { required: false, ready: true, features: [] },
|
||||
context: { ready: true, status: 'completed' },
|
||||
agents: [{ target: 'codex', scope: 'project', ready: true }],
|
||||
};
|
||||
|
|
@ -16,6 +17,7 @@ describe('setup ready menu', () => {
|
|||
it('recognizes a ready setup only when required sections are ready', () => {
|
||||
expect(isKtxSetupReady(readyStatus)).toBe(true);
|
||||
expect(isKtxSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false);
|
||||
expect(isKtxSetupReady({ ...readyStatus, runtime: { required: true, ready: false, features: ['core'] } })).toBe(false);
|
||||
expect(isKtxSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false);
|
||||
expect(isKtxSetupReady({ ...readyStatus, agents: [] })).toBe(false);
|
||||
});
|
||||
|
|
@ -24,6 +26,9 @@ describe('setup ready menu', () => {
|
|||
expect(isKtxPreAgentSetupReady(readyStatus)).toBe(true);
|
||||
expect(isKtxPreAgentSetupReady({ ...readyStatus, agents: [] })).toBe(true);
|
||||
expect(isKtxPreAgentSetupReady({ ...readyStatus, embeddings: { ready: false } })).toBe(false);
|
||||
expect(isKtxPreAgentSetupReady({ ...readyStatus, runtime: { required: true, ready: false, features: ['core'] } })).toBe(
|
||||
false,
|
||||
);
|
||||
expect(isKtxPreAgentSetupReady({ ...readyStatus, context: { ready: false, status: 'not_started' } })).toBe(false);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,15 @@ import {
|
|||
} from './setup-prompts.js';
|
||||
import type { KtxSetupStatus } from './setup.js';
|
||||
|
||||
/** Choices offered by the setup "ready" change menu, including the `runtime` entry. */
export type KtxSetupReadyAction =
  | 'models'
  | 'embeddings'
  | 'databases'
  | 'sources'
  | 'runtime'
  | 'context'
  | 'agents'
  | 'exit';
|
||||
|
||||
export interface KtxSetupReadyMenuPromptAdapter {
|
||||
select(options: { message: string; options: KtxSetupPromptOption[] }): Promise<string>;
|
||||
|
|
@ -22,6 +30,7 @@ export function isKtxPreAgentSetupReady(status: KtxSetupStatus): boolean {
|
|||
status.embeddings.ready &&
|
||||
status.databases.every((database) => database.ready) &&
|
||||
status.sources.every((source) => source.ready) &&
|
||||
status.runtime.ready &&
|
||||
status.context.ready
|
||||
);
|
||||
}
|
||||
|
|
@ -46,6 +55,7 @@ export async function runKtxSetupReadyChangeMenu(
|
|||
{ value: 'embeddings', label: 'Embeddings' },
|
||||
{ value: 'databases', label: 'Databases' },
|
||||
{ value: 'sources', label: 'Context sources' },
|
||||
...(status.runtime.required ? [{ value: 'runtime', label: 'Runtime' }] : []),
|
||||
{ value: 'context', label: 'Rebuild KTX context' },
|
||||
{ value: 'agents', label: 'Agent integration' },
|
||||
{ value: 'exit', label: 'Exit' },
|
||||
|
|
|
|||
153
packages/cli/src/setup-runtime.test.ts
Normal file
153
packages/cli/src/setup-runtime.test.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { MANAGED_SENTENCE_TRANSFORMERS_BASE_URL } from '@ktx/context';
|
||||
import { buildDefaultKtxProjectConfig, readKtxSetupState, type KtxProjectConfig } from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import type { ManagedPythonCommandRuntime } from './managed-python-command.js';
|
||||
import { runKtxSetupRuntimeStep } from './setup-runtime.js';
|
||||
|
||||
/**
 * Builds an in-memory CLI io pair for tests.
 * Everything written to `io.stdout` / `io.stderr` is appended to a buffer that
 * can be read back through the `stdout()` and `stderr()` accessors.
 */
function makeIo() {
  const captured = { stdout: '', stderr: '' };
  const sink = (stream: 'stdout' | 'stderr') => ({
    write: (chunk: string) => {
      captured[stream] += chunk;
    },
  });
  return {
    io: { stdout: sink('stdout'), stderr: sink('stderr') },
    stdout: () => captured.stdout,
    stderr: () => captured.stderr,
  };
}
|
||||
|
||||
/** Returns a mock project loader that always resolves to `{ config }`. */
function projectConfig(config: KtxProjectConfig) {
  const loadProject = async () => ({ config });
  return vi.fn(loadProject);
}
|
||||
|
||||
describe('runKtxSetupRuntimeStep', () => {
  type StepArgs = Parameters<typeof runKtxSetupRuntimeStep>[0];

  let tempDir: string;

  /** Default step arguments for the current temp project, with per-test overrides. */
  const stepArgs = (overrides: Partial<StepArgs> = {}): StepArgs => ({
    projectDir: tempDir,
    inputMode: 'auto',
    cliVersion: '0.2.0',
    runtimeInstallPolicy: 'prompt',
    agents: true,
    ...overrides,
  });

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-runtime-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('ensures core runtime for agent setup and records the runtime step', async () => {
    const io = makeIo();
    const ensureRuntime = vi.fn(async (): Promise<ManagedPythonCommandRuntime> => ({} as ManagedPythonCommandRuntime));

    const outcome = runKtxSetupRuntimeStep(stepArgs(), io.io, {
      loadProject: projectConfig(buildDefaultKtxProjectConfig()),
      ensureRuntime,
      env: {},
    });
    await expect(outcome).resolves.toMatchObject({ status: 'ready' });

    expect(ensureRuntime).toHaveBeenCalledWith(
      expect.objectContaining({
        cliVersion: '0.2.0',
        installPolicy: 'prompt',
        feature: 'core',
      }),
    );
    const state = await readKtxSetupState(tempDir);
    expect(state.completed_steps).toContain('runtime');
    expect(io.stdout()).toContain('Runtime ready: yes (core)');
  });

  it('fails fast when required runtime features cannot be installed in no-input mode', async () => {
    const io = makeIo();
    const ensureRuntime = vi.fn(async () => {
      throw new Error('KTX Python runtime is required for this command. Run: ktx dev runtime install --yes');
    });

    const outcome = runKtxSetupRuntimeStep(
      stepArgs({ inputMode: 'disabled', runtimeInstallPolicy: 'never' }),
      io.io,
      {
        loadProject: projectConfig(buildDefaultKtxProjectConfig()),
        ensureRuntime,
        env: {},
      },
    );
    await expect(outcome).resolves.toMatchObject({ status: 'failed' });

    expect(ensureRuntime).toHaveBeenCalledWith(expect.objectContaining({ installPolicy: 'never' }));
    // A failed install must not be recorded as a completed setup step.
    const state = await readKtxSetupState(tempDir);
    expect(state.completed_steps).not.toContain('runtime');
    expect(io.stderr()).toContain('ktx dev runtime install --yes');
  });

  it('starts the managed local embeddings daemon for configured sentence-transformers embeddings', async () => {
    const io = makeIo();
    const ensureLocalEmbeddings = vi.fn(async () => ({
      baseUrl: 'http://127.0.0.1:61234',
      env: { KTX_MANAGED_SENTENCE_TRANSFORMERS_BASE_URL: 'http://127.0.0.1:61234' },
    }));
    const localEmbeddingsProject: KtxProjectConfig = {
      ...buildDefaultKtxProjectConfig(),
      ingest: {
        ...buildDefaultKtxProjectConfig().ingest,
        embeddings: {
          backend: 'sentence-transformers',
          model: 'all-MiniLM-L6-v2',
          dimensions: 384,
          sentenceTransformers: { base_url: MANAGED_SENTENCE_TRANSFORMERS_BASE_URL },
        },
      },
    };

    const outcome = runKtxSetupRuntimeStep(
      stepArgs({ runtimeInstallPolicy: 'auto', agents: false }),
      io.io,
      {
        loadProject: projectConfig(localEmbeddingsProject),
        ensureLocalEmbeddings,
        env: {},
      },
    );
    await expect(outcome).resolves.toMatchObject({ status: 'ready' });

    expect(ensureLocalEmbeddings).toHaveBeenCalledWith(
      expect.objectContaining({
        projectDir: tempDir,
        installPolicy: 'auto',
      }),
    );
    expect(io.stdout()).toContain('Runtime ready: yes (local embeddings)');
  });
});
|
||||
103
packages/cli/src/setup-runtime.ts
Normal file
103
packages/cli/src/setup-runtime.ts
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
import {
|
||||
loadKtxProject,
|
||||
markKtxSetupStateStepComplete,
|
||||
type KtxLocalProject,
|
||||
} from '@ktx/context/project';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import {
|
||||
ensureManagedLocalEmbeddingsDaemon,
|
||||
type ManagedLocalEmbeddingsDaemon,
|
||||
} from './managed-local-embeddings.js';
|
||||
import {
|
||||
ensureManagedPythonCommandRuntime,
|
||||
type KtxManagedPythonInstallPolicy,
|
||||
type ManagedPythonCommandRuntime,
|
||||
} from './managed-python-command.js';
|
||||
import type { KtxRuntimeFeature } from './managed-python-runtime.js';
|
||||
import {
|
||||
resolveProjectRuntimeRequirements,
|
||||
type KtxRuntimeRequirements,
|
||||
} from './runtime-requirements.js';
|
||||
|
||||
/** Arguments for the runtime step of `ktx setup`. */
export interface KtxSetupRuntimeArgs {
  /** Project directory to load and to record setup state for. */
  projectDir: string;
  inputMode: 'auto' | 'disabled';
  /** CLI version forwarded to the runtime installers. */
  cliVersion: string;
  /** Install policy forwarded to the runtime installers. */
  runtimeInstallPolicy: KtxManagedPythonInstallPolicy;
  /** Whether agent setup is part of this run. */
  agents: boolean;
  /** Whether database introspection fallback should be counted as a requirement. */
  databaseIntrospectionFallback?: boolean;
}

/** Outcome of the runtime step; every variant carries the resolved requirements. */
export type KtxSetupRuntimeResult =
  | { status: 'ready'; projectDir: string; requirements: KtxRuntimeRequirements }
  | { status: 'skipped'; projectDir: string; requirements: KtxRuntimeRequirements }
  | { status: 'failed'; projectDir: string; requirements: KtxRuntimeRequirements };

/** Options passed to the managed Python runtime installer. */
type EnsureRuntimeOptions = {
  cliVersion: string;
  installPolicy: KtxManagedPythonInstallPolicy;
  io: KtxCliIo;
  feature: KtxRuntimeFeature;
};

/** Options passed to the managed local-embeddings daemon starter. */
type EnsureLocalEmbeddingsOptions = {
  cliVersion: string;
  projectDir: string;
  installPolicy: KtxManagedPythonInstallPolicy;
  io: KtxCliIo;
};

/** Injectable collaborators for the runtime step; each has a production default. */
export interface KtxSetupRuntimeDeps {
  /** Environment to inspect for overrides; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
  /** Project loader; only the `config` of the result is used. */
  loadProject?: (options: { projectDir: string }) => Promise<Pick<KtxLocalProject, 'config'>>;
  /** Installer used for every feature other than `local-embeddings`. */
  ensureRuntime?: (options: EnsureRuntimeOptions) => Promise<ManagedPythonCommandRuntime>;
  /** Starter used for the `local-embeddings` feature. */
  ensureLocalEmbeddings?: (options: EnsureLocalEmbeddingsOptions) => Promise<ManagedLocalEmbeddingsDaemon>;
}
|
||||
|
||||
/** Display label for a runtime feature: `local embeddings` or, for anything else, `core`. */
function formatRuntimeFeature(feature: KtxRuntimeFeature): string {
  if (feature === 'local-embeddings') {
    return 'local embeddings';
  }
  return 'core';
}
|
||||
|
||||
/**
 * Runs the runtime step of setup: resolves which managed runtime features the
 * project needs and makes sure each one is available.
 *
 * - With no required features, a "skipped" line is written to stdout and the
 *   step returns `skipped` without recording anything.
 * - Otherwise each feature is ensured in order: `local-embeddings` through the
 *   local-embeddings starter, every other feature through the runtime installer.
 * - If any of them throws, the error message is written to stderr and the step
 *   returns `failed`; the setup state is not updated.
 * - On success the `runtime` step is marked complete and a summary line is
 *   written to stdout.
 *
 * @param args Step arguments (project directory, CLI version, install policy, switches).
 * @param io CLI streams used for status and error output.
 * @param deps Optional overrides for the loader, installers and environment.
 */
export async function runKtxSetupRuntimeStep(
  args: KtxSetupRuntimeArgs,
  io: KtxCliIo,
  deps: KtxSetupRuntimeDeps = {},
): Promise<KtxSetupRuntimeResult> {
  const { projectDir, cliVersion, runtimeInstallPolicy: installPolicy } = args;

  const loadProjectForRuntime = deps.loadProject ?? loadKtxProject;
  const { config } = await loadProjectForRuntime({ projectDir });
  const requirements = resolveProjectRuntimeRequirements(config, {
    agents: args.agents,
    databaseIntrospectionFallback: args.databaseIntrospectionFallback,
    env: deps.env ?? process.env,
  });

  if (requirements.features.length === 0) {
    io.stdout.write('│ Runtime setup skipped.\n');
    return { status: 'skipped', projectDir, requirements };
  }

  const ensureRuntime = deps.ensureRuntime ?? ensureManagedPythonCommandRuntime;
  const ensureLocalEmbeddings = deps.ensureLocalEmbeddings ?? ensureManagedLocalEmbeddingsDaemon;
  // Local embeddings go through their own starter; everything else uses the runtime installer.
  const ensureFeature = (feature: KtxRuntimeFeature): Promise<unknown> =>
    feature === 'local-embeddings'
      ? ensureLocalEmbeddings({ cliVersion, projectDir, installPolicy, io })
      : ensureRuntime({ cliVersion, installPolicy, io, feature });

  try {
    // Sequential on purpose: stop at the first feature that cannot be ensured.
    for (const feature of requirements.features) {
      await ensureFeature(feature);
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    io.stderr.write(`${message}\n`);
    return { status: 'failed', projectDir, requirements };
  }

  await markKtxSetupStateStepComplete(projectDir, 'runtime');
  const featureLabels = requirements.features.map(formatRuntimeFeature).join(', ');
  io.stdout.write(`│ Runtime ready: yes (${featureLabels})\n`);
  return { status: 'ready', projectDir, requirements };
}
|
||||
137
packages/cli/src/setup-sources-notion.test.ts
Normal file
137
packages/cli/src/setup-sources-notion.test.ts
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
initKtxProject,
|
||||
type KtxProjectConnectionConfig,
|
||||
parseKtxProjectConfig,
|
||||
serializeKtxProjectConfig,
|
||||
} from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
runKtxSetupSourcesStep,
|
||||
type KtxSetupSourcesPromptAdapter,
|
||||
} from './setup-sources.js';
|
||||
|
||||
// Shared mock state, created through vi.hoisted so the vi.mock factory below can reference it.
const notionMocks = vi.hoisted(() => ({
  // Tokens handed to the mocked NotionClient constructor, in call order.
  tokens: [] as string[],
  retrieveBotUser: vi.fn(async () => ({ name: 'Docs Bot' })),
  retrievePage: vi.fn(async () => ({ id: 'page-1' })),
}));

// Keep the real '@ktx/context/ingest' exports and replace only NotionClient
// with a constructor that records its token and returns the stubbed API.
vi.mock('@ktx/context/ingest', async (importOriginal) => {
  const actual = await importOriginal<typeof import('@ktx/context/ingest')>();
  return {
    ...actual,
    NotionClient: vi.fn().mockImplementation(function NotionClient(token: string) {
      notionMocks.tokens.push(token);
      return {
        retrieveBotUser: notionMocks.retrieveBotUser,
        retrievePage: notionMocks.retrievePage,
      };
    }),
  };
});
|
||||
|
||||
/**
 * Builds an in-memory CLI io pair for tests.
 *
 * Everything written to `io.stdout` / `io.stderr` is appended to a buffer that
 * can be read back through the returned `stdout()` / `stderr()` accessors.
 * The fake stdout reports `isTTY: true`.
 */
function makeIo() {
  let stdout = '';
  let stderr = '';
  return {
    io: {
      stdout: {
        isTTY: true,
        write: (chunk: string) => {
          stdout += chunk;
        },
      },
      stderr: {
        write: (chunk: string) => {
          stderr += chunk;
        },
      },
    },
    stdout: () => stdout,
    stderr: () => stderr,
  };
}
|
||||
|
||||
/**
 * Creates a scripted prompt adapter for the sources setup step.
 *
 * Each `multiselect` / `select` call consumes the next queued answer. When a
 * queue is exhausted, `multiselect` resolves to `[]` and `select` resolves to
 * `'back'`. `text` always resolves to `''` and `password` to `undefined`.
 */
function prompts(values: { multiselect?: string[][]; select?: string[] }): KtxSetupSourcesPromptAdapter {
  // Copy the queues so shifting answers never mutates the caller's arrays.
  const multiselectValues = [...(values.multiselect ?? [])];
  const selectValues = [...(values.select ?? [])];
  return {
    multiselect: vi.fn(async () => multiselectValues.shift() ?? []),
    select: vi.fn(async () => selectValues.shift() ?? 'back'),
    text: vi.fn(async () => ''),
    password: vi.fn(async () => undefined),
    cancel: vi.fn(),
    log: vi.fn(),
  };
}
|
||||
|
||||
describe('setup sources Notion validation', () => {
  let tempDir: string;
  let projectDir: string;

  beforeEach(async () => {
    // Reset the shared Notion mock state, then start from a freshly initialised project.
    notionMocks.tokens.length = 0;
    notionMocks.retrieveBotUser.mockClear();
    notionMocks.retrievePage.mockClear();
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-sources-notion-'));
    projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /** Reads and parses the project's `ktx.yaml`. */
  async function readConfig() {
    return parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'));
  }

  /**
   * Rewrites `ktx.yaml` so it contains a `warehouse` postgres connection, the
   * given connection under `connectionId`, and `warehouse` as the only
   * configured setup database.
   */
  async function writeConfigConnection(connectionId: string, connection: KtxProjectConnectionConfig) {
    const config = await readConfig();
    await writeFile(
      join(projectDir, 'ktx.yaml'),
      serializeKtxProjectConfig({
        ...config,
        connections: {
          ...config.connections,
          warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
          [connectionId]: connection,
        },
        setup: {
          ...config.setup,
          database_connection_ids: ['warehouse'],
        },
      }),
      'utf-8',
    );
  }

  it('validates an existing Notion source that uses an inline auth token', async () => {
    await writeConfigConnection('notion', {
      driver: 'notion',
      auth_token: 'ntn_inline_token',
      crawl_mode: 'all_accessible',
    });
    const io = makeIo();

    await expect(
      runKtxSetupSourcesStep(
        { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false },
        io.io,
        {
          prompts: prompts({
            multiselect: [['notion']],
            select: ['existing:notion'],
          }),
        },
      ),
    ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion'] });

    // The inline token must reach the Notion client as-is and validation must stay silent.
    expect(notionMocks.tokens).toEqual(['ntn_inline_token']);
    expect(notionMocks.retrieveBotUser).toHaveBeenCalledOnce();
    expect(io.stderr()).toBe('');
  });
});
|
||||
|
|
@ -2,7 +2,10 @@ import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises';
|
|||
import { tmpdir } from 'node:os';
|
||||
import { join, relative, resolve } from 'node:path';
|
||||
import { fileURLToPath, pathToFileURL } from 'node:url';
|
||||
import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections';
|
||||
import {
|
||||
localConnectionTypeForConfig,
|
||||
resolveNotionConnectionAuthToken,
|
||||
} from '@ktx/context/connections';
|
||||
import { resolveKtxConfigReference } from '@ktx/context/core';
|
||||
import {
|
||||
cloneOrPull,
|
||||
|
|
@ -620,7 +623,10 @@ async function defaultValidateLookml(connection: KtxProjectConnectionConfig): Pr
|
|||
}
|
||||
|
||||
async function defaultValidateNotion(connection: KtxProjectConnectionConfig): Promise<SourceValidationResult> {
|
||||
const token = await resolveNotionAuthToken(String(connection.auth_token_ref));
|
||||
const token = await resolveNotionConnectionAuthToken({
|
||||
auth_token: stringField(connection.auth_token) ?? null,
|
||||
auth_token_ref: stringField(connection.auth_token_ref) ?? null,
|
||||
});
|
||||
const client: NotionApi = new NotionClient(token);
|
||||
await client.retrieveBotUser();
|
||||
const roots = Array.isArray(connection.root_page_ids)
|
||||
|
|
|
|||
|
|
@ -38,6 +38,51 @@ function makeIo() {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds the `ready` runtime-step result used by setup tests: the given
 * project directory with a single `'core'` feature and no further requirements.
 */
function runtimeReady(projectDir: string) {
  return {
    status: 'ready' as const,
    projectDir,
    requirements: { features: ['core' as const], requirements: [] },
  };
}
|
||||
|
||||
/**
 * Writes a fake installed runtime under `<rootDir>/.runtime/<cliVersion>`.
 *
 * Creates empty `python` and `ktx-daemon` files in `.venv/bin` and a
 * `manifest.json` (pretty-printed with a trailing newline) that lists the
 * `'core'` feature and points at those files.
 *
 * @param rootDir - Directory that receives the `.runtime` folder.
 * @param cliVersion - Version directory name, also recorded in the manifest.
 * @returns The path of the `.runtime` directory.
 */
async function writeReadyRuntime(rootDir: string, cliVersion = '0.2.0') {
  const runtimeRoot = join(rootDir, '.runtime');
  const versionDir = join(runtimeRoot, cliVersion);
  const binDir = join(versionDir, '.venv', 'bin');
  const pythonPath = join(binDir, 'python');
  const daemonPath = join(binDir, 'ktx-daemon');

  await mkdir(binDir, { recursive: true });
  await writeFile(pythonPath, '', 'utf-8');
  await writeFile(daemonPath, '', 'utf-8');

  // Key order is kept stable so the serialized manifest is deterministic.
  const manifest = {
    schemaVersion: 1,
    cliVersion,
    installedAt: '2026-05-09T10:02:00.000Z',
    asset: {
      schemaVersion: 1,
      distributionName: 'kaelio-ktx',
      normalizedName: 'kaelio_ktx',
      version: '0.1.0',
      wheel: {
        file: 'kaelio_ktx-0.1.0-py3-none-any.whl',
        sha256: '0'.repeat(64),
        bytes: 0,
      },
    },
    features: ['core'],
    python: {
      executable: pythonPath,
      daemonExecutable: daemonPath,
    },
    installLog: join(versionDir, 'install.log'),
  };
  await writeFile(join(versionDir, 'manifest.json'), `${JSON.stringify(manifest, null, 2)}\n`, 'utf-8');
  return runtimeRoot;
}
|
||||
|
||||
describe('setup status', () => {
|
||||
let tempDir: string;
|
||||
|
||||
|
|
@ -1054,7 +1099,7 @@ describe('setup status', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('auto-installs the managed runtime by default during setup', async () => {
|
||||
it('prompts before installing the managed runtime by default during setup', async () => {
|
||||
const io = makeIo();
|
||||
const embeddings = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir }));
|
||||
const context = vi.fn(async () => ({ status: 'failed' as const, projectDir: tempDir }));
|
||||
|
|
@ -1088,14 +1133,14 @@ describe('setup status', () => {
|
|||
expect(embeddings).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
}),
|
||||
io.io,
|
||||
);
|
||||
expect(context).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
cliVersion: '0.2.0',
|
||||
runtimeInstallPolicy: 'auto',
|
||||
runtimeInstallPolicy: 'prompt',
|
||||
}),
|
||||
io.io,
|
||||
);
|
||||
|
|
@ -1508,6 +1553,10 @@ describe('setup status', () => {
|
|||
calls.push('sources');
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
runtime: async () => {
|
||||
calls.push('runtime');
|
||||
return runtimeReady(tempDir);
|
||||
},
|
||||
context: async () => {
|
||||
calls.push('context');
|
||||
return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' };
|
||||
|
|
@ -1524,7 +1573,7 @@ describe('setup status', () => {
|
|||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'context', 'agents']);
|
||||
expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources', 'runtime', 'context', 'agents']);
|
||||
});
|
||||
|
||||
it('commits setup config changes written by later setup steps', async () => {
|
||||
|
|
@ -1565,6 +1614,7 @@ describe('setup status', () => {
|
|||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
runtime: async () => runtimeReady(tempDir),
|
||||
context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }),
|
||||
agents: async () => ({
|
||||
status: 'ready',
|
||||
|
|
@ -1611,6 +1661,10 @@ describe('setup status', () => {
|
|||
embeddings: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
databases: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
sources: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
runtime: async () => {
|
||||
calls.push('runtime');
|
||||
return runtimeReady(tempDir);
|
||||
},
|
||||
context: async () => {
|
||||
calls.push('context');
|
||||
return { status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' };
|
||||
|
|
@ -1627,7 +1681,7 @@ describe('setup status', () => {
|
|||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(calls).toEqual(['context', 'agents']);
|
||||
expect(calls).toEqual(['runtime', 'context', 'agents']);
|
||||
});
|
||||
|
||||
it('does not install agents when non-interactive --agents finds context incomplete', async () => {
|
||||
|
|
@ -1660,6 +1714,7 @@ describe('setup status', () => {
|
|||
},
|
||||
io.io,
|
||||
{
|
||||
runtime: async () => runtimeReady(tempDir),
|
||||
context: async () => ({ status: 'skipped', projectDir: tempDir }),
|
||||
agents,
|
||||
},
|
||||
|
|
@ -1695,7 +1750,7 @@ describe('setup status', () => {
|
|||
'utf-8',
|
||||
);
|
||||
await writeKtxSetupState(tempDir, {
|
||||
completed_steps: ['project', 'llm', 'embeddings', 'sources', 'context', 'agents'],
|
||||
completed_steps: ['project', 'llm', 'embeddings', 'sources', 'runtime', 'context', 'agents'],
|
||||
});
|
||||
await writeFile(
|
||||
join(tempDir, '.ktx/agents/install-manifest.json'),
|
||||
|
|
@ -1726,55 +1781,69 @@ describe('setup status', () => {
|
|||
commands: contextBuildCommands(tempDir, 'setup-context-local-ready'),
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxSetup(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
mode: 'existing',
|
||||
agents: false,
|
||||
inputMode: 'auto',
|
||||
yes: false,
|
||||
cliVersion: '0.2.0',
|
||||
skipLlm: false,
|
||||
skipEmbeddings: false,
|
||||
skipDatabases: false,
|
||||
skipSources: false,
|
||||
skipAgents: false,
|
||||
databaseSchemas: [],
|
||||
},
|
||||
io.io,
|
||||
{
|
||||
readyMenuDeps: { prompts: { select: vi.fn(async () => 'agents'), cancel: vi.fn() } },
|
||||
model: async (args) => {
|
||||
expect(args.skipLlm).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
const previousRuntimeRoot = process.env.KTX_RUNTIME_ROOT;
|
||||
process.env.KTX_RUNTIME_ROOT = await writeReadyRuntime(tempDir);
|
||||
try {
|
||||
await expect(
|
||||
runKtxSetup(
|
||||
{
|
||||
command: 'run',
|
||||
projectDir: tempDir,
|
||||
mode: 'existing',
|
||||
agents: false,
|
||||
inputMode: 'auto',
|
||||
yes: false,
|
||||
cliVersion: '0.2.0',
|
||||
skipLlm: false,
|
||||
skipEmbeddings: false,
|
||||
skipDatabases: false,
|
||||
skipSources: false,
|
||||
skipAgents: false,
|
||||
databaseSchemas: [],
|
||||
},
|
||||
embeddings: async (args) => {
|
||||
expect(args.skipEmbeddings).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
io.io,
|
||||
{
|
||||
readyMenuDeps: { prompts: { select: vi.fn(async () => 'agents'), cancel: vi.fn() } },
|
||||
model: async (args) => {
|
||||
expect(args.skipLlm).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
embeddings: async (args) => {
|
||||
expect(args.skipEmbeddings).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
databases: async (args) => {
|
||||
expect(args.skipDatabases).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
sources: async (args) => {
|
||||
expect(args.skipSources).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
runtime: async () => {
|
||||
calls.push('runtime');
|
||||
return runtimeReady(tempDir);
|
||||
},
|
||||
agents: async () => {
|
||||
calls.push('agents');
|
||||
return {
|
||||
status: 'ready',
|
||||
projectDir: tempDir,
|
||||
installs: [{ target: 'codex', scope: 'project', mode: 'mcp-cli' }],
|
||||
};
|
||||
},
|
||||
},
|
||||
databases: async (args) => {
|
||||
expect(args.skipDatabases).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
sources: async (args) => {
|
||||
expect(args.skipSources).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
agents: async () => {
|
||||
calls.push('agents');
|
||||
return {
|
||||
status: 'ready',
|
||||
projectDir: tempDir,
|
||||
installs: [{ target: 'codex', scope: 'project', mode: 'mcp-cli' }],
|
||||
};
|
||||
},
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
} finally {
|
||||
if (previousRuntimeRoot === undefined) {
|
||||
delete process.env.KTX_RUNTIME_ROOT;
|
||||
} else {
|
||||
process.env.KTX_RUNTIME_ROOT = previousRuntimeRoot;
|
||||
}
|
||||
}
|
||||
|
||||
expect(calls).toEqual(['agents']);
|
||||
expect(calls).toEqual(['runtime', 'agents']);
|
||||
});
|
||||
|
||||
it('skips to agent setup when context is ready but agents are not configured', async () => {
|
||||
|
|
@ -1854,6 +1923,10 @@ describe('setup status', () => {
|
|||
expect(args.skipSources).toBe(true);
|
||||
return { status: 'skipped', projectDir: tempDir };
|
||||
},
|
||||
runtime: async () => {
|
||||
calls.push('runtime');
|
||||
return runtimeReady(tempDir);
|
||||
},
|
||||
agents: async () => {
|
||||
calls.push('agents');
|
||||
return {
|
||||
|
|
@ -1867,11 +1940,12 @@ describe('setup status', () => {
|
|||
).resolves.toBe(0);
|
||||
|
||||
expect(readyMenuSelect).not.toHaveBeenCalled();
|
||||
expect(calls).toEqual(['agents']);
|
||||
expect(calls).toEqual(['runtime', 'agents']);
|
||||
});
|
||||
|
||||
it('runs only project resolution, context gate, and agent setup in --agents mode', async () => {
|
||||
it('runs only project resolution, runtime, context gate, and agent setup in --agents mode', async () => {
|
||||
const io = makeIo();
|
||||
const runtime = vi.fn(async () => runtimeReady(tempDir));
|
||||
const context = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-local-test' }));
|
||||
const agents = vi.fn(async () => ({
|
||||
status: 'ready' as const,
|
||||
|
|
@ -1903,12 +1977,14 @@ describe('setup status', () => {
|
|||
model: async () => {
|
||||
throw new Error('model should not run');
|
||||
},
|
||||
runtime,
|
||||
context,
|
||||
agents,
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(runtime).toHaveBeenCalledTimes(1);
|
||||
expect(context).toHaveBeenCalledTimes(1);
|
||||
expect(agents).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ import {
|
|||
} from '@ktx/context/project';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { formatSetupNextStepLines } from './next-steps.js';
|
||||
import { runtimeInstallPolicyFromFlags } from './managed-python-command.js';
|
||||
import { readManagedPythonRuntimeStatus } from './managed-python-runtime.js';
|
||||
import { resolveProjectRuntimeRequirements } from './runtime-requirements.js';
|
||||
import { isKtxSetupExitError } from './setup-interrupt.js';
|
||||
import {
|
||||
type KtxAgentScope,
|
||||
|
|
@ -37,6 +40,11 @@ import {
|
|||
runKtxSetupReadyChangeMenu,
|
||||
} from './setup-ready-menu.js';
|
||||
import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js';
|
||||
import {
|
||||
type KtxSetupRuntimeDeps,
|
||||
type KtxSetupRuntimeResult,
|
||||
runKtxSetupRuntimeStep,
|
||||
} from './setup-runtime.js';
|
||||
import {
|
||||
createKtxSetupPromptAdapter,
|
||||
createKtxSetupUiAdapter,
|
||||
|
|
@ -58,6 +66,7 @@ export interface KtxSetupStatus {
|
|||
embeddings: { backend?: string; ready: boolean; model?: string; dimensions?: number };
|
||||
databases: Array<{ connectionId: string; ready: boolean }>;
|
||||
sources: Array<{ connectionId: string; type: string; ready: boolean }>;
|
||||
runtime: { required: boolean; ready: boolean; features: string[]; detail?: string };
|
||||
context: KtxSetupContextStatusSummary;
|
||||
agents: Array<{ target: string; scope: string; ready: boolean }>;
|
||||
}
|
||||
|
|
@ -143,6 +152,8 @@ export interface KtxSetupDeps {
|
|||
io: KtxCliIo,
|
||||
) => Promise<Awaited<ReturnType<typeof runKtxSetupSourcesStep>>>;
|
||||
sourcesDeps?: KtxSetupSourcesDeps;
|
||||
runtime?: (args: Parameters<typeof runKtxSetupRuntimeStep>[0], io: KtxCliIo) => Promise<KtxSetupRuntimeResult>;
|
||||
runtimeDeps?: KtxSetupRuntimeDeps;
|
||||
agents?: (
|
||||
args: Parameters<typeof runKtxSetupAgentsStep>[0],
|
||||
io: KtxCliIo,
|
||||
|
|
@ -158,7 +169,7 @@ export interface KtxSetupDeps {
|
|||
const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']);
|
||||
|
||||
type KtxSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit';
|
||||
type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents';
|
||||
type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'runtime' | 'context' | 'agents';
|
||||
type KtxSetupFlowStatus =
|
||||
| 'ready'
|
||||
| 'skipped'
|
||||
|
|
@ -269,7 +280,16 @@ async function readIngestContextStatus(project: KtxLocalProject): Promise<KtxSet
|
|||
};
|
||||
}
|
||||
|
||||
export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupStatus> {
|
||||
/** Optional inputs for `readKtxSetupStatus`. */
export interface ReadKtxSetupStatusOptions {
  /**
   * CLI version whose managed runtime is inspected. The runtime status is only
   * read when this is set and the project requires at least one runtime feature.
   */
  cliVersion?: string;
  /** Environment used for requirement resolution and the runtime status read; `process.env` when omitted. */
  env?: NodeJS.ProcessEnv;
  /** Replaces `readManagedPythonRuntimeStatus`. */
  readRuntimeStatus?: typeof readManagedPythonRuntimeStatus;
}
|
||||
|
||||
export async function readKtxSetupStatus(
|
||||
projectDir: string,
|
||||
options: ReadKtxSetupStatusOptions = {},
|
||||
): Promise<KtxSetupStatus> {
|
||||
const resolvedProjectDir = resolve(projectDir);
|
||||
if (!existsSync(join(resolvedProjectDir, 'ktx.yaml'))) {
|
||||
return {
|
||||
|
|
@ -278,6 +298,7 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
|
|||
embeddings: { ready: false },
|
||||
databases: [],
|
||||
sources: [],
|
||||
runtime: { required: false, ready: true, features: [] },
|
||||
context: setupContextStatusFromState(await readKtxSetupContextState(resolvedProjectDir)),
|
||||
agents: [],
|
||||
};
|
||||
|
|
@ -316,6 +337,21 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
|
|||
});
|
||||
}
|
||||
const agents = [...agentMap.values()];
|
||||
const runtimeRequirements = resolveProjectRuntimeRequirements(project.config, {
|
||||
agents: agents.length > 0,
|
||||
env: options.env ?? process.env,
|
||||
});
|
||||
let runtimeReady = runtimeRequirements.features.length === 0 || completedSteps.includes('runtime');
|
||||
let runtimeDetail: string | undefined;
|
||||
if (runtimeRequirements.features.length > 0 && options.cliVersion) {
|
||||
const readRuntimeStatus = options.readRuntimeStatus ?? readManagedPythonRuntimeStatus;
|
||||
const runtimeStatus = await readRuntimeStatus({ cliVersion: options.cliVersion, env: options.env ?? process.env });
|
||||
runtimeDetail = runtimeStatus.detail;
|
||||
runtimeReady =
|
||||
runtimeStatus.kind === 'ready' &&
|
||||
runtimeStatus.manifest !== undefined &&
|
||||
runtimeRequirements.features.every((feature) => runtimeStatus.manifest?.features.includes(feature));
|
||||
}
|
||||
|
||||
return {
|
||||
project: { path: resolvedProjectDir, ready: true, name: basename(project.projectDir) || project.projectDir },
|
||||
|
|
@ -329,6 +365,12 @@ export async function readKtxSetupStatus(projectDir: string): Promise<KtxSetupSt
|
|||
...source,
|
||||
ready: completedSteps.includes('sources'),
|
||||
})),
|
||||
runtime: {
|
||||
required: runtimeRequirements.features.length > 0,
|
||||
ready: runtimeReady,
|
||||
features: runtimeRequirements.features,
|
||||
...(runtimeDetail ? { detail: runtimeDetail } : {}),
|
||||
},
|
||||
context: ingestContextStatus ?? setupContextStatus,
|
||||
agents,
|
||||
};
|
||||
|
|
@ -374,6 +416,13 @@ export function formatKtxSetupStatus(status: KtxSetupStatus): string {
|
|||
}`,
|
||||
`Databases configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`,
|
||||
`Context sources configured: ${formatConnectionList(status.sources.map((source) => source.connectionId))}`,
|
||||
...(status.runtime.required
|
||||
? [
|
||||
`Runtime ready: ${formatReady(status.runtime.ready)}${
|
||||
status.runtime.features.length > 0 ? ` (${status.runtime.features.join(', ')})` : ''
|
||||
}`,
|
||||
]
|
||||
: []),
|
||||
`KTX context built: ${formatContextBuilt(status.context)}`,
|
||||
`Agent integration ready: ${formatReady(status.agents.some((agent) => agent.ready))}${
|
||||
status.agents.length > 0 ? ` (${status.agents.map((agent) => `${agent.target}:${agent.scope}`).join(', ')})` : ''
|
||||
|
|
@ -397,7 +446,8 @@ function setupStatusReady(status: KtxSetupStatus): boolean {
|
|||
status.llm.ready &&
|
||||
embeddingsReady(status.embeddings) &&
|
||||
status.databases.every((database) => database.ready) &&
|
||||
status.sources.every((source) => source.ready)
|
||||
status.sources.every((source) => source.ready) &&
|
||||
status.runtime.ready
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -416,7 +466,10 @@ function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void {
|
|||
}
|
||||
|
||||
function setupRuntimeInstallPolicy(args: Extract<KtxSetupArgs, { command: 'run' }>): 'prompt' | 'auto' | 'never' {
|
||||
return args.inputMode === 'disabled' && !args.yes ? 'never' : 'auto';
|
||||
if (args.yes) {
|
||||
return 'auto';
|
||||
}
|
||||
return runtimeInstallPolicyFromFlags({ input: args.inputMode === 'disabled' ? false : true });
|
||||
}
|
||||
|
||||
async function commitSetupConfigChanges(projectDir: string): Promise<void> {
|
||||
|
|
@ -449,7 +502,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
setupLoop: while (true) {
|
||||
entryAction = undefined;
|
||||
if (canShowEntryMenu) {
|
||||
const status = await readKtxSetupStatus(args.projectDir);
|
||||
const status = await readKtxSetupStatus(args.projectDir, { cliVersion: args.cliVersion });
|
||||
entryAction = (await runKtxSetupEntryMenu(status, deps.entryMenuDeps)).action;
|
||||
if (entryAction === 'exit') {
|
||||
(deps.entryMenuDeps?.prompts ?? createEntryMenuPromptAdapter()).cancel('Setup cancelled.');
|
||||
|
|
@ -486,7 +539,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
}
|
||||
|
||||
const agentsRequested = args.agents || entryAction === 'agents';
|
||||
const currentStatus = await readKtxSetupStatus(projectResult.projectDir);
|
||||
const currentStatus = await readKtxSetupStatus(projectResult.projectDir, { cliVersion: args.cliVersion });
|
||||
let readyAction: string | undefined;
|
||||
|
||||
if (args.inputMode !== 'disabled' && !agentsRequested) {
|
||||
|
|
@ -503,13 +556,15 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
const shouldRunEmbeddings = !runOnly || runOnly === 'embeddings';
|
||||
const shouldRunDatabases = !runOnly || runOnly === 'databases';
|
||||
const shouldRunSources = !runOnly || runOnly === 'sources';
|
||||
const shouldRunRuntime =
|
||||
agentsRequested || !runOnly || runOnly === 'runtime' || runOnly === 'context' || runOnly === 'agents';
|
||||
const shouldRunContext = agentsRequested || !runOnly || runOnly === 'context';
|
||||
const shouldRunAgents = agentsRequested || !runOnly || runOnly === 'agents';
|
||||
const showPromptInstructions = projectResult.confirmedCreation !== true;
|
||||
|
||||
const setupSteps: KtxSetupFlowStep[] = agentsRequested
|
||||
? ['context']
|
||||
: ['models', 'embeddings', 'databases', 'sources', 'context'];
|
||||
? ['runtime', 'context']
|
||||
: ['models', 'embeddings', 'databases', 'sources', 'runtime', 'context'];
|
||||
if (shouldRunAgents && args.skipAgents !== true) {
|
||||
setupSteps.push('agents');
|
||||
}
|
||||
|
|
@ -520,6 +575,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
if (step === 'embeddings') return !args.skipEmbeddings && shouldRunEmbeddings;
|
||||
if (step === 'databases') return !args.skipDatabases && shouldRunDatabases;
|
||||
if (step === 'sources') return args.skipSources !== true && shouldRunSources;
|
||||
if (step === 'runtime') return shouldRunRuntime;
|
||||
if (step === 'context') return shouldRunContext;
|
||||
return shouldRunAgents && args.skipAgents !== true;
|
||||
};
|
||||
|
|
@ -636,6 +692,20 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
},
|
||||
io,
|
||||
);
|
||||
} else if (step === 'runtime') {
|
||||
const runtimeRunner =
|
||||
deps.runtime ??
|
||||
((runtimeArgs, runtimeIo) => runKtxSetupRuntimeStep(runtimeArgs, runtimeIo, deps.runtimeDeps));
|
||||
stepResult = await runtimeRunner(
|
||||
{
|
||||
projectDir: projectResult.projectDir,
|
||||
inputMode: args.inputMode,
|
||||
cliVersion: args.cliVersion,
|
||||
runtimeInstallPolicy: setupRuntimeInstallPolicy(args),
|
||||
agents: shouldRunAgents && args.skipAgents !== true,
|
||||
},
|
||||
io,
|
||||
);
|
||||
} else if (step === 'context') {
|
||||
const contextRunner =
|
||||
deps.context ??
|
||||
|
|
@ -706,7 +776,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
|
|||
|
||||
await commitSetupConfigChanges(projectResult.projectDir);
|
||||
|
||||
const status = await readKtxSetupStatus(projectResult.projectDir);
|
||||
const status = await readKtxSetupStatus(projectResult.projectDir, { cliVersion: args.cliVersion });
|
||||
const focusedOnAgents = args.agents || entryAction === 'agents';
|
||||
if (!focusedOnAgents) {
|
||||
setupUi.note(formatKtxSetupStatus(status).trimEnd(), 'Project status', io, {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { stripVTControlCharacters } from 'node:util';
|
||||
import Database from 'better-sqlite3';
|
||||
import { initKtxProject } from '@ktx/context/project';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
|
@ -98,6 +99,23 @@ describe('runKtxSl', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('prints semantic-layer search rank badges in pretty output', async () => {
  const projectDir = join(tempDir, 'rank-project');
  await seedSlSource({ projectDir });

  const searchIo = makeIo();
  await expect(
    runKtxSl(
      { command: 'search', projectDir, connectionId: 'warehouse', query: 'order', output: 'pretty' },
      searchIo.io,
    ),
  ).resolves.toBe(0);

  // Strip terminal control sequences before matching the rendered text.
  const stdout = stripVTControlCharacters(searchIo.stdout());
  expect(stdout).toMatch(/#1\s+orders/);
  // Pretty search output shows rank badges, so no percentage sign may appear.
  expect(stdout).not.toContain('%');
});
|
||||
|
||||
it('prints semantic-layer list and search as public JSON envelopes', async () => {
|
||||
const projectDir = join(tempDir, 'project');
|
||||
await seedSlSource({
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ async function printSlSources(input: {
|
|||
emptyHint?: string;
|
||||
}): Promise<void> {
|
||||
const { resolveOutputMode } = await import('./io/mode.js');
|
||||
const { printList } = await import('./io/print-list.js');
|
||||
const { createRankBadgeFormatter, printList } = await import('./io/print-list.js');
|
||||
const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io });
|
||||
|
||||
if (input.command === 'sl search') {
|
||||
|
|
@ -119,7 +119,7 @@ async function printSlSources(input: {
|
|||
label: 'SCORE',
|
||||
plain: 'score=',
|
||||
role: 'badge',
|
||||
prettyFormat: (value) => `${Math.round(Number(value) * 100)}%`,
|
||||
prettyFormat: createRankBadgeFormatter(input.rows as ReadonlyArray<LocalSlSourceSearchResult>),
|
||||
dim: true,
|
||||
},
|
||||
{ key: 'connectionId', label: 'CONNECTION', plain: '' },
|
||||
|
|
|
|||
85
packages/cli/src/source-mapping.test.ts
Normal file
85
packages/cli/src/source-mapping.test.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { runKtxSourceMapping } from './source-mapping.js';
|
||||
|
||||
/**
 * Builds an in-memory `KtxCliIo` double. Everything written to its stdout and
 * stderr is buffered so a test can read the captured text back afterwards.
 */
function makeIo() {
  const captured = { stdout: '', stderr: '' };
  // One writer factory serves both streams; each appends to its own buffer.
  const sink = (stream: 'stdout' | 'stderr') => ({
    write: (chunk: string) => {
      captured[stream] += chunk;
    },
  });
  const io = { stdout: sink('stdout'), stderr: sink('stderr') } satisfies KtxCliIo;
  return {
    io,
    stdout: () => captured.stdout,
    stderr: () => captured.stderr,
  };
}
|
||||
|
||||
describe('source mapping commands', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-source-mapping-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /**
   * Writes a `ktx.yaml` into the temp project with a postgres `warehouse`
   * connection and a `metabase` connection, appending the given extra lines
   * to the Metabase connection.
   */
  async function writeConfig(metabaseMappings: string[]): Promise<void> {
    const yamlLines = [
      'connections:',
      ' warehouse:',
      ' driver: postgres',
      ' url: env:DATABASE_URL',
      ' metabase:',
      ' driver: metabase',
      ' api_url: https://metabase.example.com',
      ...metabaseMappings,
      '',
    ];
    await writeFile(join(tempDir, 'ktx.yaml'), yamlLines.join('\n'), 'utf-8');
  }

  /** Runs `validate` for the `metabase` connection against the temp project. */
  function validateMetabase(captured: ReturnType<typeof makeIo>) {
    return runKtxSourceMapping({ command: 'validate', projectDir: tempDir, connectionId: 'metabase' }, captured.io);
  }

  it('fails Metabase validation when no sync-enabled target mapping exists', async () => {
    await writeConfig([]);
    const io = makeIo();

    await expect(validateMetabase(io)).resolves.toBe(1);

    expect(io.stderr()).toContain('no sync-enabled mappings with a target connection for Metabase connection metabase');
  });

  it('passes Metabase validation when a sync-enabled target mapping exists', async () => {
    const mappingLines = [
      ' mappings:',
      ' databaseMappings:',
      ' "3": warehouse',
      ' syncEnabled:',
      ' "3": true',
    ];
    await writeConfig(mappingLines);
    const io = makeIo();

    await expect(validateMetabase(io)).resolves.toBe(0);

    expect(io.stdout()).toContain('Mapping validation passed: metabase');
  });
});
|
||||
|
|
@ -12,6 +12,7 @@ import {
|
|||
discoverMetabaseDatabases,
|
||||
lookerCredentialsFromLocalConnection,
|
||||
metabaseRuntimeConfigFromLocalConnection,
|
||||
planMetabaseFanoutChildren,
|
||||
seedLocalMappingStateFromKtxYaml,
|
||||
validateLookerMappings,
|
||||
validateMappingPhysicalMatch,
|
||||
|
|
@ -198,6 +199,14 @@ export async function runKtxSourceMapping(
|
|||
}
|
||||
|
||||
const rows = await store.listDatabaseMappings(args.connectionId);
|
||||
planMetabaseFanoutChildren({
|
||||
metabaseConnectionId: args.connectionId,
|
||||
mappings: rows.map((row) => ({
|
||||
metabaseDatabaseId: row.metabaseDatabaseId,
|
||||
targetConnectionId: row.targetConnectionId,
|
||||
syncEnabled: row.syncEnabled,
|
||||
})),
|
||||
});
|
||||
const failures = rows.flatMap((row) => {
|
||||
if (!row.targetConnectionId) {
|
||||
return [];
|
||||
|
|
|
|||
295
packages/cli/src/sql.test.ts
Normal file
295
packages/cli/src/sql.test.ts
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project';
|
||||
import type { KtxScanConnector } from '@ktx/context/scan';
|
||||
import type { SqlAnalysisPort } from '@ktx/context/sql-analysis';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { runKtxSql } from './sql.js';
|
||||
|
||||
/**
 * Creates a fake CLI io object whose stdout/stderr writers append to private
 * buffers, together with accessors that return the text captured so far.
 */
function makeIo() {
  const buffers: Record<'stdout' | 'stderr', string> = { stdout: '', stderr: '' };
  const writerFor = (name: 'stdout' | 'stderr') => ({
    write: (chunk: string) => {
      buffers[name] += chunk;
    },
  });
  return {
    io: { stdout: writerFor('stdout'), stderr: writerFor('stderr') },
    stdout: () => buffers.stdout,
    stderr: () => buffers.stderr,
  };
}
|
||||
|
||||
/**
 * Builds a `SqlAnalysisPort` double whose `validateReadOnly` mock always
 * resolves to `result`; the other port methods are bare mocks.
 */
function makeSqlAnalysis(result: Awaited<ReturnType<SqlAnalysisPort['validateReadOnly']>>): SqlAnalysisPort {
  const validateReadOnly = vi.fn(async () => result);
  return {
    analyzeForFingerprint: vi.fn(),
    analyzeBatch: vi.fn(),
    validateReadOnly,
  };
}
|
||||
|
||||
/**
 * Builds a sqlite-flavoured `KtxScanConnector` double. `executeReadOnly`
 * resolves to a fixed two-row `id`/`status` result and `cleanup` resolves to
 * `undefined`; any field can be replaced through `overrides`.
 */
function makeConnector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
  const executeReadOnly = vi.fn(async () => ({
    headers: ['id', 'status'],
    headerTypes: ['integer', 'text'],
    rows: [
      [1, 'paid'],
      [2, 'open'],
    ],
    totalRows: 2,
    rowCount: 2,
  }));
  const base: KtxScanConnector = {
    id: 'sqlite:warehouse',
    driver: 'sqlite',
    capabilities: {
      structuralIntrospection: true,
      tableSampling: true,
      columnSampling: true,
      columnStats: true,
      readOnlySql: true,
      nestedAnalysis: false,
      eventStreamDiscovery: false,
      formalForeignKeys: true,
      estimatedRowCounts: true,
    },
    introspect: vi.fn(),
    executeReadOnly,
    cleanup: vi.fn(async () => undefined),
  };
  // Overrides are applied last so a test can swap any default field.
  return { ...base, ...overrides };
}
|
||||
|
||||
describe('runKtxSql', () => {
  type SqlArgs = Parameters<typeof runKtxSql>[0];

  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sql-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  /** Replaces the `connections` section of the project's `ktx.yaml`, keeping the rest of the config. */
  async function writeConnections(
    projectDir: string,
    connections: ReturnType<typeof parseKtxProjectConfig>['connections'],
  ): Promise<void> {
    const configPath = join(projectDir, 'ktx.yaml');
    const config = parseKtxProjectConfig(await readFile(configPath, 'utf-8'));
    await writeFile(configPath, serializeKtxProjectConfig({ ...config, connections }), 'utf-8');
  }

  /** Initializes a project under the temp dir, optionally registering the sqlite `warehouse` connection. */
  async function initProject(options: { withWarehouse: boolean }): Promise<string> {
    const projectDir = join(tempDir, 'project');
    await initKtxProject({ projectDir });
    if (options.withWarehouse) {
      await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } });
    }
    return projectDir;
  }

  /** Fills in the arguments every test shares around the per-test SQL and output settings. */
  function executeArgs(projectDir: string, vary: Pick<SqlArgs, 'sql' | 'maxRows' | 'output' | 'json'>): SqlArgs {
    return {
      command: 'execute',
      projectDir,
      connectionId: 'warehouse',
      ...vary,
      cliVersion: '0.0.0-test',
    };
  }

  it('validates SQL, executes through the scan connector, and prints a pretty table', async () => {
    const projectDir = await initProject({ withWarehouse: true });
    const sqlAnalysis = makeSqlAnalysis({ ok: true, error: null });
    const connector = makeConnector();
    const createScanConnector = vi.fn(async () => connector);
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'select id, status from orders',
      maxRows: 1000,
      output: 'pretty',
      json: false,
    });

    await expect(
      runKtxSql(args, io.io, { createSqlAnalysis: () => sqlAnalysis, createScanConnector }),
    ).resolves.toBe(0);

    expect(sqlAnalysis.validateReadOnly).toHaveBeenCalledWith('select id, status from orders', 'sqlite');
    expect(createScanConnector).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'warehouse');
    expect(connector.executeReadOnly).toHaveBeenCalledWith(
      { connectionId: 'warehouse', sql: 'select id, status from orders', maxRows: 1000 },
      { runId: 'cli-sql' },
    );
    expect(connector.cleanup).toHaveBeenCalledTimes(1);
    const printed = io.stdout();
    expect(printed).toContain('id status');
    expect(printed).toContain('1 paid');
    expect(printed).toContain('2 open');
    expect(printed).toContain('2 rows');
    expect(io.stderr()).toBe('');
  });

  it('prints JSON output', async () => {
    const projectDir = await initProject({ withWarehouse: true });
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'select id from orders',
      maxRows: 10,
      output: undefined,
      json: true,
    });

    await expect(
      runKtxSql(args, io.io, {
        createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
        createScanConnector: vi.fn(async () => makeConnector()),
      }),
    ).resolves.toBe(0);

    expect(JSON.parse(io.stdout())).toEqual({
      connectionId: 'warehouse',
      headers: ['id', 'status'],
      headerTypes: ['integer', 'text'],
      rows: [
        [1, 'paid'],
        [2, 'open'],
      ],
      rowCount: 2,
    });
  });

  it('prints plain TSV output', async () => {
    const projectDir = await initProject({ withWarehouse: true });
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'select id from orders',
      maxRows: 10,
      output: 'plain',
      json: false,
    });

    await expect(
      runKtxSql(args, io.io, {
        createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
        createScanConnector: vi.fn(async () => makeConnector()),
      }),
    ).resolves.toBe(0);

    expect(io.stdout()).toBe('id\tstatus\n1\tpaid\n2\topen\n');
    expect(io.stderr()).toBe('');
  });

  it('rejects non-read-only SQL before executing connector SQL', async () => {
    const projectDir = await initProject({ withWarehouse: true });
    const connector = makeConnector();
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'delete from orders',
      maxRows: 1000,
      output: 'pretty',
      json: false,
    });

    await expect(
      runKtxSql(args, io.io, {
        createSqlAnalysis: () => makeSqlAnalysis({ ok: false, error: 'SQL contains read/write operation: Delete' }),
        createScanConnector: vi.fn(async () => connector),
      }),
    ).resolves.toBe(1);

    expect(connector.executeReadOnly).not.toHaveBeenCalled();
    expect(connector.cleanup).not.toHaveBeenCalled();
    expect(io.stderr()).toContain('SQL contains read/write operation: Delete');
  });

  it('rejects missing connections', async () => {
    // No connection is written, so `warehouse` is absent from ktx.yaml.
    const projectDir = await initProject({ withWarehouse: false });
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'select 1',
      maxRows: 1000,
      output: 'pretty',
      json: false,
    });

    await expect(
      runKtxSql(args, io.io, {
        createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
      }),
    ).resolves.toBe(1);

    expect(io.stderr()).toContain('Connection "warehouse" is not configured in ktx.yaml');
  });

  it('rejects connectors without read-only SQL support and still cleans up', async () => {
    const projectDir = await initProject({ withWarehouse: true });
    const connector = makeConnector({
      capabilities: {
        ...makeConnector().capabilities,
        readOnlySql: false,
      },
    });
    const io = makeIo();
    const args = executeArgs(projectDir, {
      sql: 'select 1',
      maxRows: 1000,
      output: 'pretty',
      json: false,
    });

    await expect(
      runKtxSql(args, io.io, {
        createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }),
        createScanConnector: vi.fn(async () => connector),
      }),
    ).resolves.toBe(1);

    expect(connector.executeReadOnly).not.toHaveBeenCalled();
    expect(connector.cleanup).toHaveBeenCalledTimes(1);
    expect(io.stderr()).toContain('Connection "warehouse" does not support read-only SQL execution.');
  });
});
|
||||
171
packages/cli/src/sql.ts
Normal file
171
packages/cli/src/sql.ts
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project';
|
||||
import type { KtxQueryResult, KtxScanConnector } from '@ktx/context/scan';
|
||||
import type { SqlAnalysisDialect, SqlAnalysisPort } from '@ktx/context/sql-analysis';
|
||||
import type { KtxCliIo } from './cli-runtime.js';
|
||||
import { createKtxCliScanConnector } from './local-scan-connectors.js';
|
||||
import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js';
|
||||
import { profileMark } from './startup-profile.js';
|
||||
|
||||
profileMark('module:sql');
|
||||
|
||||
type KtxSqlOutputMode = 'pretty' | 'plain' | 'json';
|
||||
|
||||
/** Arguments accepted by `runKtxSql`. */
export type KtxSqlArgs = {
  /** Subcommand selector; `execute` is the only variant. */
  command: 'execute';
  /** Project directory handed to the project loader. */
  projectDir: string;
  /** Key of the connection to look up under `connections` in the project config. */
  connectionId: string;
  /** SQL text that is validated as read-only and then executed. */
  sql: string;
  /** Row limit forwarded to the connector's `executeReadOnly` call. */
  maxRows: number;
  /** Requested output mode; `pretty` is used when omitted. */
  output?: KtxSqlOutputMode;
  /** When `true`, JSON output is used regardless of `output`. */
  json?: boolean;
  /** CLI version forwarded to the managed SQL-analysis port. */
  cliVersion: string;
};
|
||||
|
||||
/**
 * Optional collaborators for `runKtxSql`. Each one replaces the real
 * implementation when provided, which lets tests run without a database or
 * the managed SQL-analysis daemon.
 */
export interface KtxSqlDeps {
  /** Replacement for `loadKtxProject`. */
  loadProject?: typeof loadKtxProject;
  /** Factory for the port used to check that the SQL is read-only. */
  createSqlAnalysis?: () => SqlAnalysisPort;
  /** Replacement for `createKtxCliScanConnector`. */
  createScanConnector?: typeof createKtxCliScanConnector;
}
|
||||
|
||||
/** Query result in the shape shared by the JSON, plain and pretty printers. */
interface SqlExecutionOutput {
  /** Connection the query ran against. */
  connectionId: string;
  /** Column names, in result order. */
  headers: string[];
  /** Column type names, present only when the connector reported them. */
  headerTypes?: string[];
  /** Result rows; each row is an array of cell values. */
  rows: unknown[][];
  /** Row count shown in the pretty footer and emitted in JSON. */
  rowCount: number;
}
|
||||
|
||||
/**
 * Normalized driver name -> SQL-analysis dialect.
 *
 * A `Map` is used rather than an object literal so that inherited keys such as
 * `constructor` or `toString` can never match as driver names.
 */
const SQL_ANALYSIS_DIALECT_BY_DRIVER: ReadonlyMap<string, SqlAnalysisDialect> = new Map<string, SqlAnalysisDialect>([
  ['postgres', 'postgres'],
  ['postgresql', 'postgres'],
  ['bigquery', 'bigquery'],
  ['snowflake', 'snowflake'],
  ['mysql', 'mysql'],
  ['sqlserver', 'tsql'],
  ['mssql', 'tsql'],
  ['sqlite', 'sqlite'],
  ['sqlite3', 'sqlite'],
  ['clickhouse', 'clickhouse'],
  ['redshift', 'redshift'],
]);

/**
 * Maps a connection's driver name to the dialect used for read-only SQL
 * validation.
 *
 * @param driver Driver name from the connection config. Matching ignores case
 *   and surrounding whitespace.
 * @returns The matching dialect, or `'postgres'` when the driver is missing or
 *   not recognized.
 */
function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect {
  const normalized = String(driver ?? '').trim().toLowerCase();
  return SQL_ANALYSIS_DIALECT_BY_DRIVER.get(normalized) ?? 'postgres';
}
|
||||
|
||||
/**
 * Picks the output mode for a run: an explicit `json: true` always wins,
 * otherwise the requested `output` is used, defaulting to `pretty`.
 */
function resolveOutputMode(args: KtxSqlArgs): KtxSqlOutputMode {
  const { json, output } = args;
  return json === true ? 'json' : (output ?? 'pretty');
}
|
||||
|
||||
/**
 * Renders one result cell as display text. This function always returns a
 * string and never throws.
 *
 * - `null` / `undefined` become the empty string.
 * - Strings are returned as-is; numbers, booleans and bigints use `String()`.
 * - Valid `Date` values are shown as their ISO-8601 string without JSON quotes.
 * - Anything else is JSON-encoded, with nested bigints written as decimal
 *   strings. Values JSON cannot encode (symbols, functions, circular
 *   structures) fall back to `String(value)`.
 */
function formatValue(value: unknown): string {
  if (value === null || value === undefined) return '';
  if (typeof value === 'string') return value;
  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') return String(value);
  if (value instanceof Date) {
    // `toISOString` throws on an invalid date, so fall back to "Invalid Date".
    return Number.isNaN(value.getTime()) ? String(value) : value.toISOString();
  }
  try {
    // JSON.stringify throws on bigint, so nested bigints are converted first.
    const encoded: unknown = JSON.stringify(value, (_key, nested: unknown) =>
      typeof nested === 'bigint' ? nested.toString() : nested,
    );
    // At runtime JSON.stringify yields `undefined` for symbols and functions.
    return typeof encoded === 'string' ? encoded : String(value);
  } catch {
    // Circular structures and throwing `toJSON` implementations end up here.
    return String(value);
  }
}
|
||||
|
||||
/**
 * Writes the result to stdout as 2-space-indented JSON followed by a newline.
 *
 * BigInt cell values are written as decimal strings. `JSON.stringify` throws
 * on bigint, so without the replacer JSON mode would fail on results that the
 * plain and pretty printers can render.
 */
function printJson(output: SqlExecutionOutput, io: KtxCliIo): void {
  const serialized = JSON.stringify(
    output,
    (_key, value: unknown) => (typeof value === 'bigint' ? value.toString() : value),
    2,
  );
  io.stdout.write(`${serialized}\n`);
}
|
||||
|
||||
/**
 * Writes the result as tab-separated lines: one header line, then one line
 * per row with every cell rendered by `formatValue`.
 */
function printPlain(output: SqlExecutionOutput, io: KtxCliIo): void {
  const toLine = (cells: readonly string[]): string => `${cells.join('\t')}\n`;
  io.stdout.write(toLine(output.headers));
  output.rows.forEach((row) => {
    io.stdout.write(toLine(row.map(formatValue)));
  });
}
|
||||
|
||||
/**
 * Writes the result as an aligned text table.
 *
 * When there is at least one column, a header row and a dashed rule are
 * printed first. Every data row follows, with cells padded to the widest value
 * in their column, and a blank line plus an "N row(s)" footer closes the table.
 */
function printPretty(output: SqlExecutionOutput, io: KtxCliIo): void {
  const rows = output.rows.map((row) => row.map(formatValue));
  // Fold over the rows instead of spreading them into `Math.max(...)`: a spread
  // passes one argument per row and can raise RangeError on large result sets.
  const widths = output.headers.map((header, index) =>
    rows.reduce((widest, row) => Math.max(widest, row[index]?.length ?? 0), header.length),
  );
  // Cells beyond the known columns keep their own length; trailing padding is trimmed.
  const renderRow = (cells: string[]): string =>
    cells.map((cell, index) => cell.padEnd(widths[index] ?? cell.length)).join(' ').trimEnd();

  if (output.headers.length > 0) {
    io.stdout.write(`${renderRow(output.headers)}\n`);
    io.stdout.write(`${renderRow(widths.map((width) => '-'.repeat(width)))}\n`);
  }
  for (const row of rows) {
    io.stdout.write(`${renderRow(row)}\n`);
  }
  io.stdout.write(`\n${output.rowCount} ${output.rowCount === 1 ? 'row' : 'rows'}\n`);
}
|
||||
|
||||
/** Sends the result to the printer for `mode`; any mode other than `json` or `plain` prints the pretty table. */
function printSqlResult(output: SqlExecutionOutput, mode: KtxSqlOutputMode, io: KtxCliIo): void {
  switch (mode) {
    case 'json':
      printJson(output, io);
      break;
    case 'plain':
      printPlain(output, io);
      break;
    default:
      printPretty(output, io);
  }
}
|
||||
|
||||
/**
 * Awaits the connector's `cleanup` hook. Does nothing when no connector was
 * created or the connector has no `cleanup`.
 */
async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> {
  if (!connector?.cleanup) {
    return;
  }
  await connector.cleanup();
}
|
||||
|
||||
/**
 * Converts a connector query result into the printer-facing output shape.
 * `headerTypes` is included only when the connector supplied it, and
 * `rowCount` falls back to the number of returned rows.
 */
function resultOutput(connectionId: string, result: KtxQueryResult): SqlExecutionOutput {
  const rowCount = result.rowCount ?? result.rows.length;
  if (!result.headerTypes) {
    return { connectionId, headers: result.headers, rows: result.rows, rowCount };
  }
  // Key order is kept (headerTypes before rows) because it is visible in JSON output.
  return {
    connectionId,
    headers: result.headers,
    headerTypes: result.headerTypes,
    rows: result.rows,
    rowCount,
  };
}
|
||||
|
||||
/**
 * Runs `ktx sql`: validates the SQL as read-only, executes it through the
 * connection's scan connector, and prints the result.
 *
 * Steps, in order:
 * 1. Load the project and look up `args.connectionId` in its connections.
 * 2. Ask the SQL-analysis port to validate the SQL for the connection's dialect.
 * 3. Create the scan connector, check that it supports read-only SQL, execute,
 *    and print in the resolved output mode.
 *
 * The connector's cleanup runs whenever the connector was created, including
 * when execution is rejected or fails.
 *
 * @param args Command arguments.
 * @param io Output streams; defaults to the current process.
 * @param deps Optional replacements for the project loader, SQL-analysis
 *   factory and connector factory.
 * @returns `0` on success, or `1` after writing the error message to stderr.
 */
export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: KtxSqlDeps = {}): Promise<number> {
  const loadProject = deps.loadProject ?? loadKtxProject;
  const createSqlAnalysis =
    deps.createSqlAnalysis ??
    (() =>
      createManagedDaemonSqlAnalysisPort({
        cliVersion: args.cliVersion,
        projectDir: args.projectDir,
        installPolicy: 'auto',
        io,
      }));
  const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector;
  const { connectionId, sql, maxRows } = args;

  try {
    const project = await loadProject({ projectDir: args.projectDir });
    const connection = project.config.connections[connectionId];
    if (!connection) {
      throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`);
    }

    // Validation runs before any connector is created, so rejected SQL never reaches the database.
    const analysisPort = createSqlAnalysis();
    const validation = await analysisPort.validateReadOnly(sql, sqlAnalysisDialectForDriver(connection.driver));
    if (!validation.ok) {
      throw new Error(validation.error ?? 'SQL is not read-only.');
    }

    let connector: KtxScanConnector | null = null;
    try {
      connector = await createScanConnector(project as KtxLocalProject, connectionId);
      if (!(connector.capabilities.readOnlySql && connector.executeReadOnly)) {
        throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`);
      }
      const request = { connectionId, sql, maxRows };
      const result = await connector.executeReadOnly(request, { runId: 'cli-sql' });
      printSqlResult(resultOutput(connectionId, result), resolveOutputMode(args), io);
      return 0;
    } finally {
      await cleanupConnector(connector);
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    io.stderr.write(`${message}\n`);
    return 1;
  }
}
|
||||
|
|
@ -1,5 +1,12 @@
|
|||
<role>
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`.
|
||||
You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit
|
||||
gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs,
|
||||
Metabase card JSONs, Notion pages, or similar) and you must translate that
|
||||
slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass.
|
||||
You run in an isolated WorkUnit worktree. Deterministic projection output,
|
||||
existing project memory, and listed dependency paths are visible; sibling
|
||||
WorkUnit edits from this same job are not visible until the runner integrates
|
||||
accepted patches.
|
||||
</role>
|
||||
|
||||
<stance>
|
||||
|
|
@ -8,9 +15,19 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing
|
|||
|
||||
<workflow>
|
||||
1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files.
|
||||
2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `wiki_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping.
|
||||
2. Load the per-source review skill first (for example `lookml_ingest`,
|
||||
`metricflow_ingest`, or `dbt_ingest`), then `sl_capture` and
|
||||
`wiki_capture`, and `ingest_triage` last. The triage skill tells you how to
|
||||
react when existing project memory, deterministic projection output, or
|
||||
prior provenance overlaps with what this WorkUnit is about to write.
|
||||
3. If the system prompt includes `<canonical_pins>`, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip.
|
||||
4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large
|
||||
files) to load content. Before writing a new SL source or wiki page, call
|
||||
`discover_data` for each candidate source, table, metric, or topic name to
|
||||
find existing wiki pages, SL sources, deterministic projection output, prior
|
||||
sync artifacts, and raw warehouse matches; apply `ingest_triage` when you hit
|
||||
one, and apply any matching canonical pin before deciding whether to edit,
|
||||
rename, or skip.
|
||||
5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit.
|
||||
6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit_source` over `sl_write_source` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag.
|
||||
7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written.
|
||||
|
|
@ -28,5 +45,7 @@ Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths li
|
|||
- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source.
|
||||
- Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture.
|
||||
- Do not duplicate an artifact that prior provenance says you already produced; update it.
|
||||
- Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`.
|
||||
- Do not silently accept a name collision with visible existing memory,
|
||||
deterministic projection output, or prior provenance when the formula differs.
|
||||
Trigger `ingest_triage`.
|
||||
</do_not>
|
||||
|
|
|
|||
|
|
@ -7,8 +7,11 @@ callers: [memory_agent]
|
|||
# Ingest Triage - conflict classification and resolution
|
||||
|
||||
This skill is loaded in two contexts:
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover` reveals that a prior WU (or a prior sync) already wrote something that overlaps with what the current WU is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WU sweeps and for eviction decisions.
|
||||
- By a Stage 3 WorkUnit agent when `sl_discover`, deterministic projection
|
||||
output, existing project memory, or prior provenance overlaps with what the
|
||||
current WorkUnit is about to write.
|
||||
- By the Stage 4 reconciliation agent for cross-WorkUnit sweeps, accepted patch
|
||||
overlap, and eviction decisions.
|
||||
|
||||
Apply the rules below before every write that could collide with an existing artifact.
|
||||
|
||||
|
|
@ -23,7 +26,8 @@ Apply the rules below before every write that could collide with an existing art
|
|||
3. **If the difference is structural - grain, columns, filter, join shape - is the current bundle the re-ingest of a previously-ingested bundle (i.e. `priorProvenance` has a row for this raw file and artifact)?**
|
||||
Re-ingest change (semantic break): replace + flag. Record in the IngestReport's `conflicts_resolved` list with `flagged_for_human: true`.
|
||||
|
||||
4. **If there's no prior-sync row (both are from THIS job), check for same-ingest contradictions:**
|
||||
4. **If reconciliation sees accepted patches from this same job with no
|
||||
prior-sync row, check for same-ingest contradictions:**
|
||||
|
||||
| Kind | Detection | Resolution |
|
||||
|---|---|---|
|
||||
|
|
|
|||
45
packages/context/src/core/git.service.patch.test.ts
Normal file
45
packages/context/src/core/git.service.patch.test.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { GitService } from './git.service.js';
|
||||
|
||||
/**
 * Creates a `GitService` rooted in a fresh temp directory (config lives in
 * `<home>/config`) and runs `onModuleInit()` on it.
 *
 * @returns The home dir, config dir, and the initialized service.
 */
async function makeGit() {
  const homeDir = await mkdtemp(join(tmpdir(), 'ktx-git-patch-'));
  const configDir = join(homeDir, 'config');
  const systemEmail = 'system@example.com';
  const gitIdentity = {
    userName: 'System User',
    userEmail: systemEmail,
    bootstrapMessage: 'init',
    bootstrapAuthor: 'system',
    bootstrapAuthorEmail: systemEmail,
  };
  const git = new GitService({ storage: { configDir, homeDir }, git: gitIdentity });
  await git.onModuleInit();
  return { homeDir, configDir, git };
}
|
||||
|
||||
describe('GitService patch helpers', () => {
  it('collects binary-safe no-rename patches and applies them with --3way --index', async () => {
    const { homeDir, configDir, git } = await makeGit();
    const pagePath = 'wiki/global/page.md';
    const pageOnDisk = join(configDir, pagePath);
    const authorName = 'System User';
    const authorEmail = 'system@example.com';

    // Commit the base version of the page and remember that commit.
    await mkdir(join(configDir, 'wiki/global'), { recursive: true });
    await writeFile(pageOnDisk, 'old\n');
    await git.commitFiles([pagePath], 'add page', authorName, authorEmail);
    const base = await git.revParseHead();

    // Commit an edit, then capture base..HEAD as a patch file.
    await writeFile(pageOnDisk, 'new\n');
    await git.commitFiles([pagePath], 'edit page', authorName, authorEmail);
    const patchPath = join(homeDir, 'proposal.patch');
    await git.writeBinaryNoRenamePatch(base, 'HEAD', patchPath);

    // Apply the patch inside a worktree created at the base commit.
    const targetDir = join(homeDir, 'target');
    await git.addWorktree(targetDir, 'target', base);
    const targetGit = git.forWorktree(targetDir);
    await targetGit.applyPatchFile3WayIndex(patchPath);
    await targetGit.commitStaged('apply proposal', authorName, authorEmail);

    await expect(readFile(join(targetDir, pagePath), 'utf-8')).resolves.toBe('new\n');
  });
});
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
import { promises as fs } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { dirname, join } from 'node:path';
|
||||
import type { SimpleGit } from 'simple-git';
|
||||
import { noopLogger, resolveConfigDir, type KtxCoreConfig, type KtxLogger } from './config.js';
|
||||
import { createSimpleGit } from './git-env.js';
|
||||
|
|
@ -533,6 +533,19 @@ export class GitService {
|
|||
return out;
|
||||
}
|
||||
|
||||
/**
 * Lists the paths that `git status` reports as changed, deduplicated and
 * sorted.
 *
 * Parses `git status --porcelain=v1 -z`. Each NUL-terminated entry is
 * `XY <path>`. A rename or copy entry is followed by one extra field holding
 * the bare original path, with no `XY ` prefix. That field is consumed whole
 * rather than sliced like a status entry, which would corrupt it. The original
 * path of a rename is reported as changed; the source of a copy is not.
 */
async changedPaths(): Promise<string[]> {
  const raw = await this.git.raw(['status', '--porcelain=v1', '-z']);
  const fields = raw.split('\0');
  const paths = new Set<string>();
  for (let index = 0; index < fields.length; index += 1) {
    const entry = fields[index];
    if (!entry) {
      continue;
    }
    const status = entry.slice(0, 2);
    const path = entry.slice(3);
    if (path.length > 0) {
      paths.add(path);
    }
    const isRename = status.includes('R');
    if (isRename || status.includes('C')) {
      // Skip past the trailing original-path field so it is not parsed as an entry.
      index += 1;
      const originalPath = fields[index];
      if (isRename && originalPath) {
        paths.add(originalPath);
      }
    }
  }
  return [...paths].sort();
}
|
||||
|
||||
/**
|
||||
* List all paths under the working tree that match `pathSpec`, scoped to HEAD.
|
||||
* Used for the reconciler's first-ever run when there's no watermark to diff from.
|
||||
|
|
@ -747,6 +760,55 @@ export class GitService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Writes the output of `git diff --binary --no-renames <from>..<to>` to
 * `patchPath`, creating the parent directory if needed. Runs inside the
 * mutation queue.
 */
async writeBinaryNoRenamePatch(from: string, to: string, patchPath: string): Promise<void> {
  const diffArgs = ['diff', '--binary', '--no-renames', `${from}..${to}`];
  await this.withMutationQueue(async () => {
    const patchText = await this.git.raw(diffArgs);
    await fs.mkdir(dirname(patchPath), { recursive: true });
    await fs.writeFile(patchPath, patchText, 'utf-8');
  });
}
|
||||
|
||||
/** Runs `git apply --3way --index <patchPath>` inside the mutation queue. */
async applyPatchFile3WayIndex(patchPath: string): Promise<void> {
  const applyArgs = ['apply', '--3way', '--index', patchPath];
  await this.withMutationQueue(async () => {
    await this.git.raw(applyArgs);
  });
}
|
||||
|
||||
/**
 * Commits whatever is currently staged in the index, inside the mutation
 * queue.
 *
 * When nothing is staged, no commit is made and the returned info describes
 * the current HEAD with `created: false`. Otherwise a commit is created with
 * the given message and author, and the info describes it with
 * `created: true`.
 *
 * Fields missing from the latest log entry fall back to the supplied
 * message/author and the current time when a commit was created, or to empty
 * strings and the Unix epoch when it was not.
 */
async commitStaged(commitMessage: string, author: string, authorEmail: string): Promise<GitCommitInfo> {
  return this.withMutationQueue(async () => {
    const stagedNames = await this.git.diff(['--cached', '--name-only']);
    const created = stagedNames.trim().length > 0;
    if (created) {
      await this.git.commit(commitMessage, { '--author': `${author} <${authorEmail}>` });
    }

    const commitHash = (await this.git.revparse(['HEAD'])).trim();
    const { latest } = await this.git.log({ maxCount: 1 });
    const fallbackDate = (created ? new Date() : new Date(0)).toISOString();

    return {
      commitHash,
      shortHash: commitHash.substring(0, 8),
      message: latest?.message ?? (created ? commitMessage : ''),
      author: latest?.author_name ?? (created ? author : ''),
      authorEmail: latest?.author_email ?? (created ? authorEmail : ''),
      timestamp: latest?.date ?? fallbackDate,
      committedDate: latest?.date ? new Date(latest.date).toISOString() : fallbackDate,
      created,
    };
  });
}
|
||||
|
||||
private async fileExists(path: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(path);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,15 @@
|
|||
import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js';
|
||||
import type {
|
||||
ChunkResult,
|
||||
DeterministicFinalizationContext,
|
||||
DiffSet,
|
||||
FetchContext,
|
||||
FinalizationResult,
|
||||
ScopeDescriptor,
|
||||
SourceAdapter,
|
||||
} from '../../types.js';
|
||||
import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js';
|
||||
import { detectHistoricSqlStagedDir } from './detect.js';
|
||||
import { projectHistoricSqlEvidence } from './projection.js';
|
||||
import { stageHistoricSqlAggregatedSnapshot } from './stage-unified.js';
|
||||
import { type HistoricSqlSourceAdapterDeps } from './types.js';
|
||||
|
||||
|
|
@ -35,4 +44,22 @@ export class HistoricSqlSourceAdapter implements SourceAdapter {
|
|||
describeScope(stagedDir: string): Promise<ScopeDescriptor> {
|
||||
return describeHistoricSqlUnifiedScope(stagedDir);
|
||||
}
|
||||
|
||||
/**
 * Run the historic-SQL evidence projection and wrap it as a finalization result.
 *
 * Forwards the workdir, connection, sync, run and override-replay fields of
 * `ctx` to `projectHistoricSqlEvidence`. The projection is returned as
 * `result`, its warnings, touched sources, changed wiki page keys and actions
 * are surfaced at the top level, and `errors` is always an empty array.
 */
async finalize(ctx: DeterministicFinalizationContext): Promise<FinalizationResult> {
  const { workdir, connectionId, syncId, runId, overrideReplay } = ctx;
  const projection = await projectHistoricSqlEvidence({ workdir, connectionId, syncId, runId, overrideReplay });
  const { warnings, touchedSources, changedWikiPageKeys, actions } = projection;
  return {
    result: projection,
    warnings,
    errors: [],
    touchedSources,
    changedWikiPageKeys,
    actions,
  };
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -242,12 +242,12 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
expect(result.result.failedWorkUnits).toEqual([]);
|
||||
expect(result.result.workUnitCount).toBe(3);
|
||||
expect(agentRunner.runLoop).toHaveBeenCalledTimes(3);
|
||||
const postProcessor = result.report.body.postProcessor;
|
||||
expect(postProcessor).toBeDefined();
|
||||
if (!postProcessor) {
|
||||
throw new Error('Expected historic-SQL post-processor result');
|
||||
const finalization = result.report.body.finalization;
|
||||
expect(finalization).toBeDefined();
|
||||
if (!finalization) {
|
||||
throw new Error('Expected historic-SQL finalization result');
|
||||
}
|
||||
expect(postProcessor).toMatchObject({
|
||||
expect(finalization).toMatchObject({
|
||||
sourceKey: 'historic-sql',
|
||||
status: 'success',
|
||||
result: {
|
||||
|
|
@ -255,7 +255,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => {
|
|||
patternPagesWritten: 1,
|
||||
},
|
||||
});
|
||||
expect(postProcessor.touchedSources).toEqual(
|
||||
expect(finalization.declaredTouchedSources).toEqual(
|
||||
expect.arrayContaining([
|
||||
{ connectionId: 'warehouse', sourceName: 'customers' },
|
||||
{ connectionId: 'warehouse', sourceName: 'orders' },
|
||||
|
|
|
|||
|
|
@ -1,74 +0,0 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import YAML from 'yaml';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { HistoricSqlProjectionPostProcessor } from './post-processor.js';
|
||||
|
||||
/** Create a fresh temporary directory under the OS temp dir and return its path. */
async function tempWorkdir(): Promise<string> {
  const prefix = join(tmpdir(), 'historic-sql-post-processor-');
  return mkdtemp(prefix);
}
|
||||
|
||||
/**
 * Write `value` as pretty-printed JSON (2-space indent, trailing newline) to
 * `relPath` under `root`, creating the parent directory first.
 */
async function writeJson(root: string, relPath: string, value: unknown): Promise<void> {
  const destination = join(root, relPath);
  const parentDir = join(destination, '..');
  await mkdir(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(value, null, 2)}\n`;
  await writeFile(destination, serialized, 'utf-8');
}
|
||||
|
||||
describe('HistoricSqlProjectionPostProcessor', () => {
  it('projects current run evidence before the ingest squash commit', async () => {
    const workdir = await tempWorkdir();
    const narrative = 'Orders are repeatedly queried by lifecycle status.';

    // Seed a schema shard that only knows the `orders` table and its columns.
    const schemaDir = join(workdir, 'semantic-layer/warehouse/_schema');
    const shardPath = join(schemaDir, 'public.yaml');
    const shard = { tables: { orders: { table: 'public.orders', columns: [{ name: 'id', type: 'string' }] } } };
    await mkdir(schemaDir, { recursive: true });
    await writeFile(shardPath, YAML.stringify(shard), 'utf-8');

    // Staged raw snapshot for sync-1: a manifest plus one table file.
    const rawDir = 'raw-sources/warehouse/historic-sql/sync-1';
    const manifest = {
      source: 'historic-sql',
      connectionId: 'warehouse',
      dialect: 'postgres',
      fetchedAt: '2026-05-11T00:00:00.000Z',
      windowStart: '2026-02-10T00:00:00.000Z',
      windowEnd: '2026-05-11T00:00:00.000Z',
      snapshotRowCount: 1,
      touchedTableCount: 1,
      parseFailures: 0,
      warnings: [],
      probeWarnings: [],
      staleArchiveAfterDays: 90,
    };
    await writeJson(workdir, `${rawDir}/manifest.json`, manifest);
    await writeJson(workdir, `${rawDir}/tables/public.orders.json`, { table: 'public.orders' });

    // Evidence emitted for run-1 describing usage of the orders table.
    const ordersEvidence = {
      kind: 'table_usage',
      connectionId: 'warehouse',
      table: 'public.orders',
      rawPath: 'tables/public.orders.json',
      usage: {
        narrative,
        frequencyTier: 'high',
        commonFilters: ['status'],
        commonJoins: [],
        staleSince: null,
      },
    };
    await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/orders.json', ordersEvidence);

    const postProcessor = new HistoricSqlProjectionPostProcessor();
    const outcome = await postProcessor.run({
      connectionId: 'warehouse',
      sourceKey: 'historic-sql',
      syncId: 'sync-1',
      jobId: 'job-1',
      runId: 'run-1',
      workdir,
      parseArtifacts: null,
    });

    expect(outcome.errors).toEqual([]);
    expect(outcome.warnings).toEqual([]);
    expect(outcome.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
    expect(outcome.result).toMatchObject({ tableUsageMerged: 1 });

    // The narrative from the evidence must have landed in the schema shard.
    await expect(readFile(shardPath, 'utf-8')).resolves.toContain(narrative);
  });
});
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
import type { IngestBundlePostProcessorInput, IngestBundlePostProcessorPort, IngestBundlePostProcessorResult } from '../../ports.js';
|
||||
import { createSimpleGit } from '../../../core/git-env.js';
|
||||
import { projectHistoricSqlEvidence } from './projection.js';
|
||||
|
||||
/**
 * Commit projection output that is pending in `workdir`.
 *
 * Does nothing when `workdir` is not a git repository (a failing repo check
 * is treated the same way), when no changed path lies under `semantic-layer/`
 * or starts with `wiki/global/historic-sql`, or when staging those paths
 * leaves the index without changes. Otherwise the matching paths are staged
 * and committed as "Project historic SQL evidence".
 */
async function commitProjectionChanges(workdir: string): Promise<void> {
  const repo = createSimpleGit(workdir);
  const insideRepo = await repo.checkIsRepo().catch(() => false);
  if (!insideRepo) {
    return;
  }

  const { files } = await repo.status();
  const projectedPaths: string[] = [];
  for (const { path } of files) {
    if (path.startsWith('semantic-layer/') || path.startsWith('wiki/global/historic-sql')) {
      projectedPaths.push(path);
    }
  }
  if (projectedPaths.length === 0) {
    return;
  }

  await repo.add(projectedPaths);
  const stagedNames = await repo.diff(['--cached', '--name-only']);
  if (stagedNames.trim().length === 0) {
    return;
  }

  await repo.commit('Project historic SQL evidence', { '--author': 'System User <system@example.com>' });
}
|
||||
|
||||
/**
 * Post-processor that projects historic-SQL evidence into the workdir and
 * then commits the resulting changes.
 */
export class HistoricSqlProjectionPostProcessor implements IngestBundlePostProcessorPort {
  /**
   * Project the evidence for the given run, commit the projected files, and
   * report the projection as the post-processor result. `errors` is always
   * returned empty.
   */
  async run(input: IngestBundlePostProcessorInput): Promise<IngestBundlePostProcessorResult> {
    const { workdir, connectionId, syncId, runId } = input;
    const projection = await projectHistoricSqlEvidence({ workdir, connectionId, syncId, runId });
    await commitProjectionChanges(workdir);

    const { warnings, touchedSources } = projection;
    return {
      result: projection,
      warnings,
      errors: [],
      touchedSources,
    };
  }
}
|
||||
|
|
@ -74,6 +74,15 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
|
||||
expect(result.actions).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
target: 'sl',
|
||||
key: 'orders',
|
||||
rawPaths: ['tables/public.orders.json'],
|
||||
}),
|
||||
]),
|
||||
);
|
||||
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
|
||||
expect(shard.tables.orders.usage).toEqual({
|
||||
ownerNote: 'keep me',
|
||||
|
|
@ -164,6 +173,16 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' });
|
||||
|
||||
expect(result.patternPagesWritten).toBe(1);
|
||||
expect(result.changedWikiPageKeys).toContain('historic-sql-old-order-lifecycle');
|
||||
expect(result.actions).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
target: 'wiki',
|
||||
key: 'historic-sql-old-order-lifecycle',
|
||||
rawPaths: ['patterns-input.json'],
|
||||
}),
|
||||
]),
|
||||
);
|
||||
await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain(
|
||||
'Order Lifecycle Analysis',
|
||||
);
|
||||
|
|
@ -320,6 +339,19 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
probeWarnings: [],
|
||||
staleArchiveAfterDays: 90,
|
||||
});
|
||||
await writeJson(workdir, '.ktx/ingest-evidence/historic-sql/run-1/customers.json', {
|
||||
kind: 'table_usage',
|
||||
connectionId: 'warehouse',
|
||||
table: 'public.customers',
|
||||
rawPath: 'tables/public.customers.json',
|
||||
usage: {
|
||||
narrative: 'Customers were queried.',
|
||||
frequencyTier: 'low',
|
||||
commonFilters: [],
|
||||
commonJoins: [],
|
||||
staleSince: null,
|
||||
},
|
||||
});
|
||||
await writeText(
|
||||
workdir,
|
||||
'wiki/global/historic-sql-old-template.md',
|
||||
|
|
@ -346,6 +378,9 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
|
||||
expect(result.staleTablesMarked).toBe(1);
|
||||
expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]);
|
||||
const staleAction = result.actions.find((action) => action.target === 'sl' && action.key === 'orders');
|
||||
expect(staleAction).toEqual(expect.objectContaining({ target: 'sl', key: 'orders' }));
|
||||
expect(staleAction?.rawPaths).toBeUndefined();
|
||||
const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'));
|
||||
expect(shard.tables.orders.usage).toEqual({
|
||||
ownerNote: 'keep analyst annotation',
|
||||
|
|
@ -360,4 +395,63 @@ describe('projectHistoricSqlEvidence', () => {
|
|||
'Old body',
|
||||
);
|
||||
});
|
||||
|
||||
it('does not mark stale or archive pages when override replay has no current-run evidence', async () => {
  const workdir = await tempWorkdir();

  // Existing shard whose `orders` table already carries usage metadata.
  const existingShard = {
    tables: {
      orders: {
        table: 'public.orders',
        usage: {
          narrative: 'Orders were active before.',
          frequencyTier: 'high',
          commonFilters: ['status'],
          commonGroupBys: ['status'],
          commonJoins: [],
        },
        columns: [{ name: 'id', type: 'string' }],
      },
    },
  };
  await writeText(workdir, 'semantic-layer/warehouse/_schema/public.yaml', YAML.stringify(existingShard));

  // Manifest for the override sync; no evidence files are written for the run.
  const emptySnapshotManifest = {
    source: 'historic-sql',
    connectionId: 'warehouse',
    dialect: 'postgres',
    fetchedAt: '2026-05-11T00:00:00.000Z',
    windowStart: '2026-02-10T00:00:00.000Z',
    windowEnd: '2026-05-11T00:00:00.000Z',
    snapshotRowCount: 0,
    touchedTableCount: 0,
    parseFailures: 0,
    warnings: [],
    probeWarnings: [],
    staleArchiveAfterDays: 90,
  };
  await writeJson(workdir, 'raw-sources/warehouse/historic-sql/override-sync/manifest.json', emptySnapshotManifest);

  const outcome = await projectHistoricSqlEvidence({
    workdir,
    connectionId: 'warehouse',
    syncId: 'override-sync',
    runId: 'override-run',
    overrideReplay: {
      priorJobId: 'prior-job',
      priorRunId: 'prior-run',
      priorSyncId: 'prior-sync',
      evictionRawPaths: ['tables/public/orders.json'],
    },
  });

  // Every counter stays at zero and nothing is reported as touched or changed.
  const counters = [
    'tableUsageMerged',
    'staleTablesMarked',
    'patternPagesWritten',
    'stalePatternPagesMarked',
    'archivedPatternPages',
  ] as const;
  for (const counter of counters) {
    expect(outcome[counter]).toBe(0);
  }
  expect(outcome.touchedSources).toEqual([]);
  expect(outcome.changedWikiPageKeys).toEqual([]);
  expect(outcome.actions).toEqual([]);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import { access, mkdir, readdir, readFile, rename, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join, relative } from 'node:path';
|
||||
import YAML from 'yaml';
|
||||
import type { MemoryAction } from '../../../memory/index.js';
|
||||
import { rawSourcesDirForSync } from '../../raw-sources-paths.js';
|
||||
import type { FinalizationOverrideReplay } from '../../types.js';
|
||||
import { mergeUsagePreservingExternal } from '../live-database/manifest.js';
|
||||
import { historicSqlEvidenceEnvelopeSchema, type HistoricSqlEvidenceEnvelope } from './evidence.js';
|
||||
import type { TableUsageOutput } from './skill-schemas.js';
|
||||
|
|
@ -12,6 +14,7 @@ export interface HistoricSqlProjectionInput {
|
|||
connectionId: string;
|
||||
syncId: string;
|
||||
runId: string;
|
||||
overrideReplay?: FinalizationOverrideReplay;
|
||||
}
|
||||
|
||||
export interface HistoricSqlProjectionResult {
|
||||
|
|
@ -21,6 +24,8 @@ export interface HistoricSqlProjectionResult {
|
|||
stalePatternPagesMarked: number;
|
||||
archivedPatternPages: number;
|
||||
touchedSources: Array<{ connectionId: string; sourceName: string }>;
|
||||
changedWikiPageKeys: string[];
|
||||
actions: MemoryAction[];
|
||||
warnings: string[];
|
||||
}
|
||||
|
||||
|
|
@ -223,6 +228,8 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
stalePatternPagesMarked: 0,
|
||||
archivedPatternPages: 0,
|
||||
touchedSources: [],
|
||||
changedWikiPageKeys: [],
|
||||
actions: [],
|
||||
warnings: [],
|
||||
};
|
||||
const touchedKeys = new Set<string>();
|
||||
|
|
@ -230,6 +237,16 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
const manifest = stagedManifestSchema.parse(await readJson(join(rawDir, 'manifest.json')));
|
||||
const currentTables = await currentStagedTables(rawDir);
|
||||
const evidence = await loadEvidence(input.workdir, input.runId);
|
||||
if (input.overrideReplay && evidence.length === 0) {
|
||||
result.warnings.push(
|
||||
'historic-sql finalization skipped stale/archive cleanup during override replay without current-run evidence',
|
||||
);
|
||||
return result;
|
||||
}
|
||||
if (evidence.length === 0) {
|
||||
result.warnings.push('historic-sql finalization skipped because no current-run evidence was emitted');
|
||||
return result;
|
||||
}
|
||||
const tableEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'table_usage' } => entry.kind === 'table_usage');
|
||||
const patternEvidence = evidence.filter((entry): entry is HistoricSqlEvidenceEnvelope & { kind: 'pattern' } => entry.kind === 'pattern');
|
||||
|
||||
|
|
@ -255,6 +272,14 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
touchedKeys.add(key);
|
||||
result.touchedSources.push({ connectionId: input.connectionId, sourceName });
|
||||
}
|
||||
result.actions.push({
|
||||
target: 'sl',
|
||||
type: 'updated',
|
||||
key: sourceName,
|
||||
targetConnectionId: input.connectionId,
|
||||
detail: `Merged historic-SQL usage for ${matchingEvidence.table}`,
|
||||
rawPaths: [matchingEvidence.rawPath],
|
||||
});
|
||||
}
|
||||
} else if (entry.usage && !currentTables.has(tableRef)) {
|
||||
const merged = mergeUsagePreservingExternal(entry.usage as TableUsageOutput | undefined, staleUsage(manifest.fetchedAt));
|
||||
|
|
@ -267,6 +292,13 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
touchedKeys.add(key);
|
||||
result.touchedSources.push({ connectionId: input.connectionId, sourceName });
|
||||
}
|
||||
result.actions.push({
|
||||
target: 'sl',
|
||||
type: 'updated',
|
||||
key: sourceName,
|
||||
targetConnectionId: input.connectionId,
|
||||
detail: `Marked historic-SQL usage stale for ${tableRef}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -303,6 +335,14 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
await writeFile(pagePath, renderMarkdownPage(frontmatter, renderPatternMarkdown(pattern)), 'utf-8');
|
||||
writtenKeys.add(key);
|
||||
result.patternPagesWritten += 1;
|
||||
result.changedWikiPageKeys.push(key);
|
||||
result.actions.push({
|
||||
target: 'wiki',
|
||||
type: reusable ? 'updated' : 'created',
|
||||
key,
|
||||
detail: `Projected historic-SQL pattern ${pattern.pattern.title}`,
|
||||
rawPaths: [pattern.rawPath],
|
||||
});
|
||||
}
|
||||
|
||||
for (const page of patternPages) {
|
||||
|
|
@ -315,6 +355,13 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
'utf-8',
|
||||
);
|
||||
result.archivedPatternPages += 1;
|
||||
result.changedWikiPageKeys.push(page.key);
|
||||
result.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'updated',
|
||||
key: page.key,
|
||||
detail: `Archived stale historic-SQL pattern page ${page.key}`,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
const tags = [...new Set([...stringArray(page.frontmatter.tags), 'stale'])];
|
||||
|
|
@ -324,7 +371,15 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp
|
|||
'utf-8',
|
||||
);
|
||||
result.stalePatternPagesMarked += 1;
|
||||
result.changedWikiPageKeys.push(page.key);
|
||||
result.actions.push({
|
||||
target: 'wiki',
|
||||
type: 'updated',
|
||||
key: page.key,
|
||||
detail: `Marked historic-SQL pattern page ${page.key} stale`,
|
||||
});
|
||||
}
|
||||
|
||||
result.changedWikiPageKeys = [...new Set(result.changedWikiPageKeys)].sort();
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,6 +138,52 @@ describe('fetchMetabaseBundle', () => {
|
|||
expect(warn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('emits memory-flow progress while fetching Metabase cards', async () => {
  const events: unknown[] = [];

  // Matcher for a `stage_progress` event of the `source` stage with the given message.
  const sourceProgress = (message: string, extra: Record<string, unknown> = {}) =>
    expect.objectContaining({ type: 'stage_progress', stage: 'source', message, ...extra });

  await fetchMetabaseBundle({
    pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 },
    stagedDir,
    ctx: {
      ...makeFetchContext(),
      memoryFlow: {
        // Record every emitted event so the assertions below can inspect them.
        emit: (event) => events.push(event),
        update: vi.fn(),
        finish: vi.fn(),
        snapshot: vi.fn(),
      },
    },
    clientFactory,
    sourceStateReader,
  });

  const expectedEvents = [
    sourceProgress('Fetching Metabase database 42 metadata'),
    sourceProgress('Fetching 1 Metabase card for database 42'),
    sourceProgress('Checked 1/1 Metabase cards for database 42; wrote 1', { transient: true }),
    sourceProgress('Fetched Metabase database 42: 1 cards, 0 unresolved'),
  ];
  expect(events).toEqual(expect.arrayContaining(expectedEvents));
});
|
||||
|
||||
it('routes Metabase fetch warnings through the injected logger', async () => {
|
||||
const logger = {
|
||||
log: vi.fn(),
|
||||
|
|
|
|||
|
|
@ -83,6 +83,15 @@ function resolvePath(index: Map<number | 'root', CollectionNode>, collectionId:
|
|||
export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise<void> {
|
||||
const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig);
|
||||
const logger = params.logger ?? noopMetabaseFetchLogger;
|
||||
const emitFetchProgress = (percent: number, message: string, transient = false): void => {
|
||||
params.ctx.memoryFlow?.emit({
|
||||
type: 'stage_progress',
|
||||
stage: 'source',
|
||||
percent,
|
||||
message,
|
||||
...(transient ? { transient } : {}),
|
||||
});
|
||||
};
|
||||
const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId);
|
||||
const mapping = syncState.mappings.find(
|
||||
(m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled,
|
||||
|
|
@ -100,6 +109,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
|
||||
const client = await params.clientFactory.createClient(pullConfig, params.ctx);
|
||||
try {
|
||||
emitFetchProgress(26, `Fetching Metabase database ${pullConfig.metabaseDatabaseId} metadata`);
|
||||
let mappingDatabaseName = mapping.metabaseDatabaseName;
|
||||
let mappingEngine = mapping.metabaseEngine;
|
||||
if (mappingDatabaseName === null) {
|
||||
|
|
@ -133,6 +143,12 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
await mkdir(join(params.stagedDir, STAGED_FILES.databasesDir), { recursive: true });
|
||||
|
||||
const cardIdsToFetch = await resolveCardIdsToFetch(client, scope, pullConfig.metabaseDatabaseId, logger);
|
||||
emitFetchProgress(
|
||||
28,
|
||||
`Fetching ${cardIdsToFetch.length} Metabase card${cardIdsToFetch.length === 1 ? '' : 's'} for database ${
|
||||
pullConfig.metabaseDatabaseId
|
||||
}`,
|
||||
);
|
||||
|
||||
const referencedCollectionIds = new Set<number>();
|
||||
let writtenCards = 0;
|
||||
|
|
@ -212,7 +228,19 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr
|
|||
}
|
||||
}
|
||||
}
|
||||
const knownTotal = Math.max(cardIdsToFetch.length, fetched.size + queue.length);
|
||||
if (fetched.size === 1 || fetched.size % 10 === 0 || queue.length === 0) {
|
||||
emitFetchProgress(
|
||||
30,
|
||||
`Checked ${fetched.size}/${knownTotal} Metabase cards for database ${pullConfig.metabaseDatabaseId}; wrote ${writtenCards}`,
|
||||
true,
|
||||
);
|
||||
}
|
||||
}
|
||||
emitFetchProgress(
|
||||
32,
|
||||
`Fetched Metabase database ${pullConfig.metabaseDatabaseId}: ${writtenCards} cards, ${unresolvedCards.length} unresolved`,
|
||||
);
|
||||
|
||||
for (const colId of referencedCollectionIds) {
|
||||
const node = collectionIndex.get(colId);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { makeLocalGitRepo } from '../../../test/make-local-git-repo.js';
|
||||
import type { SourceAdapter } from '../../types.js';
|
||||
import type { MetricFlowParseResult } from './deep-parse.js';
|
||||
import { MetricflowSourceAdapter } from './metricflow.adapter.js';
|
||||
import { readMetricflowProjectionConfig, writeMetricflowProjectionConfig } from './projection-config.js';
|
||||
|
||||
function compileOnlyRequiredDepsCheck(): void {
|
||||
// @ts-expect-error MetricflowSourceAdapter requires an explicit cache home.
|
||||
|
|
@ -22,6 +24,25 @@ async function makeRepo(tmpRoot: string, files: Record<string, string>) {
|
|||
return makeLocalGitRepo(fixtureDir, join(tmpRoot, 'origin'));
|
||||
}
|
||||
|
||||
/**
 * Build a minimal MetricFlow parse result fixture: one `orders` semantic
 * model, no cross-model metrics or relationships, and a single parser warning.
 */
function metricflowParseResult(): MetricFlowParseResult {
  const ordersModel: MetricFlowParseResult['semanticModels'][number] = {
    name: 'orders',
    description: 'Orders',
    modelRef: 'orders',
    dimensions: [{ name: 'status', column: 'status', type: 'string', label: 'Status' }],
    measures: [{ type: 'simple', name: 'order_count', column: 'id', aggregation: 'count' }],
    entities: [{ name: 'customer', type: 'foreign', expr: 'customer_id' }],
    defaultTimeDimension: null,
  };

  return {
    semanticModels: [ordersModel],
    crossModelMetrics: [],
    relationships: [],
    warnings: ['parser warning'],
  };
}
|
||||
|
||||
describe('MetricflowSourceAdapter', () => {
|
||||
let tmpRoot: string;
|
||||
let stagedDir: string;
|
||||
|
|
@ -127,4 +148,119 @@ describe('MetricflowSourceAdapter', () => {
|
|||
await expect(readFile(join(stagedDir, 'models/orders.yml'), 'utf-8')).resolves.toContain('semantic_models');
|
||||
expect(await adapter.detect(stagedDir)).toBe(true);
|
||||
});
|
||||
|
||||
it('persists parsed target tables for deterministic projection during fetch', async () => {
  const { repoUrl } = await makeRepo(tmpRoot, {
    'dbt_project.yml': 'name: analytics\n',
    'models/orders.yml': 'semantic_models:\n - name: orders\n model: ref("orders")\n',
  });

  // Fetch with a pull config that carries one parsed target table for `orders`.
  await adapter.fetch?.(
    {
      repoUrl,
      branch: 'main',
      path: null,
      authToken: null,
      parsedTargetTables: {
        orders: {
          ok: true,
          catalog: null,
          schema: 'analytics',
          name: 'orders',
          canonicalTable: 'analytics.orders',
        },
      },
    },
    stagedDir,
    { connectionId: 'warehouse-1', sourceKey: 'metricflow' },
  );

  // The projection config read back from the staged dir must contain that table.
  const persisted = await readMetricflowProjectionConfig(stagedDir);
  expect(persisted).toMatchObject({
    parsedTargetTables: {
      orders: {
        ok: true,
        schema: 'analytics',
        name: 'orders',
      },
    },
  });
});
|
||||
|
||||
it('projects parsed MetricFlow semantic models in the integration worktree', async () => {
  const connectionId = 'warehouse-1';
  const integrationWorkdir = '/tmp/metricflow-integration';

  await writeMetricflowProjectionConfig(stagedDir, {
    parsedTargetTables: {
      orders: {
        ok: true,
        catalog: null,
        schema: 'analytics',
        name: 'orders',
        canonicalTable: 'analytics.orders',
      },
    },
  });

  // Service handed back by `forWorktree`; it reports no existing sources.
  const worktreeService = {
    getManifestEntry: vi.fn().mockResolvedValue(null),
    isManifestBacked: vi.fn().mockResolvedValue(false),
    loadAllSources: vi.fn().mockResolvedValue({ sources: [], loadErrors: [] }),
    loadSource: vi.fn().mockResolvedValue(null),
    writeSource: vi.fn().mockResolvedValue({ warnings: [] }),
  };
  // Root service: only `forWorktree` has a configured return value.
  const semanticLayerService = {
    forWorktree: vi.fn().mockReturnValue(worktreeService),
    getManifestEntry: vi.fn(),
    isManifestBacked: vi.fn(),
    loadAllSources: vi.fn(),
    loadSource: vi.fn(),
    writeSource: vi.fn(),
  };

  const projection = await adapter.project?.({
    connectionId,
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: integrationWorkdir,
    parseArtifacts: metricflowParseResult(),
    semanticLayerService: semanticLayerService as never,
  });

  expect(semanticLayerService.forWorktree).toHaveBeenCalledWith(integrationWorkdir);
  expect(worktreeService.writeSource).toHaveBeenCalledWith(
    connectionId,
    expect.objectContaining({ name: 'orders' }),
    'dbt MetricFlow',
    expect.any(String),
    'dbt MetricFlow sync: create source orders',
    { skipValidation: true },
  );
  expect(projection).toMatchObject({
    warnings: ['parser warning'],
    errors: [],
    touchedSources: [{ connectionId, sourceName: 'orders' }],
    changedWikiPageKeys: [],
  });
});
|
||||
|
||||
it('returns a projection error when parse artifacts are missing', async () => {
  // `parseArtifacts` is deliberately undefined; the service is an empty stub.
  const projectionContext = {
    connectionId: 'warehouse-1',
    sourceKey: 'metricflow',
    syncId: 'sync-1',
    jobId: 'job-1',
    runId: 'run-1',
    stagedDir,
    workdir: '/tmp/metricflow-integration',
    parseArtifacts: undefined,
    semanticLayerService: {} as never,
  };

  const projection = await adapter.project?.(projectionContext);

  expect(projection).toMatchObject({
    warnings: [],
    errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
    touchedSources: [],
    changedWikiPageKeys: [],
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,10 +1,23 @@
|
|||
import { join } from 'node:path';
|
||||
import type { ChunkResult, DiffSet, FetchContext, SourceAdapter } from '../../types.js';
|
||||
import type {
|
||||
ChunkResult,
|
||||
DeterministicProjectionContext,
|
||||
DiffSet,
|
||||
FetchContext,
|
||||
ProjectionResult,
|
||||
SourceAdapter,
|
||||
} from '../../types.js';
|
||||
import { chunkMetricFlowProject } from './chunk.js';
|
||||
import { detectMetricFlowStagedDir } from './detect.js';
|
||||
import { parseMetricflowFiles, type MetricFlowParseResult } from './deep-parse.js';
|
||||
import { fetchMetricflowRepo } from './fetch.js';
|
||||
import { importMetricflowSemanticModels } from './import-semantic-models.js';
|
||||
import { parseMetricFlowStagedDir, type ParsedMetricFlowProject } from './parse.js';
|
||||
import {
|
||||
metricflowHostTablesFromParsedTargets,
|
||||
readMetricflowProjectionConfig,
|
||||
writeMetricflowProjectionConfig,
|
||||
} from './projection-config.js';
|
||||
import { parseMetricflowPullConfig } from './pull-config.js';
|
||||
|
||||
export interface MetricflowSourceAdapterDeps {
|
||||
|
|
@ -33,6 +46,9 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
cacheDir: this.resolveCacheDir(ctx.connectionId),
|
||||
stagedDir,
|
||||
});
|
||||
await writeMetricflowProjectionConfig(stagedDir, {
|
||||
parsedTargetTables: config.parsedTargetTables,
|
||||
});
|
||||
}
|
||||
|
||||
async listTargetConnectionIds(_stagedDir: string): Promise<string[]> {
|
||||
|
|
@ -46,6 +62,37 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
return { ...chunk, parseArtifacts };
|
||||
}
|
||||
|
||||
/**
 * Import the parsed MetricFlow semantic models for this run.
 *
 * When `ctx.parseArtifacts` does not have the shape of a MetricFlow parse
 * result, nothing is imported and a single error is returned. Otherwise the
 * projection config is read from `ctx.stagedDir`, its parsed target tables
 * are converted to host tables, and the models are imported through
 * `importMetricflowSemanticModels` with `ctx.workdir`. No wiki page keys are
 * ever reported as changed.
 */
async project(ctx: DeterministicProjectionContext): Promise<ProjectionResult> {
  const { parseArtifacts } = ctx;
  if (!isMetricFlowParseResult(parseArtifacts)) {
    return {
      warnings: [],
      errors: ['MetricFlow deterministic projection requires parseArtifacts from chunk()'],
      touchedSources: [],
      changedWikiPageKeys: [],
    };
  }

  const { parsedTargetTables } = await readMetricflowProjectionConfig(ctx.stagedDir);
  const imported = await importMetricflowSemanticModels(
    { semanticLayerService: ctx.semanticLayerService },
    {
      connectionId: ctx.connectionId,
      parseResult: parseArtifacts,
      targetSchema: null,
      hostTables: metricflowHostTablesFromParsedTargets(parsedTargetTables),
      workdir: ctx.workdir,
    },
  );

  const { warnings, errors, touchedSources } = imported;
  return {
    result: imported,
    warnings,
    errors,
    touchedSources,
    changedWikiPageKeys: [],
  };
}
|
||||
|
||||
private resolveCacheDir(connectionId: string): string {
|
||||
return join(this.deps.homeDir, 'ingest-metricflow-repos', connectionId);
|
||||
}
|
||||
|
|
@ -54,3 +101,16 @@ export class MetricflowSourceAdapter implements SourceAdapter {
|
|||
/** Parses the files of an already-loaded MetricFlow project into a parse result. */
function parseMetricflowStagedDirForImport(project: ParsedMetricFlowProject): MetricFlowParseResult {
  const { files } = project;
  return parseMetricflowFiles(files);
}
|
||||
|
||||
/**
 * Structural type guard for a MetricFlow parse result.
 *
 * Only checks that the four top-level collections are arrays; element shapes
 * are not inspected.
 */
function isMetricFlowParseResult(value: unknown): value is MetricFlowParseResult {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  const requiredArrayFields = ['semanticModels', 'crossModelMetrics', 'relationships', 'warnings'] as const;
  const candidate = value as Partial<Record<(typeof requiredArrayFields)[number], unknown>>;
  return requiredArrayFields.every((field) => Array.isArray(candidate[field]));
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,54 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import { parsedTargetTableSchema, type ParsedTargetTable } from '../../parsed-target-table.js';
|
||||
import type { MetricflowHostTable } from './semantic-models.js';
|
||||
|
||||
const METRICFLOW_PROJECTION_CONFIG_FILE = 'sync-config.json';
|
||||
|
||||
const metricflowProjectionConfigSchema = z.object({
|
||||
parsedTargetTables: z.record(z.string(), parsedTargetTableSchema).default({}),
|
||||
});
|
||||
|
||||
export type MetricflowProjectionConfig = z.infer<typeof metricflowProjectionConfigSchema>;
|
||||
|
||||
/**
 * Validates the projection config and writes it as pretty-printed JSON
 * (two-space indent, trailing newline) into `stagedDir`, creating the
 * directory if needed.
 *
 * @throws When `config` does not satisfy the projection config schema.
 */
export async function writeMetricflowProjectionConfig(
  stagedDir: string,
  config: MetricflowProjectionConfig,
): Promise<void> {
  const validated = metricflowProjectionConfigSchema.parse(config);
  const targetPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);

  await mkdir(stagedDir, { recursive: true });
  const serialized = `${JSON.stringify(validated, null, 2)}\n`;
  await writeFile(targetPath, serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Reads and validates the projection config stored in `stagedDir`.
 *
 * A missing file (`ENOENT`) yields an empty config; every other failure
 * (unreadable file, invalid JSON, schema violation) is rethrown.
 */
export async function readMetricflowProjectionConfig(stagedDir: string): Promise<MetricflowProjectionConfig> {
  const configPath = join(stagedDir, METRICFLOW_PROJECTION_CONFIG_FILE);
  try {
    const raw = await readFile(configPath, 'utf-8');
    return metricflowProjectionConfigSchema.parse(JSON.parse(raw));
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      throw error;
    }
    return { parsedTargetTables: {} };
  }
}
|
||||
|
||||
/**
 * Converts successfully parsed target tables into MetricFlow host tables.
 *
 * Entries whose `ok` flag is falsy are skipped. The record key becomes the host
 * table id, the parsed `schema` is exposed as `db`, and `columns` is always
 * empty. The result is ordered by id using `localeCompare`.
 */
export function metricflowHostTablesFromParsedTargets(
  parsedTargetTables: Record<string, ParsedTargetTable>,
): MetricflowHostTable[] {
  const hostTables: MetricflowHostTable[] = [];
  for (const [id, table] of Object.entries(parsedTargetTables)) {
    if (!table.ok) {
      continue;
    }
    hostTables.push({
      id,
      name: table.name,
      catalog: table.catalog,
      db: table.schema,
      columns: [],
    });
  }
  hostTables.sort((left, right) => left.id.localeCompare(right.id));
  return hostTables;
}
|
||||
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
190
packages/context/src/ingest/artifact-gates.test.ts
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { validateFinalIngestArtifacts, validateProvenanceRawPaths } from './artifact-gates.js';
|
||||
|
||||
/**
 * Builds a stub wiki service backed by an in-memory page map.
 *
 * `listPageKeys` resolves to the map's keys; `readPage` ignores scope and scope
 * id and resolves to `null` for unknown keys, otherwise to a page whose summary
 * is the page key and whose usage mode is `auto`.
 */
function wikiServiceWithPages(
  pages: Record<string, { refs?: string[]; content?: string; slRefs?: string[] }>,
) {
  const readPage = (_scope: string, _scopeId: string | null, pageKey: string) => {
    const page = pages[pageKey];
    if (!page) {
      return Promise.resolve(null);
    }
    const frontmatter = {
      summary: pageKey,
      usage_mode: 'auto',
      refs: page.refs,
      sl_refs: page.slRefs,
    };
    return Promise.resolve({ pageKey, frontmatter, content: page.content ?? '' });
  };

  return {
    listPageKeys: vi.fn().mockResolvedValue(Object.keys(pages)),
    readPage: vi.fn().mockImplementation(readPage),
  };
}
|
||||
|
||||
describe('artifact gates', () => {
  /** Semantic-layer source whose only measure is `total_contract_arr`. */
  const martAccountSegmentsSource = () => ({
    name: 'mart_account_segments',
    grain: ['account_id'],
    columns: [{ name: 'account_id', type: 'string' }],
    joins: [],
    measures: [{ name: 'total_contract_arr', expr: 'sum(contract_arr)' }],
    table: 'analytics.mart_account_segments',
  });

  /** Stub semantic-layer service resolving the same sources for every connection. */
  const semanticLayerServiceWith = (sources: unknown[]) => ({
    loadAllSources: vi.fn().mockResolvedValue({ sources, loadErrors: [] }),
  });

  it('fails the final tree when wiki body references a stale semantic-layer measure', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        slRefs: ['mart_account_segments'],
        content: 'ARR is `mart_account_segments.total_contract_arr_cents`.',
      },
    });
    const semanticLayerService = semanticLayerServiceWith([martAccountSegmentsSource()]);

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({ invalidSources: [], validSources: ['mart_account_segments'] }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(
      /unknown semantic-layer entity mart_account_segments\.total_contract_arr_cents/,
    );
  });

  it('fails before provenance insertion when a raw path cannot be tied to the current snapshot or eviction set', () => {
    const validate = () =>
      validateProvenanceRawPaths({
        rows: [{ rawPath: 'cards/missing.json' }],
        currentRawPaths: new Set(['cards/present.json']),
        deletedRawPaths: new Set(['cards/deleted.json']),
      });

    expect(validate).toThrow(/provenance row references raw path outside this snapshot: cards\/missing\.json/);
  });

  it('fails measure-level wiki frontmatter sl_refs that point at missing entities', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        slRefs: ['mart_account_segments.total_contract_arr_cents'],
        content: 'ARR uses a renamed measure.',
      },
    });
    const semanticLayerService = semanticLayerServiceWith([martAccountSegmentsSource()]);

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'mart_account_segments' }],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({
        invalidSources: [],
        validSources: ['warehouse:mart_account_segments'],
      }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(/unknown sl_refs entity mart_account_segments\.total_contract_arr_cents/);
  });

  it('validates direct declared-join neighbors of touched semantic-layer sources', async () => {
    // `orders` and `segments` both declare a join to `accounts`; `accounts` declares none.
    const orders = {
      name: 'orders',
      grain: ['order_id'],
      columns: [
        { name: 'order_id', type: 'string' },
        { name: 'account_id', type: 'string' },
      ],
      joins: [{ to: 'accounts', on: 'orders.account_id = accounts.account_id', relationship: 'many_to_one' }],
      measures: [{ name: 'order_count', expr: 'count(*)' }],
    };
    const accounts = {
      name: 'accounts',
      grain: ['account_id'],
      columns: [{ name: 'account_id', type: 'string' }],
      joins: [],
      measures: [{ name: 'account_count', expr: 'count(*)' }],
    };
    const segments = {
      name: 'segments',
      grain: ['segment_id'],
      columns: [
        { name: 'segment_id', type: 'string' },
        { name: 'account_id', type: 'string' },
      ],
      joins: [{ to: 'accounts', on: 'segments.account_id = accounts.account_id', relationship: 'many_to_one' }],
      measures: [],
    };
    const semanticLayerService = semanticLayerServiceWith([orders, accounts, segments]);
    const validateTouchedSources = vi.fn().mockResolvedValue({ invalidSources: [], validSources: [] });

    await validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: [],
      touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'accounts' }],
      wikiService: { readPage: vi.fn() } as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources,
      tableExists: async () => true,
    });

    expect(validateTouchedSources).toHaveBeenCalledWith([
      { connectionId: 'warehouse', sourceName: 'accounts' },
      { connectionId: 'warehouse', sourceName: 'orders' },
      { connectionId: 'warehouse', sourceName: 'segments' },
    ]);
  });

  it('fails final gates when a changed wiki page references a missing wiki page', async () => {
    const wikiService = wikiServiceWithPages({
      'account-segments': {
        refs: ['missing-frontmatter-page'],
        content: 'See [[missing-inline-page]] for the related process.',
      },
    });
    const semanticLayerService = semanticLayerServiceWith([]);

    const gateRun = validateFinalIngestArtifacts({
      connectionIds: ['warehouse'],
      changedWikiPageKeys: ['account-segments'],
      touchedSlSources: [],
      wikiService: wikiService as never,
      semanticLayerService: semanticLayerService as never,
      validateTouchedSources: async () => ({ invalidSources: [], validSources: [] }),
      tableExists: async () => true,
    });

    await expect(gateRun).rejects.toThrow(
      /wiki references target missing page\(s\): account-segments -> missing-frontmatter-page, account-segments -> missing-inline-page/,
    );
  });
});
|
||||
188
packages/context/src/ingest/artifact-gates.ts
Normal file
188
packages/context/src/ingest/artifact-gates.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import type { SemanticLayerService } from '../sl/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { KnowledgeWikiService } from '../wiki/index.js';
|
||||
import { findMissingWikiRefs } from '../wiki/wiki-ref-validation.js';
|
||||
import { findInvalidWikiBodyRefs } from './wiki-body-refs.js';
|
||||
|
||||
/** Outcome of validating a batch of touched semantic-layer sources. */
export interface TouchedValidationResult {
  /** Sources that failed validation; the final gate reports one error per entry. */
  invalidSources: Array<string>;
  /** Sources that passed validation. */
  validSources: Array<string>;
}
|
||||
|
||||
/** Everything the final ingest artifact gates need to inspect the finished tree. */
export interface FinalArtifactGateInput {
  /** Connections whose semantic-layer sources are loaded and treated as visible to wiki pages. */
  connectionIds: Array<string>;
  /** Keys of wiki pages changed by this ingest; each is read from the `GLOBAL` scope. */
  changedWikiPageKeys: Array<string>;
  /** Semantic-layer sources touched by this ingest; direct join neighbors are added before validation. */
  touchedSlSources: Array<TouchedSlSource>;
  /** Wiki access used to read changed pages and resolve page references. */
  wikiService: KnowledgeWikiService;
  /** Semantic-layer access used to load sources per connection. */
  semanticLayerService: SemanticLayerService;
  /** Validates the expanded set of touched sources and reports which ones are invalid. */
  validateTouchedSources(touched: TouchedSlSource[]): Promise<TouchedValidationResult>;
  /** Table-existence probe forwarded to the wiki body reference check. */
  tableExists(connectionId: string, tableRef: string): Promise<boolean>;
}
|
||||
|
||||
/** Input for checking that provenance rows only reference known raw paths. */
export interface ProvenanceRawPathValidationInput {
  /** Provenance rows about to be checked. */
  rows: { rawPath: string }[];
  /** Raw paths accepted as part of the current snapshot. */
  currentRawPaths: Set<string>;
  /** Raw paths accepted because they were deleted. */
  deletedRawPaths: Set<string>;
}
|
||||
|
||||
/**
 * Splits a semantic-layer reference of the form `[connection/]source[.entity]`.
 *
 * - The connection id is everything before the first `/`, or `null` when the
 *   reference has no `/`.
 * - The source name is everything up to the first `.` after the connection.
 * - The entity name is the full remainder after that first `.`, or `null` when
 *   there is no `.`.
 *
 * The remainder is kept intact rather than truncated at the next `.`, so a
 * reference such as `orders.total.typo` yields the entity `total.typo` and is
 * reported as unknown instead of being silently accepted as `orders.total`.
 */
function parseSlRef(ref: string): { connectionId: string | null; sourceName: string; entityName: string | null } {
  const slashIndex = ref.indexOf('/');
  const connectionId = slashIndex === -1 ? null : ref.slice(0, slashIndex);
  const qualifiedName = slashIndex === -1 ? ref : ref.slice(slashIndex + 1);

  const dotIndex = qualifiedName.indexOf('.');
  if (dotIndex === -1) {
    return { connectionId, sourceName: qualifiedName, entityName: null };
  }
  return {
    connectionId,
    sourceName: qualifiedName.slice(0, dotIndex),
    entityName: qualifiedName.slice(dotIndex + 1),
  };
}
|
||||
|
||||
/**
 * Collects the names of every measure, column and segment declared on a source.
 * Missing collections are treated as empty.
 */
function slEntityNames(source: Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'][number]): Set<string> {
  const names = new Set<string>();
  for (const measure of source.measures ?? []) {
    names.add(measure.name);
  }
  for (const column of source.columns ?? []) {
    names.add(column.name);
  }
  for (const segment of source.segments ?? []) {
    names.add(segment.name);
  }
  return names;
}
|
||||
|
||||
/**
 * Removes duplicate `(connectionId, sourceName)` pairs, keeping the first
 * occurrence, and returns them ordered by connection id and then source name
 * (both compared with `localeCompare`).
 *
 * The dedupe key is a JSON-encoded tuple rather than a `:`-joined string, so
 * pairs such as `('a:b', 'c')` and `('a', 'b:c')` stay distinct instead of
 * collapsing into one and dropping a source from validation.
 */
function uniqueTouchedSources(sources: TouchedSlSource[]): TouchedSlSource[] {
  const seen = new Set<string>();
  const unique: TouchedSlSource[] = [];
  for (const source of sources) {
    const key = JSON.stringify([source.connectionId, source.sourceName]);
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    unique.push(source);
  }
  // `unique` is a fresh local array, so sorting in place does not affect the caller's input.
  return unique.sort((left, right) => {
    const byConnection = left.connectionId.localeCompare(right.connectionId);
    return byConnection === 0 ? left.sourceName.localeCompare(right.sourceName) : byConnection;
  });
}
|
||||
|
||||
/**
 * Extends the touched sources with their direct join neighbors.
 *
 * For every connection in `input.connectionIds` that has touched sources, the
 * connection's sources are loaded and two kinds of neighbors are added: the
 * targets of joins declared on a touched source, and any source declaring a
 * join to a touched source. The result is de-duplicated and sorted.
 */
async function expandTouchedSlSourcesWithDirectJoinNeighbors(input: FinalArtifactGateInput): Promise<TouchedSlSource[]> {
  // Group touched source names per connection for quick membership checks.
  const namesByConnection = new Map<string, Set<string>>();
  for (const { connectionId, sourceName } of input.touchedSlSources) {
    let names = namesByConnection.get(connectionId);
    if (!names) {
      names = new Set<string>();
      namesByConnection.set(connectionId, names);
    }
    names.add(sourceName);
  }

  const collected: TouchedSlSource[] = input.touchedSlSources.slice();
  for (const connectionId of input.connectionIds) {
    const touchedNames = namesByConnection.get(connectionId);
    if (touchedNames === undefined || touchedNames.size === 0) {
      continue;
    }

    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    for (const source of loaded.sources) {
      const joins = source.joins ?? [];
      // Outgoing edges: everything a touched source joins to.
      if (touchedNames.has(source.name)) {
        collected.push(...joins.map((join) => ({ connectionId, sourceName: join.to })));
      }
      // Incoming edges: any source that joins to a touched source.
      if (joins.some((join) => touchedNames.has(join.to))) {
        collected.push({ connectionId, sourceName: source.name });
      }
    }
  }

  return uniqueTouchedSources(collected);
}
|
||||
|
||||
/**
 * Checks the frontmatter `sl_refs` of every changed wiki page.
 *
 * A reference with a connection prefix is resolved in that connection only;
 * otherwise connections are searched in `input.connectionIds` order and the
 * first source with a matching name wins. Returns one message per reference
 * whose source is not found, or whose entity name is not among the source's
 * measures, columns or segments.
 */
async function validateWikiSlRefs(input: FinalArtifactGateInput): Promise<string[]> {
  type LoadedSources = Awaited<ReturnType<SemanticLayerService['loadAllSources']>>['sources'];

  const sourcesByConnection = new Map<string, LoadedSources>();
  for (const connectionId of input.connectionIds) {
    const loaded = await input.semanticLayerService.loadAllSources(connectionId);
    sourcesByConnection.set(connectionId, loaded.sources);
  }

  const problems: string[] = [];
  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    // Pages that cannot be read contribute no references.
    const slRefs = page?.frontmatter.sl_refs ?? [];

    for (const ref of slRefs) {
      const { connectionId, sourceName, entityName } = parseSlRef(ref);
      const searchOrder = connectionId ? [connectionId] : input.connectionIds;
      const match = searchOrder
        .map((id) => sourcesByConnection.get(id)?.find((candidate) => candidate.name === sourceName))
        .find((found) => found !== undefined);

      if (match === undefined) {
        problems.push(`${pageKey}: unknown sl_refs entry ${ref}`);
        continue;
      }
      if (entityName && !slEntityNames(match).has(entityName)) {
        problems.push(`${pageKey}: unknown sl_refs entity ${ref}`);
      }
    }
  }
  return problems;
}
|
||||
|
||||
/**
 * Finds wiki references on changed pages that target missing pages.
 *
 * Both frontmatter `refs` and the page content are handed to
 * `findMissingWikiRefs`. Each result is formatted as `<pageKey> -> <missingRef>`.
 * Pages that cannot be read are skipped.
 */
async function validateWikiRefs(input: FinalArtifactGateInput): Promise<string[]> {
  const { wikiService, changedWikiPageKeys } = input;
  const dangling: string[] = [];

  for (const pageKey of changedWikiPageKeys) {
    const page = await wikiService.readPage('GLOBAL', null, pageKey);
    if (page) {
      const missing = await findMissingWikiRefs({
        wikiService,
        scope: 'GLOBAL',
        scopeId: null,
        pageKey,
        refs: page.frontmatter.refs,
        content: page.content,
      });
      for (const target of missing) {
        dangling.push(`${pageKey} -> ${target}`);
      }
    }
  }

  return dangling;
}
|
||||
|
||||
/**
 * Runs every final artifact gate and throws one aggregated error if any fails.
 *
 * Gates, in reporting order:
 * 1. validation of touched semantic-layer sources plus their direct join neighbors,
 * 2. frontmatter `sl_refs` on changed wiki pages,
 * 3. wiki references that target missing pages (reported as a single line),
 * 4. semantic-layer references inside changed wiki page bodies.
 *
 * @throws Error whose message starts with `final artifact gates failed:`
 *   followed by one line per failure.
 */
export async function validateFinalIngestArtifacts(input: FinalArtifactGateInput): Promise<void> {
  const failures: string[] = [];

  const sourcesToValidate = await expandTouchedSlSourcesWithDirectJoinNeighbors(input);
  const { invalidSources } = await input.validateTouchedSources(sourcesToValidate);
  for (const source of invalidSources) {
    failures.push(`semantic-layer validation failed for ${source}`);
  }

  failures.push(...(await validateWikiSlRefs(input)));

  const dangling = await validateWikiRefs(input);
  if (dangling.length > 0) {
    failures.push(`wiki references target missing page(s): ${dangling.join(', ')}`);
  }

  const loadSources = async (connectionId: string) =>
    (await input.semanticLayerService.loadAllSources(connectionId)).sources;

  for (const pageKey of input.changedWikiPageKeys) {
    const page = await input.wikiService.readPage('GLOBAL', null, pageKey);
    if (!page) {
      continue;
    }
    const bodyErrors = await findInvalidWikiBodyRefs({
      pageKey,
      body: page.content,
      visibleConnectionIds: input.connectionIds,
      loadSources,
      tableExists: input.tableExists,
    });
    failures.push(...bodyErrors);
  }

  if (failures.length === 0) {
    return;
  }
  throw new Error(`final artifact gates failed:\n${failures.join('\n')}`);
}
|
||||
|
||||
/**
 * Ensures every provenance row points at a raw path from the current snapshot
 * or the deleted set.
 *
 * @throws Error naming the first row whose raw path is in neither set.
 */
export function validateProvenanceRawPaths(input: ProvenanceRawPathValidationInput): void {
  const { rows, currentRawPaths, deletedRawPaths } = input;
  const orphan = rows.find(({ rawPath }) => !(currentRawPaths.has(rawPath) || deletedRawPaths.has(rawPath)));
  if (orphan !== undefined) {
    throw new Error(`provenance row references raw path outside this snapshot: ${orphan.rawPath}`);
  }
}
|
||||
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
136
packages/context/src/ingest/final-gate-repair.test.ts
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import { mkdir, mkdtemp, readFile, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { finalGateRepairPaths, repairFinalGateFailure } from './final-gate-repair.js';
|
||||
import { FileIngestTraceWriter } from './ingest-trace.js';
|
||||
|
||||
/**
 * Creates a temporary workdir for the repair tests.
 *
 * The workdir holds one wiki page that mentions `total_contract_arr_cents` and
 * one semantic-layer source that only defines `total_contract_arr`. A file
 * trace writer pointing at `<root>/trace.jsonl` is returned alongside it.
 */
async function makeHarness() {
  const root = await mkdtemp(join(tmpdir(), 'ktx-final-gate-repair-'));
  const workdir = join(root, 'workdir');
  const wikiDir = join(workdir, 'wiki/global');
  const semanticLayerDir = join(workdir, 'semantic-layer/warehouse');

  await mkdir(wikiDir, { recursive: true });
  await mkdir(semanticLayerDir, { recursive: true });

  const wikiPage =
    '---\nsummary: Account segments\nusage_mode: auto\n---\n\nARR uses `mart_account_segments.total_contract_arr_cents`.\n';
  await writeFile(join(workdir, 'wiki/global/account-segments.md'), wikiPage, 'utf-8');

  const sourceYaml =
    'name: mart_account_segments\ncolumns: [{name: account_id, type: string}]\njoins: []\nmeasures:\n  - name: total_contract_arr\n    expr: sum(contract_arr)\n';
  await writeFile(join(workdir, 'semantic-layer/warehouse/mart_account_segments.yaml'), sourceYaml, 'utf-8');

  const trace = new FileIngestTraceWriter({
    tracePath: join(root, 'trace.jsonl'),
    jobId: 'job-1',
    connectionId: 'warehouse',
    sourceKey: 'metabase',
    runId: 'run-1',
    syncId: 'sync-1',
    level: 'trace',
  });

  return { root, workdir, trace };
}
|
||||
|
||||
describe('finalGateRepairPaths', () => {
  it('derives sorted wiki and semantic-layer file paths', () => {
    // Duplicate page keys and sources must collapse into a single path each.
    const repairPaths = finalGateRepairPaths({
      changedWikiPageKeys: ['account-segments', 'overview', 'account-segments'],
      touchedSlSources: [
        { connectionId: 'warehouse', sourceName: 'mart_account_segments' },
        { connectionId: 'warehouse', sourceName: 'orders' },
        { connectionId: 'warehouse', sourceName: 'orders' },
      ],
    });

    expect(repairPaths).toEqual([
      'semantic-layer/warehouse/mart_account_segments.yaml',
      'semantic-layer/warehouse/orders.yaml',
      'wiki/global/account-segments.md',
      'wiki/global/overview.md',
    ]);
  });
});
|
||||
|
||||
describe('repairFinalGateFailure', () => {
  const allowedPagePath = 'wiki/global/account-segments.md';

  it('lets the repair agent read gate errors and edit only allowed files', async () => {
    const { workdir, trace } = await makeHarness();

    // Scripted agent: read the error, read the page, attempt a forbidden write, then fix the page.
    const runLoop = vi.fn(async (params: any) => {
      const { toolSet } = params;

      const error = await toolSet.read_gate_error.execute({});
      expect(error.markdown).toContain('total_contract_arr_cents');

      const page = await toolSet.read_repair_file.execute({ path: 'wiki/global/account-segments.md' });
      expect(page.markdown).toContain('total_contract_arr_cents');

      const forbiddenWrite = toolSet.write_repair_file.execute({
        path: 'wiki/global/other.md',
        content: 'not allowed',
      });
      await expect(forbiddenWrite).rejects.toThrow(/gate repair path not allowed/);

      await toolSet.write_repair_file.execute({
        path: 'wiki/global/account-segments.md',
        content: page.markdown.replace('total_contract_arr_cents', 'total_contract_arr'),
      });
      return { stopReason: 'natural' as const };
    });
    const agentRunner = { runLoop };

    const result = await repairFinalGateFailure({
      agentRunner,
      workdir,
      gateError:
        'final artifact gates failed:\naccount-segments: unknown semantic-layer entity mart_account_segments.total_contract_arr_cents',
      allowedPaths: [allowedPagePath],
      trace,
      repairKind: 'final_artifact_gate',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(result).toEqual({
      status: 'repaired',
      attempts: 1,
      changedPaths: ['wiki/global/account-segments.md'],
    });

    const repairedPage = readFile(join(workdir, 'wiki/global/account-segments.md'), 'utf-8');
    await expect(repairedPage).resolves.toContain('total_contract_arr');
    await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_repaired');

    const expectedTelemetry = expect.objectContaining({
      operationName: 'ingest-isolated-diff-gate-repair',
      repairKind: 'final_artifact_gate',
    });
    expect(agentRunner.runLoop).toHaveBeenCalledWith(
      expect.objectContaining({
        modelRole: 'repair',
        stepBudget: 8,
        telemetryTags: expectedTelemetry,
      }),
    );
  });

  it('returns failed when the repair agent edits no allowed file', async () => {
    const { workdir, trace } = await makeHarness();
    // The agent stops naturally without calling any tool.
    const idleRunner = { runLoop: vi.fn(async () => ({ stopReason: 'natural' as const })) };

    const result = await repairFinalGateFailure({
      agentRunner: idleRunner,
      workdir,
      gateError: 'final artifact gates failed:\naccount-segments: unknown semantic-layer entity',
      allowedPaths: [allowedPagePath],
      trace,
      repairKind: 'final_artifact_gate',
      maxAttempts: 1,
      stepBudget: 8,
    });

    expect(result).toEqual({
      status: 'failed',
      attempts: 1,
      reason: 'gate repair completed without editing an allowed path',
    });
    await expect(readFile(trace.tracePath, 'utf-8')).resolves.toContain('gate_repair_failed');
  });
});
|
||||
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
230
packages/context/src/ingest/final-gate-repair.ts
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { z } from 'zod';
|
||||
import type { AgentRunnerPort, KtxRuntimeToolSet } from '../llm/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { IngestTraceWriter } from './ingest-trace.js';
|
||||
import { traceTimed } from './ingest-trace.js';
|
||||
|
||||
type FinalGateRepairKind = 'patch_semantic_gate' | 'final_artifact_gate';
|
||||
|
||||
export type FinalGateRepairResult =
|
||||
| { status: 'repaired'; attempts: number; changedPaths: string[] }
|
||||
| { status: 'failed'; attempts: number; reason: string };
|
||||
|
||||
/** Arguments for one gate-repair run. */
export interface RepairFinalGateFailureInput {
  /** Runner that executes the repair agent loop. */
  agentRunner: AgentRunnerPort;
  /** Root directory that allowed paths are resolved against. */
  workdir: string;
  /** Gate failure text shown to the agent and recorded in trace events. */
  gateError: string;
  /** Repository-relative paths the agent may read and write; normalized before use. */
  allowedPaths: Array<string>;
  /** Trace sink for repair events; its context also feeds the telemetry tags. */
  trace: IngestTraceWriter;
  /** Which gate failed; included in the prompt, trace data and telemetry tags. */
  repairKind: FinalGateRepairKind;
  /** Maximum number of agent runs; defaults to 1. */
  maxAttempts?: number;
  /** Step budget passed to each agent run; defaults to 16. */
  stepBudget?: number;
}
|
||||
|
||||
const readRepairFileSchema = z.object({
|
||||
path: z.string().min(1),
|
||||
});
|
||||
|
||||
const writeRepairFileSchema = z.object({
|
||||
path: z.string().min(1),
|
||||
content: z.string(),
|
||||
});
|
||||
|
||||
/**
 * Normalizes a path to forward-slash, repository-relative form.
 *
 * Backslashes become `/`, and leading, trailing and repeated slashes are dropped.
 *
 * @throws Error when the path has no segments or contains a `.` or `..` segment.
 */
function normalizeRepoPath(path: string): string {
  const segments = path
    .replace(/\\/g, '/')
    .split('/')
    .filter((segment) => segment !== '');
  const hasDotSegment = segments.includes('.') || segments.includes('..');
  if (segments.length === 0 || hasDotSegment) {
    throw new Error(`gate repair path must be a repository-relative path: ${path}`);
  }
  return segments.join('/');
}
|
||||
|
||||
/**
 * Normalizes `path` and returns it if it is in the allow-list.
 *
 * @throws Error when the normalized path is not in `allowedPaths`, or when
 *   normalization itself rejects the path.
 */
function assertAllowedPath(path: string, allowedPaths: ReadonlySet<string>): string {
  const candidate = normalizeRepoPath(path);
  if (allowedPaths.has(candidate)) {
    return candidate;
  }
  throw new Error(`gate repair path not allowed: ${candidate}`);
}
|
||||
|
||||
/**
 * Reads a UTF-8 file, treating a missing file as empty content.
 *
 * @returns `exists: false` with empty content when the read fails with
 *   `ENOENT`; any other read error is rethrown.
 */
async function readOptionalFile(path: string): Promise<{ exists: boolean; content: string }> {
  let content: string;
  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    const isMissingFile =
      typeof error === 'object' && error !== null && 'code' in error && error.code === 'ENOENT';
    if (isMissingFile) {
      return { exists: false, content: '' };
    }
    throw error;
  }
  return { exists: true, content };
}
|
||||
|
||||
/** Returns the fixed system prompt (role plus rules) for the gate-repair agent. */
function buildGateRepairSystemPrompt(): string {
  const roleSection = [
    '<role>',
    'You repair one KTX isolated-diff artifact gate failure inside the integration worktree.',
    '</role>',
  ];
  const rulesSection = [
    '<rules>',
    '- Use read_gate_error first.',
    '- Read only files exposed by read_repair_file.',
    '- Edit only paths exposed by write_repair_file.',
    '- Prefer the smallest text edit that makes the gate pass.',
    '- Preserve accepted work-unit, reconciliation, and deterministic projection content.',
    '- Do not invent warehouse facts, business definitions, or semantic-layer entities.',
    '- If the gate error requires choosing between conflicting facts without evidence, stop without editing.',
    '</rules>',
  ];
  // The two sections are separated by a single blank line.
  return [...roleSection, '', ...rulesSection].join('\n');
}
|
||||
|
||||
/**
 * Renders the per-attempt user prompt: repair kind, attempt counter, the
 * allowed files as a bullet list, the gate error, and closing instructions.
 */
function buildGateRepairUserPrompt(input: {
  gateError: string;
  allowedPaths: string[];
  repairKind: FinalGateRepairKind;
  attempt: number;
  maxAttempts: number;
}): string {
  const { gateError, allowedPaths, repairKind, attempt, maxAttempts } = input;
  const allowedFileList = allowedPaths.map((path) => `- ${path}`).join('\n');
  const lines = [
    'Repair isolated-diff artifact gates.',
    '',
    `Repair kind: ${repairKind}`,
    `Attempt: ${attempt} of ${maxAttempts}`,
    '',
    'Allowed files:',
    allowedFileList,
    '',
    'Gate error:',
    gateError,
    '',
    'Use read_gate_error first. Then inspect only the allowed files, write the',
    'minimal repaired content, and stop.',
  ];
  return lines.join('\n');
}
|
||||
|
||||
/**
 * Builds the three tools exposed to the gate-repair agent.
 *
 * - `read_gate_error` returns the gate failure text.
 * - `read_repair_file` reads an allowed file; a missing file yields a
 *   placeholder string instead of an error.
 * - `write_repair_file` overwrites an allowed file, creating parent
 *   directories, and records the path in `editedPaths`.
 *
 * Both file tools reject paths outside `allowedPaths`.
 */
function buildToolSet(input: {
  workdir: string;
  gateError: string;
  allowedPaths: ReadonlySet<string>;
  editedPaths: Set<string>;
}): KtxRuntimeToolSet {
  const { workdir, gateError, allowedPaths, editedPaths } = input;

  return {
    read_gate_error: {
      name: 'read_gate_error',
      description: 'Read the artifact gate failure that must be repaired.',
      inputSchema: z.object({}),
      execute: async () => {
        return { markdown: gateError, structured: { gateError } };
      },
    },
    read_repair_file: {
      name: 'read_repair_file',
      description: 'Read one allowed file from the integration worktree.',
      inputSchema: readRepairFileSchema,
      execute: async (args: z.infer<typeof readRepairFileSchema>) => {
        const relativePath = assertAllowedPath(args.path, allowedPaths);
        const { exists, content } = await readOptionalFile(join(workdir, relativePath));
        const markdown = exists ? content : `(missing file: ${relativePath})`;
        return { markdown, structured: { path: relativePath, exists } };
      },
    },
    write_repair_file: {
      name: 'write_repair_file',
      description: 'Replace one allowed integration worktree file with repaired text content.',
      inputSchema: writeRepairFileSchema,
      execute: async (args: z.infer<typeof writeRepairFileSchema>) => {
        const relativePath = assertAllowedPath(args.path, allowedPaths);
        const absolutePath = join(workdir, relativePath);
        await mkdir(dirname(absolutePath), { recursive: true });
        await writeFile(absolutePath, args.content, 'utf-8');
        // Recorded only after the write succeeded.
        editedPaths.add(relativePath);
        return {
          markdown: `Wrote ${relativePath}`,
          structured: { path: relativePath, bytes: Buffer.byteLength(args.content) },
        };
      },
    },
  };
}
|
||||
|
||||
/**
 * Derives the repository-relative files a gate repair may touch.
 *
 * Touched sources map to `semantic-layer/<connectionId>/<sourceName>.yaml` and
 * changed wiki pages map to `wiki/global/<pageKey>.md`. Duplicates are removed
 * and the result is sorted with the default string ordering.
 */
export function finalGateRepairPaths(input: {
  changedWikiPageKeys: string[];
  touchedSlSources: TouchedSlSource[];
}): string[] {
  const semanticLayerPaths = input.touchedSlSources.map(
    ({ connectionId, sourceName }) => `semantic-layer/${connectionId}/${sourceName}.yaml`,
  );
  const wikiPaths = input.changedWikiPageKeys.map((pageKey) => `wiki/global/${pageKey}.md`);
  const distinctPaths = new Set(semanticLayerPaths.concat(wikiPaths));
  return Array.from(distinctPaths).sort();
}
|
||||
|
||||
/**
 * Runs the repair agent against a failed gate, up to `maxAttempts` times.
 *
 * An attempt counts as repaired as soon as the agent loop finishes without an
 * `error` stop reason and has written at least one allowed file. The gates
 * themselves are not re-run here. An attempt that errors or edits nothing
 * emits a `gate_repair_failed` trace event and the next attempt starts.
 *
 * @returns `repaired` with the sorted edited paths, or `failed` with the reason
 *   of the last attempt (`gate repair did not run` when no attempt was made).
 */
export async function repairFinalGateFailure(
  input: RepairFinalGateFailureInput,
): Promise<FinalGateRepairResult> {
  const { agentRunner, trace, gateError, repairKind, workdir } = input;
  const allowedPaths = new Set(input.allowedPaths.map(normalizeRepoPath));
  const maxAttempts = input.maxAttempts ?? 1;
  const stepBudget = input.stepBudget ?? 16;

  let lastFailure = 'gate repair did not run';
  let attempt = 0;

  while (attempt < maxAttempts) {
    attempt += 1;

    // Each attempt tracks its own edits so a later attempt cannot inherit earlier ones.
    const editedPaths = new Set<string>();
    const sortedAllowedPaths = [...allowedPaths].sort();
    const traceData = {
      repairKind,
      attempt,
      maxAttempts,
      allowedPaths: sortedAllowedPaths,
      gateError,
    };

    const result = await traceTimed(trace, 'gate_repair', 'gate_repair', traceData, async () => {
      const userPrompt = buildGateRepairUserPrompt({
        gateError,
        allowedPaths: sortedAllowedPaths,
        repairKind,
        attempt,
        maxAttempts,
      });
      const toolSet = buildToolSet({ workdir, gateError, allowedPaths, editedPaths });
      return agentRunner.runLoop({
        modelRole: 'repair',
        systemPrompt: buildGateRepairSystemPrompt(),
        userPrompt,
        toolSet,
        stepBudget,
        telemetryTags: {
          operationName: 'ingest-isolated-diff-gate-repair',
          source: trace.context.sourceKey,
          jobId: trace.context.jobId,
          repairKind,
        },
      });
    });

    if (result.stopReason === 'error') {
      lastFailure = result.error?.message ?? 'gate repair agent loop errored';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', traceData, result.error);
      continue;
    }

    const changedPaths = [...editedPaths].sort();
    if (changedPaths.length === 0) {
      lastFailure = 'gate repair completed without editing an allowed path';
      await trace.event('error', 'gate_repair', 'gate_repair_failed', { ...traceData, reason: lastFailure });
      continue;
    }

    await trace.event('debug', 'gate_repair', 'gate_repair_repaired', { ...traceData, changedPaths });
    return { status: 'repaired', attempts: attempt, changedPaths };
  }

  return { status: 'failed', attempts: maxAttempts, reason: lastFailure };
}
|
||||
131
packages/context/src/ingest/finalization-scope.test.ts
Normal file
131
packages/context/src/ingest/finalization-scope.test.ts
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
compareFinalizationDeclarations,
|
||||
deriveFinalizationTouchedSources,
|
||||
deriveFinalizationWikiPageKeys,
|
||||
} from './finalization-scope.js';
|
||||
|
||||
describe('deriveFinalizationWikiPageKeys', () => {
  it('maps changed global wiki markdown paths to page keys', () => {
    // Only a top-level wiki/global/*.md file is a page; the nested page and the
    // README must both be ignored.
    const changedPaths = ['wiki/global/historic-sql-orders.md', 'wiki/global/nested/page.md', 'README.md'];

    const pageKeys = deriveFinalizationWikiPageKeys(changedPaths);

    expect(pageKeys).toEqual(['historic-sql-orders']);
  });
});
|
||||
|
||||
describe('deriveFinalizationTouchedSources', () => {
  // Aggregate schema file shared by the snapshot-comparison cases below.
  const aggregateSchemaPath = 'semantic-layer/warehouse/_schema/public.yaml';

  // Builds the `orders` source; only the usage narrative and tier differ between snapshots.
  const ordersSource = (narrative: string, frequencyTier: 'low' | 'high') => ({
    name: 'orders',
    grain: ['order_id'],
    columns: [{ name: 'order_id', type: 'string' }],
    joins: [],
    measures: [],
    usage: {
      narrative,
      frequencyTier,
      commonFilters: [],
      commonJoins: [],
    },
  });

  it('maps standalone semantic-layer files directly', async () => {
    const result = await deriveFinalizationTouchedSources({
      changedPaths: ['semantic-layer/warehouse/orders.yaml'],
      beforeSourcesByConnection: new Map(),
      afterSourcesByConnection: new Map(),
    });

    expect(result).toEqual({
      touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
      unresolvedPaths: [],
    });
  });

  it('resolves aggregate _schema changes by comparing loaded source snapshots', async () => {
    const beforeSourcesByConnection = new Map([['warehouse', [ordersSource('old', 'low')]]]);
    const afterSourcesByConnection = new Map([['warehouse', [ordersSource('new', 'high')]]]);

    const result = await deriveFinalizationTouchedSources({
      changedPaths: [aggregateSchemaPath],
      beforeSourcesByConnection,
      afterSourcesByConnection,
    });

    expect(result).toEqual({
      touchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
      unresolvedPaths: [],
    });
  });

  it('flags aggregate _schema changes that cannot be resolved to logical sources', async () => {
    // Identical (empty) snapshots: the file changed but no logical source did.
    const beforeSourcesByConnection = new Map([['warehouse', []]]);
    const afterSourcesByConnection = new Map([['warehouse', []]]);

    const result = await deriveFinalizationTouchedSources({
      changedPaths: [aggregateSchemaPath],
      beforeSourcesByConnection,
      afterSourcesByConnection,
    });

    expect(result).toEqual({
      touchedSources: [],
      unresolvedPaths: [aggregateSchemaPath],
    });
  });
});
|
||||
|
||||
describe('compareFinalizationDeclarations', () => {
  it('reports missing and extra adapter declarations', () => {
    // The adapter declared `orders`, but the diff shows `customers` changed, and
    // the `patterns` wiki page changed without being declared.
    const mismatches = compareFinalizationDeclarations({
      declaredTouchedSources: [{ connectionId: 'warehouse', sourceName: 'orders' }],
      derivedTouchedSources: [{ connectionId: 'warehouse', sourceName: 'customers' }],
      declaredChangedWikiPageKeys: ['orders'],
      derivedChangedWikiPageKeys: ['orders', 'patterns'],
    });

    const expectedMismatches = [
      { artifactKind: 'sl', key: 'warehouse:customers', direction: 'missing_from_adapter_declaration' },
      { artifactKind: 'sl', key: 'warehouse:orders', direction: 'extra_in_adapter_declaration' },
      { artifactKind: 'wiki', key: 'patterns', direction: 'missing_from_adapter_declaration' },
    ];
    expect(mismatches).toEqual(expectedMismatches);
  });
});
|
||||
145
packages/context/src/ingest/finalization-scope.ts
Normal file
145
packages/context/src/ingest/finalization-scope.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import type { SemanticLayerSource } from '../sl/index.js';
|
||||
import type { TouchedSlSource } from '../tools/index.js';
|
||||
import type { IngestReportFinalizationMismatch } from './reports.js';
|
||||
|
||||
/** Input for {@link deriveFinalizationTouchedSources}. */
interface DeriveTouchedSourcesInput {
  /** Changed repo-relative paths; only `semantic-layer/**` YAML files are inspected. */
  changedPaths: string[];
  /** Sources loaded before the change, keyed by connection id. */
  beforeSourcesByConnection: Map<string, SemanticLayerSource[]>;
  /** Sources loaded after the change, keyed by connection id. */
  afterSourcesByConnection: Map<string, SemanticLayerSource[]>;
}
|
||||
|
||||
/** Result of {@link deriveFinalizationTouchedSources}. */
interface DeriveTouchedSourcesResult {
  /** De-duplicated sources, ordered by their `connectionId:sourceName` key. */
  touchedSources: TouchedSlSource[];
  /** Semantic-layer YAML paths that could not be mapped to a source; unique and sorted. */
  unresolvedPaths: string[];
}
|
||||
|
||||
/** Input for {@link compareFinalizationDeclarations}. */
interface CompareFinalizationDeclarationsInput {
  /** Semantic-layer sources the adapter declared as touched. */
  declaredTouchedSources: TouchedSlSource[];
  /** Semantic-layer sources derived independently, compared against the declaration. */
  derivedTouchedSources: TouchedSlSource[];
  /** Wiki page keys the adapter declared as changed. */
  declaredChangedWikiPageKeys: string[];
  /** Wiki page keys derived independently, compared against the declaration. */
  derivedChangedWikiPageKeys: string[];
}
|
||||
|
||||
/**
 * Returns the distinct non-empty strings from `values` in default sort order.
 * The input array is left untouched.
 */
function uniqueSorted(values: string[]): string[] {
  const distinct = new Set<string>();
  for (const value of values) {
    if (value.length > 0) {
      distinct.add(value);
    }
  }
  return Array.from(distinct).sort();
}
|
||||
|
||||
/** Builds the `connectionId:sourceName` key that identifies a touched source. */
function touchedKey(source: TouchedSlSource): string {
  const { connectionId, sourceName } = source;
  return `${connectionId}:${sourceName}`;
}
|
||||
|
||||
/**
 * Serializes `value` to JSON with object keys sorted, so two values that are
 * equal as JSON produce the same string regardless of key insertion order.
 *
 * Follows `JSON.stringify` for values that have no JSON form:
 * - object members that are `undefined`, functions or symbols are omitted, so
 *   `{ a: undefined }` and `{}` compare equal;
 * - the same values inside arrays (and array holes) serialize as `null`;
 * - values exposing `toJSON` (for example `Date`) are serialized through it
 *   instead of collapsing to `{}`.
 *
 * @param value Any JSON-like value.
 * @returns A deterministic JSON string; always a string, `'null'` for values
 *   without a JSON form.
 */
function stableJson(value: unknown): string {
  if (value === undefined || typeof value === 'function' || typeof value === 'symbol') {
    return 'null';
  }
  if (Array.isArray(value)) {
    // Array.from visits holes too, unlike map(), so sparse arrays stay well-formed.
    return `[${Array.from(value, (entry) => stableJson(entry)).join(',')}]`;
  }
  if (value !== null && typeof value === 'object') {
    const toJson = (value as { toJSON?: unknown }).toJSON;
    if (typeof toJson === 'function') {
      return stableJson(toJson.call(value));
    }
    const record = value as Record<string, unknown>;
    const members = Object.keys(record)
      .sort()
      .filter((key) => {
        const member = record[key];
        return member !== undefined && typeof member !== 'function' && typeof member !== 'symbol';
      })
      .map((key) => `${JSON.stringify(key)}:${stableJson(record[key])}`);
    return `{${members.join(',')}}`;
  }
  return JSON.stringify(value);
}
|
||||
|
||||
/**
 * Lists the names of sources whose serialized form differs between two
 * snapshots, including sources present in only one of them.
 *
 * @param beforeSources Sources loaded before the change.
 * @param afterSources Sources loaded after the change.
 * @returns Unique, sorted, non-empty source names that were added, removed or modified.
 */
function changedSourceNames(
  beforeSources: SemanticLayerSource[],
  afterSources: SemanticLayerSource[],
): string[] {
  // When a name repeats within one snapshot, the last occurrence wins.
  const fingerprintByName = (sources: SemanticLayerSource[]): Map<string, string> => {
    const fingerprints = new Map<string, string>();
    for (const source of sources) {
      fingerprints.set(source.name, stableJson(source));
    }
    return fingerprints;
  };

  const previous = fingerprintByName(beforeSources);
  const current = fingerprintByName(afterSources);
  const candidateNames = uniqueSorted([...previous.keys(), ...current.keys()]);

  const differing: string[] = [];
  for (const sourceName of candidateNames) {
    // A name missing on one side yields undefined there, which never equals a fingerprint.
    if (previous.get(sourceName) !== current.get(sourceName)) {
      differing.push(sourceName);
    }
  }
  return uniqueSorted(differing);
}
|
||||
|
||||
/**
 * Extracts global wiki page keys from a list of changed paths.
 *
 * Only markdown files directly under `wiki/global/` count; files in nested
 * directories and all other paths are ignored.
 *
 * @param paths Changed repo-relative paths.
 * @returns Unique, sorted page keys (the file name without the `.md` extension).
 */
export function deriveFinalizationWikiPageKeys(paths: string[]): string[] {
  const prefix = 'wiki/global/';
  const extension = '.md';
  const pageKeys = new Set<string>();

  for (const path of paths) {
    if (!path.startsWith(prefix) || !path.endsWith(extension)) {
      continue;
    }
    const pageKey = path.slice(prefix.length, -extension.length);
    // A slash means the file sits in a nested directory; an empty key is not a page.
    if (pageKey.length === 0 || pageKey.includes('/')) {
      continue;
    }
    pageKeys.add(pageKey);
  }

  return [...pageKeys].sort();
}
|
||||
|
||||
/**
 * Maps changed semantic-layer YAML paths to the logical sources they touch.
 *
 * - `semantic-layer/<connectionId>/.../<name>.yaml` (or `.yml`) maps directly to
 *   source `<name>` on `<connectionId>`.
 * - `semantic-layer/<connectionId>/_schema/...` is an aggregate file; the touched
 *   sources are found by diffing the before/after snapshots of that connection.
 * - Paths outside `semantic-layer/` or without a YAML extension are ignored.
 *
 * A path is reported as unresolved when it has no connection directory (a YAML
 * file directly under `semantic-layer/`), when its source name is empty, or
 * when an aggregate change produces no snapshot difference.
 *
 * @param input Changed paths plus before/after source snapshots keyed by connection id.
 * @returns Touched sources ordered by `connectionId:sourceName`, and the unique
 *   sorted list of unresolved paths.
 */
export async function deriveFinalizationTouchedSources(
  input: DeriveTouchedSourcesInput,
): Promise<DeriveTouchedSourcesResult> {
  const touched = new Map<string, TouchedSlSource>();
  const unresolvedPaths: string[] = [];
  // The snapshot diff depends only on the connection, so compute it once per
  // connection however many of its _schema files changed.
  const changedNamesByConnection = new Map<string, string[]>();

  const markTouched = (connectionId: string, sourceName: string): void => {
    const source: TouchedSlSource = { connectionId, sourceName };
    touched.set(touchedKey(source), source);
  };

  for (const path of input.changedPaths) {
    if (!path.startsWith('semantic-layer/') || !(path.endsWith('.yaml') || path.endsWith('.yml'))) {
      continue;
    }
    const parts = path.split('/');
    const connectionId = parts[1] ?? '';
    // With fewer than three segments the second segment is the file itself, not a
    // connection directory; treating it as a connection id would invent a source.
    if (parts.length < 3 || !connectionId) {
      unresolvedPaths.push(path);
      continue;
    }

    if (parts[2] !== '_schema') {
      const fileName = parts.at(-1) ?? '';
      const sourceName = fileName.replace(/\.ya?ml$/, '');
      if (!sourceName) {
        unresolvedPaths.push(path);
        continue;
      }
      markTouched(connectionId, sourceName);
      continue;
    }

    let changedNames = changedNamesByConnection.get(connectionId);
    if (changedNames === undefined) {
      changedNames = changedSourceNames(
        input.beforeSourcesByConnection.get(connectionId) ?? [],
        input.afterSourcesByConnection.get(connectionId) ?? [],
      );
      changedNamesByConnection.set(connectionId, changedNames);
    }
    if (changedNames.length === 0) {
      unresolvedPaths.push(path);
      continue;
    }
    for (const sourceName of changedNames) {
      markTouched(connectionId, sourceName);
    }
  }

  return {
    touchedSources: [...touched.values()].sort((left, right) =>
      touchedKey(left).localeCompare(touchedKey(right)),
    ),
    unresolvedPaths: uniqueSorted(unresolvedPaths),
  };
}
|
||||
|
||||
/**
 * Compares what an adapter declared as changed with what was derived
 * independently, for semantic-layer sources and for wiki pages.
 *
 * @param input Declared and derived touched sources and wiki page keys.
 * @returns Mismatches in a fixed order: semantic-layer entries before wiki
 *   entries; within each kind, keys missing from the declaration before keys
 *   that are extra in it, each group sorted by key.
 */
export function compareFinalizationDeclarations(
  input: CompareFinalizationDeclarationsInput,
): IngestReportFinalizationMismatch[] {
  const sourceKey = (source: TouchedSlSource): string => `${source.connectionId}:${source.sourceName}`;

  const comparisons = [
    {
      artifactKind: 'sl' as const,
      declared: new Set(input.declaredTouchedSources.map((source) => sourceKey(source))),
      derived: new Set(input.derivedTouchedSources.map((source) => sourceKey(source))),
    },
    {
      artifactKind: 'wiki' as const,
      declared: new Set(input.declaredChangedWikiPageKeys),
      derived: new Set(input.derivedChangedWikiPageKeys),
    },
  ];

  const mismatches: IngestReportFinalizationMismatch[] = [];
  for (const { artifactKind, declared, derived } of comparisons) {
    // Derived but never declared: the adapter under-reported.
    for (const key of [...derived].sort()) {
      if (!declared.has(key)) {
        mismatches.push({ artifactKind, key, direction: 'missing_from_adapter_declaration' });
      }
    }
    // Declared but not derived: the adapter over-reported.
    for (const key of [...declared].sort()) {
      if (!derived.has(key)) {
        mismatches.push({ artifactKind, key, direction: 'extra_in_adapter_declaration' });
      }
    }
  }
  return mismatches;
}
|
||||
|
|
@ -17,6 +17,11 @@ export {
|
|||
buildLiveDatabaseTableNaturalKey,
|
||||
ktxSchemaSnapshotToExtractedSchema,
|
||||
} from './adapters/live-database/extracted-schema.js';
|
||||
export {
|
||||
assertSemanticLayerTargetPathsAllowed,
|
||||
findDisallowedSemanticLayerTargetPaths,
|
||||
semanticLayerConnectionIdFromPath,
|
||||
} from './semantic-layer-target-policy.js';
|
||||
export { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js';
|
||||
export type {
|
||||
BuildLiveDatabaseManifestShardsInput,
|
||||
|
|
@ -344,7 +349,6 @@ export type {
|
|||
HistoricSqlTableUsageEvidence,
|
||||
} from './adapters/historic-sql/evidence.js';
|
||||
export { createEmitHistoricSqlEvidenceTool } from './adapters/historic-sql/evidence-tool.js';
|
||||
export { HistoricSqlProjectionPostProcessor } from './adapters/historic-sql/post-processor.js';
|
||||
export { projectHistoricSqlEvidence } from './adapters/historic-sql/projection.js';
|
||||
export type { HistoricSqlProjectionInput, HistoricSqlProjectionResult } from './adapters/historic-sql/projection.js';
|
||||
export {
|
||||
|
|
@ -609,6 +613,11 @@ export {
|
|||
} from './raw-sources-paths.js';
|
||||
export { ingestReportSnapshotSchema, parseIngestReportSnapshot } from './report-snapshot.js';
|
||||
export type { IngestReportBody, IngestReportSnapshot } from './reports.js';
|
||||
export * from './artifact-gates.js';
|
||||
export * from './ingest-trace.js';
|
||||
export * from './isolated-diff/git-patch.js';
|
||||
export * from './isolated-diff/patch-integrator.js';
|
||||
export * from './isolated-diff/work-unit-executor.js';
|
||||
export * from './reports.js';
|
||||
export { SourceAdapterRegistry } from './source-adapter-registry.js';
|
||||
export type { SqliteBundleIngestStoreOptions } from './sqlite-bundle-ingest-store.js';
|
||||
|
|
@ -652,4 +661,10 @@ export type {
|
|||
TriageSignals,
|
||||
UnresolvedCardInfo,
|
||||
WorkUnit,
|
||||
DeterministicProjectionContext,
|
||||
ProjectionResult,
|
||||
DeterministicFinalizationContext,
|
||||
FinalizationOverrideReplay,
|
||||
FinalizationResult,
|
||||
} from './types.js';
|
||||
export * from './wiki-body-refs.js';
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,7 @@
|
|||
import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
|
||||
import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { GitService } from '../core/index.js';
|
||||
import { addTouchedSlSource } from '../tools/index.js';
|
||||
import { IngestBundleRunner } from './ingest-bundle.runner.js';
|
||||
import { createMemoryFlowLiveBuffer } from './memory-flow/live-buffer.js';
|
||||
|
|
@ -96,6 +95,7 @@ const makeDeps = () => {
|
|||
triageSupported: undefined as undefined | boolean,
|
||||
detect: vi.fn().mockResolvedValue(true),
|
||||
listTargetConnectionIds: undefined as undefined | ((stagedDir: string) => Promise<string[]>),
|
||||
finalize: undefined as any,
|
||||
chunk: vi.fn().mockResolvedValue({
|
||||
workUnits: [{ unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
}),
|
||||
|
|
@ -123,9 +123,16 @@ const makeDeps = () => {
|
|||
};
|
||||
const scopedGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
changedPaths: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const sessionWorktreeService = {
|
||||
create: vi.fn().mockResolvedValue({
|
||||
|
|
@ -167,10 +174,12 @@ const makeDeps = () => {
|
|||
loadPrompt: vi.fn().mockResolvedValue('base-framing'),
|
||||
};
|
||||
const wikiService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listPageKeys: vi.fn().mockResolvedValue([]),
|
||||
readPage: vi.fn().mockResolvedValue(null),
|
||||
syncFromCommit: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
wikiService.forWorktree.mockReturnValue(wikiService);
|
||||
const knowledgeSlRefs = {
|
||||
syncFromWiki: vi.fn().mockResolvedValue({ inserted: 1, deleted: 0 }),
|
||||
};
|
||||
|
|
@ -178,7 +187,7 @@ const makeDeps = () => {
|
|||
listPagesForUser: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
const semanticLayerService = {
|
||||
forWorktree: vi.fn().mockReturnValue({}),
|
||||
forWorktree: vi.fn(),
|
||||
listFilesForConnection: vi
|
||||
.fn()
|
||||
.mockImplementation((connectionId: string) =>
|
||||
|
|
@ -193,6 +202,7 @@ const makeDeps = () => {
|
|||
}),
|
||||
),
|
||||
};
|
||||
semanticLayerService.forWorktree.mockReturnValue(semanticLayerService);
|
||||
const slSearchService = {
|
||||
indexSources: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
|
@ -255,8 +265,12 @@ const buildRunner = (deps: ReturnType<typeof makeDeps> = makeDeps(), overrides:
|
|||
resolveUploadDir: (uploadId) => `/tmp/ktx-test/ingest-uploads/${uploadId}`,
|
||||
resolvePullDir: (jobId) => `/tmp/ktx-test/ingest-pulls/${jobId}`,
|
||||
resolveTranscriptDir: (jobId) => `/tmp/ktx-test/run/wu-transcripts/${jobId}`,
|
||||
resolveTracePath: (jobId) => `/tmp/ktx-test/ingest-traces/${jobId}/trace.jsonl`,
|
||||
},
|
||||
settings: {
|
||||
probeRowCount: 1,
|
||||
memoryIngestionModel: 'test-model',
|
||||
},
|
||||
settings: { probeRowCount: 1, memoryIngestionModel: 'test-model' },
|
||||
skillsRegistry: deps.skillsRegistry as any,
|
||||
promptService: deps.promptService as any,
|
||||
wikiService: deps.wikiService as any,
|
||||
|
|
@ -412,6 +426,127 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('fails before squash when reconciliation leaves a touched wiki page with dangling refs', async () => {
|
||||
const deps = makeDeps();
|
||||
let currentToolSession: any = null;
|
||||
const scopedWiki = {
|
||||
listPageKeys: vi.fn().mockResolvedValue(['page-a']),
|
||||
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
|
||||
if (key === 'page-a') {
|
||||
return Promise.resolve({
|
||||
pageKey: 'page-a',
|
||||
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['missing-page'] },
|
||||
content: 'See [[missing-page]].',
|
||||
});
|
||||
}
|
||||
return Promise.resolve(null);
|
||||
}),
|
||||
};
|
||||
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
|
||||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
});
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
|
||||
}
|
||||
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
|
||||
currentToolSession.actions.push({ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' });
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
await expect(
|
||||
runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
}),
|
||||
).rejects.toThrow(/wiki references target missing page\(s\): page-a -> missing-page/);
|
||||
|
||||
expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1');
|
||||
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('allows reconciliation to save circular wiki refs once both pages exist', async () => {
|
||||
const deps = makeDeps();
|
||||
let currentToolSession: any = null;
|
||||
const scopedWiki = {
|
||||
listPageKeys: vi.fn().mockResolvedValue(['page-a', 'page-b']),
|
||||
readPage: vi.fn().mockImplementation((_scope: string, _scopeId: string | null, key: string) => {
|
||||
if (key === 'page-a') {
|
||||
return Promise.resolve({
|
||||
pageKey: 'page-a',
|
||||
frontmatter: { summary: 'Page A', usage_mode: 'auto', refs: ['page-b'] },
|
||||
content: 'See [[page-b]].',
|
||||
});
|
||||
}
|
||||
if (key === 'page-b') {
|
||||
return Promise.resolve({
|
||||
pageKey: 'page-b',
|
||||
frontmatter: { summary: 'Page B', usage_mode: 'auto', refs: ['page-a'] },
|
||||
content: 'See [[page-a]].',
|
||||
});
|
||||
}
|
||||
return Promise.resolve(null);
|
||||
}),
|
||||
};
|
||||
deps.wikiService.forWorktree.mockReturnValue(scopedWiki);
|
||||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
});
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
if (params.telemetryTags.operationName === 'ingest-bundle-wu') {
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'updated', key: 'orders', detail: 'Orders source' });
|
||||
}
|
||||
if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') {
|
||||
currentToolSession.actions.push(
|
||||
{ target: 'wiki', type: 'created', key: 'page-a', detail: 'Page A' },
|
||||
{ target: 'wiki', type: 'created', key: 'page-b', detail: 'Page B' },
|
||||
);
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
const result = await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(result.failedWorkUnits).toEqual([]);
|
||||
expect(deps.gitService.squashMergeIntoMain).toHaveBeenCalled();
|
||||
expect(deps.runsRepo.markFailed).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => {
|
||||
const deps = makeDeps();
|
||||
const sessions: any[] = [];
|
||||
|
|
@ -1384,7 +1519,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['explores/b2b/sales_pipeline.json', 'h1']]),
|
||||
currentHashes: new Map([['a.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/looker-run/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
|
@ -1441,26 +1576,69 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('runs a registered post-processor before squash, records the outcome, and reindexes touched sources after squash', async () => {
|
||||
it('runs adapter finalization before squash, records the outcome, and reindexes touched sources', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.source = 'metricflow';
|
||||
deps.registry.get.mockReturnValue(deps.adapter);
|
||||
deps.adapter.chunk.mockResolvedValue({
|
||||
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
workUnits: [],
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
});
|
||||
deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse-2']);
|
||||
deps.adapter.finalize = vi.fn().mockResolvedValue({
|
||||
result: { sourcesTouched: 1 },
|
||||
warnings: ['kept going'],
|
||||
errors: [],
|
||||
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
|
||||
changedWikiPageKeys: [],
|
||||
actions: [
|
||||
{
|
||||
target: 'sl',
|
||||
type: 'updated',
|
||||
key: 'orders',
|
||||
targetConnectionId: 'warehouse-2',
|
||||
detail: 'Finalized orders usage',
|
||||
rawPaths: ['semantic_models.yml'],
|
||||
},
|
||||
],
|
||||
});
|
||||
deps.semanticLayerService.loadAllSources.mockImplementation((connectionId: string) =>
|
||||
Promise.resolve({ sources: [{ name: `${connectionId}_source` }], loadErrors: [] }),
|
||||
);
|
||||
const postProcessor = {
|
||||
run: vi.fn().mockResolvedValue({
|
||||
result: { sourcesCreated: 1 },
|
||||
warnings: ['kept going'],
|
||||
errors: [],
|
||||
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
|
||||
let head = 'pre-finalization';
|
||||
const git = {
|
||||
revParseHead: vi.fn(async () => head),
|
||||
commitFiles: vi.fn().mockImplementation(async (paths: string[]) => {
|
||||
if (paths.includes('semantic-layer/warehouse-2/orders.yaml')) {
|
||||
head = 'post-finalization';
|
||||
return { created: true, commitHash: 'finalization-sha' };
|
||||
}
|
||||
return { created: true, commitHash: head };
|
||||
}),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'post-finalization' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockResolvedValue(undefined),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockImplementation(async (from: string, to: string) =>
|
||||
from === 'pre-finalization' && to === 'post-finalization'
|
||||
? [{ status: 'M', path: 'semantic-layer/warehouse-2/orders.yaml' }]
|
||||
: [],
|
||||
),
|
||||
changedPaths: vi.fn().mockResolvedValue(['semantic-layer/warehouse-2/orders.yaml']),
|
||||
};
|
||||
const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
|
||||
deps.sessionWorktreeService.create.mockResolvedValue({
|
||||
chatId: 'j1',
|
||||
workdir: '/tmp/wt',
|
||||
branch: 'session/j1',
|
||||
baseSha: 'b',
|
||||
createdAt: new Date(),
|
||||
git,
|
||||
config: {},
|
||||
});
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['semantic_models.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
|
||||
|
|
@ -1475,26 +1653,29 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(postProcessor.run).toHaveBeenCalledWith({
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'metricflow',
|
||||
syncId: expect.any(String),
|
||||
jobId: 'j1',
|
||||
runId: 'run-1',
|
||||
workdir: '/tmp/wt',
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
});
|
||||
expect(deps.adapter.finalize).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'metricflow',
|
||||
syncId: expect.any(String),
|
||||
jobId: 'j1',
|
||||
runId: 'run-1',
|
||||
workdir: '/tmp/wt',
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
}),
|
||||
);
|
||||
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
postProcessor: {
|
||||
finalization: expect.objectContaining({
|
||||
sourceKey: 'metricflow',
|
||||
status: 'success',
|
||||
result: { sourcesCreated: 1 },
|
||||
warnings: ['kept going'],
|
||||
errors: [],
|
||||
touchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
|
||||
},
|
||||
commitSha: 'finalization-sha',
|
||||
touchedPaths: ['semantic-layer/warehouse-2/orders.yaml'],
|
||||
derivedTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
|
||||
declaredTouchedSources: [{ connectionId: 'warehouse-2', sourceName: 'orders' }],
|
||||
actions: [expect.objectContaining({ key: 'orders' })],
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
|
@ -1503,7 +1684,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'success');
|
||||
});
|
||||
|
||||
it('includes historic-sql post-processor output in memory-flow saved counts', async () => {
|
||||
it('includes finalization actions in memory-flow saved counts', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.source = 'historic-sql';
|
||||
deps.registry.get.mockReturnValue(deps.adapter);
|
||||
|
|
@ -1517,21 +1698,19 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
},
|
||||
],
|
||||
});
|
||||
const postProcessor = {
|
||||
run: vi.fn().mockResolvedValue({
|
||||
result: {
|
||||
tableUsageMerged: 2,
|
||||
staleTablesMarked: 1,
|
||||
patternPagesWritten: 3,
|
||||
stalePatternPagesMarked: 1,
|
||||
archivedPatternPages: 1,
|
||||
},
|
||||
warnings: [],
|
||||
errors: [],
|
||||
touchedSources: [{ connectionId: 'c1', sourceName: 'orders' }],
|
||||
}),
|
||||
};
|
||||
const runner = buildRunner(deps, { postProcessors: { 'historic-sql': postProcessor } });
|
||||
deps.adapter.finalize = vi.fn().mockResolvedValue({
|
||||
warnings: [],
|
||||
errors: [],
|
||||
touchedSources: [],
|
||||
changedWikiPageKeys: [],
|
||||
actions: [
|
||||
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Merged usage' },
|
||||
{ target: 'sl', type: 'updated', key: 'customers', detail: 'Merged usage' },
|
||||
{ target: 'wiki', type: 'created', key: 'historic-sql-orders', detail: 'Projected pattern' },
|
||||
{ target: 'wiki', type: 'updated', key: 'historic-sql-customers', detail: 'Projected pattern' },
|
||||
],
|
||||
});
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['tables/public/orders.json', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/historic-sql/s',
|
||||
|
|
@ -1557,13 +1736,13 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(memoryFlow.snapshot().events).toContainEqual(
|
||||
expect.objectContaining({
|
||||
type: 'saved',
|
||||
wikiCount: 5,
|
||||
slCount: 3,
|
||||
wikiCount: 2,
|
||||
slCount: 2,
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('marks post-processor infrastructure failure as failed and preserves worktree cleanup state', async () => {
|
||||
it('marks finalization infrastructure failure as failed and preserves worktree cleanup state', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.source = 'metricflow';
|
||||
deps.registry.get.mockReturnValue(deps.adapter);
|
||||
|
|
@ -1571,8 +1750,8 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
workUnits: [{ unitKey: 'u1', rawFiles: ['semantic_models.yml'], peerFileIndex: [], dependencyPaths: [] }],
|
||||
parseArtifacts: { semanticModels: [{ name: 'orders' }] },
|
||||
});
|
||||
const postProcessor = { run: vi.fn().mockRejectedValue(new Error('worktree write failed')) };
|
||||
const runner = buildRunner(deps, { postProcessors: { metricflow: postProcessor } });
|
||||
deps.adapter.finalize = vi.fn().mockRejectedValue(new Error('worktree write failed'));
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['semantic_models.yml', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/metricflow/s',
|
||||
|
|
@ -1594,6 +1773,132 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.sessionWorktreeService.cleanup).toHaveBeenCalledWith(expect.any(Object), 'crash');
|
||||
});
|
||||
|
||||
it('reports finalization actions excluded from provenance when raw paths are not defensible', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.finalize = vi.fn().mockResolvedValue({
|
||||
warnings: [],
|
||||
errors: [],
|
||||
touchedSources: [],
|
||||
changedWikiPageKeys: [],
|
||||
actions: [
|
||||
{ target: 'wiki', type: 'updated', key: 'historic-sql-pattern', detail: 'No raw path' },
|
||||
{ target: 'sl', type: 'updated', key: 'orders', detail: 'Invalid raw path', rawPaths: ['missing.json'] },
|
||||
],
|
||||
});
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['current.json', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
finalization: expect.objectContaining({
|
||||
provenanceExclusions: [
|
||||
expect.objectContaining({ reason: 'missing_raw_paths' }),
|
||||
expect.objectContaining({ reason: 'raw_path_not_defensible', invalidRawPaths: ['missing.json'] }),
|
||||
],
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
);
|
||||
expect(deps.provenanceRepo.insertMany).not.toHaveBeenCalledWith(
|
||||
expect.arrayContaining([expect.objectContaining({ rawPath: 'missing.json' })]),
|
||||
);
|
||||
});
|
||||
|
||||
it('passes explicit override replay metadata and no current work unit outcomes', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.reportsRepo.findByJobId.mockResolvedValue({
|
||||
id: 'prior-report',
|
||||
runId: 'prior-run',
|
||||
jobId: 'prior-job',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
createdAt: '2026-05-18T00:00:00.000Z',
|
||||
body: {
|
||||
status: 'completed',
|
||||
syncId: 'prior-sync',
|
||||
diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 },
|
||||
commitSha: 'prior-sha',
|
||||
workUnits: [
|
||||
{
|
||||
unitKey: 'prior-unit',
|
||||
rawFiles: ['prior.json'],
|
||||
status: 'success',
|
||||
actions: [{ target: 'wiki', type: 'created', key: 'prior', detail: 'prior' }],
|
||||
touchedSlSources: [],
|
||||
},
|
||||
],
|
||||
failedWorkUnits: [],
|
||||
reconciliationSkipped: false,
|
||||
conflictsResolved: [],
|
||||
evictionsApplied: [
|
||||
{
|
||||
rawPath: 'do-not-replay.json',
|
||||
artifactKind: 'wiki',
|
||||
artifactKey: 'old',
|
||||
action: 'removed',
|
||||
reason: 'prior',
|
||||
},
|
||||
],
|
||||
unmappedFallbacks: [],
|
||||
artifactResolutions: [],
|
||||
evictionInputs: ['evicted-from-prior-report.json'],
|
||||
unresolvedCards: [],
|
||||
supersededBy: null,
|
||||
overrideOf: null,
|
||||
provenanceRows: [],
|
||||
toolTranscripts: [],
|
||||
},
|
||||
});
|
||||
deps.adapter.finalize = vi.fn().mockResolvedValue({
|
||||
warnings: [],
|
||||
errors: [],
|
||||
touchedSources: [],
|
||||
changedWikiPageKeys: [],
|
||||
actions: [],
|
||||
});
|
||||
deps.gitService.listFilesAtHead.mockResolvedValue(['raw-sources/c1/fake/prior-sync/prior.json']);
|
||||
deps.gitService.getFileAtCommit.mockResolvedValue('{"id":1}\n');
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({
|
||||
currentHashes: new Map([['prior.json', 'h1']]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/prior-sync',
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/prior');
|
||||
|
||||
await runner.run({
|
||||
jobId: 'override-job',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'manual_override',
|
||||
bundleRef: { kind: 'override', priorJobId: 'prior-job' },
|
||||
});
|
||||
|
||||
expect(deps.adapter.finalize).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
workUnitOutcomes: [],
|
||||
overrideReplay: {
|
||||
priorJobId: 'prior-job',
|
||||
priorRunId: 'prior-run',
|
||||
priorSyncId: 'prior-sync',
|
||||
evictionRawPaths: ['evicted-from-prior-report.json'],
|
||||
},
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('includes existing global wiki pages in WorkUnit prompts', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.knowledgeIndex.listPagesForUser.mockResolvedValue([
|
||||
|
|
@ -1851,9 +2156,15 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
const assertError = new Error('Worktree has in-progress git operation (sequencer ...); refusing to proceed');
|
||||
const sessionGit = {
|
||||
revParseHead: vi.fn().mockResolvedValue('h'),
|
||||
commitFiles: vi.fn(),
|
||||
commitFiles: vi.fn().mockResolvedValue({ created: true, commitHash: 'h' }),
|
||||
commitStaged: vi.fn().mockResolvedValue({ created: false, commitHash: 'h' }),
|
||||
resetHardTo: vi.fn(),
|
||||
assertWorktreeClean: vi.fn().mockRejectedValue(assertError),
|
||||
writeBinaryNoRenamePatch: vi.fn(async (_base: string, _head: string, patchPath: string) => {
|
||||
await writeFile(patchPath, '', 'utf-8');
|
||||
}),
|
||||
applyPatchFile3WayIndex: vi.fn(),
|
||||
diffNameStatus: vi.fn().mockResolvedValue([]),
|
||||
};
|
||||
deps.sessionWorktreeService.create.mockResolvedValue({
|
||||
chatId: 'j1',
|
||||
|
|
@ -1884,135 +2195,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => {
|
|||
expect(deps.gitService.squashMergeIntoMain).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('squash-merges only successful WUs into main when one WU fails sl_validate', async () => {
|
||||
const homeDir = await mkdtemp(join(tmpdir(), 'ingest-rollback-'));
|
||||
try {
|
||||
const configDir = join(homeDir, 'config');
|
||||
const mainGit = new GitService({
|
||||
storage: { configDir, homeDir },
|
||||
git: {
|
||||
userName: 'System User',
|
||||
userEmail: 'system@example.com',
|
||||
bootstrapMessage: 'Initialize test config repo',
|
||||
bootstrapAuthor: 'test-system',
|
||||
bootstrapAuthorEmail: 'system@example.com',
|
||||
},
|
||||
});
|
||||
await mainGit.onModuleInit();
|
||||
const baseSha = await mainGit.revParseHead();
|
||||
if (!baseSha) {
|
||||
throw new Error('no base sha');
|
||||
}
|
||||
|
||||
const deps = makeDeps();
|
||||
const sessionDir = join(homeDir, '.worktrees', 'session-j1');
|
||||
const sessionBranch = 'session/j1';
|
||||
let currentToolSession: any = null;
|
||||
|
||||
deps.gitService = mainGit as any;
|
||||
deps.sessionWorktreeService.create.mockImplementation(async (_jobId: string, startSha: string) => {
|
||||
await mkdir(join(homeDir, '.worktrees'), { recursive: true });
|
||||
await mainGit.addWorktree(sessionDir, sessionBranch, startSha);
|
||||
return {
|
||||
chatId: 'j1',
|
||||
workdir: sessionDir,
|
||||
branch: sessionBranch,
|
||||
baseSha: startSha,
|
||||
createdAt: new Date(),
|
||||
git: mainGit.forWorktree(sessionDir),
|
||||
config: {},
|
||||
};
|
||||
});
|
||||
deps.sessionWorktreeService.cleanup.mockResolvedValue(undefined);
|
||||
deps.adapter.chunk.mockResolvedValue({
|
||||
workUnits: [
|
||||
{ unitKey: 'wu-good', rawFiles: ['good.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
{ unitKey: 'wu-bad', rawFiles: ['bad.raw'], peerFileIndex: [], dependencyPaths: [] },
|
||||
],
|
||||
});
|
||||
deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => {
|
||||
currentToolSession = toolSession;
|
||||
return {
|
||||
toRuntimeTools: vi.fn().mockReturnValue({}),
|
||||
getAllTools: vi.fn().mockReturnValue([]),
|
||||
getToolNames: vi.fn().mockReturnValue([]),
|
||||
};
|
||||
});
|
||||
deps.slValidator.validateSingleSource.mockImplementation(
|
||||
(_validationDeps: unknown, _connectionId: string, sourceName: string) => ({
|
||||
errors: sourceName === 'bad' ? [{ message: 'bad source rejected' }] : [],
|
||||
warnings: [],
|
||||
}),
|
||||
);
|
||||
deps.agentRunner.runLoop.mockImplementation(async (params: any) => {
|
||||
const unitKey = params.telemetryTags?.unitKey;
|
||||
if (unitKey === 'wu-good') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'good.yaml'), 'name: good\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'good');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'good', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/good.yaml'],
|
||||
'test: add good source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
if (unitKey === 'wu-bad') {
|
||||
await mkdir(join(sessionDir, 'semantic-layer', 'c1'), { recursive: true });
|
||||
await writeFile(join(sessionDir, 'semantic-layer', 'c1', 'bad.yaml'), 'name: bad\n');
|
||||
addTouchedSlSource(currentToolSession.touchedSlSources, 'c1', 'bad');
|
||||
currentToolSession.actions.push({ target: 'sl', type: 'created', key: 'bad', detail: '' });
|
||||
await currentToolSession.gitService.commitFiles(
|
||||
['semantic-layer/c1/bad.yaml'],
|
||||
'test: add bad source',
|
||||
'KTX Test',
|
||||
'system@ktx.local',
|
||||
);
|
||||
}
|
||||
return { stopReason: 'natural' };
|
||||
});
|
||||
|
||||
const runner = buildRunner(deps);
|
||||
(runner as any).stageRawFilesStage1 = vi.fn().mockImplementation(async ({ worktreeRoot }: any) => {
|
||||
const rawDir = join(worktreeRoot, 'raw-sources', 'c1', 'fake', 's');
|
||||
await mkdir(rawDir, { recursive: true });
|
||||
await writeFile(join(rawDir, 'good.raw'), 'good raw');
|
||||
await writeFile(join(rawDir, 'bad.raw'), 'bad raw');
|
||||
return {
|
||||
currentHashes: new Map([
|
||||
['good.raw', 'good-hash'],
|
||||
['bad.raw', 'bad-hash'],
|
||||
]),
|
||||
rawDirInWorktree: 'raw-sources/c1/fake/s',
|
||||
};
|
||||
});
|
||||
(runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x');
|
||||
|
||||
const result = await runner.run({
|
||||
jobId: 'j1',
|
||||
connectionId: 'c1',
|
||||
sourceKey: 'fake',
|
||||
trigger: 'upload',
|
||||
bundleRef: { kind: 'upload', uploadId: 'upload-x' },
|
||||
});
|
||||
|
||||
expect(result.failedWorkUnits).toEqual(['wu-bad']);
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'good.yaml'), 'utf-8')).toContain('good');
|
||||
expect(await readFile(join(configDir, 'semantic-layer', 'c1', 'bad.yaml'), 'utf-8').catch(() => null)).toBeNull();
|
||||
expect(deps.reportsRepo.create).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
body: expect.objectContaining({
|
||||
failedWorkUnits: ['wu-bad'],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
await expect(stat(join(configDir, '.git', 'sequencer'))).rejects.toThrow();
|
||||
} finally {
|
||||
await rm(homeDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it('fails the run and rethrows when the adapter cannot detect the bundle', async () => {
|
||||
const deps = makeDeps();
|
||||
deps.adapter.detect.mockResolvedValue(false);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
85
packages/context/src/ingest/ingest-trace.test.ts
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
import { mkdtemp, readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { FileIngestTraceWriter, ingestTracePathForJob, traceTimed } from './ingest-trace.js';
|
||||
|
||||
describe('FileIngestTraceWriter', () => {
  /** Reads a JSONL trace file back into one parsed object per line. */
  const readTraceLines = async (tracePath: string) =>
    (await readFile(tracePath, 'utf-8'))
      .trim()
      .split('\n')
      .map((line) => JSON.parse(line));

  it('persists structured trace events as JSONL', async () => {
    const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
    const tracePath = ingestTracePathForJob(root, 'job-1');
    const trace = new FileIngestTraceWriter({
      tracePath,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
    });

    await trace.event('debug', 'snapshot', 'input_snapshot', {
      baseSha: 'abc123',
      rawFileCount: 2,
      diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
    });

    const lines = await readTraceLines(tracePath);
    expect(lines).toHaveLength(1);
    expect(lines[0]).toMatchObject({
      schemaVersion: 1,
      jobId: 'job-1',
      connectionId: 'metabase-main',
      sourceKey: 'metabase',
      level: 'debug',
      phase: 'snapshot',
      event: 'input_snapshot',
      data: {
        baseSha: 'abc123',
        rawFileCount: 2,
        diffSummary: { added: 1, modified: 1, deleted: 0, unchanged: 3 },
      },
    });
    expect(typeof lines[0].at).toBe('string');
  });

  it('records timing and error context for postmortem inspection', async () => {
    vi.useFakeTimers();
    try {
      vi.setSystemTime(new Date('2026-05-17T12:00:00.000Z'));
      const root = await mkdtemp(join(tmpdir(), 'ktx-trace-'));
      const tracePath = ingestTracePathForJob(root, 'job-2');
      const trace = new FileIngestTraceWriter({
        tracePath,
        jobId: 'job-2',
        connectionId: 'c1',
        sourceKey: 'fake',
        level: 'trace',
      });

      await expect(
        traceTimed(trace, 'integration', 'apply_patch', { unitKey: 'wu-1' }, async () => {
          // Advance the mocked clock so the recorded duration is deterministic.
          vi.advanceTimersByTime(17);
          throw new Error('patch conflict');
        }),
      ).rejects.toThrow('patch conflict');

      const lines = await readTraceLines(tracePath);
      expect(lines.map((line) => line.event)).toEqual(['apply_patch_started', 'apply_patch_failed']);
      expect(lines[1]).toMatchObject({
        level: 'error',
        phase: 'integration',
        data: { unitKey: 'wu-1' },
        error: { name: 'Error', message: 'patch conflict' },
      });
      expect(lines[1].durationMs).toBe(17);
    } finally {
      // Restore real timers even when an assertion above throws; otherwise the
      // mocked clock would leak into whatever test runs next in this worker.
      vi.useRealTimers();
    }
  });

  it('uses the documented trace path layout', () => {
    expect(ingestTracePathForJob('/project/.ktx', 'job-3')).toBe('/project/.ktx/ingest-traces/job-3/trace.jsonl');
  });
});
|
||||
158
packages/context/src/ingest/ingest-trace.ts
Normal file
158
packages/context/src/ingest/ingest-trace.ts
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
import { appendFile, mkdir } from 'node:fs/promises';
|
||||
import { dirname, join } from 'node:path';
|
||||
|
||||
/** Severity/verbosity label attached to every trace event. */
export type IngestTraceLevel = 'info' | 'debug' | 'trace' | 'error';

/**
 * Numeric rank per level. A writer configured at level X records an event
 * only when the event's rank is less than or equal to X's rank, so `error`
 * (rank 0) is recorded at every configured level and `trace` only at `trace`.
 */
const TRACE_LEVEL_RANK: Record<IngestTraceLevel, number> = {
  error: 0,
  info: 1,
  debug: 2,
  trace: 3,
};

/** Identity and configuration shared by every event a writer emits. */
export interface IngestTraceContext {
  /** File the JSONL events are appended to. */
  tracePath: string;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  /** Optional identifiers; can be attached later through `withContext`. */
  runId?: string;
  syncId?: string;
  /** Maximum level that gets written. */
  level?: IngestTraceLevel;
}

/** Shape of a single serialized trace line. */
export interface IngestTraceEvent {
  schemaVersion: 1;
  /** ISO-8601 timestamp taken when the event is written. */
  at: string;
  level: IngestTraceLevel;
  jobId: string;
  connectionId: string;
  sourceKey: string;
  runId?: string;
  syncId?: string;
  phase: string;
  event: string;
  durationMs?: number;
  data?: Record<string, unknown>;
  error?: {
    name: string;
    message: string;
    stack?: string;
  };
}

/** Contract implemented by every trace sink (file-backed or no-op). */
export interface IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;
  /** Returns a writer carrying the given run/sync identifiers. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter;
  /** Records one event; `error` and `durationMs` are optional extras. */
  event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void>;
}
|
||||
|
||||
/**
 * Resolves the JSONL trace file location for one ingest job.
 *
 * Layout: `<homeDir>/ingest-traces/<jobId>/trace.jsonl`.
 */
export function ingestTracePathForJob(homeDir: string, jobId: string): string {
  const segments = [homeDir, 'ingest-traces', jobId, 'trace.jsonl'];
  return join(...segments);
}
|
||||
|
||||
/**
 * Converts an arbitrary thrown value into the JSON-safe `error` field of a
 * trace event.
 *
 * - `undefined` / `null` produce `undefined` (no error field is written).
 * - `Error` instances keep their name, message and, when present, stack.
 * - Other objects use their own string `message` (and string `name`) when
 *   available; otherwise they are rendered with `JSON.stringify` so the trace
 *   shows their contents rather than `"[object Object]"`.
 * - Everything else falls back to `String(value)` under the name `'Error'`.
 */
function serializeError(error: unknown): IngestTraceEvent['error'] | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  if (error instanceof Error) {
    return {
      name: error.name,
      message: error.message,
      ...(error.stack ? { stack: error.stack } : {}),
    };
  }
  if (typeof error === 'object') {
    const candidate = error as { name?: unknown; message?: unknown };
    const name = typeof candidate.name === 'string' && candidate.name !== '' ? candidate.name : 'Error';
    if (typeof candidate.message === 'string') {
      return { name, message: candidate.message };
    }
    try {
      const rendered: string | undefined = JSON.stringify(error);
      if (rendered !== undefined) {
        return { name, message: rendered };
      }
    } catch {
      // Circular structures or BigInt members cannot be stringified; fall
      // through to the generic string conversion below.
    }
    try {
      return { name, message: String(error) };
    } catch {
      // Objects without a usable toString (e.g. null prototype) end up here.
      return { name, message: Object.prototype.toString.call(error) };
    }
  }
  return { name: 'Error', message: String(error) };
}
|
||||
|
||||
/**
 * Decides whether an event at `incoming` level passes a writer configured at
 * `configured` level: it does when its rank does not exceed the configured rank.
 */
function shouldWrite(configured: IngestTraceLevel, incoming: IngestTraceLevel): boolean {
  const threshold = TRACE_LEVEL_RANK[configured];
  const severity = TRACE_LEVEL_RANK[incoming];
  return severity <= threshold;
}
|
||||
|
||||
/**
 * Trace writer that appends each event as one JSON document per line (JSONL)
 * to `tracePath`, creating the parent directory on demand.
 */
export class FileIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath: string;
  readonly context: IngestTraceContext;

  /** Stores the context, defaulting the level to `'debug'` when unset. */
  constructor(context: IngestTraceContext) {
    this.tracePath = context.tracePath;
    this.context = { ...context, level: context.level ?? 'debug' };
  }

  /** Returns a new writer for the same file with `runId`/`syncId` overridden. */
  withContext(context: Partial<Pick<IngestTraceContext, 'runId' | 'syncId'>>): IngestTraceWriter {
    const merged: IngestTraceContext = { ...this.context, ...context, tracePath: this.tracePath };
    return new FileIngestTraceWriter(merged);
  }

  /**
   * Appends one event to the trace file unless `level` is filtered out by the
   * configured level. Optional fields are omitted from the line when absent.
   */
  async event(
    level: IngestTraceLevel,
    phase: string,
    event: string,
    data?: Record<string, unknown>,
    error?: unknown,
    durationMs?: number,
  ): Promise<void> {
    const configuredLevel = this.context.level ?? 'debug';
    if (!shouldWrite(configuredLevel, level)) {
      return;
    }

    const failure = serializeError(error);
    const { jobId, connectionId, sourceKey, runId, syncId } = this.context;
    // Property order is the serialized key order of each JSONL line.
    const payload: IngestTraceEvent = {
      schemaVersion: 1,
      at: new Date().toISOString(),
      level,
      jobId,
      connectionId,
      sourceKey,
      ...(runId ? { runId } : {}),
      ...(syncId ? { syncId } : {}),
      phase,
      event,
      ...(durationMs === undefined ? {} : { durationMs }),
      ...(data ? { data } : {}),
      ...(failure ? { error: failure } : {}),
    };

    const parentDir = dirname(this.tracePath);
    await mkdir(parentDir, { recursive: true });
    const line = `${JSON.stringify(payload)}\n`;
    await appendFile(this.tracePath, line, 'utf-8');
  }
}
|
||||
|
||||
/**
 * Trace writer that records nothing: `event` resolves immediately and
 * `withContext` hands back the same instance.
 */
export class NoopIngestTraceWriter implements IngestTraceWriter {
  readonly tracePath = '';

  readonly context: IngestTraceContext = {
    tracePath: '',
    jobId: '',
    connectionId: '',
    sourceKey: '',
    level: 'error',
  };

  /** No context is tracked, so the same writer is reused. */
  withContext(): IngestTraceWriter {
    return this;
  }

  /** Discards the event. */
  async event(): Promise<void> {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Runs `fn` bracketed by trace events and returns its result.
 *
 * Emits `<event>_started` (debug) before the call, then either
 * `<event>_finished` (debug) or `<event>_failed` (error, with the thrown
 * value) including the elapsed milliseconds.
 *
 * Error semantics:
 * - If `fn` rejects, that rejection is always what the caller sees; a failure
 *   while writing the `_failed` event is ignored so it cannot mask the real
 *   error.
 * - If `fn` succeeds but writing `_finished` fails, the trace error propagates
 *   as before, but no `_failed` event is emitted for the successful operation.
 *
 * @param trace Sink that receives the events.
 * @param phase Phase label copied onto every event.
 * @param event Base event name; suffixes are appended to it.
 * @param data  Payload attached to every event.
 * @param fn    Operation to time.
 */
export async function traceTimed<T>(
  trace: IngestTraceWriter,
  phase: string,
  event: string,
  data: Record<string, unknown>,
  fn: () => Promise<T>,
): Promise<T> {
  await trace.event('debug', phase, `${event}_started`, data);
  const started = Date.now();

  let result: T;
  try {
    result = await fn();
  } catch (error) {
    const elapsedMs = Date.now() - started;
    try {
      await trace.event('error', phase, `${event}_failed`, data, error, elapsedMs);
    } catch {
      // Tracing is diagnostic only; never let it replace the operation's error.
    }
    throw error;
  }

  // Kept outside the try block so a trace-write failure here is not reported
  // as a failure of the operation itself.
  await trace.event('debug', phase, `${event}_finished`, data, undefined, Date.now() - started);
  return result;
}
|
||||
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
97
packages/context/src/ingest/isolated-diff/git-patch.test.ts
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { assertPatchAllowedForWorkUnit, parsePatchTouchedPaths, textArtifactRoots } from './git-patch.js';
|
||||
|
||||
describe('isolated diff patch contract', () => {
  it('parses touched paths from no-rename git patches', () => {
    // One modified wiki page followed by one newly added semantic-layer source.
    const modifiedWikiPage = [
      'diff --git a/wiki/global/a.md b/wiki/global/a.md',
      'index 1111111..2222222 100644',
      '--- a/wiki/global/a.md',
      '+++ b/wiki/global/a.md',
      '@@ -1 +1 @@',
      '-old',
      '+new',
    ];
    const addedSlSource = [
      'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
      'new file mode 100644',
      '--- /dev/null',
      '+++ b/semantic-layer/c1/orders.yaml',
      '@@ -0,0 +1 @@',
      '+name: orders',
    ];
    const patch = [...modifiedWikiPage, ...addedSlSource, ''].join('\n');

    const expected = ['wiki/global/a.md', 'semantic-layer/c1/orders.yaml'].map((path) => ({
      path,
      oldPath: path,
      newPath: path,
      mode: '100644',
      binary: false,
    }));

    expect(parsePatchTouchedPaths(patch)).toEqual(expected);
  });

  it('rejects semantic-layer paths for slDisallowed work units', () => {
    const patch = 'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml\nindex 1..2 100644\n';
    const check = () => assertPatchAllowedForWorkUnit({ unitKey: 'lookml-mismatch', patch, slDisallowed: true });

    expect(check).toThrow(/slDisallowed WorkUnit lookml-mismatch touched semantic-layer\/c1\/orders.yaml/);
  });

  it('rejects semantic-layer paths outside allowed target connections', () => {
    const patch =
      'diff --git a/semantic-layer/finance/orders.yaml b/semantic-layer/finance/orders.yaml\nindex 1..2 100644\n';
    const check = () =>
      assertPatchAllowedForWorkUnit({
        unitKey: 'wu-finance',
        patch,
        slDisallowed: false,
        allowedTargetConnectionIds: new Set(['warehouse']),
      });

    expect(check).toThrow(
      /semantic-layer target connection not allowed: semantic-layer\/finance\/orders.yaml \(finance\); allowed: warehouse/,
    );
  });

  it('rejects executable and binary changes under known text artifact roots', () => {
    expect(textArtifactRoots).toEqual(['wiki/', 'semantic-layer/']);

    // Mode flip to 100755 on a wiki page.
    const executablePatch =
      'diff --git a/wiki/global/a.md b/wiki/global/a.md\nold mode 100644\nnew mode 100755\nindex 1..2\n';
    const checkExecutable = () =>
      assertPatchAllowedForWorkUnit({ unitKey: 'wu-1', patch: executablePatch, slDisallowed: false });
    expect(checkExecutable).toThrow(/unexpected executable mode under wiki\/global\/a.md/);

    // Binary payload on a semantic-layer source.
    const binaryPatch = [
      'diff --git a/semantic-layer/c1/orders.yaml b/semantic-layer/c1/orders.yaml',
      'index 1111111..2222222 100644',
      'GIT binary patch',
      'literal 0',
      '',
    ].join('\n');
    const checkBinary = () =>
      assertPatchAllowedForWorkUnit({ unitKey: 'wu-2', patch: binaryPatch, slDisallowed: false });
    expect(checkBinary).toThrow(/unexpected binary patch under semantic-layer\/c1\/orders.yaml/);
  });
});
|
||||
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
101
packages/context/src/ingest/isolated-diff/git-patch.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { assertSemanticLayerTargetPathsAllowed } from '../semantic-layer-target-policy.js';
|
||||
|
||||
/** Repository roots whose files are expected to be plain, non-executable text. */
export const textArtifactRoots = ['wiki/', 'semantic-layer/'] as const;

/** One file entry extracted from a git patch. */
export interface PatchTouchedPath {
  /** Effective path of the entry (the new path unless it is `/dev/null`). */
  path: string;
  oldPath: string;
  newPath: string;
  /** Last file mode seen for the entry, or `null` when the patch states none. */
  mode: string | null;
  /** True when the entry carries a binary payload or binary-files marker. */
  binary: boolean;
}

/** Input of the per-WorkUnit patch policy check. */
export interface PatchPolicyInput {
  unitKey: string;
  patch: string;
  slDisallowed: boolean;
  allowedTargetConnectionIds?: ReadonlySet<string>;
}

/** Removes the leading `a/` or `b/` that git puts in front of diff paths. */
function stripPrefix(path: string): string {
  return path.replace(/^[ab]\//, '');
}

/** True when `path` lives under one of the text artifact roots. */
function isTextArtifactPath(path: string): boolean {
  return textArtifactRoots.some((root) => path.startsWith(root));
}

const DIFF_HEADER_PREFIX = 'diff --git ';
const OCTAL_TRIPLE = /^[0-7]{3}$/;
/** Single-character escapes git uses inside C-quoted path names. */
const QUOTED_PATH_ESCAPES: ReadonlyMap<string, string> = new Map([
  ['a', '\x07'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
  ['v', '\v'],
  ['"', '"'],
  ['\\', '\\'],
]);

/**
 * Decodes a git C-quoted path starting at `text[start]` (which must be `"`).
 * Octal escapes are collected as raw bytes and decoded as UTF-8.
 * Returns the decoded value and the index just past the closing quote, or
 * `null` when the token is malformed or unterminated.
 */
function readQuotedPath(text: string, start: number): { value: string; end: number } | null {
  if (text[start] !== '"') {
    return null;
  }
  const decoder = new TextDecoder();
  let value = '';
  let pendingBytes: number[] = [];
  const flushBytes = (): void => {
    if (pendingBytes.length > 0) {
      value += decoder.decode(Uint8Array.from(pendingBytes));
      pendingBytes = [];
    }
  };

  let index = start + 1;
  while (index < text.length) {
    const char = text[index];
    if (char === '"') {
      flushBytes();
      return { value, end: index + 1 };
    }
    if (char !== '\\') {
      flushBytes();
      value += char;
      index += 1;
      continue;
    }
    const octal = text.slice(index + 1, index + 4);
    if (OCTAL_TRIPLE.test(octal)) {
      pendingBytes.push(parseInt(octal, 8) & 0xff);
      index += 4;
      continue;
    }
    const escaped = QUOTED_PATH_ESCAPES.get(text[index + 1] ?? '');
    if (escaped === undefined) {
      return null;
    }
    flushBytes();
    value += escaped;
    index += 2;
  }
  return null;
}

/**
 * Splits the part of a `diff --git` line after the prefix into old/new paths
 * (with the `a/` / `b/` prefixes removed). Handles, in order:
 * 1. C-quoted paths (`"a/…" "b/…"`),
 * 2. identical old/new paths, which is the no-rename case and stays
 *    unambiguous even when the path contains spaces,
 * 3. an unquoted old path followed by a quoted new path,
 * 4. a best-effort split at the last ` b/`, else at the last space.
 * Returns `null` when no split with two non-empty sides exists.
 */
function parseDiffHeaderPaths(rest: string): { oldPath: string; newPath: string } | null {
  if (rest.startsWith('"')) {
    const first = readQuotedPath(rest, 0);
    if (first && rest[first.end] === ' ' && first.end + 1 < rest.length) {
      const tail = rest.slice(first.end + 1);
      const second = tail.startsWith('"') ? readQuotedPath(tail, 0) : null;
      const newRaw = second && second.end === tail.length ? second.value : tail;
      return { oldPath: stripPrefix(first.value), newPath: stripPrefix(newRaw) };
    }
  } else {
    if (rest.length % 2 === 1) {
      const middle = (rest.length - 1) / 2;
      if (middle > 0 && rest[middle] === ' ') {
        const oldPath = stripPrefix(rest.slice(0, middle));
        const newPath = stripPrefix(rest.slice(middle + 1));
        if (oldPath === newPath) {
          return { oldPath, newPath };
        }
      }
    }
    const quoteAt = rest.indexOf(' "');
    if (quoteAt > 0) {
      const second = readQuotedPath(rest, quoteAt + 1);
      if (second && second.end === rest.length) {
        return { oldPath: stripPrefix(rest.slice(0, quoteAt)), newPath: stripPrefix(second.value) };
      }
    }
  }

  const prefixedSplit = rest.lastIndexOf(' b/');
  const splitAt = prefixedSplit > 0 ? prefixedSplit : rest.lastIndexOf(' ');
  if (splitAt <= 0 || splitAt >= rest.length - 1) {
    return null;
  }
  return { oldPath: stripPrefix(rest.slice(0, splitAt)), newPath: stripPrefix(rest.slice(splitAt + 1)) };
}

/**
 * Extracts one entry per `diff --git` section of a patch.
 *
 * For each section it records the old/new paths from the header, the last
 * mode announced by an `index … <mode>`, `new mode` or `new file mode` line,
 * and whether a `GIT binary patch` / `Binary files …` marker is present.
 * Lines before the first header are ignored.
 *
 * Paths containing spaces and git's C-quoted paths are decoded to their real
 * names, so prefix checks on `path` see the true location.
 *
 * @param patch Patch text as produced by `git diff` / `git format-patch`.
 * @returns Entries in the order their headers appear in the patch.
 */
export function parsePatchTouchedPaths(patch: string): PatchTouchedPath[] {
  const entries: PatchTouchedPath[] = [];
  let current: PatchTouchedPath | null = null;

  for (const line of patch.split('\n')) {
    const header = line.startsWith(DIFF_HEADER_PREFIX)
      ? parseDiffHeaderPaths(line.slice(DIFF_HEADER_PREFIX.length))
      : null;
    if (header) {
      if (current) {
        entries.push(current);
      }
      current = {
        path: header.newPath === '/dev/null' ? header.oldPath : header.newPath,
        oldPath: header.oldPath,
        newPath: header.newPath,
        mode: null,
        binary: false,
      };
      continue;
    }
    if (!current) {
      continue;
    }

    const indexMode = /^index [0-9a-f]+\.\.[0-9a-f]+(?: ([0-7]{6}))?$/.exec(line);
    if (indexMode?.[1]) {
      current.mode = indexMode[1];
    }
    const newMode = /^new mode ([0-7]{6})$/.exec(line);
    if (newMode) {
      current.mode = newMode[1] ?? current.mode;
    }
    const newFileMode = /^new file mode ([0-7]{6})$/.exec(line);
    if (newFileMode) {
      current.mode = newFileMode[1] ?? current.mode;
    }
    if (line === 'GIT binary patch' || line.startsWith('Binary files ')) {
      current.binary = true;
    }
  }

  if (current) {
    entries.push(current);
  }
  return entries;
}
|
||||
|
||||
/**
 * Validates a WorkUnit's patch against the isolated-diff policy and returns
 * the parsed entries.
 *
 * Checks, in order:
 * 1. when `allowedTargetConnectionIds` is given, all touched paths are handed
 *    to `assertSemanticLayerTargetPathsAllowed`;
 * 2. per entry: `slDisallowed` units may not touch `semantic-layer/`;
 * 3. per entry under a text artifact root: no binary payloads and no file
 *    mode other than `100644`.
 *
 * @throws Error describing the first violated rule.
 */
export function assertPatchAllowedForWorkUnit(input: PatchPolicyInput): PatchTouchedPath[] {
  const touched = parsePatchTouchedPaths(input.patch);

  const allowedConnectionIds = input.allowedTargetConnectionIds;
  if (allowedConnectionIds) {
    const paths = touched.map(({ path }) => path);
    assertSemanticLayerTargetPathsAllowed({ paths, allowedConnectionIds });
  }

  touched.forEach(({ path, binary, mode }) => {
    if (input.slDisallowed && path.startsWith('semantic-layer/')) {
      throw new Error(`slDisallowed WorkUnit ${input.unitKey} touched ${path}`);
    }
    // Binary and mode rules only apply to the known text roots.
    if (isTextArtifactPath(path)) {
      if (binary) {
        throw new Error(`unexpected binary patch under ${path}`);
      }
      if (mode && mode !== '100644') {
        throw new Error(`unexpected executable mode under ${path}: ${mode}`);
      }
    }
  });

  return touched;
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue