From b9e0a746af04c1b010992fcb798b01c05a8b156e Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 12:00:08 +0200 Subject: [PATCH 01/15] feat(cli): clean up dev command surface (#57) * feat(cli): clean up dev command surface * test: align CI expectations with CLI cleanup * test(cli): update slow test command expectations --- README.md | 9 +- .../content/docs/cli-reference/ktx-dev.mdx | 134 +- .../content/docs/cli-reference/ktx-ingest.mdx | 82 +- .../content/docs/cli-reference/ktx-scan.mdx | 149 +-- .../content/docs/concepts/context-as-code.mdx | 2 +- .../docs/getting-started/quickstart.mdx | 2 +- .../content/docs/guides/building-context.mdx | 81 +- .../docs/integrations/context-sources.mdx | 2 +- examples/package-artifacts/README.md | 2 +- examples/postgres-historic/README.md | 6 +- .../cli/src/agent-search-readiness.test.ts | 2 +- packages/cli/src/agent-search-readiness.ts | 2 +- packages/cli/src/agent.test.ts | 4 +- packages/cli/src/cli-program.ts | 18 +- packages/cli/src/cli-runtime.ts | 2 - packages/cli/src/command-schemas.ts | 17 - .../cli/src/commands/completion-commands.ts | 47 - .../commands/connection-metabase-commands.ts | 2 +- .../connection-metabase-setup.test.ts | 4 +- .../src/commands/connection-metabase-setup.ts | 8 +- packages/cli/src/commands/doctor-commands.ts | 53 - .../src/commands/public-ingest-commands.ts | 109 -- packages/cli/src/commands/runtime-commands.ts | 12 - packages/cli/src/commands/scan-commands.ts | 320 +---- packages/cli/src/completion.ts | 353 ------ packages/cli/src/connection.test.ts | 4 +- packages/cli/src/connection.ts | 4 +- packages/cli/src/dev.test.ts | 450 +------ packages/cli/src/dev.ts | 13 +- packages/cli/src/example-smoke.test.ts | 3 +- packages/cli/src/index.test.ts | 335 ++--- packages/cli/src/ingest.test.ts | 6 +- packages/cli/src/ingest.ts | 6 +- packages/cli/src/project-dir.test.ts | 19 +- packages/cli/src/public-ingest.test.ts | 6 +- packages/cli/src/public-ingest.ts | 4 +- 
packages/cli/src/runtime.test.ts | 23 - packages/cli/src/runtime.ts | 24 - packages/cli/src/scan.test.ts | 1112 +---------------- packages/cli/src/scan.ts | 492 +------- packages/cli/src/setup-context.test.ts | 7 +- packages/cli/src/setup-databases.test.ts | 2 +- packages/cli/src/setup-databases.ts | 2 +- packages/cli/src/setup-sources.test.ts | 2 +- packages/cli/src/setup-sources.ts | 2 +- packages/cli/src/setup.test.ts | 4 +- packages/cli/src/standalone-smoke.test.ts | 120 +- .../src/ingest/local-bundle-runtime.test.ts | 2 +- .../src/ingest/local-bundle-runtime.ts | 2 +- packages/llm/src/model-provider.test.ts | 7 +- scripts/examples-docs.test.mjs | 17 +- scripts/installed-live-database-smoke.mjs | 13 +- .../installed-live-database-smoke.test.mjs | 1 - scripts/package-artifacts.mjs | 65 +- scripts/package-artifacts.test.mjs | 14 +- scripts/relationship-orbit-verification.mjs | 39 +- .../relationship-orbit-verification.test.mjs | 58 +- scripts/run-ktx.mjs | 7 +- scripts/run-ktx.test.mjs | 51 +- 59 files changed, 420 insertions(+), 3918 deletions(-) delete mode 100644 packages/cli/src/commands/completion-commands.ts delete mode 100644 packages/cli/src/commands/doctor-commands.ts delete mode 100644 packages/cli/src/commands/public-ingest-commands.ts delete mode 100644 packages/cli/src/completion.ts diff --git a/README.md b/README.md index cfabfbcc..1cd20080 100644 --- a/README.md +++ b/README.md @@ -130,9 +130,7 @@ Scan artifacts are written under ```bash SCAN_OUTPUT="$(ktx scan warehouse --project-dir "$PROJECT_DIR")" printf '%s\n' "$SCAN_OUTPUT" -SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" -ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" -ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +ktx status --project-dir "$PROJECT_DIR" ``` For non-SQLite drivers, prefer credential references such as `--url env:NAME` @@ -147,12 +145,11 @@ version, and is managed by `ktx dev runtime` commands. 
KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with your system package manager or the official installer before running Python- backed KTX commands. KTX doesn't download `uv` automatically; run -`ktx dev runtime doctor` if runtime installation fails: +`ktx dev runtime status` if runtime installation fails: ```bash ktx dev runtime install --yes ktx dev runtime status -ktx dev runtime doctor ktx dev runtime start ktx dev runtime stop ktx dev runtime prune --dry-run @@ -223,7 +220,7 @@ KTX provider. Enable it with an environment flag when running an LLM-backed command: ```bash -KTX_AI_DEVTOOLS_ENABLED=true ktx dev ingest run \ +KTX_AI_DEVTOOLS_ENABLED=true ktx ingest run \ --connection-id warehouse \ --adapter metabase ``` diff --git a/docs-site/content/docs/cli-reference/ktx-dev.mdx b/docs-site/content/docs/cli-reference/ktx-dev.mdx index 82ba9acb..eea02562 100644 --- a/docs-site/content/docs/cli-reference/ktx-dev.mdx +++ b/docs-site/content/docs/cli-reference/ktx-dev.mdx @@ -1,9 +1,9 @@ --- title: "ktx dev" -description: "Low-level diagnostics, scans, adapter commands, and mapping tools." +description: "Low-level project initialization and runtime management." --- -Hidden commands for low-level project management, diagnostics, direct adapter control, and shell completion. Most users interact with these through higher-level commands like [`ktx ingest`](/docs/cli-reference/ktx-ingest) and [`ktx setup`](/docs/cli-reference/ktx-setup), but `ktx dev` provides direct access when you need fine-grained control. +`ktx dev` contains development-only project initialization and managed runtime commands. Scan and ingest commands live at the root as [`ktx scan`](/docs/cli-reference/ktx-scan) and [`ktx ingest`](/docs/cli-reference/ktx-ingest). 
## Command signature @@ -16,145 +16,45 @@ ktx dev [options] | Subcommand | Description | |-----------|-------------| | `init [directory]` | Initialize a Git-backed KTX project directory | -| `runtime` | Install, inspect, and prune the KTX-managed Python runtime | -| `scan` | Run or inspect standalone connection scans | -| `ingest run` | Run local ingest for one configured connection and source adapter | -| `ingest status [runId]` | Print status for a stored local ingest run | -| `ingest watch [runId]` | Open a stored ingest visual report | -| `ingest replay ` | Replay a stored ingest run through memory-flow output | -| `mapping` | Manage Metabase warehouse mappings (same as `ktx connection mapping`) | -| `completion zsh` | Generate zsh completion script | +| `runtime` | Install, start, stop, inspect, and prune the KTX-managed Python runtime | -## Options - -### `dev init` +## `dev init` | Flag | Description | Default | |------|-------------|---------| | `--name ` | Project name written to `ktx.yaml` | — | | `--force` | Rewrite `ktx.yaml` and scaffold files in an existing project | `false` | -### `dev runtime` +## `dev runtime` + +`ktx dev runtime` supports `install`, `start`, `stop`, `status`, and `prune`. | Flag | Description | Default | |------|-------------|---------| -| `--feature ` | Runtime feature level for `install` and `start` (`core` or `local-embeddings`) | `core` | -| `--json` | Print JSON output | `false` | +| `--feature ` | Runtime feature level for `install`, `start`, and `status` (`core` or `local-embeddings`) | `core` | +| `--json` | Print JSON output for `status` | `false` | | `--yes` | Confirm runtime install or prune actions where supported | `false` | | `--force` | Reinstall or restart where supported | `false` | - -### `dev scan` - -See [`ktx scan`](/docs/cli-reference/ktx-scan) for the full scan command reference. 
- -### `dev ingest run` - -| Flag | Description | Default | -|------|-------------|---------| -| `--connection-id ` | KTX connection id (required) | — | -| `--adapter ` | Ingest source adapter name (required) | — | -| `--source-dir ` | Directory containing source files | — | -| `--database-introspection-url ` | Daemon URL for live-database introspection | — | -| `--debug-llm-request-file ` | Write sanitized LLM request structure to a JSONL file | — | -| `--plain` | Print plain text output | `false` | -| `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output | `false` | -| `--no-input` | Disable interactive terminal input for visualization | — | - -### `dev ingest status` - -| Flag | Description | Default | -|------|-------------|---------| -| `--report-file ` | Bundle ingest report JSON file to render | — | -| `--plain` | Print plain text output | `false` | -| `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output | `false` | -| `--no-input` | Disable interactive terminal input for visualization | — | - -### `dev ingest watch` - -| Flag | Description | Default | -|------|-------------|---------| -| `--report-file ` | Bundle ingest report JSON file to render | — | -| `--plain` | Print plain text output | `false` | -| `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output (the default unless `--plain` or `--json` is set) | `true` | -| `--no-input` | Disable interactive terminal input for visualization | — | - -### `dev ingest replay` - -| Flag | Description | Default | -|------|-------------|---------| -| `--report-file ` | Bundle ingest report JSON file to render | — | -| `--plain` | Print plain text output | `false` | -| `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output | `false` | -| `--no-input` | Disable interactive terminal input for visualization | — | - -### `dev completion zsh` - -| Flag | Description | Default | 
-|------|-------------|---------| -| `--install` | Install zsh completion into `~/.zfunc` and update `~/.zshrc` | `false` | +| `--dry-run` | Preview runtime pruning without removing files | `false` | ## Examples ```bash -# Initialize a new KTX project ktx dev init - -# Initialize in a specific directory with a project name ktx dev init ./my-project --name "Analytics Context" - -# Re-initialize an existing project ktx dev init --force -# Check managed Python runtime readiness -ktx dev runtime doctor - -# Start the managed Python daemon +ktx dev runtime install --yes +ktx dev runtime status ktx dev runtime start - -# Run a low-level ingest with a specific adapter -ktx dev ingest run --connection-id my-dbt --adapter dbt - -# Run ingest from a specific source directory -ktx dev ingest run \ - --connection-id my-dbt \ - --adapter dbt \ - --source-dir ./dbt-project - -# View ingest status with the visual TUI -ktx dev ingest watch run-abc123 - -# Replay a stored ingest session -ktx dev ingest replay run-abc123 - -# View ingest status from a report file -ktx dev ingest status --report-file /tmp/ingest-report.json - -# Generate zsh completions -ktx dev completion zsh - -# Install zsh completions -ktx dev completion zsh --install +ktx dev runtime stop +ktx dev runtime prune --dry-run +ktx dev runtime prune --yes ``` -## Output - -`ktx dev` commands are diagnostic and may print plain text, JSON, or visual reports depending on the selected flags. 
- -| Mode | How to request it | Use case | -|------|-------------------|----------| -| Plain text | `--plain` or default diagnostic output | Human-readable terminal inspection | -| JSON | `--json` | Agent parsing and automation | -| Visual report | `--viz` | Interactive memory-flow and ingest debugging | - ## Common errors | Error | Cause | Recovery | |-------|-------|----------| -| Doctor reports missing runtime pieces | Packages, Python environment, or linked CLI are not ready | Run `pnpm install`, `pnpm run setup:dev`, and `uv sync --all-groups` | -| Ingest run cannot find adapter | `--adapter` does not match a supported source adapter | Use configured source names from `ktx.yaml` or run higher-level `ktx ingest` | -| Replay/report file cannot be read | The report path is wrong or the run id is not stored locally | Run `ktx dev ingest status --json` to discover stored run ids and report files | -| Visual output fails in CI | TUI rendering requires an interactive terminal | Use `--plain --no-input` or `--json --no-input` | +| Runtime status reports missing pieces | Packages, Python environment, or linked CLI are not ready | Run `pnpm install`, `pnpm run setup:dev`, `uv sync --all-groups`, then `ktx dev runtime status` | +| Runtime daemon does not start | The managed Python runtime is missing or stale | Run `ktx dev runtime install --yes`, then `ktx dev runtime start` | diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index 8ce9d9a5..e1c0e339 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -1,14 +1,13 @@ --- title: "ktx ingest" -description: "Build and refresh context from configured sources." +description: "Run and inspect local ingest memory-flow output." --- -Ingest context from your configured sources — dbt, Looker, Metabase, MetricFlow, LookML, or Notion. 
The ingest process extracts metadata from your tools, then uses an LLM agent to reconcile it with existing context, writing semantic sources and knowledge pages to your project. +`ktx ingest` runs adapter-level local ingest and renders stored ingest reports. ## Command signature ```bash -ktx ingest [connectionId] [options] ktx ingest [options] ``` @@ -16,80 +15,59 @@ ktx ingest [options] | Subcommand | Description | |-----------|-------------| -| `status [runId]` | Print status for the latest or selected public ingest run | -| `watch [runId]` | Open the latest or selected public ingest visual report | +| `run` | Run local ingest for one configured connection and source adapter | +| `status [runId]` | Print status for the latest or selected stored local ingest run or report file | +| `watch [runId]` | Open the latest or selected stored ingest visual report | +| `replay ` | Replay a stored ingest run or bundle report through memory-flow output | -## Options - -### `ingest` (run) +## `ingest run` | Flag | Description | Default | |------|-------------|---------| -| `--all` | Ingest every eligible configured source | `false` | +| `--connection-id ` | KTX connection id | Required | +| `--adapter ` | Ingest source adapter name | Required | +| `--source-dir ` | Directory containing source files | — | +| `--database-introspection-url ` | Daemon URL for live-database introspection | — | +| `--debug-llm-request-file ` | Write sanitized LLM request structure to a JSONL file | — | +| `--plain` | Print plain text output | `true` | | `--json` | Print JSON output | `false` | -| `--no-input` | Disable interactive terminal input | — | +| `--viz` | Render memory-flow TUI output | `false` | +| `--yes` | Install the managed Python runtime without prompting when required | `false` | +| `--no-input` | Disable interactive terminal input for visualization and runtime installation | — | -### `ingest status` +## `ingest status`, `watch`, and `replay` | Flag | Description | Default | 
|------|-------------|---------| +| `--report-file ` | Bundle ingest report JSON file to render | — | +| `--plain` | Print plain text output | `true` for `status` and `replay` | | `--json` | Print JSON output | `false` | -| `--no-input` | Disable interactive terminal input | — | - -### `ingest watch` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output instead of the visual report | `false` | -| `--no-input` | Disable interactive terminal input | — | +| `--viz` | Render memory-flow TUI output | `true` for `watch` | +| `--no-input` | Disable interactive terminal input for visualization | — | ## Examples ```bash -# Ingest from a specific connection -ktx ingest my-dbt-source +ktx ingest run --connection-id my-dbt-source --adapter dbt +ktx ingest run --connection-id prod-metabase --adapter metabase --yes -# Ingest from all eligible sources -ktx ingest --all - -# Check the status of the latest ingest ktx ingest status - -# Check the status of a specific ingest run ktx ingest status run-abc123 - -# Watch the latest ingest report -ktx ingest watch - -# Get ingest status as JSON ktx ingest status --json -``` -## Low-level ingest commands +ktx ingest watch +ktx ingest watch run-abc123 -For adapter-level control, use `ktx dev ingest`. See [`ktx dev`](/docs/cli-reference/ktx-dev) for the full low-level ingest surface including `run`, `status`, `watch`, and `replay` with output mode options (`--plain`, `--json`, `--viz`). - -## Output - -Ingest run commands print progress and create a stored ingest report. `ktx ingest status --json` returns the run state, adapter, connection, and summary information. 
- -```json -{ - "runId": "ingest-local-abc123", - "status": "completed", - "connectionId": "dbt-main", - "summary": { - "semanticSourcesChanged": 4, - "knowledgePagesChanged": 2 - } -} +ktx ingest replay run-abc123 +ktx ingest replay run-abc123 --viz +ktx ingest replay run-abc123 --report-file /tmp/ingest-report.json ``` ## Common errors | Error | Cause | Recovery | |-------|-------|----------| -| No eligible sources | `ktx.yaml` has no configured context source for ingest | Add a source with `ktx setup` or `ktx connection add`, then rerun ingest | | Ingest needs credentials | The source adapter requires API or git access | Configure the referenced environment variable or secret file | -| Latest run not found | No ingest run has been started in this project | Run `ktx ingest ` or `ktx ingest --all` first | +| Ingest run cannot find adapter | `--adapter` does not match a supported source adapter | Use a configured adapter such as `dbt`, `metabase`, `looker`, `lookml`, `notion`, or `live-database` | +| Latest run not found | No ingest run has been started in this project | Run `ktx ingest run --connection-id --adapter ` first | | Report watch fails in a non-interactive shell | Visual report needs a terminal | Use `ktx ingest status --json` for agent and CI workflows | diff --git a/docs-site/content/docs/cli-reference/ktx-scan.mdx b/docs-site/content/docs/cli-reference/ktx-scan.mdx index 0c37eccb..2f73ed99 100644 --- a/docs-site/content/docs/cli-reference/ktx-scan.mdx +++ b/docs-site/content/docs/cli-reference/ktx-scan.mdx @@ -1,163 +1,39 @@ --- title: "ktx scan" -description: "Run or inspect database scans." +description: "Run standalone database scans." --- -Discover your database schema — tables, columns, types, constraints, and relationships. Scanning is the first step in building context: KTX needs to understand your warehouse structure before it can build semantic sources. - -Scan commands live under `ktx dev scan`. 
See also the [Building Context](/docs/guides/building-context) guide for a walkthrough. +Discover a configured database connection's schema, including tables, columns, types, constraints, and optional relationship signals. ## Command signature ```bash -ktx dev scan [options] -ktx dev scan [options] +ktx scan [options] ``` -## Subcommands - -| Subcommand | Description | -|-----------|-------------| -| `status ` | Print status for a local scan run | -| `report ` | Print a local scan report | -| `relationships ` | Print relationship artifacts for a local scan run | -| `relationship-apply ` | Apply accepted relationship review decisions as manual manifest joins | -| `relationship-feedback` | Export persisted relationship review decisions as calibration labels | -| `relationship-calibration` | Summarize relationship feedback labels against current score thresholds | -| `relationship-thresholds` | Evaluate relationship feedback labels for offline threshold advice | - ## Options -### `scan` (run) - | Flag | Description | Default | |------|-------------|---------| | `--mode ` | Scan mode: `structural`, `enriched`, or `relationships` | `structural` | | `--dry-run` | Run without writing scan results | `false` | | `--database-introspection-url ` | Daemon URL for live-database introspection | — | - -### `scan report` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print the raw scan report JSON | `false` | - -### `scan relationships` - -| Flag | Description | Default | -|------|-------------|---------| -| `--status ` | Filter by status: `accepted`, `review`, `rejected`, `skipped`, or `all` | `review` | -| `--limit ` | Maximum relationships to print per status | `25` | -| `--accept ` | Record an accepted decision for a relationship candidate | — | -| `--reject ` | Record a rejected decision for a relationship candidate | — | -| `--note ` | Attach a note when recording a relationship review decision | — | -| `--reviewer ` | Reviewer name for a 
relationship review decision | — | -| `--json` | Print relationship artifacts as JSON | `false` | - -### `scan relationship-apply` - -| Flag | Description | Default | -|------|-------------|---------| -| `--all-accepted` | Apply all accepted relationship review decisions for the scan run | `false` | -| `--candidate ` | Apply one accepted relationship review decision; repeatable | — | -| `--dry-run` | Preview relationships that would be written without rewriting manifest shards | `false` | -| `--json` | Print the apply result as JSON | `false` | - -### `scan relationship-feedback` - -| Flag | Description | Default | -|------|-------------|---------| -| `--connection ` | Only export labels for one KTX connection | — | -| `--decision ` | Filter: `accepted`, `rejected`, or `all` | `all` | -| `--json` | Print the export as JSON | `false` | -| `--jsonl` | Print labels as newline-delimited JSON | `false` | - -### `scan relationship-calibration` - -| Flag | Description | Default | -|------|-------------|---------| -| `--connection ` | Only calibrate labels for one KTX connection | — | -| `--decision ` | Filter: `accepted`, `rejected`, or `all` | `all` | -| `--accept-threshold ` | Score threshold treated as predicted accepted (0–1) | `0.85` | -| `--review-threshold ` | Score threshold treated as predicted review (0–1) | `0.55` | -| `--json` | Print the calibration report as JSON | `false` | - -### `scan relationship-thresholds` - -| Flag | Description | Default | -|------|-------------|---------| -| `--connection ` | Only evaluate labels for one KTX connection | — | -| `--min-total-labels ` | Minimum scored labels before advice can be ready | `20` | -| `--min-accepted-labels ` | Minimum accepted labels before advice can be ready | `5` | -| `--min-rejected-labels ` | Minimum rejected labels before advice can be ready | `5` | -| `--json` | Print the threshold advice report as JSON | `false` | +| `--yes` | Install the managed Python runtime without prompting when required | 
`false` | +| `--no-input` | Disable interactive managed runtime installation | — | ## Examples ```bash -# Run a structural scan of a connection -ktx dev scan my-warehouse - -# Run a scan with LLM enrichment -ktx dev scan my-warehouse --mode enriched - -# Run a scan with relationship detection -ktx dev scan my-warehouse --mode relationships - -# Dry-run a scan (don't write results) -ktx dev scan my-warehouse --dry-run - -# Check the status of a scan run -ktx dev scan status run-abc123 - -# View the scan report -ktx dev scan report run-abc123 - -# View scan report as JSON -ktx dev scan report run-abc123 --json - -# List relationship candidates pending review -ktx dev scan relationships run-abc123 - -# List all relationships regardless of status -ktx dev scan relationships run-abc123 --status all - -# Accept a relationship candidate -ktx dev scan relationships run-abc123 --accept candidate-xyz - -# Reject a relationship candidate with a note -ktx dev scan relationships run-abc123 --reject candidate-xyz --note "false positive" - -# Apply all accepted relationships to the manifest -ktx dev scan relationship-apply run-abc123 --all-accepted - -# Preview what would be applied -ktx dev scan relationship-apply run-abc123 --all-accepted --dry-run - -# Export relationship feedback as calibration labels -ktx dev scan relationship-feedback --json - -# Calibrate relationship detection thresholds -ktx dev scan relationship-calibration --accept-threshold 0.9 --review-threshold 0.6 - -# Get threshold advice based on review decisions -ktx dev scan relationship-thresholds +ktx scan my-warehouse +ktx scan my-warehouse --mode enriched +ktx scan my-warehouse --mode relationships +ktx scan my-warehouse --dry-run +ktx scan my-warehouse --database-introspection-url http://127.0.0.1:8765 ``` ## Output -Scan commands write scan artifacts under the KTX project directory and print status or report summaries. Use `--json` on report and relationship commands when an agent needs structured output. 
- -```json -{ - "runId": "scan-local-abc123", - "status": "completed", - "mode": "structural", - "changes": { - "tablesAdded": 42 - } -} -``` +`ktx scan` prints a human summary and writes scan artifacts under the KTX project directory unless `--dry-run` is set. Use `ktx status` after a scan to inspect project readiness and next setup work. ## Common errors @@ -165,5 +41,4 @@ Scan commands write scan artifacts under the KTX project directory and print sta |-------|-------|----------| | Scan cannot connect | Connection credentials or network access are invalid | Run `ktx connection test ` and update the connection before scanning | | Enriched scan cannot describe columns | LLM credentials are missing or invalid | Complete LLM setup with `ktx setup` before enriched scans | -| Relationship apply writes nothing | No accepted candidates match the provided run id or candidate ids | Inspect `ktx dev scan relationships --status accepted` first | -| Calibration is not ready | Too few reviewed relationship labels exist | Review and accept/reject more candidates, then rerun calibration | +| Relationship scan has limited evidence | The connector cannot provide optional validation or statistics | Re-run with a connector that supports the missing capability, or treat relationship output as lower-confidence context | diff --git a/docs-site/content/docs/concepts/context-as-code.mdx b/docs-site/content/docs/concepts/context-as-code.mdx index e40665ec..3c43082e 100644 --- a/docs-site/content/docs/concepts/context-as-code.mdx +++ b/docs-site/content/docs/concepts/context-as-code.mdx @@ -59,7 +59,7 @@ dbt / Looker / Metabase / Notion A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. 
-Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest --all --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. +Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest run --connection-id <connection-id> --adapter <adapter> --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. Once merged, agents querying through the KTX CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request. diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index ece3ceac..13b973e3 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -239,7 +239,7 @@ Agent integration ready: yes (claude-code:project) | `ktx: command not found` | The KTX package is not installed globally, or the shell cannot find the global binary | Run `npm install -g @kaelio/ktx` and open a new shell | | LLM health check fails | Missing, invalid, or unauthorized Anthropic API key | Export `ANTHROPIC_API_KEY` or rerun `ktx setup` and choose the file-backed secret option | | OpenAI embedding check fails | `OPENAI_API_KEY` is missing when OpenAI embeddings are selected | Export `OPENAI_API_KEY`, or rerun setup and choose local sentence-transformers embeddings | -| Local embeddings hang
or fail | The managed Python runtime cannot start or the local model runtime is unavailable | Install `uv`, run `ktx dev runtime status`, then run `ktx dev runtime install --feature local-embeddings --yes` and rerun setup | | Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx connection add ... --force` or rerun setup | | `KTX context built: no` in `ktx status` | Setup saved configuration but did not build context | Run `ktx setup` and choose to build context now | | Agent integration is incomplete | Setup skipped the agents step or the target was not installed | Run `ktx setup --agents --target codex --project` using the target you need | diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index 31d55bac..25d873d9 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -12,7 +12,7 @@ Scanning connects to your database and extracts structural metadata. KTX stores ### Running a scan ```bash -ktx dev scan +ktx scan ``` This runs a structural scan by default. You can control what the scan does with the `--mode` flag: @@ -25,25 +25,18 @@ This runs a structural scan by default. You can control what the scan does with ```bash # Scan with relationship detection -ktx dev scan my-postgres --mode relationships +ktx scan my-postgres --mode relationships # Preview without writing results -ktx dev scan my-postgres --dry-run +ktx scan my-postgres --dry-run ``` -### Checking scan status +### Checking scan results -Every scan produces a run ID. Use it to check progress or review results: +Every scan prints a summary and writes local artifacts. 
Use `ktx status` after a scan to review project readiness and follow-up setup work: ```bash -# Check status of a scan run -ktx dev scan status - -# Print the full scan report -ktx dev scan report - -# Get the report as JSON for scripting -ktx dev scan report --json +ktx status ``` ### Relationship detection @@ -56,49 +49,7 @@ Many databases lack declared foreign keys. KTX infers relationships by scoring c | 0.55 – 0.84 | `review` | Plausible — needs human review | | < 0.55 | `rejected` | Low confidence — not applied | -After a relationship scan, review the candidates: - -```bash -# Show candidates pending review (default) -ktx dev scan relationships - -# Show all candidates regardless of status -ktx dev scan relationships --status all - -# Accept a specific candidate -ktx dev scan relationships --accept - -# Reject a candidate with a note -ktx dev scan relationships --reject --note "These columns share a name but are unrelated" -``` - -Once you've reviewed candidates, apply the accepted ones as joins in your semantic layer: - -```bash -# Apply all accepted relationships -ktx dev scan relationship-apply --all-accepted - -# Preview what would be applied -ktx dev scan relationship-apply --all-accepted --dry-run - -# Apply a specific candidate -ktx dev scan relationship-apply --candidate -``` - -### Calibrating thresholds - -As you review more relationships, KTX can evaluate whether the default thresholds (0.85 accept, 0.55 review) are optimal for your schema: - -```bash -# See how your feedback aligns with current thresholds -ktx dev scan relationship-calibration --connection my-postgres - -# Get threshold recommendations (needs 20+ labels, 5+ accepted, 5+ rejected) -ktx dev scan relationship-thresholds --connection my-postgres - -# Export your review decisions as calibration labels -ktx dev scan relationship-feedback --connection my-postgres -``` +Relationship scans run with `ktx scan --mode relationships`. 
This command only executes the scan; relationship review and calibration subcommands are not part of the current CLI surface. ## Ingestion @@ -115,19 +66,7 @@ Each ingest run follows this flow: ### Running an ingest ```bash -# Ingest one configured context source -ktx ingest my-dbt-source - -# Ingest every configured context source -ktx ingest --all -``` - -The public `ktx ingest` command uses the source configuration in `ktx.yaml`, including the source `driver` and any adapter-specific paths or credentials. - -For adapter-level debugging, use the low-level `ktx dev ingest run` command: - -```bash -ktx dev ingest run --connection-id my-dbt-source --adapter dbt +ktx ingest run --connection-id my-dbt-source --adapter dbt ``` Useful low-level flags: @@ -152,7 +91,7 @@ ktx ingest status ktx ingest watch # Replay a past ingest run -ktx dev ingest replay +ktx ingest replay ``` The `watch` command opens an interactive TUI that shows the memory-flow output — every tool call, LLM decision, and artifact written during the ingest. @@ -235,7 +174,7 @@ Orders in "pending" status for more than 48 hours are flagged for review. Every ingest session records a full transcript — tool calls, LLM responses, and write decisions. You can replay any session to debug why a source was written a certain way: ```bash -ktx dev ingest replay --viz +ktx ingest replay --viz ``` This opens the same TUI view as the original run, letting you step through the agent's reasoning. diff --git a/docs-site/content/docs/integrations/context-sources.mdx b/docs-site/content/docs/integrations/context-sources.mdx index 02554e08..904e3f95 100644 --- a/docs-site/content/docs/integrations/context-sources.mdx +++ b/docs-site/content/docs/integrations/context-sources.mdx @@ -13,7 +13,7 @@ Agents should configure and ingest context sources in this order: 1. Add the context source connection in `ktx.yaml` or with `ktx setup`. 2. Store tokens as `env:NAME` or `file:/path/to/secret`. -3. 
Run `ktx ingest ` for one source or `ktx ingest --all`. +3. Run `ktx ingest run --connection-id <connection-id> --adapter <adapter>` once for each source you want to ingest. 4. Check progress with `ktx ingest status --json`. 5. Review generated `semantic-layer/` YAML and `knowledge/` Markdown files in git. 6. Validate changed semantic sources with `ktx sl validate`. diff --git a/examples/package-artifacts/README.md b/examples/package-artifacts/README.md index be161d88..8c92f84e 100644 --- a/examples/package-artifacts/README.md +++ b/examples/package-artifacts/README.md @@ -13,7 +13,7 @@ generated local project. The managed Python runtime smoke requires `uv` on `PATH`, isolates `KTX_RUNTIME_ROOT`, verifies `ktx dev runtime status`, runs `ktx sl query --yes` to -install the core runtime from the bundled wheel, checks `ktx dev runtime doctor`, +install the core runtime from the bundled wheel, checks `ktx dev runtime status`, starts and reuses the managed daemon, stops it, previews a stale runtime with `ktx dev runtime prune --dry-run`, verifies confirmation is required, and removes the stale runtime with `ktx dev runtime prune --yes`. diff --git a/examples/postgres-historic/README.md b/examples/postgres-historic/README.md index c8c85cdc..40ae1674 100644 --- a/examples/postgres-historic/README.md +++ b/examples/postgres-historic/README.md @@ -95,7 +95,7 @@ note, not a warning. Run local historic-SQL ingest: ```bash -pnpm run ktx -- dev ingest run --project-dir /tmp/ktx-postgres-historic \ +pnpm run ktx -- ingest run --project-dir /tmp/ktx-postgres-historic \ --connection-id warehouse \ --adapter historic-sql \ --plain \ @@ -103,7 +103,7 @@ pnpm run ktx -- dev ingest run --project-dir /tmp/ktx-postgres-historic \ --no-input ``` -The full `dev ingest run` path also runs curation WorkUnits, so it requires a +The full `ingest run` path also runs curation WorkUnits, so it requires a configured LLM provider. Inspect the latest manifest: @@ -127,6 +127,6 @@ table.
- Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`. - Empty snapshot: rerun `scripts/generate-workload.sh base` and keep `--historic-sql-min-executions 2` for the smoke. -- SQL-analysis failures: run `pnpm run ktx -- dev runtime doctor` from the KTX +- SQL-analysis failures: run `pnpm run ktx -- dev runtime status` from the KTX repository root and confirm `uv`, the bundled Python wheel, and the managed runtime all pass. diff --git a/packages/cli/src/agent-search-readiness.test.ts b/packages/cli/src/agent-search-readiness.test.ts index cfb2999e..432afa90 100644 --- a/packages/cli/src/agent-search-readiness.test.ts +++ b/packages/cli/src/agent-search-readiness.test.ts @@ -15,7 +15,7 @@ describe('agent semantic-layer search readiness guidance', () => { nextSteps: [ 'ktx setup --project-dir /tmp/ktx-search', 'ktx status --project-dir /tmp/ktx-search', - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', 'ktx agent sl list --json --query "gross revenue" --project-dir /tmp/ktx-search', ], }); diff --git a/packages/cli/src/agent-search-readiness.ts b/packages/cli/src/agent-search-readiness.ts index c3927613..e4de7318 100644 --- a/packages/cli/src/agent-search-readiness.ts +++ b/packages/cli/src/agent-search-readiness.ts @@ -23,7 +23,7 @@ function baseNextSteps(projectDir: string, query: string | undefined): string[] return [ `ktx setup --project-dir ${projectDir}`, `ktx status --project-dir ${projectDir}`, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', projectSearchCommand(projectDir, query), ]; } diff --git a/packages/cli/src/agent.test.ts b/packages/cli/src/agent.test.ts index 2c86598d..566f5763 100644 --- a/packages/cli/src/agent.test.ts +++ b/packages/cli/src/agent.test.ts @@ -328,7 +328,7 @@ describe('runKtxAgent', () => { nextSteps: [ `ktx setup --project-dir ${tempDir}`, `ktx status --project-dir ${tempDir}`, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', `ktx agent sl list --json --query "gross revenue" 
--project-dir ${tempDir}`, ], }, @@ -355,7 +355,7 @@ describe('runKtxAgent', () => { nextSteps: [ `ktx setup --project-dir ${tempDir}`, `ktx status --project-dir ${tempDir}`, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', `ktx agent sl list --json --query "revenue" --project-dir ${tempDir}`, ], }, diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index e2091bef..682c027a 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -2,8 +2,9 @@ import { Command, InvalidArgumentError } from '@commander-js/extra-typings'; import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; import { registerAgentCommands } from './commands/agent-commands.js'; import { registerConnectionCommands } from './commands/connection-commands.js'; +import { registerIngestCommands } from './commands/ingest-commands.js'; import { registerWikiCommands } from './commands/knowledge-commands.js'; -import { registerPublicIngestCommands } from './commands/public-ingest-commands.js'; +import { registerScanCommands } from './commands/scan-commands.js'; import { registerSetupCommands } from './commands/setup-commands.js'; import { registerSlCommands } from './commands/sl-commands.js'; import { registerStatusCommands } from './commands/status-commands.js'; @@ -53,7 +54,7 @@ type CommandPathNode = CommandWithGlobalOptions & { parent?: CommandPathNode | null; }; -const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']); +const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']); export interface CommandWithGlobalOptions { opts: () => object; @@ -151,7 +152,7 @@ function isProjectAwareCommand(path: string[]): boolean { const rootCommand = path[1]; if (rootCommand === 'dev') { - return path[2] !== undefined && path[2] !== 'completion' && path[2] !== 'runtime'; + return path[2] !== undefined && path[2] !== 'runtime'; 
} return rootCommand !== undefined && PROJECT_AWARE_ROOT_COMMANDS.has(rootCommand); } @@ -176,9 +177,6 @@ function shouldSuppressProjectDirLine(path: string[], options: Record + await (ingestDeps.ingest ?? defaultRunIngest)(ingestArgs, ingestIo), + }); + registerScanCommands(program, context); registerWikiCommands(program, context); registerSlCommands(program, context); registerStatusCommands(program, context); diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 8b143373..8fc06589 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -7,7 +7,6 @@ import type { KtxConnectionArgs } from './connection.js'; import type { KtxDoctorArgs } from './doctor.js'; import type { KtxIngestArgs } from './ingest.js'; import type { KtxKnowledgeArgs } from './knowledge.js'; -import type { KtxPublicIngestArgs } from './public-ingest.js'; import type { KtxRuntimeArgs } from './runtime.js'; import type { KtxScanArgs } from './scan.js'; import type { KtxSetupArgs } from './setup.js'; @@ -37,7 +36,6 @@ export interface KtxCliDeps { connectionMetabaseSetup?: (args: KtxConnectionMetabaseSetupArgs, io: KtxCliIo) => Promise; doctor?: (args: KtxDoctorArgs, io: KtxCliIo) => Promise; ingest?: (args: KtxIngestArgs, io: KtxCliIo) => Promise; - publicIngest?: (args: KtxPublicIngestArgs, io: KtxCliIo) => Promise; runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise; scan?: (args: KtxScanArgs, io: KtxCliIo) => Promise; knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise; diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index 1a442af7..9ffe6de3 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -68,20 +68,3 @@ export const slQueryCommandSchema = z.object({ runtimeInstallPolicy: z.enum(['prompt', 'auto', 'never']), maxRows: z.number().int().positive().optional(), }); - -export const publicIngestRunCommandSchema = z.object({ - command: 
z.literal('run'), - projectDir: projectDirSchema, - targetConnectionId: safeConnectionIdSchema.optional(), - all: z.boolean(), - json: z.boolean(), - inputMode: z.enum(['auto', 'disabled']), -}); - -export const publicIngestReadCommandSchema = z.object({ - command: z.enum(['status', 'watch']), - projectDir: projectDirSchema, - runId: z.string().min(1).optional(), - json: z.boolean(), - inputMode: z.enum(['auto', 'disabled']), -}); diff --git a/packages/cli/src/commands/completion-commands.ts b/packages/cli/src/commands/completion-commands.ts deleted file mode 100644 index 23c45429..00000000 --- a/packages/cli/src/commands/completion-commands.ts +++ /dev/null @@ -1,47 +0,0 @@ -import type { CommandUnknownOpts } from '@commander-js/extra-typings'; -import type { KtxCliCommandContext } from '../cli-program.js'; -import { completeCommanderInput, installZshCompletion, zshCompletionScript } from '../completion.js'; - -export function registerCompletionCommands( - program: CommandUnknownOpts, - context: KtxCliCommandContext, - completionRoot: CommandUnknownOpts = program, -): void { - program - .command('completion') - .description('Generate shell completion scripts') - .command('zsh') - .description('Generate zsh completion script') - .option('--install', 'Install zsh completion into ~/.zfunc and update ~/.zshrc', false) - .action(async (options: { install?: boolean }) => { - if (options.install === true) { - const result = await installZshCompletion(); - context.io.stdout.write(`Installed zsh completion: ${result.completionPath}\n`); - context.io.stdout.write(`Updated zsh config: ${result.zshrcPath}\n`); - context.io.stdout.write('Restart your shell or run: source ~/.zshrc\n'); - context.setExitCode(0); - return; - } - context.io.stdout.write(zshCompletionScript()); - context.setExitCode(0); - }); - - program - .command('__complete', { hidden: true }) - .description('Internal shell completion endpoint') - .requiredOption('--shell ', 'Shell requesting completions') - 
.requiredOption('--position ', 'Current shell word position', (value) => Number(value)) - .argument('[words...]', 'Current shell words') - .allowUnknownOption() - .allowExcessArguments() - .action((words: string[], options: { shell: string; position: number }) => { - if (options.shell !== 'zsh') { - context.setExitCode(1); - return; - } - for (const completion of completeCommanderInput(completionRoot, { position: options.position, words })) { - context.io.stdout.write(`${completion}\n`); - } - context.setExitCode(0); - }); -} diff --git a/packages/cli/src/commands/connection-metabase-commands.ts b/packages/cli/src/commands/connection-metabase-commands.ts index 1a07be3a..c20b8e86 100644 --- a/packages/cli/src/commands/connection-metabase-commands.ts +++ b/packages/cli/src/commands/connection-metabase-commands.ts @@ -88,7 +88,7 @@ export function registerConnectionMetabaseCommands(connection: Command, context: ' ktx connection mapping refresh --auto-accept\n' + ' ktx connection mapping set databaseMappings =\n' + ' ktx connection mapping set-sync-enabled --enabled true\n' + - ' ktx ingest \n', + ' ktx ingest run --connection-id --adapter metabase\n', ) .option( '--map ', diff --git a/packages/cli/src/commands/connection-metabase-setup.test.ts b/packages/cli/src/commands/connection-metabase-setup.test.ts index cf7308d7..9d462bbd 100644 --- a/packages/cli/src/commands/connection-metabase-setup.test.ts +++ b/packages/cli/src/commands/connection-metabase-setup.test.ts @@ -230,7 +230,7 @@ describe('runKtxConnectionMetabaseSetup', () => { expect(io.stdout()).toContain('Connection: metabase'); expect(io.stdout()).toContain('Discovered 1 database'); - expect(io.stdout()).toContain(`ktx ingest metabase --project-dir ${projectDir}`); + expect(io.stdout()).toContain(`ktx ingest run --connection-id metabase --adapter metabase --project-dir ${projectDir}`); expect(io.stdout()).not.toContain('mb_example'); expect(io.stderr()).not.toContain('mb_example'); @@ -784,7 +784,7 @@ 
describe('runKtxConnectionMetabaseSetup', () => { const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); expect(config).toContain('driver: metabase'); - expect(io.stderr()).toContain(`ktx ingest metabase --project-dir ${projectDir}`); + expect(io.stderr()).toContain(`ktx ingest run --connection-id metabase --adapter metabase --project-dir ${projectDir}`); const updatedProject = await loadKtxProject({ projectDir }); const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); diff --git a/packages/cli/src/commands/connection-metabase-setup.ts b/packages/cli/src/commands/connection-metabase-setup.ts index 9b5e21d7..2321ea3d 100644 --- a/packages/cli/src/commands/connection-metabase-setup.ts +++ b/packages/cli/src/commands/connection-metabase-setup.ts @@ -743,7 +743,9 @@ export async function runKtxConnectionMetabaseSetup( io.stdout.write(`Connection: ${connectionId}\n`); io.stdout.write(`Discovered ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); - io.stdout.write(`Next: ktx ingest ${connectionId} --project-dir ${args.projectDir}\n`); + io.stdout.write( + `Next: ktx ingest run --connection-id ${connectionId} --adapter metabase --project-dir ${args.projectDir}\n`, + ); if (args.runIngest) { const ingestRunner = deps.runPublicIngest ?? 
runKtxPublicIngest; @@ -759,7 +761,9 @@ export async function runKtxConnectionMetabaseSetup( io, ); if (exitCode !== 0) { - io.stderr.write(`Ingest failed; re-run: ktx ingest ${connectionId} --project-dir ${args.projectDir}\n`); + io.stderr.write( + `Ingest failed; re-run: ktx ingest run --connection-id ${connectionId} --adapter metabase --project-dir ${args.projectDir}\n`, + ); return 1; } } diff --git a/packages/cli/src/commands/doctor-commands.ts b/packages/cli/src/commands/doctor-commands.ts deleted file mode 100644 index a7127e48..00000000 --- a/packages/cli/src/commands/doctor-commands.ts +++ /dev/null @@ -1,53 +0,0 @@ -import type { Command } from '@commander-js/extra-typings'; -import { type CommandWithGlobalOptions, type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; -import type { KtxDoctorArgs } from '../doctor.js'; -import { profileMark } from '../startup-profile.js'; - -profileMark('module:commands/doctor-commands'); - -function outputMode(options: { json?: boolean }): 'plain' | 'json' { - return options.json === true ? 'json' : 'plain'; -} - -function inputMode(options: { input?: boolean }): { inputMode?: 'disabled' } { - return options.input === false ? { inputMode: 'disabled' } : {}; -} - -async function runDoctorArgs(context: KtxCliCommandContext, args: KtxDoctorArgs): Promise { - const runner = context.deps.doctor ?? 
(await import('../doctor.js')).runKtxDoctor; - context.setExitCode(await runner(args, context.io)); -} - -export function registerDoctorCommands(program: Command, context: KtxCliCommandContext): void { - const doctor = program - .command('doctor') - .description('Check KTX setup and project readiness') - .option('--json', 'Print JSON output', false) - .option('--no-input', 'Disable interactive terminal input') - .action(async (options: { json?: boolean; input?: boolean }, command) => { - await runDoctorArgs(context, { - command: 'project', - projectDir: resolveCommandProjectDir(command), - outputMode: outputMode(options), - ...inputMode(options), - }); - }); - - doctor - .command('setup') - .description('Check KTX install, build, and local runtime readiness') - .option('--json', 'Print JSON output', false) - .option('--no-input', 'Disable interactive terminal input') - .action( - async ( - _options: { json?: boolean; input?: boolean }, - command: CommandWithGlobalOptions, - ) => { - const options = (command.optsWithGlobals ? 
command.optsWithGlobals() : command.opts()) as { - json?: boolean; - input?: boolean; - }; - await runDoctorArgs(context, { command: 'setup', outputMode: outputMode(options), ...inputMode(options) }); - }, - ); -} diff --git a/packages/cli/src/commands/public-ingest-commands.ts b/packages/cli/src/commands/public-ingest-commands.ts deleted file mode 100644 index dfe63c42..00000000 --- a/packages/cli/src/commands/public-ingest-commands.ts +++ /dev/null @@ -1,109 +0,0 @@ -import { InvalidArgumentError, type Command } from '@commander-js/extra-typings'; -import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; -import { publicIngestReadCommandSchema, publicIngestRunCommandSchema } from '../command-schemas.js'; -import type { KtxPublicIngestArgs, KtxPublicIngestInputMode } from '../public-ingest.js'; -import { profileMark } from '../startup-profile.js'; - -profileMark('module:commands/public-ingest-commands'); - -interface PublicIngestOptions { - all?: boolean; - json?: boolean; - input?: boolean; -} - -function inputMode(options: { input?: boolean }): KtxPublicIngestInputMode { - return options.input === false ? 'disabled' : 'auto'; -} - -async function runPublicIngestArgs(context: KtxCliCommandContext, args: KtxPublicIngestArgs): Promise { - const runner = context.deps.publicIngest ?? 
(await import('../public-ingest.js')).runKtxPublicIngest; - context.setExitCode(await runner(args, context.io)); -} - -function parsePublicIngestConnectionId(value: string): string { - if (value === 'run') { - throw new InvalidArgumentError('run is reserved; use ktx dev ingest run for low-level adapter syntax'); - } - return value; -} - -export function registerPublicIngestCommands(program: Command, context: KtxCliCommandContext): void { - const ingest = program - .command('ingest') - .description('Build and refresh KTX context from configured sources') - .usage('[options] [connectionId]') - .argument('[connectionId]', 'Connection id to ingest', parsePublicIngestConnectionId) - .option('--all', 'Ingest every eligible configured source', false) - .option('--json', 'Print JSON output', false) - .option('--no-input', 'Disable interactive terminal input') - .addHelpText( - 'after', - [ - '', - 'Examples:', - ' ktx ingest [options]', - ' ktx ingest --all [options]', - ' ktx ingest status [runId] [options]', - ' ktx ingest watch [runId] [options]', - '', - 'Project directory defaults to KTX_PROJECT_DIR when set, otherwise the current working directory.', - '', - ].join('\n'), - ) - .showHelpAfterError() - .hook('preAction', (_thisCommand, actionCommand) => { - context.writeDebug?.('ingest', actionCommand); - }) - .action(async (connectionId: string | undefined, _options: PublicIngestOptions, command) => { - const options = command.opts(); - if (options.all === true && connectionId) { - throw new Error('ktx ingest accepts either --all or , not both'); - } - const args = publicIngestRunCommandSchema.parse({ - command: 'run', - projectDir: resolveCommandProjectDir(command), - ...(connectionId ? 
{ targetConnectionId: connectionId } : {}), - all: options.all === true, - json: options.json === true, - inputMode: inputMode(options), - }); - await runPublicIngestArgs(context, args); - }); - - ingest - .command('status') - .description('Print status for the latest or selected public ingest run') - .argument('[runId]', 'Public ingest run id') - .option('--json', 'Print JSON output', false) - .option('--no-input', 'Disable interactive terminal input') - .action(async (runId: string | undefined, _options: PublicIngestOptions, command) => { - const options = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as PublicIngestOptions; - const args = publicIngestReadCommandSchema.parse({ - command: 'status', - projectDir: resolveCommandProjectDir(command), - ...(runId ? { runId } : {}), - json: options.json === true, - inputMode: inputMode(options), - }); - await runPublicIngestArgs(context, args); - }); - - ingest - .command('watch') - .description('Open the latest or selected public ingest visual report') - .argument('[runId]', 'Public ingest run id') - .option('--json', 'Print JSON output instead of the visual report', false) - .option('--no-input', 'Disable interactive terminal input') - .action(async (runId: string | undefined, _options: PublicIngestOptions, command) => { - const options = (command.optsWithGlobals ? command.optsWithGlobals() : command.opts()) as PublicIngestOptions; - const args = publicIngestReadCommandSchema.parse({ - command: 'watch', - projectDir: resolveCommandProjectDir(command), - ...(runId ? 
{ runId } : {}), - json: options.json === true, - inputMode: inputMode(options), - }); - await runPublicIngestArgs(context, args); - }); -} diff --git a/packages/cli/src/commands/runtime-commands.ts b/packages/cli/src/commands/runtime-commands.ts index 3ce7d9ba..b57eae86 100644 --- a/packages/cli/src/commands/runtime-commands.ts +++ b/packages/cli/src/commands/runtime-commands.ts @@ -74,18 +74,6 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand }); }); - runtime - .command('doctor') - .description('Check managed Python runtime prerequisites and installation') - .option('--json', 'Print JSON output', false) - .action(async (options: { json?: boolean }) => { - await runRuntimeArgs(context, { - command: 'doctor', - cliVersion: context.packageInfo.version, - json: options.json === true, - }); - }); - runtime .command('prune') .description('Remove stale managed Python runtimes for older CLI versions') diff --git a/packages/cli/src/commands/scan-commands.ts b/packages/cli/src/commands/scan-commands.ts index fc30fafa..2c19bcdf 100644 --- a/packages/cli/src/commands/scan-commands.ts +++ b/packages/cli/src/commands/scan-commands.ts @@ -1,5 +1,5 @@ -import { type Command, InvalidArgumentError, Option } from '@commander-js/extra-typings'; -import { type KtxCliCommandContext, parsePositiveIntegerOption, resolveCommandProjectDir } from '../cli-program.js'; +import { type Command, InvalidArgumentError } from '@commander-js/extra-typings'; +import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; import { runtimeInstallPolicyFromFlags } from '../managed-python-command.js'; import type { KtxScanArgs } from '../scan.js'; import { profileMark } from '../startup-profile.js'; @@ -13,6 +13,16 @@ async function runScanArgs(context: KtxCliCommandContext, args: KtxScanArgs): Pr type KtxScanModeOption = Extract['mode']; +const REMOVED_SCAN_SUBCOMMAND_NAMES = new Set([ + 'status', + 'report', + 'relationships', + 
'relationship-apply', + 'relationship-feedback', + 'relationship-calibration', + 'relationship-thresholds', +]); + function parseScanModeOption(value: string): KtxScanModeOption { if (value === 'structural' || value === 'enriched' || value === 'relationships') { return value; @@ -20,82 +30,18 @@ function parseScanModeOption(value: string): KtxScanModeOption { throw new InvalidArgumentError('Allowed choices are structural, enriched, relationships'); } -type KtxRelationshipStatusOption = Extract['status']; -type KtxRelationshipFeedbackDecisionOption = Extract['decision']; - -function parseRelationshipStatusOption(value: string): KtxRelationshipStatusOption { - if (value === 'accepted' || value === 'review' || value === 'rejected' || value === 'skipped' || value === 'all') { - return value; - } - throw new InvalidArgumentError('Allowed choices are accepted, review, rejected, skipped, all'); -} - -function parseRelationshipFeedbackDecisionOption(value: string): KtxRelationshipFeedbackDecisionOption { - if (value === 'accepted' || value === 'rejected' || value === 'all') { - return value; - } - throw new InvalidArgumentError('Allowed choices are accepted, rejected, all'); -} - -function parseNonEmptyOption(value: string): string { - if (value.trim().length === 0) { - throw new InvalidArgumentError('must not be empty'); +function parseConnectionId(value: string): string { + if (REMOVED_SCAN_SUBCOMMAND_NAMES.has(value)) { + throw new InvalidArgumentError(`"${value}" is not a scan connection id`); } return value; } -function parseRelationshipCalibrationThreshold(value: string): number { - const parsed = Number(value); - if (Number.isFinite(parsed) && parsed >= 0 && parsed <= 1) { - return parsed; - } - throw new InvalidArgumentError('Allowed range is 0 through 1'); -} - -function relationshipDecisionArgs(options: { - accept?: string; - reject?: string; - reviewer?: string; - note?: string; - json?: boolean; -}): Pick< - Extract, - 'candidateId' | 'decision' | 'reviewer' | 
'note' | 'json' -> | null { - const decisionCount = [options.accept !== undefined, options.reject !== undefined].filter(Boolean).length; - if (decisionCount > 1) { - throw new Error('Only one relationship review decision option can be used: --accept and --reject conflict'); - } - if (options.accept !== undefined) { - return { - candidateId: options.accept, - decision: 'accepted', - reviewer: options.reviewer ?? 'ktx', - note: options.note ?? null, - json: options.json === true, - }; - } - if (options.reject !== undefined) { - return { - candidateId: options.reject, - decision: 'rejected', - reviewer: options.reviewer ?? 'ktx', - note: options.note ?? null, - json: options.json === true, - }; - } - return null; -} - -function collectRelationshipCandidateOption(value: string, previous: string[]): string[] { - return [...previous, parseNonEmptyOption(value)]; -} - export function registerScanCommands(program: Command, context: KtxCliCommandContext): void { - const scan = program + program .command('scan') - .description('Run or inspect standalone connection scans') - .argument('[connectionId]', 'KTX connection id to scan') + .description('Run a standalone connection scan') + .argument('', 'KTX connection id to scan', parseConnectionId) .option( '--mode ', 'Scan mode: structural, enriched, relationships (default: structural)', @@ -113,13 +59,7 @@ export function registerScanCommands(program: Command, context: KtxCliCommandCon .hook('preAction', (_thisCommand, actionCommand) => { context.writeDebug?.('scan', actionCommand); }) - .action(async (connectionId: string | undefined, options, command) => { - if (!connectionId) { - scan.outputHelp(); - context.io.stderr.write('ktx dev scan requires or a subcommand\n'); - context.setExitCode(1); - return; - } + .action(async (connectionId: string, options, command) => { const mode = options.mode ?? 
'structural'; await runScanArgs(context, { command: 'run', @@ -133,226 +73,4 @@ export function registerScanCommands(program: Command, context: KtxCliCommandCon runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options), }); }); - - scan - .command('status') - .description('Print status for a local scan run') - .argument('', 'Local scan run id') - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (runId: string, _options: unknown, command) => { - await runScanArgs(context, { - command: 'status', - projectDir: resolveCommandProjectDir(command), - runId, - }); - }); - - scan - .command('report') - .description('Print a local scan report') - .argument('', 'Local scan run id') - .option('--json', 'Print the raw scan report JSON', false) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (runId: string, options, command) => { - await runScanArgs(context, { - command: 'report', - projectDir: resolveCommandProjectDir(command), - runId, - json: options.json === true, - }); - }); - - scan - .command('relationships') - .description('Print relationship artifacts for a local scan run') - .argument('', 'Local scan run id') - .option( - '--status ', - 'Relationship status: accepted, review, rejected, skipped, all', - parseRelationshipStatusOption, - 'review', - ) - .option('--limit ', 'Maximum relationships to print per status', parsePositiveIntegerOption, 25) - .addOption( - new Option('--accept ', 'Record a reviewer accepted decision for a relationship candidate') - .argParser(parseNonEmptyOption) - .conflicts('reject'), - ) - .addOption( - new Option('--reject ', 'Record a reviewer rejected decision for a relationship candidate') - .argParser(parseNonEmptyOption) - .conflicts('accept'), - ) - .option('--note ', 'Attach a note when recording a relationship 
review decision') - .option('--reviewer ', 'Reviewer name for a relationship review decision') - .option('--json', 'Print relationship artifacts as JSON', false) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (runId: string, options, command) => { - const decision = relationshipDecisionArgs(options); - if (decision) { - await runScanArgs(context, { - command: 'relationshipDecision', - projectDir: resolveCommandProjectDir(command), - runId, - candidateId: decision.candidateId, - decision: decision.decision, - reviewer: decision.reviewer, - note: decision.note, - json: decision.json, - }); - return; - } - await runScanArgs(context, { - command: 'relationships', - projectDir: resolveCommandProjectDir(command), - runId, - status: options.status, - json: options.json === true, - limit: options.limit, - }); - }); - - scan - .command('relationship-apply') - .description('Apply accepted relationship review decisions as manual manifest joins') - .argument('', 'Local scan run id') - .option('--all-accepted', 'Apply all accepted relationship review decisions for the scan run', false) - .option( - '--candidate ', - 'Apply one accepted relationship review decision', - collectRelationshipCandidateOption, - [], - ) - .option('--dry-run', 'Preview relationships that would be written without rewriting manifest shards', false) - .option('--json', 'Print the apply result as JSON', false) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (runId: string, options, command) => { - const parentOptions = command.parent?.opts() as { dryRun?: boolean } | undefined; - await runScanArgs(context, { - command: 'relationshipApply', - projectDir: resolveCommandProjectDir(command), - runId, - applyAllAccepted: options.allAccepted === true, - candidateIds: options.candidate, - 
dryRun: options.dryRun === true || parentOptions?.dryRun === true, - json: options.json === true, - }); - }); - - scan - .command('relationship-feedback') - .description('Export persisted relationship review decisions as calibration labels') - .option('--connection ', 'Only export labels for one KTX connection') - .option( - '--decision ', - 'Relationship feedback decision: accepted, rejected, all', - parseRelationshipFeedbackDecisionOption, - 'all', - ) - .addOption(new Option('--json', 'Print the export as JSON').default(false).conflicts('jsonl')) - .addOption(new Option('--jsonl', 'Print labels as newline-delimited JSON').default(false).conflicts('json')) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (options, command) => { - await runScanArgs(context, { - command: 'relationshipFeedback', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connection ?? 
null, - decision: options.decision, - json: options.json === true, - jsonl: options.jsonl === true, - }); - }); - - scan - .command('relationship-calibration') - .description('Summarize relationship feedback labels against current score thresholds') - .option('--connection ', 'Only calibrate labels for one KTX connection') - .option( - '--decision ', - 'Relationship feedback decision: accepted, rejected, all', - parseRelationshipFeedbackDecisionOption, - 'all', - ) - .option( - '--accept-threshold ', - 'Score threshold treated as predicted accepted', - parseRelationshipCalibrationThreshold, - 0.85, - ) - .option( - '--review-threshold ', - 'Score threshold treated as predicted review', - parseRelationshipCalibrationThreshold, - 0.55, - ) - .option('--json', 'Print the calibration report as JSON', false) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (options, command) => { - await runScanArgs(context, { - command: 'relationshipCalibration', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connection ?? 
null, - decision: options.decision, - acceptThreshold: options.acceptThreshold, - reviewThreshold: options.reviewThreshold, - json: options.json === true, - }); - }); - - scan - .command('relationship-thresholds') - .description('Evaluate relationship feedback labels for offline threshold advice') - .option('--connection ', 'Only evaluate labels for one KTX connection') - .option( - '--min-total-labels ', - 'Minimum scored labels before advice can be ready', - parsePositiveIntegerOption, - 20, - ) - .option( - '--min-accepted-labels ', - 'Minimum accepted labels before advice can be ready', - parsePositiveIntegerOption, - 5, - ) - .option( - '--min-rejected-labels ', - 'Minimum rejected labels before advice can be ready', - parsePositiveIntegerOption, - 5, - ) - .option('--json', 'Print the threshold advice report as JSON', false) - .addHelpText( - 'after', - '\n--project-dir is inherited from `ktx dev scan` (default: KTX_PROJECT_DIR or current working directory).\n', - ) - .action(async (options, command) => { - await runScanArgs(context, { - command: 'relationshipThresholds', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connection ?? 
null, - minTotalLabels: options.minTotalLabels, - minAcceptedLabels: options.minAcceptedLabels, - minRejectedLabels: options.minRejectedLabels, - json: options.json === true, - }); - }); } diff --git a/packages/cli/src/completion.ts b/packages/cli/src/completion.ts deleted file mode 100644 index 10e787f6..00000000 --- a/packages/cli/src/completion.ts +++ /dev/null @@ -1,353 +0,0 @@ -import { mkdir, readFile, writeFile } from 'node:fs/promises'; -import { homedir } from 'node:os'; -import { dirname, join } from 'node:path'; -import type { CommandUnknownOpts, Option } from '@commander-js/extra-typings'; - -export interface CompletionRequest { - position: number; - words: string[]; -} - -interface CompletionCandidate { - value: string; - description?: string; -} - -interface CommandWithHiddenFlag extends CommandUnknownOpts { - _hidden?: boolean; -} - -interface ResolveState { - command: CommandUnknownOpts; - pendingOption?: Option; - positionalIndex: number; -} - -export interface ZshCompletionInstallResult { - completionPath: string; - zshrcPath: string; -} - -const KTX_COMPLETION_BLOCK_START = '# >>> ktx completion >>>'; -const KTX_COMPLETION_BLOCK_END = '# <<< ktx completion <<<'; -const KTX_COMPLETION_BLOCK_PATTERN = new RegExp( - `\\n?${escapeRegExp(KTX_COMPLETION_BLOCK_START)}[\\s\\S]*?${escapeRegExp(KTX_COMPLETION_BLOCK_END)}\\n?`, - 'g', -); - -export function zshCompletionScript(): string { - const zshWords = '$' + '{words[@]}'; - const zshCompletionCapture = [ - '$', - `{(@f)$("${'$'}{ktx_completion_command[@]}" dev __complete --shell zsh --position "$CURRENT" -- "${zshWords}" 2>/dev/null)}`, - ].join(''); - const zshCompletionsCount = '$' + '{#completions[@]}'; - const zshCompletionCommand = '$' + '(eval "print -r -- $' + '{KTX_COMPLETION_COMMAND:-ktx}")'; - - return [ - '#compdef ktx', - '', - '_ktx() {', - ' local -a completions', - ' local -a ktx_completion_command', - ` ktx_completion_command=("\${(@z)${zshCompletionCommand}}")`, - ` 
completions=("${zshCompletionCapture}")`, - ` if (( ${zshCompletionsCount} )); then`, - " _describe 'ktx completions' completions", - ' else', - ' _files', - ' fi', - '}', - '', - 'compdef _ktx ktx', - '', - ].join('\n'); -} - -export async function installZshCompletion(): Promise { - const homeDir = process.env.HOME || homedir(); - const zshConfigDir = process.env.ZDOTDIR || homeDir; - const completionDir = join(homeDir, '.zfunc'); - const completionPath = join(completionDir, '_ktx'); - const zshrcPath = join(zshConfigDir, '.zshrc'); - - await mkdir(completionDir, { recursive: true }); - await mkdir(dirname(zshrcPath), { recursive: true }); - await writeFile(completionPath, zshCompletionScript(), 'utf-8'); - - const existingZshrc = await readOptionalTextFile(zshrcPath); - const nextZshrc = updateZshrcCompletionBlock(existingZshrc); - await writeFile(zshrcPath, nextZshrc, 'utf-8'); - - return { completionPath, zshrcPath }; -} - -export function completeCommanderInput(program: CommandUnknownOpts, request: CompletionRequest): string[] { - const words = completionWordsForPosition(request.words, request.position); - const tokens = stripProgramName(program, words); - const current = tokens.at(-1) ?? ''; - const previous = tokens.slice(0, -1); - const state = resolveCommandState(program, previous); - - return candidatesForState(state, current).map(formatZshCandidate); -} - -function completionWordsForPosition(words: string[], position: number): string[] { - if (!Number.isInteger(position) || position < 1) { - return words; - } - return words.slice(0, position); -} - -function stripProgramName(program: CommandUnknownOpts, words: string[]): string[] { - const [first, ...rest] = words; - if (!first) { - return []; - } - return first === program.name() || first.endsWith(`/${program.name()}`) ? 
rest : words; -} - -function resolveCommandState(program: CommandUnknownOpts, tokens: string[]): ResolveState { - let command = program; - let positionalIndex = 0; - let pendingOption: Option | undefined; - let positionalOnly = false; - - for (let index = 0; index < tokens.length; index += 1) { - const token = tokens[index]; - if (pendingOption) { - pendingOption = undefined; - continue; - } - - if (token === '--') { - positionalOnly = true; - continue; - } - - if (!positionalOnly && token.startsWith('-')) { - const option = findOption(command, optionNameFromToken(token)); - if (option && !token.includes('=') && optionTakesValue(option)) { - if (index === tokens.length - 1) { - pendingOption = option; - } else if (option.required || !tokens[index + 1]?.startsWith('-')) { - index += 1; - } - } - continue; - } - - const child = findVisibleSubcommand(command, token); - if (child) { - command = child; - positionalIndex = 0; - continue; - } - - positionalIndex += 1; - } - - return { command, pendingOption, positionalIndex }; -} - -function candidatesForState(state: ResolveState, current: string): CompletionCandidate[] { - const optionValue = splitOptionValueToken(current); - if (optionValue) { - const option = findOption(state.command, optionValue.optionName); - return choiceCandidates(option?.argChoices, optionValue.valuePrefix, optionValue.optionPrefix); - } - - if (state.pendingOption) { - return choiceCandidates(state.pendingOption.argChoices, current); - } - - if (current.startsWith('-')) { - return visibleOptions(state.command) - .map(optionCandidate) - .filter((candidate) => candidate.value.startsWith(current)); - } - - const commandCandidates = visibleSubcommands(state.command) - .map(commandCandidate) - .filter((candidate) => candidate.value.startsWith(current)); - const argument = state.command.registeredArguments[state.positionalIndex]; - return [...commandCandidates, ...choiceCandidates(argument?.argChoices, current)]; -} - -function 
visibleSubcommands(command: CommandUnknownOpts): CommandUnknownOpts[] { - return command.commands.filter((subcommand) => (subcommand as CommandWithHiddenFlag)._hidden !== true); -} - -function findVisibleSubcommand(command: CommandUnknownOpts, name: string): CommandUnknownOpts | undefined { - return visibleSubcommands(command).find( - (subcommand) => subcommand.name() === name || subcommand.aliases().includes(name), - ); -} - -function visibleOptions(command: CommandUnknownOpts): Option[] { - const options: Option[] = []; - const seen = new Set(); - for (const current of commandChain(command)) { - for (const option of current.options) { - if (option.hidden) { - continue; - } - const key = option.long ?? option.short ?? option.flags; - if (seen.has(key)) { - continue; - } - seen.add(key); - options.push(option); - } - } - return options; -} - -function commandChain(command: CommandUnknownOpts): CommandUnknownOpts[] { - const chain: CommandUnknownOpts[] = []; - let current: CommandUnknownOpts | null = command; - while (current) { - chain.unshift(current); - current = current.parent; - } - return chain; -} - -function findOption(command: CommandUnknownOpts, name: string): Option | undefined { - return visibleOptions(command).find((option) => option.long === name || option.short === name); -} - -function optionTakesValue(option: Option): boolean { - return option.required || option.optional; -} - -function optionNameFromToken(token: string): string { - return token.split('=', 1)[0] ?? 
token; -} - -function splitOptionValueToken( - token: string, -): { optionName: string; optionPrefix: string; valuePrefix: string } | null { - const separatorIndex = token.indexOf('='); - if (!token.startsWith('-') || separatorIndex < 0) { - return null; - } - return { - optionName: token.slice(0, separatorIndex), - optionPrefix: token.slice(0, separatorIndex + 1), - valuePrefix: token.slice(separatorIndex + 1), - }; -} - -function commandCandidate(command: CommandUnknownOpts): CompletionCandidate { - return { - value: command.name(), - description: command.summary() || command.description(), - }; -} - -function optionCandidate(option: Option): CompletionCandidate { - return { - value: option.long ?? option.short ?? option.flags, - description: option.description, - }; -} - -function choiceCandidates( - choices: readonly string[] | undefined, - prefix: string, - completionPrefix = '', -): CompletionCandidate[] { - return (choices ?? []) - .filter((choice) => choice.startsWith(prefix)) - .map((choice) => ({ value: `${completionPrefix}${choice}` })); -} - -function formatZshCandidate(candidate: CompletionCandidate): string { - if (!candidate.description) { - return escapeZshCompletion(candidate.value); - } - return `${escapeZshCompletion(candidate.value)}:${escapeZshDescription(candidate.description)}`; -} - -function escapeZshCompletion(value: string): string { - return value.replace(/\\/g, '\\\\').replace(/:/g, '\\:'); -} - -function escapeZshDescription(value: string): string { - return value.replace(/\s+/g, ' ').replace(/\\/g, '\\\\').replace(/:/g, '\\:').trim(); -} - -async function readOptionalTextFile(path: string): Promise { - try { - return await readFile(path, 'utf-8'); - } catch (error) { - if (isNodeError(error) && error.code === 'ENOENT') { - return ''; - } - throw error; - } -} - -function updateZshrcCompletionBlock(contents: string): string { - const withoutManagedBlock = contents.replace(KTX_COMPLETION_BLOCK_PATTERN, normalizeTrailingNewline); - const 
hasCompinit = /^.*\bcompinit\b.*$/m.test(withoutManagedBlock); - const block = zshrcCompletionBlock({ includeCompinit: !hasCompinit }); - - if (!hasCompinit) { - return appendBlock(withoutManagedBlock, block); - } - - const compinitMatch = /^.*\bcompinit\b.*$/m.exec(withoutManagedBlock); - if (!compinitMatch || compinitMatch.index === undefined) { - return appendBlock(withoutManagedBlock, block); - } - - return [ - withoutManagedBlock.slice(0, compinitMatch.index), - block, - '\n', - withoutManagedBlock.slice(compinitMatch.index), - ].join(''); -} - -function zshrcCompletionBlock(options: { includeCompinit: boolean }): string { - return [ - KTX_COMPLETION_BLOCK_START, - '_ktx_completion_command() {', - ' local dir="$PWD"', - ' while [[ "$dir" != "/" ]]; do', - ` if [[ -f "$dir/package.json" ]] && command grep -q '"name": "ktx-workspace"' "$dir/package.json" 2>/dev/null; then`, - ' print -r -- "node $dir/scripts/run-ktx.mjs --"', - ' return', - ' fi', - ' dir="' + '$' + '{dir:h}"', - ' done', - ' print -r -- "ktx"', - '}', - "export KTX_COMPLETION_COMMAND='$(_ktx_completion_command)'", - 'setopt complete_aliases', - 'fpath=("$HOME/.zfunc" $fpath)', - ...(options.includeCompinit ? ['autoload -Uz compinit', 'compinit'] : []), - KTX_COMPLETION_BLOCK_END, - ].join('\n'); -} - -function appendBlock(contents: string, block: string): string { - if (!contents.trim()) { - return `${block}\n`; - } - return `${contents.replace(/\s*$/, '\n\n')}${block}\n`; -} - -function normalizeTrailingNewline(match: string): string { - return match.startsWith('\n') || match.endsWith('\n') ? 
'\n' : ''; -} - -function escapeRegExp(value: string): string { - return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); -} - -function isNodeError(error: unknown): error is NodeJS.ErrnoException { - return error instanceof Error && 'code' in error; -} diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index 04c73cf1..4b6cacf1 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -310,8 +310,8 @@ describe('runKtxConnection', () => { expect(io.stdout()).toContain('Mappings:'); expect(io.stdout()).toContain('1 -> [unmapped]'); expect(io.stdout()).toContain('Next:'); - expect(io.stdout()).toContain('ktx ingest prod-metabase'); - expect(io.stdout()).toContain('ktx dev mapping'); + expect(io.stdout()).toContain('ktx ingest run --connection-id prod-metabase --adapter '); + expect(io.stdout()).toContain('ktx connection mapping'); expect(io.stderr()).toBe(''); }); diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index 1dde60ac..b199239a 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -326,8 +326,8 @@ async function runPublicConnectionMap( io.stdout.write('\nMappings:\n'); io.stdout.write(listIo.stdoutText().trim() ? 
listIo.stdoutText() : 'No mappings found.\n'); io.stdout.write('\nNext:\n'); - io.stdout.write(` ktx ingest ${args.sourceConnectionId}\n`); - io.stdout.write(` ktx dev mapping list ${args.sourceConnectionId}\n`); + io.stdout.write(` ktx ingest run --connection-id ${args.sourceConnectionId} --adapter \n`); + io.stdout.write(` ktx connection mapping list ${args.sourceConnectionId}\n`); return 0; } diff --git a/packages/cli/src/dev.test.ts b/packages/cli/src/dev.test.ts index c7894a9e..5aca4201 100644 --- a/packages/cli/src/dev.test.ts +++ b/packages/cli/src/dev.test.ts @@ -29,11 +29,14 @@ describe('dev Commander tree', () => { await expect(runKtxCli(['dev', '--help'], testIo.io)).resolves.toBe(0); expect(testIo.stdout()).toContain('Usage: ktx dev [options] [command]'); - for (const command of ['init', 'runtime', 'scan', 'ingest', 'mapping']) { + for (const command of ['init', 'runtime']) { expect(testIo.stdout()).toContain(command); } for (const removed of [ 'doctor', + 'scan', + 'ingest', + 'mapping', 'knowledge', 'model', 'replay', @@ -102,6 +105,12 @@ describe('dev Commander tree', () => { it('rejects removed dev command groups', async () => { for (const argv of [ ['dev', 'doctor', 'setup'], + ['dev', 'runtime', 'doctor'], + ['dev', 'scan', 'warehouse'], + ['dev', 'ingest', 'run'], + ['dev', 'mapping', 'list'], + ['dev', 'completion', 'zsh'], + ['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'ktx', ''], ['dev', 'knowledge', 'list'], ['dev', 'model', 'list'], ['dev', 'artifacts'], @@ -117,90 +126,15 @@ describe('dev Commander tree', () => { it.each([ { argv: ['dev', 'runtime', '--help'], - expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status', 'doctor', 'prune'], + expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status', 'prune'], }, { - argv: ['dev', 'scan', '--help'], - expected: [ - 'Usage: ktx dev scan', - '--mode ', - 'structural', - 'relationships', - '--dry-run', - 'status', - 'report', - 
'relationships', - 'relationship-apply', - 'relationship-feedback', - 'relationship-calibration', - 'relationship-thresholds', - ], + argv: ['scan', '--help'], + expected: ['Usage: ktx scan [options] ', '--mode ', 'structural', 'relationships', '--dry-run'], }, { - argv: ['dev', 'scan', 'report', '--help'], - expected: ['Usage: ktx dev scan report [options] ', '', '--json'], - }, - { - argv: ['dev', 'scan', 'relationships', '--help'], - expected: [ - 'Usage: ktx dev scan relationships [options] ', - '--status ', - '--limit ', - '--accept ', - '--reject ', - '--note ', - '--reviewer ', - '--json', - ], - }, - { - argv: ['dev', 'scan', 'relationship-apply', '--help'], - expected: [ - 'Usage: ktx dev scan relationship-apply [options] ', - '--all-accepted', - '--candidate ', - '--dry-run', - ], - }, - { - argv: ['dev', 'scan', 'relationship-thresholds', '--help'], - expected: [ - 'Usage: ktx dev scan relationship-thresholds [options]', - '--connection ', - '--min-total-labels ', - '--min-accepted-labels ', - '--min-rejected-labels ', - '--json', - ], - }, - { - argv: ['dev', 'scan', 'relationship-feedback', '--help'], - expected: [ - 'Usage: ktx dev scan relationship-feedback [options]', - '--connection ', - '--decision ', - '--json', - '--jsonl', - ], - }, - { - argv: ['dev', 'scan', 'relationship-calibration', '--help'], - expected: [ - 'Usage: ktx dev scan relationship-calibration [options]', - '--connection ', - '--decision ', - '--accept-threshold ', - '--review-threshold ', - '--json', - ], - }, - { - argv: ['dev', 'ingest', 'run', '--help'], - expected: ['Usage: ktx dev ingest run [options]', '--connection-id ', '--adapter '], - }, - { - argv: ['dev', 'mapping', 'sync-state', 'set', '--help'], - expected: ['Usage: ktx dev mapping sync-state set [options] ', '--mode '], + argv: ['ingest', 'run', '--help'], + expected: ['Usage: ktx ingest run [options]', '--connection-id ', '--adapter '], }, ])('prints generated nested help for $argv', async ({ argv, expected }) 
=> { const io = makeIo(); @@ -219,12 +153,12 @@ describe('dev Commander tree', () => { expect(scan).not.toHaveBeenCalled(); }); - it('dispatches dev scan through Commander with injected dependencies', async () => { + it('dispatches top-level scan through Commander with injected dependencies', async () => { const scanIo = makeIo(); const scan = vi.fn(async () => 0); await expect( - runKtxCli(['dev', 'scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'], scanIo.io, { scan }), + runKtxCli(['scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'], scanIo.io, { scan }), ).resolves.toBe(0); expect(scan).toHaveBeenCalledWith( @@ -244,12 +178,12 @@ describe('dev Commander tree', () => { expect(scanIo.stderr()).toBe('Project: /tmp/project\n'); }); - it('dispatches dev scan --mode relationships through Commander', async () => { + it('dispatches top-level scan --mode relationships through Commander', async () => { const io = makeIo(); const scan = vi.fn(async () => 0); await expect( - runKtxCli(['dev', 'scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'], io.io, { + runKtxCli(['scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'], io.io, { scan, }), ).resolves.toBe(0); @@ -275,375 +209,53 @@ describe('dev Commander tree', () => { const io = makeIo(); const scan = vi.fn(async () => 0); - await expect(runKtxCli(['dev', 'scan', 'warehouse', option], io.io, { scan })).resolves.toBe(1); + await expect(runKtxCli(['scan', 'warehouse', option], io.io, { scan })).resolves.toBe(1); expect(scan).not.toHaveBeenCalled(); expect(io.stderr()).toContain(`unknown option '${option}'`); }); - it('rejects dev scan without a connection id or subcommand', async () => { + it('rejects scan without a connection id', async () => { const io = makeIo(); const scan = vi.fn(async () => 0); - await expect(runKtxCli(['dev', 'scan', '--dry-run'], io.io, { scan })).resolves.toBe(1); + await expect(runKtxCli(['scan', '--dry-run'], 
io.io, { scan })).resolves.toBe(1); expect(scan).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Usage: ktx dev scan'); - expect(io.stderr()).toContain('ktx dev scan requires or a subcommand'); + expect(io.stderr()).toMatch(/missing required argument/i); }); it('rejects invalid scan modes before dispatch', async () => { const io = makeIo(); const scan = vi.fn(async () => 0); - await expect(runKtxCli(['dev', 'scan', 'warehouse', '--mode', 'deep'], io.io, { scan })).resolves.toBe(1); + await expect(runKtxCli(['scan', 'warehouse', '--mode', 'deep'], io.io, { scan })).resolves.toBe(1); expect(scan).not.toHaveBeenCalled(); expect(io.stderr()).toContain("argument 'deep' is invalid"); expect(io.stderr()).toContain('Allowed choices are structural, enriched, relationships'); }); - it('prints dev scan subcommand help with the canonical command name', async () => { + it.each([ + ['scan', 'report', 'scan-run-1'], + ['scan', 'relationships', 'scan-run-1'], + ])('rejects removed scan subcommand %s %s', async (command, subcommand, runId) => { const io = makeIo(); const scan = vi.fn(async () => 0); - await expect(runKtxCli(['dev', 'scan', 'report', '--help'], io.io, { scan })).resolves.toBe(0); - - expect(io.stdout()).toContain('--project-dir is inherited from `ktx dev scan`'); - expect(io.stdout()).not.toContain('--project-dir is inherited from `ktx scan`'); - expect(scan).not.toHaveBeenCalled(); - }); - - it('dispatches dev scan report in human and json modes', async () => { - const humanIo = makeIo(); - const jsonIo = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli(['dev', 'scan', 'report', 'scan-run-1', '--project-dir', '/tmp/project'], humanIo.io, { scan }), - ).resolves.toBe(0); - await expect( - runKtxCli(['dev', 'scan', 'report', 'scan-run-2', '--project-dir', '/tmp/project', '--json'], jsonIo.io, { - scan, - }), - ).resolves.toBe(0); - - expect(scan).toHaveBeenNthCalledWith( - 1, - { command: 'report', projectDir: '/tmp/project', 
runId: 'scan-run-1', json: false }, - humanIo.io, - ); - expect(scan).toHaveBeenNthCalledWith( - 2, - { command: 'report', projectDir: '/tmp/project', runId: 'scan-run-2', json: true }, - jsonIo.io, - ); - }); - - it('dispatches dev scan relationships with filters through Commander', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationships', - 'scan-run-review', - '--project-dir', - '/tmp/project', - '--status', - 'rejected', - '--limit', - '5', - '--json', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationships', - projectDir: '/tmp/project', - runId: 'scan-run-review', - status: 'rejected', - json: true, - limit: 5, - }, - io.io, - ); - expect(io.stderr()).toBe(''); - }); - - it('dispatches dev scan relationship decision recording through Commander', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationships', - 'scan-run-review', - '--project-dir', - '/tmp/project', - '--accept', - 'orders:orders.customer_id->customers:customers.id', - '--reviewer', - 'Andrey', - '--note', - 'Looks right', - '--json', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationshipDecision', - projectDir: '/tmp/project', - runId: 'scan-run-review', - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - reviewer: 'Andrey', - note: 'Looks right', - json: true, - }, - io.io, - ); - expect(io.stderr()).toBe(''); - }); - - it.each(['--accept', '--reject'])('rejects empty relationship decision candidate ids for %s', async (option) => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli(['dev', 'scan', 'relationships', 'scan-run-review', option, ''], io.io, { scan }), - ).resolves.toBe(1); + await 
expect(runKtxCli([command, subcommand, runId], io.io, { scan })).resolves.toBe(1); expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toContain('must not be empty'); + expect(io.stderr()).toMatch(/too many arguments|unknown command|error:/); }); - it('rejects relationship feedback JSON and JSONL output together', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli(['dev', 'scan', 'relationship-feedback', '--json', '--jsonl'], io.io, { scan }), - ).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toMatch(/conflict|cannot be used/i); - }); - - it('dispatches relationship apply command args', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationship-apply', - 'scan-run-a', - '--project-dir', - '/tmp/project', - '--candidate', - 'orders:orders.customer_id->customers:customers.id', - '--dry-run', - '--json', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationshipApply', - projectDir: '/tmp/project', - runId: 'scan-run-a', - applyAllAccepted: false, - candidateIds: ['orders:orders.customer_id->customers:customers.id'], - dryRun: true, - json: true, - }, - io.io, - ); - }); - - it('dispatches scan relationship feedback command with filters and JSONL output', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationship-feedback', - '--project-dir', - '/tmp/project', - '--connection', - 'warehouse', - '--decision', - 'accepted', - '--jsonl', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationshipFeedback', - projectDir: '/tmp/project', - connectionId: 'warehouse', - decision: 'accepted', - json: false, - jsonl: true, - }, - io.io, - ); - }); - - it('dispatches scan relationship calibration 
command with thresholds', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationship-calibration', - '--project-dir', - '/tmp/project', - '--connection', - 'warehouse', - '--decision', - 'rejected', - '--accept-threshold', - '0.9', - '--review-threshold', - '0.5', - '--json', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationshipCalibration', - projectDir: '/tmp/project', - connectionId: 'warehouse', - decision: 'rejected', - acceptThreshold: 0.9, - reviewThreshold: 0.5, - json: true, - }, - io.io, - ); - }); - - it('dispatches relationship threshold advice command args', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 'relationship-thresholds', - '--project-dir', - '/tmp/project', - '--connection', - 'warehouse', - '--min-total-labels', - '12', - '--min-accepted-labels', - '4', - '--min-rejected-labels', - '3', - '--json', - ], - io.io, - { scan }, - ), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'relationshipThresholds', - projectDir: '/tmp/project', - connectionId: 'warehouse', - minTotalLabels: 12, - minAcceptedLabels: 4, - minRejectedLabels: 3, - json: true, - }, - io.io, - ); - }); - - it('rejects invalid relationship calibration thresholds before dispatch', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli(['dev', 'scan', 'relationship-calibration', '--accept-threshold', '1.5'], io.io, { scan }), - ).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toContain('Allowed range is 0 through 1'); - }); - - it('rejects relationship accept and reject options together before dispatch', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'dev', - 'scan', - 
'relationships', - 'scan-run-review', - '--accept', - 'orders:orders.customer_id->customers:customers.id', - '--reject', - 'orders:orders.customer_id->customers:customers.id', - ], - io.io, - { scan }, - ), - ).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toMatch(/conflict|cannot be used/i); - }); - - it('dispatches dev ingest run through the low-level ingest Commander registration', async () => { + it('dispatches top-level ingest run through the low-level ingest Commander registration', async () => { const io = makeIo(); const ingest = vi.fn(async () => 0); await expect( runKtxCli( [ - 'dev', 'ingest', 'run', '--connection-id', diff --git a/packages/cli/src/dev.ts b/packages/cli/src/dev.ts index 06d73856..9391cc43 100644 --- a/packages/cli/src/dev.ts +++ b/packages/cli/src/dev.ts @@ -1,11 +1,7 @@ import { resolve } from 'node:path'; import type { Command } from '@commander-js/extra-typings'; import { type CommandWithGlobalOptions, type KtxCliCommandContext, resolveCommandProjectDir } from './cli-program.js'; -import { registerCompletionCommands } from './commands/completion-commands.js'; -import { registerConnectionMappingCommands } from './commands/connection-commands.js'; -import { registerIngestCommands } from './commands/ingest-commands.js'; import { registerRuntimeCommands } from './commands/runtime-commands.js'; -import { registerScanCommands } from './commands/scan-commands.js'; import { profileMark } from './startup-profile.js'; profileMark('module:dev'); @@ -13,7 +9,7 @@ profileMark('module:dev'); export function registerDevCommands(program: Command, context: KtxCliCommandContext): void { const dev = program .command('dev', { hidden: true }) - .description('Low-level diagnostics, scans, adapter commands, and mapping tools') + .description('Low-level project initialization and runtime management') .showHelpAfterError(); dev.hook('preAction', (_thisCommand, actionCommand) => { @@ -51,11 +47,4 @@ export function 
registerDevCommands(program: Command, context: KtxCliCommandCont ); registerRuntimeCommands(dev, context); - registerScanCommands(dev, context); - registerIngestCommands(dev, context, { - runIngestWithProgress: async (ingestArgs, ingestIo, ingestDeps, defaultRunIngest) => - await (ingestDeps.ingest ?? defaultRunIngest)(ingestArgs, ingestIo), - }); - registerConnectionMappingCommands(dev, context); - registerCompletionCommands(dev, context, program); } diff --git a/packages/cli/src/example-smoke.test.ts b/packages/cli/src/example-smoke.test.ts index b3bbc681..f5b70bfc 100644 --- a/packages/cli/src/example-smoke.test.ts +++ b/packages/cli/src/example-smoke.test.ts @@ -106,7 +106,6 @@ describe('standalone local warehouse example', () => { expect(parseJsonOutput<{ yaml: string }>(slRead.stdout).yaml).toContain('name: orders'); const ingest = await runBuiltCli([ - 'dev', 'ingest', 'run', '--project-dir', @@ -120,7 +119,7 @@ describe('standalone local warehouse example', () => { ]); expect(ingest).toMatchObject({ code: 1, stdout: '' }); expect(ingest.stderr).toContain( - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', + 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', ); }, 30_000); diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 6a968fa9..9c08e58a 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -123,10 +123,10 @@ describe('runKtxCli', () => { await expect(runKtxCli(['--help'], testIo.io)).resolves.toBe(0); expect(testIo.stdout()).toContain('Usage: ktx [options] [command]'); - for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']) { + for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']) { expect(testIo.stdout()).toContain(`${command}`); } - for (const removed of ['demo', 'init', 'connect', 'scan', 'ask', 'knowledge', 
'agent', 'completion', 'runtime', 'serve']) { + for (const removed of ['demo', 'init', 'connect', 'ask', 'knowledge', 'agent', 'completion', 'serve']) { expect(testIo.stdout()).not.toContain(`${removed} [`); expect(testIo.stdout()).not.toContain(`${removed} `); } @@ -146,7 +146,6 @@ describe('runKtxCli', () => { const stopIo = makeIo(); const stopAllIo = makeIo(); const statusIo = makeIo(); - const doctorIo = makeIo(); const pruneIo = makeIo(); await expect( @@ -160,7 +159,6 @@ describe('runKtxCli', () => { await expect(runKtxCli(['dev', 'runtime', 'stop'], stopIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['dev', 'runtime', 'stop', '--all'], stopAllIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['dev', 'runtime', 'status', '--json'], statusIo.io, { runtime })).resolves.toBe(0); - await expect(runKtxCli(['dev', 'runtime', 'doctor'], doctorIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['dev', 'runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(0); expect(runtime).toHaveBeenNthCalledWith( @@ -212,15 +210,6 @@ describe('runKtxCli', () => { ); expect(runtime).toHaveBeenNthCalledWith( 6, - { - command: 'doctor', - cliVersion: '0.0.0-private', - json: false, - }, - doctorIo.io, - ); - expect(runtime).toHaveBeenNthCalledWith( - 7, { command: 'prune', cliVersion: '0.0.0-private', @@ -229,7 +218,7 @@ describe('runKtxCli', () => { }, pruneIo.io, ); - for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo, doctorIo, pruneIo]) { + for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo, pruneIo]) { expect(io.stderr()).toBe(''); } }); @@ -247,16 +236,15 @@ describe('runKtxCli', () => { }); it('skips the project directory line for JSON and TUI output modes', async () => { - const publicIngest = vi.fn(async () => 0); const ingest = vi.fn(async () => 0); const jsonIo = makeIo(); const vizIo = makeIo({ stdoutIsTty: true }); - await expect(runKtxCli(['--project-dir', tempDir, 'ingest', '--all', 
'--json'], jsonIo.io, { publicIngest })) + await expect(runKtxCli(['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--json'], jsonIo.io, { ingest })) .resolves.toBe(0); await expect( runKtxCli( - ['--project-dir', tempDir, 'dev', 'ingest', 'status', 'run-1', '--viz'], + ['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--viz'], vizIo.io, { ingest }, ), @@ -503,158 +491,17 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toBe(''); }); - it('prints a zsh completion function', async () => { - const testIo = makeIo(); - const zshWords = '$' + '{words[@]}'; - - await expect(runKtxCli(['dev', 'completion', 'zsh'], testIo.io)).resolves.toBe(0); - - expect(testIo.stdout()).toContain('#compdef ktx'); - expect(testIo.stdout()).toContain('KTX_COMPLETION_COMMAND:-ktx'); - expect(testIo.stdout()).toContain(`dev __complete --shell zsh --position "$CURRENT" -- "${zshWords}"`); - expect(testIo.stdout()).toContain('compdef _ktx ktx'); - expect(testIo.stderr()).toBe(''); - }); - - it('installs zsh completions into the user zsh config directory', async () => { - const testIo = makeIo(); - const previousHome = process.env.HOME; - const previousZdotdir = process.env.ZDOTDIR; - const tempHome = await mkdtemp(join(tmpdir(), 'ktx-completion-home-')); - - try { - process.env.HOME = tempHome; - delete process.env.ZDOTDIR; - - await expect(runKtxCli(['dev', 'completion', 'zsh', '--install'], testIo.io)).resolves.toBe(0); - - const completionFile = await readFile(join(tempHome, '.zfunc', '_ktx'), 'utf-8'); - const zshrc = await readFile(join(tempHome, '.zshrc'), 'utf-8'); - expect(completionFile).toContain('#compdef ktx'); - expect(zshrc).toContain('# >>> ktx completion >>>'); - expect(zshrc).toContain('_ktx_completion_command()'); - expect(zshrc).toContain('"name": "ktx-workspace"'); - expect(zshrc).toContain('scripts/run-ktx.mjs'); - expect(zshrc).toContain("export KTX_COMPLETION_COMMAND='$(_ktx_completion_command)'"); - expect(zshrc).toContain('setopt 
complete_aliases'); - expect(zshrc).toContain('fpath=("$HOME/.zfunc" $fpath)'); - expect(zshrc).toContain('autoload -Uz compinit'); - expect(zshrc).toContain('compinit'); - expect(testIo.stdout()).toContain('Installed zsh completion:'); - expect(testIo.stdout()).toContain('Restart your shell or run: source ~/.zshrc'); - expect(testIo.stderr()).toBe(''); - } finally { - if (previousHome === undefined) { - delete process.env.HOME; - } else { - process.env.HOME = previousHome; - } - if (previousZdotdir === undefined) { - delete process.env.ZDOTDIR; - } else { - process.env.ZDOTDIR = previousZdotdir; - } - await rm(tempHome, { recursive: true, force: true }); - } - }); - - it('updates zsh completion install block idempotently before existing compinit', async () => { - const firstIo = makeIo(); - const secondIo = makeIo(); - const previousHome = process.env.HOME; - const previousZdotdir = process.env.ZDOTDIR; - const tempHome = await mkdtemp(join(tmpdir(), 'ktx-completion-home-')); - - try { - process.env.HOME = tempHome; - delete process.env.ZDOTDIR; - await writeFile(join(tempHome, '.zshrc'), 'export EDITOR=vim\nautoload -Uz compinit\ncompinit\n', 'utf-8'); - - await expect(runKtxCli(['dev', 'completion', 'zsh', '--install'], firstIo.io)).resolves.toBe(0); - await expect(runKtxCli(['dev', 'completion', 'zsh', '--install'], secondIo.io)).resolves.toBe(0); - - const zshrc = await readFile(join(tempHome, '.zshrc'), 'utf-8'); - expect(zshrc.match(/# >>> ktx completion >>>/g)).toHaveLength(1); - expect(zshrc.indexOf('fpath=("$HOME/.zfunc" $fpath)')).toBeLessThan(zshrc.indexOf('autoload -Uz compinit')); - expect(zshrc.match(/_ktx_completion_command\(\)/g)).toHaveLength(1); - expect(zshrc.match(/^compinit$/gm)).toHaveLength(1); - expect(secondIo.stdout()).toContain('Updated zsh config:'); - expect(firstIo.stderr()).toBe(''); - expect(secondIo.stderr()).toBe(''); - } finally { - if (previousHome === undefined) { - delete process.env.HOME; - } else { - process.env.HOME = 
previousHome; - } - if (previousZdotdir === undefined) { - delete process.env.ZDOTDIR; - } else { - process.env.ZDOTDIR = previousZdotdir; - } - await rm(tempHome, { recursive: true, force: true }); - } - }); - - it('completes root and nested Commander command names', async () => { - const rootIo = makeIo(); - const connectionIo = makeIo(); + it('rejects removed shell completion commands', async () => { + const completionIo = makeIo(); + const hiddenIo = makeIo(); + await expect(runKtxCli(['dev', 'completion', 'zsh'], completionIo.io)).resolves.toBe(1); await expect( - runKtxCli(['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'ktx', 'co'], rootIo.io), - ).resolves.toBe(0); - await expect( - runKtxCli( - ['dev', '__complete', '--shell', 'zsh', '--position', '3', '--', 'ktx', 'connection', 'm'], - connectionIo.io, - ), - ).resolves.toBe(0); + runKtxCli(['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'ktx', 'co'], hiddenIo.io), + ).resolves.toBe(1); - expect(rootIo.stdout()).toContain('connection:Add, list, test, and map data sources'); - expect(rootIo.stdout()).not.toContain('__complete'); - expect(connectionIo.stdout()).toContain('map:Refresh and validate BI-to-warehouse mappings'); - expect(connectionIo.stdout()).toContain('mapping:Manage Metabase warehouse mappings'); - expect(rootIo.stderr()).toBe(''); - expect(connectionIo.stderr()).toBe(''); - }); - - it('completes options and Commander choices', async () => { - const optionIo = makeIo(); - const choiceIo = makeIo(); - - await expect( - runKtxCli( - ['dev', '__complete', '--shell', 'zsh', '--position', '4', '--', 'ktx', 'connection', 'add', '--cr'], - optionIo.io, - ), - ).resolves.toBe(0); - await expect( - runKtxCli( - [ - 'dev', - '__complete', - '--shell', - 'zsh', - '--position', - '7', - '--', - 'ktx', - 'connection', - 'add', - 'notion', - 'docs', - '--crawl-mode', - '', - ], - choiceIo.io, - ), - ).resolves.toBe(0); - - 
expect(optionIo.stdout()).toContain('--crawl-mode:Notion crawl mode'); - expect(choiceIo.stdout()).toContain('all_accessible'); - expect(choiceIo.stdout()).toContain('selected_roots'); - expect(optionIo.stderr()).toBe(''); - expect(choiceIo.stderr()).toBe(''); + expect(completionIo.stderr()).toMatch(/unknown command|error:/); + expect(hiddenIo.stderr()).toMatch(/unknown command|error:/); }); it('rejects removed serve commands', async () => { @@ -666,35 +513,22 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toMatch(/unknown command|error:/); }); - it('routes public ingest through the public ingest parser', async () => { + it('rejects removed public ingest shorthand', async () => { const testIo = makeIo(); const ingest = vi.fn().mockResolvedValue(0); - await expect( - runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse'], testIo.io, { publicIngest: ingest }), - ).resolves.toBe(0); + await expect(runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse'], testIo.io, { ingest })) + .resolves.toBe(1); - expect(ingest).toHaveBeenCalledWith( - { - command: 'run', - projectDir: '/tmp/project', - targetConnectionId: 'warehouse', - all: false, - json: false, - inputMode: 'auto', - }, - testIo.io, - ); + expect(ingest).not.toHaveBeenCalled(); + expect(testIo.stderr()).toMatch(/unknown command|error:/); }); - it('prints public ingest watch help from Commander', async () => { + it('prints ingest watch help from Commander', async () => { const testIo = makeIo(); - const publicIngest = vi.fn(async () => 0); - const lowLevelIngest = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); - await expect( - runKtxCli(['ingest', 'watch', '--help'], testIo.io, { publicIngest, ingest: lowLevelIngest }), - ).resolves.toBe(0); + await expect(runKtxCli(['ingest', 'watch', '--help'], testIo.io, { ingest })).resolves.toBe(0); expect(testIo.stdout()).toContain('Usage: ktx ingest watch [options] [runId]'); expect(testIo.stdout()).toContain('[runId]'); @@ 
-702,43 +536,42 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toContain('--json'); expect(testIo.stdout()).toContain('--no-input'); expect(testIo.stderr()).toBe(''); - expect(publicIngest).not.toHaveBeenCalled(); - expect(lowLevelIngest).not.toHaveBeenCalled(); + expect(ingest).not.toHaveBeenCalled(); }); - it('dispatches public ingest status and watch through Commander', async () => { + it('dispatches ingest status and watch through Commander', async () => { const statusIo = makeIo(); const watchIo = makeIo(); - const publicIngest = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); await expect( runKtxCli(['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--json', '--no-input'], statusIo.io, { - publicIngest, + ingest, }), ).resolves.toBe(0); await expect( runKtxCli(['--project-dir', tempDir, 'ingest', 'watch', '--no-input'], watchIo.io, { - publicIngest, + ingest, }), ).resolves.toBe(0); - expect(publicIngest).toHaveBeenNthCalledWith( + expect(ingest).toHaveBeenNthCalledWith( 1, { command: 'status', projectDir: tempDir, runId: 'run-1', - json: true, + outputMode: 'json', inputMode: 'disabled', }, statusIo.io, ); - expect(publicIngest).toHaveBeenNthCalledWith( + expect(ingest).toHaveBeenNthCalledWith( 2, { command: 'watch', projectDir: tempDir, - json: false, + outputMode: 'viz', inputMode: 'disabled', }, watchIo.io, @@ -778,60 +611,44 @@ describe('runKtxCli', () => { expect(setup).not.toHaveBeenCalled(); }); - it('prints public ingest help without invoking ingest execution', async () => { + it('prints ingest help without invoking ingest execution', async () => { const testIo = makeIo(); - const publicIngest = vi.fn(); - const lowLevelIngest = vi.fn(); + const ingest = vi.fn(); - await expect(runKtxCli(['ingest', '--help'], testIo.io, { publicIngest, ingest: lowLevelIngest })).resolves.toBe(0); + await expect(runKtxCli(['ingest', '--help'], testIo.io, { ingest })).resolves.toBe(0); - expect(testIo.stdout()).toContain('Usage: ktx ingest 
[options] [connectionId]'); - expect(testIo.stdout()).toContain('Build and refresh KTX context from configured sources'); + expect(testIo.stdout()).toContain('Usage: ktx ingest [options] [command]'); + expect(testIo.stdout()).toContain('Run or inspect local ingest memory-flow output'); + expect(testIo.stdout()).toContain('run'); expect(testIo.stdout()).toContain('status'); expect(testIo.stdout()).toContain('watch'); - expect(testIo.stdout()).toContain('ktx ingest --all [options]'); - expect(testIo.stdout()).toContain('ktx ingest status [runId] [options]'); - expect(testIo.stdout()).toContain('ktx ingest watch [runId] [options]'); - expect(testIo.stdout()).not.toContain('ktx ingest replay [options]'); - expect(testIo.stdout()).toContain('--no-input'); - expect(testIo.stdout()).not.toContain('--adapter'); + expect(testIo.stdout()).toContain('replay'); + expect(testIo.stdout()).not.toContain('--all'); expect(testIo.stderr()).toBe(''); - expect(publicIngest).not.toHaveBeenCalled(); - expect(lowLevelIngest).not.toHaveBeenCalled(); + expect(ingest).not.toHaveBeenCalled(); }); - it('reserves public ingest run while keeping dev ingest run available', async () => { - const publicRunIo = makeIo(); - const publicHelpIo = makeIo(); + it('routes ingest run at the top level and rejects removed dev ingest', async () => { + const runIo = makeIo(); const devRunIo = makeIo(); - const publicIngest = vi.fn(async () => 0); - const lowLevelIngest = vi.fn(async () => 0); - - await expect(runKtxCli(['ingest', 'run'], publicRunIo.io, { publicIngest, ingest: lowLevelIngest })).resolves.toBe( - 1, - ); - expect(publicRunIo.stderr()).toMatch(/invalid argument|reserved|run/i); - expect(publicIngest).not.toHaveBeenCalled(); + const ingest = vi.fn(async () => 0); await expect( - runKtxCli(['ingest', 'run', '--help'], publicHelpIo.io, { publicIngest, ingest: lowLevelIngest }), + runKtxCli(['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], runIo.io, { ingest }), 
).resolves.toBe(0); - expect(publicHelpIo.stdout()).toContain('Usage: ktx ingest [options] [connectionId]'); - expect(publicHelpIo.stdout()).not.toContain('Usage: ktx ingest ' + 'run'); - await expect( runKtxCli(['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], devRunIo.io, { - publicIngest, - ingest: lowLevelIngest, + ingest, }), - ).resolves.toBe(0); - expect(lowLevelIngest).toHaveBeenCalledWith( + ).resolves.toBe(1); + expect(ingest).toHaveBeenCalledWith( expect.objectContaining({ command: 'run', connectionId: 'warehouse', adapter: 'metabase' }), expect.anything(), ); + expect(devRunIo.stderr()).toMatch(/unknown command|error:/); }); - it('rejects removed dev doctor while keeping ingest parser cases under dev', async () => { + it('rejects removed dev doctor while keeping ingest parser cases at the root', async () => { const doctor = vi.fn(async () => 0); const ingest = vi.fn(async () => 0); const doctorIo = makeIo(); @@ -842,7 +659,6 @@ describe('runKtxCli', () => { await expect( runKtxCli( [ - 'dev', 'ingest', 'run', '--project-dir', @@ -862,7 +678,7 @@ describe('runKtxCli', () => { { ingest }, ), ).resolves.toBe(0); - await expect(runKtxCli(['dev', 'ingest', 'replay', '--help'], ingestReplayHelpIo.io, { ingest })).resolves.toBe(0); + await expect(runKtxCli(['ingest', 'replay', '--help'], ingestReplayHelpIo.io, { ingest })).resolves.toBe(0); expect(doctor).not.toHaveBeenCalled(); expect(ingest).toHaveBeenCalledWith( @@ -881,7 +697,7 @@ describe('runKtxCli', () => { }, ingestRunIo.io, ); - expect(ingestReplayHelpIo.stdout()).toContain('Usage: ktx dev ingest replay [options] '); + expect(ingestReplayHelpIo.stdout()).toContain('Usage: ktx ingest replay [options] '); expect(ingestReplayHelpIo.stdout()).toContain(''); expect(doctorIo.stderr()).toMatch(/unknown command|error:/); expect(ingestRunIo.stderr()).toBe(''); @@ -896,7 +712,6 @@ describe('runKtxCli', () => { await expect( runKtxCli( [ - 'dev', 'ingest', 'run', '--project-dir', 
@@ -914,7 +729,6 @@ describe('runKtxCli', () => { await expect( runKtxCli( [ - 'dev', 'ingest', 'run', '--project-dir', @@ -1729,7 +1543,7 @@ describe('runKtxCli', () => { 'ktx connection mapping refresh --auto-accept', 'ktx connection mapping set databaseMappings =', 'ktx connection mapping set-sync-enabled --enabled true', - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter metabase', ]) { expect(helpIo.stdout()).toContain(line); } @@ -1870,7 +1684,6 @@ describe('runKtxCli', () => { for (const argv of [ ['init'], ['connect', 'list'], - ['scan', 'warehouse'], ['knowledge', 'list'], ['ask', 'What sources are connected?'], ]) { @@ -2041,11 +1854,11 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toContain('[debug] dispatch=connection'); }); - it('routes low-level scan through ktx dev with top-level project-dir', async () => { + it('routes scan through the top-level command with top-level project-dir', async () => { const testIo = makeIo(); const scan = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['--project-dir', tempDir, 'dev', 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe( + await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe( 0, ); @@ -2071,12 +1884,12 @@ describe('runKtxCli', () => { const conflictIo = makeIo(); const scan = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['--project-dir', tempDir, 'dev', 'scan', 'warehouse', '--yes'], autoIo.io, { scan })) + await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--yes'], autoIo.io, { scan })) .resolves.toBe(0); - await expect(runKtxCli(['--project-dir', tempDir, 'dev', 'scan', 'warehouse', '--no-input'], neverIo.io, { scan })) + await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--no-input'], neverIo.io, { scan })) .resolves.toBe(0); await expect( - runKtxCli(['--project-dir', tempDir, 'dev', 'scan', 'warehouse', '--yes', '--no-input'], conflictIo.io, { + 
runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--yes', '--no-input'], conflictIo.io, { scan, }), ).resolves.toBe(1); @@ -2131,44 +1944,38 @@ describe('runKtxCli', () => { await expect(runKtxCli(['dev'], testIo.io)).resolves.toBe(0); expect(testIo.stdout()).toContain('Usage: ktx dev [options] [command]'); - expect(testIo.stdout()).toContain('Low-level diagnostics'); - expect(testIo.stdout()).toContain('scan'); - expect(testIo.stdout()).toContain('ingest'); - expect(testIo.stdout()).toContain('mapping'); + expect(testIo.stdout()).toContain('Low-level project initialization'); + expect(testIo.stdout()).toContain('init'); + expect(testIo.stdout()).toContain('runtime'); + expect(testIo.stdout()).not.toContain('scan'); + expect(testIo.stdout()).not.toContain('ingest'); + expect(testIo.stdout()).not.toContain('mapping'); expect(testIo.stdout()).not.toContain('model'); expect(testIo.stdout()).not.toContain('knowledge'); expect(testIo.stderr()).toBe(''); }); - it('prints dev command help without invoking low-level execution', async () => { - for (const [command, expected] of [ - ['scan', ['Usage: ktx dev scan', '--dry-run', 'status', 'report']], - ['ingest', ['Usage: ktx dev ingest', 'run', 'replay']], - ['mapping', ['Usage: ktx dev mapping', 'sync-state', 'validate']], - ] as const) { + it('rejects removed dev command groups without invoking execution', async () => { + for (const command of ['scan', 'ingest', 'mapping']) { const testIo = makeIo(); const scan = vi.fn().mockResolvedValue(0); const sl = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['dev', command, '--help'], testIo.io, { scan, sl })).resolves.toBe(0); + await expect(runKtxCli(['dev', command], testIo.io, { scan, sl })).resolves.toBe(1); - for (const text of expected) { - expect(testIo.stdout()).toContain(text); - } - expect(testIo.stderr()).toBe(''); + expect(testIo.stderr()).toMatch(/unknown command|error:/); expect(scan).not.toHaveBeenCalled(); expect(sl).not.toHaveBeenCalled(); } }); 
- it('prints dev scan subcommand help without invoking scan execution', async () => { + it('rejects removed scan subcommands without invoking scan execution', async () => { const testIo = makeIo(); const scan = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['dev', 'scan', 'report', '--help'], testIo.io, { scan })).resolves.toBe(0); + await expect(runKtxCli(['scan', 'report'], testIo.io, { scan })).resolves.toBe(1); - expect(testIo.stdout()).toContain('Usage: ktx dev scan report [options] '); - expect(testIo.stderr()).toBe(''); + expect(testIo.stderr()).toMatch(/too many arguments|unknown command|error:/); expect(scan).not.toHaveBeenCalled(); }); @@ -2184,8 +1991,8 @@ describe('runKtxCli', () => { const ingest = vi.fn(async () => 0); for (const argv of [ - ['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'fake', '--json', '--plain'], - ['dev', 'ingest', 'status', 'run-1', '--json', '--viz'], + ['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'fake', '--json', '--plain'], + ['ingest', 'status', 'run-1', '--json', '--viz'], ]) { const testIo = makeIo(); await expect(runKtxCli(argv, testIo.io, { ingest })).resolves.toBe(1); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 0db44fcd..9fffdf0c 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -107,7 +107,7 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); - it('prints provider setup guidance when a skip-llm setup project runs dev ingest', async () => { + it('prints provider setup guidance when a skip-llm setup project runs ingest', async () => { const projectDir = join(tempDir, 'project'); const setupIo = makeIo(); await expect( @@ -168,7 +168,7 @@ describe('runKtxIngest', () => { expect(runIo.stdout()).toBe(''); expect(runIo.stderr()).toContain( - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest run 
requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', ); expect(runIo.stderr()).toContain( `ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, @@ -663,7 +663,7 @@ describe('runKtxIngest', () => { ).resolves.toBe(1); expect(io.stderr()).toContain('source-dir uploads are not supported for the Metabase fan-out adapter'); - expect(io.stderr()).not.toContain('ktx dev ingest run requires llm.provider.backend'); + expect(io.stderr()).not.toContain('ktx ingest run requires llm.provider.backend'); expect(io.stdout()).toBe(''); }); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index cf7a7aff..4a68edf3 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -518,7 +518,9 @@ export async function runKtxIngest( const project = await loadKtxProject({ projectDir: args.projectDir }); const env = deps.env ?? process.env; if (args.command === 'run') { - const createAdapters = deps.createAdapters ?? createKtxCliLocalIngestAdapters; + const createAdapters = + deps.createAdapters ?? + (deps.runLocalIngest || deps.runLocalMetabaseIngest ? () => [] : createKtxCliLocalIngestAdapters); const executeLocalIngest = deps.runLocalIngest ?? runLocalIngest; const localIngestOptions = deps.localIngestOptions ?? {}; const managedDaemon = managedDaemonOptionsForIngestRun(args, io); @@ -645,7 +647,7 @@ export async function runKtxIngest( throw new Error( args.runId ? `Local ingest run or report "${args.runId}" was not found` - : 'No local ingest reports were found. Run `ktx ingest --all` first.', + : 'No local ingest reports were found. 
Run `ktx ingest run --connection-id --adapter ` first.', ); } await writeReportRecord(report, args.outputMode, io, { diff --git a/packages/cli/src/project-dir.test.ts b/packages/cli/src/project-dir.test.ts index 6e3ca901..c59172a6 100644 --- a/packages/cli/src/project-dir.test.ts +++ b/packages/cli/src/project-dir.test.ts @@ -33,11 +33,10 @@ describe('project directory defaults', () => { const connection = vi.fn(async () => 0); const doctor = vi.fn(async () => 0); const ingest = vi.fn(async () => 0); - const publicIngest = vi.fn(async () => 0); const scan = vi.fn(async () => 0); const setup = vi.fn(async () => 0); const agent = vi.fn(async () => 0); - const deps: KtxCliDeps = { agent, connection, doctor, ingest, publicIngest, scan, setup }; + const deps: KtxCliDeps = { agent, connection, doctor, ingest, scan, setup }; const cases: Array<{ argv: string[]; @@ -59,8 +58,8 @@ describe('project directory defaults', () => { }, { argv: ['ingest', 'status', 'run-1'], - spy: publicIngest, - expected: { command: 'status', projectDir: '/tmp/ktx-env-project', runId: 'run-1' }, + spy: ingest, + expected: { command: 'status', projectDir: '/tmp/ktx-env-project', runId: 'run-1', outputMode: 'plain' }, expectedStderr: 'Project: /tmp/ktx-env-project\n', }, { @@ -70,7 +69,7 @@ describe('project directory defaults', () => { expectedStderr: 'Project: /tmp/ktx-env-project\n', }, { - argv: ['dev', 'scan', 'warehouse'], + argv: ['scan', 'warehouse'], spy: scan, expected: { command: 'run', projectDir: '/tmp/ktx-env-project', connectionId: 'warehouse' }, expectedStderr: 'Project: /tmp/ktx-env-project\n', @@ -95,16 +94,16 @@ describe('project directory defaults', () => { process.env.KTX_PROJECT_DIR = '/tmp/ktx-env-project'; const scan = vi.fn(async () => 0); - const publicIngest = vi.fn(async () => 0); + const ingest = vi.fn(async () => 0); const scanIo = makeIo(); const ingestIo = makeIo(); await expect( - runKtxCli(['--project-dir', '/tmp/ktx-explicit-project', 'dev', 'scan', 'warehouse'], 
scanIo.io, { scan }), + runKtxCli(['--project-dir', '/tmp/ktx-explicit-project', 'scan', 'warehouse'], scanIo.io, { scan }), ).resolves.toBe(0); await expect( runKtxCli(['ingest', 'status', 'run-1', '--project-dir=/tmp/ktx-explicit-project'], ingestIo.io, { - publicIngest, + ingest, }), ).resolves.toBe(0); @@ -112,7 +111,7 @@ describe('project directory defaults', () => { expect.objectContaining({ command: 'run', projectDir: '/tmp/ktx-explicit-project' }), scanIo.io, ); - expect(publicIngest).toHaveBeenCalledWith( + expect(ingest).toHaveBeenCalledWith( expect.objectContaining({ command: 'status', projectDir: '/tmp/ktx-explicit-project' }), ingestIo.io, ); @@ -139,7 +138,7 @@ describe('project directory defaults', () => { try { process.chdir(nestedDir); - await expect(runKtxCli(['dev', 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe(0); + await expect(runKtxCli(['scan', 'warehouse'], testIo.io, { scan })).resolves.toBe(0); } finally { process.chdir(originalCwd); await rm(root, { recursive: true, force: true }); diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index 13d8f364..1c133a19 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -57,7 +57,7 @@ describe('buildPublicIngestPlan', () => { driver: 'notion', operation: 'source-ingest', adapter: 'notion', - debugCommand: 'ktx dev ingest run --connection-id docs --adapter notion --debug', + debugCommand: 'ktx ingest run --connection-id docs --adapter notion --debug', steps: ['source-ingest', 'memory-update'], }, { @@ -65,7 +65,7 @@ describe('buildPublicIngestPlan', () => { driver: 'metabase', operation: 'source-ingest', adapter: 'metabase', - debugCommand: 'ktx dev ingest run --connection-id prod_metabase --adapter metabase --debug', + debugCommand: 'ktx ingest run --connection-id prod_metabase --adapter metabase --debug', steps: ['source-ingest', 'memory-update'], }, ], @@ -76,7 +76,7 @@ 
describe('buildPublicIngestPlan', () => { const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); expect(() => buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: false })).toThrow( - 'ktx ingest requires or --all in this release', + 'Context build requires a connection id or all targets', ); }); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 3c0ede75..c9d9f7bb 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -107,7 +107,7 @@ function targetForConnection(connectionId: string, connection: KtxProjectConnect operation: 'source-ingest', adapter, ...(sourceDir ? { sourceDir } : {}), - debugCommand: `ktx dev ingest run --connection-id ${connectionId} --adapter ${adapter} --debug`, + debugCommand: `ktx ingest run --connection-id ${connectionId} --adapter ${adapter} --debug`, steps: ['source-ingest', 'memory-update'], }; } @@ -130,7 +130,7 @@ export function buildPublicIngestPlan( args: { projectDir: string; targetConnectionId?: string; all: boolean }, ): KtxPublicIngestPlan { if (!args.all && !args.targetConnectionId) { - throw new Error('ktx ingest requires or --all in this release'); + throw new Error('Context build requires a connection id or all targets'); } const entries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b)); diff --git a/packages/cli/src/runtime.test.ts b/packages/cli/src/runtime.test.ts index a10b5b36..16e879cc 100644 --- a/packages/cli/src/runtime.test.ts +++ b/packages/cli/src/runtime.test.ts @@ -5,7 +5,6 @@ import type { ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; import type { - ManagedPythonRuntimeDoctorCheck, ManagedPythonRuntimeInstallResult, ManagedPythonRuntimeStatus, } from './managed-python-runtime.js'; @@ -290,28 +289,6 @@ describe('runKtxRuntime', () => { }); }); - it('returns failure for doctor when any check fails', async () => { - const io = makeIo(); - const 
deps: KtxRuntimeDeps = { - doctorRuntime: vi.fn(async (): Promise => [ - { id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' }, - { - id: 'runtime', - label: 'Managed Python runtime', - status: 'fail', - detail: 'No runtime manifest', - fix: 'Run: ktx dev runtime install --yes', - }, - ]), - }; - - await expect(runKtxRuntime({ command: 'doctor', cliVersion: '0.2.0', json: false }, io.io, deps)).resolves.toBe(1); - - expect(io.stdout()).toContain('PASS uv: uv 0.9.5'); - expect(io.stdout()).toContain('FAIL Managed Python runtime: No runtime manifest'); - expect(io.stdout()).toContain('Fix: Run: ktx dev runtime install --yes'); - }); - it('requires --yes before pruning stale runtime directories', async () => { const io = makeIo(); const deps: KtxRuntimeDeps = { diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index ed8cca22..d5f4b5cb 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -8,12 +8,10 @@ import { type ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; import { - doctorManagedPythonRuntime, installManagedPythonRuntime, pruneManagedPythonRuntimes, readManagedPythonRuntimeStatus, type KtxRuntimeFeature, - type ManagedPythonRuntimeDoctorCheck, type ManagedPythonRuntimeInstallOptions, type ManagedPythonRuntimeInstallResult, type ManagedPythonRuntimeLayoutOptions, @@ -26,7 +24,6 @@ export type KtxRuntimeArgs = | { command: 'start'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean } | { command: 'stop'; cliVersion: string; all: boolean } | { command: 'status'; cliVersion: string; json: boolean } - | { command: 'doctor'; cliVersion: string; json: boolean } | { command: 'prune'; cliVersion: string; dryRun: boolean; yes: boolean }; export interface KtxRuntimeDeps { @@ -39,7 +36,6 @@ export interface KtxRuntimeDeps { stopDaemon?: (options: { cliVersion: string }) => Promise; stopAllDaemons?: (options: { cliVersion: string }) => Promise; readStatus?: (options: 
ManagedPythonRuntimeLayoutOptions) => Promise; - doctorRuntime?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; pruneRuntime?: (options: { cliVersion: string; runtimeRoot: string; @@ -149,16 +145,6 @@ function writeStatus(io: KtxCliIo, status: ManagedPythonRuntimeStatus): void { } } -function writeDoctor(io: KtxCliIo, checks: ManagedPythonRuntimeDoctorCheck[]): void { - io.stdout.write('KTX Python runtime doctor\n'); - for (const check of checks) { - io.stdout.write(`${check.status.toUpperCase()} ${check.label}: ${check.detail}\n`); - if (check.fix) { - io.stdout.write(` Fix: ${check.fix}\n`); - } - } -} - function writePrune(io: KtxCliIo, result: ManagedPythonRuntimePruneResult, dryRun: boolean): void { if (result.stale.length === 0) { io.stdout.write(`No stale KTX Python runtimes found under ${result.runtimeRoot}\n`); @@ -218,16 +204,6 @@ export async function runKtxRuntime( } return 0; } - if (args.command === 'doctor') { - const doctorRuntime = deps.doctorRuntime ?? doctorManagedPythonRuntime; - const checks = await doctorRuntime({ cliVersion: args.cliVersion }); - if (args.json) { - writeJson(io, { checks }); - } else { - writeDoctor(io, checks); - } - return checks.some((check) => check.status === 'fail') ? 1 : 0; - } if (!args.dryRun && !args.yes) { io.stderr.write('Refusing to prune without --yes. 
Preview with: ktx dev runtime prune --dry-run\n'); return 1; diff --git a/packages/cli/src/scan.test.ts b/packages/cli/src/scan.test.ts index 152c8b3a..74d52f35 100644 --- a/packages/cli/src/scan.test.ts +++ b/packages/cli/src/scan.test.ts @@ -1,18 +1,12 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import type { SourceAdapter } from '@ktx/context/ingest'; import { initKtxProject } from '@ktx/context/project'; import type { - ApplyLocalScanRelationshipReviewDecisionsResult, - ExportLocalRelationshipFeedbackLabelsResult, - KtxRelationshipFeedbackCalibrationReport, - KtxRelationshipThresholdAdviceReport, KtxScanReport, LocalScanRunResult, - LocalScanStatusResponse, - ReadLocalScanRelationshipArtifactsResult, RunLocalScanOptions, - WriteLocalScanRelationshipReviewDecisionResult, } from '@ktx/context/scan'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createCliScanProgress, runKtxScan } from './scan.js'; @@ -190,6 +184,32 @@ function makeIo(options: { isTTY?: boolean } = {}) { }; } +function fakeLiveDatabaseAdapter( + createIntrospection: (options: { connections: unknown }) => { + extractSchema: (connectionId: string) => Promise; + }, +): SourceAdapter { + return { + source: 'live-database', + skillNames: [], + async detect() { + return true; + }, + async fetch(_pullConfig: unknown, stagedDir: string, ctx: { connectionId: string }) { + await mkdir(stagedDir, { recursive: true }); + const schema = await createIntrospection({ connections: {} }).extractSchema(ctx.connectionId); + await writeFile( + join(stagedDir, 'connection.json'), + JSON.stringify({ connectionId: ctx.connectionId, schema }, null, 2), + 'utf-8', + ); + }, + async chunk() { + return { workUnits: [] }; + }, + }; +} + const report: KtxScanReport = { connectionId: 'warehouse', driver: 'postgres', @@ 
-285,6 +305,7 @@ const reportWithAttention: KtxScanReport = { describe('runKtxScan', () => { let tempDir: string; + const noLocalIngestAdapters = () => []; beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-scan-')); @@ -322,7 +343,7 @@ describe('runKtxScan', () => { databaseIntrospectionUrl: 'http://127.0.0.1:8765', }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -346,10 +367,9 @@ describe('runKtxScan', () => { expect(io.stdout()).toContain('Artifacts\n'); expect(io.stdout()).toContain('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json'); expect(io.stdout()).toContain('Next:\n'); - expect(io.stdout()).toContain('ktx dev scan status --project-dir '); - expect(io.stdout()).toContain(' scan-run-1\n'); - expect(io.stdout()).toContain('ktx dev scan report --project-dir '); - expect(io.stdout()).toContain(' scan-run-1\n'); + expect(io.stdout()).toContain('ktx status --project-dir '); + expect(io.stdout()).not.toContain('ktx dev scan status'); + expect(io.stdout()).not.toContain('ktx dev scan report'); expect(io.stdout()).not.toContain('\u001b['); expect(io.stdout()).not.toContain('✓'); expect(io.stdout()).not.toContain('+1'); @@ -426,7 +446,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -490,7 +510,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -534,7 +554,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ); expect({ exitCode, stderr: io.stderr() }).toEqual({ exitCode: 0, stderr: '' }); } finally { @@ -664,7 +684,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { 
runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -706,7 +726,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); } finally { @@ -762,7 +782,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); } finally { @@ -777,1034 +797,6 @@ describe('runKtxScan', () => { expect(io.stdout()).not.toContain('\u001b['); }); - it('prints status and human report output by default', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const status: LocalScanStatusResponse = { - runId: 'scan-run-1', - status: 'done', - done: true, - connectionId: 'warehouse', - mode: 'structural', - dryRun: false, - syncId: 'sync-1', - progress: 1, - startedAt: '2026-04-29T09:00:00.000Z', - completedAt: '2026-04-29T09:00:01.000Z', - reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json', - warnings: [], - }; - const io = makeIo(); - - await expect( - runKtxScan({ command: 'status', projectDir: tempDir, runId: 'scan-run-1' }, io.io, { - getLocalScanStatus: vi.fn().mockResolvedValue(status), - }), - ).resolves.toBe(0); - expect(io.stdout()).toContain('Run: scan-run-1'); - expect(io.stdout()).toContain('Status: done'); - - const reportIo = makeIo(); - await expect( - runKtxScan({ command: 'report', projectDir: tempDir, runId: 'scan-run-1', json: false }, reportIo.io, { - getLocalScanReport: vi.fn().mockResolvedValue(report), - }), - ).resolves.toBe(0); - expect(reportIo.stdout()).toContain('KTX scan report\n'); - expect(reportIo.stdout()).toContain('Run: scan-run-1'); - expect(reportIo.stdout()).toContain('What changed\n'); - expect(() => JSON.parse(reportIo.stdout())).toThrow(); - }); - - it('prints raw report JSON when requested', async () => { - await 
initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const reportIo = makeIo(); - - await expect( - runKtxScan({ command: 'report', projectDir: tempDir, runId: 'scan-run-1', json: true }, reportIo.io, { - getLocalScanReport: vi.fn().mockResolvedValue(report), - }), - ).resolves.toBe(0); - - expect(JSON.parse(reportIo.stdout())).toMatchObject({ runId: 'scan-run-1', connectionId: 'warehouse' }); - }); - - it('prints review relationship artifacts in human form', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const reviewReport: KtxScanReport = { - ...reportWithAttention, - runId: 'scan-run-review', - syncId: 'sync-review', - relationships: { accepted: 0, review: 1, rejected: 1, skipped: 0 }, - artifactPaths: { - ...reportWithAttention.artifactPaths, - reportPath: 'raw-sources/warehouse/live-database/sync-review/scan-report.json', - enrichmentArtifacts: [ - 'raw-sources/warehouse/live-database/sync-review/enrichment/relationships.json', - 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-diagnostics.json', - 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-profile.json', - ], - }, - }; - const relationshipArtifacts: ReadLocalScanRelationshipArtifactsResult = { - runId: 'scan-run-review', - connectionId: 'warehouse', - syncId: 'sync-review', - report: reviewReport, - relationships: { - connectionId: 'warehouse', - accepted: [], - review: [ - { - id: 'orders:orders.customer_id->customers:customers.id', - status: 'review', - source: 'deterministic_name', - from: { - tableId: 'orders', - columnIds: ['orders.customer_id'], - table: { catalog: null, db: 'public', name: 'orders' }, - columns: ['customer_id'], - }, - to: { - tableId: 'customers', - columnIds: ['customers.id'], - table: { catalog: null, db: 'public', name: 'customers' }, - columns: ['id'], - }, - relationshipType: 'many_to_one', - confidence: 0.62, - pkScore: 0.91, - fkScore: 0.62, - score: 0.62, - 
evidence: { sources: ['table_suffix'] }, - validation: { status: 'unavailable' }, - graph: { reasons: ['validation_unavailable_review_only'] }, - reasons: ['validation_unavailable_review_only', 'fk_score_review'], - }, - ], - rejected: [ - { - id: 'orders:orders.note_id->notes:notes.id', - status: 'rejected', - source: 'deterministic_name', - from: { - tableId: 'orders', - columnIds: ['orders.note_id'], - table: { catalog: null, db: 'public', name: 'orders' }, - columns: ['note_id'], - }, - to: { - tableId: 'notes', - columnIds: ['notes.id'], - table: { catalog: null, db: 'public', name: 'notes' }, - columns: ['id'], - }, - relationshipType: 'many_to_one', - confidence: 0.2, - pkScore: 0.4, - fkScore: 0.2, - score: 0.2, - evidence: { sources: ['exact_column_match'] }, - validation: { status: 'failed' }, - graph: { reasons: ['low_source_coverage'] }, - reasons: ['low_source_coverage'], - }, - ], - skipped: [], - }, - diagnostics: { - connectionId: 'warehouse', - generatedAt: '2026-05-07T10:00:00.000Z', - summary: { accepted: 0, review: 1, rejected: 1, skipped: 0 }, - noAcceptedReason: 'relationship candidates require review before manifest writes', - candidateCountsBySource: { deterministic_name: 2 }, - validation: { available: false, sqlAvailable: false, queryCount: 0 }, - thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 }, - policy: { - validationRequiredForManifest: true, - maxCandidatesPerColumn: 25, - profileSampleRows: 10000, - validationConcurrency: 4, - }, - warnings: [], - profileWarnings: [], - }, - profile: { - connectionId: 'warehouse', - driver: 'sqlite', - sqlAvailable: false, - tables: [], - columns: {}, - queryCount: 0, - warnings: ['KTX scan connector cannot run read-only SQL relationship validation'], - }, - paths: { - relationships: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationships.json', - diagnostics: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-diagnostics.json', - profile: 
'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-profile.json', - }, - }; - const readLocalScanRelationshipArtifacts = vi.fn(async () => relationshipArtifacts); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationships', - projectDir: tempDir, - runId: 'scan-run-review', - status: 'review', - json: false, - limit: 10, - }, - io.io, - { readLocalScanRelationshipArtifacts }, - ), - ).resolves.toBe(0); - - expect(readLocalScanRelationshipArtifacts).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - 'scan-run-review', - ); - - expect(io.stdout()).toContain('KTX relationship artifacts'); - expect(io.stdout()).toContain('Run: scan-run-review'); - expect(io.stdout()).toContain('Summary: accepted=0 review=1 rejected=1 skipped=0'); - expect(io.stdout()).toContain('Reason: relationship candidates require review before manifest writes'); - expect(io.stdout()).toContain('Review relationships (1)'); - expect(io.stdout()).toContain('orders.customer_id -> customers.id'); - expect(io.stdout()).toContain( - 'type=many_to_one source=deterministic_name confidence=0.62 pkScore=0.91 fkScore=0.62', - ); - expect(io.stdout()).toContain('reasons=validation_unavailable_review_only, fk_score_review'); - expect(io.stdout()).toContain('relationships.json'); - }); - - it('prints filtered relationship artifacts as JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const jsonReport: KtxScanReport = { - ...reportWithAttention, - runId: 'scan-run-json', - syncId: 'sync-json', - artifactPaths: { - ...reportWithAttention.artifactPaths, - reportPath: 'raw-sources/warehouse/live-database/sync-json/scan-report.json', - enrichmentArtifacts: ['raw-sources/warehouse/live-database/sync-json/enrichment/relationships.json'], - }, - }; - const relationshipArtifacts: ReadLocalScanRelationshipArtifactsResult = { - runId: 'scan-run-json', - connectionId: 'warehouse', - syncId: 'sync-json', - 
report: jsonReport, - relationships: { - connectionId: 'warehouse', - accepted: [], - review: [], - rejected: [], - skipped: [{ relationshipId: 'composite:orders', reason: 'composite_key_width_limit' }], - }, - diagnostics: null, - profile: null, - paths: { - relationships: 'raw-sources/warehouse/live-database/sync-json/enrichment/relationships.json', - diagnostics: null, - profile: null, - }, - }; - const readLocalScanRelationshipArtifacts = vi.fn(async () => relationshipArtifacts); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationships', - projectDir: tempDir, - runId: 'scan-run-json', - status: 'skipped', - json: true, - limit: 25, - }, - io.io, - { readLocalScanRelationshipArtifacts }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - runId: 'scan-run-json', - connectionId: 'warehouse', - status: 'skipped', - relationships: { - accepted: [], - review: [], - rejected: [], - skipped: [{ relationshipId: 'composite:orders', reason: 'composite_key_width_limit' }], - }, - }); - }); - - it('records an accepted relationship review decision in human form', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const decisionResult: WriteLocalScanRelationshipReviewDecisionResult = { - path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - decision: { - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - previousStatus: 'review', - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - decidedAt: '2026-05-07T12:00:00.000Z', - reviewer: 'Andrey', - note: 'Looks right', - from: { - tableId: 'orders', - columnIds: ['orders.customer_id'], - table: { catalog: null, db: 'public', name: 'orders' }, - columns: ['customer_id'], - }, - to: { - tableId: 'customers', - columnIds: ['customers.id'], - table: { catalog: null, db: 'public', name: 'customers' }, - columns: 
['id'], - }, - relationshipType: 'many_to_one', - source: 'deterministic_name', - score: 0.62, - confidence: 0.62, - pkScore: 0.91, - fkScore: 0.62, - reasons: ['fk_score_review'], - }, - artifact: { - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - generatedAt: '2026-05-07T12:00:00.000Z', - decisions: [], - }, - }; - const writeLocalScanRelationshipReviewDecision = vi.fn(async () => decisionResult); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipDecision', - projectDir: tempDir, - runId: 'scan-run-review', - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - reviewer: 'Andrey', - note: 'Looks right', - json: false, - }, - io.io, - { writeLocalScanRelationshipReviewDecision }, - ), - ).resolves.toBe(0); - - expect(writeLocalScanRelationshipReviewDecision).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - runId: 'scan-run-review', - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - reviewer: 'Andrey', - note: 'Looks right', - }, - ); - expect(io.stdout()).toContain('Recorded relationship decision'); - expect(io.stdout()).toContain('Decision: accepted'); - expect(io.stdout()).toContain('Candidate: orders:orders.customer_id->customers:customers.id'); - expect(io.stdout()).toContain('Previous status: review'); - expect(io.stdout()).toContain( - 'Path: raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - ); - }); - - it('records a rejected relationship review decision as JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const decisionResult: WriteLocalScanRelationshipReviewDecisionResult = { - path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - decision: { - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'rejected', 
- previousStatus: 'review', - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - decidedAt: '2026-05-07T12:00:00.000Z', - reviewer: 'Andrey', - note: null, - from: { - tableId: 'orders', - columnIds: ['orders.customer_id'], - table: { catalog: null, db: 'public', name: 'orders' }, - columns: ['customer_id'], - }, - to: { - tableId: 'customers', - columnIds: ['customers.id'], - table: { catalog: null, db: 'public', name: 'customers' }, - columns: ['id'], - }, - relationshipType: 'many_to_one', - source: 'deterministic_name', - score: 0.62, - confidence: 0.62, - pkScore: 0.91, - fkScore: 0.62, - reasons: ['fk_score_review'], - }, - artifact: { - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - generatedAt: '2026-05-07T12:00:00.000Z', - decisions: [], - }, - }; - const writeLocalScanRelationshipReviewDecision = vi.fn(async () => decisionResult); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipDecision', - projectDir: tempDir, - runId: 'scan-run-review', - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'rejected', - reviewer: 'ktx', - note: null, - json: true, - }, - io.io, - { writeLocalScanRelationshipReviewDecision }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - path: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - decision: { - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'rejected', - previousStatus: 'review', - }, - }); - }); - - it('reports missing scan runs when recording relationship decisions', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const writeLocalScanRelationshipReviewDecision = vi.fn(async () => null); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipDecision', - projectDir: tempDir, - runId: 'missing-run', - candidateId: 
'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - reviewer: 'ktx', - note: null, - json: false, - }, - io.io, - { writeLocalScanRelationshipReviewDecision }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Scan run "missing-run" was not found'); - }); - - it('applies accepted relationship review decisions with human output', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const applyLocalScanRelationshipReviewDecisions = vi.fn( - async (): Promise => ({ - runId: 'scan-run-a', - connectionId: 'warehouse', - syncId: 'sync-a', - dryRun: true, - decisionsPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json', - selectedDecisions: 1, - appliedRelationships: 1, - relationships: [ - { - id: 'orders:orders.customer_id->customers:customers.id', - source: 'manual', - from: { - tableId: 'public.orders', - columnIds: ['public.orders.customer_id'], - table: { catalog: null, db: 'public', name: 'orders' }, - columns: ['customer_id'], - }, - to: { - tableId: 'public.customers', - columnIds: ['public.customers.id'], - table: { catalog: null, db: 'public', name: 'customers' }, - columns: ['id'], - }, - relationshipType: 'many_to_one', - confidence: 1, - isPrimaryKeyReference: true, - }, - ], - manifestShards: [], - manifestShardsWritten: 0, - }), - ); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipApply', - projectDir: tempDir, - runId: 'scan-run-a', - applyAllAccepted: true, - candidateIds: [], - dryRun: true, - json: false, - }, - io.io, - { applyLocalScanRelationshipReviewDecisions }, - ), - ).resolves.toBe(0); - - expect(applyLocalScanRelationshipReviewDecisions).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - runId: 'scan-run-a', - applyAllAccepted: true, - candidateIds: [], - dryRun: true, - }, - ); - expect(io.stdout()).toContain('Relationship review apply'); - 
expect(io.stdout()).toContain('Run: scan-run-a'); - expect(io.stdout()).toContain('Mode: dry-run'); - expect(io.stdout()).toContain('Applied: 1 manual relationship'); - expect(io.stdout()).toContain('Schema shards written: 0'); - }); - - it('prints relationship review apply JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const applyResult: ApplyLocalScanRelationshipReviewDecisionsResult = { - runId: 'scan-run-a', - connectionId: 'warehouse', - syncId: 'sync-a', - dryRun: false, - decisionsPath: 'raw-sources/warehouse/live-database/sync-a/enrichment/relationship-review-decisions.json', - selectedDecisions: 1, - appliedRelationships: 1, - relationships: [], - manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], - manifestShardsWritten: 1, - }; - const applyLocalScanRelationshipReviewDecisions = vi.fn(async () => applyResult); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipApply', - projectDir: tempDir, - runId: 'scan-run-a', - applyAllAccepted: false, - candidateIds: ['orders:orders.customer_id->customers:customers.id'], - dryRun: false, - json: true, - }, - io.io, - { applyLocalScanRelationshipReviewDecisions }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toEqual(applyResult); - expect(applyLocalScanRelationshipReviewDecisions).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - runId: 'scan-run-a', - applyAllAccepted: false, - candidateIds: ['orders:orders.customer_id->customers:customers.id'], - dryRun: false, - }, - ); - }); - - it('prints relationship feedback export summary in human form', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const feedback: ExportLocalRelationshipFeedbackLabelsResult = { - generatedAt: '2026-05-07T13:00:00.000Z', - filters: { connectionId: null, decision: 'all' }, - summary: { total: 2, accepted: 1, rejected: 1, connections: 1, runs: 1 }, - 
labels: [ - { - schemaVersion: 1, - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - previousStatus: 'review', - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - decidedAt: '2026-05-07T12:00:00.000Z', - reviewer: 'Andrey', - note: 'Confirmed in warehouse docs', - relationshipType: 'many_to_one', - source: 'deterministic_name', - score: 0.62, - confidence: 0.62, - pkScore: 0.91, - fkScore: 0.62, - fromTable: 'public.orders', - fromColumns: ['customer_id'], - toTable: 'public.customers', - toColumns: ['id'], - reasons: ['fk_score_review'], - artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - }, - { - schemaVersion: 1, - candidateId: 'orders:orders.note_id->notes:notes.id', - decision: 'rejected', - previousStatus: 'rejected', - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - decidedAt: '2026-05-07T12:05:00.000Z', - reviewer: 'Andrey', - note: null, - relationshipType: 'many_to_one', - source: 'deterministic_name', - score: 0.2, - confidence: 0.2, - pkScore: 0.4, - fkScore: 0.2, - fromTable: 'public.orders', - fromColumns: ['note_id'], - toTable: 'public.notes', - toColumns: ['id'], - reasons: ['low_source_coverage'], - artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - }, - ], - warnings: [], - }; - const exportLocalRelationshipFeedbackLabels = vi.fn(async () => feedback); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipFeedback', - projectDir: tempDir, - connectionId: null, - decision: 'all', - json: false, - jsonl: false, - }, - io.io, - { exportLocalRelationshipFeedbackLabels }, - ), - ).resolves.toBe(0); - - expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - connectionId: null, - decision: 'all', - }, - ); - 
expect(io.stdout()).toContain('KTX relationship feedback labels'); - expect(io.stdout()).toContain('Total: 2'); - expect(io.stdout()).toContain('Accepted: 1'); - expect(io.stdout()).toContain('Rejected: 1'); - expect(io.stdout()).toContain('orders.customer_id -> customers.id'); - expect(io.stdout()).toContain('decision=accepted previous=review score=0.62 reviewer=Andrey'); - }); - - it('prints relationship feedback labels as JSONL', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const feedback: ExportLocalRelationshipFeedbackLabelsResult = { - generatedAt: '2026-05-07T13:00:00.000Z', - filters: { connectionId: 'warehouse', decision: 'accepted' }, - summary: { total: 1, accepted: 1, rejected: 0, connections: 1, runs: 1 }, - labels: [ - { - schemaVersion: 1, - candidateId: 'orders:orders.customer_id->customers:customers.id', - decision: 'accepted', - previousStatus: 'review', - connectionId: 'warehouse', - runId: 'scan-run-review', - syncId: 'sync-review', - decidedAt: '2026-05-07T12:00:00.000Z', - reviewer: 'ktx', - note: null, - relationshipType: 'many_to_one', - source: 'deterministic_name', - score: 0.62, - confidence: 0.62, - pkScore: 0.91, - fkScore: 0.62, - fromTable: 'public.orders', - fromColumns: ['customer_id'], - toTable: 'public.customers', - toColumns: ['id'], - reasons: ['fk_score_review'], - artifactPath: 'raw-sources/warehouse/live-database/sync-review/enrichment/relationship-review-decisions.json', - }, - ], - warnings: [], - }; - const exportLocalRelationshipFeedbackLabels = vi.fn(async () => feedback); - const formatKtxRelationshipFeedbackLabelsJsonl = vi.fn( - () => '{"candidateId":"orders:orders.customer_id->customers:customers.id"}\n', - ); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipFeedback', - projectDir: tempDir, - connectionId: 'warehouse', - decision: 'accepted', - json: false, - jsonl: true, - }, - io.io, - { exportLocalRelationshipFeedbackLabels, 
formatKtxRelationshipFeedbackLabelsJsonl }, - ), - ).resolves.toBe(0); - - expect(exportLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - connectionId: 'warehouse', - decision: 'accepted', - }, - ); - expect(formatKtxRelationshipFeedbackLabelsJsonl).toHaveBeenCalledWith(feedback); - expect(JSON.parse(io.stdout())).toEqual({ candidateId: 'orders:orders.customer_id->customers:customers.id' }); - }); - - it('prints relationship feedback export as JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const feedback: ExportLocalRelationshipFeedbackLabelsResult = { - generatedAt: '2026-05-07T13:00:00.000Z', - filters: { connectionId: null, decision: 'rejected' }, - summary: { total: 0, accepted: 0, rejected: 0, connections: 0, runs: 0 }, - labels: [], - warnings: [], - }; - const exportLocalRelationshipFeedbackLabels = vi.fn(async () => feedback); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipFeedback', - projectDir: tempDir, - connectionId: null, - decision: 'rejected', - json: true, - jsonl: false, - }, - io.io, - { exportLocalRelationshipFeedbackLabels }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - filters: { connectionId: null, decision: 'rejected' }, - summary: { total: 0, accepted: 0, rejected: 0 }, - labels: [], - }); - }); - - it('prints relationship feedback calibration as human output', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const calibration: KtxRelationshipFeedbackCalibrationReport = { - generatedAt: '2026-05-07T13:00:00.000Z', - filters: { connectionId: null, decision: 'all' }, - thresholds: { accept: 0.85, review: 0.55 }, - summary: { - total: 2, - scored: 2, - unscored: 0, - acceptedLabels: 1, - rejectedLabels: 1, - predictedAccepted: 1, - predictedReview: 0, - predictedRejected: 1, - acceptedBandPrecision: 1, - 
rejectedBandPrecision: 1, - reviewBandAcceptedRate: null, - meanAcceptedScore: 0.91, - meanRejectedScore: 0.21, - }, - buckets: [ - { - label: '0.00-0.24', - minInclusive: 0, - maxInclusive: 0.24, - total: 1, - accepted: 0, - rejected: 1, - acceptanceRate: 0, - }, - { - label: '0.25-0.49', - minInclusive: 0.25, - maxInclusive: 0.49, - total: 0, - accepted: 0, - rejected: 0, - acceptanceRate: null, - }, - { - label: '0.50-0.74', - minInclusive: 0.5, - maxInclusive: 0.74, - total: 0, - accepted: 0, - rejected: 0, - acceptanceRate: null, - }, - { - label: '0.75-1.00', - minInclusive: 0.75, - maxInclusive: 1, - total: 1, - accepted: 1, - rejected: 0, - acceptanceRate: 1, - }, - ], - labels: [], - warnings: [], - }; - const calibrateLocalRelationshipFeedbackLabels = vi.fn(async () => calibration); - const formatKtxRelationshipFeedbackCalibrationMarkdown = vi.fn( - () => 'KTX relationship feedback calibration\nTotal labels: 2\n', - ); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipCalibration', - projectDir: tempDir, - connectionId: null, - decision: 'all', - acceptThreshold: 0.85, - reviewThreshold: 0.55, - json: false, - }, - io.io, - { calibrateLocalRelationshipFeedbackLabels, formatKtxRelationshipFeedbackCalibrationMarkdown }, - ), - ).resolves.toBe(0); - - expect(calibrateLocalRelationshipFeedbackLabels).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - connectionId: null, - decision: 'all', - acceptThreshold: 0.85, - reviewThreshold: 0.55, - }, - ); - expect(formatKtxRelationshipFeedbackCalibrationMarkdown).toHaveBeenCalledWith(calibration); - expect(io.stdout()).toBe('KTX relationship feedback calibration\nTotal labels: 2\n'); - }); - - it('prints relationship feedback calibration as JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const calibration: KtxRelationshipFeedbackCalibrationReport = { - generatedAt: '2026-05-07T13:00:00.000Z', - filters: { 
connectionId: 'warehouse', decision: 'rejected' }, - thresholds: { accept: 0.9, review: 0.5 }, - summary: { - total: 0, - scored: 0, - unscored: 0, - acceptedLabels: 0, - rejectedLabels: 0, - predictedAccepted: 0, - predictedReview: 0, - predictedRejected: 0, - acceptedBandPrecision: null, - rejectedBandPrecision: null, - reviewBandAcceptedRate: null, - meanAcceptedScore: null, - meanRejectedScore: null, - }, - buckets: [], - labels: [], - warnings: [], - }; - const calibrateLocalRelationshipFeedbackLabels = vi.fn(async () => calibration); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipCalibration', - projectDir: tempDir, - connectionId: 'warehouse', - decision: 'rejected', - acceptThreshold: 0.9, - reviewThreshold: 0.5, - json: true, - }, - io.io, - { calibrateLocalRelationshipFeedbackLabels }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - filters: { connectionId: 'warehouse', decision: 'rejected' }, - thresholds: { accept: 0.9, review: 0.5 }, - summary: { total: 0, scored: 0 }, - }); - }); - - it('prints relationship threshold advice as human output', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const advice: KtxRelationshipThresholdAdviceReport = { - generatedAt: '2026-05-07T14:00:00.000Z', - filters: { connectionId: null, decision: 'all' }, - status: 'ready', - gates: { - minTotalLabels: 4, - minAcceptedLabels: 2, - minRejectedLabels: 2, - minAcceptedBandPrecision: 0.9, - minAcceptedOrReviewRecall: 0.8, - minRejectedBandPrecision: 0.8, - }, - summary: { - totalLabels: 4, - scoredLabels: 4, - unscoredLabels: 0, - acceptedLabels: 2, - rejectedLabels: 2, - evaluatedCandidates: 2, - eligibleCandidates: 1, - }, - recommended: { - acceptThreshold: 0.9, - reviewThreshold: 0.55, - eligible: true, - predictedAccepted: 1, - predictedReview: 1, - predictedRejected: 2, - acceptedBandPrecision: 1, - acceptedRecall: 0.5, - acceptedOrReviewRecall: 1, - 
rejectedBandPrecision: 1, - rejectedRecall: 1, - falseAcceptedRejectedLabels: 0, - falseRejectedAcceptedLabels: 0, - }, - candidates: [], - reasons: [], - warnings: [], - }; - const adviseLocalRelationshipFeedbackThresholds = vi.fn(async () => advice); - const formatKtxRelationshipThresholdAdviceMarkdown = vi.fn( - () => 'KTX relationship threshold advice\nRecommended: accept=0.90 review=0.55\n', - ); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipThresholds', - projectDir: tempDir, - connectionId: null, - minTotalLabels: 4, - minAcceptedLabels: 2, - minRejectedLabels: 2, - json: false, - }, - io.io, - { adviseLocalRelationshipFeedbackThresholds, formatKtxRelationshipThresholdAdviceMarkdown }, - ), - ).resolves.toBe(0); - - expect(adviseLocalRelationshipFeedbackThresholds).toHaveBeenCalledWith( - expect.objectContaining({ projectDir: tempDir }), - { - connectionId: null, - minTotalLabels: 4, - minAcceptedLabels: 2, - minRejectedLabels: 2, - }, - ); - expect(formatKtxRelationshipThresholdAdviceMarkdown).toHaveBeenCalledWith(advice); - expect(io.stdout()).toBe('KTX relationship threshold advice\nRecommended: accept=0.90 review=0.55\n'); - }); - - it('prints relationship threshold advice as JSON', async () => { - await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); - const advice: KtxRelationshipThresholdAdviceReport = { - generatedAt: '2026-05-07T14:00:00.000Z', - filters: { connectionId: 'warehouse', decision: 'all' }, - status: 'insufficient_labels', - gates: { - minTotalLabels: 20, - minAcceptedLabels: 5, - minRejectedLabels: 5, - minAcceptedBandPrecision: 0.9, - minAcceptedOrReviewRecall: 0.8, - minRejectedBandPrecision: 0.8, - }, - summary: { - totalLabels: 0, - scoredLabels: 0, - unscoredLabels: 0, - acceptedLabels: 0, - rejectedLabels: 0, - evaluatedCandidates: 0, - eligibleCandidates: 0, - }, - recommended: null, - candidates: [], - reasons: ['Need at least 20 scored labels; found 0.'], - warnings: [], - 
}; - const adviseLocalRelationshipFeedbackThresholds = vi.fn(async () => advice); - - const io = makeIo(); - await expect( - runKtxScan( - { - command: 'relationshipThresholds', - projectDir: tempDir, - connectionId: 'warehouse', - minTotalLabels: 20, - minAcceptedLabels: 5, - minRejectedLabels: 5, - json: true, - }, - io.io, - { adviseLocalRelationshipFeedbackThresholds }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - filters: { connectionId: 'warehouse', decision: 'all' }, - status: 'insufficient_labels', - recommended: null, - }); - }); - it('passes native CLI adapters into local scan runs for mysql configs', async () => { const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-')); await initKtxProject({ projectDir: tempProject, projectName: 'warehouse' }); @@ -1846,7 +838,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -1895,7 +887,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -1953,7 +945,10 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { + runLocalScan, + createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createPostgresLiveDatabaseIntrospection)], + }, ), ).resolves.toBe(0); @@ -2017,7 +1012,7 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, ), ).resolves.toBe(0); @@ -2069,7 +1064,10 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { + runLocalScan, + createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createSqlServerLiveDatabaseIntrospection)], + }, ), ).resolves.toBe(0); @@ -2132,7 +1130,10 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { + 
runLocalScan, + createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createBigQueryLiveDatabaseIntrospection)], + }, ), ).resolves.toBe(0); @@ -2198,7 +1199,10 @@ describe('runKtxScan', () => { dryRun: false, }, io.io, - { runLocalScan }, + { + runLocalScan, + createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createSnowflakeLiveDatabaseIntrospection)], + }, ), ).resolves.toBe(0); diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index e3bda577..bca6057d 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -1,34 +1,10 @@ import { loadKtxProject } from '@ktx/context/project'; import { - type ApplyLocalScanRelationshipReviewDecisionsResult, - adviseLocalRelationshipFeedbackThresholds, - applyLocalScanRelationshipReviewDecisions, - calibrateLocalRelationshipFeedbackLabels, - type ExportLocalRelationshipFeedbackLabelsResult, - exportLocalRelationshipFeedbackLabels, - formatKtxRelationshipFeedbackCalibrationMarkdown, - formatKtxRelationshipFeedbackLabelsJsonl, - formatKtxRelationshipThresholdAdviceMarkdown, - getLocalScanReport, - getLocalScanStatus, type KtxProgressPort, - type KtxRelationshipArtifact, - type KtxRelationshipArtifactEdge, - type KtxRelationshipArtifactStatus, - type KtxRelationshipDiagnosticsArtifact, - type KtxRelationshipFeedbackCalibrationReport, - type KtxRelationshipFeedbackDecisionFilter, - type KtxRelationshipFeedbackLabel, - type KtxRelationshipReviewDecisionValue, - type KtxRelationshipThresholdAdviceReport, type KtxScanMode, type KtxScanReport, type KtxScanWarning, - type LocalScanStatusResponse, - readLocalScanRelationshipArtifacts, runLocalScan, - type WriteLocalScanRelationshipReviewDecisionResult, - writeLocalScanRelationshipReviewDecision, } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; @@ -38,88 +14,21 @@ import { profileMark } from './startup-profile.js'; profileMark('module:scan'); -export type 
KtxScanArgs = - | { - command: 'run'; - projectDir: string; - connectionId: string; - mode: KtxScanMode; - detectRelationships: boolean; - dryRun: boolean; - databaseIntrospectionUrl?: string; - cliVersion?: string; - runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; - } - | { command: 'status'; projectDir: string; runId: string } - | { command: 'report'; projectDir: string; runId: string; json: boolean } - | { - command: 'relationships'; - projectDir: string; - runId: string; - status: KtxRelationshipArtifactStatus; - json: boolean; - limit: number; - } - | { - command: 'relationshipDecision'; - projectDir: string; - runId: string; - candidateId: string; - decision: KtxRelationshipReviewDecisionValue; - reviewer: string; - note: string | null; - json: boolean; - } - | { - command: 'relationshipApply'; - projectDir: string; - runId: string; - applyAllAccepted: boolean; - candidateIds: string[]; - dryRun: boolean; - json: boolean; - } - | { - command: 'relationshipFeedback'; - projectDir: string; - connectionId: string | null; - decision: KtxRelationshipFeedbackDecisionFilter; - json: boolean; - jsonl: boolean; - } - | { - command: 'relationshipCalibration'; - projectDir: string; - connectionId: string | null; - decision: KtxRelationshipFeedbackDecisionFilter; - acceptThreshold: number; - reviewThreshold: number; - json: boolean; - } - | { - command: 'relationshipThresholds'; - projectDir: string; - connectionId: string | null; - minTotalLabels: number; - minAcceptedLabels: number; - minRejectedLabels: number; - json: boolean; - }; +export interface KtxScanArgs { + command: 'run'; + projectDir: string; + connectionId: string; + mode: KtxScanMode; + detectRelationships: boolean; + dryRun: boolean; + databaseIntrospectionUrl?: string; + cliVersion?: string; + runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; +} interface KtxScanDeps { runLocalScan?: typeof runLocalScan; createLocalIngestAdapters?: typeof createKtxCliLocalIngestAdapters; - getLocalScanStatus?: 
typeof getLocalScanStatus; - getLocalScanReport?: typeof getLocalScanReport; - readLocalScanRelationshipArtifacts?: typeof readLocalScanRelationshipArtifacts; - writeLocalScanRelationshipReviewDecision?: typeof writeLocalScanRelationshipReviewDecision; - applyLocalScanRelationshipReviewDecisions?: typeof applyLocalScanRelationshipReviewDecisions; - exportLocalRelationshipFeedbackLabels?: typeof exportLocalRelationshipFeedbackLabels; - formatKtxRelationshipFeedbackLabelsJsonl?: typeof formatKtxRelationshipFeedbackLabelsJsonl; - calibrateLocalRelationshipFeedbackLabels?: typeof calibrateLocalRelationshipFeedbackLabels; - formatKtxRelationshipFeedbackCalibrationMarkdown?: typeof formatKtxRelationshipFeedbackCalibrationMarkdown; - adviseLocalRelationshipFeedbackThresholds?: typeof adviseLocalRelationshipFeedbackThresholds; - formatKtxRelationshipThresholdAdviceMarkdown?: typeof formatKtxRelationshipThresholdAdviceMarkdown; } function shouldUseStyledOutput(io: KtxCliIo): boolean { @@ -284,208 +193,8 @@ function writeRunSummary(report: KtxScanReport, projectDir: string, io: KtxCliIo writeHumanReportBody(report, io); const projectDirArg = quoteCliArg(projectDir); io.stdout.write('\nNext:\n'); - const statusCommand = styled ? dim('ktx dev scan status') : 'ktx dev scan status'; - const reportCommand = styled ? 
dim('ktx dev scan report') : 'ktx dev scan report'; - io.stdout.write(` ${statusCommand} --project-dir ${projectDirArg} ${report.runId}\n`); - io.stdout.write(` ${reportCommand} --project-dir ${projectDirArg} ${report.runId}\n`); -} - -function writeReport(report: KtxScanReport, io: KtxCliIo): void { - io.stdout.write('KTX scan report\n'); - writeHumanReportBody(report, io); -} - -function formatRelationshipEndpoint(edge: KtxRelationshipArtifactEdge, side: 'from' | 'to'): string { - const endpoint = edge[side]; - if (endpoint.columns.length === 1) { - return `${endpoint.table.name}.${endpoint.columns[0]}`; - } - return `${endpoint.table.name}.(${endpoint.columns.join(',')})`; -} - -function formatRelationshipScore(value: number | null): string { - return value === null ? 'n/a' : value.toFixed(2); -} - -function relationshipStatusTitle(status: Exclude): string { - if (status === 'accepted') { - return 'Accepted relationships'; - } - if (status === 'review') { - return 'Review relationships'; - } - if (status === 'rejected') { - return 'Rejected relationships'; - } - return 'Skipped relationships'; -} - -function filteredRelationshipArtifact( - relationships: KtxRelationshipArtifact, - status: KtxRelationshipArtifactStatus, -): KtxRelationshipArtifact { - if (status === 'all') { - return relationships; - } - return { - connectionId: relationships.connectionId, - accepted: status === 'accepted' ? relationships.accepted : [], - review: status === 'review' ? relationships.review : [], - rejected: status === 'rejected' ? relationships.rejected : [], - skipped: status === 'skipped' ? relationships.skipped : [], - }; -} - -function writeRelationshipEdge(edge: KtxRelationshipArtifactEdge, index: number, io: KtxCliIo): void { - io.stdout.write( - ` ${index + 1}. 
${formatRelationshipEndpoint(edge, 'from')} -> ${formatRelationshipEndpoint(edge, 'to')}\n`, - ); - io.stdout.write( - ` type=${edge.relationshipType} source=${edge.source} confidence=${edge.confidence.toFixed(2)} pkScore=${formatRelationshipScore(edge.pkScore)} fkScore=${formatRelationshipScore(edge.fkScore)}\n`, - ); - io.stdout.write(` reasons=${edge.reasons.length > 0 ? edge.reasons.join(', ') : 'none'}\n`); -} - -function writeRelationshipGroup( - status: Exclude, - relationships: KtxRelationshipArtifact, - limit: number, - io: KtxCliIo, -): void { - if (status === 'skipped') { - io.stdout.write(`\n${relationshipStatusTitle(status)} (${relationships.skipped.length})\n`); - relationships.skipped.slice(0, limit).forEach((item, index) => { - io.stdout.write(` ${index + 1}. ${item.relationshipId}\n`); - io.stdout.write(` reason=${item.reason}\n`); - }); - return; - } - - const edges = - status === 'accepted' - ? relationships.accepted - : status === 'review' - ? relationships.review - : relationships.rejected; - io.stdout.write(`\n${relationshipStatusTitle(status)} (${edges.length})\n`); - edges.slice(0, limit).forEach((edge, index) => { - writeRelationshipEdge(edge, index, io); - }); - if (edges.length > limit) { - io.stdout.write(` ${edges.length - limit} more not shown; rerun with --limit ${edges.length}\n`); - } -} - -function writeRelationshipArtifactSummary(input: { - runId: string; - connectionId: string; - syncId: string; - status: KtxRelationshipArtifactStatus; - limit: number; - summary: KtxRelationshipArtifact; - relationships: KtxRelationshipArtifact; - diagnostics: KtxRelationshipDiagnosticsArtifact | null; - relationshipsPath: string; - io: KtxCliIo; -}): void { - input.io.stdout.write('KTX relationship artifacts\n'); - input.io.stdout.write(`Run: ${input.runId}\n`); - input.io.stdout.write(`Connection: ${input.connectionId}\n`); - input.io.stdout.write(`Sync: ${input.syncId}\n`); - input.io.stdout.write( - `Summary: 
accepted=${input.summary.accepted.length} review=${input.summary.review.length} rejected=${input.summary.rejected.length} skipped=${input.summary.skipped.length}\n`, - ); - if (input.diagnostics?.noAcceptedReason) { - input.io.stdout.write(`Reason: ${input.diagnostics.noAcceptedReason}\n`); - } - input.io.stdout.write(`Artifacts: ${input.relationshipsPath}\n`); - - const statuses: Array> = - input.status === 'all' ? ['accepted', 'review', 'rejected', 'skipped'] : [input.status]; - for (const status of statuses) { - writeRelationshipGroup(status, input.relationships, input.limit, input.io); - } -} - -function writeRelationshipDecisionResult(result: WriteLocalScanRelationshipReviewDecisionResult, io: KtxCliIo): void { - io.stdout.write('Recorded relationship decision\n'); - io.stdout.write(`Decision: ${result.decision.decision}\n`); - io.stdout.write(`Candidate: ${result.decision.candidateId}\n`); - io.stdout.write(`Previous status: ${result.decision.previousStatus}\n`); - io.stdout.write(`Reviewer: ${result.decision.reviewer}\n`); - if (result.decision.note) { - io.stdout.write(`Note: ${result.decision.note}\n`); - } - io.stdout.write(`Path: ${result.path}\n`); -} - -function writeRelationshipApplyResult(result: ApplyLocalScanRelationshipReviewDecisionsResult, io: KtxCliIo): void { - io.stdout.write('Relationship review apply\n'); - io.stdout.write(`Run: ${result.runId}\n`); - io.stdout.write(`Connection: ${result.connectionId}\n`); - io.stdout.write(`Sync: ${result.syncId}\n`); - io.stdout.write(`Mode: ${result.dryRun ? 
'dry-run' : 'write'}\n`); - io.stdout.write(`Decisions: ${result.selectedDecisions} ${plural(result.selectedDecisions, 'accepted decision')}\n`); - io.stdout.write( - `Applied: ${result.appliedRelationships} manual ${plural(result.appliedRelationships, 'relationship')}\n`, - ); - io.stdout.write(`Schema shards written: ${result.manifestShardsWritten}\n`); - if (result.manifestShards.length > 0) { - io.stdout.write('Schema shards:\n'); - for (const shard of result.manifestShards) { - io.stdout.write(` - ${shard}\n`); - } - } - io.stdout.write(`Decisions: ${result.decisionsPath}\n`); -} - -function formatFeedbackColumns(columns: readonly string[]): string { - return columns.length === 1 ? (columns[0] ?? 'unknown') : `(${columns.join(',')})`; -} - -function feedbackTableShortName(value: string): string { - return value.split('.').at(-1) ?? value; -} - -function feedbackEndpoint(label: KtxRelationshipFeedbackLabel, side: 'from' | 'to'): string { - if (side === 'from') { - return `${feedbackTableShortName(label.fromTable)}.${formatFeedbackColumns(label.fromColumns)}`; - } - return `${feedbackTableShortName(label.toTable)}.${formatFeedbackColumns(label.toColumns)}`; -} - -function writeRelationshipFeedbackSummary(result: ExportLocalRelationshipFeedbackLabelsResult, io: KtxCliIo): void { - io.stdout.write('KTX relationship feedback labels\n'); - io.stdout.write(`Generated: ${result.generatedAt}\n`); - io.stdout.write(`Filter connection: ${result.filters.connectionId ?? 
'all'}\n`); - io.stdout.write(`Filter decision: ${result.filters.decision}\n`); - io.stdout.write(`Total: ${result.summary.total}\n`); - io.stdout.write(`Accepted: ${result.summary.accepted}\n`); - io.stdout.write(`Rejected: ${result.summary.rejected}\n`); - io.stdout.write(`Connections: ${result.summary.connections}\n`); - io.stdout.write(`Runs: ${result.summary.runs}\n`); - - if (result.warnings.length > 0) { - io.stdout.write('\nWarnings\n'); - for (const warning of result.warnings.slice(0, 5)) { - io.stdout.write(` - ${warning.path}: ${warning.message}\n`); - } - } - - if (result.labels.length === 0) { - return; - } - - io.stdout.write('\nLabels\n'); - for (const label of result.labels.slice(0, 25)) { - io.stdout.write(` - ${feedbackEndpoint(label, 'from')} -> ${feedbackEndpoint(label, 'to')}\n`); - io.stdout.write( - ` decision=${label.decision} previous=${label.previousStatus} score=${formatRelationshipScore(label.score)} reviewer=${label.reviewer}\n`, - ); - } - if (result.labels.length > 25) { - io.stdout.write(` ${result.labels.length - 25} more labels not shown; rerun with --jsonl for the full dataset\n`); - } + const statusCommand = styled ? dim('ktx status') : 'ktx status'; + io.stdout.write(` ${statusCommand} --project-dir ${projectDirArg}\n`); } interface KtxCliScanProgressState { @@ -540,184 +249,9 @@ export function createCliScanProgress( return progress; } -function writeStatus(status: LocalScanStatusResponse, io: KtxCliIo): void { - io.stdout.write(`Run: ${status.runId}\n`); - io.stdout.write(`Status: ${status.status}\n`); - io.stdout.write(`Connection: ${status.connectionId}\n`); - io.stdout.write(`Mode: ${status.mode}\n`); - io.stdout.write(`Sync: ${status.syncId}\n`); - io.stdout.write(`Progress: ${status.progress}\n`); - io.stdout.write(`Report: ${status.reportPath ?? 
'none'}\n`); -} - export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { try { const project = await loadKtxProject({ projectDir: args.projectDir }); - if (args.command === 'status') { - const status = await (deps.getLocalScanStatus ?? getLocalScanStatus)(project, args.runId); - if (!status) { - throw new Error(`Scan run "${args.runId}" was not found`); - } - writeStatus(status, io); - return 0; - } - if (args.command === 'report') { - const report = await (deps.getLocalScanReport ?? getLocalScanReport)(project, args.runId); - if (!report) { - throw new Error(`Scan report "${args.runId}" was not found`); - } - if (args.json) { - io.stdout.write(`${JSON.stringify(report, null, 2)}\n`); - } else { - writeReport(report, io); - } - return 0; - } - if (args.command === 'relationships') { - const result = await (deps.readLocalScanRelationshipArtifacts ?? readLocalScanRelationshipArtifacts)( - project, - args.runId, - ); - if (!result) { - throw new Error(`Scan run "${args.runId}" was not found`); - } - const filtered = filteredRelationshipArtifact(result.relationships, args.status); - if (args.json) { - io.stdout.write( - `${JSON.stringify( - { - runId: result.runId, - connectionId: result.connectionId, - syncId: result.syncId, - status: args.status, - paths: result.paths, - diagnostics: result.diagnostics, - summary: { - accepted: result.relationships.accepted.length, - review: result.relationships.review.length, - rejected: result.relationships.rejected.length, - skipped: result.relationships.skipped.length, - }, - relationships: filtered, - }, - null, - 2, - )}\n`, - ); - } else { - writeRelationshipArtifactSummary({ - runId: result.runId, - connectionId: result.connectionId, - syncId: result.syncId, - status: args.status, - limit: args.limit, - summary: result.relationships, - relationships: filtered, - diagnostics: result.diagnostics, - relationshipsPath: result.paths.relationships, - io, - }); - } - return 0; 
- } - if (args.command === 'relationshipDecision') { - const result = await (deps.writeLocalScanRelationshipReviewDecision ?? writeLocalScanRelationshipReviewDecision)( - project, - { - runId: args.runId, - candidateId: args.candidateId, - decision: args.decision, - reviewer: args.reviewer, - note: args.note, - }, - ); - if (!result) { - throw new Error(`Scan run "${args.runId}" was not found`); - } - if (args.json) { - io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); - } else { - writeRelationshipDecisionResult(result, io); - } - return 0; - } - if (args.command === 'relationshipApply') { - const result = await ( - deps.applyLocalScanRelationshipReviewDecisions ?? applyLocalScanRelationshipReviewDecisions - )(project, { - runId: args.runId, - applyAllAccepted: args.applyAllAccepted, - candidateIds: args.candidateIds, - dryRun: args.dryRun, - }); - if (args.json) { - io.stdout.write( - `${JSON.stringify(result satisfies ApplyLocalScanRelationshipReviewDecisionsResult, null, 2)}\n`, - ); - } else { - writeRelationshipApplyResult(result, io); - } - return 0; - } - if (args.command === 'relationshipFeedback') { - const result = await (deps.exportLocalRelationshipFeedbackLabels ?? exportLocalRelationshipFeedbackLabels)( - project, - { - connectionId: args.connectionId, - decision: args.decision, - }, - ); - if (args.jsonl) { - io.stdout.write( - (deps.formatKtxRelationshipFeedbackLabelsJsonl ?? formatKtxRelationshipFeedbackLabelsJsonl)(result), - ); - } else if (args.json) { - io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); - } else { - writeRelationshipFeedbackSummary(result, io); - } - return 0; - } - if (args.command === 'relationshipCalibration') { - const result = await (deps.calibrateLocalRelationshipFeedbackLabels ?? 
calibrateLocalRelationshipFeedbackLabels)( - project, - { - connectionId: args.connectionId, - decision: args.decision, - acceptThreshold: args.acceptThreshold, - reviewThreshold: args.reviewThreshold, - }, - ); - if (args.json) { - io.stdout.write(`${JSON.stringify(result satisfies KtxRelationshipFeedbackCalibrationReport, null, 2)}\n`); - } else { - io.stdout.write( - (deps.formatKtxRelationshipFeedbackCalibrationMarkdown ?? formatKtxRelationshipFeedbackCalibrationMarkdown)( - result, - ), - ); - } - return 0; - } - if (args.command === 'relationshipThresholds') { - const result = await ( - deps.adviseLocalRelationshipFeedbackThresholds ?? adviseLocalRelationshipFeedbackThresholds - )(project, { - connectionId: args.connectionId, - minTotalLabels: args.minTotalLabels, - minAcceptedLabels: args.minAcceptedLabels, - minRejectedLabels: args.minRejectedLabels, - }); - if (args.json) { - io.stdout.write(`${JSON.stringify(result satisfies KtxRelationshipThresholdAdviceReport, null, 2)}\n`); - } else { - io.stdout.write( - (deps.formatKtxRelationshipThresholdAdviceMarkdown ?? 
formatKtxRelationshipThresholdAdviceMarkdown)(result), - ); - } - return 0; - } - const managedDaemon = managedDaemonOptionsForScanRun(args, io); const connector = args.mode !== 'structural' || args.detectRelationships diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 7012edb6..1ef044ae 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -1,6 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import { readKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { @@ -203,7 +204,8 @@ describe('setup context build state', () => { expect.objectContaining({ onDetach: expect.any(Function) }), ); expect(verifyContextReady).toHaveBeenCalledWith(tempDir); - expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).toContain(' - context'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); + expect((await readKtxSetupState(tempDir)).completed_steps).toContain('context'); await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({ runId: 'setup-context-local-abc123', status: 'completed', @@ -284,7 +286,8 @@ describe('setup context build state', () => { ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-existing' }); expect(runContextBuildMock).not.toHaveBeenCalled(); - expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).toContain(' - context'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); + expect((await readKtxSetupState(tempDir)).completed_steps).toContain('context'); await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({ runId: 'setup-context-local-existing', status: 'completed', diff --git a/packages/cli/src/setup-databases.test.ts 
b/packages/cli/src/setup-databases.test.ts index fe480b13..46506ae7 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -929,7 +929,7 @@ describe('setup databases step', () => { commandIo.stdout.write(' Raw sources: raw-sources/postgres-warehouse/live-database/2026-05-09-221301-local-moywh3ky\n'); commandIo.stdout.write(' Schema shards: 1\n\n'); commandIo.stdout.write('Next:\n'); - commandIo.stdout.write(` ktx dev scan status --project-dir ${tempDir} local-moywh3ky\n`); + commandIo.stdout.write(` ktx status --project-dir ${tempDir} local-moywh3ky\n`); return 0; }); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index d71b7225..f770c5c4 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -1448,7 +1448,7 @@ async function validateAndScanConnection(input: { if (scanCode !== 0) { flushBufferedCommandOutput(input.io, scanIo); input.io.stderr.write(`Structural scan failed for ${input.connectionId}.\n`); - input.io.stderr.write(`Debug command: ktx dev scan --project-dir ${input.projectDir} ${input.connectionId}\n`); + input.io.stderr.write(`Debug command: ktx scan --project-dir ${input.projectDir} ${input.connectionId}\n`); return false; } const scanOutput = scanIo.stdoutText(); diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index edadc17e..76ba5d0f 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -664,7 +664,7 @@ describe('setup sources step', () => { expect(runInitialIngest).toHaveBeenCalledTimes(1); expect((await readConfig()).connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: '/repo/dbt' }); expect(io.stdout()).toContain('Context source saved without a completed context build for dbt-main.'); - expect(io.stdout()).toContain('Run later: ktx ingest dbt-main'); + expect(io.stdout()).toContain('Run later: ktx ingest run 
--connection-id dbt-main --adapter '); }); it('retries initial source ingest from the failure menu', async () => { diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 695fc1c1..6674ef75 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -739,7 +739,7 @@ async function runInitialSourceIngestWithRecovery(input: { } if (action === 'continue') { input.io.stdout.write(`│ Context source saved without a completed context build for ${input.connectionId}.\n`); - input.io.stdout.write(`│ Run later: ktx ingest ${input.connectionId}\n`); + input.io.stdout.write(`│ Run later: ktx ingest run --connection-id ${input.connectionId} --adapter \n`); return 'continue'; } return 'back'; diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index e74dca5d..bf9c381f 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -584,13 +584,13 @@ describe('setup status', () => { expect(projectPrompts.select).toHaveBeenCalledWith( expect.objectContaining({ - message: 'Which KTX project should setup use?', + message: 'Where should KTX create the project?', options: expect.arrayContaining([expect.objectContaining({ value: 'back', label: 'Back' })]), }), ); expect(projectPrompts.select).toHaveBeenCalledWith( expect.objectContaining({ - message: 'Which KTX project should setup use?', + message: 'Where should KTX create the project?', options: expect.not.arrayContaining([expect.objectContaining({ value: 'exit', label: 'Exit' })]), }), ); diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index a712e373..9efa52cb 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -50,14 +50,6 @@ async function runBuiltCli(args: string[], options: { env?: NodeJS.ProcessEnv } } } -function getRunId(stdout: string): string { - const match = stdout.match(/^Run: (.+)$/m); - if (!match) { - throw 
new Error(`Could not find run id in output:\n${stdout}`); - } - return match[1]; -} - async function writeWarehouseConfig(projectDir: string): Promise { await writeFile( join(projectDir, 'ktx.yaml'), @@ -181,7 +173,6 @@ describe('standalone built ktx CLI smoke', () => { await writeSourceFixture(sourceDir); const run = await runBuiltCli([ - 'dev', 'ingest', 'run', '--project-dir', @@ -195,7 +186,7 @@ describe('standalone built ktx CLI smoke', () => { ]); expect(run).toMatchObject({ code: 1, stdout: '' }); expect(run.stderr).toContain( - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', + 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', ); }); @@ -228,7 +219,7 @@ describe('standalone built ktx CLI smoke', () => { nextSteps: [ `ktx setup --project-dir ${projectDir}`, `ktx status --project-dir ${projectDir}`, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', `ktx agent sl list --json --query "revenue" --project-dir ${projectDir}`, ], }, @@ -260,31 +251,11 @@ describe('standalone built ktx CLI smoke', () => { expect(connectionTest.stdout).toContain('Driver: sqlite'); expect(connectionTest.stdout).toContain('Tables: 2'); - const structural = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir]); + const structural = await runBuiltCli(['scan', 'warehouse', '--project-dir', projectDir]); expectProjectStderr(structural, projectDir); expect(structural.stdout).toContain('Status: done'); expect(structural.stdout).toContain('Mode: structural'); - const structuralRunId = getRunId(structural.stdout); - - const structuralReportResult = await runBuiltCli([ - 'dev', - 'scan', - 'report', - '--json', - '--project-dir', - projectDir, - structuralRunId, - ]); - expect(structuralReportResult).toMatchObject({ code: 0, stderr: '' }); - const structuralReport = parseJsonOutput<{ - mode: string; - artifactPaths: { manifestShards: 
string[]; enrichmentArtifacts: string[] }; - manifestShardsWritten: number; - }>(structuralReportResult.stdout); - expect(structuralReport.mode).toBe('structural'); - expect(structuralReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); - expect(structuralReport.artifactPaths.enrichmentArtifacts).toEqual([]); - expect(structuralReport.manifestShardsWritten).toBe(1); + expect(structural.stdout).toContain('Schema shards: 1'); const structuralManifest = await readFile( join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), @@ -296,7 +267,6 @@ describe('standalone built ktx CLI smoke', () => { expect(structuralManifest).not.toContain('ai:'); const providerlessEnriched = await runBuiltCli([ - 'dev', 'scan', 'warehouse', '--project-dir', @@ -310,89 +280,11 @@ describe('standalone built ktx CLI smoke', () => { expect(providerlessEnriched.stdout).toContain('Accepted: 1'); expect(providerlessEnriched.stdout).toContain('scan_enrichment_backend_not_configured'); expect(providerlessEnriched.stdout).toContain('Enrichment artifacts: 3'); - const providerlessRunId = getRunId(providerlessEnriched.stdout); - - const providerlessReportResult = await runBuiltCli([ - 'dev', - 'scan', - 'report', - '--json', - '--project-dir', - projectDir, - providerlessRunId, - ]); - expect(providerlessReportResult).toMatchObject({ code: 0, stderr: '' }); - const providerlessReport = parseJsonOutput<{ - mode: string; - enrichment: { - tableDescriptions: string; - columnDescriptions: string; - embeddings: string; - deterministicRelationships: string; - statisticalValidation: string; - }; - relationships: { accepted: number; review: number; rejected: number; skipped: number }; - warnings: Array<{ code: string }>; - artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] }; - }>(providerlessReportResult.stdout); - expect(providerlessReport.mode).toBe('enriched'); - expect(providerlessReport.enrichment).toMatchObject({ - 
tableDescriptions: 'skipped', - columnDescriptions: 'skipped', - embeddings: 'skipped', - deterministicRelationships: 'completed', - statisticalValidation: 'completed', - }); - expect(providerlessReport.relationships).toEqual({ accepted: 1, review: 0, rejected: 0, skipped: 0 }); - expect(providerlessReport.warnings).toEqual( - expect.arrayContaining([expect.objectContaining({ code: 'scan_enrichment_backend_not_configured' })]), - ); - expect(providerlessReport.artifactPaths.enrichmentArtifacts).toEqual( - expect.arrayContaining([ - expect.stringContaining('/enrichment/relationships.json'), - expect.stringContaining('/enrichment/relationship-profile.json'), - expect.stringContaining('/enrichment/relationship-diagnostics.json'), - ]), - ); - expect(providerlessReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); - await writeSqliteScanConfig(projectDir, dbPath, true); - const enriched = await runBuiltCli(['dev', 'scan', 'warehouse', '--project-dir', projectDir, '--mode', 'enriched']); + const enriched = await runBuiltCli(['scan', 'warehouse', '--project-dir', projectDir, '--mode', 'enriched']); expectProjectStderr(enriched, projectDir); expect(enriched.stdout).toContain('Mode: enriched'); - const enrichedRunId = getRunId(enriched.stdout); - - const enrichedReportResult = await runBuiltCli([ - 'dev', - 'scan', - 'report', - '--json', - '--project-dir', - projectDir, - enrichedRunId, - ]); - expect(enrichedReportResult).toMatchObject({ code: 0, stderr: '' }); - const enrichedReport = parseJsonOutput<{ - mode: string; - enrichment: { tableDescriptions: string; columnDescriptions: string; embeddings: string }; - artifactPaths: { enrichmentArtifacts: string[]; manifestShards: string[] }; - }>(enrichedReportResult.stdout); - expect(enrichedReport.mode).toBe('enriched'); - expect(enrichedReport.enrichment).toMatchObject({ - tableDescriptions: 'completed', - columnDescriptions: 'completed', - embeddings: 'completed', - }); - 
expect(enrichedReport.artifactPaths.enrichmentArtifacts).toEqual( - expect.arrayContaining([ - expect.stringContaining('/enrichment/descriptions.json'), - expect.stringContaining('/enrichment/embeddings.json'), - expect.stringContaining('/enrichment/relationships.json'), - expect.stringContaining('/enrichment/relationship-profile.json'), - expect.stringContaining('/enrichment/relationship-diagnostics.json'), - ]), - ); - expect(enrichedReport.artifactPaths.manifestShards).toEqual(['semantic-layer/warehouse/_schema/public.yaml']); + expect(enriched.stdout).toContain('Enrichment artifacts:'); const enrichedManifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); expect(enrichedManifest).toContain('Deterministic description'); diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index d8cd3907..af40d44f 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -55,7 +55,7 @@ describe('createLocalBundleIngestRuntime', () => { }), ).toThrow( [ - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', `Configure an Anthropic provider, then rerun ingest:`, ` ktx setup --project-dir ${project.projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, ].join('\n'), diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 38e37b1d..b453cae6 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -559,7 +559,7 @@ function nextLocalJobId(): string { function localIngestLlmProviderGuardMessage(projectDir: string): string { return [ - 'ktx 
dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', 'Configure an Anthropic provider, then rerun ingest:', ` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, ].join('\n'); diff --git a/packages/llm/src/model-provider.test.ts b/packages/llm/src/model-provider.test.ts index ff65a12a..e89c7c17 100644 --- a/packages/llm/src/model-provider.test.ts +++ b/packages/llm/src/model-provider.test.ts @@ -55,6 +55,7 @@ describe('createKtxLlmProvider', () => { }, { createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)), + devtoolsEnabled: false, wrapLanguageModel, devToolsMiddleware, } satisfies KtxLlmProviderFactoryDeps, @@ -145,7 +146,7 @@ describe('createKtxLlmProvider', () => { modelSlots: { default: 'claude-sonnet-4-6' }, promptCaching: { enabled: false }, }, - { createAnthropic }, + { createAnthropic, devtoolsEnabled: false }, ); expect(provider.getModel('default')).toBe(anthropicModel); @@ -171,7 +172,7 @@ describe('createKtxLlmProvider', () => { modelSlots: { default: 'claude-sonnet-4-6' }, promptCaching: { enabled: false }, }, - { createVertexAnthropic }, + { createVertexAnthropic, devtoolsEnabled: false }, ); expect(provider.getModel('default')).toBe(vertexModel); @@ -191,7 +192,7 @@ describe('createKtxLlmProvider', () => { modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, promptCaching: { enabled: false }, }, - { createGateway }, + { createGateway, devtoolsEnabled: false }, ); expect(provider.getModel('curator')).toBe(gatewayModel); diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index e7c9d8bb..5378b8ce 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -188,10 +188,10 @@ describe('standalone example docs', () => { assert.match(quickstart, 
publicPackagePattern('npm install -g {package}')); assert.match(quickstart, /ktx dev runtime install --feature local-embeddings --yes/); assert.match(quickstart, /ktx dev runtime start --feature local-embeddings/); - assert.match(quickstart, /Install `uv`, run `ktx dev runtime doctor`/); + assert.match(quickstart, /Install `uv`, run `ktx dev runtime status`/); assert.match(packageArtifacts, /requires `uv` on `PATH`/); assert.match(packageArtifacts, /ktx dev runtime status/); - assert.match(packageArtifacts, /ktx dev runtime doctor/); + assert.match(packageArtifacts, /ktx dev runtime status/); assert.match(packageArtifacts, /ktx dev runtime prune --dry-run/); assert.match(packageArtifacts, /ktx dev runtime prune --yes/); assert.match( @@ -225,7 +225,7 @@ describe('standalone example docs', () => { assert.doesNotMatch(readme, /installs the Python artifacts directly/); assert.match(readme, /requires `uv` on `PATH`/); assert.match(readme, /ktx dev runtime status/); - assert.match(readme, /ktx dev runtime doctor/); + assert.match(readme, /ktx dev runtime status/); assert.match(readme, /ktx dev runtime prune --dry-run/); assert.match(readme, /ktx dev runtime prune --yes/); assert.doesNotMatch(readme, /@ktx\/context/); @@ -238,14 +238,15 @@ describe('standalone example docs', () => { const buildingContext = await readText('docs-site/content/docs/guides/building-context.mdx'); const scanReference = await readText('docs-site/content/docs/cli-reference/ktx-scan.mdx'); - assert.match(buildingContext, /ktx dev scan /); - assert.match(buildingContext, /ktx dev scan status /); - assert.match(buildingContext, /ktx dev scan report /); - assert.match(scanReference, /ktx dev scan \[options\]/); + assert.match(buildingContext, /ktx scan /); + assert.match(buildingContext, /ktx status/); + assert.doesNotMatch(buildingContext, /ktx scan status /); + assert.doesNotMatch(buildingContext, /ktx scan report /); + assert.match(scanReference, /ktx scan \[options\]/); assert.match(rootReadme, 
/raw-sources\//); assert.match(rootReadme, /live-database\//); assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/); - assert.doesNotMatch(rootReadme, /ktx dev ingest run --project-dir/); + assert.doesNotMatch(rootReadme, /ktx ingest run --project-dir/); assert.doesNotMatch(rootReadme, /ktx ingest status --project-dir/); }); diff --git a/scripts/installed-live-database-smoke.mjs b/scripts/installed-live-database-smoke.mjs index 75c3cc61..653f53b9 100644 --- a/scripts/installed-live-database-smoke.mjs +++ b/scripts/installed-live-database-smoke.mjs @@ -107,7 +107,6 @@ export function buildLiveDatabaseIngestArgs(projectDir, databaseIntrospectionUrl return [ 'exec', 'ktx', - 'dev', 'ingest', 'run', '--project-dir', @@ -325,12 +324,12 @@ async function main() { env: managedRuntimeEnv(cleanInstallDir), timeout: 120_000, }); - requireSuccess('ktx dev ingest run live-database', ingestRun); - requireOutput('ktx dev ingest run live-database', ingestRun, /Status: done/); - requireOutput('ktx dev ingest run live-database', ingestRun, /Adapter: live-database/); - requireOutput('ktx dev ingest run live-database', ingestRun, /Diff: \+4\/~0\/-0\/=0/); - requireOutput('ktx dev ingest run live-database', ingestRun, /Raw files: 4/); - requireOutput('ktx dev ingest run live-database', ingestRun, /Work units: 2/); + requireSuccess('ktx ingest run live-database', ingestRun); + requireOutput('ktx ingest run live-database', ingestRun, /Status: done/); + requireOutput('ktx ingest run live-database', ingestRun, /Adapter: live-database/); + requireOutput('ktx ingest run live-database', ingestRun, /Diff: \+4\/~0\/-0\/=0/); + requireOutput('ktx ingest run live-database', ingestRun, /Raw files: 4/); + requireOutput('ktx ingest run live-database', ingestRun, /Work units: 2/); const runId = getRunId(ingestRun.stdout); const ingestStatus = await run('pnpm', buildLiveDatabaseStatusArgs(projectDir, runId), { diff --git a/scripts/installed-live-database-smoke.test.mjs 
b/scripts/installed-live-database-smoke.test.mjs index a3d6be9e..c62e98b0 100644 --- a/scripts/installed-live-database-smoke.test.mjs +++ b/scripts/installed-live-database-smoke.test.mjs @@ -102,7 +102,6 @@ describe('installed live-database artifact smoke helpers', () => { assert.deepEqual(buildLiveDatabaseIngestArgs('/tmp/project', 'http://127.0.0.1:8765'), [ 'exec', 'ktx', - 'dev', 'ingest', 'run', '--project-dir', diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 07b9aaa7..d99509d9 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -621,7 +621,7 @@ try { assert.deepEqual(missingProjectError.error.nextSteps, [ 'ktx setup --project-dir ' + missingProjectDir, 'ktx status --project-dir ' + missingProjectDir, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', 'ktx agent sl list --json --query "revenue" --project-dir ' + missingProjectDir, ]); process.stdout.write('ktx agent sl list missing project guidance verified\\n'); @@ -678,7 +678,7 @@ try { assert.deepEqual(emptySearchError.error.nextSteps, [ 'ktx setup --project-dir ' + emptyProjectDir, 'ktx status --project-dir ' + emptyProjectDir, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', 'ktx agent sl list --json --query "revenue" --project-dir ' + emptyProjectDir, ]); process.stdout.write('ktx agent sl list no connections guidance verified\\n'); @@ -769,7 +769,7 @@ try { assert.deepEqual(noSourceSearchError.error.nextSteps, [ 'ktx setup --project-dir ' + projectDir, 'ktx status --project-dir ' + projectDir, - 'ktx ingest ', + 'ktx ingest run --connection-id --adapter ', 'ktx agent sl list --json --query "revenue" --project-dir ' + projectDir, ]); process.stdout.write('ktx agent sl list no indexed sources guidance verified\\n'); @@ -865,12 +865,11 @@ try { requireOutput('ktx sl query sqlite execute', sqliteSlQuery, /"rows": \\[\\s*\\[\\s*3\\s*\\]\\s*\\]/); process.stdout.write('ktx sl query sqlite execute verified\\n'); - const 
runtimeDoctor = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'doctor']); - requireSuccess('ktx dev runtime doctor', runtimeDoctor); - requireOutput('ktx dev runtime doctor', runtimeDoctor, /PASS uv/); - requireOutput('ktx dev runtime doctor', runtimeDoctor, /PASS Bundled Python wheel/); - requireOutput('ktx dev runtime doctor', runtimeDoctor, /PASS Managed Python runtime/); - process.stdout.write('ktx dev runtime doctor verified\\n'); + const runtimeDoctor = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'status']); + requireSuccess('ktx dev runtime status', runtimeDoctor); + requireOutput('ktx dev runtime status', runtimeDoctor, /KTX Python runtime/); + requireOutput('ktx dev runtime status', runtimeDoctor, /status: ready/); + process.stdout.write('ktx dev runtime status verified\\n'); const runtimeStart = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'start']); requireSuccess('ktx dev runtime start', runtimeStart); @@ -911,7 +910,7 @@ try { await assert.rejects(() => access(staleRuntimeDir)); process.stdout.write('ktx dev runtime prune verified\\n'); - const structuralScan = await run('pnpm', ['exec', 'ktx', 'dev', 'scan', 'warehouse', + const structuralScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse', '--project-dir', projectDir, ]); @@ -920,34 +919,10 @@ try { requireOutput('ktx scan structural', structuralScan, /Mode: structural/); requireOutput('ktx scan structural', structuralScan, /Needs attention\\s+None/); const structuralScanRunId = getRunId(structuralScan.stdout); - - const scanStatus = await run('pnpm', ['exec', 'ktx', 'dev', 'scan', 'status', - '--project-dir', - projectDir, - structuralScanRunId, - ]); - requireProjectStderr('ktx scan status', scanStatus, projectDir); - requireOutput('ktx scan status', scanStatus, new RegExp('Run: ' + structuralScanRunId)); - requireOutput('ktx scan status', scanStatus, /Status: done/); - requireOutput('ktx scan status', scanStatus, /Mode: structural/); - - const scanReport = await 
run('pnpm', ['exec', 'ktx', 'dev', 'scan', 'report', - '--project-dir', - projectDir, - '--json', - structuralScanRunId, - ]); - requireSuccess('ktx scan report', scanReport); - const scanReportJson = JSON.parse(scanReport.stdout); - assert.equal(scanReportJson.mode, 'structural'); - assert.equal(scanReportJson.connectionId, 'warehouse'); - assert.equal(scanReportJson.manifestShardsWritten, 1); - assert.deepEqual(scanReportJson.artifactPaths.enrichmentArtifacts, []); - assert.deepEqual(scanReportJson.artifactPaths.manifestShards, ['semantic-layer/warehouse/_schema/public.yaml']); await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml')); process.stdout.write('ktx scan structural verified: ' + structuralScanRunId + '\\n'); - const enrichedScan = await run('pnpm', ['exec', 'ktx', 'dev', 'scan', 'warehouse', + const enrichedScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse', '--project-dir', projectDir, '--mode', @@ -956,24 +931,14 @@ try { requireProjectStderr('ktx scan enriched', enrichedScan, projectDir); requireOutput('ktx scan enriched', enrichedScan, /Status: done/); requireOutput('ktx scan enriched', enrichedScan, /Mode: enriched/); + requireOutput('ktx scan enriched', enrichedScan, /Enrichment artifacts:/); const enrichedScanRunId = getRunId(enrichedScan.stdout); - const enrichedScanReport = await run('pnpm', ['exec', 'ktx', 'dev', 'scan', 'report', - '--project-dir', - projectDir, - '--json', - enrichedScanRunId, - ]); - requireSuccess('ktx scan enriched report', enrichedScanReport); - const enrichedScanReportJson = JSON.parse(enrichedScanReport.stdout); - assert.equal(enrichedScanReportJson.mode, 'enriched'); - assert.ok(enrichedScanReportJson.artifactPaths.enrichmentArtifacts.length > 0); - assert.deepEqual(enrichedScanReportJson.artifactPaths.manifestShards, ['semantic-layer/warehouse/_schema/public.yaml']); process.stdout.write('ktx scan enriched verified: ' + enrichedScanRunId + '\\n'); await mkdir(join(sourceDir, 
'orders'), { recursive: true }); await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\\n', 'utf-8'); - const ingestRun = await run('pnpm', ['exec', 'ktx', 'dev', 'ingest', 'run', + const ingestRun = await run('pnpm', ['exec', 'ktx', 'ingest', 'run', '--project-dir', projectDir, '--connection-id', @@ -983,14 +948,14 @@ try { '--source-dir', sourceDir, ]); - assert.equal(ingestRun.code, 1, 'ktx dev ingest run without an LLM provider must fail'); + assert.equal(ingestRun.code, 1, 'ktx ingest run without an LLM provider must fail'); assert.match( ingestRun.stderr, - /ktx dev ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway, or an injected agentRunner/, + /ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway, or an injected agentRunner/, ); await access(join(projectDir, '.ktx', 'db.sqlite')); - process.stdout.write('ktx dev ingest provider guard verified\\n'); + process.stdout.write('ktx ingest provider guard verified\\n'); } finally { if (daemonStarted) { await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'stop']); diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 4d498e05..a8527c98 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -484,8 +484,8 @@ describe('verification snippets', () => { assert.match(source, /ktx dev runtime status ready/); assert.match(source, /runtimeStatusAfter\.kind, 'ready'/); assert.match(source, /runtimeStatusAfter\.manifest\.features/); - assert.match(source, /ktx dev runtime doctor/); - assert.match(source, /PASS Managed Python runtime/); + assert.match(source, /ktx dev runtime status/); + assert.match(source, /status: ready/); assert.match(source, /ktx dev runtime start/); assert.match(source, /ktx dev runtime start reuse/); assert.match(source, /Using existing KTX Python daemon/); @@ -497,20 +497,18 @@ describe('verification snippets', () => { assert.match(source, /ktx dev runtime 
prune confirmed/); assert.match(source, /Removed stale KTX Python runtimes/); assert.match(source, /assert\.rejects\(\(\) => access\(staleRuntimeDir\)\)/); - assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'dev',\s*'scan',\s*'warehouse'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'scan',\s*'warehouse'/); assert.match(source, /'--mode',\s*'enriched'/); assert.doesNotMatch(source, /'--enrich'/); assert.match(source, /ktx scan structural verified/); assert.match(source, /ktx scan enriched verified/); - assert.match(source, /scanReportJson\.artifactPaths\.manifestShards/); - assert.match(source, /scanReportJson\.artifactPaths\.enrichmentArtifacts/); assert.match(source, /enrichment:/); assert.match(source, /mode: deterministic/); - assert.match(source, /run\('pnpm', \['exec', 'ktx', 'dev', 'ingest', 'run'/); + assert.match(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); assert.match(source, /access\(join\(projectDir, '\.ktx', 'db\.sqlite'\)\)/); assert.match(source, /SQLite knowledge index/); - assert.match(source, /ktx dev ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway/); - assert.match(source, /ktx dev ingest provider guard verified/); + assert.match(source, /ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway/); + assert.match(source, /ktx ingest provider guard verified/); }); describe('npmCliSmokeSource', () => { diff --git a/scripts/relationship-orbit-verification.mjs b/scripts/relationship-orbit-verification.mjs index d1c97f56..aa6309dd 100644 --- a/scripts/relationship-orbit-verification.mjs +++ b/scripts/relationship-orbit-verification.mjs @@ -1,6 +1,6 @@ #!/usr/bin/env node -import { mkdir as fsMkdir, writeFile as fsWriteFile } from 'node:fs/promises'; +import { mkdir as fsMkdir, readFile as fsReadFile, writeFile as fsWriteFile } from 'node:fs/promises'; import { execFile as childExecFile } from 'node:child_process'; import { dirname, resolve } from 'node:path'; 
import { fileURLToPath, pathToFileURL } from 'node:url'; @@ -90,11 +90,7 @@ function parseArgs(argv) { } export function buildOrbitScanArgv(input) { - return ['dev', 'scan', input.connectionId, '--enrich', '--project-dir', input.projectDir]; -} - -export function buildOrbitReportArgv(input) { - return ['dev', 'scan', 'report', '--json', '--project-dir', input.projectDir, input.runId]; + return ['scan', input.connectionId, '--mode', 'relationships', '--project-dir', input.projectDir]; } export function extractRunId(stdout) { @@ -102,6 +98,11 @@ export function extractRunId(stdout) { return match?.[1] ?? null; } +export function extractReportPath(stdout) { + const match = stdout.match(/^\s*Report:\s*(\S+)/m); + return match?.[1] ?? null; +} + function listLines(values) { if (!values || values.length === 0) { return ['- none']; @@ -204,11 +205,9 @@ export function formatOrbitVerificationMarkdown(result) { if (result.status === 'success') { lines.push( - '## JSON Report Command', + '## Scan Report Artifact', '', - '```bash', - result.reportCommand, - '```', + `- ${result.reportPath}`, '', ...formatSuccess(result), ); @@ -250,6 +249,7 @@ export async function runOrbitVerification(options = {}) { const now = options.now ?? (() => new Date()); const mkdir = options.mkdir ?? fsMkdir; const writeFile = options.writeFile ?? fsWriteFile; + const readFile = options.readFile ?? fsReadFile; const date = dateOnly(now()); const env = options.env ?? 
orbitVerificationEnv(projectDir); const runWithEnv = (argv, runnerOptions) => runner(argv, { ...runnerOptions, env }); @@ -285,33 +285,32 @@ export async function runOrbitVerification(options = {}) { scanStderr: scan.stderr, }; } else { - const reportArgv = buildOrbitReportArgv({ projectDir, runId }); - const reportOutput = await runBufferedWorkspaceKtx(runWithEnv, reportArgv, rootDir, execFile); - if (reportOutput.exitCode !== 0) { + const scanReportPath = extractReportPath(scan.stdout); + if (!scanReportPath) { result = { status: 'blocked', date, connectionId, projectDir, scanCommand: shellCommand(scanArgv), - scanExitCode: reportOutput.exitCode, - blocker: firstNonEmptyLine(reportOutput.stderr, reportOutput.stdout), - scanStdout: `${scan.stdout}\n${reportOutput.stdout}`.trim(), - scanStderr: `${scan.stderr}\n${reportOutput.stderr}`.trim(), + scanExitCode: scan.exitCode, + blocker: 'KTX scan completed without printing a report artifact path', + scanStdout: scan.stdout, + scanStderr: scan.stderr, }; } else { + const fullScanReportPath = resolve(projectDir, scanReportPath); result = { status: 'success', date, connectionId, projectDir, scanCommand: shellCommand(scanArgv), - reportCommand: shellCommand(reportArgv), + reportPath: fullScanReportPath, scanExitCode: scan.exitCode, - reportExitCode: reportOutput.exitCode, scanStdout: scan.stdout, scanStderr: scan.stderr, - report: JSON.parse(reportOutput.stdout), + report: JSON.parse(await readFile(fullScanReportPath, 'utf8')), }; } } diff --git a/scripts/relationship-orbit-verification.test.mjs b/scripts/relationship-orbit-verification.test.mjs index 017b2518..a6dc3607 100644 --- a/scripts/relationship-orbit-verification.test.mjs +++ b/scripts/relationship-orbit-verification.test.mjs @@ -3,9 +3,9 @@ import { readFile } from 'node:fs/promises'; import { dirname } from 'node:path'; import { describe, it } from 'node:test'; import { - buildOrbitReportArgv, buildOrbitScanArgv, defaultOrbitVerificationProjectDir, + 
extractReportPath, extractRunId, formatOrbitVerificationMarkdown, runOrbitVerification, @@ -59,24 +59,15 @@ describe('relationship Orbit verification helper', () => { ); }); - it('builds the current KTX launcher arguments for scan and JSON report commands', () => { + it('builds the current KTX launcher arguments for scan commands', () => { assert.deepEqual(buildOrbitScanArgv({ connectionId: 'orbit', projectDir: '/tmp/orbit-project' }), [ - 'dev', 'scan', 'orbit', - '--enrich', + '--mode', + 'relationships', '--project-dir', '/tmp/orbit-project', ]); - assert.deepEqual(buildOrbitReportArgv({ projectDir: '/tmp/orbit-project', runId: 'scan-orbit-1' }), [ - 'dev', - 'scan', - 'report', - '--json', - '--project-dir', - '/tmp/orbit-project', - 'scan-orbit-1', - ]); }); it('uses the checked-in Orbit verification project by default', async () => { @@ -95,22 +86,17 @@ describe('relationship Orbit verification helper', () => { runWorkspaceKtx: async (argv, options) => { calls.push(argv); envs.push(options.env); - if (argv[2] === 'report') { - options.stdout.write(successReportJson()); - return 0; - } - options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); return 0; }, + readFile: async () => successReportJson(), }); assert.equal(result.status, 'success'); assert.deepEqual(calls, [ - ['dev', 'scan', 'orbit', '--enrich', '--project-dir', defaultProjectDir], - ['dev', 'scan', 'report', '--json', '--project-dir', defaultProjectDir, 'scan-orbit-1'], + ['scan', 'orbit', '--mode', 'relationships', '--project-dir', defaultProjectDir], ]); assert.equal(envs[0].GIT_CEILING_DIRECTORIES, dirname(defaultProjectDir)); - assert.equal(envs[1].GIT_CEILING_DIRECTORIES, dirname(defaultProjectDir)); assert.equal(writes.length, 1); assert.match(writes[0].content, new RegExp(defaultProjectDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); }); @@ -129,19 
+115,15 @@ describe('relationship Orbit verification helper', () => { writeFile: async () => {}, runWorkspaceKtx: async (argv, options) => { calls.push(argv); - if (argv[2] === 'report') { - options.stdout.write(successReportJson()); - return 0; - } - options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); return 0; }, + readFile: async () => successReportJson(), }); assert.equal(result.projectDir, '/tmp/orbit-project-from-env'); assert.deepEqual(calls, [ - ['dev', 'scan', 'orbit', '--enrich', '--project-dir', '/tmp/orbit-project-from-env'], - ['dev', 'scan', 'report', '--json', '--project-dir', '/tmp/orbit-project-from-env', 'scan-orbit-1'], + ['scan', 'orbit', '--mode', 'relationships', '--project-dir', '/tmp/orbit-project-from-env'], ]); } finally { if (previousProjectDir === undefined) { @@ -155,6 +137,7 @@ describe('relationship Orbit verification helper', () => { it('extracts the run id from human scan output', () => { assert.equal(extractRunId(`KTX scan completed\nStatus: done\nRun: scan-orbit-1\nConnection: orbit\n`), 'scan-orbit-1'); assert.equal(extractRunId('KTX scan completed without a run line\n'), null); + assert.equal(extractReportPath('Artifacts\n Report: reports/scan-report.json\n'), 'reports/scan-report.json'); }); it('formats successful Orbit verification evidence from the JSON report', () => { @@ -163,10 +146,9 @@ describe('relationship Orbit verification helper', () => { date: '2026-05-07', connectionId: 'orbit', projectDir: '/tmp/orbit-project', - scanCommand: 'pnpm run ktx -- dev scan orbit --enrich --project-dir /tmp/orbit-project', - reportCommand: 'pnpm run ktx -- dev scan report --json --project-dir /tmp/orbit-project scan-orbit-1', + scanCommand: 'pnpm run ktx -- scan orbit --mode relationships --project-dir /tmp/orbit-project', + reportPath: '/tmp/orbit-project/reports/scan-report.json', 
scanExitCode: 0, - reportExitCode: 0, scanStdout: 'KTX scan completed\nRun: scan-orbit-1\n', scanStderr: '', report: JSON.parse(successReportJson()), @@ -189,7 +171,7 @@ describe('relationship Orbit verification helper', () => { date: '2026-05-07', connectionId: 'orbit', projectDir: '/tmp/orbit-project', - scanCommand: 'pnpm run ktx -- dev scan orbit --enrich --project-dir /tmp/orbit-project', + scanCommand: 'pnpm run ktx -- scan orbit --mode relationships --project-dir /tmp/orbit-project', scanExitCode: 1, blocker: 'Connection "orbit" was not found', scanStdout: '', @@ -202,7 +184,7 @@ describe('relationship Orbit verification helper', () => { assert.doesNotMatch(markdown, /scan\.enrichment\.mode is required/); }); - it('runs scan then JSON report and writes success Markdown', async () => { + it('runs scan then reads the report artifact and writes success Markdown', async () => { const calls = []; const writes = []; const result = await runOrbitVerification({ @@ -216,19 +198,15 @@ describe('relationship Orbit verification helper', () => { }, runWorkspaceKtx: async (argv, options) => { calls.push(argv); - if (argv[2] === 'report') { - options.stdout.write(successReportJson()); - return 0; - } - options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); return 0; }, + readFile: async () => successReportJson(), }); assert.equal(result.status, 'success'); assert.deepEqual(calls, [ - ['dev', 'scan', 'orbit', '--enrich', '--project-dir', '/tmp/orbit-project'], - ['dev', 'scan', 'report', '--json', '--project-dir', '/tmp/orbit-project', 'scan-orbit-1'], + ['scan', 'orbit', '--mode', 'relationships', '--project-dir', '/tmp/orbit-project'], ]); assert.equal(writes.length, 1); assert.equal(writes[0].path, '/tmp/orbit-report.md'); diff --git a/scripts/run-ktx.mjs b/scripts/run-ktx.mjs index e595376f..a283dcae 100644 --- 
a/scripts/run-ktx.mjs +++ b/scripts/run-ktx.mjs @@ -83,10 +83,6 @@ async function isBuildStale(rootDir, binPath, fs) { return false; } -function isShellCompletionRequest(argv) { - return argv[0] === '__complete' || (argv[0] === 'dev' && argv[1] === '__complete'); -} - async function runBuffered(execFile, stdout, stderr, command, args, options) { try { const result = await execFile(command, args, { cwd: options.cwd, env: options.env, maxBuffer: 1024 * 1024 * 16 }); @@ -150,8 +146,7 @@ export async function runWorkspaceKtx(argv, options = {}) { const commandEnv = options.env; const binExists = await fileExists(binPath, access); - const skipStaleBuildCheck = binExists && isShellCompletionRequest(cliArgv); - const needsBuild = !binExists || (!skipStaleBuildCheck && (await isBuildStale(rootDir, binPath, fs))); + const needsBuild = !binExists || (await isBuildStale(rootDir, binPath, fs)); if (needsBuild) { stderr.write( binExists diff --git a/scripts/run-ktx.test.mjs b/scripts/run-ktx.test.mjs index 76315a36..3263ef30 100644 --- a/scripts/run-ktx.test.mjs +++ b/scripts/run-ktx.test.mjs @@ -105,53 +105,6 @@ test('runWorkspaceKtx drops a leading npm argument separator', async () => { ]); }); -test('runWorkspaceKtx skips stale-build checks for shell completion when dist exists', async () => { - const calls = []; - let statCalls = 0; - - const exitCode = await runWorkspaceKtx(['dev', '__complete', '--shell', 'zsh', '--position', '2', '--', 'ktx', ''], { - rootDir: '/workspace/ktx', - access: async () => undefined, - stat: async (path) => { - statCalls += 1; - return { - mtimeMs: path.endsWith('/packages/cli/dist/bin.js') ? 
2000 : 3000, - isDirectory: () => path.endsWith('/src') || path.endsWith('/packages'), - }; - }, - readdir: async () => { - throw new Error('completion should not scan source directories'); - }, - execFile: async (command, args, options) => { - calls.push({ command, args, cwd: options.cwd }); - return { stdout: 'connect:Add, list, test, and map data sources\n', stderr: '' }; - }, - stdout: { write: () => undefined }, - stderr: { write: () => undefined }, - }); - - assert.equal(exitCode, 0); - assert.equal(statCalls, 0); - assert.deepEqual(calls, [ - { - command: process.execPath, - args: [ - '/workspace/ktx/packages/cli/dist/bin.js', - 'dev', - '__complete', - '--shell', - 'zsh', - '--position', - '2', - '--', - 'ktx', - '', - ], - cwd: '/workspace/ktx', - }, - ]); -}); - test('runWorkspaceKtx builds the workspace CLI before running it when dist is missing', async () => { const calls = []; const logs = []; @@ -199,7 +152,7 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t const logs = []; let sourceMtimeMs = 3000; - const exitCode = await runWorkspaceKtx(['dev', 'scan', 'orbit', '--enrich'], { + const exitCode = await runWorkspaceKtx(['scan', 'orbit', '--mode', 'relationships'], { rootDir: '/workspace/ktx', access: async () => undefined, stat: async (path) => ({ @@ -232,7 +185,7 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t calls.map((call) => [call.command, call.args]), [ ['pnpm', ['run', 'build']], - [process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'dev', 'scan', 'orbit', '--enrich']], + [process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'scan', 'orbit', '--mode', 'relationships']], ], ); assert.deepEqual(logs, [ From eaaabb361e57913e745d3793d07468abd381dc39 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 12:28:24 +0200 Subject: [PATCH 02/15] fix(cli): clean up dev runtime commands (#59) --- README.md | 2 - 
.../content/docs/cli-reference/ktx-dev.mdx | 11 +-- examples/package-artifacts/README.md | 4 +- packages/cli/src/commands/runtime-commands.ts | 18 +--- packages/cli/src/dev.test.ts | 7 +- packages/cli/src/index.test.ts | 16 +--- .../cli/src/managed-python-runtime.test.ts | 41 +-------- packages/cli/src/managed-python-runtime.ts | 42 +-------- packages/cli/src/runtime.test.ts | 88 ++++++++++++------- packages/cli/src/runtime.ts | 54 +++++------- scripts/examples-docs.test.mjs | 6 +- scripts/local-embeddings-runtime-smoke.mjs | 14 ++- scripts/package-artifacts.mjs | 33 +++---- scripts/package-artifacts.test.mjs | 9 +- 14 files changed, 127 insertions(+), 218 deletions(-) diff --git a/README.md b/README.md index 1cd20080..b52a31f6 100644 --- a/README.md +++ b/README.md @@ -152,8 +152,6 @@ ktx dev runtime install --yes ktx dev runtime status ktx dev runtime start ktx dev runtime stop -ktx dev runtime prune --dry-run -ktx dev runtime prune --yes ``` The release artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` diff --git a/docs-site/content/docs/cli-reference/ktx-dev.mdx b/docs-site/content/docs/cli-reference/ktx-dev.mdx index eea02562..e00a4585 100644 --- a/docs-site/content/docs/cli-reference/ktx-dev.mdx +++ b/docs-site/content/docs/cli-reference/ktx-dev.mdx @@ -16,7 +16,7 @@ ktx dev [options] | Subcommand | Description | |-----------|-------------| | `init [directory]` | Initialize a Git-backed KTX project directory | -| `runtime` | Install, start, stop, inspect, and prune the KTX-managed Python runtime | +| `runtime` | Install, start, stop, and inspect the KTX-managed Python runtime | ## `dev init` @@ -27,15 +27,14 @@ ktx dev [options] ## `dev runtime` -`ktx dev runtime` supports `install`, `start`, `stop`, `status`, and `prune`. +`ktx dev runtime` supports `install`, `start`, `stop`, and `status`. 
| Flag | Description | Default | |------|-------------|---------| -| `--feature ` | Runtime feature level for `install`, `start`, and `status` (`core` or `local-embeddings`) | `core` | +| `--feature ` | Runtime feature level for `install` and `start` (`core` or `local-embeddings`) | `core` | | `--json` | Print JSON output for `status` | `false` | -| `--yes` | Confirm runtime install or prune actions where supported | `false` | +| `--yes` | Confirm runtime install actions where supported | `false` | | `--force` | Reinstall or restart where supported | `false` | -| `--dry-run` | Preview runtime pruning without removing files | `false` | ## Examples @@ -48,8 +47,6 @@ ktx dev runtime install --yes ktx dev runtime status ktx dev runtime start ktx dev runtime stop -ktx dev runtime prune --dry-run -ktx dev runtime prune --yes ``` ## Common errors diff --git a/examples/package-artifacts/README.md b/examples/package-artifacts/README.md index 8c92f84e..22ecaf92 100644 --- a/examples/package-artifacts/README.md +++ b/examples/package-artifacts/README.md @@ -14,9 +14,7 @@ generated local project. The managed Python runtime smoke requires `uv` on `PATH`, isolates `KTX_RUNTIME_ROOT`, verifies `ktx dev runtime status`, runs `ktx sl query --yes` to install the core runtime from the bundled wheel, checks `ktx dev runtime status`, -starts and reuses the managed daemon, stops it, previews a stale runtime with -`ktx dev runtime prune --dry-run`, verifies confirmation is required, and removes -the stale runtime with `ktx dev runtime prune --yes`. +starts and reuses the managed daemon, and stops it. The artifact manifest contains the public `@kaelio/ktx` npm tarball and the bundled `kaelio-ktx` runtime wheel. 
The smoke does not install standalone diff --git a/packages/cli/src/commands/runtime-commands.ts b/packages/cli/src/commands/runtime-commands.ts index b57eae86..cf0abb42 100644 --- a/packages/cli/src/commands/runtime-commands.ts +++ b/packages/cli/src/commands/runtime-commands.ts @@ -18,7 +18,7 @@ async function runRuntimeArgs(context: KtxCliCommandContext, args: KtxRuntimeArg export function registerRuntimeCommands(program: Command, context: KtxCliCommandContext): void { const runtime = program .command('runtime') - .description('Install, inspect, and prune the KTX-managed Python runtime') + .description('Install, start, stop, and inspect the KTX-managed Python runtime') .showHelpAfterError(); runtime @@ -64,7 +64,7 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand runtime .command('status') - .description('Show managed Python runtime status') + .description('Show managed Python runtime status and readiness checks') .option('--json', 'Print JSON output', false) .action(async (options: { json?: boolean }) => { await runRuntimeArgs(context, { @@ -73,18 +73,4 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand json: options.json === true, }); }); - - runtime - .command('prune') - .description('Remove stale managed Python runtimes for older CLI versions') - .option('--dry-run', 'List stale runtimes without deleting them', false) - .option('--yes', 'Confirm deletion of stale runtime directories', false) - .action(async (options: { dryRun?: boolean; yes?: boolean }) => { - await runRuntimeArgs(context, { - command: 'prune', - cliVersion: context.packageInfo.version, - dryRun: options.dryRun === true, - yes: options.yes === true, - }); - }); } diff --git a/packages/cli/src/dev.test.ts b/packages/cli/src/dev.test.ts index 5aca4201..fe75d1af 100644 --- a/packages/cli/src/dev.test.ts +++ b/packages/cli/src/dev.test.ts @@ -106,6 +106,7 @@ describe('dev Commander tree', () => { for (const argv of [ ['dev', 
'doctor', 'setup'], ['dev', 'runtime', 'doctor'], + ['dev', 'runtime', 'prune', '--dry-run'], ['dev', 'scan', 'warehouse'], ['dev', 'ingest', 'run'], ['dev', 'mapping', 'list'], @@ -126,7 +127,7 @@ describe('dev Commander tree', () => { it.each([ { argv: ['dev', 'runtime', '--help'], - expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status', 'prune'], + expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status'], }, { argv: ['scan', '--help'], @@ -147,6 +148,10 @@ describe('dev Commander tree', () => { for (const text of expected) { expect(io.stdout()).toContain(text); } + if (argv.join(' ') === 'dev runtime --help') { + expect(io.stdout()).not.toContain('prune'); + expect(io.stdout()).not.toContain('doctor'); + } expect(io.stderr()).toBe(''); expect(doctor).not.toHaveBeenCalled(); expect(ingest).not.toHaveBeenCalled(); diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 9c08e58a..b79a0bb3 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -159,7 +159,7 @@ describe('runKtxCli', () => { await expect(runKtxCli(['dev', 'runtime', 'stop'], stopIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['dev', 'runtime', 'stop', '--all'], stopAllIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['dev', 'runtime', 'status', '--json'], statusIo.io, { runtime })).resolves.toBe(0); - await expect(runKtxCli(['dev', 'runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(0); + await expect(runKtxCli(['dev', 'runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(1); expect(runtime).toHaveBeenNthCalledWith( 1, @@ -208,19 +208,11 @@ describe('runKtxCli', () => { }, statusIo.io, ); - expect(runtime).toHaveBeenNthCalledWith( - 6, - { - command: 'prune', - cliVersion: '0.0.0-private', - dryRun: true, - yes: false, - }, - pruneIo.io, - ); - for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo, pruneIo]) { + 
expect(runtime).toHaveBeenCalledTimes(5); + for (const io of [installIo, startIo, stopIo, stopAllIo, statusIo]) { expect(io.stderr()).toBe(''); } + expect(pruneIo.stderr()).toMatch(/unknown command|error:/); }); it('prints the resolved project directory for ordinary project commands', async () => { diff --git a/packages/cli/src/managed-python-runtime.test.ts b/packages/cli/src/managed-python-runtime.test.ts index f2d9e784..63755ad1 100644 --- a/packages/cli/src/managed-python-runtime.test.ts +++ b/packages/cli/src/managed-python-runtime.test.ts @@ -1,5 +1,5 @@ import { createHash } from 'node:crypto'; -import { mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from 'node:fs/promises'; +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -8,7 +8,6 @@ import { doctorManagedPythonRuntime, installManagedPythonRuntime, managedPythonRuntimeLayout, - pruneManagedPythonRuntimes, readManagedPythonRuntimeStatus, verifyRuntimeAsset, type ManagedPythonRuntimeExec, @@ -471,41 +470,3 @@ describe('doctorManagedPythonRuntime', () => { }); }); }); - -describe('pruneManagedPythonRuntimes', () => { - let tempDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-runtime-prune-')); - }); - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - it('removes stale version directories and keeps the current version', async () => { - const runtimeRoot = join(tempDir, 'runtime'); - await mkdir(join(runtimeRoot, '0.1.0'), { recursive: true }); - await mkdir(join(runtimeRoot, '0.2.0'), { recursive: true }); - await writeFile(join(runtimeRoot, 'README.txt'), 'not a runtime directory\n'); - - const result = await pruneManagedPythonRuntimes({ cliVersion: '0.2.0', runtimeRoot }); - - expect(result.removed).toEqual([join(runtimeRoot, '0.1.0')]); - 
expect(result.kept).toEqual([join(runtimeRoot, '0.2.0')]); - await expect(stat(join(runtimeRoot, '0.1.0'))).rejects.toThrow(); - expect(await readdir(runtimeRoot)).toEqual(['0.2.0', 'README.txt']); - }); - - it('supports dry-run without deleting stale directories', async () => { - const runtimeRoot = join(tempDir, 'runtime'); - await mkdir(join(runtimeRoot, '0.1.0'), { recursive: true }); - await mkdir(join(runtimeRoot, '0.2.0'), { recursive: true }); - - const result = await pruneManagedPythonRuntimes({ cliVersion: '0.2.0', runtimeRoot, dryRun: true }); - - expect(result.removed).toEqual([]); - expect(result.stale).toEqual([join(runtimeRoot, '0.1.0')]); - expect(await readdir(runtimeRoot)).toEqual(['0.1.0', '0.2.0']); - }); -}); diff --git a/packages/cli/src/managed-python-runtime.ts b/packages/cli/src/managed-python-runtime.ts index 251ac7da..563b62f7 100644 --- a/packages/cli/src/managed-python-runtime.ts +++ b/packages/cli/src/managed-python-runtime.ts @@ -1,6 +1,6 @@ import { execFile } from 'node:child_process'; import { createHash } from 'node:crypto'; -import { access, appendFile, mkdir, readFile, readdir, rm, stat, writeFile } from 'node:fs/promises'; +import { access, appendFile, mkdir, readFile, rm, writeFile } from 'node:fs/promises'; import { homedir } from 'node:os'; import { basename, join } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -107,13 +107,6 @@ export interface ManagedPythonRuntimeDoctorCheck { fix?: string; } -export interface ManagedPythonRuntimePruneResult { - runtimeRoot: string; - stale: string[]; - kept: string[]; - removed: string[]; -} - export const MISSING_UV_RUNTIME_INSTALL_MESSAGE = 'uv is required to install the KTX Python runtime. KTX does not download uv automatically. 
Install uv, make sure it is on PATH, and retry: ktx dev runtime install --yes'; @@ -441,36 +434,3 @@ export async function doctorManagedPythonRuntime( ); return checks; } - -export async function pruneManagedPythonRuntimes(options: { - cliVersion: string; - runtimeRoot: string; - dryRun?: boolean; -}): Promise { - if (!(await pathExists(options.runtimeRoot))) { - return { runtimeRoot: options.runtimeRoot, stale: [], kept: [], removed: [] }; - } - const entries = await readdir(options.runtimeRoot); - const stale: string[] = []; - const kept: string[] = []; - for (const entry of entries) { - const path = join(options.runtimeRoot, entry); - const info = await stat(path); - if (!info.isDirectory()) { - continue; - } - if (entry === options.cliVersion) { - kept.push(path); - } else { - stale.push(path); - } - } - const removed: string[] = []; - if (options.dryRun !== true) { - for (const path of stale) { - await rm(path, { recursive: true, force: true }); - removed.push(path); - } - } - return { runtimeRoot: options.runtimeRoot, stale, kept, removed }; -} diff --git a/packages/cli/src/runtime.test.ts b/packages/cli/src/runtime.test.ts index 16e879cc..8151a4b3 100644 --- a/packages/cli/src/runtime.test.ts +++ b/packages/cli/src/runtime.test.ts @@ -5,6 +5,7 @@ import type { ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; import type { + ManagedPythonRuntimeDoctorCheck, ManagedPythonRuntimeInstallResult, ManagedPythonRuntimeStatus, } from './managed-python-runtime.js'; @@ -256,7 +257,7 @@ describe('runKtxRuntime', () => { expect(io.stderr()).toContain('process scan: ps failed'); }); - it('prints runtime status as JSON', async () => { + it('prints runtime status and doctor checks as JSON with doctor-style exit status', async () => { const io = makeIo(); const deps: KtxRuntimeDeps = { readStatus: vi.fn(async (): Promise => ({ @@ -278,38 +279,41 @@ describe('runKtxRuntime', () => { daemonStderrPath: '/runtime/0.2.0/daemon.stderr.log', }, })), + 
doctorRuntime: vi.fn(async (): Promise => [ + { id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' }, + { id: 'asset', label: 'Bundled Python wheel', status: 'pass', detail: '/assets/python/runtime.whl' }, + { + id: 'runtime', + label: 'Managed Python runtime', + status: 'fail', + detail: 'No runtime manifest at /runtime/0.2.0/manifest.json', + fix: 'Run: ktx dev runtime install --yes', + }, + ]), }; - await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: true }, io.io, deps)).resolves.toBe(0); + await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: true }, io.io, deps)).resolves.toBe(1); expect(JSON.parse(io.stdout())).toMatchObject({ kind: 'missing', detail: 'No runtime manifest at /runtime/0.2.0/manifest.json', layout: { runtimeRoot: '/runtime' }, + checks: [ + { id: 'uv', status: 'pass' }, + { id: 'asset', status: 'pass' }, + { id: 'runtime', status: 'fail' }, + ], }); + expect(deps.readStatus).toHaveBeenCalledWith({ cliVersion: '0.2.0' }); + expect(deps.doctorRuntime).toHaveBeenCalledWith({ cliVersion: '0.2.0' }); }); - it('requires --yes before pruning stale runtime directories', async () => { - const io = makeIo(); - const deps: KtxRuntimeDeps = { - pruneRuntime: vi.fn(async () => { - throw new Error('should not prune without --yes'); - }), - }; - - await expect(runKtxRuntime({ command: 'prune', cliVersion: '0.2.0', dryRun: false, yes: false }, io.io, deps)) - .resolves.toBe(1); - - expect(io.stderr()).toContain('Refusing to prune without --yes'); - expect(deps.pruneRuntime).not.toHaveBeenCalled(); - }); - - it('prints stale directories during prune dry-run', async () => { + it('prints runtime status and doctor checks in plain output', async () => { const io = makeIo(); const deps: KtxRuntimeDeps = { readStatus: vi.fn(async (): Promise => ({ - kind: 'missing', - detail: 'No runtime manifest at /runtime/0.2.0/manifest.json', + kind: 'ready', + detail: 'Runtime ready at /runtime/0.2.0', layout: { cliVersion: 
'0.2.0', runtimeRoot: '/runtime', @@ -325,19 +329,43 @@ describe('runKtxRuntime', () => { daemonStdoutPath: '/runtime/0.2.0/daemon.stdout.log', daemonStderrPath: '/runtime/0.2.0/daemon.stderr.log', }, + manifest: { + schemaVersion: 1, + cliVersion: '0.2.0', + installedAt: '2026-05-11T00:00:00.000Z', + asset: { + schemaVersion: 1, + distributionName: 'kaelio-ktx', + normalizedName: 'kaelio_ktx', + version: '0.1.0', + wheel: { + file: 'kaelio_ktx-0.1.0-py3-none-any.whl', + sha256: 'a'.repeat(64), + bytes: 10, + }, + }, + features: ['core'], + python: { + executable: '/runtime/0.2.0/.venv/bin/python', + daemonExecutable: '/runtime/0.2.0/.venv/bin/ktx-daemon', + }, + installLog: '/runtime/0.2.0/install.log', + }, })), - pruneRuntime: vi.fn(async () => ({ - runtimeRoot: '/runtime', - stale: ['/runtime/0.1.0'], - kept: ['/runtime/0.2.0'], - removed: [], - })), + doctorRuntime: vi.fn(async (): Promise => [ + { id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' }, + { id: 'asset', label: 'Bundled Python wheel', status: 'pass', detail: '/assets/python/runtime.whl' }, + { id: 'runtime', label: 'Managed Python runtime', status: 'pass', detail: 'Runtime ready at /runtime/0.2.0' }, + ]), }; - await expect(runKtxRuntime({ command: 'prune', cliVersion: '0.2.0', dryRun: true, yes: false }, io.io, deps)) - .resolves.toBe(0); + await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: false }, io.io, deps)).resolves.toBe(0); - expect(io.stdout()).toContain('Stale KTX Python runtimes'); - expect(io.stdout()).toContain('/runtime/0.1.0'); + expect(io.stdout()).toContain('KTX Python runtime'); + expect(io.stdout()).toContain('status: ready'); + expect(io.stdout()).toContain('KTX Python runtime checks'); + expect(io.stdout()).toContain('PASS uv: uv 0.9.5'); + expect(io.stdout()).toContain('PASS Managed Python runtime: Runtime ready at /runtime/0.2.0'); + expect(io.stderr()).toBe(''); }); }); diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts 
index d5f4b5cb..8bb3fc7c 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -8,14 +8,14 @@ import { type ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; import { + doctorManagedPythonRuntime, installManagedPythonRuntime, - pruneManagedPythonRuntimes, readManagedPythonRuntimeStatus, type KtxRuntimeFeature, + type ManagedPythonRuntimeDoctorCheck, type ManagedPythonRuntimeInstallOptions, type ManagedPythonRuntimeInstallResult, type ManagedPythonRuntimeLayoutOptions, - type ManagedPythonRuntimePruneResult, type ManagedPythonRuntimeStatus, } from './managed-python-runtime.js'; @@ -23,8 +23,7 @@ export type KtxRuntimeArgs = | { command: 'install'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean } | { command: 'start'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean } | { command: 'stop'; cliVersion: string; all: boolean } - | { command: 'status'; cliVersion: string; json: boolean } - | { command: 'prune'; cliVersion: string; dryRun: boolean; yes: boolean }; + | { command: 'status'; cliVersion: string; json: boolean }; export interface KtxRuntimeDeps { installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise; @@ -36,11 +35,7 @@ export interface KtxRuntimeDeps { stopDaemon?: (options: { cliVersion: string }) => Promise; stopAllDaemons?: (options: { cliVersion: string }) => Promise; readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; - pruneRuntime?: (options: { - cliVersion: string; - runtimeRoot: string; - dryRun?: boolean; - }) => Promise; + doctorRuntime?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; } function writeJson(io: KtxCliIo, value: unknown): void { @@ -145,17 +140,20 @@ function writeStatus(io: KtxCliIo, status: ManagedPythonRuntimeStatus): void { } } -function writePrune(io: KtxCliIo, result: ManagedPythonRuntimePruneResult, dryRun: boolean): void { - if (result.stale.length === 0) { - io.stdout.write(`No stale KTX Python runtimes found 
under ${result.runtimeRoot}\n`); - return; - } - io.stdout.write(dryRun ? 'Stale KTX Python runtimes\n' : 'Removed stale KTX Python runtimes\n'); - for (const path of dryRun ? result.stale : result.removed) { - io.stdout.write(`${path}\n`); +function writeRuntimeChecks(io: KtxCliIo, checks: ManagedPythonRuntimeDoctorCheck[]): void { + io.stdout.write('KTX Python runtime checks\n'); + for (const check of checks) { + io.stdout.write(`${check.status.toUpperCase()} ${check.label}: ${check.detail}\n`); + if (check.fix) { + io.stdout.write(` Fix: ${check.fix}\n`); + } } } +function hasRuntimeCheckFailures(checks: ManagedPythonRuntimeDoctorCheck[]): boolean { + return checks.some((check) => check.status === 'fail'); +} + export async function runKtxRuntime( args: KtxRuntimeArgs, io: KtxCliIo = process, @@ -196,27 +194,19 @@ export async function runKtxRuntime( } if (args.command === 'status') { const readStatus = deps.readStatus ?? readManagedPythonRuntimeStatus; + const doctorRuntime = deps.doctorRuntime ?? doctorManagedPythonRuntime; const status = await readStatus({ cliVersion: args.cliVersion }); + const checks = await doctorRuntime({ cliVersion: args.cliVersion }); if (args.json) { - writeJson(io, status); + writeJson(io, { ...status, checks }); } else { writeStatus(io, status); + writeRuntimeChecks(io, checks); } - return 0; + return hasRuntimeCheckFailures(checks) ? 1 : 0; } - if (!args.dryRun && !args.yes) { - io.stderr.write('Refusing to prune without --yes. Preview with: ktx dev runtime prune --dry-run\n'); - return 1; - } - const status = await (deps.readStatus ?? readManagedPythonRuntimeStatus)({ cliVersion: args.cliVersion }); - const pruneRuntime = deps.pruneRuntime ?? 
pruneManagedPythonRuntimes; - const result = await pruneRuntime({ - cliVersion: args.cliVersion, - runtimeRoot: status.layout.runtimeRoot, - dryRun: args.dryRun, - }); - writePrune(io, result, args.dryRun); - return 0; + const _exhaustive: never = args; + return _exhaustive; } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 5378b8ce..504e0d36 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -192,8 +192,7 @@ describe('standalone example docs', () => { assert.match(packageArtifacts, /requires `uv` on `PATH`/); assert.match(packageArtifacts, /ktx dev runtime status/); assert.match(packageArtifacts, /ktx dev runtime status/); - assert.match(packageArtifacts, /ktx dev runtime prune --dry-run/); - assert.match(packageArtifacts, /ktx dev runtime prune --yes/); + assert.doesNotMatch(packageArtifacts, /ktx dev runtime prune/); assert.match( packageArtifacts, new RegExp( @@ -226,8 +225,7 @@ describe('standalone example docs', () => { assert.match(readme, /requires `uv` on `PATH`/); assert.match(readme, /ktx dev runtime status/); assert.match(readme, /ktx dev runtime status/); - assert.match(readme, /ktx dev runtime prune --dry-run/); - assert.match(readme, /ktx dev runtime prune --yes/); + assert.doesNotMatch(readme, /ktx dev runtime prune/); assert.doesNotMatch(readme, /@ktx\/context/); assert.doesNotMatch(readme, /@ktx\/cli/); assert.doesNotMatch(readme, /python -m ktx_daemon semantic-validate/); diff --git a/scripts/local-embeddings-runtime-smoke.mjs b/scripts/local-embeddings-runtime-smoke.mjs index 064cd070..8b0243a0 100644 --- a/scripts/local-embeddings-runtime-smoke.mjs +++ b/scripts/local-embeddings-runtime-smoke.mjs @@ -205,6 +205,17 @@ function parseJsonStdout(label, result) { } } +function parseJsonStdoutWithExitCode(label, result, expectedCode) { + if (result.code !== expectedCode) { + 
throw new Error(`${label} failed with code ${result.code}\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`); + } + try { + return JSON.parse(result.stdout); + } catch (error) { + throw new Error(`${label} did not write JSON stdout: ${error.message}\nstdout:\n${result.stdout}`); + } +} + function requireOutput(label, result, pattern) { if (!pattern.test(result.stdout)) { throw new Error(`${label} stdout did not match ${pattern}\nstdout:\n${result.stdout}`); @@ -283,13 +294,14 @@ export async function runLocalEmbeddingsRuntimeSmoke(options = {}) { requireSuccess(commands[0].label, version); requireOutput(commands[0].label, version, expectedPublicKtxVersionPattern()); - const missingStatus = parseJsonStdout( + const missingStatus = parseJsonStdoutWithExitCode( commands[1].label, await run(commands[1].command, commands[1].args, { cwd: installDir, env: smokeEnv, timeoutMs: commands[1].timeoutMs, }), + 1, ); if (missingStatus.kind !== 'missing') { throw new Error(`Expected missing runtime before install, got ${JSON.stringify(missingStatus)}`); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index d99509d9..5f080068 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -548,6 +548,15 @@ function parseJsonResult(label, result) { return JSON.parse(result.stdout); } +function parseJsonResultWithExitCode(label, result, expectedCode) { + assert.equal( + result.code, + expectedCode, + label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr, + ); + return JSON.parse(result.stdout); +} + function parseJsonFailure(label, result) { assert.equal(result.code, 1, label + ' should fail with exit code 1'); assert.equal(result.stdout, '', label + ' should not write stdout when failing'); @@ -594,9 +603,10 @@ try { requireSuccess('ktx public package version', version); requireOutput('ktx public package version', version, /@kaelio\\/ktx 0\\.1\\.0/); - const 
runtimeStatusBefore = parseJsonResult( + const runtimeStatusBefore = parseJsonResultWithExitCode( 'ktx dev runtime status missing', await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'status', '--json']), + 1, ); assert.equal(runtimeStatusBefore.kind, 'missing'); assert.equal(runtimeStatusBefore.layout.runtimeRoot, process.env.KTX_RUNTIME_ROOT); @@ -889,27 +899,6 @@ try { requireOutput('ktx dev runtime stop', runtimeStop, /Stopped KTX Python daemon/); process.stdout.write('ktx dev runtime daemon lifecycle verified\\n'); - const staleRuntimeDir = join(process.env.KTX_RUNTIME_ROOT, '0.0.0'); - await mkdir(staleRuntimeDir, { recursive: true }); - - const runtimePruneDryRun = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'prune', '--dry-run']); - requireSuccess('ktx dev runtime prune dry run', runtimePruneDryRun); - requireOutput('ktx dev runtime prune dry run', runtimePruneDryRun, /Stale KTX Python runtimes/); - requireOutput('ktx dev runtime prune dry run', runtimePruneDryRun, /0\\.0\\.0/); - await access(staleRuntimeDir); - - const runtimePruneNeedsConfirmation = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'prune']); - assert.equal(runtimePruneNeedsConfirmation.code, 1, 'ktx dev runtime prune needs confirmation'); - assert.equal(runtimePruneNeedsConfirmation.stdout, '', 'ktx dev runtime prune needs confirmation wrote stdout'); - assert.match(runtimePruneNeedsConfirmation.stderr, /Refusing to prune without --yes/); - - const runtimePruneConfirmed = await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'prune', '--yes']); - requireSuccess('ktx dev runtime prune confirmed', runtimePruneConfirmed); - requireOutput('ktx dev runtime prune confirmed', runtimePruneConfirmed, /Removed stale KTX Python runtimes/); - requireOutput('ktx dev runtime prune confirmed', runtimePruneConfirmed, /0\\.0\\.0/); - await assert.rejects(() => access(staleRuntimeDir)); - process.stdout.write('ktx dev runtime prune verified\\n'); - const structuralScan = await run('pnpm', 
['exec', 'ktx', 'scan', 'warehouse', '--project-dir', projectDir, diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index a8527c98..b4176353 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -490,13 +490,8 @@ describe('verification snippets', () => { assert.match(source, /ktx dev runtime start reuse/); assert.match(source, /Using existing KTX Python daemon/); assert.match(source, /ktx dev runtime stop/); - assert.match(source, /ktx dev runtime prune dry run/); - assert.match(source, /0\.0\.0/); - assert.match(source, /ktx dev runtime prune needs confirmation/); - assert.match(source, /Refusing to prune without --yes/); - assert.match(source, /ktx dev runtime prune confirmed/); - assert.match(source, /Removed stale KTX Python runtimes/); - assert.match(source, /assert\.rejects\(\(\) => access\(staleRuntimeDir\)\)/); + assert.doesNotMatch(source, /ktx dev runtime prune/); + assert.doesNotMatch(source, /staleRuntimeDir/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'scan',\s*'warehouse'/); assert.match(source, /'--mode',\s*'enriched'/); assert.doesNotMatch(source, /'--enrich'/); From 721f1a998fdf2f85ce99244ec0cea45749686192 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 13:01:56 +0200 Subject: [PATCH 03/15] feat(cli)!: remove ktx agent command (#58) * feat(cli)!: remove ktx agent command * test(context): update PGlite boundary guardrail --- docs-site/components/terminal-preview.tsx | 2 +- .../docs/ai-resources/agent-quickstart.mdx | 2 +- .../docs/ai-resources/markdown-access.mdx | 3 +- .../content/docs/cli-reference/ktx-agent.mdx | 148 ------ .../content/docs/cli-reference/ktx-sl.mdx | 16 + .../content/docs/cli-reference/ktx-wiki.mdx | 30 +- .../content/docs/cli-reference/meta.json | 1 - .../docs/getting-started/quickstart.mdx | 2 +- .../content/docs/guides/serving-agents.mdx | 41 +- .../docs/integrations/agent-clients.mdx | 43 +- 
.../docs/integrations/primary-sources.mdx | 2 +- docs-site/lib/llm-docs.ts | 4 +- packages/cli/src/agent-runtime.test.ts | 152 ------- packages/cli/src/agent-runtime.ts | 109 ----- .../cli/src/agent-search-readiness.test.ts | 51 --- packages/cli/src/agent-search-readiness.ts | 94 ---- packages/cli/src/agent.test.ts | 428 ------------------ packages/cli/src/agent.ts | 219 --------- packages/cli/src/cli-program.ts | 2 - packages/cli/src/cli-runtime.ts | 2 - packages/cli/src/command-schemas.ts | 21 +- packages/cli/src/commands/agent-commands.ts | 149 ------ .../cli/src/commands/knowledge-commands.ts | 21 +- packages/cli/src/commands/sl-commands.ts | 40 +- packages/cli/src/example-smoke.test.ts | 25 +- packages/cli/src/index.test.ts | 250 +--------- packages/cli/src/index.ts | 11 - packages/cli/src/knowledge.test.ts | 59 +++ packages/cli/src/knowledge.ts | 32 +- packages/cli/src/next-steps.test.ts | 19 +- packages/cli/src/next-steps.ts | 8 +- packages/cli/src/project-dir.test.ts | 9 +- packages/cli/src/setup-agents.test.ts | 10 +- packages/cli/src/setup-agents.ts | 30 +- packages/cli/src/sl.test.ts | 132 ++++++ packages/cli/src/sl.ts | 57 ++- packages/cli/src/standalone-smoke.test.ts | 38 +- .../search/pglite-runtime-boundary.test.ts | 3 +- scripts/examples-docs.test.mjs | 14 +- scripts/package-artifacts.mjs | 112 +---- scripts/package-artifacts.test.mjs | 4 +- 41 files changed, 500 insertions(+), 1895 deletions(-) delete mode 100644 docs-site/content/docs/cli-reference/ktx-agent.mdx delete mode 100644 packages/cli/src/agent-runtime.test.ts delete mode 100644 packages/cli/src/agent-runtime.ts delete mode 100644 packages/cli/src/agent-search-readiness.test.ts delete mode 100644 packages/cli/src/agent-search-readiness.ts delete mode 100644 packages/cli/src/agent.test.ts delete mode 100644 packages/cli/src/agent.ts delete mode 100644 packages/cli/src/commands/agent-commands.ts diff --git a/docs-site/components/terminal-preview.tsx 
b/docs-site/components/terminal-preview.tsx index a1f950c8..d430c4ac 100644 --- a/docs-site/components/terminal-preview.tsx +++ b/docs-site/components/terminal-preview.tsx @@ -47,7 +47,7 @@ export function TerminalPreview() {
${" "} - ktx agent context --json + ktx status --json
diff --git a/docs-site/content/docs/ai-resources/agent-quickstart.mdx b/docs-site/content/docs/ai-resources/agent-quickstart.mdx index 40983224..6fd6e5ac 100644 --- a/docs-site/content/docs/ai-resources/agent-quickstart.mdx +++ b/docs-site/content/docs/ai-resources/agent-quickstart.mdx @@ -22,7 +22,7 @@ Agents should start with the smallest source that answers the task: | How to check project readiness | [ktx status](/docs/cli-reference/ktx-status) | [Quickstart](/docs/getting-started/quickstart) | | How context gets built | [Building Context](/docs/guides/building-context) | [ktx ingest](/docs/cli-reference/ktx-ingest) | | How semantic YAML works | [Writing Context](/docs/guides/writing-context) | [ktx sl](/docs/cli-reference/ktx-sl) | -| How machine-readable CLI output is shaped | [ktx agent](/docs/cli-reference/ktx-agent) | [Markdown Access](/docs/ai-resources/markdown-access) | +| How machine-readable CLI output is shaped | [ktx sl](/docs/cli-reference/ktx-sl) | [ktx wiki](/docs/cli-reference/ktx-wiki) | ## Operating workflow diff --git a/docs-site/content/docs/ai-resources/markdown-access.mdx b/docs-site/content/docs/ai-resources/markdown-access.mdx index c363a215..12bb7456 100644 --- a/docs-site/content/docs/ai-resources/markdown-access.mdx +++ b/docs-site/content/docs/ai-resources/markdown-access.mdx @@ -31,7 +31,8 @@ Every docs page has a Markdown route: ```text https://docs.kaelio.com/ktx/docs/getting-started/quickstart.md -https://docs.kaelio.com/ktx/docs/cli-reference/ktx-agent.md +https://docs.kaelio.com/ktx/docs/cli-reference/ktx-sl.md +https://docs.kaelio.com/ktx/docs/cli-reference/ktx-wiki.md https://docs.kaelio.com/ktx/docs/guides/building-context.md ``` diff --git a/docs-site/content/docs/cli-reference/ktx-agent.mdx b/docs-site/content/docs/cli-reference/ktx-agent.mdx deleted file mode 100644 index cdc4ceac..00000000 --- a/docs-site/content/docs/cli-reference/ktx-agent.mdx +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: "ktx agent" -description: 
"Machine-readable commands for coding agents." ---- - -Hidden commands that provide machine-readable JSON output for coding agents. These are the commands that agent integrations (Claude Code, Cursor, Codex, OpenCode) call under the hood — you typically won't use them directly. - -All `ktx agent` subcommands require `--json` and produce structured JSON output on stdout. - -## Command signature - -```bash -ktx agent --json [options] -``` - -## Subcommands - -| Subcommand | Description | -|-----------|-------------| -| `tools` | Print available agent-facing KTX tools | -| `context` | Print project context for agent planning | -| `sl list` | List semantic-layer sources | -| `sl read ` | Read one semantic-layer source | -| `sl query` | Run a semantic-layer query from a JSON file | -| `wiki search ` | Search KTX wiki pages | -| `wiki read ` | Read one KTX wiki page | -| `sql execute` | Execute read-only SQL with a row limit | - -## Options - -### `agent tools` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | - -### `agent context` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | - -### `agent sl list` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | -| `--connection-id ` | Filter by connection id | — | -| `--query ` | Search source names and descriptions | — | - -### `agent sl read` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | -| `--connection-id ` | Connection id containing the source | — | - -### `agent sl query` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | -| `--connection-id ` | Connection id for execution (required) | — | -| `--query-file ` | JSON semantic-layer query file (required) | — | -| `--execute` | Execute 
the compiled query against the connection | `false` | -| `--max-rows ` | Maximum rows to return when executing (1-1000) | — | - -### `agent wiki search` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | -| `--limit ` | Maximum search results | `10` | - -### `agent wiki read` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | - -### `agent sql execute` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output (required) | — | -| `--connection-id ` | Connection id for execution (required) | — | -| `--sql-file ` | SQL file to execute (required) | — | -| `--max-rows ` | Maximum rows to return, 1-1000 (required) | — | - -## Examples - -```bash -# List available tools -ktx agent tools --json - -# Get project context for planning -ktx agent context --json - -# List semantic sources -ktx agent sl list --json - -# Search semantic sources by name -ktx agent sl list --json --query "revenue" - -# Read a semantic source -ktx agent sl read orders --json --connection-id my-warehouse - -# Run a semantic-layer query from a file -ktx agent sl query --json \ - --connection-id my-warehouse \ - --query-file /tmp/query.json \ - --execute \ - --max-rows 100 - -# Search wiki pages -ktx agent wiki search "churn definition" --json - -# Read a specific wiki page -ktx agent wiki read page-abc123 --json - -# Execute read-only SQL -ktx agent sql execute --json \ - --connection-id my-warehouse \ - --sql-file /tmp/query.sql \ - --max-rows 500 -``` - -## Output - -Every `ktx agent` command writes JSON to stdout and diagnostic text to stderr. Agents should parse stdout as JSON and treat a non-zero exit code as a failed tool call. 
- -```json -{ - "ok": true, - "data": { - "type": "agent-response" - } -} -``` - -## Common errors - -| Error | Cause | Recovery | -|-------|-------|----------| -| Missing JSON output | `--json` was omitted | Re-run the same subcommand with `--json` | -| Unknown connection id | The requested connection is not configured in `ktx.yaml` | Call `ktx agent context --json` or `ktx connection list` to discover valid ids | -| Query file cannot be read | `--query-file` points to a missing or invalid JSON file | Write the query payload to a real file and pass its absolute path | -| SQL execution rejected | SQL is not read-only or `--max-rows` is missing | Use semantic-layer queries first; for direct SQL, pass read-only SQL and an explicit row limit | diff --git a/docs-site/content/docs/cli-reference/ktx-sl.mdx b/docs-site/content/docs/cli-reference/ktx-sl.mdx index 4ec7bdd1..f5a31b27 100644 --- a/docs-site/content/docs/cli-reference/ktx-sl.mdx +++ b/docs-site/content/docs/cli-reference/ktx-sl.mdx @@ -28,6 +28,7 @@ ktx sl [options] | Flag | Description | Default | |------|-------------|---------| | `--connection-id ` | Filter by KTX connection id | — | +| `--query ` | Search source names and descriptions | — | | `--output ` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` | | `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` | @@ -36,6 +37,7 @@ ktx sl [options] | Flag | Description | Default | |------|-------------|---------| | `--connection-id ` | KTX connection id (required) | — | +| `--json` | Print JSON output | `false` | ### `sl validate` @@ -55,6 +57,7 @@ ktx sl [options] | Flag | Description | Default | |------|-------------|---------| | `--connection-id ` | KTX connection id | — | +| `--query-file ` | JSON semantic-layer query file | — | | `--measure ` | Measure to query; repeatable (at least one required) | — | | `--dimension ` | Dimension to include; repeatable | — | | `--filter ` | Filter expression; repeatable | 
— | @@ -78,9 +81,15 @@ ktx sl list --connection-id my-warehouse # List sources as JSON ktx sl list --json +# Search sources as JSON +ktx sl list --json --query "revenue" + # Read a source definition ktx sl read orders --connection-id my-warehouse +# Read a source definition as JSON +ktx sl read orders --connection-id my-warehouse --json + # Validate a source against the live schema ktx sl validate orders --connection-id my-warehouse @@ -119,6 +128,13 @@ ktx sl query \ --dimension orders.created_date \ --execute \ --max-rows 1000 + +# Execute a query from a JSON file +ktx sl query \ + --connection-id my-warehouse \ + --query-file query.json \ + --execute \ + --max-rows 100 ``` ## Output diff --git a/docs-site/content/docs/cli-reference/ktx-wiki.mdx b/docs-site/content/docs/cli-reference/ktx-wiki.mdx index a709ac07..7e45420e 100644 --- a/docs-site/content/docs/cli-reference/ktx-wiki.mdx +++ b/docs-site/content/docs/cli-reference/ktx-wiki.mdx @@ -26,19 +26,23 @@ ktx wiki [options] | Flag | Description | Default | |------|-------------|---------| +| `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | ### `wiki read` | Flag | Description | Default | |------|-------------|---------| +| `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | ### `wiki search` | Flag | Description | Default | |------|-------------|---------| +| `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | +| `--limit ` | Maximum search results | — | ### `wiki write` @@ -58,12 +62,21 @@ ktx wiki [options] # List all wiki pages ktx wiki list +# List all wiki pages as JSON +ktx wiki list --json + # Read a specific wiki page ktx wiki read revenue-definitions +# Read a specific wiki page as JSON +ktx wiki read revenue-definitions --json + # Search wiki pages ktx wiki search "monthly recurring revenue" +# Search wiki pages as JSON +ktx wiki search "monthly recurring revenue" --json --limit 10 + # Write a global 
knowledge page ktx wiki write revenue-definitions \ --summary "Canonical revenue metric definitions" \ @@ -97,13 +110,16 @@ Wiki commands print local knowledge pages and search results. Agents should sear ```json { - "results": [ - { - "key": "revenue-definitions", - "summary": "Canonical revenue metric definitions", - "score": 0.92 - } - ] + "kind": "list", + "data": { + "items": [ + { + "key": "revenue-definitions", + "summary": "Canonical revenue metric definitions", + "score": 0.92 + } + ] + } } ``` diff --git a/docs-site/content/docs/cli-reference/meta.json b/docs-site/content/docs/cli-reference/meta.json index a5d7a95f..bed3f98c 100644 --- a/docs-site/content/docs/cli-reference/meta.json +++ b/docs-site/content/docs/cli-reference/meta.json @@ -9,7 +9,6 @@ "ktx-sl", "ktx-wiki", "ktx-status", - "ktx-agent", "ktx-dev" ] } diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 13b973e3..6aef2b14 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -211,7 +211,7 @@ KTX writes project state as plain files so agents can inspect and edit changes i | `semantic-layer//*.yaml` | context build, ingestion, or `ktx sl write` | Semantic source definitions agents use for SQL generation | | `knowledge/global/*.md` | ingestion or `ktx wiki write --scope global` | Shared business context and metric definitions | | `knowledge/user//*.md` | `ktx wiki write --scope user` | User-scoped notes for one agent/user context | -| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling `ktx agent` commands | +| `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling public `ktx` commands | ## Verify it worked diff --git a/docs-site/content/docs/guides/serving-agents.mdx 
b/docs-site/content/docs/guides/serving-agents.mdx index 4285611b..b6f073b8 100644 --- a/docs-site/content/docs/guides/serving-agents.mdx +++ b/docs-site/content/docs/guides/serving-agents.mdx @@ -3,37 +3,36 @@ title: Serving Agents description: Expose your context to Claude Code, Cursor, Codex, and other coding agents. --- -Once you've built and refined your context, the final step is exposing it to -coding agents. KTX provides machine-readable CLI commands for direct terminal -access from Claude Code, Cursor, Codex, OpenCode, and custom agent workflows. +Once you've built and refined your context, expose it to coding agents through +the public KTX CLI. Claude Code, Cursor, Codex, OpenCode, and custom agent +workflows can call the same commands you use at a terminal. ## CLI Commands -KTX provides a set of machine-readable commands under `ktx agent`. These return -JSON output designed for programmatic consumption. +KTX public commands support JSON output for the context reads that agents use +most often. Use `--project-dir` when the agent is not already running inside the +KTX project directory. 
### Available commands ```bash -# List available tools and their descriptions -ktx agent tools --json - -# Get project context for planning -ktx agent context --json +# Check setup and context readiness +ktx status --json ``` **Semantic layer:** ```bash # List sources -ktx agent sl list --json -ktx agent sl list --json --connection-id my-postgres +ktx sl list --json +ktx sl list --json --connection-id my-postgres +ktx sl list --json --query "revenue" # Read a source -ktx agent sl read orders --json --connection-id my-postgres +ktx sl read orders --json --connection-id my-postgres # Run a query from a JSON file -ktx agent sl query --json \ +ktx sl query --json \ --connection-id my-postgres \ --query-file query.json \ --execute \ @@ -44,20 +43,10 @@ ktx agent sl query --json \ ```bash # Search knowledge pages -ktx agent wiki search "revenue recognition" --json --limit 10 +ktx wiki search "revenue recognition" --json --limit 10 # Read a specific page -ktx agent wiki read order-status-definitions --json -``` - -**SQL execution:** - -```bash -# Execute read-only SQL with a row limit -ktx agent sql execute --json \ - --connection-id my-postgres \ - --sql-file query.sql \ - --max-rows 500 +ktx wiki read order-status-definitions --json ``` ## Setting Up Your Agent diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index 1c105e1f..8a055fda 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -3,7 +3,9 @@ title: Agent Clients description: Set up KTX with Claude Code, Cursor, Codex, and OpenCode. --- -KTX integrates with coding agents through CLI skills and command files. These files teach agents to call `ktx agent ...` commands directly from the terminal for semantic-layer context, wiki knowledge, and safe SQL execution. +KTX integrates with coding agents through CLI skills and command files. 
These +files teach agents to call public `ktx` commands directly from the terminal for +semantic-layer context and wiki knowledge. Run `ktx setup` and select your agent targets, or configure manually using the snippets below. @@ -26,17 +28,17 @@ Create `.claude/skills/ktx/SKILL.md`: ```markdown title=".claude/skills/ktx/SKILL.md" --- name: ktx -description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project. +description: Use local KTX semantic context and wiki knowledge for this project. --- Available commands: -- `ktx agent context --json --project-dir /path/to/project` -- `ktx agent sl list --json --project-dir /path/to/project` -- `ktx agent sl read '' --json --project-dir /path/to/project` -- `ktx agent sl query --json --project-dir /path/to/project --connection-id '' --query-file '' --execute --max-rows 100` -- `ktx agent wiki search '' --json --project-dir /path/to/project` -- `ktx agent wiki read '' --json --project-dir /path/to/project` -- `ktx agent sql execute --json --project-dir /path/to/project --connection-id '' --sql-file '' --max-rows 100` +- `ktx status --json --project-dir /path/to/project` +- `ktx sl list --json --project-dir /path/to/project` +- `ktx sl list --json --project-dir /path/to/project --query ''` +- `ktx sl read '' --json --project-dir /path/to/project --connection-id ''` +- `ktx sl query --json --project-dir /path/to/project --connection-id '' --query-file '' --execute --max-rows 100` +- `ktx wiki search '' --json --project-dir /path/to/project --limit 10` +- `ktx wiki read '' --json --project-dir /path/to/project` ``` ### Workflow tips @@ -123,22 +125,19 @@ All supported agent clients call the same KTX CLI commands: | Command | Description | |---------|-------------| -| `ktx agent context --json` | Return a compact project context summary | -| `ktx agent tools --json` | List available agent-facing commands | -| `ktx agent wiki search --json` | Search knowledge pages | -| `ktx agent wiki read 
--json` | Read a knowledge page | -| `ktx agent wiki write --json` | Write or update a knowledge page | -| `ktx agent sl list --json` | List semantic layer sources | -| `ktx agent sl read --json` | Read a semantic source definition | -| `ktx agent sl write --json` | Write or update a semantic source | -| `ktx agent sl validate --json` | Validate semantic source definitions | -| `ktx agent sl query --json` | Execute a semantic layer query when semantic compute is configured | -| `ktx agent sql execute --json` | Execute read-only SQL with an explicit row limit | +| `ktx status --json` | Return project setup and context readiness | +| `ktx wiki search --json` | Search knowledge pages | +| `ktx wiki read --json` | Read a knowledge page | +| `ktx wiki write ` | Write or update a knowledge page | +| `ktx sl list --json` | List semantic-layer sources | +| `ktx sl list --query --json` | Search semantic-layer sources | +| `ktx sl read --json --connection-id ` | Read a semantic source definition | +| `ktx sl write --connection-id ` | Write or update a semantic source | +| `ktx sl validate --connection-id ` | Validate semantic source definitions | +| `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured | ### Security constraints -- SQL execution is always read-only. -- Agent SQL execution requires an explicit `--max-rows` limit from 1 to 1000. - Secrets and credentials are never exposed in command output. - Commands resolve the project from `--project-dir`, `KTX_PROJECT_DIR`, or the nearest `ktx.yaml`. diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index 49200d47..94dc4e44 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -511,4 +511,4 @@ No authentication required — SQLite is file-based. 
The file must be readable b | Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | | Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup | | Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output | -| SQL execution fails through agents | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test ` and check the agent command flags | +| Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test ` and check the `ktx sl query` flags | diff --git a/docs-site/lib/llm-docs.ts b/docs-site/lib/llm-docs.ts index 9d9b5c74..69aac698 100644 --- a/docs-site/lib/llm-docs.ts +++ b/docs-site/lib/llm-docs.ts @@ -67,12 +67,12 @@ ${link("/docs/guides/writing-context", "Writing Context", "Write semantic source - [Full documentation](${absoluteUrl("/llms-full.txt")}): All docs pages in one plain-text markdown response - [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown - [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough -- [Agent CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-agent.md")}): Machine-readable agent commands +- [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output +- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Knowledge page commands and JSON output ## CLI Reference ${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")} 
-${link("/docs/cli-reference/ktx-agent", "ktx agent", "Machine-readable commands for coding agents")} ${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")} ${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")} ${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")} diff --git a/packages/cli/src/agent-runtime.test.ts b/packages/cli/src/agent-runtime.test.ts deleted file mode 100644 index 808ddac3..00000000 --- a/packages/cli/src/agent-runtime.test.ts +++ /dev/null @@ -1,152 +0,0 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { - KTX_AGENT_MAX_ROWS_CAP, - createKtxAgentRuntime, - parseAgentMaxRows, - readAgentJsonFile, - writeAgentJson, - writeAgentJsonError, -} from './agent-runtime.js'; - -function makeIo() { - let stdout = ''; - let stderr = ''; - return { - io: { - stdout: { write: (chunk: string) => (stdout += chunk) }, - stderr: { write: (chunk: string) => (stderr += chunk) }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -describe('agent runtime helpers', () => { - let tempDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-agent-runtime-')); - }); - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - it('writes JSON success and error envelopes without color or spinners', () => { - const successIo = makeIo(); - const errorIo = makeIo(); - - writeAgentJson(successIo.io, { ok: true }); - writeAgentJsonError(errorIo.io, 'missing source', { code: 'NOT_FOUND' }); - - expect(JSON.parse(successIo.stdout())).toEqual({ ok: true }); - expect(successIo.stderr()).toBe(''); - expect(JSON.parse(errorIo.stderr())).toEqual({ - ok: false, - error: { message: 'missing source', code: 'NOT_FOUND' }, - }); - 
expect(errorIo.stdout()).toBe(''); - }); - - it('reads JSON query files as objects', async () => { - const path = join(tempDir, 'query.json'); - await writeFile(path, '{"measures":["revenue"],"limit":50}', 'utf-8'); - - await expect(readAgentJsonFile(path)).resolves.toEqual({ measures: ['revenue'], limit: 50 }); - }); - - it('rejects non-object JSON query files', async () => { - const path = join(tempDir, 'query.json'); - await writeFile(path, '["revenue"]', 'utf-8'); - - await expect(readAgentJsonFile(path)).rejects.toThrow('must contain a JSON object'); - }); - - it('requires positive row limits and enforces the agent cap', () => { - expect(parseAgentMaxRows(100)).toBe(100); - expect(() => parseAgentMaxRows(undefined)).toThrow('maxRows is required'); - expect(() => parseAgentMaxRows(0)).toThrow('positive integer'); - expect(() => parseAgentMaxRows(KTX_AGENT_MAX_ROWS_CAP + 1)).toThrow(String(KTX_AGENT_MAX_ROWS_CAP)); - }); - - it('constructs local context ports with semantic compute and query executor', async () => { - const project = { - projectDir: tempDir, - configPath: join(tempDir, 'ktx.yaml'), - config: { project: 'revenue', connections: {} }, - coreConfig: {}, - git: {}, - fileStore: {}, - } as never; - const ports = { knowledge: {}, semanticLayer: {} } as never; - const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; - const queryExecutor = { execute: vi.fn() }; - const loadProject = vi.fn(async () => project); - const createContextTools = vi.fn(() => ports); - - await expect( - createKtxAgentRuntime( - { projectDir: tempDir, enableSemanticCompute: true, enableQueryExecution: true }, - { - loadProject, - createContextTools, - createSemanticLayerCompute: () => semanticLayerCompute, - createQueryExecutor: () => queryExecutor, - }, - ), - ).resolves.toMatchObject({ project, ports, queryExecutor }); - - expect(loadProject).toHaveBeenCalledWith({ projectDir: tempDir }); - 
expect(createContextTools).toHaveBeenCalledWith(project, { - semanticLayerCompute, - queryExecutor, - }); - }); - - it('creates managed semantic compute when no test override is injected', async () => { - const project = { - projectDir: tempDir, - configPath: join(tempDir, 'ktx.yaml'), - config: { project: 'revenue', connections: {} }, - coreConfig: {}, - git: {}, - fileStore: {}, - } as never; - const ports = { semanticLayer: {} } as never; - const semanticLayerCompute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; - const loadProject = vi.fn(async () => project); - const createContextTools = vi.fn(() => ports); - const createManagedSemanticLayerCompute = vi.fn(async () => semanticLayerCompute); - const { io } = makeIo(); - - await expect( - createKtxAgentRuntime( - { - projectDir: tempDir, - enableSemanticCompute: true, - enableQueryExecution: false, - cliVersion: '0.2.0', - runtimeInstallPolicy: 'auto', - io, - }, - { - loadProject, - createContextTools, - createManagedSemanticLayerCompute, - }, - ), - ).resolves.toMatchObject({ project, ports, semanticLayerCompute }); - - expect(createManagedSemanticLayerCompute).toHaveBeenCalledWith({ - cliVersion: '0.2.0', - installPolicy: 'auto', - io, - }); - expect(createContextTools).toHaveBeenCalledWith(project, { - semanticLayerCompute, - }); - }); -}); diff --git a/packages/cli/src/agent-runtime.ts b/packages/cli/src/agent-runtime.ts deleted file mode 100644 index feccae7c..00000000 --- a/packages/cli/src/agent-runtime.ts +++ /dev/null @@ -1,109 +0,0 @@ -import { readFile } from 'node:fs/promises'; -import { createDefaultLocalQueryExecutor, type KtxSqlQueryExecutorPort } from '@ktx/context/connections'; -import type { KtxSemanticLayerComputePort } from '@ktx/context/daemon'; -import { createLocalProjectMcpContextPorts, type KtxMcpContextPorts } from '@ktx/context/mcp'; -import { type KtxLocalProject, loadKtxProject } from '@ktx/context/project'; -import type { KtxCliIo } from 
'./cli-runtime.js'; -import { - createManagedPythonSemanticLayerComputePort, - type KtxManagedPythonInstallPolicy, -} from './managed-python-command.js'; - -export const KTX_AGENT_MAX_ROWS_CAP = 1000; - -export interface KtxAgentRuntimeOptions { - projectDir: string; - enableSemanticCompute: boolean; - enableQueryExecution: boolean; - cliVersion?: string; - runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; - io?: KtxCliIo; -} - -export interface KtxAgentRuntime { - project: KtxLocalProject; - ports: KtxMcpContextPorts; - semanticLayerCompute?: KtxSemanticLayerComputePort; - queryExecutor?: KtxSqlQueryExecutorPort; -} - -export interface KtxAgentRuntimeDeps { - loadProject?: typeof loadKtxProject; - createContextTools?: typeof createLocalProjectMcpContextPorts; - createSemanticLayerCompute?: () => KtxSemanticLayerComputePort; - createManagedSemanticLayerCompute?: typeof createManagedPythonSemanticLayerComputePort; - createQueryExecutor?: () => KtxSqlQueryExecutorPort; -} - -export function writeAgentJson(io: KtxCliIo, value: unknown): void { - io.stdout.write(`${JSON.stringify(value, null, 2)}\n`); -} - -export function writeAgentJsonError( - io: KtxCliIo, - message: string, - detail: Record = {}, -): void { - io.stderr.write(`${JSON.stringify({ ok: false, error: { message, ...detail } }, null, 2)}\n`); -} - -export async function readAgentJsonFile(path: string): Promise> { - const parsed = JSON.parse(await readFile(path, 'utf-8')) as unknown; - if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { - throw new Error(`${path} must contain a JSON object.`); - } - return parsed as Record; -} - -export function parseAgentMaxRows(value: number | undefined): number { - if (!Number.isInteger(value) || value === undefined || value <= 0) { - throw new Error('maxRows is required and must be a positive integer.'); - } - if (value > KTX_AGENT_MAX_ROWS_CAP) { - throw new Error(`maxRows must be less than or equal to ${KTX_AGENT_MAX_ROWS_CAP}.`); - } - return 
value; -} - -async function createAgentSemanticLayerCompute( - options: KtxAgentRuntimeOptions, - deps: KtxAgentRuntimeDeps, -): Promise { - if (!options.enableSemanticCompute) { - return undefined; - } - if (deps.createSemanticLayerCompute) { - return deps.createSemanticLayerCompute(); - } - if (!options.cliVersion || !options.runtimeInstallPolicy || !options.io) { - throw new Error('Managed Python semantic compute requires cliVersion, runtimeInstallPolicy, and io.'); - } - const createManagedSemanticLayerCompute = - deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort; - return createManagedSemanticLayerCompute({ - cliVersion: options.cliVersion, - installPolicy: options.runtimeInstallPolicy, - io: options.io, - }); -} - -export async function createKtxAgentRuntime( - options: KtxAgentRuntimeOptions, - deps: KtxAgentRuntimeDeps = {}, -): Promise { - const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: options.projectDir }); - const semanticLayerCompute = await createAgentSemanticLayerCompute(options, deps); - const queryExecutor = options.enableQueryExecution - ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() - : undefined; - const ports = (deps.createContextTools ?? createLocalProjectMcpContextPorts)(project, { - ...(semanticLayerCompute ? { semanticLayerCompute } : {}), - ...(queryExecutor ? { queryExecutor } : {}), - }); - return { - project, - ports, - ...(semanticLayerCompute ? { semanticLayerCompute } : {}), - ...(queryExecutor ? 
{ queryExecutor } : {}), - }; -} diff --git a/packages/cli/src/agent-search-readiness.test.ts b/packages/cli/src/agent-search-readiness.test.ts deleted file mode 100644 index 432afa90..00000000 --- a/packages/cli/src/agent-search-readiness.test.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { describe, expect, it } from 'vitest'; -import { - isMissingProjectConfigError, - missingConnectionSlSearchReadiness, - missingProjectSlSearchReadiness, - noConnectionsSlSearchReadiness, - noIndexedSourcesSlSearchReadiness, -} from './agent-search-readiness.js'; - -describe('agent semantic-layer search readiness guidance', () => { - it('formats missing project guidance with exact recovery commands', () => { - expect(missingProjectSlSearchReadiness('/tmp/ktx-search', 'gross revenue')).toEqual({ - code: 'agent_sl_search_missing_project', - message: 'Semantic-layer search needs an initialized KTX project at /tmp/ktx-search.', - nextSteps: [ - 'ktx setup --project-dir /tmp/ktx-search', - 'ktx status --project-dir /tmp/ktx-search', - 'ktx ingest run --connection-id --adapter ', - 'ktx agent sl list --json --query "gross revenue" --project-dir /tmp/ktx-search', - ], - }); - }); - - it('formats no-connection and no-index guidance without hiding the project path', () => { - expect(noConnectionsSlSearchReadiness('/tmp/ktx-search', 'revenue')).toMatchObject({ - code: 'agent_sl_search_no_connections', - message: 'Semantic-layer search found no configured connections in /tmp/ktx-search.', - }); - expect(noIndexedSourcesSlSearchReadiness('/tmp/ktx-search', 'orders')).toMatchObject({ - code: 'agent_sl_search_no_indexed_sources', - message: 'Semantic-layer search found no indexed semantic-layer sources in /tmp/ktx-search.', - }); - }); - - it('formats unknown connection guidance', () => { - expect(missingConnectionSlSearchReadiness('/tmp/ktx-search', 'warehouse', 'revenue')).toMatchObject({ - code: 'agent_sl_search_unknown_connection', - message: 'Semantic-layer search connection "warehouse" is 
not configured in /tmp/ktx-search.', - }); - }); - - it('detects missing ktx.yaml read errors', () => { - const error = Object.assign(new Error('ENOENT: no such file or directory'), { - code: 'ENOENT', - path: '/tmp/ktx-search/ktx.yaml', - }); - - expect(isMissingProjectConfigError(error)).toBe(true); - expect(isMissingProjectConfigError(new Error('other'))).toBe(false); - }); -}); diff --git a/packages/cli/src/agent-search-readiness.ts b/packages/cli/src/agent-search-readiness.ts deleted file mode 100644 index e4de7318..00000000 --- a/packages/cli/src/agent-search-readiness.ts +++ /dev/null @@ -1,94 +0,0 @@ -export type KtxAgentSlSearchReadinessCode = - | 'agent_sl_search_missing_project' - | 'agent_sl_search_no_connections' - | 'agent_sl_search_unknown_connection' - | 'agent_sl_search_no_indexed_sources'; - -export interface KtxAgentSlSearchReadinessDetail { - code: KtxAgentSlSearchReadinessCode; - message: string; - nextSteps: string[]; -} - -function queryForCommand(query: string | undefined): string { - const trimmed = query?.trim(); - return trimmed && trimmed.length > 0 ? 
trimmed : 'revenue'; -} - -function projectSearchCommand(projectDir: string, query: string | undefined): string { - return `ktx agent sl list --json --query ${JSON.stringify(queryForCommand(query))} --project-dir ${projectDir}`; -} - -function baseNextSteps(projectDir: string, query: string | undefined): string[] { - return [ - `ktx setup --project-dir ${projectDir}`, - `ktx status --project-dir ${projectDir}`, - 'ktx ingest run --connection-id --adapter ', - projectSearchCommand(projectDir, query), - ]; -} - -export function missingProjectSlSearchReadiness( - projectDir: string, - query: string | undefined, -): KtxAgentSlSearchReadinessDetail { - return { - code: 'agent_sl_search_missing_project', - message: `Semantic-layer search needs an initialized KTX project at ${projectDir}.`, - nextSteps: baseNextSteps(projectDir, query), - }; -} - -export function noConnectionsSlSearchReadiness( - projectDir: string, - query: string | undefined, -): KtxAgentSlSearchReadinessDetail { - return { - code: 'agent_sl_search_no_connections', - message: `Semantic-layer search found no configured connections in ${projectDir}.`, - nextSteps: baseNextSteps(projectDir, query), - }; -} - -export function missingConnectionSlSearchReadiness( - projectDir: string, - connectionId: string, - query: string | undefined, -): KtxAgentSlSearchReadinessDetail { - return { - code: 'agent_sl_search_unknown_connection', - message: `Semantic-layer search connection "${connectionId}" is not configured in ${projectDir}.`, - nextSteps: baseNextSteps(projectDir, query), - }; -} - -export function noIndexedSourcesSlSearchReadiness( - projectDir: string, - query: string | undefined, -): KtxAgentSlSearchReadinessDetail { - return { - code: 'agent_sl_search_no_indexed_sources', - message: `Semantic-layer search found no indexed semantic-layer sources in ${projectDir}.`, - nextSteps: baseNextSteps(projectDir, query), - }; -} - -function errorCode(error: unknown): string | undefined { - if (typeof error !== 
'object' || error === null || !('code' in error)) { - return undefined; - } - const code = (error as { code?: unknown }).code; - return typeof code === 'string' ? code : undefined; -} - -function errorPath(error: unknown): string | undefined { - if (typeof error !== 'object' || error === null || !('path' in error)) { - return undefined; - } - const path = (error as { path?: unknown }).path; - return typeof path === 'string' ? path : undefined; -} - -export function isMissingProjectConfigError(error: unknown): boolean { - return errorCode(error) === 'ENOENT' && (errorPath(error)?.endsWith('ktx.yaml') ?? false); -} diff --git a/packages/cli/src/agent.test.ts b/packages/cli/src/agent.test.ts deleted file mode 100644 index 566f5763..00000000 --- a/packages/cli/src/agent.test.ts +++ /dev/null @@ -1,428 +0,0 @@ -import { mkdtemp, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { buildDefaultKtxProjectConfig } from '@ktx/context/project'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { runKtxAgent } from './agent.js'; -import type { KtxAgentRuntime } from './agent-runtime.js'; - -function makeIo() { - let stdout = ''; - let stderr = ''; - return { - io: { - stdout: { write: (chunk: string) => (stdout += chunk) }, - stderr: { write: (chunk: string) => (stderr += chunk) }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -function runtime(overrides: Record = {}): KtxAgentRuntime { - const config = buildDefaultKtxProjectConfig('revenue'); - return { - project: { - projectDir: '/tmp/revenue', - configPath: '/tmp/revenue/ktx.yaml', - config: { - ...config, - connections: { - warehouse: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true as const }, - }, - }, - coreConfig: {} as KtxAgentRuntime['project']['coreConfig'], - git: {} as KtxAgentRuntime['project']['git'], - fileStore: {} as KtxAgentRuntime['project']['fileStore'], - }, - ports: { - 
connections: { list: vi.fn(async () => [{ id: 'warehouse', name: 'warehouse', connectionType: 'sqlite' }]) }, - semanticLayer: { - listSources: vi.fn(async () => ({ - sources: [ - { - connectionId: 'warehouse', - connectionName: 'warehouse', - name: 'orders', - columnCount: 2, - measureCount: 1, - joinCount: 0, - }, - ], - totalSources: 1, - })), - readSource: vi.fn(async () => ({ sourceName: 'orders', yaml: 'name: orders\n' })), - writeSource: vi.fn(async () => ({ success: true, sourceName: 'orders' })), - validate: vi.fn(async () => ({ success: true, errors: [], warnings: [] })), - query: vi.fn(async () => ({ sql: 'select 1', headers: ['x'], rows: [[1]], totalRows: 1, plan: {} })), - }, - knowledge: { - search: vi.fn(async () => ({ - results: [ - { - key: 'page-1', - path: 'knowledge/global/page-1.md', - scope: 'GLOBAL' as const, - summary: 'Revenue logic', - score: 0.9, - matchReasons: ['lexical' as const], - }, - ], - totalFound: 1, - })), - read: vi.fn(async () => ({ - key: 'page-1', - scope: 'GLOBAL' as const, - summary: 'Revenue logic', - content: 'Use net revenue.', - })), - write: vi.fn(async () => ({ success: true, key: 'page-1', action: 'created' as const })), - }, - }, - queryExecutor: { - execute: vi.fn(async () => ({ headers: ['x'], rows: [[1]], totalRows: 1, command: 'SELECT', rowCount: 1 })), - }, - ...overrides, - }; -} - -function runtimeWithoutConnections(): KtxAgentRuntime { - const base = runtime(); - return { - ...base, - project: { - ...base.project, - config: { - ...base.project.config, - connections: {}, - }, - }, - ports: { - ...base.ports, - semanticLayer: { - ...base.ports.semanticLayer!, - listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })), - }, - }, - }; -} - -describe('runKtxAgent', () => { - let tempDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-agent-')); - }); - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - it('prints tool 
discovery with every stable command', async () => { - const io = makeIo(); - - await expect(runKtxAgent({ command: 'tools', projectDir: tempDir, json: true }, io.io)).resolves.toBe(0); - - const body = JSON.parse(io.stdout()); - expect(body.projectDir).toBe(tempDir); - expect(body.tools.map((tool: { name: string }) => tool.name)).toEqual([ - 'context', - 'sl.list', - 'sl.read', - 'sl.query', - 'wiki.search', - 'wiki.read', - 'sql.execute', - ]); - expect(io.stderr()).toBe(''); - }); - - it('prints project context from setup status, connections, and SL summaries', async () => { - const io = makeIo(); - const createRuntime = vi.fn(async () => runtime()); - const readSetupStatus = vi.fn(async () => ({ project: { path: tempDir, ready: true }, agents: [] })); - - await expect( - runKtxAgent({ command: 'context', projectDir: tempDir, json: true }, io.io, { createRuntime, readSetupStatus }), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ - projectDir: tempDir, - status: { project: { ready: true } }, - connections: [{ id: 'warehouse' }], - semanticLayer: { totalSources: 1 }, - }); - }); - - it('dispatches SL list, SL read, wiki search, and wiki read through local ports', async () => { - for (const args of [ - { command: 'sl-list' as const, projectDir: tempDir, json: true as const, connectionId: 'warehouse' }, - { - command: 'sl-read' as const, - projectDir: tempDir, - json: true as const, - connectionId: 'warehouse', - sourceName: 'orders', - }, - { command: 'wiki-search' as const, projectDir: tempDir, json: true as const, query: 'revenue', limit: 10 }, - { command: 'wiki-read' as const, projectDir: tempDir, json: true as const, pageId: 'page-1' }, - ]) { - const io = makeIo(); - await expect(runKtxAgent(args, io.io, { createRuntime: async () => runtime() })).resolves.toBe(0); - expect(JSON.parse(io.stdout())).toBeTruthy(); - expect(io.stderr()).toBe(''); - } - }); - - it('prints wiki hybrid search metadata from the hidden agent wiki search 
command', async () => { - const fakeRuntime = runtime(); - const knowledge = fakeRuntime.ports.knowledge; - if (!knowledge) { - throw new Error('Expected runtime knowledge port'); - } - fakeRuntime.ports.knowledge = { - ...knowledge, - search: vi.fn(async () => ({ - results: [ - { - key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', - scope: 'GLOBAL' as const, - summary: 'Revenue metric definition', - score: 0.02459016393442623, - matchReasons: ['lexical' as const, 'token' as const], - }, - ], - totalFound: 1, - })), - }; - const io = makeIo(); - - await expect( - runKtxAgent({ command: 'wiki-search', projectDir: tempDir, json: true, query: 'paid order', limit: 5 }, io.io, { - createRuntime: async () => fakeRuntime, - }), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toEqual({ - results: [ - expect.objectContaining({ - key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', - matchReasons: ['lexical', 'token'], - }), - ], - totalFound: 1, - }); - }); - - it('executes SL queries from a JSON query file', async () => { - const queryFile = join(tempDir, 'sl-query.json'); - const io = makeIo(); - await writeFile(queryFile, '{"measures":["total_revenue"],"dimensions":[]}', 'utf-8'); - - await expect( - runKtxAgent( - { - command: 'sl-query', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - queryFile, - execute: true, - maxRows: 100, - cliVersion: '0.2.0', - runtimeInstallPolicy: 'never', - }, - io.io, - { createRuntime: async () => runtime() }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toMatchObject({ sql: 'select 1', rows: [[1]] }); - }); - - it('passes managed runtime options into default SL query runtime creation', async () => { - const queryFile = join(tempDir, 'sl-query.json'); - const io = makeIo(); - const createRuntime = vi.fn(async () => runtime()); - await writeFile(queryFile, '{"measures":["total_revenue"],"dimensions":[]}', 'utf-8'); - - await expect( - runKtxAgent( - { - 
command: 'sl-query', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - queryFile, - execute: false, - cliVersion: '0.2.0', - runtimeInstallPolicy: 'auto', - }, - io.io, - { createRuntime }, - ), - ).resolves.toBe(0); - - expect(createRuntime).toHaveBeenCalledWith({ - projectDir: tempDir, - enableSemanticCompute: true, - enableQueryExecution: false, - cliVersion: '0.2.0', - runtimeInstallPolicy: 'auto', - io: io.io, - }); - }); - - it('executes read-only SQL from a SQL file with an explicit row limit', async () => { - const sqlFile = join(tempDir, 'query.sql'); - const fakeRuntime = runtime(); - const io = makeIo(); - await writeFile(sqlFile, 'select 1', 'utf-8'); - - await expect( - runKtxAgent( - { - command: 'sql-execute', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - sqlFile, - maxRows: 100, - }, - io.io, - { createRuntime: async () => fakeRuntime as never }, - ), - ).resolves.toBe(0); - - expect(fakeRuntime.queryExecutor?.execute).toHaveBeenCalledWith({ - connectionId: 'warehouse', - projectDir: '/tmp/revenue', - connection: { driver: 'sqlite', path: 'warehouse.sqlite', readonly: true }, - sql: 'select 1', - maxRows: 100, - }); - }); - - it('prints guided JSON when semantic-layer search runs outside a project', async () => { - const io = makeIo(); - const missingProjectError = Object.assign(new Error('ENOENT: no such file or directory'), { - code: 'ENOENT', - path: join(tempDir, 'ktx.yaml'), - }); - - await expect( - runKtxAgent( - { command: 'sl-list', projectDir: tempDir, json: true, query: 'gross revenue' }, - io.io, - { createRuntime: vi.fn(async () => Promise.reject(missingProjectError)) }, - ), - ).resolves.toBe(1); - - expect(JSON.parse(io.stderr())).toEqual({ - ok: false, - error: { - code: 'agent_sl_search_missing_project', - message: `Semantic-layer search needs an initialized KTX project at ${tempDir}.`, - nextSteps: [ - `ktx setup --project-dir ${tempDir}`, - `ktx status --project-dir ${tempDir}`, - 'ktx 
ingest run --connection-id --adapter ', - `ktx agent sl list --json --query "gross revenue" --project-dir ${tempDir}`, - ], - }, - }); - expect(io.stdout()).toBe(''); - }); - - it('prints guided JSON when semantic-layer search has no configured connections', async () => { - const io = makeIo(); - - await expect( - runKtxAgent( - { command: 'sl-list', projectDir: tempDir, json: true, query: 'revenue' }, - io.io, - { createRuntime: async () => runtimeWithoutConnections() }, - ), - ).resolves.toBe(1); - - expect(JSON.parse(io.stderr())).toMatchObject({ - ok: false, - error: { - code: 'agent_sl_search_no_connections', - message: `Semantic-layer search found no configured connections in ${tempDir}.`, - nextSteps: [ - `ktx setup --project-dir ${tempDir}`, - `ktx status --project-dir ${tempDir}`, - 'ktx ingest run --connection-id --adapter ', - `ktx agent sl list --json --query "revenue" --project-dir ${tempDir}`, - ], - }, - }); - }); - - it('prints guided JSON when semantic-layer search asks for an unknown connection', async () => { - const io = makeIo(); - - await expect( - runKtxAgent( - { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'missing', query: 'revenue' }, - io.io, - { createRuntime: async () => runtime() }, - ), - ).resolves.toBe(1); - - expect(JSON.parse(io.stderr())).toMatchObject({ - ok: false, - error: { - code: 'agent_sl_search_unknown_connection', - message: `Semantic-layer search connection "missing" is not configured in ${tempDir}.`, - }, - }); - }); - - it('prints guided JSON when semantic-layer search has no indexed sources', async () => { - const fakeRuntime = runtime(); - const semanticLayer = fakeRuntime.ports.semanticLayer!; - fakeRuntime.ports.semanticLayer = { - ...semanticLayer, - listSources: vi.fn(async () => ({ sources: [], totalSources: 0 })), - }; - const io = makeIo(); - - await expect( - runKtxAgent( - { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'revenue' }, - io.io, - 
{ createRuntime: async () => fakeRuntime }, - ), - ).resolves.toBe(1); - - expect(JSON.parse(io.stderr())).toMatchObject({ - ok: false, - error: { - code: 'agent_sl_search_no_indexed_sources', - message: `Semantic-layer search found no indexed semantic-layer sources in ${tempDir}.`, - }, - }); - }); - - it('returns JSON errors when required ports or records are missing', async () => { - const io = makeIo(); - - await expect( - runKtxAgent({ command: 'wiki-read', projectDir: tempDir, json: true, pageId: 'missing' }, io.io, { - createRuntime: async () => - runtime({ - ports: { knowledge: { read: vi.fn(async () => null) } }, - }) as never, - }), - ).resolves.toBe(1); - - expect(JSON.parse(io.stderr())).toMatchObject({ - ok: false, - error: { message: expect.stringContaining('missing') }, - }); - }); -}); diff --git a/packages/cli/src/agent.ts b/packages/cli/src/agent.ts deleted file mode 100644 index 61d85b8c..00000000 --- a/packages/cli/src/agent.ts +++ /dev/null @@ -1,219 +0,0 @@ -import { readFile } from 'node:fs/promises'; -import type { KtxCliIo } from './cli-runtime.js'; -import { - createKtxAgentRuntime, - parseAgentMaxRows, - readAgentJsonFile, - writeAgentJson, - writeAgentJsonError, - type KtxAgentRuntime, - type KtxAgentRuntimeDeps, -} from './agent-runtime.js'; -import { - isMissingProjectConfigError, - missingConnectionSlSearchReadiness, - missingProjectSlSearchReadiness, - noConnectionsSlSearchReadiness, - noIndexedSourcesSlSearchReadiness, - type KtxAgentSlSearchReadinessDetail, -} from './agent-search-readiness.js'; -import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; -import { readKtxSetupStatus, type KtxSetupStatus } from './setup.js'; - -export type KtxAgentArgs = - | { command: 'tools'; projectDir: string; json: true } - | { command: 'context'; projectDir: string; json: true } - | { command: 'sl-list'; projectDir: string; json: true; connectionId?: string; query?: string } - | { command: 'sl-read'; projectDir: string; 
json: true; connectionId?: string; sourceName: string } - | { - command: 'sl-query'; - projectDir: string; - json: true; - connectionId: string; - queryFile: string; - execute: boolean; - maxRows?: number; - cliVersion: string; - runtimeInstallPolicy: KtxManagedPythonInstallPolicy; - } - | { command: 'wiki-search'; projectDir: string; json: true; query: string; limit: number } - | { command: 'wiki-read'; projectDir: string; json: true; pageId: string } - | { command: 'sql-execute'; projectDir: string; json: true; connectionId: string; sqlFile: string; maxRows?: number }; - -export interface KtxAgentDeps extends KtxAgentRuntimeDeps { - createRuntime?: (options: { - projectDir: string; - enableSemanticCompute: boolean; - enableQueryExecution: boolean; - cliVersion?: string; - runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; - io?: KtxCliIo; - }) => Promise; - readSetupStatus?: ( - projectDir: string, - ) => Promise; -} - -const AGENT_TOOLS = [ - { name: 'context', command: 'ktx agent context --json' }, - { name: 'sl.list', command: 'ktx agent sl list --json [--connection-id ] [--query ]' }, - { name: 'sl.read', command: 'ktx agent sl read --json [--connection-id ]' }, - { - name: 'sl.query', - command: 'ktx agent sl query --json --connection-id --query-file --execute --max-rows 100', - }, - { name: 'wiki.search', command: 'ktx agent wiki search --json [--limit 10]' }, - { name: 'wiki.read', command: 'ktx agent wiki read --json' }, - { - name: 'sql.execute', - command: 'ktx agent sql execute --json --connection-id --sql-file --max-rows 100', - }, -] as const; - -function writeAgentSlSearchReadinessError(io: KtxCliIo, detail: KtxAgentSlSearchReadinessDetail): void { - writeAgentJsonError(io, detail.message, { code: detail.code, nextSteps: detail.nextSteps }); -} - -async function runtimeFor(args: KtxAgentArgs, deps: KtxAgentDeps, io: KtxCliIo): Promise { - const needsSemanticCompute = args.command === 'sl-query'; - const needsQueryExecution = args.command === 
'sql-execute' || (args.command === 'sl-query' && args.execute); - const runtimeOptions = { - projectDir: args.projectDir, - enableSemanticCompute: needsSemanticCompute, - enableQueryExecution: needsQueryExecution, - ...(args.command === 'sl-query' - ? { - cliVersion: args.cliVersion, - runtimeInstallPolicy: args.runtimeInstallPolicy, - io, - } - : {}), - }; - return deps.createRuntime ? deps.createRuntime(runtimeOptions) : createKtxAgentRuntime(runtimeOptions, deps); -} - -function connectionIdForSource(runtime: KtxAgentRuntime, requested: string | undefined): string { - if (requested) return requested; - const ids = Object.keys(runtime.project.config.connections ?? {}); - if (ids.length === 1) return ids[0] as string; - throw new Error('Use --connection-id when the project has zero or multiple connections.'); -} - -export async function runKtxAgent(args: KtxAgentArgs, io: KtxCliIo, deps: KtxAgentDeps = {}): Promise { - try { - if (args.command === 'tools') { - writeAgentJson(io, { projectDir: args.projectDir, tools: AGENT_TOOLS }); - return 0; - } - - const runtime = await runtimeFor(args, deps, io); - - if (args.command === 'context') { - const [status, connections, semanticLayer] = await Promise.all([ - (deps.readSetupStatus ?? readKtxSetupStatus)(args.projectDir), - runtime.ports.connections?.list() ?? [], - runtime.ports.semanticLayer?.listSources({}) ?? { sources: [], totalSources: 0 }, - ]); - writeAgentJson(io, { projectDir: args.projectDir, status, connections, semanticLayer, tools: AGENT_TOOLS }); - return 0; - } - - if (args.command === 'sl-list') { - const semanticLayer = runtime.ports.semanticLayer; - if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.'); - if (args.query) { - const connectionIds = Object.keys(runtime.project.config.connections ?? 
{}); - if (args.connectionId && !runtime.project.config.connections[args.connectionId]) { - writeAgentSlSearchReadinessError( - io, - missingConnectionSlSearchReadiness(args.projectDir, args.connectionId, args.query), - ); - return 1; - } - if (connectionIds.length === 0) { - writeAgentSlSearchReadinessError(io, noConnectionsSlSearchReadiness(args.projectDir, args.query)); - return 1; - } - } - - const listed = await semanticLayer.listSources({ connectionId: args.connectionId, query: args.query }); - if (args.query && listed.sources.length === 0) { - const allSources = await semanticLayer.listSources({ connectionId: args.connectionId }); - if (allSources.totalSources === 0) { - writeAgentSlSearchReadinessError(io, noIndexedSourcesSlSearchReadiness(args.projectDir, args.query)); - return 1; - } - } - - writeAgentJson(io, listed); - return 0; - } - - if (args.command === 'sl-read') { - const semanticLayer = runtime.ports.semanticLayer; - if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.'); - const source = await semanticLayer.readSource({ - connectionId: connectionIdForSource(runtime, args.connectionId), - sourceName: args.sourceName, - }); - if (!source) throw new Error(`Semantic-layer source "${args.sourceName}" was not found.`); - writeAgentJson(io, source); - return 0; - } - - if (args.command === 'sl-query') { - const semanticLayer = runtime.ports.semanticLayer; - if (!semanticLayer) throw new Error('Semantic-layer tools are not available for this project.'); - const query = await readAgentJsonFile(args.queryFile); - const maxRows = args.execute ? parseAgentMaxRows(args.maxRows) : args.maxRows; - writeAgentJson( - io, - await semanticLayer.query({ - connectionId: args.connectionId, - query: { ...query, ...(maxRows !== undefined ? 
{ limit: maxRows } : {}) } as never, - }), - ); - return 0; - } - - if (args.command === 'wiki-search') { - const knowledge = runtime.ports.knowledge; - if (!knowledge) throw new Error('Wiki tools are not available for this project.'); - writeAgentJson(io, await knowledge.search({ userId: 'agent', query: args.query, limit: args.limit })); - return 0; - } - - if (args.command === 'wiki-read') { - const knowledge = runtime.ports.knowledge; - if (!knowledge) throw new Error('Wiki tools are not available for this project.'); - const page = await knowledge.read({ userId: 'agent', key: args.pageId }); - if (!page) throw new Error(`Wiki page "${args.pageId}" was not found.`); - writeAgentJson(io, page); - return 0; - } - - const queryExecutor = runtime.queryExecutor; - if (!queryExecutor) throw new Error('SQL execution is not available for this project.'); - const connection = runtime.project.config.connections[args.connectionId]; - if (!connection) throw new Error(`Connection "${args.connectionId}" was not found.`); - const maxRows = parseAgentMaxRows(args.maxRows); - writeAgentJson( - io, - await queryExecutor.execute({ - connectionId: args.connectionId, - projectDir: runtime.project.projectDir, - connection, - sql: await readFile(args.sqlFile, 'utf-8'), - maxRows, - }), - ); - return 0; - } catch (error) { - if (args.command === 'sl-list' && args.query && isMissingProjectConfigError(error)) { - writeAgentSlSearchReadinessError(io, missingProjectSlSearchReadiness(args.projectDir, args.query)); - return 1; - } - writeAgentJsonError(io, error instanceof Error ? 
error.message : String(error)); - return 1; - } -} diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 682c027a..7d6a98f3 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -1,6 +1,5 @@ import { Command, InvalidArgumentError } from '@commander-js/extra-typings'; import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; -import { registerAgentCommands } from './commands/agent-commands.js'; import { registerConnectionCommands } from './commands/connection-commands.js'; import { registerIngestCommands } from './commands/ingest-commands.js'; import { registerWikiCommands } from './commands/knowledge-commands.js'; @@ -321,7 +320,6 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command { registerWikiCommands(program, context); registerSlCommands(program, context); registerStatusCommands(program, context); - registerAgentCommands(program, context); registerDevCommands(program, context); return program; diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 8fc06589..f303309a 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -2,7 +2,6 @@ import { createRequire } from 'node:module'; import type { KtxConnectionMetabaseSetupArgs } from './commands/connection-metabase-setup.js'; import type { KtxConnectionNotionArgs } from './commands/connection-notion.js'; -import type { KtxAgentArgs } from './agent.js'; import type { KtxConnectionArgs } from './connection.js'; import type { KtxDoctorArgs } from './doctor.js'; import type { KtxIngestArgs } from './ingest.js'; @@ -30,7 +29,6 @@ export interface KtxCliIo { export interface KtxCliDeps { setup?: (args: KtxSetupArgs, io: KtxCliIo) => Promise; - agent?: (args: KtxAgentArgs, io: KtxCliIo) => Promise; connection?: (args: KtxConnectionArgs, io: KtxCliIo) => Promise; connectionNotion?: (args: KtxConnectionNotionArgs, io: KtxCliIo) => Promise; 
connectionMetabaseSetup?: (args: KtxConnectionMetabaseSetupArgs, io: KtxCliIo) => Promise; diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index 9ffe6de3..cb11f2eb 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -53,15 +53,18 @@ export const slQueryCommandSchema = z.object({ command: z.literal('query'), projectDir: projectDirSchema, connectionId: z.string().min(1).optional(), - query: z.object({ - measures: z.array(z.string().min(1)).min(1), - dimensions: stringArraySchema, - filters: stringArraySchema.optional(), - segments: stringArraySchema.optional(), - order_by: z.array(orderBySchema).optional(), - limit: z.number().int().positive().optional(), - include_empty: z.literal(true).optional(), - }), + query: z + .object({ + measures: z.array(z.string().min(1)).min(1), + dimensions: stringArraySchema, + filters: stringArraySchema.optional(), + segments: stringArraySchema.optional(), + order_by: z.array(orderBySchema).optional(), + limit: z.number().int().positive().optional(), + include_empty: z.literal(true).optional(), + }) + .optional(), + queryFile: z.string().min(1).optional(), format: z.enum(['json', 'sql']), execute: z.boolean(), cliVersion: z.string().min(1), diff --git a/packages/cli/src/commands/agent-commands.ts b/packages/cli/src/commands/agent-commands.ts deleted file mode 100644 index 2593991a..00000000 --- a/packages/cli/src/commands/agent-commands.ts +++ /dev/null @@ -1,149 +0,0 @@ -import { Option, type Command } from '@commander-js/extra-typings'; -import type { KtxAgentArgs } from '../agent.js'; -import type { KtxCliCommandContext } from '../cli-program.js'; -import { parsePositiveIntegerOption, resolveCommandProjectDir } from '../cli-program.js'; -import { runtimeInstallPolicyFromFlags } from '../managed-python-command.js'; - -async function runAgent(context: KtxCliCommandContext, args: KtxAgentArgs): Promise { - const runner = context.deps.agent ?? 
(await import('../agent.js')).runKtxAgent; - context.setExitCode(await runner(args, context.io)); -} - -function jsonOption(): Option { - return new Option('--json', 'Print JSON output').makeOptionMandatory(); -} - -export function registerAgentCommands(program: Command, context: KtxCliCommandContext): void { - const agent = program - .command('agent', { hidden: true }) - .description('Machine-readable KTX commands for coding agents') - .showHelpAfterError(); - - agent.hook('preAction', (_thisCommand, actionCommand) => { - context.writeDebug?.('agent', actionCommand); - }); - - agent - .command('tools') - .description('Print available agent-facing KTX tools') - .addOption(jsonOption()) - .action(async (_options, command) => { - await runAgent(context, { command: 'tools', projectDir: resolveCommandProjectDir(command), json: true }); - }); - - agent - .command('context') - .description('Print project context for agent planning') - .addOption(jsonOption()) - .action(async (_options, command) => { - await runAgent(context, { command: 'context', projectDir: resolveCommandProjectDir(command), json: true }); - }); - - const sl = agent.command('sl').description('Semantic-layer agent commands'); - sl.command('list') - .description('List semantic-layer sources') - .addOption(jsonOption()) - .option('--connection-id ', 'Filter by connection id') - .option('--query ', 'Search source names and descriptions') - .action(async (options: { connectionId?: string; query?: string }, command) => { - await runAgent(context, { - command: 'sl-list', - projectDir: resolveCommandProjectDir(command), - json: true, - ...(options.connectionId ? { connectionId: options.connectionId } : {}), - ...(options.query ? 
{ query: options.query } : {}), - }); - }); - sl.command('read') - .description('Read one semantic-layer source') - .argument('') - .addOption(jsonOption()) - .option('--connection-id ', 'Connection id containing the source') - .action(async (sourceName: string, options: { connectionId?: string }, command) => { - await runAgent(context, { - command: 'sl-read', - projectDir: resolveCommandProjectDir(command), - json: true, - sourceName, - ...(options.connectionId ? { connectionId: options.connectionId } : {}), - }); - }); - sl.command('query') - .description('Run a semantic-layer query JSON file') - .addOption(jsonOption()) - .requiredOption('--connection-id ', 'Connection id for execution') - .requiredOption('--query-file ', 'JSON semantic-layer query file') - .option('--execute', 'Execute the compiled query against the connection', false) - .option('--yes', 'Install the managed Python runtime without prompting when required', false) - .option('--no-input', 'Disable interactive managed runtime installation') - .option('--max-rows ', 'Maximum rows to return when executing', parsePositiveIntegerOption) - .action( - async ( - options: { - connectionId: string; - queryFile: string; - execute: boolean; - maxRows?: number; - yes?: boolean; - input?: boolean; - }, - command, - ) => { - await runAgent(context, { - command: 'sl-query', - projectDir: resolveCommandProjectDir(command), - json: true, - connectionId: options.connectionId, - queryFile: options.queryFile, - execute: options.execute, - cliVersion: context.packageInfo.version, - runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options), - ...(options.maxRows !== undefined ? 
{ maxRows: options.maxRows } : {}), - }); - }, - ); - - const wiki = agent.command('wiki').description('KTX wiki agent commands'); - wiki - .command('search') - .description('Search KTX wiki pages') - .argument('') - .addOption(jsonOption()) - .option('--limit ', 'Maximum search results', parsePositiveIntegerOption, 10) - .action(async (query: string, options: { limit: number }, command) => { - await runAgent(context, { - command: 'wiki-search', - projectDir: resolveCommandProjectDir(command), - json: true, - query, - limit: options.limit, - }); - }); - wiki - .command('read') - .description('Read one KTX wiki page') - .argument('') - .addOption(jsonOption()) - .action(async (pageId: string, _options, command) => { - await runAgent(context, { command: 'wiki-read', projectDir: resolveCommandProjectDir(command), json: true, pageId }); - }); - - const sql = agent.command('sql').description('Safe SQL execution commands'); - sql - .command('execute') - .description('Execute read-only SQL with a row limit') - .addOption(jsonOption()) - .requiredOption('--connection-id ', 'Connection id for execution') - .requiredOption('--sql-file ', 'SQL file to execute') - .requiredOption('--max-rows ', 'Maximum rows to return', parsePositiveIntegerOption) - .action(async (options: { connectionId: string; sqlFile: string; maxRows: number }, command) => { - await runAgent(context, { - command: 'sql-execute', - projectDir: resolveCommandProjectDir(command), - json: true, - connectionId: options.connectionId, - sqlFile: options.sqlFile, - maxRows: options.maxRows, - }); - }); -} diff --git a/packages/cli/src/commands/knowledge-commands.ts b/packages/cli/src/commands/knowledge-commands.ts index c85a118c..f8d716f7 100644 --- a/packages/cli/src/commands/knowledge-commands.ts +++ b/packages/cli/src/commands/knowledge-commands.ts @@ -1,5 +1,10 @@ import { type Command, Option } from '@commander-js/extra-typings'; -import { collectOption, type KtxCliCommandContext, resolveCommandProjectDir } 
from '../cli-program.js'; +import { + collectOption, + type KtxCliCommandContext, + parsePositiveIntegerOption, + resolveCommandProjectDir, +} from '../cli-program.js'; import { wikiWriteCommandSchema } from '../command-schemas.js'; import type { KtxKnowledgeArgs } from '../knowledge.js'; import { profileMark } from '../startup-profile.js'; @@ -24,12 +29,14 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon wiki .command('list') .description('List local wiki pages') + .option('--json', 'Print JSON output', false) .option('--user-id ', 'Local user id', 'local') - .action(async (options: { userId: string }, command) => { + .action(async (options: { userId: string; json?: boolean }, command) => { await runKnowledgeArgs(context, { command: 'list', projectDir: resolveCommandProjectDir(command), userId: options.userId, + json: options.json, }); }); @@ -37,13 +44,15 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon .command('read') .description('Read one local wiki page') .argument('', 'Wiki page key') + .option('--json', 'Print JSON output', false) .option('--user-id ', 'Local user id', 'local') - .action(async (key: string, options: { userId: string }, command) => { + .action(async (key: string, options: { userId: string; json?: boolean }, command) => { await runKnowledgeArgs(context, { command: 'read', projectDir: resolveCommandProjectDir(command), key, userId: options.userId, + json: options.json, }); }); @@ -51,13 +60,17 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon .command('search') .description('Search local wiki pages') .argument('', 'Search query') + .option('--json', 'Print JSON output', false) .option('--user-id ', 'Local user id', 'local') - .action(async (query: string, options: { userId: string }, command) => { + .option('--limit ', 'Maximum search results', parsePositiveIntegerOption) + .action(async (query: string, options: { userId: string; json?: 
boolean; limit?: number }, command) => { await runKnowledgeArgs(context, { command: 'search', projectDir: resolveCommandProjectDir(command), query, userId: options.userId, + json: options.json, + ...(options.limit !== undefined ? { limit: options.limit } : {}), }); }); diff --git a/packages/cli/src/commands/sl-commands.ts b/packages/cli/src/commands/sl-commands.ts index 36d75fac..e1b985a3 100644 --- a/packages/cli/src/commands/sl-commands.ts +++ b/packages/cli/src/commands/sl-commands.ts @@ -51,6 +51,7 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte sl.command('list') .description('List semantic-layer sources') .option('--connection-id ', 'KTX connection id') + .option('--query ', 'Search source names and descriptions') .addOption( new Option('--output ', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([ 'pretty', @@ -59,26 +60,34 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte ]), ) .option('--json', 'Shortcut for --output=json (overrides --output)', false) - .action(async (options: { connectionId?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, command) => { + .action( + async ( + options: { connectionId?: string; query?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, + command, + ) => { await runSlArgs(context, { command: 'list', projectDir: resolveCommandProjectDir(command), connectionId: options.connectionId, + query: options.query, output: options.output, json: options.json, }); - }); + }, + ); sl.command('read') .description('Read a semantic-layer source') .argument('', 'Semantic-layer source name') .requiredOption('--connection-id ', 'KTX connection id') - .action(async (sourceName: string, options: { connectionId: string }, command) => { + .option('--json', 'Print JSON output', false) + .action(async (sourceName: string, options: { connectionId: string; json?: boolean }, command) => { await runSlArgs(context, { command: 'read', 
projectDir: resolveCommandProjectDir(command), connectionId: options.connectionId, sourceName, + json: options.json, }); }); @@ -113,6 +122,7 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte sl.command('query') .description('Compile or execute a semantic-layer query') .option('--connection-id ', 'KTX connection id') + .option('--query-file ', 'JSON semantic-layer query file') .option('--measure ', 'Measure to query; repeatable', collectOption, []) .option('--dimension ', 'Dimension to include; repeatable', collectOption, []) .option('--filter ', 'Filter expression; repeatable', collectOption, []) @@ -126,22 +136,26 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte .option('--no-input', 'Disable interactive managed runtime installation') .option('--max-rows ', 'Maximum rows to return when executing', parsePositiveIntegerOption) .action(async (options, command) => { - if (options.measure.length === 0) { + if (options.measure.length === 0 && !options.queryFile) { throw new Error('sl query requires at least one --measure'); } const args = slQueryCommandSchema.parse({ command: 'query', projectDir: resolveCommandProjectDir(command), connectionId: options.connectionId, - query: { - measures: options.measure, - dimensions: options.dimension, - ...(options.filter.length > 0 ? { filters: options.filter } : {}), - ...(options.segment.length > 0 ? { segments: options.segment } : {}), - ...(options.orderBy.length > 0 ? { order_by: options.orderBy } : {}), - ...(options.limit !== undefined ? { limit: options.limit } : {}), - ...(options.includeEmpty === true ? { include_empty: true } : {}), - }, + ...(options.queryFile + ? { queryFile: options.queryFile } + : { + query: { + measures: options.measure, + dimensions: options.dimension, + ...(options.filter.length > 0 ? { filters: options.filter } : {}), + ...(options.segment.length > 0 ? { segments: options.segment } : {}), + ...(options.orderBy.length > 0 ? 
{ order_by: options.orderBy } : {}), + ...(options.limit !== undefined ? { limit: options.limit } : {}), + ...(options.includeEmpty === true ? { include_empty: true } : {}), + }, + }), format: options.format, execute: options.execute === true, cliVersion: context.packageInfo.version, diff --git a/packages/cli/src/example-smoke.test.ts b/packages/cli/src/example-smoke.test.ts index f5b70bfc..221c20f2 100644 --- a/packages/cli/src/example-smoke.test.ts +++ b/packages/cli/src/example-smoke.test.ts @@ -73,26 +73,27 @@ describe('standalone local warehouse example', () => { const projectDir = await copyExampleProject(tempDir); const sourceDir = join(projectDir, 'source'); - const knowledgeList = await runBuiltCli(['agent', 'wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]); + const knowledgeList = await runBuiltCli(['wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]); expect(knowledgeList).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ results: Array<{ key: string; summary: string }> }>(knowledgeList.stdout).results).toContainEqual( - expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' }), - ); + expect( + parseJsonOutput<{ data: { items: Array<{ key: string; summary: string }> } }>(knowledgeList.stdout).data.items, + ).toContainEqual(expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' })); - const knowledgeRead = await runBuiltCli(['agent', 'wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]); + const knowledgeRead = await runBuiltCli(['wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]); expect(knowledgeRead).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ content: string }>(knowledgeRead.stdout).content).toContain( + expect(parseJsonOutput<{ data: { content: string } }>(knowledgeRead.stdout).data.content).toContain( 'Revenue is paid order amount after refund adjustments.', ); - const slList = await 
runBuiltCli(['agent', 'sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']); + const slList = await runBuiltCli(['sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']); expect(slList).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ sources: Array<{ connectionId: string; name: string; columnCount: number }> }>(slList.stdout).sources).toContainEqual( - expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 }), - ); + expect( + parseJsonOutput<{ data: { items: Array<{ connectionId: string; name: string; columnCount: number }> } }>( + slList.stdout, + ).data.items, + ).toContainEqual(expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 })); const slRead = await runBuiltCli([ - 'agent', 'sl', 'read', 'orders', @@ -103,7 +104,7 @@ describe('standalone local warehouse example', () => { projectDir, ]); expect(slRead).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ yaml: string }>(slRead.stdout).yaml).toContain('name: orders'); + expect(parseJsonOutput<{ data: { yaml: string } }>(slRead.stdout).data.yaml).toContain('name: orders'); const ingest = await runBuiltCli([ 'ingest', diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index b79a0bb3..7887e552 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -1141,136 +1141,28 @@ describe('runKtxCli', () => { expect(setupIo.stderr()).toContain('Choose only one Historic SQL action'); }); - it('registers hidden agent help and tools discovery without showing agent in root help', async () => { - const helpIo = makeIo(); - const toolsIo = makeIo(); - const agent = vi.fn(async () => 0); + it('rejects the removed hidden agent command', async () => { + const io = makeIo(); - await expect(runKtxCli(['agent', '--help'], helpIo.io, { agent })).resolves.toBe(0); - await expect( - runKtxCli(['--project-dir', tempDir, 'agent', 
'tools', '--json'], toolsIo.io, { agent }), - ).resolves.toBe(0); + await expect(runKtxCli(['agent'], io.io)).resolves.toBe(1); - expect(helpIo.stdout()).toContain('Usage: ktx agent'); - expect(toolsIo.stderr()).toBe(''); - expect(agent).toHaveBeenCalledWith({ command: 'tools', projectDir: tempDir, json: true }, toolsIo.io); + expect(io.stderr()).toContain("unknown command 'agent'"); + expect(io.stdout()).toBe(''); }); - it('dispatches full hidden agent commands without exposing agent in root help', async () => { - const agent = vi.fn(async () => 0); - const cases = [ - { - argv: ['--project-dir', tempDir, 'agent', 'context', '--json'], - args: { command: 'context', projectDir: tempDir, json: true }, - }, - { - argv: [ - '--project-dir', - tempDir, - 'agent', - 'sl', - 'list', - '--json', - '--connection-id', - 'warehouse', - '--query', - 'orders', - ], - args: { command: 'sl-list', projectDir: tempDir, json: true, connectionId: 'warehouse', query: 'orders' }, - }, - { - argv: ['--project-dir', tempDir, 'agent', 'sl', 'read', 'orders', '--json', '--connection-id', 'warehouse'], - args: { command: 'sl-read', projectDir: tempDir, json: true, sourceName: 'orders', connectionId: 'warehouse' }, - }, - { - argv: [ - '--project-dir', - tempDir, - 'agent', - 'sl', - 'query', - '--json', - '--connection-id', - 'warehouse', - '--query-file', - '/tmp/query.json', - '--execute', - '--max-rows', - '100', - ], - args: { - command: 'sl-query', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - queryFile: '/tmp/query.json', - execute: true, - maxRows: 100, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - }, - }, - { - argv: ['--project-dir', tempDir, 'agent', 'wiki', 'search', 'revenue', '--json', '--limit', '5'], - args: { command: 'wiki-search', projectDir: tempDir, json: true, query: 'revenue', limit: 5 }, - }, - { - argv: ['--project-dir', tempDir, 'agent', 'wiki', 'read', 'page-1', '--json'], - args: { command: 'wiki-read', projectDir: 
tempDir, json: true, pageId: 'page-1' }, - }, - { - argv: [ - '--project-dir', - tempDir, - 'agent', - 'sql', - 'execute', - '--json', - '--connection-id', - 'warehouse', - '--sql-file', - '/tmp/query.sql', - '--max-rows', - '100', - ], - args: { - command: 'sql-execute', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - sqlFile: '/tmp/query.sql', - maxRows: 100, - }, - }, - ]; - - for (const entry of cases) { - const io = makeIo(); - await expect(runKtxCli(entry.argv, io.io, { agent })).resolves.toBe(0); - expect(agent).toHaveBeenLastCalledWith(entry.args, io.io); - expect(io.stderr()).toBe(''); - } - - const helpIo = makeIo(); - await expect(runKtxCli(['--help'], helpIo.io, { agent })).resolves.toBe(0); - expect(helpIo.stdout()).not.toContain('agent '); - }); - - it('routes hidden agent SL query managed runtime policies', async () => { + it('routes public SL query files with managed runtime policies', async () => { const autoIo = makeIo(); const neverIo = makeIo(); const conflictIo = makeIo(); - const agent = vi.fn(async () => 0); + const sl = vi.fn(async () => 0); await expect( runKtxCli( [ '--project-dir', tempDir, - 'agent', 'sl', 'query', - '--json', '--connection-id', 'warehouse', '--query-file', @@ -1278,7 +1170,7 @@ describe('runKtxCli', () => { '--yes', ], autoIo.io, - { agent }, + { sl }, ), ).resolves.toBe(0); @@ -1287,10 +1179,8 @@ describe('runKtxCli', () => { [ '--project-dir', tempDir, - 'agent', 'sl', 'query', - '--json', '--connection-id', 'warehouse', '--query-file', @@ -1298,7 +1188,7 @@ describe('runKtxCli', () => { '--no-input', ], neverIo.io, - { agent }, + { sl }, ), ).resolves.toBe(0); @@ -1307,10 +1197,8 @@ describe('runKtxCli', () => { [ '--project-dir', tempDir, - 'agent', 'sl', 'query', - '--json', '--connection-id', 'warehouse', '--query-file', @@ -1319,33 +1207,33 @@ describe('runKtxCli', () => { '--no-input', ], conflictIo.io, - { agent }, + { sl }, ), ).resolves.toBe(1); - expect(agent).toHaveBeenNthCalledWith( + 
expect(sl).toHaveBeenNthCalledWith( 1, { - command: 'sl-query', + command: 'query', projectDir: tempDir, - json: true, connectionId: 'warehouse', queryFile: '/tmp/query.json', execute: false, + format: 'json', cliVersion: '0.0.0-private', runtimeInstallPolicy: 'auto', }, autoIo.io, ); - expect(agent).toHaveBeenNthCalledWith( + expect(sl).toHaveBeenNthCalledWith( 2, { - command: 'sl-query', + command: 'query', projectDir: tempDir, - json: true, connectionId: 'warehouse', queryFile: '/tmp/query.json', execute: false, + format: 'json', cliVersion: '0.0.0-private', runtimeInstallPolicy: 'never', }, @@ -1354,112 +1242,6 @@ describe('runKtxCli', () => { expect(conflictIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input'); }); - it('prints semantic-layer hybrid search metadata from the hidden agent sl list command', async () => { - const agent = vi.fn(async (args, io) => { - expect(args).toEqual({ - command: 'sl-list', - projectDir: tempDir, - json: true, - connectionId: 'warehouse', - query: 'paid', - }); - io.stdout.write( - `${JSON.stringify( - { - sources: [ - { - connectionId: 'warehouse', - connectionName: 'warehouse', - name: 'orders', - columnCount: 2, - measureCount: 1, - joinCount: 0, - score: 0.03278688524590164, - matchReasons: ['dictionary'], - dictionaryMatches: [{ column: 'status', values: ['paid'] }], - }, - ], - totalSources: 1, - }, - null, - 2, - )}\n`, - ); - return 0; - }); - const io = makeIo(); - - await expect( - runKtxCli( - ['--project-dir', tempDir, 'agent', 'sl', 'list', '--json', '--connection-id', 'warehouse', '--query', 'paid'], - io.io, - { agent }, - ), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toEqual({ - sources: [ - expect.objectContaining({ - connectionId: 'warehouse', - name: 'orders', - matchReasons: ['dictionary'], - dictionaryMatches: [{ column: 'status', values: ['paid'] }], - }), - ], - totalSources: 1, - }); - }); - - it('prints wiki hybrid search metadata from the hidden agent wiki 
search command', async () => { - const agent = vi.fn(async (args, io) => { - expect(args).toEqual({ - command: 'wiki-search', - projectDir: tempDir, - json: true, - query: 'paid order', - limit: 5, - }); - io.stdout.write( - `${JSON.stringify( - { - results: [ - { - key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', - scope: 'GLOBAL', - summary: 'Revenue metric definition', - score: 0.02459016393442623, - matchReasons: ['lexical', 'token'], - }, - ], - totalFound: 1, - }, - null, - 2, - )}\n`, - ); - return 0; - }); - const io = makeIo(); - - await expect( - runKtxCli(['--project-dir', tempDir, 'agent', 'wiki', 'search', 'paid order', '--json', '--limit', '5'], io.io, { - agent, - }), - ).resolves.toBe(0); - - expect(JSON.parse(io.stdout())).toEqual({ - results: [ - expect.objectContaining({ - key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', - matchReasons: ['lexical', 'token'], - }), - ], - totalFound: 1, - }); - }); - it('dispatches public connection subcommands through the existing connection implementation', async () => { const tempDir = await mkdtemp(join(tmpdir(), 'ktx-connection-dispatch-')); const connection = vi.fn(async () => 0); diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index de906ece..2cf9d5b2 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -9,17 +9,6 @@ export { type KtxCliIo, type KtxCliPackageInfo, } from './cli-runtime.js'; -export { runKtxAgent, type KtxAgentArgs } from './agent.js'; -export { - KTX_AGENT_MAX_ROWS_CAP, - createKtxAgentRuntime, - parseAgentMaxRows, - readAgentJsonFile, - writeAgentJson, - writeAgentJsonError, - type KtxAgentRuntime, - type KtxAgentRuntimeDeps, -} from './agent-runtime.js'; export { runKtxSetup, type KtxSetupArgs, type KtxSetupStatus } from './setup.js'; export type { KtxSetupDatabaseDriver, diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts index d7e17605..db794289 100644 --- 
a/packages/cli/src/knowledge.test.ts +++ b/packages/cli/src/knowledge.test.ts @@ -93,6 +93,65 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toContain('metrics-revenue'); }); + it('prints wiki list, search, and read as public JSON envelopes', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + await expect( + runKtxKnowledge( + { + command: 'write', + projectDir, + key: 'metrics-revenue', + scope: 'GLOBAL', + userId: 'local', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + refs: [], + slRefs: ['orders'], + }, + makeIo().io, + ), + ).resolves.toBe(0); + + const listIo = makeIo(); + await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe( + 0, + ); + expect(JSON.parse(listIo.stdout())).toMatchObject({ + kind: 'list', + data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] }, + meta: { command: 'wiki list' }, + }); + + const searchIo = makeIo(); + await expect( + runKtxKnowledge( + { command: 'search', projectDir, query: 'paid order', userId: 'local', json: true, limit: 5 }, + searchIo.io, + ), + ).resolves.toBe(0); + expect(JSON.parse(searchIo.stdout())).toMatchObject({ + kind: 'list', + data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] }, + meta: { command: 'wiki search' }, + }); + + const readIo = makeIo(); + await expect( + runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io), + ).resolves.toBe(0); + expect(JSON.parse(readIo.stdout())).toMatchObject({ + kind: 'wiki.page', + data: { + key: 'metrics-revenue', + summary: 'Revenue', + content: 'Revenue is paid order value.', + }, + }); + }); + it('rejects slash-delimited write keys with a flat-key suggestion', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ 
projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/knowledge.ts b/packages/cli/src/knowledge.ts index 40cc5372..5c5df1ea 100644 --- a/packages/cli/src/knowledge.ts +++ b/packages/cli/src/knowledge.ts @@ -11,11 +11,12 @@ import { searchLocalKnowledgePages, writeLocalKnowledgePage, } from '@ktx/context/wiki'; +import { writeJsonResult } from './io/print-list.js'; export type KtxKnowledgeArgs = - | { command: 'list'; projectDir: string; userId: string } - | { command: 'read'; projectDir: string; key: string; userId: string } - | { command: 'search'; projectDir: string; query: string; userId: string } + | { command: 'list'; projectDir: string; userId: string; json?: boolean } + | { command: 'read'; projectDir: string; key: string; userId: string; json?: boolean } + | { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number } | { command: 'write'; projectDir: string; @@ -61,6 +62,14 @@ export async function runKtxKnowledge( const project = await loadKtxProject({ projectDir: args.projectDir }); if (args.command === 'list') { const pages = await listLocalKnowledgePages(project, { userId: args.userId }); + if (args.json) { + writeJsonResult(io, { + kind: 'list', + data: { items: pages }, + meta: { command: 'wiki list' }, + }); + return 0; + } for (const page of pages) { io.stdout.write(`${page.scope}\t${page.key}\t${page.summary}\n`); } @@ -71,6 +80,14 @@ export async function runKtxKnowledge( if (!page) { throw new Error(`Knowledge page "${args.key}" was not found`); } + if (args.json) { + writeJsonResult(io, { + kind: 'wiki.page', + data: page, + meta: { command: 'wiki read' }, + }); + return 0; + } io.stdout.write(`# ${page.key}\n\n`); io.stdout.write(`Scope: ${page.scope}\n`); io.stdout.write(`Summary: ${page.summary}\n\n`); @@ -82,7 +99,16 @@ export async function runKtxKnowledge( query: args.query, userId: args.userId, embeddingService: wikiSearchEmbeddingService(project, deps), + limit: args.limit, 
}); + if (args.json) { + writeJsonResult(io, { + kind: 'list', + data: { items: results }, + meta: { command: 'wiki search' }, + }); + return 0; + } if (results.length === 0) { const pages = await listLocalKnowledgePages(project, { userId: args.userId }); if (pages.length === 0) { diff --git a/packages/cli/src/next-steps.test.ts b/packages/cli/src/next-steps.test.ts index fd8d8216..b4706d72 100644 --- a/packages/cli/src/next-steps.test.ts +++ b/packages/cli/src/next-steps.test.ts @@ -25,12 +25,8 @@ describe('KTX demo next steps', () => { it('uses supported final public commands', () => { expect(KTX_NEXT_STEP_COMMANDS).toEqual([ { - command: 'ktx agent context --json', - description: 'Verify the project context your agent can read', - }, - { - command: 'ktx agent tools --json', - description: 'List direct CLI tools available to agents', + command: 'ktx status --json', + description: 'Verify project setup and context readiness', }, { command: 'ktx sl list', @@ -46,8 +42,8 @@ describe('KTX demo next steps', () => { it('uses only the direct CLI route for agent verification', () => { const commands = KTX_NEXT_STEP_COMMANDS.map((step) => step.command); - expect(commands).toContain('ktx agent context --json'); - expect(commands).toContain('ktx agent tools --json'); + expect(commands).not.toContain('ktx agent context --json'); + expect(commands).toContain('ktx status --json'); expect(commands).not.toContain('ktx serve --mcp stdio --user-id local'); }); @@ -64,8 +60,8 @@ describe('KTX demo next steps', () => { it('does not advertise removed Commander migration commands', () => { const rendered = formatNextStepLines().join('\n'); - expect(rendered).toContain('ktx agent tools --json'); - expect(rendered).toContain('ktx agent context --json'); + expect(rendered).toContain('ktx status --json'); + expect(rendered).not.toContain('ktx agent'); expect(rendered).toContain('ktx sl list'); expect(rendered).toContain('ktx wiki list'); @@ -109,7 +105,8 @@ describe('KTX demo next steps', 
() => { }).join('\n'); expect(rendered).toContain('KTX context is ready for agents.'); - expect(rendered).toContain('ktx agent context --json'); + expect(rendered).toContain('ktx status --json'); + expect(rendered).not.toContain('ktx agent'); expect(rendered).not.toContain('ktx serve --mcp stdio --user-id local'); expect(rendered).not.toContain('Build KTX context next.'); }); diff --git a/packages/cli/src/next-steps.ts b/packages/cli/src/next-steps.ts index db85da66..ee7535d7 100644 --- a/packages/cli/src/next-steps.ts +++ b/packages/cli/src/next-steps.ts @@ -11,12 +11,8 @@ export const KTX_CONTEXT_BUILD_COMMANDS = [ export const KTX_NEXT_STEP_DIRECT_COMMANDS = [ { - command: 'ktx agent context --json', - description: 'Verify the project context your agent can read', - }, - { - command: 'ktx agent tools --json', - description: 'List direct CLI tools available to agents', + command: 'ktx status --json', + description: 'Verify project setup and context readiness', }, { command: 'ktx sl list', diff --git a/packages/cli/src/project-dir.test.ts b/packages/cli/src/project-dir.test.ts index c59172a6..c0022d4d 100644 --- a/packages/cli/src/project-dir.test.ts +++ b/packages/cli/src/project-dir.test.ts @@ -35,8 +35,7 @@ describe('project directory defaults', () => { const ingest = vi.fn(async () => 0); const scan = vi.fn(async () => 0); const setup = vi.fn(async () => 0); - const agent = vi.fn(async () => 0); - const deps: KtxCliDeps = { agent, connection, doctor, ingest, scan, setup }; + const deps: KtxCliDeps = { connection, doctor, ingest, scan, setup }; const cases: Array<{ argv: string[]; @@ -74,12 +73,6 @@ describe('project directory defaults', () => { expected: { command: 'run', projectDir: '/tmp/ktx-env-project', connectionId: 'warehouse' }, expectedStderr: 'Project: /tmp/ktx-env-project\n', }, - { - argv: ['agent', 'tools', '--json'], - spy: agent, - expected: { command: 'tools', projectDir: '/tmp/ktx-env-project' }, - expectedStderr: '', - }, ]; for (const item of 
cases) { diff --git a/packages/cli/src/setup-agents.test.ts b/packages/cli/src/setup-agents.test.ts index 322db2aa..19647a3f 100644 --- a/packages/cli/src/setup-agents.test.ts +++ b/packages/cli/src/setup-agents.test.ts @@ -84,7 +84,10 @@ describe('setup agents', () => { const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8'); expect(skill).toContain(`--project-dir ${tempDir}`); expect(skill).toContain('must not print secrets'); - expect(skill).toContain('agent sql execute'); + expect(skill).toContain('status --json'); + expect(skill).toContain('sl list --json'); + expect(skill).not.toContain('agent '); + expect(skill).not.toContain('sql execute'); expect(await readKtxAgentInstallManifest(tempDir)).toMatchObject({ version: 1, projectDir: tempDir, @@ -115,8 +118,9 @@ describe('setup agents', () => { const skill = await readFile(join(tempDir, '.agents/skills/ktx/SKILL.md'), 'utf-8'); expect(skill).not.toContain('`ktx agent'); - expect(skill).toContain('agent context --json'); - expect(skill).toContain('agent sql execute'); + expect(skill).toContain('status --json'); + expect(skill).toContain('sl query'); + expect(skill).not.toContain('sql execute'); }); it('removes only manifest-listed files', async () => { diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 151967aa..b4202ed6 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -124,7 +124,7 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun return [ '---', 'name: ktx', - 'description: Use local KTX semantic context, wiki knowledge, and safe SQL execution for this project.', + 'description: Use local KTX semantic context and wiki knowledge for this project.', '---', '', '# KTX Local Context', @@ -137,11 +137,11 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', 'Available commands:', '', - `- \`${ktxCommandLine(input.launcher, ['agent', 'context', 
...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'list', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['agent', 'sl', 'read', '', ...projectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs, '--query', ''])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'read', '', ...projectDirArgs, '--connection-id', ''])}\``, `- \`${ktxCommandLine(input.launcher, [ - 'agent', 'sl', 'query', ...projectDirArgs, @@ -153,29 +153,17 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '--max-rows', '100', ])}\``, - `- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'search', '', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['agent', 'wiki', 'read', '', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, [ - 'agent', - 'sql', - 'execute', - ...projectDirArgs, - '--connection-id', - '', - '--sql-file', - '', - '--max-rows', - '100', - ])}\``, + `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...projectDirArgs, '--limit', '10'])}\``, + `- \`${ktxCommandLine(input.launcher, ['wiki', 'read', '', ...projectDirArgs])}\``, '', - 'SQL execution is read-only, requires an explicit row limit, and should use the smallest useful limit.', + 'Use semantic-layer queries before direct database access. 
Do not print secrets or credential references.', '', ].join('\n'); } function ruleInstructionContent(input: { projectDir: string }): string { return [ - `Use the \`ktx\` CLI to query local semantic context, wiki knowledge, and execute safe SQL for this project (\`--project-dir ${input.projectDir}\`).`, + `Use the \`ktx\` CLI to query local semantic context and wiki knowledge for this project (\`--project-dir ${input.projectDir}\`).`, '', 'Use when the user asks about data schemas, metrics, dimensions, database structure, or wants to run SQL queries.', '', diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts index c04ec15d..8d360c58 100644 --- a/packages/cli/src/sl.test.ts +++ b/packages/cli/src/sl.test.ts @@ -84,6 +84,71 @@ describe('runKtxSl', () => { expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0'); }); + it('prints semantic-layer reads and searched lists as public JSON envelopes', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + await expect( + runKtxSl( + { + command: 'write', + projectDir, + connectionId: 'warehouse', + sourceName: 'orders', + yaml: [ + 'name: orders', + 'table: public.orders', + 'description: Paid order facts', + 'grain: [order_id]', + 'columns:', + ' - name: order_id', + ' type: string', + '', + ].join('\n'), + }, + makeIo().io, + ), + ).resolves.toBe(0); + + const readIo = makeIo(); + await expect( + runKtxSl( + { command: 'read', projectDir, connectionId: 'warehouse', sourceName: 'orders', json: true }, + readIo.io, + ), + ).resolves.toBe(0); + expect(JSON.parse(readIo.stdout())).toMatchObject({ + kind: 'sl.source', + data: { + connectionId: 'warehouse', + name: 'orders', + yaml: expect.stringContaining('name: orders'), + }, + }); + + const listIo = makeIo(); + await expect( + runKtxSl( + { command: 'list', projectDir, connectionId: 'warehouse', query: 'paid', json: true }, + listIo.io, + ), + 
).resolves.toBe(0); + expect(JSON.parse(listIo.stdout())).toMatchObject({ + kind: 'list', + data: { + items: [ + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + score: expect.any(Number), + matchReasons: expect.arrayContaining(['token']), + }), + ], + }, + meta: { command: 'sl list' }, + }); + }); + it('fails validation when a table-backed source declares columns absent from a matching warehouse manifest', async () => { const projectDir = join(tempDir, 'project'); const project = await initKtxProject({ projectDir, projectName: 'warehouse' }); @@ -191,6 +256,73 @@ joins: [] expect(stderr.write).not.toHaveBeenCalled(); }); + it('runs sl query from a JSON query file', async () => { + const projectDir = join(tempDir, 'project'); + const project = await initKtxProject({ projectDir, projectName: 'warehouse' }); + project.config.connections.warehouse = { driver: 'postgres', readonly: true }; + await project.fileStore.writeFile( + 'semantic-layer/warehouse/orders.yaml', + `name: orders +table: public.orders +grain: [id] +columns: + - name: id + type: number +measures: + - name: order_count + expr: count(*) +joins: [] +`, + 'ktx', + 'ktx@example.com', + 'Add orders source', + ); + const queryFile = join(tempDir, 'query.json'); + await writeFile(queryFile, '{"measures":["orders.order_count"],"dimensions":[]}', 'utf-8'); + + const stdout = { write: vi.fn() }; + const stderr = { write: vi.fn() }; + const query = vi.fn(async () => ({ + sql: 'select count(*) as order_count from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })); + const createSemanticLayerCompute = vi.fn(() => ({ + query, + validateSources: vi.fn(), + generateSources: vi.fn(), + })); + + await expect( + runKtxSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + queryFile, + format: 'json', + execute: false, + cliVersion: '0.2.0', + runtimeInstallPolicy: 'auto', + }, + { stdout, stderr }, + { 
createSemanticLayerCompute }, + ), + ).resolves.toBe(0); + + expect(query).toHaveBeenCalledWith( + expect.objectContaining({ + query: { measures: ['orders.order_count'], dimensions: [] }, + }), + ); + expect(JSON.parse(String(stdout.write.mock.calls[0][0]))).toMatchObject({ + sql: 'select count(*) as order_count from public.orders', + plan: { execution: { mode: 'compile_only' } }, + }); + expect(stderr.write).not.toHaveBeenCalled(); + }); + it('creates default sl query compute through the managed runtime helper', async () => { const projectDir = join(tempDir, 'project'); const project = await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts index fb0f129e..ebf3eca7 100644 --- a/packages/cli/src/sl.ts +++ b/packages/cli/src/sl.ts @@ -1,14 +1,22 @@ +import { readFile } from 'node:fs/promises'; import { createDefaultLocalQueryExecutor, type KtxSqlQueryExecutorPort } from '@ktx/context/connections'; +import { + createLocalKtxEmbeddingProviderFromConfig, + KtxIngestEmbeddingPortAdapter, + type KtxEmbeddingPort, +} from '@ktx/context'; import type { KtxSemanticLayerComputePort } from '@ktx/context/daemon'; import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; import { compileLocalSlQuery, listLocalSlSources, readLocalSlSource, + searchLocalSlSources, validateLocalSlSource, writeLocalSlSource, type SemanticLayerQueryInput, } from '@ktx/context/sl'; +import { writeJsonResult } from './io/print-list.js'; import { createManagedPythonSemanticLayerComputePort, type KtxManagedPythonInstallPolicy, @@ -20,15 +28,16 @@ profileMark('module:sl'); type SlQueryFormat = 'json' | 'sql'; export type KtxSlArgs = - | { command: 'list'; projectDir: string; connectionId?: string; output?: string; json?: boolean } - | { command: 'read'; projectDir: string; connectionId: string; sourceName: string } + | { command: 'list'; projectDir: string; connectionId?: string; query?: string; output?: string; 
json?: boolean } + | { command: 'read'; projectDir: string; connectionId: string; sourceName: string; json?: boolean } | { command: 'validate'; projectDir: string; connectionId: string; sourceName: string } | { command: 'write'; projectDir: string; connectionId: string; sourceName: string; yaml: string } | { command: 'query'; projectDir: string; connectionId?: string; - query: SemanticLayerQueryInput; + query?: SemanticLayerQueryInput; + queryFile?: string; format: SlQueryFormat; execute: boolean; maxRows?: number; @@ -43,6 +52,8 @@ interface KtxSlIo { interface KtxSlDeps { loadProject?: typeof loadKtxProject; + embeddingService?: KtxEmbeddingPort | null; + createEmbeddingProvider?: typeof createLocalKtxEmbeddingProviderFromConfig; createSemanticLayerCompute?: () => KtxSemanticLayerComputePort; createManagedSemanticLayerCompute?: (options: { cliVersion: string; @@ -52,11 +63,35 @@ interface KtxSlDeps { createQueryExecutor?: () => KtxSqlQueryExecutorPort; } +function slSearchEmbeddingService(project: KtxLocalProject, deps: KtxSlDeps): KtxEmbeddingPort | null { + if ('embeddingService' in deps) { + return deps.embeddingService ?? null; + } + const provider = (deps.createEmbeddingProvider ?? createLocalKtxEmbeddingProviderFromConfig)( + project.config.ingest.embeddings, + ); + return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null; +} + +async function readSlQueryFile(path: string): Promise { + const parsed = JSON.parse(await readFile(path, 'utf-8')) as unknown; + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`${path} must contain a JSON object.`); + } + return parsed as SemanticLayerQueryInput; +} + export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise { try { const project = await (deps.loadProject ?? 
loadKtxProject)({ projectDir: args.projectDir }); if (args.command === 'list') { - const sources = await listLocalSlSources(project, { connectionId: args.connectionId }); + const sources = args.query + ? await searchLocalSlSources(project, { + connectionId: args.connectionId, + query: args.query, + embeddingService: slSearchEmbeddingService(project, deps), + }) + : await listLocalSlSources(project, { connectionId: args.connectionId }); const { resolveOutputMode } = await import('./io/mode.js'); const { printList } = await import('./io/print-list.js'); const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); @@ -86,6 +121,14 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx if (!source) { throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`); } + if (args.json) { + writeJsonResult(io, { + kind: 'sl.source', + data: source, + meta: { command: 'sl read' }, + }); + return 0; + } io.stdout.write(source.yaml); return 0; } @@ -108,6 +151,10 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx return 0; } if (args.command === 'query') { + const query = args.query ?? (args.queryFile ? await readSlQueryFile(args.queryFile) : undefined); + if (!query) { + throw new Error('sl query requires query input from --query-file or at least one --measure'); + } const compute = deps.createSemanticLayerCompute ? deps.createSemanticLayerCompute() : await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({ @@ -118,7 +165,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx const queryExecutor = args.execute ? (deps.createQueryExecutor ?? 
createDefaultLocalQueryExecutor)() : undefined; const result = await compileLocalSlQuery(project as KtxLocalProject, { connectionId: args.connectionId, - query: args.query, + query, compute, execute: args.execute, maxRows: args.maxRows, diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 9efa52cb..026b4834 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -190,49 +190,21 @@ describe('standalone built ktx CLI smoke', () => { ); }); - it('prints guided JSON for agent semantic-layer search outside a project through the built binary', async () => { - const projectDir = join(tempDir, 'missing-search-project'); - await mkdir(projectDir, { recursive: true }); - - const result = await runBuiltCli([ - 'agent', - 'sl', - 'list', - '--json', - '--query', - 'revenue', - '--project-dir', - projectDir, - ]); + it('rejects the removed agent command through the built binary', async () => { + const result = await runBuiltCli(['agent']); expect(result.code).toBe(1); expect(result.stdout).toBe(''); - const errorJson = parseJsonOutput<{ - ok: false; - error: { code: string; message: string; nextSteps: string[] }; - }>(result.stderr); - expect(errorJson).toEqual({ - ok: false, - error: { - code: 'agent_sl_search_missing_project', - message: `Semantic-layer search needs an initialized KTX project at ${projectDir}.`, - nextSteps: [ - `ktx setup --project-dir ${projectDir}`, - `ktx status --project-dir ${projectDir}`, - 'ktx ingest run --connection-id --adapter ', - `ktx agent sl list --json --query "revenue" --project-dir ${projectDir}`, - ], - }, - }); + expect(result.stderr).toContain("unknown command 'agent'"); }); it('runs doctor setup through the built binary', async () => { const result = await runBuiltCli(['status', '--no-input']); - expect(result.stdout).toContain('KTX setup doctor'); + expect(result.stdout).toMatch(/KTX (setup|project) doctor/); 
expect(result.stdout).toContain('Node 22+'); expect(result.stdout).toContain('Workspace-local CLI'); - expect(result.stderr).toBe(''); + expect(result.stderr === '' || result.stderr.startsWith('Project: ')).toBe(true); expect([0, 1]).toContain(result.code); }); diff --git a/packages/context/src/search/pglite-runtime-boundary.test.ts b/packages/context/src/search/pglite-runtime-boundary.test.ts index ce2f5b7a..2db3209b 100644 --- a/packages/context/src/search/pglite-runtime-boundary.test.ts +++ b/packages/context/src/search/pglite-runtime-boundary.test.ts @@ -46,7 +46,8 @@ describe('PGlite hybrid search runtime boundary', () => { } const productionRoutingFiles = [ - 'packages/cli/src/agent.ts', + 'packages/cli/src/sl.ts', + 'packages/cli/src/knowledge.ts', 'packages/context/src/mcp/local-project-ports.ts', 'packages/context/src/wiki/local-knowledge.ts', 'packages/context/src/ingest/context-evidence/sqlite-context-evidence-store.ts', diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 504e0d36..26db4ae8 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -156,14 +156,12 @@ describe('standalone example docs', () => { const servingAgents = await readText('docs-site/content/docs/guides/serving-agents.mdx'); for (const command of [ - 'ktx agent tools --json', - 'ktx agent context --json', - 'ktx agent sl list --json', - 'ktx agent sl read orders --json', - 'ktx agent sl query --json', - 'ktx agent wiki search "revenue recognition" --json', - 'ktx agent wiki read order-status-definitions --json', - 'ktx agent sql execute --json', + 'ktx status --json', + 'ktx sl list --json', + 'ktx sl read orders --json', + 'ktx sl query --json', + 'ktx wiki search "revenue recognition" --json', + 'ktx wiki read order-status-definitions --json', ]) { assert.match(servingAgents, new RegExp(command.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); } diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 
5f080068..2428dd2e 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -557,12 +557,6 @@ function parseJsonResultWithExitCode(label, result, expectedCode) { return JSON.parse(result.stdout); } -function parseJsonFailure(label, result) { - assert.equal(result.code, 1, label + ' should fail with exit code 1'); - assert.equal(result.stdout, '', label + ' should not write stdout when failing'); - return JSON.parse(result.stderr); -} - function requireIncludes(values, expected, label) { assert.ok(Array.isArray(values), label + ' must be an array'); assert.ok(values.includes(expected), label + ' did not include ' + expected + ': ' + values.join(', ')); @@ -612,30 +606,6 @@ try { assert.equal(runtimeStatusBefore.layout.runtimeRoot, process.env.KTX_RUNTIME_ROOT); process.stdout.write('ktx managed runtime starts missing in isolated root\\n'); - const missingProjectDir = join(root, 'missing-project'); - await mkdir(missingProjectDir, { recursive: true }); - const missingProjectSearch = await run('pnpm', [ - 'exec', - 'ktx', - 'agent', - 'sl', - 'list', - '--json', - '--query', - 'revenue', - '--project-dir', - missingProjectDir, - ]); - const missingProjectError = parseJsonFailure('ktx agent sl list missing project', missingProjectSearch); - assert.equal(missingProjectError.error.code, 'agent_sl_search_missing_project'); - assert.deepEqual(missingProjectError.error.nextSteps, [ - 'ktx setup --project-dir ' + missingProjectDir, - 'ktx status --project-dir ' + missingProjectDir, - 'ktx ingest run --connection-id --adapter ', - 'ktx agent sl list --json --query "revenue" --project-dir ' + missingProjectDir, - ]); - process.stdout.write('ktx agent sl list missing project guidance verified\\n'); - const init = await run('pnpm', [ 'exec', 'ktx', @@ -671,28 +641,6 @@ try { '--skip-agents', ]); requireProjectStderr('ktx setup empty project', emptyInit, emptyProjectDir); - const emptySearch = await run('pnpm', [ - 'exec', - 'ktx', - 'agent', - 'sl', - 
'list', - '--json', - '--query', - 'revenue', - '--project-dir', - emptyProjectDir, - ]); - const emptySearchError = parseJsonFailure('ktx agent sl list no connections', emptySearch); - assert.equal(emptySearchError.error.code, 'agent_sl_search_no_connections'); - assert.deepEqual(emptySearchError.error.nextSteps, [ - 'ktx setup --project-dir ' + emptyProjectDir, - 'ktx status --project-dir ' + emptyProjectDir, - 'ktx ingest run --connection-id --adapter ', - 'ktx agent sl list --json --query "revenue" --project-dir ' + emptyProjectDir, - ]); - process.stdout.write('ktx agent sl list no connections guidance verified\\n'); - await writeFile( join(projectDir, 'ktx.yaml'), [ @@ -737,10 +685,9 @@ try { 'utf-8', ); - const agentWikiSearch = await run('pnpm', [ + const wikiSearch = await run('pnpm', [ 'exec', 'ktx', - 'agent', 'wiki', 'search', 'revenue', @@ -750,40 +697,17 @@ try { '--project-dir', projectDir, ]); - const agentWikiSearchJson = parseJsonResult('ktx agent wiki search', agentWikiSearch); - assert.equal(agentWikiSearchJson.totalFound, 1); - assert.equal(agentWikiSearchJson.results[0].key, 'revenue'); - assert.equal(agentWikiSearchJson.results[0].path, 'knowledge/global/revenue.md'); - assert.equal(typeof agentWikiSearchJson.results[0].score, 'number'); - requireIncludes(agentWikiSearchJson.results[0].matchReasons, 'lexical', 'agent wiki search match reasons'); - process.stdout.write('ktx agent wiki search hybrid metadata verified\\n'); + const wikiSearchJson = parseJsonResult('ktx wiki search', wikiSearch); + assert.equal(wikiSearchJson.kind, 'list'); + assert.equal(wikiSearchJson.data.items.length, 1); + assert.equal(wikiSearchJson.data.items[0].key, 'revenue'); + assert.equal(wikiSearchJson.data.items[0].path, 'knowledge/global/revenue.md'); + assert.equal(typeof wikiSearchJson.data.items[0].score, 'number'); + requireIncludes(wikiSearchJson.data.items[0].matchReasons, 'lexical', 'wiki search match reasons'); + process.stdout.write('ktx wiki search hybrid 
metadata verified\\n'); await access(join(projectDir, '.ktx', 'db.sqlite')); process.stdout.write('SQLite knowledge index: ' + join(projectDir, '.ktx', 'db.sqlite') + '\\n'); - const noSourceSearch = await run('pnpm', [ - 'exec', - 'ktx', - 'agent', - 'sl', - 'list', - '--json', - '--connection-id', - 'warehouse', - '--query', - 'revenue', - '--project-dir', - projectDir, - ]); - const noSourceSearchError = parseJsonFailure('ktx agent sl list no indexed sources', noSourceSearch); - assert.equal(noSourceSearchError.error.code, 'agent_sl_search_no_indexed_sources'); - assert.deepEqual(noSourceSearchError.error.nextSteps, [ - 'ktx setup --project-dir ' + projectDir, - 'ktx status --project-dir ' + projectDir, - 'ktx ingest run --connection-id --adapter ', - 'ktx agent sl list --json --query "revenue" --project-dir ' + projectDir, - ]); - process.stdout.write('ktx agent sl list no indexed sources guidance verified\\n'); - const slYaml = [ 'name: orders', 'table: orders', @@ -804,10 +728,9 @@ try { await mkdir(join(projectDir, 'semantic-layer', 'warehouse'), { recursive: true }); await writeFile(join(projectDir, 'semantic-layer', 'warehouse', 'orders.yaml'), slYaml, 'utf-8'); - const agentSlSearch = await run('pnpm', [ + const slSearch = await run('pnpm', [ 'exec', 'ktx', - 'agent', 'sl', 'list', '--json', @@ -818,13 +741,14 @@ try { '--project-dir', projectDir, ]); - const agentSlSearchJson = parseJsonResult('ktx agent sl list', agentSlSearch); - assert.equal(agentSlSearchJson.totalSources, 1); - assert.equal(agentSlSearchJson.sources[0].connectionId, 'warehouse'); - assert.equal(agentSlSearchJson.sources[0].name, 'orders'); - assert.equal(typeof agentSlSearchJson.sources[0].score, 'number'); - requireIncludes(agentSlSearchJson.sources[0].matchReasons, 'lexical', 'agent sl search match reasons'); - process.stdout.write('ktx agent sl list hybrid metadata verified\\n'); + const slSearchJson = parseJsonResult('ktx sl list', slSearch); + assert.equal(slSearchJson.kind, 
'list'); + assert.equal(slSearchJson.data.items.length, 1); + assert.equal(slSearchJson.data.items[0].connectionId, 'warehouse'); + assert.equal(slSearchJson.data.items[0].name, 'orders'); + assert.equal(typeof slSearchJson.data.items[0].score, 'number'); + requireIncludes(slSearchJson.data.items[0].matchReasons, 'lexical', 'sl search match reasons'); + process.stdout.write('ktx sl list hybrid metadata verified\\n'); const slQuery = await run('pnpm', ['exec', 'ktx', 'sl', 'query', '--connection-id', diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index b4176353..64ce9466 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -461,9 +461,9 @@ describe('verification snippets', () => { assert.doesNotMatch(source, /startSemanticDaemon/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'setup'/); assert.match(source, /knowledge', 'global', 'revenue\.md'/); - assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'agent',\s*'wiki',\s*'search'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'wiki',\s*'search'/); assert.match(source, /semantic-layer', 'warehouse', 'orders\.yaml'/); - assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'agent',\s*'sl',\s*'list'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'sl',\s*'list'/); assert.match(source, /orders\.order_count/); assert.match(source, /sqlite3/); assert.match(source, /driver: sqlite/); From bcb0d2f8f7869fea34e9959b490a4dbbe5d20636 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 13:33:28 +0200 Subject: [PATCH 04/15] chore: add TypeScript dead-code checks (#60) * chore: add TypeScript dead-code checks * chore: trim stale Knip ignores * Fix CI smoke and artifact checks --- .github/workflows/ci.yml | 3 + .pre-commit-config.yaml | 13 + AGENTS.md | 19 + biome.json | 36 ++ knip.json | 114 ++++ package.json | 9 +- .../cli/src/commands/connection-commands.ts | 2 +- 
.../src/commands/connection-notion-tree.ts | 8 - .../commands/connection-notion-tui.test.tsx | 2 +- .../src/commands/connection-notion-tui.tsx | 2 +- packages/cli/src/index.test.ts | 2 +- packages/cli/src/ingest.test-utils.ts | 8 +- packages/cli/src/ingest.test.ts | 4 - packages/cli/src/memory-flow-hud.tsx | 121 +--- packages/cli/src/memory-flow-tui.test.tsx | 1 - packages/cli/src/memory-flow-tui.tsx | 19 +- packages/cli/src/project-dir.ts | 5 - packages/cli/src/public-ingest.ts | 6 +- packages/cli/src/setup-context.ts | 10 - packages/cli/src/standalone-smoke.test.ts | 4 - .../context/src/ingest/memory-flow/summary.ts | 1 - packages/context/src/scan/enrichment-types.ts | 9 - pnpm-lock.yaml | 611 +++++++++++++++++- scripts/build-public-npm-package.mjs | 5 - scripts/build-public-npm-package.test.mjs | 2 +- scripts/examples-docs.test.mjs | 4 - scripts/package-artifacts.mjs | 7 +- scripts/package-artifacts.test.mjs | 10 +- scripts/relationship-orbit-verification.mjs | 1 - 29 files changed, 818 insertions(+), 220 deletions(-) create mode 100644 biome.json create mode 100644 knip.json delete mode 100644 packages/cli/src/project-dir.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d70d495..3da14c7b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,9 @@ jobs: - name: Install TypeScript dependencies run: pnpm install --frozen-lockfile + - name: Run TypeScript dead-code checks + run: pnpm run dead-code + - name: Run TypeScript checks run: pnpm run check diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8908b532..167681a6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,6 +33,19 @@ repos: name: ruff format (python) files: ^python/ + - repo: local + hooks: + - id: biome-dead-code + name: biome dead-code check + entry: pnpm exec biome ci . 
--formatter-enabled=false --assist-enabled=false + language: system + pass_filenames: false + - id: knip-dead-code + name: knip dead-code check + entry: pnpm exec knip --reporter compact + language: system + pass_filenames: false + - repo: https://github.com/Yelp/detect-secrets rev: v1.5.0 hooks: diff --git a/AGENTS.md b/AGENTS.md index 2e5a684a..1e5480f2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -86,6 +86,7 @@ pnpm run build pnpm run type-check pnpm run test pnpm run check +pnpm run dead-code pnpm --filter @ktx/cli run smoke pnpm --filter './packages/*' run build pnpm --filter './packages/*' run test @@ -127,6 +128,7 @@ shared contracts or package exports are affected. - Build/export changes: `pnpm run build` - Workspace scripts: `node --test scripts/*.test.mjs` or the specific script test file +- TypeScript dead-code tooling/config changes: `pnpm run dead-code` - Python semantic layer: `uv run pytest python/ktx-sl/tests -q` - Python daemon: `uv run pytest python/ktx-daemon/tests -q` - Python files: also run `uv run pre-commit run --files [FILES]` when @@ -156,6 +158,23 @@ pnpm run test 2>&1 | tee /tmp/ktx-test-output.log - Do not manually edit generated or built output under `dist/`; edit source and rebuild. +### Dead TypeScript Code Checks + +KTX uses Biome for local unused-code linting and Knip for workspace graph +analysis. These checks are intentionally part of CI and pre-commit because the +normal development workflow is agent-based. + +- Run `pnpm run dead-code` after TypeScript changes. +- Treat Knip findings as investigation prompts, not automatic deletion orders. +- Remove private dead code when you confirm there are no imports, dynamic + references, generated references, or tests that still need it. +- Preserve public package exports unless the task explicitly includes API + pruning. +- Add narrow `knip.json` ignores only for intentional dynamic or public cases. + Do not add broad package-level ignores to silence unrelated findings. 
+- Update `knip.json` when adding dynamic entrypoints, generated files, package + exports, CLI bins, or framework files that Knip cannot infer. + ### CLI Standards - Use Commander for CLI command trees, arguments, options, help text, custom diff --git a/biome.json b/biome.json new file mode 100644 index 00000000..35c6d596 --- /dev/null +++ b/biome.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://biomejs.dev/schemas/2.4.15/schema.json", + "assist": { + "enabled": false + }, + "formatter": { + "enabled": false + }, + "files": { + "includes": [ + "scripts/**/*.mjs", + "packages/**/*.ts", + "packages/**/*.tsx", + "docs-site/**/*.ts", + "docs-site/**/*.tsx", + "docs-site/**/*.mjs", + "!**/dist/**", + "!**/coverage/**", + "!**/.next/**", + "!**/node_modules/**", + "!**/*.gen.ts", + "!**/*.generated.ts" + ] + }, + "linter": { + "enabled": true, + "rules": { + "recommended": false, + "correctness": { + "noUnusedImports": "error", + "noUnusedVariables": "error", + "noUnusedPrivateClassMembers": "error" + } + } + } +} diff --git a/knip.json b/knip.json new file mode 100644 index 00000000..a6bf5d26 --- /dev/null +++ b/knip.json @@ -0,0 +1,114 @@ +{ + "$schema": "https://unpkg.com/knip@6/schema.json", + "workspaces": { + ".": { + "entry": ["scripts/**/*.mjs"], + "project": ["scripts/**/*.mjs"] + }, + "packages/cli": { + "entry": [ + "src/index.ts", + "src/bin.ts", + "src/**/*.test.ts", + "src/**/*.test.tsx", + "scripts/**/*.mjs" + ], + "project": ["src/**/*.{ts,tsx}", "scripts/**/*.mjs", "vitest.config.ts"] + }, + "packages/context": { + "entry": [ + "src/index.ts", + "src/agent/index.ts", + "src/core/index.ts", + "src/connections/index.ts", + "src/daemon/index.ts", + "src/ingest/index.ts", + "src/ingest/memory-flow/index.ts", + "src/ingest/metabase-mapping.ts", + "src/scan/index.ts", + "src/search/index.ts", + "src/sql-analysis/index.ts", + "src/memory/index.ts", + "src/mcp/index.ts", + "src/project/index.ts", + "src/prompts/index.ts", + "src/skills/index.ts", + 
"src/sl/index.ts", + "src/sl/descriptions.ts", + "src/tools/index.ts", + "src/wiki/index.ts", + "src/**/*.test.ts", + "scripts/**/*.mjs" + ], + "project": ["src/**/*.ts", "scripts/**/*.mjs", "vitest.config.ts"] + }, + "packages/llm": { + "entry": ["src/index.ts", "src/**/*.test.ts"], + "project": ["src/**/*.ts", "vitest.config.ts"] + }, + "packages/connector-*": { + "entry": ["src/index.ts", "src/**/*.test.ts"], + "project": ["src/**/*.ts"] + }, + "docs-site": { + "entry": [ + "app/**/*.{ts,tsx}", + "components/**/*.{ts,tsx}", + "lib/**/*.{ts,tsx}", + "middleware.ts", + "next.config.mjs", + "source.config.ts", + "tests/**/*.mjs" + ], + "project": [ + "app/**/*.{ts,tsx}", + "components/**/*.{ts,tsx}", + "lib/**/*.{ts,tsx}", + "*.ts", + "*.mjs", + "tests/**/*.mjs" + ], + "ignoreDependencies": ["tailwindcss"] + } + }, + "ignore": [ + "**/dist/**", + "**/coverage/**", + "**/.next/**", + "**/node_modules/**", + "**/*.gen.ts", + "**/*.generated.ts" + ], + "ignoreIssues": { + "packages/cli/src/clack.ts": ["exports"], + "packages/cli/src/commands/connection-metabase-setup.ts": ["exports", "types"], + "packages/cli/src/ingest.test-utils.ts": ["exports"], + "packages/cli/src/io/symbols.ts": ["exports"], + "packages/cli/src/managed-python-command.ts": ["types"], + "packages/cli/src/managed-python-daemon.ts": ["types"], + "packages/cli/src/managed-python-http.ts": ["exports", "types"], + "packages/cli/src/managed-python-runtime.ts": ["types"], + "packages/cli/src/memory-flow-tui.tsx": ["types"], + "packages/cli/src/next-steps.ts": ["exports"], + "packages/cli/src/print-command-tree.ts": ["exports"], + "packages/cli/src/setup-agents.ts": ["exports", "types"], + "packages/cli/src/setup-context.ts": ["types"], + "packages/cli/src/setup-demo-tour.ts": ["exports"], + "packages/cli/src/setup-models.ts": ["exports"], + "packages/cli/src/setup-project.ts": ["types"], + "packages/cli/src/setup-ready-menu.ts": ["types"], + "packages/cli/src/setup-sources.ts": ["types"], + 
"packages/context/src/ingest/adapters/historic-sql/pattern-inputs.ts": ["exports", "types"], + "packages/context/src/ingest/adapters/lookml/pull-config.ts": ["exports"], + "packages/context/src/ingest/adapters/metabase/serialize-card.ts": ["types"], + "packages/context/src/ingest/adapters/metabase/types.ts": ["exports"], + "packages/context/src/ingest/adapters/metricflow/parse.ts": ["types"], + "packages/context/src/ingest/ports.ts": ["types"], + "packages/context/src/ingest/stages/stage-3-work-units.ts": ["types"], + "packages/context/src/ingest/stages/stage-index.types.ts": ["types"], + "packages/context/src/project/config.ts": ["types"], + "packages/context/src/scan/relationship-candidates.ts": ["types"], + "packages/context/src/scan/relationship-diagnostics.ts": ["types"], + "packages/context/src/tools/context-evidence-tool-store.ts": ["types"] + } +} diff --git a/package.json b/package.json index 159bd709..b105784b 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,10 @@ "artifacts:verify-manifest": "node scripts/package-artifacts.mjs verify-manifest", "build": "pnpm --filter './packages/*' run build", "check": "node scripts/check-boundaries.mjs && node --test scripts/*.test.mjs && pnpm --filter './packages/*' run build && pnpm --filter './packages/*' run test", + "dead-code": "pnpm run dead-code:biome && pnpm run dead-code:knip", + "dead-code:biome": "biome ci . --formatter-enabled=false --assist-enabled=false", + "dead-code:fix": "biome check . 
--formatter-enabled=false --assist-enabled=false --write && knip --fix --format", + "dead-code:knip": "knip --reporter compact", "ktx": "node scripts/run-ktx.mjs", "link:dev": "node scripts/link-dev-cli.mjs", "native:rebuild": "pnpm -r rebuild better-sqlite3", @@ -36,9 +40,12 @@ "type-check": "pnpm --filter './packages/*' run type-check" }, "devDependencies": { + "@biomejs/biome": "^2.4.15", "@types/node": "^25.7.0", + "better-sqlite3": "^12.10.0", + "knip": "^6.12.2", "typescript": "^6.0.3", - "vitest": "^4.1.6" + "yaml": "^2.9.0" }, "pnpm": { "onlyBuiltDependencies": [ diff --git a/packages/cli/src/commands/connection-commands.ts b/packages/cli/src/commands/connection-commands.ts index f3c87709..4ce75057 100644 --- a/packages/cli/src/commands/connection-commands.ts +++ b/packages/cli/src/commands/connection-commands.ts @@ -188,7 +188,7 @@ export function registerConnectionCommands(program: Command, context: KtxCliComm registerConnectionNotionCommands(connection, context); } -export function registerConnectionMappingCommands(connection: Command, context: KtxCliCommandContext): void { +function registerConnectionMappingCommands(connection: Command, context: KtxCliCommandContext): void { const mapping = connection .command('mapping') .description('Manage Metabase warehouse mappings') diff --git a/packages/cli/src/commands/connection-notion-tree.ts b/packages/cli/src/commands/connection-notion-tree.ts index 57a8f335..379ac938 100644 --- a/packages/cli/src/commands/connection-notion-tree.ts +++ b/packages/cli/src/commands/connection-notion-tree.ts @@ -369,14 +369,6 @@ function setExpanded(state: PickerState, nodeId: string, value: boolean | 'toggl return cloneState(state, { expanded }); } -function expandPath(state: PickerState, nodeId: string): PickerState { - const expanded = new Set(state.expanded); - for (const ancestorId of ancestorsOf(nodeId, state.byId)) { - expanded.add(ancestorId); - } - return cloneState(state, { expanded }); -} - export function 
moveCursor(state: PickerState, dir: 'up' | 'down' | 'left' | 'right'): PickerState { const node = state.byId.get(state.cursorId); if (!node) { diff --git a/packages/cli/src/commands/connection-notion-tui.test.tsx b/packages/cli/src/commands/connection-notion-tui.test.tsx index 0b7efc82..dc394688 100644 --- a/packages/cli/src/commands/connection-notion-tui.test.tsx +++ b/packages/cli/src/commands/connection-notion-tui.test.tsx @@ -1,6 +1,6 @@ /* @jsxImportSource react */ import { render as renderInkTest } from 'ink-testing-library'; -import React, { act, type ReactNode } from 'react'; +import { act, type ReactNode } from 'react'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './connection-notion-tree.js'; import { diff --git a/packages/cli/src/commands/connection-notion-tui.tsx b/packages/cli/src/commands/connection-notion-tui.tsx index c0a4746a..b2a47036 100644 --- a/packages/cli/src/commands/connection-notion-tui.tsx +++ b/packages/cli/src/commands/connection-notion-tui.tsx @@ -1,6 +1,6 @@ /* @jsxImportSource react */ import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink'; -import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; +import { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; import { filterTree, flattenSelection, diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 7887e552..1e69c590 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -1,4 +1,4 @@ -import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdtemp, rm } from 'node:fs/promises'; import { createRequire } from 'node:module'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 71d85c6c..1e9f5662 100644 --- 
a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -1,10 +1,8 @@ import { EventEmitter } from 'node:events'; -import { access, mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; +import { mkdir, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; import { - LocalLookerRuntimeStore, LocalMetabaseSourceStateReader, MetabaseSourceAdapter, getLocalIngestStatus, @@ -12,12 +10,10 @@ import { type FetchContext, type IngestReportSnapshot, type LocalIngestResult, - type LocalMetabaseFanoutProgress, type LookerMappingClient, type LookerRuntimeClient, type LookerTableIdentifierParser, type MemoryFlowEventSink, - type MemoryFlowReplayInput, type MetabaseCard, type MetabaseCardSummary, type MetabaseClientFactory, @@ -28,7 +24,7 @@ import { } from '@ktx/context/ingest'; import { ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; import { expect, vi } from 'vitest'; -import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; +import { runKtxIngest } from './ingest.js'; export function makeIo( options: { diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 9fffdf0c..410312f5 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -4,10 +4,8 @@ import { join } from 'node:path'; import { LocalLookerRuntimeStore, LocalMetabaseSourceStateReader, - getLocalIngestStatus, type LocalIngestResult, type LocalMetabaseFanoutProgress, - type MemoryFlowReplayInput, type RunLocalIngestOptions, type SourceAdapter, } from '@ktx/context/ingest'; @@ -20,7 +18,6 @@ import { CliMetabaseAgentRunner, CliMetabaseSourceAdapter, completedLocalBundleRun, - emitLiveLocalMemoryFlow, failedLocalBundleRun, localFakeBundleReport, makeCliLookerParser, @@ -28,7 +25,6 @@ import { makeIo, persistLocalBundleReport, runPublicMetabaseSyncModeCase, - 
writeBundleReportFile, writeMetabaseConfig, writeWarehouseConfig, } from './ingest.test-utils.js'; diff --git a/packages/cli/src/memory-flow-hud.tsx b/packages/cli/src/memory-flow-hud.tsx index fbeaf219..5d2be9eb 100644 --- a/packages/cli/src/memory-flow-hud.tsx +++ b/packages/cli/src/memory-flow-hud.tsx @@ -1,7 +1,7 @@ /* @jsxImportSource react */ import type { MemoryFlowEvent, MemoryFlowReplayInput } from '@ktx/context/ingest/memory-flow'; import { Box, Text } from 'ink'; -import React, { type ReactNode } from 'react'; +import { type ReactNode } from 'react'; import { buildDemoMetrics, formatCost, formatDuration } from './demo-metrics.js'; import { formatNextStepLines } from './next-steps.js'; import { profileMark } from './startup-profile.js'; @@ -38,45 +38,6 @@ function isPrepopulatedDemoReplay(input: MemoryFlowReplayInput): boolean { return input.metadata?.origin === 'packaged' || input.metadata?.timing === 'prebuilt'; } -function flowLine(width: number, frame: number, active: boolean): string { - if (!active) return '━'.repeat(width); - const pulse = ['░', '▒', '▓', '█', '█', '█', '▓', '▒', '░']; - const pw = pulse.length; - const chars: string[] = []; - const offset = (frame * 2) % (width + pw); - for (let i = 0; i < width; i += 1) { - const p = i - offset + pw; - chars.push(p >= 0 && p < pw ? (pulse[p] ?? 
'━') : '━'); - } - return chars.join(''); -} - -function brailleFlow(width: number, frame: number): string { - // Braille unicode: U+2800 + dot bitmask - // Dots: 1=0x01 2=0x02 3=0x04 4=0x08 5=0x10 6=0x20 7=0x40 8=0x80 - // Layout: col0=[1,2,3,7] col1=[4,5,6,8] - const chars: string[] = []; - for (let i = 0; i < width; i += 1) { - const density = (i + 1) / width; - const phase = (i * 3 + frame * 2) % 12; - let dots = 0; - - // Sparse diagonal streams on the left, dense on the right - // Each "stream" is a diagonal line of dots moving rightward - if ((phase + 0) % 4 < density * 4) dots |= 0x01; // dot 1 - if ((phase + 1) % 5 < density * 4) dots |= 0x08; // dot 4 - if ((phase + 2) % 4 < density * 3) dots |= 0x02; // dot 2 - if ((phase + 3) % 5 < density * 3) dots |= 0x10; // dot 5 - if ((phase + 4) % 4 < density * 2.5) dots |= 0x04; // dot 3 - if ((phase + 5) % 5 < density * 2.5) dots |= 0x20; // dot 6 - if ((phase + 1) % 6 < density * 2) dots |= 0x40; // dot 7 - if ((phase + 3) % 6 < density * 2) dots |= 0x80; // dot 8 - - chars.push(String.fromCharCode(0x2800 + dots)); - } - return chars.join(''); -} - function progressBarOverall( finishedCount: number, activeCount: number, @@ -104,43 +65,6 @@ function progressBarOverall( return finished + activeChars.join('') + '░'.repeat(queuedWidth); } -function sparkleWipe(width: number, frame: number, row: number): string { - const chars: string[] = []; - const sweepPos = (frame * 2 + row * 6) % (width + 8); - const sparkles = ['✨', '✦', '✧', '·']; - for (let i = 0; i < width; i += 1) { - const dist = i - sweepPos; - if (dist < -6) { - const t = (i * 11 + row * 5 + frame * 3) % 10; - chars.push(t === 0 ? sparkles[0]! : t === 3 ? sparkles[1]! : t === 7 ? sparkles[2]! : ' '); - } else if (dist < -3) { - const t = (i + frame) % 3; - chars.push(t === 0 ? sparkles[1]! : t === 1 ? sparkles[2]! : sparkles[3]!); - } else if (dist <= 0) { - const gradient = ['░', '▒', '▓', '█']; - chars.push(gradient[Math.min(3, dist + 3)] ?? 
'█'); - } else if (dist <= 2) { - chars.push(dist === 1 ? '▓' : '▒'); - } else { - const noise = (i * 31 + row * 17 + frame * 3) % 5; - const messy = ['░', '▒', '▓', '▒', '░']; - chars.push(messy[noise] ?? '▒'); - } - } - return chars.join(''); -} - -function activityWave(width: number, frame: number, offset: number): string { - const heights = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█']; - const chars: string[] = []; - for (let i = 0; i < width; i += 1) { - const wave = Math.sin(((i * 2 + frame + offset * 5) * Math.PI) / 6); - const idx = Math.round(((wave + 1) / 2) * (heights.length - 1)); - chars.push(heights[idx] ?? '▁'); - } - return chars.join(''); -} - function topicName(key: string): string { return (key.split('/').pop()?.replace(/\.md$/, '') ?? key).replace(/[_-]/g, ' '); } @@ -155,18 +79,9 @@ function humanizeInsight(key: string, target: 'sl' | 'wiki', summary: string | u return target === 'sl' ? `Query definition: ${name}` : `Knowledge page: ${name}`; } -const ADAPTER_PREFIXES = ['live_database_', 'metabase_', 'looker_', 'lookml_', 'metricflow_', 'notion_', 'historic_sql_', 'dbt_descriptions_']; const INTERNAL_DEMO_CONNECTION_ID = 'orbit_demo'; const PUBLIC_DEMO_SOURCE_LABEL = 'Orbit Demo'; -function humanizeUnitKey(unitKey: string): string { - let key = unitKey.replace(/-/g, '_'); - for (const prefix of ADAPTER_PREFIXES) { - if (key.startsWith(prefix)) { key = key.slice(prefix.length); break; } - } - return key.replace(/_/g, ' '); -} - interface SourceInfo { type: string; name: string; @@ -224,13 +139,6 @@ function sourceDescription(input: MemoryFlowReplayInput): SourceInfo { return { type: info.type, name: conn, sourceCount: count, itemNounPlural: info.plural, readingVerb: info.verb, ingestDescription: info.description }; } -function activeWorkUnit( - input: MemoryFlowReplayInput, -): { unitKey: string; stepIndex: number; stepBudget: number } | null { - const units = activeWorkUnits(input); - return units.at(-1) ?? 
null; -} - function activeWorkUnits( input: MemoryFlowReplayInput, ): Array<{ unitKey: string; stepIndex: number; stepBudget: number }> { @@ -299,22 +207,6 @@ function finishedUnits(input: MemoryFlowReplayInput): Array<{ unitKey: string; a return units; } -function artifactCounts(input: MemoryFlowReplayInput): { sl: number; wiki: number } { - let sl = 0; - let wiki = 0; - for (const e of input.events) { - if (e.type === 'candidate_action') { - if (e.target === 'sl') sl++; - else wiki++; - } - } - return { sl, wiki }; -} - -function pad(str: string, width: number): string { - return str.length >= width ? str : str + ' '.repeat(width - str.length); -} - const KTX_LOGO_SMALL = [ '██╗ ██╗████████╗██╗ ██╗', '██║ ██╔╝╚══██╔══╝╚██╗██╔╝', @@ -344,12 +236,7 @@ export function Hud(props: { width: number; now?: () => number; }): ReactNode { - const isRunning = props.input.status === 'running'; - const isDone = props.input.status === 'done'; - const isFlowing = isRunning && hasWorkStarted(props.input); - const src = sourceDescription(props.input); - const counts = artifactCounts(props.input); const metrics = buildDemoMetrics(props.input, props.now ? { now: props.now } : {}); const workStarted = hasWorkStarted(props.input); @@ -358,11 +245,6 @@ export function Hud(props: { const innerWidth = Math.max(60, props.width - 6); - const actives = activeWorkUnits(props.input); - const reconEvent = props.input.events.find((e) => e.type === 'reconciliation_finished'); - const allAnalyzed = isFlowing && actives.length === 0; - const isReconciling = allAnalyzed && !reconEvent && !isDone; - const hLine = '─'.repeat(innerWidth); const elapsed = formatDuration(metrics.elapsedMs); @@ -429,7 +311,6 @@ export function ActivityFeed(props: { const workStarted = hasWorkStarted(props.input); const totalChunks = planEvent?.chunkCount ?? 0; - const finishedWithArtifacts = finished.filter((u) => u.artifactCount > 0); const finishedAreas = totalChunks > 0 ? 
Math.min(finished.length, totalChunks) : finished.length; const allWorkDone = workStarted && actives.length === 0 && queued.length === 0; const isReconciling = allWorkDone && !reconEvent && !isDone && !isError; diff --git a/packages/cli/src/memory-flow-tui.test.tsx b/packages/cli/src/memory-flow-tui.test.tsx index 83a46795..b555c6c1 100644 --- a/packages/cli/src/memory-flow-tui.test.tsx +++ b/packages/cli/src/memory-flow-tui.test.tsx @@ -11,7 +11,6 @@ import { startLiveMemoryFlowTui, type KtxMemoryFlowTuiIo, type MemoryFlowInkInstance, - type MemoryFlowInkRenderOptions, } from './memory-flow-tui.js'; function replayInput(): MemoryFlowReplayInput { diff --git a/packages/cli/src/memory-flow-tui.tsx b/packages/cli/src/memory-flow-tui.tsx index 31a3aea4..37b9f2af 100644 --- a/packages/cli/src/memory-flow-tui.tsx +++ b/packages/cli/src/memory-flow-tui.tsx @@ -1,7 +1,6 @@ /* @jsxImportSource react */ import { buildMemoryFlowViewModel, - buildMemoryFlowVisualModel, createInitialMemoryFlowInteractionState, findMemoryFlowSearchMatches, type MemoryFlowColumnId, @@ -14,8 +13,7 @@ import { selectedMemoryFlowDetails, } from '@ktx/context/ingest'; import { Box, Text, render as renderInkRuntime, useApp, useInput } from 'ink'; -import React, { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; -import { buildDemoMetrics } from './demo-metrics.js'; +import { type ReactNode, useEffect, useMemo, useRef, useState } from 'react'; import { ActivityFeed, Hud, @@ -201,14 +199,6 @@ function stageLabel(columnId: MemoryFlowColumnId): string { return STAGE_LABELS[columnId]; } -function statusLabel(status: string): 'OK' | 'RUN' | 'WARN' | 'FAIL' | 'WAIT' { - if (status === 'complete') return 'OK'; - if (status === 'active') return 'RUN'; - if (status === 'warning') return 'WARN'; - if (status === 'failed') return 'FAIL'; - return 'WAIT'; -} - function filterLabel(filter: MemoryFlowInteractionState['filter']): string { return filter === 'failed_or_flagged' ? 
'issues' : 'all'; } @@ -325,7 +315,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode { const view = useMemo(() => buildMemoryFlowViewModel(pacedInput), [pacedInput]); const [state, setState] = useState(() => createInitialMemoryFlowInteractionState(view)); const [frame, setFrame] = useState(0); - const [thoughtFrame, setThoughtFrame] = useState(0); const [completionFrame, setCompletionFrame] = useState(0); const [holdComplete, setHoldComplete] = useState(false); const [userHasNavigated, setUserHasNavigated] = useState(false); @@ -346,7 +335,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode { useEffect(() => { const timer = setInterval(() => { setFrame((current) => current + 1); - setThoughtFrame((current) => current + 1); }, props.frameMs ?? DEFAULT_TUI_TIMING.frameMs); return () => clearInterval(timer); }, [props.frameMs]); @@ -354,7 +342,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode { useEffect(() => { if (lastEventCountRef.current !== pacedInput.events.length) { lastEventCountRef.current = pacedInput.events.length; - setThoughtFrame(0); } }, [pacedInput.events.length]); @@ -409,10 +396,6 @@ export function MemoryFlowTuiApp(props: MemoryFlowTuiAppProps): ReactNode { }); const isComplete = pacedInput.status === 'done' || pacedInput.status === 'error'; - const completionMetrics = useMemo( - () => buildDemoMetrics(pacedInput, pacedNow ? { now: pacedNow } : {}), - [pacedInput, pacedNow], - ); const termWidth = props.terminalWidth ?? 80; diff --git a/packages/cli/src/project-dir.ts b/packages/cli/src/project-dir.ts deleted file mode 100644 index d8aef2e2..00000000 --- a/packages/cli/src/project-dir.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { resolve } from 'node:path'; - -export function resolveProjectDir(projectDir?: string, fallback = '.'): string { - return resolve(projectDir ?? 
fallback); -} diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index c9d9f7bb..f8296177 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -6,9 +6,9 @@ import { profileMark } from './startup-profile.js'; profileMark('module:public-ingest'); -export type KtxPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update'; -export type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; -export type KtxPublicIngestInputMode = 'auto' | 'disabled'; +type KtxPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update'; +type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; +type KtxPublicIngestInputMode = 'auto' | 'disabled'; export type KtxPublicIngestArgs = | { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index efcd35f1..04a572ac 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -474,16 +474,6 @@ async function markContextComplete(projectDir: string): Promise { await markKtxSetupStateStepComplete(projectDir, 'context'); } -function writeBuildHeader(projectDir: string, runId: string, io: KtxCliIo): void { - const commands = contextBuildCommands(projectDir, runId); - io.stdout.write('\nKTX context build\n'); - io.stdout.write(`Run: ${runId}\n`); - io.stdout.write(`Project: ${resolve(projectDir)}\n\n`); - io.stdout.write('Detach: press d to leave this running.\n'); - io.stdout.write(`Resume: ${commands.watch}\n`); - io.stdout.write(`Status: ${commands.status}\n\n`); -} - function writeMissingCapabilities(missing: string[], io: KtxCliIo): void { io.stderr.write('KTX cannot build agent-ready context yet.\n\n'); io.stderr.write('Missing:\n'); diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 026b4834..c2b7386c 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ 
b/packages/cli/src/standalone-smoke.test.ts @@ -126,10 +126,6 @@ async function writeSqliteScanConfig(projectDir: string, dbPath: string, enrich ); } -function parseJsonOutput(stdout: string): T { - return JSON.parse(stdout) as T; -} - function expectProjectStderr(result: CliResult, projectDir: string): void { expect(result).toMatchObject({ code: 0, stderr: `Project: ${projectDir}\n` }); } diff --git a/packages/context/src/ingest/memory-flow/summary.ts b/packages/context/src/ingest/memory-flow/summary.ts index a23d7146..b76bde90 100644 --- a/packages/context/src/ingest/memory-flow/summary.ts +++ b/packages/context/src/ingest/memory-flow/summary.ts @@ -53,7 +53,6 @@ function fixSuggestions(input: MemoryFlowReplayInput): string[] { export function formatMemoryFlowFinalSummary(input: MemoryFlowReplayInput): string { const sources = eventsOf(input.events, 'source_acquired'); - const source = sources.at(-1); const totalFiles = sources.reduce((sum, s) => sum + s.fileCount, 0); const saved = latest(input.events, 'saved'); const provenance = latest(input.events, 'provenance_recorded'); diff --git a/packages/context/src/scan/enrichment-types.ts b/packages/context/src/scan/enrichment-types.ts index 2b186976..645951c3 100644 --- a/packages/context/src/scan/enrichment-types.ts +++ b/packages/context/src/scan/enrichment-types.ts @@ -68,15 +68,6 @@ export interface KtxDescriptionUpdate { columnDescriptions?: Record; } -const PREFERRED_METADATA_FIELD_NAMES = [ - 'tags', - 'constraints', - 'enum_values', - 'freshness', - 'tests', - 'lineage', -] as const; - export interface KtxMetadataUpdate { connectionId: string; table: KtxTableRef; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0f7ff8bd..6f9f19b1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -12,15 +12,24 @@ importers: .: devDependencies: + '@biomejs/biome': + specifier: ^2.4.15 + version: 2.4.15 '@types/node': specifier: ^24.3.0 version: 24.12.2 + better-sqlite3: + specifier: ^12.10.0 + version: 12.10.0 + knip: + 
specifier: ^6.12.2 + version: 6.12.2(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) typescript: specifier: ^6.0.3 version: 6.0.3 - vitest: - specifier: ^4.1.6 - version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@24.12.2)(vite@8.0.10(@types/node@24.12.2)(esbuild@0.28.0)(jiti@2.7.0)(yaml@2.9.0)) + yaml: + specifier: ^2.9.0 + version: 2.9.0 docs-site: dependencies: @@ -741,6 +750,63 @@ packages: resolution: {integrity: sha512-SriLPKezypIsiZ+TtlFfE46uuBIap2HeaQVS78e1P7rz5OSbq0rsd52WE1mC5f7vAeLiXqv7I7oRhL3WFZEw3Q==} engines: {node: '>=18.0.0'} + '@biomejs/biome@2.4.15': + resolution: {integrity: sha512-j5VH3a/h/HXTKBM50MDMxRCzkeLv9S2XJcW2WgnZT1+xyisi+0bISrXR82gCX+8S9lvK0skEvHJRN+3Ktr2hlw==} + engines: {node: '>=14.21.3'} + hasBin: true + + '@biomejs/cli-darwin-arm64@2.4.15': + resolution: {integrity: sha512-rF3PPqLq1yoST79zaQbDjVJwsuIeci/O+9bgNmC5QpgOqz6aqYuzA4abyAGx+mgyiDXn4A049xAN8gijbuR1Qg==} + engines: {node: '>=14.21.3'} + cpu: [arm64] + os: [darwin] + + '@biomejs/cli-darwin-x64@2.4.15': + resolution: {integrity: sha512-/5KHXYMfSJs1fNXiX30xFtI8JcCFV6zaVVLxOa0M2sfqBKHkpQhRTv94yxQWxeTY2lzo2OuTlNvPC+hDQt2wcQ==} + engines: {node: '>=14.21.3'} + cpu: [x64] + os: [darwin] + + '@biomejs/cli-linux-arm64-musl@2.4.15': + resolution: {integrity: sha512-ZPcxznxm0pogHBLZhYntyR3sR+MrZjqJIKEr7ZqVen0Rl+P/4upVmfYXjftizi9RoqZntg33fv/1fbdhbYXpEQ==} + engines: {node: '>=14.21.3'} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@biomejs/cli-linux-arm64@2.4.15': + resolution: {integrity: sha512-owaAMZD/T4LrD0ELNCk0Km3qrRHuM0X6EAyVE1FSqGY0rbLoiDLrO4Us2tllm6cAeB2Ioa9C2C08NZPdr8+0Ug==} + engines: {node: '>=14.21.3'} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@biomejs/cli-linux-x64-musl@2.4.15': + resolution: {integrity: sha512-CNq/9W38SYSH023lfcQ4KKU8K0YX8T//FZUhcgtMMRABDojx5XsMV7jlweAvGSl389wJQB29Qo6Zb/a+jdvt+w==} + engines: {node: '>=14.21.3'} + cpu: [x64] + os: [linux] + libc: [musl] + + '@biomejs/cli-linux-x64@2.4.15': + resolution: {integrity: 
sha512-0jj7THz12GbUOLmMibktK6DZjqz2zV64KFxyBtcFTKPiiOIY0a7vns1elpO1dERvxpsZ5ik0oFfz0oGwFde1+g==} + engines: {node: '>=14.21.3'} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@biomejs/cli-win32-arm64@2.4.15': + resolution: {integrity: sha512-ouhkYdlhp/1GghEJPdWwD/Vi3gQ1nFxuSpMolWsbq3Lsq3QUR4jl6UdhhscdCugKU5vOEuMiJhvKj66O0OCq+w==} + engines: {node: '>=14.21.3'} + cpu: [arm64] + os: [win32] + + '@biomejs/cli-win32-x64@2.4.15': + resolution: {integrity: sha512-zBrGq5mx5wwpnow4+2BxUvleDM+GNd4sLbPaMapsSLQLD0NGRCquqPBTgN+7XkUteHvj7M+BstuI8tmnV7+HgQ==} + engines: {node: '>=14.21.3'} + cpu: [x64] + os: [win32] + '@clack/core@1.3.1': resolution: {integrity: sha512-fT1qHVGAag4IEkrupZ6lRRbNCs1vS9P01KB/sG8zKgvUztbYtFBtQpjSITNwooDZ83tpsPzP0mRNs1/KVszCRA==} engines: {node: '>= 20.12.0'} @@ -1317,9 +1383,247 @@ packages: resolution: {integrity: sha512-a61ljmRVVyG5MC/698C8/FfFDw5a8LOIvyOLW5fztgUXqUpc1jOfQzOitSCbge657OgXXThmY3Tk8fpiDb4UcA==} engines: {node: '>= 20.0.0'} + '@oxc-parser/binding-android-arm-eabi@0.128.0': + resolution: {integrity: sha512-aca6ZvzmCBUGOANQRiRQRZuRKYI3ENhcit6GisnknOOmcezfQc7xJ4dxlPU7MV7mOvrC7RNR1u3LAD7xyaiCxA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm] + os: [android] + + '@oxc-parser/binding-android-arm64@0.128.0': + resolution: {integrity: sha512-BbeDmuohoJ7Rz/it5wnkj69i/OsCPS3Z51nLEzwO/Y6YshtC4JU+15oNwhY8v4LRKRYclRc7ggOikwrsJ/eOEQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [android] + + '@oxc-parser/binding-darwin-arm64@0.128.0': + resolution: {integrity: sha512-tRUHPt80417QmvNpoSslJT1VY8NUbWdrWR+L14Zn+RbOTcaqB8E6PYE/ZGN8jjWBzqporiA/H4MfO50ew/NCNA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [darwin] + + '@oxc-parser/binding-darwin-x64@0.128.0': + resolution: {integrity: sha512-rWI2Hb1Nt3U/vKsjyNvZzDC8i/l144U20DKjhzaTmwIhIiSRGeroPWWiImwypmKLqrw8GuIixbWJkpGWLbkzrQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [darwin] + + '@oxc-parser/binding-freebsd-x64@0.128.0': + resolution: 
{integrity: sha512-hhpdVMaNCLgQxjgNPeeFzSeJMmZPc5lKfv0NGSI3egZq9EdnEGqeC8JsYsQjK7PoQgbvZ17xlj0SO5ziH5Obkg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [freebsd] + + '@oxc-parser/binding-linux-arm-gnueabihf@0.128.0': + resolution: {integrity: sha512-093zNw0zZ/e/obML+rhlSdmnzR0mVZluPcAkxunEc5E3F0yBVsFn24Y1ILfsEte11Ud041qn/gp2OJ1jxNqUng==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm] + os: [linux] + + '@oxc-parser/binding-linux-arm-musleabihf@0.128.0': + resolution: {integrity: sha512-fq7DmKmfC+dvD97IXrgbph6Jzwe0EDu+PYMofmzZ6fv5X1k9vtaqLpDGMuICO9MmUnyKAQmVl+wIv2RNy4Dz8g==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm] + os: [linux] + + '@oxc-parser/binding-linux-arm64-gnu@0.128.0': + resolution: {integrity: sha512-Xvm48jJah8TlIrURIjNOP/gNiGe6aKvCB+r06VliflFo8Kq7VOLE8PxtgShJzZIqubrgdMdYfvuPPozn7F6MbQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@oxc-parser/binding-linux-arm64-musl@0.128.0': + resolution: {integrity: sha512-M7iwBGmYJTx+pKOYFjI0buop4gJvlmcVzFGaXPt21DKpQkbQZG1f63Yg7LloIYT/t9yLxCw0Lhfx/RFlAlMSjA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@oxc-parser/binding-linux-ppc64-gnu@0.128.0': + resolution: {integrity: sha512-21LGNIZb1Pcfk5/EGsqabrxv4yqQOWis1407JJrClS7XpFCrbvr74YAB1V+m54cYbwvO6UWwQqS4WecxiyfCRg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@oxc-parser/binding-linux-riscv64-gnu@0.128.0': + resolution: {integrity: sha512-gyHjOTFpg9bTTYjxPmQirvufb89+VdZwVfcMtAUyPr6F5H8ZswvCQshK4qOW+Q+2Xyb33hduRgY/eFHJQjU/vQ==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + '@oxc-parser/binding-linux-riscv64-musl@0.128.0': + resolution: {integrity: sha512-X6Q2oKUrP5GyDd2xniuEBLk6aFQCZ97W2+aVXGgJXdjx5t4/oFuA9ri0wLOUrBIX+qdSuK581snMBio4z910eA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [riscv64] + os: [linux] + libc: [musl] + + 
'@oxc-parser/binding-linux-s390x-gnu@0.128.0': + resolution: {integrity: sha512-BdzTmqxfxoYkpgokoLaSnOX6T+R3/goL42klre2tnG+kHbG2TXS0VN+P5BPofH1axdKOHy5ei4ENZrjmCOt2lA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@oxc-parser/binding-linux-x64-gnu@0.128.0': + resolution: {integrity: sha512-OO1nW2Q7sSYYvJZpDHdvyFSdRaVcQqRijZSSmWVMqFxPYy8cEF45zJ9fcdIYuzIT3jYq6YRhEFm/VMWNWhE22Q==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@oxc-parser/binding-linux-x64-musl@0.128.0': + resolution: {integrity: sha512-4NehAe404MRdoZVS9DW8C5XbJwbXIc/KfVlYdpi5vE4081zc9Y0YzKVqyOYj/Puye7/Do+ohaONBFWlEHYl9hw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + libc: [musl] + + '@oxc-parser/binding-openharmony-arm64@0.128.0': + resolution: {integrity: sha512-kVbqgW9xLL8bh8oc7aYOJilRKXE5G33+tE0jan+duo/9OriaFRpijcCwT2waWs2oqYROYq0GlE7/p3ywoshVeg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [openharmony] + + '@oxc-parser/binding-wasm32-wasi@0.128.0': + resolution: {integrity: sha512-L38ojghJYHmgiz6fJd7jwLB/ESDBpB02NdFxh+smqVM6P2anCEvHn0jhaSrt5eVNR1Ak8+moOeftUlofeyvniA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [wasm32] + + '@oxc-parser/binding-win32-arm64-msvc@0.128.0': + resolution: {integrity: sha512-xgvO35GyHBtjlQ5AEpaYr7Rll1rvY7zqIhT6ty8E3ezBW2J1SFLjIDEvI/tcgDg6oaseDAqVcM+jU1HuCekgZw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [win32] + + '@oxc-parser/binding-win32-ia32-msvc@0.128.0': + resolution: {integrity: sha512-OY+3eM2SN72prHKRB22mPz8o5A/7dJ+f5DFLBVvggyZhEaNDAH9IB+ElMjmOkOIwf5MDCUAowCK7pAncNxzpBA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ia32] + os: [win32] + + '@oxc-parser/binding-win32-x64-msvc@0.128.0': + resolution: {integrity: sha512-NE9ny+cPUCCObXa0IKLfj0tCdPd7pe/dz9ZpkxpUOymB3miNeMPybdlYYTBSGJUalMWeBM85/4JcCErCNTqOXw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [win32] + 
'@oxc-project/types@0.127.0': resolution: {integrity: sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==} + '@oxc-project/types@0.128.0': + resolution: {integrity: sha512-huv1Y/LzBJkBVHt3OlC7u0zHBW9qXf1FdD7sGmc1rXc2P1mTwHssYv7jyGx5KAACSCH+9B3Bhn6Z9luHRvf7pQ==} + + '@oxc-resolver/binding-android-arm-eabi@11.19.1': + resolution: {integrity: sha512-aUs47y+xyXHUKlbhqHUjBABjvycq6YSD7bpxSW7vplUmdzAlJ93yXY6ZR0c1o1x5A/QKbENCvs3+NlY8IpIVzg==} + cpu: [arm] + os: [android] + + '@oxc-resolver/binding-android-arm64@11.19.1': + resolution: {integrity: sha512-oolbkRX+m7Pq2LNjr/kKgYeC7bRDMVTWPgxBGMjSpZi/+UskVo4jsMU3MLheZV55jL6c3rNelPl4oD60ggYmqA==} + cpu: [arm64] + os: [android] + + '@oxc-resolver/binding-darwin-arm64@11.19.1': + resolution: {integrity: sha512-nUC6d2i3R5B12sUW4O646qD5cnMXf2oBGPLIIeaRfU9doJRORAbE2SGv4eW6rMqhD+G7nf2Y8TTJTLiiO3Q/dQ==} + cpu: [arm64] + os: [darwin] + + '@oxc-resolver/binding-darwin-x64@11.19.1': + resolution: {integrity: sha512-cV50vE5+uAgNcFa3QY1JOeKDSkM/9ReIcc/9wn4TavhW/itkDGrXhw9jaKnkQnGbjJ198Yh5nbX/Gr2mr4Z5jQ==} + cpu: [x64] + os: [darwin] + + '@oxc-resolver/binding-freebsd-x64@11.19.1': + resolution: {integrity: sha512-xZOQiYGFxtk48PBKff+Zwoym7ScPAIVp4c14lfLxizO2LTTTJe5sx9vQNGrBymrf/vatSPNMD4FgsaaRigPkqw==} + cpu: [x64] + os: [freebsd] + + '@oxc-resolver/binding-linux-arm-gnueabihf@11.19.1': + resolution: {integrity: sha512-lXZYWAC6kaGe/ky2su94e9jN9t6M0/6c+GrSlCqL//XO1cxi5lpAhnJYdyrKfm0ZEr/c7RNyAx3P7FSBcBd5+A==} + cpu: [arm] + os: [linux] + + '@oxc-resolver/binding-linux-arm-musleabihf@11.19.1': + resolution: {integrity: sha512-veG1kKsuK5+t2IsO9q0DErYVSw2azvCVvWHnfTOS73WE0STdLLB7Q1bB9WR+yHPQM76ASkFyRbogWo1GR1+WbQ==} + cpu: [arm] + os: [linux] + + '@oxc-resolver/binding-linux-arm64-gnu@11.19.1': + resolution: {integrity: sha512-heV2+jmXyYnUrpUXSPugqWDRpnsQcDm2AX4wzTuvgdlZfoNYO0O3W2AVpJYaDn9AG4JdM6Kxom8+foE7/BcSig==} + cpu: [arm64] + os: [linux] + libc: [glibc] + + 
'@oxc-resolver/binding-linux-arm64-musl@11.19.1': + resolution: {integrity: sha512-jvo2Pjs1c9KPxMuMPIeQsgu0mOJF9rEb3y3TdpsrqwxRM+AN6/nDDwv45n5ZrUnQMsdBy5gIabioMKnQfWo9ew==} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@oxc-resolver/binding-linux-ppc64-gnu@11.19.1': + resolution: {integrity: sha512-vLmdNxWCdN7Uo5suays6A/+ywBby2PWBBPXctWPg5V0+eVuzsJxgAn6MMB4mPlshskYbppjpN2Zg83ArHze9gQ==} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@oxc-resolver/binding-linux-riscv64-gnu@11.19.1': + resolution: {integrity: sha512-/b+WgR+VTSBxzgOhDO7TlMXC1ufPIMR6Vj1zN+/x+MnyXGW7prTLzU9eW85Aj7Th7CCEG9ArCbTeqxCzFWdg2w==} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + '@oxc-resolver/binding-linux-riscv64-musl@11.19.1': + resolution: {integrity: sha512-YlRdeWb9j42p29ROh+h4eg/OQ3dTJlpHSa+84pUM9+p6i3djtPz1q55yLJhgW9XfDch7FN1pQ/Vd6YP+xfRIuw==} + cpu: [riscv64] + os: [linux] + libc: [musl] + + '@oxc-resolver/binding-linux-s390x-gnu@11.19.1': + resolution: {integrity: sha512-EDpafVOQWF8/MJynsjOGFThcqhRHy417sRyLfQmeiamJ8qVhSKAn2Dn2VVKUGCjVB9C46VGjhNo7nOPUi1x6uA==} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@oxc-resolver/binding-linux-x64-gnu@11.19.1': + resolution: {integrity: sha512-NxjZe+rqWhr+RT8/Ik+5ptA3oz7tUw361Wa5RWQXKnfqwSSHdHyrw6IdcTfYuml9dM856AlKWZIUXDmA9kkiBQ==} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@oxc-resolver/binding-linux-x64-musl@11.19.1': + resolution: {integrity: sha512-cM/hQwsO3ReJg5kR+SpI69DMfvNCp+A/eVR4b4YClE5bVZwz8rh2Nh05InhwI5HR/9cArbEkzMjcKgTHS6UaNw==} + cpu: [x64] + os: [linux] + libc: [musl] + + '@oxc-resolver/binding-openharmony-arm64@11.19.1': + resolution: {integrity: sha512-QF080IowFB0+9Rh6RcD19bdgh49BpQHUW5TajG1qvWHvmrQznTZZjYlgE2ltLXyKY+qs4F/v5xuX1XS7Is+3qA==} + cpu: [arm64] + os: [openharmony] + + '@oxc-resolver/binding-wasm32-wasi@11.19.1': + resolution: {integrity: sha512-w8UCKhX826cP/ZLokXDS6+milN8y4X7zidsAttEdWlVoamTNf6lhBJldaWr3ukTDiye7s4HRcuPEPOXNC432Vg==} + engines: {node: '>=14.0.0'} + cpu: [wasm32] + + 
'@oxc-resolver/binding-win32-arm64-msvc@11.19.1': + resolution: {integrity: sha512-nJ4AsUVZrVKwnU/QRdzPCCrO0TrabBqgJ8pJhXITdZGYOV28TIYystV1VFLbQ7DtAcaBHpocT5/ZJnF78YJPtQ==} + cpu: [arm64] + os: [win32] + + '@oxc-resolver/binding-win32-ia32-msvc@11.19.1': + resolution: {integrity: sha512-EW+ND5q2Tl+a3pH81l1QbfgbF3HmqgwLfDfVithRFheac8OTcnbXt/JxqD2GbDkb7xYEqy1zNaVFRr3oeG8npA==} + cpu: [ia32] + os: [win32] + + '@oxc-resolver/binding-win32-x64-msvc@11.19.1': + resolution: {integrity: sha512-6hIU3RQu45B+VNTY4Ru8ppFwjVS/S5qwYyGhBotmjxfEKk41I2DlGtRfGJndZ5+6lneE2pwloqunlOyZuX/XAw==} + cpu: [x64] + os: [win32] + '@radix-ui/number@1.1.1': resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==} @@ -2799,6 +3103,9 @@ packages: resolution: {integrity: sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg==} engines: {node: '>= 4.9.1'} + fd-package-json@2.0.0: + resolution: {integrity: sha512-jKmm9YtsNXN789RS/0mSzOC1NUq9mkVd65vbSSVsKdjGvYXBuE4oWe2QOEoFeRmJg+lPuZxpmrfFclNhoRMneQ==} + fdir@6.5.0: resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} engines: {node: '>=12.0.0'} @@ -2838,6 +3145,11 @@ packages: resolution: {integrity: sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==} engines: {node: '>= 6'} + formatly@0.3.0: + resolution: {integrity: sha512-9XNj/o4wrRFyhSMJOvsuyMwy8aUfBaZ1VrqHVfohyXf0Sw0e+yfKG+xZaY3arGCOMdwFsqObtzVOc1gU9KiT9w==} + engines: {node: '>=18.3.0'} + hasBin: true + formdata-polyfill@4.0.10: resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} engines: {node: '>=12.20.0'} @@ -3019,6 +3331,9 @@ packages: resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} engines: {node: '>= 0.4'} + get-tsconfig@4.14.0: + 
resolution: {integrity: sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==} + github-from-package@0.0.0: resolution: {integrity: sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==} @@ -3269,6 +3584,11 @@ packages: jws@4.0.1: resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} + knip@6.12.2: + resolution: {integrity: sha512-RcZpT1sVziKZgDk1F0hAcp+bq71VJAF8vg1Y9ZLXc1+UXQaMm1rjiUqpJQTIj+lqwmiBQT19/u7ikgazs23cvA==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + kuler@2.0.0: resolution: {integrity: sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A==} @@ -3766,6 +4086,13 @@ packages: zod: optional: true + oxc-parser@0.128.0: + resolution: {integrity: sha512-XkOw3eiIxAgQ19WRew/Bq9wc5Ga/guaWIzDBzq80z1PyuDNGvWBpPby9k6YGwV8A8uMw+Nlq3xqlzuDYmUFYUw==} + engines: {node: ^20.19.0 || >=22.12.0} + + oxc-resolver@11.19.1: + resolution: {integrity: sha512-qE/CIg/spwrTBFt5aKmwe3ifeDdLfA2NESN30E42X/lII5ClF8V7Wt6WIJhcGZjp0/Q+nQ+9vgxGk//xZNX2hg==} + p-limit@7.3.0: resolution: {integrity: sha512-7cIXg/Z0M5WZRblrsOla88S4wAK+zOQQWeBYfV3qJuJXMr+LnbYjaadrFaS0JILfEDPVqHyKnZ1Z/1d6J9VVUw==} engines: {node: '>=20'} @@ -4028,6 +4355,9 @@ packages: resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} engines: {node: '>=0.10.0'} + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + restore-cursor@4.0.0: resolution: {integrity: sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} @@ -4138,6 +4468,10 @@ packages: resolution: {integrity: sha512-SO/3iYL5S3W57LLEniscOGPZgOqZUPCx6d3dB+52B80yJ0XstzsC/eV8gnA4tM3MHDrKz+OCFSLNjswdSC+/bA==} 
engines: {node: '>=22'} + smol-toml@1.6.1: + resolution: {integrity: sha512-dWUG8F5sIIARXih1DTaQAX4SsiTXhInKf1buxdY9DIg4ZYPZK5nGM1VRIYmEbDbsHt7USo99xSLFu5Q1IqTmsg==} + engines: {node: '>= 18'} + snowflake-sdk@2.4.1: resolution: {integrity: sha512-JIdqz9ed2FzkU8oEstf06hTJRoX9+PRRG9LJT1vfGTXN3A52kGxhGoWzmK0GtFTUnxTMxMoMYgD5QdoQbckyag==} engines: {node: '>=18'} @@ -4211,6 +4545,10 @@ packages: resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==} engines: {node: '>=0.10.0'} + strip-json-comments@5.0.3: + resolution: {integrity: sha512-1tB5mhVo7U+ETBKNf92xT4hrQa3pm0MZ0PQvuDnWgAAGHDsfp4lPSpiS6psrSiet87wyGPh9ft6wmhOMQ0hDiw==} + engines: {node: '>=14.16'} + strnum@2.2.3: resolution: {integrity: sha512-oKx6RUCuHfT3oyVjtnrmn19H1SiCqgJSg+54XqURKp5aCMbrXrhLjRN9TjuwMjiYstZ0MzDrHqkGZ5dFTKd+zg==} @@ -4336,6 +4674,10 @@ packages: engines: {node: '>=0.8.0'} hasBin: true + unbash@3.0.0: + resolution: {integrity: sha512-FeFPZ/WFT0mbRCuydiZzpPFlrYN8ZUpphQKoq4EeElVIYjYyGzPMxQR/simUwCOJIyVhpFk4RbtyO7RuMpMnHA==} + engines: {node: '>=14'} + undici-types@7.16.0: resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} @@ -4487,6 +4829,10 @@ packages: jsdom: optional: true + walk-up-path@4.0.0: + resolution: {integrity: sha512-3hu+tD8YzSLGuFYtPRb48vdhKMi0KQV5sn+uWr8+7dMEq/2G/dtLrdDinkLjqq5TIbIBjYJ4Ax/n3YiaW7QM8A==} + engines: {node: 20 || >=22} + web-namespaces@2.0.1: resolution: {integrity: sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==} @@ -5546,6 +5892,41 @@ snapshots: transitivePeerDependencies: - supports-color + '@biomejs/biome@2.4.15': + optionalDependencies: + '@biomejs/cli-darwin-arm64': 2.4.15 + '@biomejs/cli-darwin-x64': 2.4.15 + '@biomejs/cli-linux-arm64': 2.4.15 + '@biomejs/cli-linux-arm64-musl': 2.4.15 + '@biomejs/cli-linux-x64': 2.4.15 + '@biomejs/cli-linux-x64-musl': 2.4.15 + 
'@biomejs/cli-win32-arm64': 2.4.15 + '@biomejs/cli-win32-x64': 2.4.15 + + '@biomejs/cli-darwin-arm64@2.4.15': + optional: true + + '@biomejs/cli-darwin-x64@2.4.15': + optional: true + + '@biomejs/cli-linux-arm64-musl@2.4.15': + optional: true + + '@biomejs/cli-linux-arm64@2.4.15': + optional: true + + '@biomejs/cli-linux-x64-musl@2.4.15': + optional: true + + '@biomejs/cli-linux-x64@2.4.15': + optional: true + + '@biomejs/cli-win32-arm64@2.4.15': + optional: true + + '@biomejs/cli-win32-x64@2.4.15': + optional: true + '@clack/core@1.3.1': dependencies: fast-wrap-ansi: 0.2.0 @@ -6130,8 +6511,139 @@ snapshots: '@orama/orama@3.1.18': {} + '@oxc-parser/binding-android-arm-eabi@0.128.0': + optional: true + + '@oxc-parser/binding-android-arm64@0.128.0': + optional: true + + '@oxc-parser/binding-darwin-arm64@0.128.0': + optional: true + + '@oxc-parser/binding-darwin-x64@0.128.0': + optional: true + + '@oxc-parser/binding-freebsd-x64@0.128.0': + optional: true + + '@oxc-parser/binding-linux-arm-gnueabihf@0.128.0': + optional: true + + '@oxc-parser/binding-linux-arm-musleabihf@0.128.0': + optional: true + + '@oxc-parser/binding-linux-arm64-gnu@0.128.0': + optional: true + + '@oxc-parser/binding-linux-arm64-musl@0.128.0': + optional: true + + '@oxc-parser/binding-linux-ppc64-gnu@0.128.0': + optional: true + + '@oxc-parser/binding-linux-riscv64-gnu@0.128.0': + optional: true + + '@oxc-parser/binding-linux-riscv64-musl@0.128.0': + optional: true + + '@oxc-parser/binding-linux-s390x-gnu@0.128.0': + optional: true + + '@oxc-parser/binding-linux-x64-gnu@0.128.0': + optional: true + + '@oxc-parser/binding-linux-x64-musl@0.128.0': + optional: true + + '@oxc-parser/binding-openharmony-arm64@0.128.0': + optional: true + + '@oxc-parser/binding-wasm32-wasi@0.128.0': + dependencies: + '@emnapi/core': 1.10.0 + '@emnapi/runtime': 1.10.0 + '@napi-rs/wasm-runtime': 1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + optional: true + + '@oxc-parser/binding-win32-arm64-msvc@0.128.0': + 
optional: true + + '@oxc-parser/binding-win32-ia32-msvc@0.128.0': + optional: true + + '@oxc-parser/binding-win32-x64-msvc@0.128.0': + optional: true + '@oxc-project/types@0.127.0': {} + '@oxc-project/types@0.128.0': {} + + '@oxc-resolver/binding-android-arm-eabi@11.19.1': + optional: true + + '@oxc-resolver/binding-android-arm64@11.19.1': + optional: true + + '@oxc-resolver/binding-darwin-arm64@11.19.1': + optional: true + + '@oxc-resolver/binding-darwin-x64@11.19.1': + optional: true + + '@oxc-resolver/binding-freebsd-x64@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-arm-gnueabihf@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-arm-musleabihf@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-arm64-gnu@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-arm64-musl@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-ppc64-gnu@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-riscv64-gnu@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-riscv64-musl@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-s390x-gnu@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-x64-gnu@11.19.1': + optional: true + + '@oxc-resolver/binding-linux-x64-musl@11.19.1': + optional: true + + '@oxc-resolver/binding-openharmony-arm64@11.19.1': + optional: true + + '@oxc-resolver/binding-wasm32-wasi@11.19.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)': + dependencies: + '@napi-rs/wasm-runtime': 1.1.4(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + transitivePeerDependencies: + - '@emnapi/core' + - '@emnapi/runtime' + optional: true + + '@oxc-resolver/binding-win32-arm64-msvc@11.19.1': + optional: true + + '@oxc-resolver/binding-win32-ia32-msvc@11.19.1': + optional: true + + '@oxc-resolver/binding-win32-x64-msvc@11.19.1': + optional: true + '@radix-ui/number@1.1.1': {} '@radix-ui/primitive@1.1.3': {} @@ -7691,6 +8203,10 @@ snapshots: fastest-levenshtein@1.0.16: {} + fd-package-json@2.0.0: + dependencies: + 
walk-up-path: 4.0.0 + fdir@6.5.0(picomatch@4.0.4): optionalDependencies: picomatch: 4.0.4 @@ -7727,6 +8243,10 @@ snapshots: hasown: 2.0.3 mime-types: 2.1.35 + formatly@0.3.0: + dependencies: + fd-package-json: 2.0.0 + formdata-polyfill@4.0.10: dependencies: fetch-blob: 3.2.0 @@ -7895,6 +8415,10 @@ snapshots: dunder-proto: 1.0.1 es-object-atoms: 1.1.1 + get-tsconfig@4.14.0: + dependencies: + resolve-pkg-maps: 1.0.0 + github-from-package@0.0.0: {} github-slugger@2.0.0: {} @@ -8234,6 +8758,26 @@ snapshots: jwa: 2.0.1 safe-buffer: 5.2.1 + knip@6.12.2(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0): + dependencies: + fdir: 6.5.0(picomatch@4.0.4) + formatly: 0.3.0 + get-tsconfig: 4.14.0 + jiti: 2.7.0 + minimist: 1.2.8 + oxc-parser: 0.128.0 + oxc-resolver: 11.19.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + picomatch: 4.0.4 + smol-toml: 1.6.1 + strip-json-comments: 5.0.3 + tinyglobby: 0.2.16 + unbash: 3.0.0 + yaml: 2.9.0 + zod: 4.4.3 + transitivePeerDependencies: + - '@emnapi/core' + - '@emnapi/runtime' + kuler@2.0.0: {} lightningcss-android-arm64@1.32.0: @@ -8963,6 +9507,57 @@ snapshots: optionalDependencies: zod: 4.4.3 + oxc-parser@0.128.0: + dependencies: + '@oxc-project/types': 0.128.0 + optionalDependencies: + '@oxc-parser/binding-android-arm-eabi': 0.128.0 + '@oxc-parser/binding-android-arm64': 0.128.0 + '@oxc-parser/binding-darwin-arm64': 0.128.0 + '@oxc-parser/binding-darwin-x64': 0.128.0 + '@oxc-parser/binding-freebsd-x64': 0.128.0 + '@oxc-parser/binding-linux-arm-gnueabihf': 0.128.0 + '@oxc-parser/binding-linux-arm-musleabihf': 0.128.0 + '@oxc-parser/binding-linux-arm64-gnu': 0.128.0 + '@oxc-parser/binding-linux-arm64-musl': 0.128.0 + '@oxc-parser/binding-linux-ppc64-gnu': 0.128.0 + '@oxc-parser/binding-linux-riscv64-gnu': 0.128.0 + '@oxc-parser/binding-linux-riscv64-musl': 0.128.0 + '@oxc-parser/binding-linux-s390x-gnu': 0.128.0 + '@oxc-parser/binding-linux-x64-gnu': 0.128.0 + '@oxc-parser/binding-linux-x64-musl': 0.128.0 + 
'@oxc-parser/binding-openharmony-arm64': 0.128.0 + '@oxc-parser/binding-wasm32-wasi': 0.128.0 + '@oxc-parser/binding-win32-arm64-msvc': 0.128.0 + '@oxc-parser/binding-win32-ia32-msvc': 0.128.0 + '@oxc-parser/binding-win32-x64-msvc': 0.128.0 + + oxc-resolver@11.19.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0): + optionalDependencies: + '@oxc-resolver/binding-android-arm-eabi': 11.19.1 + '@oxc-resolver/binding-android-arm64': 11.19.1 + '@oxc-resolver/binding-darwin-arm64': 11.19.1 + '@oxc-resolver/binding-darwin-x64': 11.19.1 + '@oxc-resolver/binding-freebsd-x64': 11.19.1 + '@oxc-resolver/binding-linux-arm-gnueabihf': 11.19.1 + '@oxc-resolver/binding-linux-arm-musleabihf': 11.19.1 + '@oxc-resolver/binding-linux-arm64-gnu': 11.19.1 + '@oxc-resolver/binding-linux-arm64-musl': 11.19.1 + '@oxc-resolver/binding-linux-ppc64-gnu': 11.19.1 + '@oxc-resolver/binding-linux-riscv64-gnu': 11.19.1 + '@oxc-resolver/binding-linux-riscv64-musl': 11.19.1 + '@oxc-resolver/binding-linux-s390x-gnu': 11.19.1 + '@oxc-resolver/binding-linux-x64-gnu': 11.19.1 + '@oxc-resolver/binding-linux-x64-musl': 11.19.1 + '@oxc-resolver/binding-openharmony-arm64': 11.19.1 + '@oxc-resolver/binding-wasm32-wasi': 11.19.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + '@oxc-resolver/binding-win32-arm64-msvc': 11.19.1 + '@oxc-resolver/binding-win32-ia32-msvc': 11.19.1 + '@oxc-resolver/binding-win32-x64-msvc': 11.19.1 + transitivePeerDependencies: + - '@emnapi/core' + - '@emnapi/runtime' + p-limit@7.3.0: dependencies: yocto-queue: 1.2.2 @@ -9273,6 +9868,8 @@ snapshots: require-from-string@2.0.2: {} + resolve-pkg-maps@1.0.0: {} + restore-cursor@4.0.0: dependencies: onetime: 5.1.2 @@ -9467,6 +10064,8 @@ snapshots: ansi-styles: 6.2.3 is-fullwidth-code-point: 5.1.0 + smol-toml@1.6.1: {} + snowflake-sdk@2.4.1(asn1.js@5.4.1): dependencies: '@aws-crypto/sha256-js': 5.2.0 @@ -9558,6 +10157,8 @@ snapshots: strip-json-comments@2.0.1: {} + strip-json-comments@5.0.3: {} + strnum@2.2.3: {} stubs@3.0.0: {} @@ -9689,6 
+10290,8 @@ snapshots: uglify-js@3.19.3: optional: true + unbash@3.0.0: {} + undici-types@7.16.0: {} unified@11.0.5: @@ -9811,6 +10414,8 @@ snapshots: transitivePeerDependencies: - msw + walk-up-path@4.0.0: {} + web-namespaces@2.0.1: {} web-streams-polyfill@3.3.3: {} diff --git a/scripts/build-public-npm-package.mjs b/scripts/build-public-npm-package.mjs index 1a9ef8bc..9df91c69 100644 --- a/scripts/build-public-npm-package.mjs +++ b/scripts/build-public-npm-package.mjs @@ -151,10 +151,6 @@ export function publicNpmPackageJson(cliPackageJson, dependencies, version = PUB } function bundledWorkspacePackageJson(packageJson) { - const dependencies = Object.fromEntries( - Object.entries(packageJson.dependencies ?? {}).filter(([name]) => !isWorkspacePackageName(name)), - ); - return { name: packageJson.name, version: packageJson.version ?? PUBLIC_NPM_PACKAGE_VERSION, @@ -164,7 +160,6 @@ function bundledWorkspacePackageJson(packageJson) { types: packageJson.types, exports: packageJson.exports, files: packageJson.files, - dependencies: sortedObject(Object.entries(dependencies)), license: packageJson.license ?? 
'Apache-2.0', }; } diff --git a/scripts/build-public-npm-package.test.mjs b/scripts/build-public-npm-package.test.mjs index d4461049..c68c2aed 100644 --- a/scripts/build-public-npm-package.test.mjs +++ b/scripts/build-public-npm-package.test.mjs @@ -250,7 +250,7 @@ describe('createPublicNpmPackageTree', () => { await readFile(join(layout.packRoot, 'node_modules', '@ktx', 'context', 'package.json'), 'utf8'), ); assert.equal(bundledContextJson.private, true); - assert.deepEqual(bundledContextJson.dependencies, { yaml: '^2.8.2' }); + assert.equal(bundledContextJson.dependencies, undefined); } finally { await rm(root, { recursive: true, force: true }); } diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 26db4ae8..793566ed 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -22,10 +22,6 @@ function publicPackagePattern(text) { return new RegExp(text.replaceAll('{package}', escapeRegExp(publicNpmPackageName()))); } -function runtimeWheelPackagePattern(text) { - return new RegExp(text.replaceAll('{package}', escapeRegExp(runtimeWheelPackageName()))); -} - describe('standalone example docs', () => { it('documents the local warehouse example from the examples index', async () => { const examples = await readText('examples/README.md'); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 2428dd2e..ab8d7adf 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -459,9 +459,6 @@ export function npmSmokePackageJson(layout) { dependencies: { '@kaelio/ktx': `file:${layout.cliTarball}`, }, - devDependencies: { - 'better-sqlite3': '^12.6.2', - }, }; } @@ -485,11 +482,11 @@ if (typeof cli.runKtxCli !== 'function') { export function npmRuntimeSmokeSource() { return ` import assert from 'node:assert/strict'; -import Database from 'better-sqlite3'; import { execFile } from 'node:child_process'; import { access, mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; 
import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import { DatabaseSync } from 'node:sqlite'; import { promisify } from 'node:util'; const execFileAsync = promisify(execFile); @@ -569,7 +566,7 @@ function getRunId(stdout) { } async function writeSqliteWarehouse(projectDir) { - const database = new Database(join(projectDir, 'warehouse.db')); + const database = new DatabaseSync(join(projectDir, 'warehouse.db')); try { database.exec(\` DROP TABLE IF EXISTS orders; diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 64ce9466..9fe5a2c1 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -27,8 +27,6 @@ import { writeArtifactManifest, } from './package-artifacts.mjs'; -const STALE_METABASE_UNSUPPORTED = ['Standalone Metabase scheduled fetch', 'is intentionally unsupported'].join(' '); - async function writeJson(path, value) { await writeFile(path, `${JSON.stringify(value, null, 2)}\n`); } @@ -420,9 +418,7 @@ describe('verification snippets', () => { assert.deepEqual(packageJson.dependencies, { '@kaelio/ktx': `file:${layout.cliTarball}`, }); - assert.deepEqual(packageJson.devDependencies, { - 'better-sqlite3': '^12.6.2', - }); + assert.equal(packageJson.devDependencies, undefined); assert.equal( npmSmokePnpmWorkspaceYaml(), ['packages:', ' - "."', 'allowBuilds:', ' better-sqlite3: true', ''].join('\n'), @@ -465,7 +461,7 @@ describe('verification snippets', () => { assert.match(source, /semantic-layer', 'warehouse', 'orders\.yaml'/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'sl',\s*'list'/); assert.match(source, /orders\.order_count/); - assert.match(source, /sqlite3/); + assert.match(source, /node:sqlite/); assert.match(source, /driver: sqlite/); assert.match(source, /path: warehouse\.db/); assert.match(source, /live-database/); @@ -473,7 +469,7 @@ describe('verification snippets', () => { assert.match(source, /"mode": "compile_only"/); assert.match(source, 
/"mode": "executed"/); assert.match(source, /ktx sl query sqlite execute/); - assert.match(source, /import Database from 'better-sqlite3'/); + assert.match(source, /import \{ DatabaseSync \} from 'node:sqlite'/); assert.doesNotMatch(source, /run\('python'/); assert.match(source, /KTX_RUNTIME_ROOT/); assert.match(source, /managed-runtime/); diff --git a/scripts/relationship-orbit-verification.mjs b/scripts/relationship-orbit-verification.mjs index aa6309dd..81f81187 100644 --- a/scripts/relationship-orbit-verification.mjs +++ b/scripts/relationship-orbit-verification.mjs @@ -9,7 +9,6 @@ import { runWorkspaceKtx } from './run-ktx.mjs'; const scriptDir = dirname(fileURLToPath(import.meta.url)); const ktxRootDir = resolve(scriptDir, '..'); -const repoRootDir = resolve(ktxRootDir, '..'); const defaultProjectDir = resolve(ktxRootDir, 'examples/orbit-relationship-verification'); const defaultReportPath = resolve( ktxRootDir, From c22248dabf387c76cc4fa8f75b98d628ca7c7dcf Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 13:43:23 +0200 Subject: [PATCH 05/15] feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * 
fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references --- ...tion-warehouse-verification-gap-closure.md | 785 ++++++++ ...warehouse-verification-final-v1-closure.md | 957 ++++++++++ ...2026-05-12-warehouse-verification-tools.md | 1617 +++++++++++++++++ ...house-verification-prompt-shape-closure.md | 345 ++++ ...ehouse-verification-sql-example-closure.md | 215 +++ ...fication-structured-target-miss-closure.md | 236 +++ ...ingestion-warehouse-verification-design.md | 331 ++++ .../cli/src/ingest-query-executor.test.ts | 86 + packages/cli/src/ingest-query-executor.ts | 49 + packages/cli/src/ingest.test-utils.ts | 12 + packages/cli/src/ingest.test.ts | 38 + packages/cli/src/ingest.ts | 10 +- .../memory_agent_bundle_ingest_reconcile.md | 1 + .../memory_agent_bundle_ingest_work_unit.md | 9 +- .../skills/_shared/identifier-verification.md | 27 + packages/context/skills/dbt_ingest/SKILL.md | 36 +- .../skills/historic_sql_patterns/SKILL.md | 31 + .../skills/historic_sql_table_digest/SKILL.md | 28 + .../context/skills/knowledge_capture/SKILL.md | 34 + .../skills/live_database_ingest/SKILL.md | 31 + .../context/skills/looker_ingest/SKILL.md | 31 + 
.../context/skills/lookml_ingest/SKILL.md | 39 +- .../context/skills/metabase_ingest/SKILL.md | 31 + .../context/skills/metricflow_ingest/SKILL.md | 39 +- .../context/skills/notion_synthesize/SKILL.md | 32 +- packages/context/skills/sl/SKILL.md | 4 + packages/context/skills/sl_capture/SKILL.md | 36 +- .../context/src/connections/dialects.test.ts | 30 + packages/context/src/connections/dialects.ts | 102 ++ packages/context/src/connections/index.ts | 2 + packages/context/src/core/git.service.test.ts | 32 + packages/context/src/core/git.service.ts | 39 +- .../local-ingest-acceptance.test.ts | 4 +- .../adapters/historic-sql/post-processor.ts | 2 +- .../adapters/historic-sql/projection.test.ts | 36 +- .../adapters/historic-sql/projection.ts | 24 +- .../adapters/lookml/lookml.adapter.test.ts | 12 + .../ingest/adapters/lookml/lookml.adapter.ts | 9 + .../metricflow/metricflow.adapter.test.ts | 9 + .../adapters/metricflow/metricflow.adapter.ts | 9 + .../src/ingest/adapters/notion/chunk.ts | 4 +- .../adapters/notion/notion.adapter.test.ts | 12 +- .../ingest/adapters/notion/notion.adapter.ts | 9 + .../src/ingest/ingest-bundle.runner.test.ts | 68 +- .../src/ingest/ingest-bundle.runner.ts | 19 + .../context/src/ingest/ingest-prompts.test.ts | 12 + .../src/ingest/ingest-runtime-assets.test.ts | 10 + .../context/src/ingest/local-adapters.test.ts | 54 + packages/context/src/ingest/local-adapters.ts | 11 +- .../src/ingest/local-bundle-ingest.test.ts | 36 + .../src/ingest/local-bundle-runtime.test.ts | 34 +- .../src/ingest/local-bundle-runtime.ts | 90 +- packages/context/src/ingest/local-ingest.ts | 6 +- packages/context/src/ingest/reports.ts | 3 + .../stages/build-reconcile-context.test.ts | 51 + .../ingest/stages/build-reconcile-context.ts | 34 +- .../ingest/stages/build-wu-context.test.ts | 34 + .../src/ingest/stages/build-wu-context.ts | 30 +- .../tools/emit-unmapped-fallback.tool.ts | 2 +- .../tools/tool-transcript-summary.test.ts | 22 + 
.../ingest/tools/tool-transcript-summary.ts | 6 +- .../ingest/tools/verification-ledger.tool.ts | 97 + .../discover-data.tool.test.ts | 119 ++ .../discover-data.tool.ts | 142 ++ .../entity-details.tool.test.ts | 192 ++ .../entity-details.tool.ts | 170 ++ .../tools/warehouse-verification/index.ts | 34 + .../sql-execution.tool.test.ts | 54 + .../sql-execution.tool.ts | 102 ++ .../warehouse-catalog.service.test.ts | 196 ++ .../warehouse-catalog.service.ts | 452 +++++ .../src/ingest/wiki-sl-ref-repair.test.ts | 99 + .../context/src/ingest/wiki-sl-ref-repair.ts | 140 ++ packages/context/src/memory/local-memory.ts | 17 +- .../src/memory/memory-runtime-assets.test.ts | 77 + .../src/sl/tools/sl-warehouse-validation.ts | 2 +- packages/context/src/tools/tool-session.ts | 1 + packages/context/src/wiki/index.ts | 1 + .../src/wiki/knowledge-wiki.service.test.ts | 17 +- .../src/wiki/knowledge-wiki.service.ts | 12 - .../context/src/wiki/local-knowledge.test.ts | 26 + packages/context/src/wiki/local-knowledge.ts | 14 - packages/context/src/wiki/ports.ts | 13 +- .../wiki/tools/wiki-list-tags.tool.test.ts | 34 +- .../src/wiki/tools/wiki-list-tags.tool.ts | 12 +- .../context/src/wiki/tools/wiki-write.tool.ts | 1 + packages/context/src/wiki/types.ts | 1 + python/ktx-sl/semantic_layer/generator.py | 6 + .../tests/test_corner_case_regressions.py | 31 + 89 files changed, 7818 insertions(+), 191 deletions(-) create mode 100644 docs/superpowers/plans/2026-05-12-notion-warehouse-verification-gap-closure.md create mode 100644 docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md create mode 100644 docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md create mode 100644 docs/superpowers/plans/2026-05-13-warehouse-verification-prompt-shape-closure.md create mode 100644 docs/superpowers/plans/2026-05-13-warehouse-verification-sql-example-closure.md create mode 100644 docs/superpowers/plans/2026-05-13-warehouse-verification-structured-target-miss-closure.md 
create mode 100644 docs/superpowers/specs/2026-05-12-notion-ingestion-warehouse-verification-design.md create mode 100644 packages/cli/src/ingest-query-executor.test.ts create mode 100644 packages/cli/src/ingest-query-executor.ts create mode 100644 packages/context/skills/_shared/identifier-verification.md create mode 100644 packages/context/src/connections/dialects.test.ts create mode 100644 packages/context/src/connections/dialects.ts create mode 100644 packages/context/src/ingest/tools/verification-ledger.tool.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/index.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts create mode 100644 packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts create mode 100644 packages/context/src/ingest/wiki-sl-ref-repair.test.ts create mode 100644 packages/context/src/ingest/wiki-sl-ref-repair.ts diff --git a/docs/superpowers/plans/2026-05-12-notion-warehouse-verification-gap-closure.md b/docs/superpowers/plans/2026-05-12-notion-warehouse-verification-gap-closure.md new file mode 100644 index 00000000..3cfdc843 --- /dev/null +++ b/docs/superpowers/plans/2026-05-12-notion-warehouse-verification-gap-closure.md @@ -0,0 +1,785 @@ +# Notion Warehouse Verification Gap Closure Implementation Plan + +> **For agentic 
workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1 gaps that prevent ingest agents, especially +Notion WorkUnits, from reliably verifying warehouse table and column +identifiers before writing wiki or semantic-layer output. + +**Architecture:** Keep the existing warehouse verification tool module and +runner wiring. Add Notion target-warehouse scoping through the local adapter +factory, make the active WorkUnit prompt name the shipped tools, enforce +`allowedConnectionNames` in `discover_data`, and teach `entity_details` to +resolve and reject column-level display targets. + +**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local +ingest adapters, KTX file store. + +--- + +## Audit summary + +The previous implementation plan landed the main tool module and prompt +protocol, but four v1-blocking gaps remain: + +- Notion ingest sessions still allow only the Notion connection unless a + specific adapter supplies target IDs. `NotionSourceAdapter` does not supply + target warehouse IDs, so the original Notion hallucination case cannot use + `entity_details` or raw-schema `discover_data` for the warehouse connection. +- The active WorkUnit framing prompt still tells agents to call + `wiki_sl_search` and `sl_describe_table`, which are not shipped KTX tools. +- `discover_data` accepts an explicit out-of-scope `connectionName` and still + searches raw schema for that connection. +- `entity_details({ targets: [{ display: "schema.table.column" }] })` does not + resolve column display strings and does not fail explicit missing-column + targets. + +Non-blocking gaps remain out of scope for this plan: + +- Full DDL-style `entity_details` formatting with FK and profile summaries. +- AST-backed SQL read-only validation for data-modifying CTEs. 
+- Search over `enrichment/descriptions.json` for generated descriptions. +- Lexicographic latest-sync edge cases for non-timestamp sync IDs. +- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`. + +## File structure + +Modify these files: + +- `packages/context/src/ingest/adapters/notion/notion.adapter.ts`: add + configured target warehouse IDs and implement `listTargetConnectionIds()`. +- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: cover + Notion target connection ID fan-out. +- `packages/context/src/ingest/local-adapters.ts`: pass primary warehouse IDs + into `NotionSourceAdapter`. +- `packages/context/src/ingest/local-adapters.test.ts`: cover local Notion + adapter target IDs. +- `packages/context/src/ingest/adapters/notion/chunk.ts`: update Notion + WorkUnit notes to prefer the warehouse verification tools. +- `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: update + Notion note expectations. +- `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`: replace + stale tool names in the active WorkUnit prompt. +- `packages/context/src/ingest/ingest-prompts.test.ts`: guard the WorkUnit + prompt against stale tool names. +- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`: + refuse explicit out-of-scope connection names. +- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`: + cover `discover_data` scoping. +- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`: + add column-aware display-target resolution. +- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`: + cover column display resolution. +- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`: + use column-aware resolution and report missing columns. 
+- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`: + cover column display and missing-column behavior. + +### Task 1: Give Notion ingest access to target warehouses + +**Files:** +- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.ts` +- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts` +- Modify: `packages/context/src/ingest/local-adapters.ts` +- Modify: `packages/context/src/ingest/local-adapters.test.ts` + +- [ ] **Step 1: Write the failing Notion adapter test** + +Add this test inside `describe('NotionSourceAdapter', ...)` in +`packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`: + +```ts +it('returns configured target warehouse connection ids', async () => { + const adapter = new NotionSourceAdapter({ + targetConnectionIds: ['warehouse', 'warehouse', 'analytics'], + }); + + await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([ + 'analytics', + 'warehouse', + ]); +}); +``` + +- [ ] **Step 2: Run the failing Notion adapter test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/adapters/notion/notion.adapter.test.ts -t "target warehouse connection ids" +``` + +Expected: FAIL because `NotionSourceAdapterDeps` has no +`targetConnectionIds` option and `NotionSourceAdapter` does not implement +`listTargetConnectionIds()`. + +- [ ] **Step 3: Implement Notion target connection IDs** + +Modify `packages/context/src/ingest/adapters/notion/notion.adapter.ts`: + +```ts +export interface NotionSourceAdapterDeps { + onPullSucceeded?: (ctx: NotionPullSucceededContext) => Promise<void>; + logger?: NotionFetchLogger; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? 
[])].sort((left, right) => + left.localeCompare(right), + ); +} +``` + +Add this method to `NotionSourceAdapter`: + +```ts + async listTargetConnectionIds(_stagedDir: string): Promise<string[]> { + return uniqueSorted(this.deps.targetConnectionIds); + } +``` + +- [ ] **Step 4: Pass primary warehouses into the local Notion adapter** + +Modify the Notion adapter construction in +`packages/context/src/ingest/local-adapters.ts`: + +```ts + new NotionSourceAdapter({ + targetConnectionIds: primaryWarehouseConnectionIds(project), + ...(options.logger ? { logger: options.logger } : {}), + }), +``` + +- [ ] **Step 5: Write the local adapter fan-out test** + +Add this test to `packages/context/src/ingest/local-adapters.test.ts`: + +```ts +it('passes primary warehouse connection ids to the local Notion adapter', async () => { + const adapters = createDefaultLocalIngestAdapters( + projectWithConnections({ + notion: { + driver: 'notion', + auth_token: 'secret', + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + }, + warehouse: { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + docs: { + driver: 'dbt', + source_dir: './dbt', + }, + } as never), + ); + + const notion = adapters.find((adapter) => adapter.source === 'notion'); + + await expect(notion?.listTargetConnectionIds?.('/tmp/staged-notion')).resolves.toEqual([ + 'warehouse', + ]); +}); +``` + +- [ ] **Step 6: Run the Notion target tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/adapters/notion/notion.adapter.test.ts -t "target warehouse connection ids" \ + src/ingest/local-adapters.test.ts -t "local Notion adapter" +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/adapters/notion/notion.adapter.ts \ + packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \ + packages/context/src/ingest/local-adapters.ts \ + packages/context/src/ingest/local-adapters.test.ts +git commit -m "fix(context): expose target warehouses to Notion ingest" +``` + +### Task 2: Remove stale tool names from active ingest prompts + +**Files:** +- Modify: `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md` +- Modify: `packages/context/src/ingest/ingest-prompts.test.ts` +- Modify: `packages/context/src/ingest/adapters/notion/chunk.ts` +- Modify: `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts` + +- [ ] **Step 1: Add failing prompt guards** + +Add this test to `packages/context/src/ingest/ingest-prompts.test.ts`: + +```ts +it('uses shipped warehouse verification tools in the WorkUnit prompt', async () => { + const prompt = await readFile( + new URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url), + 'utf-8', + ); + + expect(prompt).toContain('discover_data'); + expect(prompt).toContain('entity_details'); + expect(prompt).not.toContain('wiki_sl_search'); + expect(prompt).not.toContain('sl_describe_table'); +}); +``` + +- [ ] **Step 2: Run the failing prompt guard** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-prompts.test.ts -t "warehouse verification tools" +``` + +Expected: FAIL because the WorkUnit prompt still contains `wiki_sl_search` and +`sl_describe_table`. + +- [ ] **Step 3: Update the WorkUnit framing prompt** + +In `packages/context/prompts/memory_agent_bundle_ingest_work_unit.md`, replace +the first `` paragraph with: + +```md +You are processing ONE WorkUnit of a multi-file ingest bundle. 
The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`. +``` + +In workflow step 2, replace the final sentence with: + +```md +The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping. +``` + +In workflow step 4, replace the sentence that starts +`For each raw file:` with: + +```md +4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. +``` + +In the `` block, replace the physical-column rule with: + +```md +- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source. +``` + +- [ ] **Step 4: Update Notion WorkUnit notes** + +In `packages/context/src/ingest/adapters/notion/chunk.ts`, replace +`NOTION_SL_WRITE_GUIDANCE` with: + +```ts +const NOTION_SL_WRITE_GUIDANCE = + 'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. 
Search existing wiki pages, SL sources, and raw warehouse schema for the same tables or sl_refs with discover_data before creating a new page. Only write or edit SL sources after discover_data plus sl_discover/sl_read_source or entity_details confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.'; +``` + +In the `reconcileNotes` array in the same file, replace: + +```ts + 'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.', +``` + +with: + +```ts + 'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.', +``` + +- [ ] **Step 5: Update Notion note expectations** + +In `packages/context/src/ingest/adapters/notion/notion.adapter.test.ts`, +update the note expectations in `it('chunks changed Notion pages...')`: + +```ts +expect(result.workUnits[0].notes).toContain('discover_data'); +expect(result.workUnits[0].notes).toContain('entity_details'); +``` + +Update the exact `reconcileNotes` expectation to: + +```ts +expect(result.reconcileNotes).toEqual([ + 'Notion maxKnowledgeCreatesPerRun=25', + 'Notion maxKnowledgeUpdatesPerRun=20', + 'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.', + 'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.', +]); +``` + +- [ ] **Step 6: Run prompt and Notion note tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/ingest-prompts.test.ts \ + src/ingest/adapters/notion/notion.adapter.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +Run: + +```bash +git add \ + packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \ + packages/context/src/ingest/ingest-prompts.test.ts \ + packages/context/src/ingest/adapters/notion/chunk.ts \ + packages/context/src/ingest/adapters/notion/notion.adapter.test.ts +git commit -m "fix(context): update ingest prompts for warehouse verification tools" +``` + +### Task 3: Enforce allowed connection scope in discover_data + +**Files:** +- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts` + +- [ ] **Step 1: Write the failing scoping test** + +Add this test to +`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`: + +```ts +it('refuses explicit out-of-scope connection names', async () => { + const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context); + + expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.'); + expect(result.structured).toEqual({ wiki: null, sl: null, raw: null }); + expect(wikiSearchTool.call).not.toHaveBeenCalled(); + expect(slDiscoverTool.call).not.toHaveBeenCalled(); + expect(catalog.searchByName).not.toHaveBeenCalled(); +}); +``` + +- [ ] **Step 2: Run the failing scoping test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "out-of-scope" +``` + +Expected: FAIL because `discover_data` currently searches raw schema for an +explicit `connectionName` even when it is not in `allowedConnectionNames`. 
+ +- [ ] **Step 3: Add the scope guard** + +In +`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`, +add this helper near `totalSources()`: + +```ts +function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null { + return context.session?.allowedConnectionNames ?? null; +} +``` + +At the top of `DiscoverDataTool.call()`, before the `sourceName` branch and +before calling any child tool, add: + +```ts + const allowed = allowedConnectionNames(context); + if (input.connectionName && allowed && !allowed.has(input.connectionName)) { + return { + markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`, + structured: { wiki: null, sl: null, raw: null }, + }; + } +``` + +Then replace the raw connection-list construction with: + +```ts + const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort(); +``` + +- [ ] **Step 4: Run discover_data tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +git commit -m "fix(context): scope raw schema discovery to allowed connections" +``` + +### Task 4: Fix column-level entity_details verification + +**Files:** +- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts` + +- [ ] **Step 1: Write failing catalog column-target tests** + +First update `seedLiveDatabaseScan()` in that test file so BigQuery tables have +a project/catalog. Replace the repeated inline table refs with: + +```ts +const tableRef = { + catalog: driver === 'bigquery' ? 'analytics' : null, + db: driver === 'sqlite' ? null : 'public', + name: 'orders', +}; +``` + +Use `tableRef.catalog`, `tableRef.db`, and `tableRef.name` for the seeded +table and profile table references. 
+ +Then add these tests to +`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`: + +```ts +it('resolves postgres column display strings without treating the column as a table', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({ + resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'postgres', + }); +}); + +it('resolves BigQuery column display strings with four parts', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({ + resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'bigquery', + }); +}); +``` + +- [ ] **Step 2: Run the failing catalog tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts -t "column display" +``` + +Expected: FAIL because `resolveDisplayTarget()` does not exist. 
+ +- [ ] **Step 3: Implement column-aware display resolution** + +In +`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`, +add this exported interface near `RawSchemaHit`: + +```ts +export interface DisplayTargetResolution { + resolved: (KtxTableRef & { column?: string }) | null; + candidates: KtxTableRef[]; + dialect: string; +} +``` + +Add these helpers near `parseDisplay()`: + +```ts +function expectedDisplayPartCount(driver: CatalogDriver): number { + if (driver === 'sqlite' || driver === 'sqlite3') { + return 1; + } + if (driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver') { + return 3; + } + return 2; +} + +function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null { + const parts = splitDisplay(display); + const tablePartCount = expectedDisplayPartCount(driver); + if (parts.length !== tablePartCount + 1) { + return null; + } + const column = parts.at(-1); + if (!column) { + return null; + } + const table = parseDisplay(driver, parts.slice(0, -1).join('.')); + return table ? 
{ ...table, column } : null; +} +``` + +Add this method to `WarehouseCatalogService` after `resolveDisplay()`: + +```ts + async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> { + const catalog = await this.loadCatalog(connectionName); + if (!catalog) { + return { resolved: null, candidates: [], dialect: 'unknown' }; + } + + const dialect = getDialectForDriver(catalog.driver).type; + const tableResolution = await this.resolveDisplay(connectionName, display); + if (tableResolution.resolved) { + return tableResolution; + } + + const parsedColumn = parseColumnDisplay(catalog.driver, display); + if (!parsedColumn) { + return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect }; + } + + const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn)); + if (!table) { + return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect }; + } + + return { + resolved: { + catalog: table.catalog, + db: table.db, + name: table.name, + column: parsedColumn.column, + }, + candidates: [], + dialect, + }; + } +``` + +- [ ] **Step 4: Write failing entity_details column tests** + +Add these tests to +`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`: + +```ts +it('resolves display targets that include a column name', async () => { + const result = await tool.call( + { connectionName: 'warehouse', targets: [{ display: 'public.orders.status' }] }, + context, + ); + + expect(result.markdown).toContain('### public.orders'); + expect(result.markdown).toContain('- status (text, nullable=false)'); + expect(result.markdown).not.toContain('- id (integer'); + expect(result.structured.resolved).toHaveLength(1); + expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']); +}); + +it('reports missing explicit columns instead of returning an empty column list', async () => { + const result = await tool.call( + { 
connectionName: 
'warehouse', targets: [{ display: 'public.orders.plan_tier' }] }, + context, + ); + + expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier'); + expect(result.markdown).toContain('Available columns: id, status'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); +}); +``` + +- [ ] **Step 5: Run the failing entity_details tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts -t "column" +``` + +Expected: FAIL because display column targets are treated as table names and +missing columns are not reported. + +- [ ] **Step 6: Use column-aware resolution in entity_details** + +In +`packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`, +add this helper near `appendTableMarkdown()`: + +```ts +function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null { + const normalized = columnName.toLowerCase(); + return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null; +} +``` + +Replace the display resolution block inside the `for (const target of +input.targets)` loop with: + +```ts + const resolution = + 'display' in target + ? 
await catalog.resolveDisplayTarget(input.connectionName, target.display) + : { + resolved: { catalog: target.catalog, db: target.db, name: target.name, column: target.column }, + candidates: [], + dialect: '', + }; +``` + +After `const detail = await catalog.getTable(...)`, replace the existing +`resolved.push(detail); appendTableMarkdown(...)` lines with: + +```ts + const requestedColumn = resolution.resolved.column; + if (requestedColumn) { + const column = findColumn(detail, requestedColumn); + if (!column) { + missing.push({ + target, + candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }], + }); + parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`); + parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`); + continue; + } + const scopedDetail = { ...detail, columns: [column] }; + resolved.push(scopedDetail); + appendTableMarkdown(parts, scopedDetail, column.name); + continue; + } + + resolved.push(detail); + appendTableMarkdown(parts, detail); +``` + +- [ ] **Step 7: Run warehouse verification tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +git commit -m "fix(context): verify warehouse column display targets" +``` + +### Task 5: Verify the v1 gap closure + +**Files:** +- Verify all files changed by Tasks 1-4. 
+ +- [ ] **Step 1: Run focused tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/adapters/notion/notion.adapter.test.ts \ + src/ingest/local-adapters.test.ts \ + src/ingest/ingest-prompts.test.ts \ + src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run package tests** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. + +- [ ] **Step 4: Run pre-commit on changed files when configured** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/ingest/adapters/notion/notion.adapter.ts \ + packages/context/src/ingest/adapters/notion/notion.adapter.test.ts \ + packages/context/src/ingest/local-adapters.ts \ + packages/context/src/ingest/local-adapters.test.ts \ + packages/context/src/ingest/adapters/notion/chunk.ts \ + packages/context/prompts/memory_agent_bundle_ingest_work_unit.md \ + packages/context/src/ingest/ingest-prompts.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +``` + +Expected: PASS. If the repo has no pre-commit config or the local `uv` version +cannot satisfy the project pin, record the exact error and rely on focused +tests plus type-check. 
+ +- [ ] **Step 5: Inspect final git status** + +Run: + +```bash +git status --short +``` + +Expected: only intentional files are modified. Commit any formatter-driven +changes with: + +```bash +git add packages/context +git commit -m "chore(context): verify warehouse verification v1 gaps" +``` + +## Self-review checklist + +- Spec coverage: this plan closes the remaining v1 paths for Notion warehouse + verification, active WorkUnit prompt correctness, raw discovery scoping, and + column-level identifier verification. +- Placeholder scan: no task relies on future-work markers, unnamed edge-case + handling, or cross-task shorthand. +- Type consistency: `discover_data` continues to use `connectionName`, + `sl_discover` still receives `connectionId` internally, and + `resolveDisplayTarget()` returns the same table identity plus optional + `column`. diff --git a/docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md b/docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md new file mode 100644 index 00000000..f48fea36 --- /dev/null +++ b/docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md @@ -0,0 +1,957 @@ +# Warehouse Verification Final V1 Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1 gaps that still prevent ingest agents from +reliably following warehouse verification results through to `entity_details` +and `sql_execution`. + +**Architecture:** Keep the existing warehouse verification module and runner +session scoping. Add connection names to raw discovery hits, expose primary +warehouse targets from the remaining source adapters, and make local ingest +SQL probes use the same scan connector read-only execution path as schema scan. 
+ +**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX local +ingest runtime, KTX scan connectors. + +--- + +## Audit summary + +The first two implementation plans landed the warehouse verification tools, +prompt protocol, Notion warehouse scoping, and stale prompt-name cleanup. The +focused audit on May 12, 2026, found three remaining v1-blocking gaps: + +- `discover_data` searches multiple allowed raw warehouse scans, but raw hits do + not carry or render `connectionName`. The tool tells the agent to call + `entity_details({connectionName, targets: [...]})`, then omits the required + `connectionName` from the follow-up evidence. +- Local LookML and MetricFlow adapters do not expose primary warehouse target + IDs. The runner only adds adapter-provided targets to `allowedConnectionNames`, + so those WorkUnits cannot use raw warehouse verification unless their source + connection is itself the warehouse. +- `sql_execution` calls the local ingest connection catalog, but the catalog + either has no query executor in normal CLI ingest or calls an injected + executor without `projectDir` and connection config. The default local query + executor cannot dispatch without that config. + +Non-blocking gaps remain out of scope for this v1 plan: + +- Full DDL-style `entity_details` formatting with FK profile summaries. +- AST-backed SQL read-only validation for data-modifying CTE bodies. +- Search over generated `enrichment/descriptions.json`. +- Lexicographic latest-sync edge cases for non-timestamp sync IDs. +- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`. + +## File structure + +Modify these files: + +- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`: + add `connectionName` to raw schema hit records. +- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`: + render raw hit connection names and preserve them in structured output. 
+- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`: + cover multi-connection raw discovery follow-up data. +- `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`: + accept and return configured target warehouse connection IDs. +- `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`: + cover LookML target warehouse IDs. +- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`: + accept and return configured target warehouse connection IDs. +- `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`: + cover MetricFlow target warehouse IDs. +- `packages/context/src/ingest/local-adapters.ts`: + pass primary warehouse IDs into LookML and MetricFlow adapters. +- `packages/context/src/ingest/local-adapters.test.ts`: + cover local adapter warehouse target fan-out. +- `packages/context/src/ingest/local-bundle-runtime.ts`: + pass full project connection config to local ingest query executors. +- `packages/context/src/ingest/local-bundle-runtime.test.ts`: + cover the local ingest query executor call shape. +- `packages/context/src/ingest/local-ingest.ts`: + use the shared query executor port type. +- `packages/context/src/mcp/local-project-ports.ts`: + no behavior change expected, but type-checks against the updated local ingest + query executor type. +- `packages/cli/src/ingest.ts`: + provide a read-only scan-connector-backed query executor for normal local + ingest runs. + +Create these files: + +- `packages/cli/src/ingest-query-executor.ts`: CLI query executor that adapts + scan connectors' `executeReadOnly()` method to `KtxSqlQueryExecutorPort`. +- `packages/cli/src/ingest-query-executor.test.ts`: unit coverage for the CLI + ingest query executor. 
+ +### Task 1: Preserve raw discovery connection names + +**Files:** +- Modify: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts` + +- [ ] **Step 1: Write the failing multi-connection discovery test** + +Add this test to +`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`: + +```ts + it('includes connectionName on raw schema hits so entity_details can follow up', async () => { + const multiConnectionContext: ToolContext = { + ...context, + session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any, + }; + catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [ + { + kind: 'table', + connectionName, + ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` }, + display: `public.${connectionName}_${query}`, + matchedOn: 'name', + }, + ]); + + const result = await tool.call({ query: 'orders', limit: 10 }, multiConnectionContext); + + expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10); + expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10); + expect(result.markdown).toContain('connectionName=analytics'); + expect(result.markdown).toContain('connectionName=warehouse'); + expect(result.markdown).toContain( + 'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})', + ); + expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual([ + 'analytics', + 'warehouse', + ]); + }); +``` + +- [ ] **Step 2: Run the failing discovery test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts -t "connectionName on raw schema hits" +``` + +Expected: FAIL because 
`RawSchemaHit` has no `connectionName` property and the +markdown only renders the display string. + +- [ ] **Step 3: Add `connectionName` to raw schema hits** + +Modify the raw hit type and hit construction in +`packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`: + +```ts +export type RawSchemaHit = + | { + kind: 'table'; + connectionName: string; + ref: KtxTableRef; + display: string; + matchedOn: 'name' | 'db' | 'comment' | 'description'; + } + | { + kind: 'column'; + connectionName: string; + ref: KtxTableRef & { column: string }; + display: string; + matchedOn: 'name' | 'comment' | 'description'; + }; +``` + +In the table hit block, add `connectionName`: + +```ts + hits.push({ + kind: 'table', + connectionName, + ref: { catalog: table.catalog, db: table.db, name: table.name }, + display: formatDisplay(catalog.driver, table), + matchedOn: tableMatch, + }); +``` + +In the column hit block, add `connectionName`: + +```ts + hits.push({ + kind: 'column', + connectionName, + ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name }, + display: `${formatDisplay(catalog.driver, table)}.${column.name}`, + matchedOn: columnMatch, + }); +``` + +- [ ] **Step 4: Render follow-up-ready raw hits** + +Modify the raw schema markdown in +`packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`: + +```ts + parts.push('## Raw Warehouse Schema', '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values'); + parts.push( + rawHits + .slice(0, limit) + .map( + (hit) => + `- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) — ` + + `follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``, + ) + .join('\n'), + ); +``` + +- [ ] **Step 5: Run the discovery test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run 
src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +git commit -m "fix(context): include raw discovery connection names" +``` + +### Task 2: Expose LookML and MetricFlow warehouse targets + +**Files:** +- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts` +- Modify: `packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts` +- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts` +- Modify: `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts` +- Modify: `packages/context/src/ingest/local-adapters.ts` +- Modify: `packages/context/src/ingest/local-adapters.test.ts` + +- [ ] **Step 1: Write failing adapter target tests** + +Add this test to +`packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts`: + +```ts + it('returns configured target warehouse connection ids', async () => { + const adapter = new LookmlSourceAdapter({ + homeDir: join(tmpRoot, 'home'), + targetConnectionIds: ['warehouse', 'analytics', 'warehouse'], + }); + + await expect(adapter.listTargetConnectionIds?.(join(tmpRoot, 'staged'))).resolves.toEqual([ + 'analytics', + 'warehouse', + ]); + }); +``` + +Add this test to +`packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts`: + +```ts + it('returns configured target warehouse connection ids', async () => { + const metricflow = new MetricflowSourceAdapter({ + homeDir: join(tmpRoot, 'cache-home'), + targetConnectionIds: ['warehouse', 'analytics', 'warehouse'], + }); + + await expect(metricflow.listTargetConnectionIds?.(stagedDir)).resolves.toEqual([ + 'analytics', + 'warehouse', + ]); + }); 
+``` + +- [ ] **Step 2: Run the failing adapter tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/adapters/lookml/lookml.adapter.test.ts -t "target warehouse connection ids" \ + src/ingest/adapters/metricflow/metricflow.adapter.test.ts -t "target warehouse connection ids" +``` + +Expected: FAIL because neither adapter accepts `targetConnectionIds` or +implements `listTargetConnectionIds()`. + +- [ ] **Step 3: Implement target ID support in LookML** + +Modify `packages/context/src/ingest/adapters/lookml/lookml.adapter.ts`: + +```ts +export interface LookmlSourceAdapterDeps { + homeDir: string; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? [])].sort((left, right) => left.localeCompare(right)); +} +``` + +Add this method to `LookmlSourceAdapter`: + +```ts + async listTargetConnectionIds(_stagedDir: string): Promise<string[]> { + return uniqueSorted(this.deps.targetConnectionIds); + } +``` + +- [ ] **Step 4: Implement target ID support in MetricFlow** + +Modify `packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts`: + +```ts +export interface MetricflowSourceAdapterDeps { + homeDir: string; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? 
[])].sort((left, right) => left.localeCompare(right)); +} +``` + +Add this method to `MetricflowSourceAdapter`: + +```ts + async listTargetConnectionIds(_stagedDir: string): Promise<string[]> { + return uniqueSorted(this.deps.targetConnectionIds); + } +``` + +- [ ] **Step 5: Pass primary warehouses from the local adapter factory** + +Modify the LookML and MetricFlow adapter construction in +`packages/context/src/ingest/local-adapters.ts`: + +```ts + new LookmlSourceAdapter({ + homeDir: join(project.projectDir, '.ktx/cache'), + targetConnectionIds: primaryWarehouseConnectionIds(project), + }), +``` + +```ts + new MetricflowSourceAdapter({ + homeDir: join(project.projectDir, '.ktx/cache'), + targetConnectionIds: primaryWarehouseConnectionIds(project), + }), +``` + +- [ ] **Step 6: Write the local adapter fan-out test** + +Add this test to `packages/context/src/ingest/local-adapters.test.ts`: + +```ts + it('passes primary warehouse connection ids to local LookML and MetricFlow adapters', async () => { + const adapters = createDefaultLocalIngestAdapters( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + lookml_docs: { + driver: 'lookml', + lookml: { + repoUrl: 'https://github.com/acme/lookml.git', + }, + }, + metrics_repo: { + driver: 'metricflow', + metricflow: { + repoUrl: 'https://github.com/acme/metrics.git', + }, + }, + } as never), + ); + + const lookml = adapters.find((adapter) => adapter.source === 'lookml'); + const metricflow = adapters.find((adapter) => adapter.source === 'metricflow'); + + await expect(lookml?.listTargetConnectionIds?.('/tmp/staged-lookml')).resolves.toEqual([ + 'warehouse', + ]); + await expect(metricflow?.listTargetConnectionIds?.('/tmp/staged-metricflow')).resolves.toEqual([ + 'warehouse', + ]); + }); +``` + +- [ ] **Step 7: Run the target fan-out tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + 
src/ingest/adapters/lookml/lookml.adapter.test.ts 
\ + src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + src/ingest/local-adapters.test.ts +``` + +Expected: PASS. + +- [ ] **Step 8: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \ + packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + packages/context/src/ingest/local-adapters.ts \ + packages/context/src/ingest/local-adapters.test.ts +git commit -m "fix(context): expose warehouse targets for LookML and MetricFlow" +``` + +### Task 3: Pass full connection config to local ingest SQL execution + +**Files:** +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.test.ts` +- Modify: `packages/context/src/ingest/local-ingest.ts` + +- [ ] **Step 1: Write the failing local connection catalog test** + +In `packages/context/src/ingest/local-bundle-runtime.test.ts`, change the +Vitest import to include `vi`: + +```ts +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +``` + +Extend `RuntimeWithConnectionDeps`: + +```ts +type RuntimeWithConnectionDeps = { + deps: { + connections: { + listEnabledConnections(ids: string[]): Promise>; + getConnectionById(connectionId: string): Promise<{ id: string; name: string; connectionType: string } | null>; + executeQuery(connectionId: string, sql: string): Promise; + }; + }; +}; +``` + +Add this test: + +```ts + it('passes project connection config to local ingest query executors', async () => { + const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any }); + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + + const runtime = createLocalBundleIngestRuntime({ + 
project, + adapters: [new FakeSourceAdapter()], + agentRunner, + queryExecutor, + }); + const connections = (runtime.runner as unknown as RuntimeWithConnectionDeps).deps.connections; + + await expect(connections.executeQuery('warehouse', 'select 1')).resolves.toMatchObject({ + headers: ['answer'], + }); + expect(queryExecutor.execute).toHaveBeenCalledWith({ + connectionId: 'warehouse', + projectDir: project.projectDir, + connection: project.config.connections.warehouse, + sql: 'select 1', + }); + }); +``` + +- [ ] **Step 2: Run the failing local runtime test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config" +``` + +Expected: FAIL because `LocalConnectionCatalog.executeQuery()` only passes +`connectionId` and `sql`. + +- [ ] **Step 3: Update local ingest query executor types** + +In `packages/context/src/ingest/local-bundle-runtime.ts`, import the shared +query executor type: + +```ts +import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js'; +``` + +Change `CreateLocalBundleIngestRuntimeOptions.queryExecutor` to: + +```ts + queryExecutor?: KtxSqlQueryExecutorPort; +``` + +Change `LocalConnectionCatalog` to store that type: + +```ts +class LocalConnectionCatalog implements SlConnectionCatalogPort { + constructor( + private readonly project: KtxLocalProject, + private readonly queryExecutor?: KtxSqlQueryExecutorPort, + ) {} +``` + +Change `executeQuery()`: + +```ts + async executeQuery(connectionId: string, sql: string): Promise { + if (!this.queryExecutor) { + throw new Error('Local ingest has no query executor configured'); + } + return this.queryExecutor.execute({ + connectionId, + projectDir: this.project.projectDir, + connection: this.project.config.connections[connectionId], + sql, + }); + } +``` + +In `packages/context/src/ingest/local-ingest.ts`, replace the local query +executor object type with the shared port: + +```ts +import 
type { KtxSqlQueryExecutorPort } from '../connections/index.js'; +``` + +```ts + queryExecutor?: KtxSqlQueryExecutorPort; +``` + +- [ ] **Step 4: Run the local runtime test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts -t "project connection config" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts \ + packages/context/src/ingest/local-ingest.ts +git commit -m "fix(context): pass connection config to ingest query executors" +``` + +### Task 4: Supply a scan-connector query executor to CLI ingest + +**Files:** +- Create: `packages/cli/src/ingest-query-executor.ts` +- Create: `packages/cli/src/ingest-query-executor.test.ts` +- Modify: `packages/cli/src/ingest.ts` + +- [ ] **Step 1: Write the CLI query executor tests** + +Create `packages/cli/src/ingest-query-executor.test.ts`: + +```ts +import type { KtxLocalProject } from '@ktx/context/project'; +import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan'; +import { describe, expect, it, vi } from 'vitest'; +import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; + +function project(): KtxLocalProject { + return { + projectDir: '/tmp/ktx-query-project', + config: { + project: 'warehouse', + connections: { + warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' }, + }, + }, + } as unknown as KtxLocalProject; +} + +function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector { + return { + id: 'warehouse', + driver: 'postgres', + capabilities: createKtxConnectorCapabilities({ readOnlySql: true }), + async introspect() { + throw new Error('introspect is not used by this test'); + }, + executeReadOnly: vi.fn(async () => ({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + rowCount: 1, + })), + cleanup: vi.fn(async () => {}), + 
...overrides, + }; +} + +describe('createKtxCliIngestQueryExecutor', () => { + it('executes read-only SQL through the scan connector and cleans it up', async () => { + const scanConnector = connector(); + const createConnector = vi.fn(async () => scanConnector); + const executor = createKtxCliIngestQueryExecutor(project(), { createConnector }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' }, + projectDir: '/tmp/ktx-query-project', + sql: 'select 1', + maxRows: 5, + }), + ).resolves.toMatchObject({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + }); + + expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse'); + expect(scanConnector.executeReadOnly).toHaveBeenCalledWith( + { connectionId: 'warehouse', sql: 'select 1', maxRows: 5 }, + { runId: 'ingest-sql-execution' }, + ); + expect(scanConnector.cleanup).toHaveBeenCalledTimes(1); + }); + + it('rejects connectors without read-only SQL support', async () => { + const scanConnector = connector({ + capabilities: createKtxConnectorCapabilities({ readOnlySql: false }), + executeReadOnly: undefined, + }); + const executor = createKtxCliIngestQueryExecutor(project(), { + createConnector: vi.fn(async () => scanConnector), + }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres' }, + projectDir: '/tmp/ktx-query-project', + sql: 'select 1', + }), + ).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.'); + expect(scanConnector.cleanup).toHaveBeenCalledTimes(1); + }); +}); +``` + +- [ ] **Step 2: Run the failing CLI query executor test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts +``` + +Expected: FAIL because `ingest-query-executor.ts` does not exist. 
+ +- [ ] **Step 3: Add the scan-connector-backed query executor** + +Create `packages/cli/src/ingest-query-executor.ts`: + +```ts +import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections'; +import type { KtxLocalProject } from '@ktx/context/project'; +import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan'; +import { createKtxCliScanConnector } from './local-scan-connectors.js'; + +type CreateConnector = typeof createKtxCliScanConnector; + +export interface KtxCliIngestQueryExecutorDeps { + createConnector?: CreateConnector; +} + +async function cleanupConnector(connector: KtxScanConnector | null): Promise<void> { + await connector?.cleanup?.(); +} + +export function createKtxCliIngestQueryExecutor( + project: KtxLocalProject, + deps: KtxCliIngestQueryExecutorDeps = {}, +): KtxSqlQueryExecutorPort { + const createConnector = deps.createConnector ?? createKtxCliScanConnector; + return { + async execute(input: KtxSqlQueryExecutionInput) { + let connector: KtxScanConnector | null = null; + try { + connector = await createConnector(project, input.connectionId); + if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { + throw new Error( + `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`, + ); + } + + const ctx: KtxScanContext = { runId: 'ingest-sql-execution' }; + const result = await connector.executeReadOnly( + { connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows }, + ctx, + ); + return { + headers: result.headers, + rows: result.rows, + totalRows: result.totalRows, + command: 'SELECT', + rowCount: result.rowCount, + }; + } finally { + await cleanupConnector(connector); + } + }, + }; +} +``` + +- [ ] **Step 4: Wire the CLI executor into local ingest runs** + +In `packages/cli/src/ingest.ts`, import the executor and type: + +```ts +import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections'; +import type { 
KtxLocalProject } from '@ktx/context/project'; +import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; +``` + +Extend `KtxIngestDeps`: + +```ts + createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort; +``` + +Inside the `args.command === 'run'` branch, after `localIngestOptions` is +defined, add: + +```ts + const queryExecutor = + localIngestOptions.queryExecutor ?? + (deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project); +``` + +Pass `queryExecutor` to both local ingest execution paths. In the Metabase +fan-out call: + +```ts + ...localIngestOptions, + queryExecutor, + trigger: 'manual_resync', +``` + +In the normal local ingest call: + +```ts + ...localIngestOptions, + queryExecutor, + pullConfigOptions: adapterOptions, +``` + +- [ ] **Step 5: Add CLI wiring coverage** + +Add this test to `packages/cli/src/ingest.test.ts`: + +```ts + it('supplies a scan-connector query executor to local ingest runs', async () => { + const io = makeIo(); + const projectDir = join(tempDir, 'query-executor-project'); + await writeWarehouseConfig(projectDir); + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: [], + rows: [], + totalRows: 0, + command: 'SELECT', + rowCount: 0, + })), + }; + const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions): Promise => + completedLocalBundleRun(input, 'query-executor-run'), + ); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'json', + }, + io.io, + { + runLocalIngest, + createAdapters: () => [], + createQueryExecutor: () => queryExecutor, + }, + ), + ).resolves.toBe(0); + + expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor })); + }); +``` + +- [ ] **Step 6: Run CLI query executor tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "query executor" +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +Run: + +```bash +git add \ + packages/cli/src/ingest-query-executor.ts \ + packages/cli/src/ingest-query-executor.test.ts \ + packages/cli/src/ingest.ts \ + packages/cli/src/ingest.test.ts +git commit -m "fix(cli): enable read-only SQL probes for local ingest" +``` + +### Task 5: Final verification + +**Files:** +- Verify: all files changed by Tasks 1-4. + +- [ ] **Step 1: Run focused context tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + src/ingest/tools/warehouse-verification/entity-details.tool.test.ts \ + src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts \ + src/ingest/local-bundle-runtime.test.ts \ + src/ingest/local-adapters.test.ts \ + src/ingest/adapters/lookml/lookml.adapter.test.ts \ + src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + src/ingest/ingest-bundle.runner.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run focused CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Run type checks** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +pnpm --filter @ktx/cli run type-check +``` + +Expected: both commands pass. 
+ +- [ ] **Step 4: Run pre-commit on changed files if configured** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + packages/context/src/ingest/adapters/lookml/lookml.adapter.ts \ + packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts \ + packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts \ + packages/context/src/ingest/local-adapters.ts \ + packages/context/src/ingest/local-adapters.test.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/local-bundle-runtime.test.ts \ + packages/context/src/ingest/local-ingest.ts \ + packages/cli/src/ingest-query-executor.ts \ + packages/cli/src/ingest-query-executor.test.ts \ + packages/cli/src/ingest.ts \ + packages/cli/src/ingest.test.ts \ + docs/superpowers/plans/2026-05-12-warehouse-verification-final-v1-closure.md +``` + +Expected: PASS. If the repository has no pre-commit config or the local `uv` +version cannot satisfy the configured toolchain, record the exact error and use +the focused test and type-check results as the closest verification. + +- [ ] **Step 5: Commit final verification fixes if any were needed** + +If verification required edits, run: + +```bash +git add <files-changed-during-verification> +git commit -m "test: cover warehouse verification v1 closure" +``` + +If verification required no edits, do not create an empty commit. + +## Self-review + +Spec coverage: + +- Raw warehouse discovery still covers wiki, semantic-layer, and raw schema + results, and now raw hits include the connection name needed by the required + `entity_details` follow-up. 
+- Every local synthesis adapter with an external source connection now has a + path to target warehouse IDs: dbt and Notion already had it, Looker resolves + staged mappings, Metabase fan-out runs under target warehouse IDs, and this + plan adds LookML and MetricFlow. +- `sql_execution` remains scoped by `allowedConnectionNames`, retains the + read-only SQL wrapper, and gains a normal local ingest execution backend. + +Placeholder scan: + +- This plan contains no deferred implementation placeholders. +- Every code-changing step includes the exact test or implementation snippet to + add. + +Type consistency: + +- `connectionName` is added to `RawSchemaHit` and used by `DiscoverDataTool`. +- `targetConnectionIds` and `listTargetConnectionIds()` match the existing dbt + and Notion adapter pattern. +- Local ingest uses `KtxSqlQueryExecutorPort` consistently from CLI to context. diff --git a/docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md b/docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md new file mode 100644 index 00000000..42bb7f44 --- /dev/null +++ b/docs/superpowers/plans/2026-05-12-warehouse-verification-tools.md @@ -0,0 +1,1617 @@ +# Warehouse Verification Tools Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add synthesis-time warehouse verification tools so ingest agents can verify raw warehouse tables, columns, and sample values before writing wiki pages, SL sources, `tables:` frontmatter, `sl_refs`, or unmapped fallback records. + +**Architecture:** Add a raw scan catalog service over `raw-sources/<connectionName>/live-database/<syncId>/`, three BaseTool-backed ingest tools, and runner/tool-session scoping for allowed warehouse connections. 
Register the tools in the local ingest toolset so both WorkUnit and reconcile stages receive them through the existing `toAiSdkTools()` path. + +**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX file store, KTX semantic layer and wiki tools. + +--- + +## Audit summary + +The current repo has the original spec file only; no matching plan or implementation exists under `docs/superpowers/plans`. The following v1-blocking gaps remain: + +- `packages/context/src/connections/dialects.ts` does not exist. +- `packages/context/src/ingest/tools/warehouse-verification/` does not exist. +- `entity_details`, `sql_execution`, and `discover_data` are not available to ingest WU or reconcile toolsets. +- `ToolSession` does not carry the ingest stage's allowed warehouse connection IDs. +- Prompt updates are absent from the 11 writer skills named in the spec. +- Cleanup strings remain: `orbit_analytics.customer`, `wiki_sl_search`, and `sl_describe_table`. +- Prompt-bundling and warehouse-tool tests are absent. + +Non-blocking gaps remain out of scope for this plan: + +- Hard write-time validation in `wiki_write` and `emit_unmapped_fallback`. +- `dictionary_search`. +- `semantic_query` in synthesis toolsets. +- A raw-schema FTS index. +- A UUID identity layer for tables and columns. + +One repo-specific adjustment is required: do not import `@ktx/connector-*` +dialect classes into `@ktx/context`, because every connector package already +depends on `@ktx/context`. Add a minimal context-local dialect dispatch instead. + +## File structure + +Create these files: + +- `packages/context/src/connections/dialects.ts`: Context-local driver dispatch for identifier quoting and display formatting. +- `packages/context/src/connections/dialects.test.ts`: Driver dispatch and display-format tests. 
+- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts`: Reads the latest live-database scan, resolves display identifiers, and searches table and column metadata. +- `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`: Fixture-backed catalog tests. +- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`: `entity_details` ingest tool. +- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`: Tool contract tests. +- `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`: `sql_execution` ingest tool. +- `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts`: Read-only SQL and output tests. +- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`: `discover_data` ingest tool composing wiki, SL, and raw-schema search. +- `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`: Discovery composition tests. +- `packages/context/src/ingest/tools/warehouse-verification/index.ts`: Exports tool classes and `createWarehouseVerificationTools()`. +- `packages/context/skills/_shared/identifier-verification.md`: Shared protocol text kept in the tree for review even though writer skills inline it. + +Modify these files: + +- `packages/context/src/connections/index.ts`: Export the dialect helper. +- `packages/context/src/tools/tool-session.ts`: Add `allowedConnectionNames`. +- `packages/context/src/ingest/ingest-bundle.runner.ts`: Populate `allowedConnectionNames` for WU and reconcile sessions. +- `packages/context/src/ingest/local-bundle-runtime.ts`: Register the warehouse verification tools in `LocalIngestToolsetFactory`. +- `packages/context/src/ingest/ingest-bundle.runner.test.ts`: Assert the runner scopes allowed warehouse connections. 
+- `packages/context/src/memory/memory-runtime-assets.test.ts`: Assert writer skills contain the protocol and banned strings are gone. +- `packages/context/src/ingest/ingest-runtime-assets.test.ts`: Assert ingest skill packaging includes the protocol. +- `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts`: Replace the fictional table example. +- `packages/context/src/sl/tools/sl-warehouse-validation.ts`: Replace the stale `sl_describe_table` hint. +- `packages/context/skills/*/SKILL.md`: Inline protocol updates for the writer skills listed in the spec. + +### Task 1: Add context-local dialect dispatch + +**Files:** +- Create: `packages/context/src/connections/dialects.ts` +- Create: `packages/context/src/connections/dialects.test.ts` +- Modify: `packages/context/src/connections/index.ts` + +- [ ] **Step 1: Write the failing dialect tests** + +Create `packages/context/src/connections/dialects.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { getDialectForDriver } from './dialects.js'; + +describe('getDialectForDriver', () => { + it.each([ + ['postgres', '"public"."orders"'], + ['postgresql', '"public"."orders"'], + ['mysql', '`public`.`orders`'], + ['clickhouse', '`public`.`orders`'], + ['sqlite', '"orders"'], + ['snowflake', '"analytics"."public"."orders"'], + ['bigquery', '`analytics`.`public`.`orders`'], + ['sqlserver', '[analytics].[public].[orders]'], + ] as const)('formats table names for %s', (driver, expected) => { + const dialect = getDialectForDriver(driver); + expect( + dialect.formatTableName({ + catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null, + db: driver === 'sqlite' ? null : 'public', + name: 'orders', + }), + ).toBe(expected); + }); + + it('throws with a supported-driver list for unknown drivers', () => { + expect(() => getDialectForDriver('oracle')).toThrow( + 'Unsupported warehouse driver "oracle". 
Supported drivers: bigquery, clickhouse, mysql, postgres, postgresql, sqlite, sqlite3, snowflake, sqlserver', + ); + }); +}); +``` + +- [ ] **Step 2: Run the failing test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts +``` + +Expected: FAIL because `./dialects.js` does not exist. + +- [ ] **Step 3: Add the minimal dialect implementation** + +Create `packages/context/src/connections/dialects.ts`: + +```ts +import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js'; + +export type SupportedDriver = + | 'postgres' + | 'postgresql' + | 'mysql' + | 'sqlserver' + | 'snowflake' + | 'bigquery' + | 'clickhouse' + | 'sqlite' + | 'sqlite3'; + +export interface KtxDialect { + readonly type: SupportedDriver; + quoteIdentifier(identifier: string): string; + formatTableName(table: KtxTableRef): string; + mapToDimensionType(nativeType: string): KtxSchemaDimensionType; +} + +const supportedDrivers: SupportedDriver[] = [ + 'bigquery', + 'clickhouse', + 'mysql', + 'postgres', + 'postgresql', + 'sqlite', + 'sqlite3', + 'snowflake', + 'sqlserver', +]; + +function doubleQuoted(identifier: string): string { + return `"${identifier.replace(/"/g, '""')}"`; +} + +function backtickQuoted(identifier: string): string { + return `\`${identifier.replace(/`/g, '``')}\``; +} + +function bigQueryQuoted(identifier: string): string { + return `\`${identifier.replace(/`/g, '\\`')}\``; +} + +function bracketQuoted(identifier: string): string { + return `[${identifier.replace(/\]/g, ']]')}]`; +} + +function inferDimensionType(nativeType: string): KtxSchemaDimensionType { + const normalized = nativeType.toLowerCase().trim(); + if (normalized.includes('date') || normalized.includes('time')) { + return 'time'; + } + if ( + normalized.includes('int') || + normalized.includes('num') || + normalized.includes('dec') || + normalized.includes('float') || + normalized.includes('double') || + normalized.includes('real') + ) { + return 'number'; + 
} + if (normalized.includes('bool') || normalized === 'bit') { + return 'boolean'; + } + return 'string'; +} + +function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string { + const parts = sqlite ? [table.name] : [table.catalog, table.db, table.name].filter((part): part is string => !!part); + return parts.map(quote).join('.'); +} + +function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect { + return { + type, + quoteIdentifier: quote, + formatTableName: (table) => formatWithParts(table, quote, sqlite), + mapToDimensionType: inferDimensionType, + }; +} + +const dialects: Record<SupportedDriver, KtxDialect> = { + postgres: createDialect('postgres', doubleQuoted), + postgresql: createDialect('postgresql', doubleQuoted), + mysql: createDialect('mysql', backtickQuoted), + clickhouse: createDialect('clickhouse', backtickQuoted), + sqlite: createDialect('sqlite', doubleQuoted, true), + sqlite3: createDialect('sqlite3', doubleQuoted, true), + snowflake: createDialect('snowflake', doubleQuoted), + bigquery: createDialect('bigquery', bigQueryQuoted), + sqlserver: createDialect('sqlserver', bracketQuoted), +}; + +export function getDialectForDriver(driver: string): KtxDialect { + const normalized = driver.toLowerCase().trim(); + if (normalized in dialects) { + return dialects[normalized as SupportedDriver]; + } + throw new Error(`Unsupported warehouse driver "${driver}". Supported drivers: ${supportedDrivers.join(', ')}`); +} +``` + +Modify `packages/context/src/connections/index.ts`: + +```ts +export type { KtxDialect, SupportedDriver } from './dialects.js'; +export { getDialectForDriver } from './dialects.js'; +``` + +- [ ] **Step 4: Run the dialect tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +Run: + +```bash +git add packages/context/src/connections/dialects.ts packages/context/src/connections/dialects.test.ts packages/context/src/connections/index.ts +git commit -m "feat(context): add warehouse dialect dispatch" +``` + +### Task 2: Add the raw scan warehouse catalog service + +**Files:** +- Create: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts` +- Create: `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts` + +- [ ] **Step 1: Write failing catalog tests** + +Create `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts`: + +```ts +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKtxProject, type KtxLocalProject } from '../../../project/index.js'; +import { WarehouseCatalogService } from './warehouse-catalog.service.js'; + +describe('WarehouseCatalogService', () => { + let tempDir: string; + let project: KtxLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-warehouse-catalog-')); + project = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') { + const root = `raw-sources/${connectionName}/live-database/${syncId}`; + await project.fileStore.writeFile( + `${root}/connection.json`, + JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2), + 'ktx', + 'ktx@example.com', + 'seed connection', + ); + await project.fileStore.writeFile( + `${root}/tables/orders.json`, + JSON.stringify( + { + catalog: null, + db: driver === 'sqlite' ? 
null : 'public', + name: 'orders', + kind: 'table', + comment: 'Customer orders', + estimatedRows: 12, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + { + name: 'status', + nativeType: 'text', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: 'Order status', + }, + ], + foreignKeys: [], + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed orders', + ); + await project.fileStore.writeFile( + `${root}/enrichment/relationship-profile.json`, + JSON.stringify( + { + connectionId: connectionName, + driver, + sqlAvailable: true, + queryCount: 3, + tables: [{ table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' }, rowCount: 12 }], + columns: { + 'orders.status': { + table: { catalog: null, db: driver === 'sqlite' ? null : 'public', name: 'orders' }, + column: 'status', + nativeType: 'text', + normalizedType: 'text', + rowCount: 12, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.1667, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + minTextLength: 4, + maxTextLength: 8, + }, + }, + warnings: [], + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed profile', + ); + } + + it('finds the latest sync and merges table schema with relationship profile values', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-1'); + await seedLiveDatabaseScan('warehouse', 'sync-2'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.getLatestSyncId('warehouse')).resolves.toBe('sync-2'); + const detail = await catalog.getTable({ connectionName: 'warehouse', catalog: null, db: 'public', name: 'orders' }); + + expect(detail).toMatchObject({ + connectionName: 'warehouse', + display: 'public.orders', + rowCount: 12, + columns: [ + { name: 'id', nativeType: 'integer', primaryKey: true }, + { 
name: 'status', nativeType: 'text', sampleValues: ['paid', 'refunded'], distinctCount: 2 }, + ], + }); + }); + + it('returns scanAvailable=false when no live-database scan exists', async () => { + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + await expect(catalog.getTable({ connectionName: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull(); + await expect(catalog.hasScan('missing')).resolves.toBe(false); + }); + + it('resolves postgres display strings and returns closest candidates for missing tables', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({ + resolved: { catalog: null, db: 'public', name: 'orders' }, + candidates: [], + dialect: 'postgres', + }); + await expect(catalog.resolveDisplay('warehouse', 'public.orderz')).resolves.toMatchObject({ + resolved: null, + candidates: [{ name: 'orders' }], + }); + }); + + it('treats two-part BigQuery identifiers as ambiguous instead of guessing', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({ + resolved: null, + dialect: 'bigquery', + }); + }); + + it('searches table names, column names, comments, and descriptions', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.searchByName('warehouse', 'status', 10)).resolves.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + kind: 'column', + ref: expect.objectContaining({ db: 'public', name: 'orders', column: 'status' }), + matchedOn: 'name', + }), + ]), + ); + }); +}); +``` + +- [ ] **Step 2: Run the failing catalog 
tests**
+
+Run:
+
+```bash
+pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts
+```
+
+Expected: FAIL because the service file does not exist.
+
+- [ ] **Step 3: Add the catalog service**
+
+Create `packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts` with these exported shapes and behavior:
+
+```ts
+import type { KtxFileStorePort } from '../../../core/index.js';
+import { getDialectForDriver } from '../../../connections/index.js';
+import type { KtxConnectionDriver, KtxSchemaColumn, KtxSchemaForeignKey, KtxSchemaTable, KtxTableRef } from '../../../scan/types.js';
+
+export interface WarehouseCatalogServiceDeps {
+  fileStore: KtxFileStorePort;
+}
+
+export interface WarehouseColumnDetail extends KtxSchemaColumn {
+  // NOTE(review): the type arguments were lost in this copy of the plan;
+  // `<string, string>` is a reconstruction - confirm against the scan types.
+  descriptions: Record<string, string>;
+  rowCount: number | null;
+  nullCount: number | null;
+  distinctCount: number | null;
+  nullRate: number | null;
+  sampleValues: string[];
+}
+
+export interface TableDetail {
+  connectionName: string;
+  catalog: string | null;
+  db: string | null;
+  name: string;
+  display: string;
+  kind: string;
+  comment: string | null;
+  description: string | null;
+  rowCount: number | null;
+  columns: WarehouseColumnDetail[];
+  foreignKeys: KtxSchemaForeignKey[];
+}
+
+export type RawSchemaHit =
+  | { kind: 'table'; ref: KtxTableRef; display: string; matchedOn: 'name' | 'db' | 'comment' | 'description' }
+  | { kind: 'column'; ref: KtxTableRef & { column: string }; display: string; matchedOn: 'name' | 'comment' | 'description' };
+
+interface ConnectionArtifact {
+  driver?: KtxConnectionDriver;
+}
+
+interface RelationshipProfileColumn {
+  table?: KtxTableRef;
+  column?: string;
+  rowCount?: number;
+  nullCount?: number;
+  distinctCount?: number;
+  nullRate?: number;
+  sampleValues?: unknown[];
+}
+
+interface RelationshipProfileArtifact {
+  driver?: KtxConnectionDriver;
+  tables?: Array<{ table?: KtxTableRef; rowCount?: number }>;
+  // Keyed by "<table name>.<column>" in the seeded profile (e.g. 'orders.status').
+  columns?: Record<string, RelationshipProfileColumn>;
+}
+
+interface ConnectionCatalog {
+  connectionName: string;
+  syncId: string;
+  driver: KtxConnectionDriver;
+  tables: KtxSchemaTable[];
+  profile: RelationshipProfileArtifact | null;
+}
+```
+
+The implementation must:
+
+- Use `fileStore.listFiles("raw-sources/<connectionName>/live-database")` and choose the lexicographically latest path ending in `/connection.json`.
+- Read every JSON file under `<syncRoot>/tables/` rather than reconstructing a path from the table ref. This supports encoded and simple table filenames already present in tests.
+- Parse display strings by driver:
+  - Postgres, MySQL, and ClickHouse: `schema.table`.
+  - SQL Server, Snowflake, and BigQuery: `catalog.schema.table`.
+  - SQLite: `table`.
+  - For BigQuery, a two-part display must return `resolved: null` and candidate matches.
+- Match table refs case-insensitively, while preserving stored casing in outputs.
+- Merge relationship-profile fields by `(catalog, db, name, column)`, with fallback matching on `table.name + "." + column`.
+- Cache a loaded connection catalog per `connectionName` within the service instance.
+- Return `null` from `getTable()` when the scan is absent or the table ref is not found.
+
+Use these method signatures:
+
+```ts
+export class WarehouseCatalogService {
+  constructor(private readonly deps: WarehouseCatalogServiceDeps) {}
+
+  async hasScan(connectionName: string): Promise<boolean>;
+  // NOTE(review): `null` for "no scan yet" is a reconstruction - confirm.
+  async getLatestSyncId(connectionName: string): Promise<string | null>;
+  // NOTE(review): element type reconstructed from ConnectionCatalog.tables - confirm.
+  async listTables(connectionName: string): Promise<KtxSchemaTable[]>;
+  async getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null>;
+  async resolveDisplay(connectionName: string, display: string): Promise<{
+    resolved: KtxTableRef | null;
+    candidates: KtxTableRef[];
+    dialect: string;
+  }>;
+  async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]>;
+}
+```
+
+- [ ] **Step 4: Run the catalog tests**
+
+Run:
+
+```bash
+pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts
+```
+
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+Run:
+
+```bash
+git add packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts
+git commit -m "feat(context): read warehouse scan catalog"
+```
+
+### Task 3: Add `entity_details`
+
+**Files:**
+- Create: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`
+- Create: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`
+
+- [ ] **Step 1: Write failing `entity_details` tests**
+
+Create tests that instantiate the tool with a seeded `WarehouseCatalogService` and a `ToolContext` whose session has `allowedConnectionNames: new Set(['warehouse'])`.
Test these cases: + +```ts +it('returns scoped table detail for a display target', async () => { + const result = await tool.call( + { connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, + context, + ); + expect(result.markdown).toContain('### public.orders'); + expect(result.markdown).toContain('- status (text, nullable=false)'); + expect(result.markdown).toContain('sample: ["paid","refunded"]'); + expect(result.structured.scanAvailable).toBe(true); + expect(result.structured.resolved).toHaveLength(1); +}); + +it('returns a no-scan state distinct from not found', async () => { + const result = await tool.call( + { connectionName: 'empty', targets: [{ display: 'public.orders' }] }, + { ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } }, + ); + expect(result.markdown).toContain('No live-database scan available for connection "empty"; run `ktx scan` first.'); + expect(result.structured.scanAvailable).toBe(false); +}); + +it('refuses out-of-scope connections', async () => { + const result = await tool.call( + { connectionName: 'billing', targets: [{ display: 'public.orders' }] }, + context, + ); + expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.'); + expect(result.structured.scanAvailable).toBe(false); +}); +``` + +- [ ] **Step 2: Run the failing tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +``` + +Expected: FAIL because the tool file does not exist. 
+
+- [ ] **Step 3: Implement the tool**
+
+Create `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`:
+
+```ts
+import { z } from 'zod';
+import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
+import type { KtxTableRef } from '../../../scan/types.js';
+import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js';
+
+// A target is either a display string ("public.orders") or an explicit table ref
+// with an optional column to narrow the rendered output.
+const targetSchema = z.union([
+  z.object({ display: z.string().min(1) }),
+  z.object({
+    catalog: z.string().nullable(),
+    db: z.string().nullable(),
+    name: z.string().min(1),
+    column: z.string().optional(),
+  }),
+]);
+
+const entityDetailsInputSchema = z.object({
+  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
+  targets: z.array(targetSchema).min(1).max(50),
+});
+
+type EntityDetailsInput = z.infer<typeof entityDetailsInputSchema>;
+
+export interface EntityDetailsStructured {
+  resolved: TableDetail[];
+  missing: Array<{ target: unknown; candidates: KtxTableRef[] }>;
+  scanAvailable: boolean;
+}
+
+/** Connection names the current session may inspect, or `null` when the session is unscoped. */
+function allowedConnectionNames(context: ToolContext): ReadonlySet<string> | null {
+  return context.session?.allowedConnectionNames ?? null;
+}
+
+/** Renders up to ten sample values as a JSON suffix; empty string when there are none. */
+function sampleText(values: string[]): string {
+  return values.length > 0 ? ` - sample: ${JSON.stringify(values.slice(0, 10))}` : '';
+}
+
+/**
+ * Appends one table section to `parts`. When `columnName` is given only that
+ * column is listed; if it does not exist on the table an explicit marker line is
+ * written so the caller does not read an empty list as "verified".
+ */
+function appendTableMarkdown(parts: string[], detail: TableDetail, columnName?: string): void {
+  const columns = columnName ? detail.columns.filter((column) => column.name === columnName) : detail.columns;
+  parts.push(`### ${detail.display}`);
+  parts.push(`Type: ${detail.kind} | Native columns: ${detail.columns.length}`);
+  if (detail.description || detail.comment) {
+    parts.push(`Description: ${detail.description ?? detail.comment}`);
+  }
+  parts.push('', 'Columns:');
+  if (columnName && columns.length === 0) {
+    parts.push(`- (column "${columnName}" not found in scan)`);
+  }
+  for (const column of columns) {
+    const pk = column.primaryKey ? ', PK' : '';
+    parts.push(`- ${column.name} (${column.nativeType}, nullable=${column.nullable}${pk})${sampleText(column.sampleValues)}`);
+  }
+  parts.push('');
+}
+
+export class EntityDetailsTool extends BaseTool {
+  readonly name = 'entity_details';
+
+  constructor(private readonly catalogFactory: (context: ToolContext) => WarehouseCatalogService) {
+    super();
+  }
+
+  get description(): string {
+    return 'Verify warehouse tables and columns from the latest live-database scan before writing them into wiki or semantic-layer output.';
+  }
+
+  get inputSchema() {
+    return entityDetailsInputSchema;
+  }
+
+  /**
+   * Resolves each target against the latest scan of `input.connectionName`.
+   * Returns a refusal for out-of-scope connections, a distinct no-scan message
+   * when the connection has no scan, and otherwise one markdown section per
+   * resolved table plus a "Not found in scan" line for every missing target.
+   */
+  async call(input: EntityDetailsInput, context: ToolContext): Promise<ToolOutput<EntityDetailsStructured>> {
+    const allowed = allowedConnectionNames(context);
+    if (allowed && !allowed.has(input.connectionName)) {
+      return {
+        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        structured: { resolved: [], missing: [], scanAvailable: false },
+      };
+    }
+
+    const catalog = this.catalogFactory(context);
+    const scanAvailable = await catalog.hasScan(input.connectionName);
+    if (!scanAvailable) {
+      return {
+        markdown: `No live-database scan available for connection "${input.connectionName}"; run \`ktx scan\` first.`,
+        structured: { resolved: [], missing: [], scanAvailable: false },
+      };
+    }
+
+    const parts: string[] = [];
+    const resolved: TableDetail[] = [];
+    const missing: EntityDetailsStructured['missing'] = [];
+
+    for (const target of input.targets) {
+      const label = 'display' in target ? target.display : target.name;
+      const resolution =
+        'display' in target
+          ? await catalog.resolveDisplay(input.connectionName, target.display)
+          : { resolved: { catalog: target.catalog, db: target.db, name: target.name }, candidates: [], dialect: '' };
+      if (!resolution.resolved) {
+        missing.push({ target, candidates: resolution.candidates });
+        parts.push(`Not found in scan: ${label}`);
+        if (resolution.candidates.length > 0) {
+          parts.push(`Closest matches: ${resolution.candidates.map((candidate) => candidate.name).join(', ')}`);
+        }
+        continue;
+      }
+      const detail = await catalog.getTable({ connectionName: input.connectionName, ...resolution.resolved });
+      if (!detail) {
+        // Explicit refs skip resolveDisplay, so this is where an unknown table
+        // surfaces; report it in the markdown as well as in `missing`.
+        missing.push({ target, candidates: resolution.candidates });
+        parts.push(`Not found in scan: ${label}`);
+        continue;
+      }
+      resolved.push(detail);
+      appendTableMarkdown(parts, detail, 'column' in target ? target.column : undefined);
+    }
+
+    return {
+      markdown: parts.join('\n').trim(),
+      structured: { resolved, missing, scanAvailable: true },
+    };
+  }
+}
+```
+
+- [ ] **Step 4: Run the `entity_details` tests**
+
+Run:
+
+```bash
+pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
+```
+
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+Run:
+
+```bash
+git add packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts
+git commit -m "feat(context): add entity details verification tool"
+```
+
+### Task 4: Add `sql_execution`
+
+**Files:**
+- Create: `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`
+- Create: `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts`
+
+- [ ] **Step 1: Write failing `sql_execution` tests**
+
+Create tests for:
+
+```ts
+it('wraps read-only SQL with a capped row limit', async () => {
+  connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 });
+  const result = await tool.call(
+    { connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 },
+    context,
+  );
+  expect(connections.executeQuery).toHaveBeenCalledWith(
+    'warehouse',
+    'select * from (select status from public.orders) as ktx_query_result limit 5',
+  );
+  expect(result.markdown).toContain('| status |');
+
expect(result.structured.wrappedSql).toContain('limit 5'); +}); + +it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => { + const result = await tool.call({ connectionName: 'warehouse', sql }, context); + expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.'); + expect(connections.executeQuery).not.toHaveBeenCalled(); +}); + +it('surfaces connector errors verbatim', async () => { + connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist')); + const result = await tool.call( + { connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 }, + context, + ); + expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist'); + expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist'); +}); +``` + +- [ ] **Step 2: Run the failing tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts +``` + +Expected: FAIL because the tool file does not exist. 
+
+- [ ] **Step 3: Implement the tool**
+
+Create `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`:
+
+```ts
+import { z } from 'zod';
+import { assertReadOnlySql, limitSqlForExecution } from '../../../connections/index.js';
+import type { SlConnectionCatalogPort } from '../../../sl/index.js';
+import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js';
+
+const sqlExecutionInputSchema = z.object({
+  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/),
+  sql: z.string().min(1),
+  rowLimit: z.number().int().positive().max(1000).optional().default(100),
+});
+
+type SqlExecutionInput = z.infer<typeof sqlExecutionInputSchema>;
+
+export interface SqlExecutionStructured {
+  headers: string[];
+  rows: unknown[][];
+  rowCount: number;
+  truncated: boolean;
+  sql: string;
+  wrappedSql: string;
+  error?: string;
+}
+
+/**
+ * Renders one value as a markdown table cell. Pipes are escaped and line breaks
+ * flattened so a value such as "a|b" or multi-line text cannot split the row.
+ */
+function markdownCell(value: unknown): string {
+  return String(value ?? '')
+    .replace(/\|/g, '\\|')
+    .replace(/\r?\n/g, ' ');
+}
+
+/**
+ * Formats a result set as a markdown table showing at most 20 rows, followed by
+ * a "+N more rows" line when `totalRows` exceeds what is shown. Without headers
+ * it falls back to a JSON dump of the first 20 rows (or a no-rows message).
+ */
+function markdownTable(headers: string[], rows: unknown[][], totalRows: number): string {
+  if (headers.length === 0) {
+    return rows.length === 0 ? 'Query returned no rows.' : JSON.stringify(rows.slice(0, 20));
+  }
+  const visible = rows.slice(0, 20);
+  const lines = [
+    `| ${headers.map(markdownCell).join(' | ')} |`,
+    `| ${headers.map(() => '---').join(' | ')} |`,
+    ...visible.map((row) => `| ${row.map(markdownCell).join(' | ')} |`),
+  ];
+  if (totalRows > visible.length) {
+    lines.push(`... +${totalRows - visible.length} more rows`);
+  }
+  return lines.join('\n');
+}
+
+export class SqlExecutionTool extends BaseTool {
+  readonly name = 'sql_execution';
+
+  constructor(private readonly connections: SlConnectionCatalogPort) {
+    super();
+  }
+
+  get description(): string {
+    return 'Run a single read-only SELECT or WITH probe against an allowed warehouse connection and return a capped markdown table or the warehouse error.';
+  }
+
+  get inputSchema() {
+    return sqlExecutionInputSchema;
+  }
+
+  /**
+   * Validates and row-limits `input.sql`, runs it on `input.connectionName`, and
+   * returns the rows as markdown. Nothing is thrown: scope refusals, validation
+   * failures, and warehouse errors all come back as markdown plus
+   * `structured.error`.
+   */
+  async call(input: SqlExecutionInput, context: ToolContext): Promise<ToolOutput<SqlExecutionStructured>> {
+    const allowed = context.session?.allowedConnectionNames;
+    if (allowed && !allowed.has(input.connectionName)) {
+      return {
+        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        structured: { headers: [], rows: [], rowCount: 0, truncated: false, sql: input.sql, wrappedSql: '', error: 'connection_not_allowed' },
+      };
+    }
+
+    let sql: string;
+    let wrappedSql: string;
+    try {
+      sql = assertReadOnlySql(input.sql);
+      wrappedSql = limitSqlForExecution(sql, input.rowLimit);
+    } catch (error) {
+      // Rejected before reaching the warehouse; the validator's message is the whole answer.
+      const message = error instanceof Error ? error.message : String(error);
+      return {
+        markdown: message,
+        structured: { headers: [], rows: [], rowCount: 0, truncated: false, sql: input.sql, wrappedSql: '', error: message },
+      };
+    }
+
+    try {
+      const result = await this.connections.executeQuery(input.connectionName, wrappedSql);
+      const headers = result.headers ?? [];
+      const rows = result.rows ?? [];
+      const rowCount = result.totalRows ?? rows.length;
+      return {
+        markdown: markdownTable(headers, rows, rowCount),
+        structured: { headers, rows, rowCount, truncated: rowCount > rows.length, sql, wrappedSql },
+      };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return {
+        markdown: `SQL execution failed: ${message}`,
+        structured: { headers: [], rows: [], rowCount: 0, truncated: false, sql, wrappedSql, error: message },
+      };
+    }
+  }
+}
+```
+
+- [ ] **Step 4: Run the `sql_execution` tests**
+
+Run:
+
+```bash
+pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts
+```
+
+Expected: PASS.
+
+- [ ] **Step 5: Commit**
+
+Run:
+
+```bash
+git add packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts
+git commit -m "feat(context): add ingest SQL verification tool"
+```
+
+### Task 5: Add `discover_data`
+
+**Files:**
+- Create: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`
+- Create: `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts`
+- Create: `packages/context/src/ingest/tools/warehouse-verification/index.ts`
+
+- [ ] **Step 1: Write failing `discover_data` tests**
+
+Create tests with fake `wikiSearchTool.call`, `slDiscoverTool.call`, and `WarehouseCatalogService.searchByName`.
Cover: + +```ts +it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => { + const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context); + expect(result.markdown).toContain('## Wiki Pages'); + expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content'); + expect(result.markdown).toContain('## Semantic Layer Sources'); + expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML'); + expect(result.markdown).toContain('## Raw Warehouse Schema'); + expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`'); + expect(result.structured.raw?.hits).toHaveLength(1); +}); + +it('delegates sourceName inspect mode to sl_discover only', async () => { + const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context); + expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context); + expect(wikiSearchTool.call).not.toHaveBeenCalled(); + expect(catalog.searchByName).not.toHaveBeenCalled(); + expect(result.markdown).toContain('source detail'); +}); + +it('returns the empty-state message when all sections are empty', async () => { + const result = await tool.call({ query: 'customer source', connectionName: 'warehouse' }, emptyContext); + expect(result.markdown).toContain('No matches for "customer source" across wiki, semantic layer, or raw warehouse schema.'); +}); +``` + +- [ ] **Step 2: Run the failing tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +``` + +Expected: FAIL because the tool file does not exist. 
+
+- [ ] **Step 3: Implement the tool and index export**
+
+Create `packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts`:
+
+```ts
+import { z } from 'zod';
+import type { BaseTool, ToolContext, ToolOutput } from '../../../tools/index.js';
+import { BaseTool as ToolBase } from '../../../tools/index.js';
+import { WarehouseCatalogService, type RawSchemaHit } from './warehouse-catalog.service.js';
+
+const discoverDataInputSchema = z.object({
+  query: z.string().optional(),
+  connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/).optional(),
+  limit: z.number().int().positive().max(50).optional().default(10),
+  sourceName: z.string().optional(),
+});
+
+type DiscoverDataInput = z.infer<typeof discoverDataInputSchema>;
+
+export interface DiscoverDataStructured {
+  wiki: unknown | null;
+  sl: unknown | null;
+  raw: { hits: RawSchemaHit[] } | null;
+}
+
+interface DiscoverDataDeps {
+  wikiSearchTool: BaseTool;
+  slDiscoverTool: BaseTool;
+  catalogFactory: (context: ToolContext) => WarehouseCatalogService;
+}
+
+export class DiscoverDataTool extends ToolBase {
+  readonly name = 'discover_data';
+
+  constructor(private readonly deps: DiscoverDataDeps) {
+    super();
+  }
+
+  get description(): string {
+    return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.';
+  }
+
+  get inputSchema() {
+    return discoverDataInputSchema;
+  }
+
+  /**
+   * With `sourceName`, delegates to `sl_discover` only. Otherwise groups wiki,
+   * semantic-layer, and raw-schema hits for `query` under routing hints, or
+   * returns the empty-state message when every section is empty. An explicit
+   * `connectionName` outside the session's allowed set is refused up front,
+   * matching `entity_details` and `sql_execution`.
+   */
+  async call(input: DiscoverDataInput, context: ToolContext): Promise<ToolOutput<DiscoverDataStructured>> {
+    const allowed = context.session?.allowedConnectionNames;
+    if (input.connectionName && allowed && !allowed.has(input.connectionName)) {
+      return {
+        markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`,
+        structured: { wiki: null, sl: null, raw: null },
+      };
+    }
+
+    if (input.sourceName) {
+      const sl = await this.deps.slDiscoverTool.call(
+        { sourceName: input.sourceName, connectionId: input.connectionName },
+        context,
+      );
+      return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } };
+    }
+
+    const query = input.query?.trim() || '';
+    const limit = input.limit ?? 10;
+    const parts: string[] = [];
+    let wiki: unknown | null = null;
+    let sl: unknown | null = null;
+    let raw: DiscoverDataStructured['raw'] = null;
+
+    if (query) {
+      const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context);
+      if ((wikiResult.structured?.totalFound ?? 0) > 0) {
+        parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, '');
+        wiki = wikiResult.structured;
+      }
+    }
+
+    const slResult = await this.deps.slDiscoverTool.call(
+      { query: query || undefined, connectionId: input.connectionName },
+      context,
+    );
+    if ((slResult.structured?.totalSources ?? 0) > 0) {
+      parts.push('## Semantic Layer Sources', '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details', slResult.markdown, '');
+      sl = slResult.structured;
+    }
+
+    const catalog = this.deps.catalogFactory(context);
+    // No explicit connection: search every allowed connection, in a stable order.
+    const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort();
+    const rawHits: RawSchemaHit[] = [];
+    for (const connectionName of connections) {
+      rawHits.push(...(await catalog.searchByName(connectionName, query, limit)));
+    }
+    if (rawHits.length > 0) {
+      parts.push('## Raw Warehouse Schema', '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values');
+      parts.push(
+        rawHits
+          .slice(0, limit)
+          .map((hit) => `- ${hit.kind}: ${hit.display} (matched on ${hit.matchedOn})`)
+          .join('\n'),
+      );
+      raw = { hits: rawHits.slice(0, limit) };
+    }
+
+    if (parts.length === 0) {
+      return {
+        markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.`,
+        structured: { wiki, sl, raw },
+      };
+    }
+
+    return { markdown: parts.join('\n'), structured: { wiki, sl, raw } };
+  }
+}
+```
+
+Create `packages/context/src/ingest/tools/warehouse-verification/index.ts`:
+
+```ts
+import type { BaseTool, ToolContext } from '../../../tools/index.js';
+import type { KtxFileStorePort } from '../../../core/index.js';
+import type { SlConnectionCatalogPort } from '../../../sl/index.js';
+import { DiscoverDataTool } from './discover-data.tool.js';
+import { EntityDetailsTool } from './entity-details.tool.js';
+import { SqlExecutionTool } from './sql-execution.tool.js';
+import { WarehouseCatalogService } from './warehouse-catalog.service.js';
+
+export { DiscoverDataTool } from './discover-data.tool.js';
+export { EntityDetailsTool } from './entity-details.tool.js';
+export { SqlExecutionTool } from './sql-execution.tool.js';
+export { WarehouseCatalogService } from './warehouse-catalog.service.js';
+export type { TableDetail, WarehouseColumnDetail, RawSchemaHit } from './warehouse-catalog.service.js';
+
+/**
+ * Builds the three warehouse verification tools. The catalog reads from the
+ * session's `configService` when the tool context has one, otherwise from
+ * `fallbackFileStore`.
+ */
+export function createWarehouseVerificationTools(deps: {
+  connections: SlConnectionCatalogPort;
+  fallbackFileStore: KtxFileStorePort;
+  wikiSearchTool: BaseTool;
+  slDiscoverTool: BaseTool;
+}): BaseTool[] {
+  const catalogFactory = (context: ToolContext) =>
+    new WarehouseCatalogService({
+      fileStore: context.session?.configService ?? deps.fallbackFileStore,
+    });
+  return [
+    new EntityDetailsTool(catalogFactory),
+    new SqlExecutionTool(deps.connections),
+    new DiscoverDataTool({
+      wikiSearchTool: deps.wikiSearchTool,
+      slDiscoverTool: deps.slDiscoverTool,
+      catalogFactory,
+    }),
+  ];
+}
+```
+
+- [ ] **Step 4: Run the `discover_data` tests**
+
+Run:
+
+```bash
+pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/discover-data.tool.test.ts
+```
+
+Expected: PASS.
+ +- [ ] **Step 5: Commit** + +Run: + +```bash +git add packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts packages/context/src/ingest/tools/warehouse-verification/index.ts +git commit -m "feat(context): add raw warehouse discovery tool" +``` + +### Task 6: Wire tools into ingest sessions + +**Files:** +- Modify: `packages/context/src/tools/tool-session.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.ts` +- Modify: `packages/context/src/ingest/local-bundle-runtime.ts` +- Modify: `packages/context/src/ingest/ingest-bundle.runner.test.ts` + +- [ ] **Step 1: Write failing scoping test** + +Add to `packages/context/src/ingest/ingest-bundle.runner.test.ts`: + +```ts +it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => { + const deps = makeDeps(); + const sessions: any[] = []; + deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse']); + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + sessions.push(toolSession); + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; + }); + deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/notion/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'notion', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect([...sessions[0].allowedConnectionNames].sort()).toEqual(['notion', 'warehouse']); +}); +``` + +- [ ] **Step 2: Run the failing runner 
test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.test.ts -t "threads target warehouse connection names" +``` + +Expected: FAIL because `allowedConnectionNames` is absent. + +- [ ] **Step 3: Thread allowed connection names** + +Modify `packages/context/src/tools/tool-session.ts`: + +```ts + allowedRawPaths?: ReadonlySet; + allowedConnectionNames?: ReadonlySet; + semanticLayerService: SemanticLayerService; +``` + +Modify WU session creation in `packages/context/src/ingest/ingest-bundle.runner.ts`: + +```ts + allowedRawPaths: new Set(wu.rawFiles), + allowedConnectionNames: new Set(slConnectionIds), + semanticLayerService: scopedSemanticLayerService, +``` + +Modify reconcile session creation in the same file: + +```ts + allowedRawPaths: reconciliationAllowedRawPaths, + allowedConnectionNames: new Set(slConnectionIds), + semanticLayerService: rcScopedSl, +``` + +- [ ] **Step 4: Register the tools in the local ingest toolset** + +Modify `packages/context/src/ingest/local-bundle-runtime.ts`: + +```ts +import { + createWarehouseVerificationTools, +} from './tools/warehouse-verification/index.js'; +``` + +Refactor the existing inline wiki and SL tool instances in `LocalIngestToolsetFactory` so `wikiSearchTool` and `slDiscoverTool` are named constants, then add the warehouse tools: + +```ts + const wikiSearchTool = new WikiSearchTool({ + search: async (input) => { + const results = await searchLocalKnowledgePages(deps.project, { + userId: input.userId, + query: input.query, + limit: input.limit, + embeddingService: deps.embedding, + }); + return { + results: results.slice(0, input.limit).map((result) => ({ + key: result.key, + path: result.path, + summary: result.summary, + score: result.score, + matchReasons: result.matchReasons, + lanes: result.lanes, + })), + totalFound: results.length, + }; + }, + }); + const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }); + const 
warehouseVerificationTools = createWarehouseVerificationTools({ + connections: deps.connections, + fallbackFileStore: deps.project.fileStore, + wikiSearchTool, + slDiscoverTool, + }); + + this.baseTools = [ + new WikiReadTool(deps.wikiService, deps.knowledgeIndex), + wikiSearchTool, + new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex), + new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), + new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), + slDiscoverTool, + new SlEditSourceTool(slDeps), + new SlReadSourceTool(slDeps), + new SlWriteSourceTool(slDeps), + new SlValidateTool(slDeps), + new SlRollbackTool(deps.slSourcesRepository, deps.connections, 0), + ...warehouseVerificationTools, + ]; +``` + +- [ ] **Step 5: Run integration and toolset tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/ingest-bundle.runner.test.ts -t "threads target warehouse connection names" +pnpm --filter @ktx/context exec vitest run src/ingest/local-bundle-runtime.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +Run: + +```bash +git add packages/context/src/tools/tool-session.ts packages/context/src/ingest/ingest-bundle.runner.ts packages/context/src/ingest/local-bundle-runtime.ts packages/context/src/ingest/ingest-bundle.runner.test.ts +git commit -m "feat(context): expose warehouse verification tools to ingest" +``` + +### Task 7: Update writer prompts and clean up stale references + +**Files:** +- Create: `packages/context/skills/_shared/identifier-verification.md` +- Modify: `packages/context/skills/notion_synthesize/SKILL.md` +- Modify: `packages/context/skills/dbt_ingest/SKILL.md` +- Modify: `packages/context/skills/lookml_ingest/SKILL.md` +- Modify: `packages/context/skills/looker_ingest/SKILL.md` +- Modify: `packages/context/skills/metabase_ingest/SKILL.md` +- Modify: `packages/context/skills/metricflow_ingest/SKILL.md` +- Modify: `packages/context/skills/live_database_ingest/SKILL.md` +- Modify: `packages/context/skills/historic_sql_table_digest/SKILL.md` +- Modify: `packages/context/skills/historic_sql_patterns/SKILL.md` +- Modify: `packages/context/skills/knowledge_capture/SKILL.md` +- Modify: `packages/context/skills/sl_capture/SKILL.md` +- Modify: `packages/context/skills/sl/SKILL.md` +- Modify: `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts` +- Modify: `packages/context/src/sl/tools/sl-warehouse-validation.ts` + +- [ ] **Step 1: Add the shared protocol file** + +Create `packages/context/skills/_shared/identifier-verification.md`: + +```md +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2.
`entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe: + `SELECT DISTINCT <column> FROM <schema.table> LIMIT 50`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution` with `SELECT 1 FROM <schema.table> LIMIT 0`. If it errors, the + identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>` placeholder strings from these instructions + into output. +``` + +- [ ] **Step 2: Inline the protocol into writer skills** + +Add the same protocol block to these skills: + +```text +packages/context/skills/notion_synthesize/SKILL.md +packages/context/skills/dbt_ingest/SKILL.md +packages/context/skills/lookml_ingest/SKILL.md +packages/context/skills/looker_ingest/SKILL.md +packages/context/skills/metabase_ingest/SKILL.md +packages/context/skills/metricflow_ingest/SKILL.md +packages/context/skills/live_database_ingest/SKILL.md +packages/context/skills/historic_sql_patterns/SKILL.md +packages/context/skills/knowledge_capture/SKILL.md +packages/context/skills/sl_capture/SKILL.md +``` + +For `packages/context/skills/historic_sql_table_digest/SKILL.md`, add this shorter block: + +```md +## Identifier Verification Protocol + +Only mention columns visible in the table's scan record. Use +`entity_details({connectionName, targets: [{display: "<schema.table>"}]})` if +the table or column attribution is uncertain.
Do not infer join columns or +filters from neighboring SQL unless the scan record confirms the column exists +on the named table. +``` + +For `packages/context/skills/sl/SKILL.md`, add this cross-reference: + +```md +For capture-time identifier verification, load `sl_capture`. Synthesis writer +skills must verify warehouse identifiers with `discover_data`, +`entity_details`, and `sql_execution` before emitting table or column names. +``` + +- [ ] **Step 3: Apply per-skill edits** + +Make these exact content changes: + +- In `notion_synthesize`, add `discover_data`, `entity_details`, and `sql_execution` to the `Allowed:` line. Replace `tableRef: "orbit_analytics.customer"` with `tableRef: "<schema>.<table>
"`. +- In `dbt_ingest`, replace `wiki_sl_search` with `discover_data` and `sl_describe_table` with `entity_details`. +- In `lookml_ingest`, add: `Verify each sql_table_name from the LookML view with entity_details before mapping to an SL source.` +- In `looker_ingest`, add: `For every Looker field reference, call entity_details on the underlying schema.table.column before promoting it to sl_refs or quoting it in wiki body.` +- In `metabase_ingest`, add: `Before writing a wiki page derived from a Metabase question SQL, verify each schema.table.column mentioned with entity_details.` +- In `metricflow_ingest`, add: `Verify each MetricFlow model source table with entity_details before producing the corresponding sl_write_source.` +- In `live_database_ingest`, add: `Sample values come from the scan record; do not invent values not present in relationship-profile.json.` +- In `historic_sql_patterns`, add: `Every join column mentioned in pattern descriptions must be verified via entity_details for both sides of the join.` +- In `knowledge_capture`, update the workflow to call `discover_data` first when a page relates to data or SL concepts. +- In `sl_capture`, add: `Before sl_write_source, call entity_details on the target table to confirm column names and types match the YAML being written.` + +- [ ] **Step 4: Remove stale code and prompt strings** + +Modify `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts`: + +```ts +.describe('The fully-qualified table or source reference that triggered the fallback (e.g. "<schema>.<table>
"). Used to generate canonical detail text.'), +``` + +Modify `packages/context/src/sl/tools/sl-warehouse-validation.ts`: + +```ts + `that inherits the manifest schema. Call sl_read_source to inspect the existing source first.`, +``` + +- [ ] **Step 5: Commit** + +Run: + +```bash +git add packages/context/skills packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts packages/context/src/sl/tools/sl-warehouse-validation.ts +git commit -m "docs(context): add ingest identifier verification protocol" +``` + +### Task 8: Add prompt-bundling and banned-string tests + +**Files:** +- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts` +- Modify: `packages/context/src/ingest/ingest-runtime-assets.test.ts` + +- [ ] **Step 1: Add failing asset tests** + +Add to `packages/context/src/memory/memory-runtime-assets.test.ts`: + +```ts +const verificationWriterSkills = [ + 'notion_synthesize', + 'dbt_ingest', + 'lookml_ingest', + 'looker_ingest', + 'metabase_ingest', + 'metricflow_ingest', + 'live_database_ingest', + 'historic_sql_table_digest', + 'historic_sql_patterns', + 'knowledge_capture', + 'sl_capture', +] as const; + +it('ships identifier verification protocol in every synthesis writer skill', async () => { + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + expect(body).toContain('## Identifier Verification Protocol'); + expect(body).toMatch(/discover_data|entity_details/); + } +}); + +it('does not ship stale warehouse verification tool names or fictional identifiers', async () => { + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + expect(body).not.toContain('orbit_analytics.customer'); + expect(body).not.toContain('wiki_sl_search'); + expect(body).not.toContain('sl_describe_table'); + } +}); +``` + +Add to `packages/context/src/ingest/ingest-runtime-assets.test.ts`: + +```ts 
+it('packages identifier verification prompt assets', async () => { + const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8'); + expect(shared).toContain('## Identifier Verification Protocol'); + expect(shared).toContain('discover_data'); + expect(shared).toContain('entity_details'); + expect(shared).toContain('sql_execution'); +}); +``` + +- [ ] **Step 2: Run the asset tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: PASS after Task 7. + +- [ ] **Step 3: Commit** + +Run: + +```bash +git add packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts +git commit -m "test(context): guard ingest identifier verification prompts" +``` + +### Task 9: Run the full v1 verification set + +**Files:** +- Verify all files changed by Tasks 1-8. + +- [ ] **Step 1: Run focused tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run \ + src/connections/dialects.test.ts \ + src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + src/ingest/tools/warehouse-verification/entity-details.tool.test.ts \ + src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts \ + src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + src/ingest/ingest-bundle.runner.test.ts \ + src/memory/memory-runtime-assets.test.ts \ + src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run package tests** + +Run: + +```bash +pnpm --filter @ktx/context run test +``` + +Expected: PASS. 
+ +- [ ] **Step 4: Run pre-commit on changed files when configured** + +Run: + +```bash +uv run pre-commit run --files \ + packages/context/src/connections/dialects.ts \ + packages/context/src/connections/dialects.test.ts \ + packages/context/src/connections/index.ts \ + packages/context/src/tools/tool-session.ts \ + packages/context/src/ingest/ingest-bundle.runner.ts \ + packages/context/src/ingest/local-bundle-runtime.ts \ + packages/context/src/ingest/ingest-bundle.runner.test.ts \ + packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts \ + packages/context/src/sl/tools/sl-warehouse-validation.ts \ + packages/context/src/memory/memory-runtime-assets.test.ts \ + packages/context/src/ingest/ingest-runtime-assets.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts \ + packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts \ + packages/context/src/ingest/tools/warehouse-verification/index.ts \ + packages/context/skills/_shared/identifier-verification.md \ + packages/context/skills/notion_synthesize/SKILL.md \ + packages/context/skills/dbt_ingest/SKILL.md \ + packages/context/skills/lookml_ingest/SKILL.md \ + packages/context/skills/looker_ingest/SKILL.md \ + packages/context/skills/metabase_ingest/SKILL.md \ + packages/context/skills/metricflow_ingest/SKILL.md \ + packages/context/skills/live_database_ingest/SKILL.md \ + 
packages/context/skills/historic_sql_table_digest/SKILL.md \ + packages/context/skills/historic_sql_patterns/SKILL.md \ + packages/context/skills/knowledge_capture/SKILL.md \ + packages/context/skills/sl_capture/SKILL.md \ + packages/context/skills/sl/SKILL.md +``` + +Expected: PASS. If the repo has no pre-commit config or the local `uv` version cannot satisfy the project pin, record the exact error and rely on the focused tests plus type-check. + +- [ ] **Step 5: Commit final verification notes if any files changed during checks** + +Run: + +```bash +git status --short +``` + +Expected: only intentional files are modified. Commit any formatter-driven edits with: + +```bash +git add packages/context +git commit -m "chore(context): verify warehouse verification tools" +``` + +## Self-review checklist + +- Spec coverage: the plan covers dialect dispatch, raw scan catalog reads, `entity_details`, `sql_execution`, `discover_data`, WU and reconcile availability, prompt updates, cleanups, and tests. +- Placeholder scan: no task relies on unnamed future work. +- Type consistency: tool inputs use `connectionName`; existing `sl_discover` calls receive `connectionId` internally; raw SQL execution uses `SlConnectionCatalogPort.executeQuery()` because `SemanticLayerService.executeQuery()` currently accepts semantic-layer query input, not raw SQL. diff --git a/docs/superpowers/plans/2026-05-13-warehouse-verification-prompt-shape-closure.md b/docs/superpowers/plans/2026-05-13-warehouse-verification-prompt-shape-closure.md new file mode 100644 index 00000000..05223b93 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-warehouse-verification-prompt-shape-closure.md @@ -0,0 +1,345 @@ +# Warehouse Verification Prompt Shape Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Make every warehouse-verification prompt use KTX's shipped +`sql_execution` input shape so ingest agents include `connectionName` when they +probe warehouse identifiers. + +**Architecture:** Keep the warehouse verification tool code unchanged. Add +prompt-asset tests that reject Kaelio's old session-only SQL examples, then +update the shared identifier protocol and the three remaining per-skill SQL +probe examples that still show the legacy shape. + +**Tech Stack:** Markdown skill prompts, TypeScript, Vitest, pnpm workspace +commands. + +--- + +## Audit Summary + +The warehouse verification tools, runner wiring, adapter target fan-out, and +focused tests are present. Focused verification passed: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor" +``` + +Remaining v1-blocking gap: + +- `packages/context/skills/lookml_ingest/SKILL.md`, + `packages/context/skills/metricflow_ingest/SKILL.md`, and + `packages/context/skills/sl_capture/SKILL.md` still contain + `sql_execution({ sql ... })` / "session shape" guidance inherited from + Kaelio. 
KTX's tool contract is + `sql_execution({connectionName, sql, rowLimit?})`, so these examples can make + agents call the shipped tool with invalid input. + +Non-blocking gaps remain out of scope for this v1 plan: + +- Full DDL-style `entity_details` formatting with FK profile summaries. +- AST-backed SQL validation for data-modifying CTE bodies. +- Search over generated `enrichment/descriptions.json`. +- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache + hits across separate tool calls. +- A deterministic fake-LLM end-to-end Notion hallucination regression. Prompt + guards and tool contract tests cover the v1 contract; a broader behavior + regression can land as follow-up. + +## File Structure + +Modify these files: + +- `packages/context/src/memory/memory-runtime-assets.test.ts`: add a prompt + guard that rejects the legacy session-only `sql_execution` shape. +- `packages/context/src/ingest/ingest-runtime-assets.test.ts`: strengthen the + shared prompt asset assertion for the KTX `connectionName` SQL shape. +- `packages/context/skills/_shared/identifier-verification.md`: make both SQL + probe instructions show the KTX `connectionName` argument. +- `packages/context/skills/notion_synthesize/SKILL.md`: inline the updated + protocol block. +- `packages/context/skills/dbt_ingest/SKILL.md`: inline the updated protocol + block. +- `packages/context/skills/lookml_ingest/SKILL.md`: inline the updated protocol + block and fix the legacy SQL fallback example. +- `packages/context/skills/looker_ingest/SKILL.md`: inline the updated + protocol block. +- `packages/context/skills/metabase_ingest/SKILL.md`: inline the updated + protocol block. +- `packages/context/skills/metricflow_ingest/SKILL.md`: inline the updated + protocol block and fix the legacy SQL fallback example. +- `packages/context/skills/live_database_ingest/SKILL.md`: inline the updated + protocol block. 
+- `packages/context/skills/historic_sql_table_digest/SKILL.md`: inline the + updated protocol block. +- `packages/context/skills/historic_sql_patterns/SKILL.md`: inline the updated + protocol block. +- `packages/context/skills/knowledge_capture/SKILL.md`: inline the updated + protocol block. +- `packages/context/skills/sl_capture/SKILL.md`: inline the updated protocol + block and fix the join-discovery SQL example. + +### Task 1: Add Prompt Guards For The KTX SQL Tool Shape + +**Files:** +- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts` +- Modify: `packages/context/src/ingest/ingest-runtime-assets.test.ts` + +- [ ] **Step 1: Add the failing memory asset guard** + +In `packages/context/src/memory/memory-runtime-assets.test.ts`, add this test +after `does not ship stale warehouse verification tool names or fictional +identifiers`: + +```ts + it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => { + const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8'); + + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM'); + + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + expect(body).toContain('sql_execution({connectionName'); + expect(body).not.toContain('sql_execution({ sql'); + expect(body).not.toContain('session shape'); + expect(body).not.toContain('connection is already pinned by the ingest session'); + } + }); +``` + +- [ ] **Step 2: Strengthen the shared ingest asset guard** + +In `packages/context/src/ingest/ingest-runtime-assets.test.ts`, update +`packages identifier verification prompt assets` so the final assertions are: + +```ts + expect(shared).toContain('discover_data'); + expect(shared).toContain('entity_details'); + expect(shared).toContain('sql_execution'); + 
expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM'); +``` + +- [ ] **Step 3: Run the failing prompt guards** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: FAIL. The failure must mention at least one current legacy string: +`sql_execution({ sql`, `session shape`, or missing +`sql_execution({connectionName`. + +### Task 2: Update The Shared Identifier Verification Protocol + +**Files:** +- Modify: `packages/context/skills/_shared/identifier-verification.md` +- Modify: `packages/context/skills/notion_synthesize/SKILL.md` +- Modify: `packages/context/skills/dbt_ingest/SKILL.md` +- Modify: `packages/context/skills/lookml_ingest/SKILL.md` +- Modify: `packages/context/skills/looker_ingest/SKILL.md` +- Modify: `packages/context/skills/metabase_ingest/SKILL.md` +- Modify: `packages/context/skills/metricflow_ingest/SKILL.md` +- Modify: `packages/context/skills/live_database_ingest/SKILL.md` +- Modify: `packages/context/skills/historic_sql_table_digest/SKILL.md` +- Modify: `packages/context/skills/historic_sql_patterns/SKILL.md` +- Modify: `packages/context/skills/knowledge_capture/SKILL.md` +- Modify: `packages/context/skills/sl_capture/SKILL.md` + +- [ ] **Step 1: Replace the shared protocol text** + +Replace the full `## Identifier Verification Protocol` block in +`packages/context/skills/_shared/identifier-verification.md` with: + +```md +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. 
+ +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. 
`entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. +``` + +- [ ] **Step 2: Inline the same protocol in every writer skill** + +Replace the existing `## Identifier Verification Protocol` block in each writer +skill with the exact block from Step 1: + +```bash +packages/context/skills/notion_synthesize/SKILL.md +packages/context/skills/dbt_ingest/SKILL.md +packages/context/skills/lookml_ingest/SKILL.md +packages/context/skills/looker_ingest/SKILL.md +packages/context/skills/metabase_ingest/SKILL.md +packages/context/skills/metricflow_ingest/SKILL.md +packages/context/skills/live_database_ingest/SKILL.md +packages/context/skills/historic_sql_table_digest/SKILL.md +packages/context/skills/historic_sql_patterns/SKILL.md +packages/context/skills/knowledge_capture/SKILL.md +packages/context/skills/sl_capture/SKILL.md +``` + +- [ ] **Step 3: Run the shared prompt asset tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: still FAIL because the per-skill legacy SQL examples in LookML, +MetricFlow, and `sl_capture` have not been fixed yet. + +### Task 3: Fix Legacy Per-Skill SQL Examples + +**Files:** +- Modify: `packages/context/skills/lookml_ingest/SKILL.md` +- Modify: `packages/context/skills/metricflow_ingest/SKILL.md` +- Modify: `packages/context/skills/sl_capture/SKILL.md` + +- [ ] **Step 1: Fix the LookML fallback probe example** + +In `packages/context/skills/lookml_ingest/SKILL.md`, replace the current +Required flow item 2 with: + +```md +2. If the table isn't in the manifest, use the warehouse `connectionName` + returned by `discover_data` or the target connection chosen from + `sl_discover`, then call a dialect-appropriate SQL probe with that + connection name, for example: + `sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`. 
+ Replace `warehouse`, `analytics`, and `orders` with the verified connection, + schema or dataset, and table from the WorkUnit evidence. +``` + +- [ ] **Step 2: Fix the MetricFlow fallback probe example** + +In `packages/context/skills/metricflow_ingest/SKILL.md`, replace the paragraph +that begins `If \`sl_discover\` errors` with: + +```md +If `sl_discover` errors because no such table exists, use `discover_data` and +`entity_details` to find the warehouse target. If a SQL probe is still needed, +call `sql_execution` with the same warehouse connection name, for example: +`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`. +**Never invent column names** - every column in `columns:`, `grain:`, and +`sql:` must be sourced from raw files, `entity_details`, or a successful SQL +probe. +``` + +- [ ] **Step 3: Fix the `sl_capture` join probe example** + +In `packages/context/skills/sl_capture/SKILL.md`, replace Tool sequence item 6 +with: + +```md +6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join. +``` + +- [ ] **Step 4: Run the prompt asset tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: PASS. The tests must report 2 files passed. + +### Task 4: Final Verification + +**Files:** +- No new files. 
+ +- [ ] **Step 1: Run focused warehouse prompt and tool tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run package type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Inspect final diff** + +Run: + +```bash +git diff -- packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md +``` + +Expected: only prompt wording and prompt-asset guards changed. No tool +implementation files changed. 
+ +- [ ] **Step 4: Commit** + +Run: + +```bash +git add packages/context/src/memory/memory-runtime-assets.test.ts packages/context/src/ingest/ingest-runtime-assets.test.ts packages/context/skills/_shared/identifier-verification.md packages/context/skills/notion_synthesize/SKILL.md packages/context/skills/dbt_ingest/SKILL.md packages/context/skills/lookml_ingest/SKILL.md packages/context/skills/looker_ingest/SKILL.md packages/context/skills/metabase_ingest/SKILL.md packages/context/skills/metricflow_ingest/SKILL.md packages/context/skills/live_database_ingest/SKILL.md packages/context/skills/historic_sql_table_digest/SKILL.md packages/context/skills/historic_sql_patterns/SKILL.md packages/context/skills/knowledge_capture/SKILL.md packages/context/skills/sl_capture/SKILL.md +git commit -m "fix(context): align warehouse sql probe prompt shape" +``` + +Expected: one focused commit. + +## Self-Review + +Spec coverage: + +- The original spec requires `sql_execution` inputs to include + `connectionName`; this plan removes contradictory session-only examples from + all active writer guidance. +- The shared protocol remains in `_shared` and inlined in every synthesis + writer skill named by the original spec. +- The tool implementation remains unchanged because the shipped schema already + enforces the v1 contract. + +Placeholder scan: + +- The plan has no deferred implementation markers. +- Prompt examples use concrete `warehouse`, `analytics`, and `orders` example + names only to demonstrate JSON shape, and each example tells the worker to + replace them with discovered evidence. + +Type consistency: + +- Tests assert the exact KTX tool call shape: + `sql_execution({connectionName, sql: ...})`. +- Prompt wording consistently uses `connectionName`, matching + `packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts`. 
diff --git a/docs/superpowers/plans/2026-05-13-warehouse-verification-sql-example-closure.md b/docs/superpowers/plans/2026-05-13-warehouse-verification-sql-example-closure.md new file mode 100644 index 00000000..2d1b1779 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-warehouse-verification-sql-example-closure.md @@ -0,0 +1,215 @@ +# Warehouse Verification SQL Example Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the last connectionless `sql_execution` prompt example so +warehouse-verification writer guidance always matches KTX's shipped tool +contract. + +**Architecture:** Keep the warehouse verification tool code unchanged. Tighten +the prompt asset guard so multiline `sql_execution({ sql: ... })` examples +fail tests, then update the stale `sl_capture` worked example to pass +`connectionName` explicitly. + +**Tech Stack:** Markdown skill prompts, TypeScript, Vitest, pnpm workspace +commands. + +--- + +## Audit summary + +The warehouse verification tools, runner wiring, source-adapter target fan-out, +CLI query executor, and focused tests are present. 
Focused verification passed: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor" +``` + +Remaining v1-blocking gap: + +- `packages/context/skills/sl_capture/SKILL.md` still contains a worked example + with a multiline `sql_execution({ sql: ... })` call. KTX's tool contract is + `sql_execution({connectionName, sql, rowLimit?})`, so this example can teach + agents to call the shipped tool with invalid input. + +Non-blocking gaps remain out of scope for this v1 plan: + +- Full DDL-style `entity_details` formatting with FK profile summaries. +- AST-backed SQL validation for data-modifying CTE bodies. +- Search over generated `enrichment/descriptions.json`. +- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache + hits across separate tool calls. +- A deterministic fake-LLM end-to-end Notion hallucination regression. +- Tokenized or embedding-backed raw schema search ranking in `discover_data`. + +## File structure + +Modify these files: + +- `packages/context/src/memory/memory-runtime-assets.test.ts`: add a prompt + guard that catches multiline `sql_execution` calls without `connectionName`. 
+- `packages/context/skills/sl_capture/SKILL.md`: update the stale worked + example to include the target warehouse `connectionName`. + +### Task 1: Add a multiline SQL prompt guard + +**Files:** +- Modify: `packages/context/src/memory/memory-runtime-assets.test.ts` + +- [ ] **Step 1: Add a helper that extracts `sql_execution` call examples** + +In `packages/context/src/memory/memory-runtime-assets.test.ts`, add this helper +after `forbiddenProductPattern()`: + +```ts +function sqlExecutionCallBlocks(body: string): string[] { + const blocks: string[] = []; + const marker = 'sql_execution({'; + let offset = 0; + + while (offset < body.length) { + const start = body.indexOf(marker, offset); + if (start === -1) { + break; + } + const end = body.indexOf('})', start + marker.length); + blocks.push(body.slice(start, end === -1 ? start + marker.length : end + 2)); + offset = start + marker.length; + } + + return blocks; +} +``` + +- [ ] **Step 2: Strengthen the existing SQL-shape test** + +Replace the body of +`ships only the KTX connectionName sql_execution call shape in writer guidance` +with: + +```ts + const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8'); + const bodies = [{ name: '_shared/identifier-verification.md', body: shared }]; + + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM'); + + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + bodies.push({ name: `${skillName}/SKILL.md`, body }); + expect(body).toContain('sql_execution({connectionName'); + expect(body).not.toContain('sql_execution({ sql'); + expect(body).not.toContain('session shape'); + expect(body).not.toContain('connection is already pinned by the ingest session'); + } + + for (const { name, body } of bodies) { + const calls = sqlExecutionCallBlocks(body); + 
expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0); + expect( + calls.filter((call) => !call.includes('connectionName')), + `${name} has sql_execution calls without connectionName`, + ).toEqual([]); + expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch( + /sql_execution\(\{\s*sql\s*:/, + ); + } +``` + +- [ ] **Step 3: Run the failing prompt guard** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts -t "connectionName sql_execution" +``` + +Expected: FAIL. The failure must identify +`sl_capture/SKILL.md` as having a `sql_execution` call without +`connectionName` or a connectionless multiline `sql_execution` call. + +- [ ] **Step 4: Commit the failing guard** + +Run: + +```bash +git add packages/context/src/memory/memory-runtime-assets.test.ts +git commit -m "test(context): catch connectionless sql execution prompt examples" +``` + +### Task 2: Fix the stale `sl_capture` SQL example + +**Files:** +- Modify: `packages/context/skills/sl_capture/SKILL.md` +- Test: `packages/context/src/memory/memory-runtime-assets.test.ts` +- Test: `packages/context/src/ingest/ingest-runtime-assets.test.ts` + +- [ ] **Step 1: Update the worked example** + +In `packages/context/skills/sl_capture/SKILL.md`, replace the `sql_execution` +block in "Worked example - new join" with: + +```md +sql_execution({ + connectionName: "warehouse", + sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1" +}) +``` + +- [ ] **Step 2: Run the prompt guards** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/memory/memory-runtime-assets.test.ts src/ingest/ingest-runtime-assets.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 3: Run a direct stale-shape scan** + +Run: + +```bash +rg -n -U "sql_execution\\(\\{\\s*\\n\\s*sql:" packages/context/skills packages/context/prompts +``` + +Expected: no matches and exit code 1. + +- [ ] **Step 4: Run the context type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 5: Commit the prompt fix** + +Run: + +```bash +git add packages/context/skills/sl_capture/SKILL.md +git commit -m "fix(context): include connection name in sl capture sql example" +``` + +## Self-review + +Spec coverage: + +- The only remaining v1-blocking prompt-shape gap has a failing test and a + direct prompt edit. +- Tool implementation, runner wiring, adapter scoping, and CLI execution + remain covered by the focused suites listed in the audit summary. + +Placeholder scan: + +- This plan contains no deferred implementation placeholders. + +Type consistency: + +- The plan uses the shipped KTX tool shape: + `sql_execution({connectionName, sql, rowLimit?})`. diff --git a/docs/superpowers/plans/2026-05-13-warehouse-verification-structured-target-miss-closure.md b/docs/superpowers/plans/2026-05-13-warehouse-verification-structured-target-miss-closure.md new file mode 100644 index 00000000..48983c4a --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-warehouse-verification-structured-target-miss-closure.md @@ -0,0 +1,236 @@ +# Warehouse Verification Structured Target Miss Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `entity_details` return model-visible not-found evidence for every documented target shape, including structured `{catalog, db, name, column?}` targets. + +**Architecture:** Keep the existing warehouse verification module. 
Add focused tests for missing structured table and column targets, then route structured target labels through the same candidate lookup used by display targets while preserving exact structured resolution. + +**Tech Stack:** TypeScript, Node 22, Vitest, AI SDK v6 tools, Zod, KTX ingest tools. + +--- + +## Audit Summary + +The implemented plans have landed the warehouse verification tools, ingest +runner wiring, adapter warehouse target fan-out, CLI read-only query executor, +and prompt-shape closures. Focused verification passed on May 13, 2026: + +```bash +pnpm --filter @ktx/context exec vitest run src/connections/dialects.test.ts src/connections/read-only-sql.test.ts src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/sql-execution.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts src/ingest/ingest-prompts.test.ts src/ingest/ingest-runtime-assets.test.ts src/memory/memory-runtime-assets.test.ts src/ingest/local-adapters.test.ts src/ingest/adapters/notion/notion.adapter.test.ts src/ingest/adapters/lookml/lookml.adapter.test.ts src/ingest/adapters/metricflow/metricflow.adapter.test.ts +pnpm --filter @ktx/cli exec vitest run src/ingest-query-executor.test.ts src/ingest.test.ts -t "supplies a scan-connector query executor" +rg -n -U "sql_execution\\(\\{\\s*\\n\\s*sql:" packages/context/skills packages/context/prompts +rg -n "wiki_sl_search|sl_describe_table|orbit_analytics\\.customer" packages/context/skills packages/context/prompts packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts packages/context/src/sl/tools/sl-warehouse-validation.ts +``` + +Remaining v1-blocking gap: + +- `entity_details` accepts structured targets, but if a structured table target + does not exist, it records `structured.missing` and emits no markdown. 
Tool + outputs are sent to the model as markdown only, so the synthesis agent gets + an empty response instead of the required "Not found in scan" verification + signal. + +Non-blocking gaps remain out of scope for this v1 plan: + +- Full DDL-style `entity_details` formatting with FK and profile summaries. +- AST-backed SQL validation for data-modifying CTE bodies. +- Dialect-specific row-limit wrapping for SQL Server probes. +- Search over generated `enrichment/descriptions.json`. +- Per-WorkUnit reuse of a single `WarehouseCatalogService` instance for cache + hits across separate tool calls. +- A deterministic fake-LLM end-to-end Notion hallucination regression. +- Cleanup of legacy demo Orbit wiki fixtures that still mention + `orbit_analytics.customer`. + +## File Structure + +Modify these files: + +- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`: add failing coverage for missing structured targets. +- `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`: render missing structured targets into markdown and reuse candidate lookup. 
+ +### Task 1: Report Structured Target Misses In `entity_details` + +**Files:** +- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts` +- Modify: `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts` + +- [ ] **Step 1: Add failing structured miss tests** + +In `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts`, add these tests after `reports missing explicit columns instead of returning an empty column list`: + +```ts + it('reports missing structured table targets in model-visible markdown', async () => { + const result = await tool.call( + { + connectionName: 'warehouse', + targets: [{ catalog: null, db: 'public', name: 'orderz' }], + }, + context, + ); + + expect(result.markdown).toContain('Not found in scan: public.orderz'); + expect(result.markdown).toContain('Closest matches: orders'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); + }); + + it('reports missing structured column targets in model-visible markdown', async () => { + const result = await tool.call( + { + connectionName: 'warehouse', + targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }], + }, + context, + ); + + expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier'); + expect(result.markdown).toContain('Available columns: id, status'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); + }); +``` + +- [ ] **Step 2: Run the failing focused test** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts -t "structured" +``` + +Expected: FAIL. The first new test must fail because `result.markdown` does not contain `Not found in scan: public.orderz`. 
+ +- [ ] **Step 3: Add structured target labels and candidate lookup** + +In `packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts`, add this type alias after `type EntityDetailsInput = z.infer;`: + +```ts +type EntityDetailsTarget = EntityDetailsInput['targets'][number]; +``` + +Add these helpers after `function allowedConnectionNames(context: ToolContext): ReadonlySet | null { ... }`: + +```ts +function targetLabel(target: EntityDetailsTarget): string { + if ('display' in target) { + return target.display; + } + return [target.catalog, target.db, target.name, target.column].filter((part): part is string => !!part).join('.'); +} + +function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarget, candidates: KtxTableRef[]): void { + parts.push(`Not found in scan: ${targetLabel(target)}`); + if (candidates.length > 0) { + parts.push(`Closest matches: ${candidates.map((candidate) => candidate.name).join(', ')}`); + } +} + +async function resolveTarget( + catalog: WarehouseCatalogService, + connectionName: string, + target: EntityDetailsTarget, +): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> { + if ('display' in target) { + return catalog.resolveDisplayTarget(connectionName, target.display); + } + + const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target)); + return { + resolved: { + catalog: target.catalog, + db: target.db, + name: target.name, + column: target.column, + }, + candidates: candidateResolution.candidates, + }; +} +``` + +Then replace the `const resolution = ...` block inside the `for (const target of input.targets)` loop with: + +```ts + const resolution = await resolveTarget(catalog, input.connectionName, target); +``` + +Replace the missing-resolution block with: + +```ts + if (!resolution.resolved) { + missing.push({ target, candidates: resolution.candidates }); + appendMissingTargetMarkdown(parts, target, 
resolution.candidates); + continue; + } +``` + +Replace the missing-detail block with: + +```ts + if (!detail) { + missing.push({ target, candidates: resolution.candidates }); + appendMissingTargetMarkdown(parts, target, resolution.candidates); + continue; + } +``` + +- [ ] **Step 4: Run the focused entity-details tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 5: Run warehouse verification regression tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts src/ingest/tools/warehouse-verification/entity-details.tool.test.ts src/ingest/tools/warehouse-verification/discover-data.tool.test.ts +``` + +Expected: PASS. + +- [ ] **Step 6: Run context type-check** + +Run: + +```bash +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +Run: + +```bash +git add \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts \ + packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts +git commit -m "fix(context): report structured entity detail misses" +``` + +## Self-review + +Spec coverage: + +- The original `entity_details` contract says structured and display targets + are mixed shapes and unresolved targets must produce `Not found in scan` with + candidates. This plan adds that model-visible behavior for structured table + misses and preserves the existing column-miss behavior. + +Placeholder scan: + +- This plan contains no deferred implementation placeholders. + +Type consistency: + +- The plan uses the existing `WarehouseCatalogService`, `KtxTableRef`, + `EntityDetailsStructured`, and `ToolOutput` types without adding public API + compatibility wrappers. 
diff --git a/docs/superpowers/specs/2026-05-12-notion-ingestion-warehouse-verification-design.md b/docs/superpowers/specs/2026-05-12-notion-ingestion-warehouse-verification-design.md new file mode 100644 index 00000000..074f00e5 --- /dev/null +++ b/docs/superpowers/specs/2026-05-12-notion-ingestion-warehouse-verification-design.md @@ -0,0 +1,331 @@ +# Warehouse Verification Tools for Ingestion Synthesis + +**Date:** 2026-05-12 +**Author:** Andrey Avtomonov +**Status:** Design — pending implementation plan + +## Background and motivation + +KTX's ingest pipeline synthesises wiki pages and semantic-layer (SL) sources from third-party content (Notion, LookML, Looker, Metabase, dbt, MetricFlow, historic SQL, live-database scans, and chat). The synthesis stage is an LLM call that runs once per WorkUnit, governed by a skill prompt (e.g. `notion_synthesize`) and a set of allowed tools. + +A real-world inspection (project `/tmp/ktx-proj-1`) surfaced two failure modes the synthesis stage produces: + +1. **Fictional identifiers laundered into wiki output.** A Notion page mentioned `orbit_analytics.customer` as a legacy "customer source" table with a `plan_tier in {free, pro, enterprise}` column. Neither the table, the column, nor those values exist in the configured warehouse. The synthesis LLM faithfully copied them into `knowledge/global/orbit/customers-source.md` as a "Conflict Note", giving the fabricated names full wiki frontmatter, a `Source:` citation, and apparent authority. +2. **Column attribution drift.** The same wiki page documents columns under `orbit_raw.accounts` but states the `paying_account_count` measure filters on `normalized_plan_code` and `contract_status`. Those columns live on `orbit_analytics.mart_account_segments`, not on `accounts`. A reader (or a downstream agent) following the page will write `accounts.normalized_plan_code` and get a `column does not exist` error. 
+ +Root cause analysis (`packages/context/skills/notion_synthesize/SKILL.md`, `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts`, `packages/context/src/wiki/tools/wiki-write.tool.ts`) showed three contributing factors: + +- The synthesis LLM has no verification primitive that distinguishes a real warehouse identifier from a fabricated one. `sl_discover` only finds objects already promoted into the semantic layer; raw warehouse scans (which already exist on disk under `raw-sources//live-database//`) are not surfaced to the LLM at all. +- `wiki_write` performs no body-text validation — anything the LLM emits is written. +- The skill prompt itself uses `orbit_analytics.customer` as a canonical example string (`SKILL.md:70`), reinforcing the same fictional name the LLM ends up emitting. + +Kaelio's server-side ingest WU agent (`/Users/andrey/conductor/workspaces/kaelio-main2/douala/server/src/tools/toolset-factory.service.ts`) had four verification tools that KTX dropped during the open-source extraction: `discover_data`, `entity_details`, `dictionary_search`, and `sql_execution`. The underlying connector infrastructure (`KtxScanConnector`, dialect classes, `assertReadOnlySql`, `SemanticLayerService.executeQuery`) is present in KTX, so the gap is at the tool layer, not the platform layer. + +## Goal + +Give every ingest adapter's synthesis-time LLM call the tools and skill-prompt instructions needed to verify warehouse identifiers (`schema.table`, `schema.table.column`) and sample values before emitting them into wiki pages, SL sources, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback` records. + +## Non-goals + +- Not changing `wiki_write` itself. A complementary spec covers hard write-time validation; this spec focuses on giving the LLM the tools to self-validate. +- Not modifying any Notion fetch/chunk/cluster behaviour. +- Not changing the `_schema/*.yaml` format. 
+- Not introducing a UUID layer for tables or columns; KTX keeps `(connection, catalog, db, name)` as the canonical table identity. +- Not adding `semantic_query` to the synthesis toolset. `semantic_query` is a future tool for the research/chat-time agent; synthesis creates SL sources rather than queries them, so it is the wrong shape for this stage. +- Not adding `dictionary_search`. `entity_details` already returns per-column `sampleValues` from the relationship-profile, and `sql_execution` covers the rarer "where does this literal live?" case more accurately than a sampled-JSON full-text scan. + +## What already exists in KTX + +The dialect/driver/connection architecture is fully ported from Kaelio. The new tools sit on top of these already-shipping primitives: + +| Primitive | Location | +|---|---| +| `KtxTableRef = { catalog: string\|null, db: string\|null, name: string }` | `packages/context/src/scan/types.ts:168` | +| `SemanticLayerService.executeQuery(connectionId, sql)` | `packages/context/src/sl/semantic-layer.service.ts:1004`, used today by `sl_validate` | +| `assertReadOnlySql` / `limitSqlForExecution` | `packages/context/src/connections/read-only-sql.ts` | +| 7 connectors with parallel layout (postgres, mysql, sqlserver, snowflake, bigquery, clickhouse, sqlite), each exporting a dialect class | `packages/connector-*` | +| Raw scan artefacts: `tables/...json` and `enrichment/relationship-profile.json` (with `nativeType`, `nullable`, `primaryKey`, `foreignKeys`, `rowCount`, `nullCount`, `distinctCount`, `sampleValues`, descriptions) | `raw-sources/<connection>/live-database/<syncId>/` | +| `wiki_search`, `sl_discover`, `sl_read_source`, `sl_validate`, `emit_unmapped_fallback` | already wired into synthesis stages | + +The only meaningfully new code is `WarehouseCatalogService`, a small `getDialectForDriver` dispatch, the three tool files, and the wiring in `ingest-bundle.runner.ts`.
+ +## Architecture + +### Module layout + +``` +packages/context/src/ingest/tools/warehouse-verification/ + discover-data.tool.ts + entity-details.tool.ts + sql-execution.tool.ts + warehouse-catalog.service.ts + index.ts # exports createWarehouseVerificationTools() +packages/context/src/connections/ + dialects.ts # adds getDialectForDriver() +packages/context/skills/_shared/ + identifier-verification.md # the protocol snippet referenced from every synthesis skill +``` + +### Canonical table identity + +Every tool that names a warehouse object uses the tuple `(connectionName, catalog, db, name[, column])`. `connectionName` is the slug from `ktx.yaml` (e.g., `"warehouse"`), validated against `^[a-zA-Z0-9][a-zA-Z0-9_-]*$`. There is no UUID layer. + +`display` strings the LLM picks up from source pages (e.g., `"orbit_raw.accounts"` for Postgres or `"project.dataset.table"` for BigQuery) are parsed by `WarehouseCatalogService.resolveDisplay`, which knows the connection's driver via `getDialectForDriver`. Ambiguous parses (e.g., a 2-part display on BigQuery) return a candidates list instead of guessing. + +Dialect mapping: + +| Driver | catalog | db | name | Display | +|---|---|---|---|---| +| postgres | `null` | schema | table | `schema.table` | +| mysql | `null` | schema | table | `schema.table` | +| sqlserver | catalog | schema | table | `catalog.schema.table` | +| snowflake | database | schema | table | `db.schema.table` | +| bigquery | project | dataset | table | `project.dataset.table` | +| clickhouse | `null` | database | table | `database.table` | +| sqlite | `null` | `null` | table | `table` | + +### `WarehouseCatalogService` + +Stateless except for a per-WorkUnit cache. Reads raw scan files under `raw-sources//live-database//`. 
+ +```ts +class WarehouseCatalogService { + getTable(ref: { connectionName: string } & KtxTableRef): Promise; + listTables(connectionName: string): Promise; + resolveDisplay(connectionName: string, display: string): Promise<{ + resolved: KtxTableRef | null; + candidates: KtxTableRef[]; // ranked by edit distance when resolved is null + dialect: string; + }>; + searchByName(connectionName: string, query: string, limit: number): Promise>; + getLatestSyncId(connectionName: string): Promise; +} +``` + +`getTable` merges the raw schema file (native types, PK, FK, nullable) with the enrichment profile (row counts, null rates, distinct counts, sample values, AI-generated descriptions). When no scan exists for the connection, every read returns `null`; tools surface this as a distinct "no scan available" state rather than as "identifier not found", so the LLM doesn't conclude a real table is fictional just because a scan hasn't run yet. + +### `getDialectForDriver` + +```ts +// packages/context/src/connections/dialects.ts +export type SupportedDriver = 'postgres'|'postgresql'|'mysql'|'sqlserver'|'snowflake'|'bigquery'|'clickhouse'|'sqlite'|'sqlite3'; +export function getDialectForDriver(driver: SupportedDriver): KtxDialect; +``` + +Sync dispatch. The connectors' existing dialect classes already expose the same shape — `formatTableName(KtxTableRef)`, `quoteIdentifier(string)`, `mapToDimensionType(nativeType)`. The implementation plan introduces a minimal `KtxDialect` interface that these classes already satisfy structurally; no connector-internal changes required. Used by tools only for display-string parsing and error-message formatting; tools never construct executable SQL. 
+ +## Tool contracts + +### `entity_details` + +```ts +input = { + connectionName: string, + targets: Array< // 1..50, mixed shapes allowed + | { display: string } // "orbit_raw.accounts" or "orbit_raw.accounts.account_id" + | { catalog: string|null, db: string, name: string, column?: string } + >, +} +``` + +Output (markdown, per target): + +``` +### orbit_raw.accounts +Type: table | Native columns: 11 | PK: account_id | FKs: parent_account_id → orbit_raw.accounts.account_id +Description: One row per customer account… + +Columns: +- account_id (text, nullable=false, PK) — sample: ["acct_001","acct_002",…] +- parent_account_id (text, nullable=true, FK → orbit_raw.accounts.account_id) +- account_name (text, nullable=false) +- … + +Profile: rowCount=4321 distinctCount(account_id)=4321 nullRate(parent_account_id)=0.62 +``` + +When `column` is provided in a target, output is scoped to that one column. When a target doesn't resolve, output is `Not found in scan. Closest matches: …` with up to 5 candidates from `searchByName`. When the connection has no `live-database` scan, output is `No live-database scan available for connection ""; run \`ktx scan\` first.` — distinct from the "not found" state. + +Structured output: `{ resolved: TableDetail[], missing: Array<{target, candidates}>, scanAvailable: boolean }`. + +Refuses `connectionName` values not in the WU-stage's `allowedConnectionNames` set. + +### `sql_execution` + +```ts +input = { + connectionName: string, + sql: string, // single SELECT or WITH only + rowLimit?: number, // default 100, hard cap 1000 +} +``` + +Pipeline: + +1. `assertReadOnlySql(sql)` — regex rejects anything starting with `insert|update|delete|merge|alter|drop|create|truncate|grant|revoke|copy|call|do|vacuum|analyze|refresh`. +2. `limitSqlForExecution(sql, rowLimit)` — wraps as `select * from () as ktx_query_result limit N`. +3. `SemanticLayerService.executeQuery(connectionName, wrappedSql)`. +4. 
Format as markdown table; first ~20 rows inline; if truncated, append `… +N more rows`. + +Structured output: `{ headers, rows, rowCount, truncated, sql, wrappedSql }`. + +Connector errors surface verbatim (e.g., Postgres `relation "orbit_analytics.customer" does not exist`). That error message is the most valuable verification signal — it tells the LLM the identifier is fictional. + +Refuses `connectionName` not in `allowedConnectionNames`. Each connector's driver-level read-only enforcement (Postgres read-only transaction, BigQuery query-only jobs) is a second defence under the regex gate. + +### `discover_data` + +```ts +input = { + query: string, + connectionName?: string, // omit to search all configured warehouse connections + limit?: number, // default 10 per section + sourceName?: string, // SL source detail mode (delegates to sl_discover) +} +``` + +Composes three searches and groups output into three sections, omitting empty sections: + +1. **Wiki Pages** — `wiki_search({query, limit})`. Routing hint: *use `wiki_read(blockKey)` for full content*. +2. **Semantic Layer Sources** — `sl_discover({query, connectionName})`. Routing hint: *use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details*. +3. **Raw Warehouse Schema** — `WarehouseCatalogService.searchByName(connectionName, query, limit)`. Routing hint: *use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values*. + +When `sourceName` is set, delegates entirely to `sl_discover` inspect mode and skips other sections. When all three sections are empty, output is `No matches for "" across wiki, semantic layer, or raw warehouse schema. Try broader terms; this concept may not exist yet.` + +Structured output: `{ wiki: WikiSearchStructured|null, sl: SlDiscoverStructured|null, raw: RawSchemaHits|null }`. 
+ +## Wiring + +`packages/context/src/ingest/ingest-bundle.runner.ts` already plumbs `emit_unmapped_fallback` into both the WorkUnit stage (`createEmitUnmappedFallbackTool` around line 726) and the reconcile stage (around line 962), with merging done via `packages/context/src/ingest/stages/build-wu-context.ts` and `build-reconcile-context.ts`. + +Add a parallel factory next to those existing calls: + +```ts +const warehouseTools = createWarehouseVerificationTools({ + semanticLayerService: scopedSemanticLayerService, + warehouseCatalog: new WarehouseCatalogService({ fileStore, projectDir }), + dialects: getDialectForDriver, + allowedConnectionNames: slConnectionIds, // reuse existing scoping + sqlExecutionRowLimit: 100, +}); +// Merge `entity_details`, `sql_execution`, `discover_data` into both stage tool maps +// alongside emit_unmapped_fallback. +``` + +`createWarehouseVerificationTools` returns `Record` with three keys. The set is wired into every adapter's synthesis stage — no per-adapter opt-in. + +## Skill-prompt updates + +### Shared protocol + +`packages/context/skills/_shared/identifier-verification.md`: + +```md +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: +1. `discover_data({query: ""})` — see what wikis, SL sources, and raw tables + already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: +2. `entity_details({connectionName, targets: [{display: ""}]})` — + confirm the identifier resolves; inspect native types, FK/PK, and sampleValues. +3. For literal values from the source (status codes, plan tiers): check whether + they appear in `entity_details`' `sampleValues` for the relevant column. + If `sampleValues` is short or you suspect the sample missed real values, run + a `sql_execution` probe: `SELECT DISTINCT FROM LIMIT 50`. +4. 
If the candidate identifier still doesn't resolve, do one of: + (a) Use `sql_execution` with `SELECT 1 FROM <identifier> LIMIT 0`. If it errors, + the identifier is fictional. + (b) Wrap the identifier in `[unverified — from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + (c) When recording `emit_unmapped_fallback` with `no_physical_table`, + include the failing probe error in `clarification`. +5. Never copy `<schema>.<table>` placeholder strings from these instructions + into output. +``` + +Each affected skill inlines this block verbatim (skill files are independent prompts; KTX has no cross-skill include mechanism today). + +### Per-skill diffs + +Two skills are deliberately excluded from updates: `ingest_triage` (read-only triage; produces no wiki or SL output) and `sl` (umbrella reference doc; cross-links to the protocol but doesn't need its own copy). + +| Skill | Changes | +|---|---| +| `notion_synthesize` | Inline protocol; append `discover_data`, `entity_details`, `sql_execution` to `Allowed:` (line 74); replace `orbit_analytics.customer` example on line 70 with `<schema>.<table>` | +| `dbt_ingest` | Inline protocol; line 24: replace `wiki_sl_search` → `discover_data` and `sl_describe_table` → `entity_details`; strengthen the "not permission to invent physical columns" paragraph by naming `entity_details` as the verification call | +| `lookml_ingest` | Inline protocol; add: "Verify each `sql_table_name` from the LookML view with `entity_details` before mapping to an SL source" | +| `looker_ingest` | Inline protocol; add: "For every Looker field reference, call `entity_details` on the underlying `(schema, table, column)` before promoting to `sl_refs` or quoting in wiki body" | +| `metabase_ingest` | Inline protocol; add: "Before writing a wiki page derived from a Metabase question's SQL, verify each `schema.table.column` mentioned with `entity_details`" | +| `metricflow_ingest` | Inline protocol; add: "Verify each MetricFlow model's source table with `entity_details` before producing the corresponding `sl_write_source`" | +| `live_database_ingest` | Inline protocol; add: "Sample values come from the scan record; do not invent values not present in `relationship-profile.json`" | +| `historic_sql_table_digest` | Shortened protocol focused on column attribution: "Only mention columns visible in the table's scan record. Use `entity_details({display})` if uncertain" | +| `historic_sql_patterns` | Inline protocol; add: "Every join column mentioned in pattern descriptions must be verified via `entity_details` for both sides of the join" | +| `knowledge_capture` | Inline protocol; update line 44: "First call `discover_data` to find existing wiki pages, SL sources, and raw tables on the topic" | +| `sl_capture` | Inline protocol; add: "Before `sl_write_source`, call `entity_details` on the target table to confirm column names and types match the YAML being written" | + +### Cleanups beyond the three-tool addition + +- `notion_synthesize/SKILL.md:70` — remove `orbit_analytics.customer` (placeholder).
+- `packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts:67` — same example string in the Zod `.describe()` — replace with `<schema>.<table>`. +- `dbt_ingest/SKILL.md:24` — fix `wiki_sl_search` and `sl_describe_table` (neither tool exists in KTX). +- `packages/context/src/sl/tools/sl-warehouse-validation.ts:93` — inline error message references the non-existent `sl_describe_table`. Replace with `sl_read_source`. + +## Testing strategy + +### Unit tests + +| Component | Tests | +|---|---| +| `getDialectForDriver` | Every supported driver returns a dialect; unknown driver throws with a clear list of supported drivers | +| `WarehouseCatalogService.getTable` | Reads and merges `tables/.json` and `relationship-profile.json`; returns `null` when no sync exists; returns `null` for unknown `(catalog, db, name)` | +| `WarehouseCatalogService.resolveDisplay` | Postgres 2-part display → `{catalog: null, db, name}`; BigQuery 3-part display → `{catalog, db, name}`; ambiguous 2-part on BigQuery returns candidates list; unknown displays produce closest-match candidates ordered by edit distance | +| `WarehouseCatalogService.searchByName` | Substring and token match; tiers (exact-name → token-match) ordered correctly; cache hit on second call within same instance | +| `entity_details` | Resolves `{display}` and structured inputs; reports "Not found" with candidates for unknown ref; reports "no scan available" distinctly when scan dir missing; truncates above 50 targets | +| `discover_data` | Three sections present when all three have hits; sections omitted when empty; `sourceName` inspect mode delegates to `sl_discover` and skips other sections; `allowedConnectionNames` scope honoured | +| `sql_execution` | `assertReadOnlySql` rejects each mutating verb; row-limit wrap visible in `wrappedSql`; connector errors surface verbatim with the failing SQL; rejects `connectionName` not in `allowedConnectionNames` | + +### Integration tests + +- Extend `packages/context/src/ingest/ingest-bundle.runner.test.ts` to verify the three new tools are present in both WU-stage and reconcile-stage tool maps and refuse out-of-scope
`connectionName` values. +- New fixture-based test: stage a small `raw-sources//live-database//` directory with 2 tables + 1 enrichment profile, then call each tool through the runner's tool map and assert the markdown contains the expected fields. Uses the same fake-LLM harness as `notion.adapter.test.ts`. +- One end-to-end regression test reproducing the `orbit_analytics.customer` hallucination: a fake Notion page mentioning the fictional table is fed to the synthesis stage; the run produces a wiki page where the fictional name is wrapped in `[unverified — …]` or omitted, not promoted to `tables:` frontmatter. + +### Prompt-bundling tests + +Extend `packages/context/src/memory/memory-runtime-assets.test.ts`: + +- Every skill in the synthesis-writers list embeds the verification-protocol block (assert by stable header text). +- Every such skill lists the three new tools when it has a `## Tools / Allowed` section, or mentions them inline in a workflow step otherwise. +- No skill file contains any of the banned strings: `orbit_analytics.customer`, `wiki_sl_search`, `sl_describe_table`. + +### Performance guards + +`WarehouseCatalogService` caches the per-connection table list per stage (one WorkUnit's lifetime). Tests assert second call is a cache hit. No DB index for `searchByName` in this iteration — linear scan over scan artefacts is acceptable up to ~50K columns. If volume warrants it later, a follow-up PR adds a SQLite FTS index. 
+ +## Rollout + +Four mergeable PRs: + +| PR | Lands | +|---|---| +| 1 | `getDialectForDriver` + `WarehouseCatalogService` + `entity_details` tool + wiring in `ingest-bundle.runner.ts` + unit/integration tests | +| 2 | `sql_execution` tool + tests + the `orbit_analytics.customer` regression test (which exercises protocol steps 4a/4c) | +| 3 | `discover_data` tool + tests | +| 4 | All 11 skill prompts updated with the verification protocol + the three cleanups + extended `memory-runtime-assets.test.ts` | + +Skill prompts land last so they can reference the tools that already exist. + +## Out of scope + +- **Hard write-time validation in `wiki_write` / `emit_unmapped_fallback`.** A complementary spec covers regex-based identifier validation at the write boundary. Defence-in-depth — separate concern. +- **SQLite FTS index for `searchByName`.** Deferred until the linear scan benchmark fails. +- **`raw_schema_search` as a standalone tool.** `discover_data`'s raw section covers the concept-search case. +- **`semantic_query` in the synthesis toolset.** `semantic_query` will exist in KTX for the research/chat-time agent; it is deliberately excluded from synthesis because synthesis creates SL sources rather than queries them. +- **`dictionary_search`.** `entity_details` already returns per-column `sampleValues`; for the rarer "where does this literal live?" case, `sql_execution` is more accurate than a sampled-JSON scan. +- **UUID layer for tables/columns.** KTX deliberately stays string-keyed on `(connection, catalog, db, name)`. 
diff --git a/packages/cli/src/ingest-query-executor.test.ts b/packages/cli/src/ingest-query-executor.test.ts new file mode 100644 index 00000000..343202a1 --- /dev/null +++ b/packages/cli/src/ingest-query-executor.test.ts @@ -0,0 +1,86 @@ +import type { KtxLocalProject } from '@ktx/context/project'; +import { createKtxConnectorCapabilities, type KtxScanConnector } from '@ktx/context/scan'; +import { describe, expect, it, vi } from 'vitest'; +import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; + +function project(): KtxLocalProject { + return { + projectDir: '/tmp/ktx-query-project', + config: { + project: 'warehouse', + connections: { + warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' }, + }, + }, + } as unknown as KtxLocalProject; +} + +function connector(overrides: Partial = {}): KtxScanConnector { + return { + id: 'warehouse', + driver: 'postgres', + capabilities: createKtxConnectorCapabilities({ readOnlySql: true }), + async introspect() { + throw new Error('introspect is not used by this test'); + }, + executeReadOnly: vi.fn(async () => ({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + rowCount: 1, + })), + cleanup: vi.fn(async () => {}), + ...overrides, + }; +} + +describe('createKtxCliIngestQueryExecutor', () => { + it('executes read-only SQL through the scan connector and cleans it up', async () => { + const scanConnector = connector(); + const createConnector = vi.fn(async () => scanConnector); + const executor = createKtxCliIngestQueryExecutor(project(), { createConnector }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' }, + projectDir: '/tmp/ktx-query-project', + sql: 'select 1', + maxRows: 5, + }), + ).resolves.toMatchObject({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + }); + + expect(createConnector).toHaveBeenCalledWith(project(), 
'warehouse'); + expect(scanConnector.executeReadOnly).toHaveBeenCalledWith( + { connectionId: 'warehouse', sql: 'select 1', maxRows: 5 }, + { runId: 'ingest-sql-execution' }, + ); + expect(scanConnector.cleanup).toHaveBeenCalledTimes(1); + }); + + it('rejects connectors without read-only SQL support', async () => { + const scanConnector = connector({ + capabilities: createKtxConnectorCapabilities({ readOnlySql: false }), + executeReadOnly: undefined, + }); + const executor = createKtxCliIngestQueryExecutor(project(), { + createConnector: vi.fn(async () => scanConnector), + }); + + await expect( + executor.execute({ + connectionId: 'warehouse', + connection: { driver: 'postgres' }, + projectDir: '/tmp/ktx-query-project', + sql: 'select 1', + }), + ).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.'); + expect(scanConnector.cleanup).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/cli/src/ingest-query-executor.ts b/packages/cli/src/ingest-query-executor.ts new file mode 100644 index 00000000..197119be --- /dev/null +++ b/packages/cli/src/ingest-query-executor.ts @@ -0,0 +1,49 @@ +import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from '@ktx/context/connections'; +import type { KtxLocalProject } from '@ktx/context/project'; +import type { KtxScanConnector, KtxScanContext } from '@ktx/context/scan'; +import { createKtxCliScanConnector } from './local-scan-connectors.js'; + +type CreateConnector = typeof createKtxCliScanConnector; + +export interface KtxCliIngestQueryExecutorDeps { + createConnector?: CreateConnector; +} + +async function cleanupConnector(connector: KtxScanConnector | null): Promise { + await connector?.cleanup?.(); +} + +export function createKtxCliIngestQueryExecutor( + project: KtxLocalProject, + deps: KtxCliIngestQueryExecutorDeps = {}, +): KtxSqlQueryExecutorPort { + const createConnector = deps.createConnector ?? 
createKtxCliScanConnector; + return { + async execute(input: KtxSqlQueryExecutionInput) { + let connector: KtxScanConnector | null = null; + try { + connector = await createConnector(project, input.connectionId); + if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { + throw new Error( + `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`, + ); + } + + const ctx: KtxScanContext = { runId: 'ingest-sql-execution' }; + const result = await connector.executeReadOnly( + { connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows }, + ctx, + ); + return { + headers: result.headers, + rows: result.rows, + totalRows: result.totalRows, + command: 'SELECT', + rowCount: result.rowCount, + }; + } finally { + await cleanupConnector(connector); + } + }, + }; +} diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 1e9f5662..9241fa34 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -261,6 +261,18 @@ export class CliLookerSlWritingAgentRunner extends AgentRunnerService { params.telemetryTags?.operationName === 'ingest-bundle-wu' && params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' ) { + const ledger = params.toolSet.record_verification_ledger; + if (!ledger?.execute) { + throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit'); + } + await ledger.execute( + { + summary: 'Test fixture verified Looker explore target identifiers before writing SL.', + verifiedIdentifiers: ['prod-warehouse', 'public.orders'], + unverifiedIdentifiers: [], + }, + { toolCallId: 'cli-looker-verification-ledger', messages: [] }, + ); const slWrite = params.toolSet.sl_write_source; if (!slWrite?.execute) { throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 
410312f5..de226bc4 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -810,6 +810,44 @@ describe('runKtxIngest', () => { expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ llmDebugRequestFile: debugFile })); }); + it('supplies a scan-connector query executor to local ingest runs', async () => { + const io = makeIo(); + const projectDir = join(tempDir, 'query-executor-project'); + await writeWarehouseConfig(projectDir); + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: [], + rows: [], + totalRows: 0, + command: 'SELECT', + rowCount: 0, + })), + }; + const runLocalIngest = vi.fn(async (input: RunLocalIngestOptions): Promise => + completedLocalBundleRun(input, 'query-executor-run'), + ); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'json', + }, + io.io, + { + runLocalIngest, + createAdapters: () => [], + createQueryExecutor: () => queryExecutor, + }, + ), + ).resolves.toBe(0); + + expect(runLocalIngest).toHaveBeenCalledWith(expect.objectContaining({ queryExecutor })); + }); + it('passes daemon database introspection URL to default local ingest adapters', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 4a68edf3..6e0648b5 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -16,7 +16,9 @@ import { runLocalMetabaseIngest, savedMemoryCountsForReport, } from '@ktx/context/ingest'; -import { loadKtxProject } from '@ktx/context/project'; +import type { KtxSqlQueryExecutorPort } from '@ktx/context/connections'; +import { loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; +import { createKtxCliIngestQueryExecutor } from './ingest-query-executor.js'; import { readIngestReportSnapshotFile } from './ingest-report-file.js'; import { createCliOperationalLogger } 
from './io/logger.js'; import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; @@ -69,6 +71,7 @@ interface KtxIngestDeps { jobIdFactory?: () => string; now?: () => Date; createAdapters?: typeof createKtxCliLocalIngestAdapters; + createQueryExecutor?: (project: KtxLocalProject) => KtxSqlQueryExecutorPort; runLocalIngest?: typeof runLocalIngest; runLocalMetabaseIngest?: typeof runLocalMetabaseIngest; readReportFile?: typeof readIngestReportSnapshotFile; @@ -532,6 +535,9 @@ export async function runKtxIngest( ...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}), logger: operationalLogger, }; + const queryExecutor = + localIngestOptions.queryExecutor ?? + (deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project); if (args.adapter === 'metabase' && args.sourceDir) { throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter'); } @@ -544,6 +550,7 @@ export async function runKtxIngest( adapters: createAdapters(project, adapterOptions), metabaseConnectionId: args.connectionId, ...localIngestOptions, + queryExecutor, trigger: 'manual_resync', jobIdFactory: deps.jobIdFactory, ...(progress ? { progress } : {}), @@ -604,6 +611,7 @@ export async function runKtxIngest( trigger: 'manual_resync', jobId, ...localIngestOptions, + queryExecutor, pullConfigOptions: adapterOptions, ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), ...(memoryFlow ? { memoryFlow } : {}), diff --git a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md index d076d4e5..5d2316fd 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md @@ -20,6 +20,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t All wiki writes are GLOBAL (same as Stage 3). 
SL writes target the same session worktree Stage 3 used. +Wiki keys must be flat slugs, not directory paths. If a Stage 3 page used a path-like key and a flat retry exists, treat the flat key as the canonical page. diff --git a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md index 9645ccc4..c7c9eb6d 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md @@ -1,5 +1,5 @@ -You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and searchable via `wiki_sl_search`. +You are processing ONE WorkUnit of a multi-file ingest bundle. The WorkUnit gives you a slice of raw source files (LookML views, dbt/MetricFlow YAMLs, Metabase card JSONs, Notion pages, or similar) and you must translate that slice into KTX semantic-layer sources and/or knowledge wiki pages, in one pass. Prior WorkUnits in this same job may have already written SL sources and wiki pages; their writes are visible on the working branch and discoverable with `discover_data`. @@ -8,9 +8,9 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing 1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files. -2. Load the per-source review skill first (e.g. 
`lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `wiki_sl_search` reveals that a prior WU already wrote something overlapping. +2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping. 3. If the system prompt includes ``, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain. -4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `wiki_sl_search` for each candidate name to find prior-WU writes; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. +4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. 5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. 
If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit. 6. When `priorProvenance` names an existing artifact for one of your raw files, prefer `sl_edit` over `sl_write` for that artifact: the re-ingest change rule says expression-only changes replace silently, grain/column/filter changes replace and flag. 7. When a raw file cannot map to normal SL and you use a fallback path, call `emit_unmapped_fallback` exactly once for that raw file and reason. Use `fallback: "sql_standalone"` for a standalone SQL source, `fallback: "wiki_only"` for documentation-only capture, and `fallback: "flagged"` when no reliable artifact can be written. @@ -19,12 +19,13 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing All wiki writes go to the GLOBAL scope. Bundle ingests are not personal. The `wiki_write` tool selects scope automatically for this caller. +Wiki keys must be flat slugs like `paid-order-lifecycle`, not directory paths like `historic-sql/paid-order-lifecycle`. Use `tags`, `source`, and page content to group related pages. - Do not read peer files; only files listed in `rawFiles` or `dependencyPaths` are accessible. `read_raw_file` will reject everything else. - Do not invent measures/joins/rules not declared in the raw files. -- Do not invent physical column names or grain keys. For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`wiki_sl_search`, `sl_discover`, `sl_describe_table`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source. +- Do not invent physical column names or grain keys. 
For table-backed SL sources, every `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr` column must come from raw-file column declarations or warehouse-backed discovery (`discover_data`, `sl_discover`, `entity_details`). If column names are not confirmed, capture the business context in wiki instead of writing a full SL source. - Do not write context-source overlays into the context source connection just because that is the current WorkUnit connection. Use `sl_discover` across data sources and write the SL artifact to the warehouse/data-source connection that owns the matching manifest. If there is no confirmed target connection, use `emit_unmapped_fallback` and wiki capture. - Do not duplicate an artifact that prior provenance says you already produced; update it. - Do not silently accept a name collision with a prior WU's write when the formula differs. Trigger `ingest_triage`. diff --git a/packages/context/skills/_shared/identifier-verification.md b/packages/context/skills/_shared/identifier-verification.md new file mode 100644 index 00000000..775203bd --- /dev/null +++ b/packages/context/skills/_shared/identifier-verification.md @@ -0,0 +1,27 @@ +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. 
If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. diff --git a/packages/context/skills/dbt_ingest/SKILL.md b/packages/context/skills/dbt_ingest/SKILL.md index 135dd2e5..6b332d8e 100644 --- a/packages/context/skills/dbt_ingest/SKILL.md +++ b/packages/context/skills/dbt_ingest/SKILL.md @@ -12,16 +12,16 @@ Use this skill for **uploaded** dbt projects (`dbt_project.yml` at stage root, ` | dbt | KTX | Notes | |-----|--------|--------| -| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `wiki_sl_search` / `sl_describe_table`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. | +| `models:` entry with `columns:` | **Overlay** on the manifest table with the same name (after `discover_data` / `entity_details`) | One SL source per physical table; model name may differ from DB name — resolve with `read_raw_file` + warehouse context. | | `sources:` → `tables:` | Same as models; use `identifier` when present instead of logical `name`. | Schema + name must match how the connection sees tables. | | Column `description` | `descriptions.user` or merged `descriptions` map on the column | Do not overwrite `dbt` description keys from sync. | | `data_tests: not_null` / `unique` | Short hint in column `descriptions` or notes: “dbt: not null”, “dbt: unique” | Full structured metadata lands in manifest via **sync**; the skill keeps bundle-time SL text useful for the agent. | -| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `wiki_sl_search` / filters. | -| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `wiki_sl_search` / `sl_describe_table` | If the ref cannot be resolved, capture the intent in a wiki page instead. 
| +| `accepted_values` | Add a **brief** line in the column description: allowed values (truncate long lists) | Also mention enum-like use in `discover_data` / filters. | +| `relationships` | Add or confirm `joins:` on the overlay **only** when `to` resolves to a real table via `read_raw_file` + `discover_data` / `entity_details` | If the ref cannot be resolved, capture the intent in a wiki page instead. | ## Physical schema grounding -dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `wiki_sl_search`, `sl_discover`, or `sl_describe_table` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`. +dbt YAML is documentation and test metadata; it is not permission to invent physical columns. Before writing any table-backed SL source, confirm the real warehouse shape with `discover_data`, `sl_discover`, or `entity_details` and use only confirmed column names in `columns:`, `grain:`, `joins:`, `segments:`, and `measures[].expr`. For dbt context-source ingest, the dbt connection is usually not the warehouse connection. Call `sl_discover` without `connectionId` first, then write overlays to the connection that owns the matching manifest-backed source (for example `postgres-warehouse`), not to the dbt connection (for example `dbt-main`). If no matching manifest-backed source is visible on any warehouse connection, do not call `sl_write_source`; record `emit_unmapped_fallback` and keep the fact wiki-only. @@ -31,6 +31,34 @@ Include `rawPaths` on every `wiki_write`, `sl_write_source`, and `sl_edit_source After every `sl_write_source`, call `sl_validate`. A validation error saying a declared column or measure reference is absent from the physical table is a hard stop: re-read the warehouse-backed source and rewrite with confirmed names, or remove the invalid SL fields. 
+## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## 1.1 test hints (descriptions / meta) When YAML shows `accepted_values` or `not_null`, add **short** hints into `columns[].descriptions` (e.g. under `user`) or freeform column notes so chat and validation see intent before the next git sync refreshes `constraints` / `enum_values` in `_schema`. Keep hints under a few words when possible. diff --git a/packages/context/skills/historic_sql_patterns/SKILL.md b/packages/context/skills/historic_sql_patterns/SKILL.md index 33eb6fe0..5e898c47 100644 --- a/packages/context/skills/historic_sql_patterns/SKILL.md +++ b/packages/context/skills/historic_sql_patterns/SKILL.md @@ -18,6 +18,37 @@ Use this skill when the WorkUnit raw file is a `patterns-input/part-0001.json` s 6. Set each evidence object's `rawPath` to the exact raw file path read in step 3. 7. Stop after all pattern evidence has been emitted. +Every join column mentioned in pattern descriptions must be verified via +entity_details for both sides of the join. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT FROM LIMIT 50"})`. +4. 
If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Evidence Shape Each call to `emit_historic_sql_evidence` must use this shape: diff --git a/packages/context/skills/historic_sql_table_digest/SKILL.md b/packages/context/skills/historic_sql_table_digest/SKILL.md index 34e49d27..0815e3dc 100644 --- a/packages/context/skills/historic_sql_table_digest/SKILL.md +++ b/packages/context/skills/historic_sql_table_digest/SKILL.md @@ -17,6 +17,34 @@ Use this skill when the WorkUnit raw file is one `tables/..json` f 5. Call `emit_historic_sql_evidence` exactly once with `kind: "table_usage"`. 6. Stop after the evidence tool succeeds. +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT FROM LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from ]` in the wiki body, + citing the exact raw path that mentioned it. 
+ - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Evidence Shape Call `emit_historic_sql_evidence` with this shape: diff --git a/packages/context/skills/knowledge_capture/SKILL.md b/packages/context/skills/knowledge_capture/SKILL.md index 1e6a8f6c..2a111d90 100644 --- a/packages/context/skills/knowledge_capture/SKILL.md +++ b/packages/context/skills/knowledge_capture/SKILL.md @@ -40,6 +40,8 @@ If nothing is worth capturing, respond without calling any tool. 1. Read the wiki index (provided in the prompt) and decide whether the turn introduces durable knowledge. 2. **Before writing**, search for related content so cross-references are accurate: + - `discover_data` first when a page relates to data or SL concepts — find + existing wiki pages, SL sources, and raw warehouse schema together. - `wiki_search` with the topic — find related wiki pages to populate `refs`. - `sl_discover` with the concept — if the page defines a metric (revenue, churn, retention, LTV, ARR, MRR, CAC, attribution, etc.), find matching SL sources or measures to populate `sl_refs`. If no matches, pass `sl_refs: []` so future readers know you checked. 3. If updating an existing page, `wiki_read` it first. Use the returned `structured.content` or markdown body as the exact stored text for targeted replacements; current tags, refs, and sl_refs are returned in structured metadata. @@ -48,6 +50,34 @@ If nothing is worth capturing, respond without calling any tool. For bundle/external ingest, include `rawPaths` on every `wiki_write`/`wiki_remove` call with only the raw files that directly support that wiki action. This keeps ingest provenance tied to the actual source file, not every file in the WorkUnit. +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. 
+ +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Keys, summaries, and content - **Keys** are short kebab-case topic identifiers: `leads-source-filter`, `revenue-definition`, `churn-calculation`. No namespacing, no prefixes. @@ -70,6 +100,10 @@ The `wiki_write` tool accepts three array fields that go into the page frontmatt - **`refs`**: keys of related wiki pages. Add when the new page materially depends on concepts from another (e.g., a churn definition that uses the paid-orders filter from a revenue definition). Don't add refs just because pages share a topic area. - **`sl_refs`**: names of SL sources or measures the page relates to. Format: `"source_name"` or `"source_name.measure_name"`. Discover via `sl_discover` → inspect with `sl_read_source` → include the confirmed matches. +Wiki page keys must be flat slugs. Use `large-contract-requesters`, not +`historic-sql/large-contract-requesters`. Use `tags`, `source`, and content +headings for grouping. + ### Replace semantics All three fields use REPLACE semantics on update: diff --git a/packages/context/skills/live_database_ingest/SKILL.md b/packages/context/skills/live_database_ingest/SKILL.md index 9db52484..2b9cb6d8 100644 --- a/packages/context/skills/live_database_ingest/SKILL.md +++ b/packages/context/skills/live_database_ingest/SKILL.md @@ -24,6 +24,37 @@ Use this skill when the ingest work unit contains raw files under or column comments. 9. Run `sl_validate` for the table source before the work unit completes. +Sample values come from the scan record; do not invent values not present in +relationship-profile.json. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. 
+ +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Source shape For a raw table with this shape: diff --git a/packages/context/skills/looker_ingest/SKILL.md b/packages/context/skills/looker_ingest/SKILL.md index 462a5910..7a41fa6e 100644 --- a/packages/context/skills/looker_ingest/SKILL.md +++ b/packages/context/skills/looker_ingest/SKILL.md @@ -21,6 +21,37 @@ Looker runtime ingest turns API-staged dashboards, Looks, and explores into dura 9. Write SL from Looker runtime evidence only through the staged warehouse target contract. For explores and inherited dashboard/Look queries, branch on `targetTable.ok`; when it is true, write on `targetWarehouseConnectionId` and use `targetTable.canonicalTable` as `source.table`. When it is false or missing, write wiki knowledge candidates and record `emit_unmapped_fallback` with the staged reason. 10. Run `sl_validate` after every SL write. If validation fails, fix the source or roll it back before the WorkUnit ends. +For every Looker field reference, call entity_details on the underlying +schema.table.column before promoting it to sl_refs or quoting it in wiki body. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. 
If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Explore WorkUnits Explore WUs have raw files like `explores//.json` and usually depend on `lookml_models.json`. diff --git a/packages/context/skills/lookml_ingest/SKILL.md b/packages/context/skills/lookml_ingest/SKILL.md index 18b43f3e..5a9c79a3 100644 --- a/packages/context/skills/lookml_ingest/SKILL.md +++ b/packages/context/skills/lookml_ingest/SKILL.md @@ -51,10 +51,47 @@ LookML's `dimension_group: date { type: time; timeframes: [raw, date, week, mont A prior replay hallucinated `date_date`, `date_week` into `sql:`, `columns:`, and `grain:` across 4+ standalones; every measure on each affected source returned `400 Unrecognized name: date_date` at query time. Preventable. +Verify each sql_table_name from the LookML view with entity_details before +mapping to an SL source. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT FROM LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM LIMIT 0"})`. + If it errors, the identifier is fictional. 
+ - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + **Required flow before writing any overlay or standalone**: 1. Call `sl_discover()` for each base table you're about to touch. That returns the real columns. -2. If the table isn't in the manifest, fall back to `sql_execution({ sql: "SELECT column_name FROM <dataset>.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '<table>
'" })` (session shape — a connection is already pinned by the ingest session). +2. If the table isn't in the manifest, use the warehouse `connectionName` + returned by `discover_data` or the target connection chosen from + `sl_discover`, then call a dialect-appropriate SQL probe with that + connection name, for example: + `sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`. + Replace `warehouse`, `analytics`, and `orders` with the verified connection, + schema or dataset, and table from the WorkUnit evidence. 3. Use only those names in `sql:`, `columns:`, and `grain:`. Map each `dimension_group` to ONE `{ name: , type: time, role: time }` entry — never one per timeframe. | LookML input | KTX `columns:` entry | diff --git a/packages/context/skills/metabase_ingest/SKILL.md b/packages/context/skills/metabase_ingest/SKILL.md index 061760bf..f5aa00e2 100644 --- a/packages/context/skills/metabase_ingest/SKILL.md +++ b/packages/context/skills/metabase_ingest/SKILL.md @@ -44,6 +44,37 @@ Use `resultMetadata` to: - `lastRunAt`: ISO timestamp of the card's last execution. If null or very old, the card may be dead; prefer skipping over creating a source. - `dashboardCount`: number of dashboards referencing the card. Cards with `dashboardCount: 0` and a stale `lastRunAt` are strong skip signals. +Before writing a wiki page derived from a Metabase question SQL, verify each +schema.table.column mentioned with entity_details. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. 
`entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Decision tree For each card: diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md index 5a24cda8..47187ffb 100644 --- a/packages/context/skills/metricflow_ingest/SKILL.md +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -29,6 +29,37 @@ A MetricFlow `semantic_model` maps to an SL source; MetricFlow `measures` map to Type map: MetricFlow `time` to KTX `time`; `categorical` to `string`; `number` to `number`; `boolean` to `boolean`. Follow `expr` over `name` when both differ — `expr` is the physical column. +Verify each MetricFlow model source table with entity_details before producing +the corresponding sl_write_source. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: ""})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: ""}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT FROM LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM LIMIT 0"})`. + If it errors, the identifier is fictional. 
+ - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Flattening `extends:` Within one WorkUnit, multiple semantic_models linked by `extends:` are guaranteed to be present (the chunker groups them). Resolve inheritance **before** writing: @@ -49,7 +80,13 @@ The `model:` field on a semantic_model is a string like `ref('table_name')`, `so - `source('s','t')` → table name `t`. Verify via `sl_discover(t)`. - Literal (no `ref(...)` / `source(...)`) → treat as the table name directly. -If `sl_discover` errors (no such table), fall back to `sql_execution({ sql: "SELECT column_name FROM .INFORMATION_SCHEMA.COLUMNS WHERE table_name = ''" })` (session shape — a connection is already pinned by the ingest session). **Never invent column names** — every column in `columns:`, `grain:`, and `sql:` must be sourced from a real probe. +If `sl_discover` errors because no such table exists, use `discover_data` and +`entity_details` to find the warehouse target. If a SQL probe is still needed, +call `sql_execution` with the same warehouse connection name, for example: +`sql_execution({connectionName: "warehouse", sql: "SELECT 1 FROM analytics.orders LIMIT 0"})`. +**Never invent column names** - every column in `columns:`, `grain:`, and +`sql:` must be sourced from raw files, `entity_details`, or a successful SQL +probe. After every `sl_write_source`, call `sl_validate`. The warehouse will reject invented columns with `Unrecognized name: ` — treat as a hard failure and re-read the schema. diff --git a/packages/context/skills/notion_synthesize/SKILL.md b/packages/context/skills/notion_synthesize/SKILL.md index 933acc55..524c6832 100644 --- a/packages/context/skills/notion_synthesize/SKILL.md +++ b/packages/context/skills/notion_synthesize/SKILL.md @@ -67,10 +67,38 @@ Search existing wiki pages for the same `tables:` or `sl_refs:` frontmatter and - Do not create SL sources under the Notion connection just because a page mentions a warehouse, dbt, Looker, or Metabase object. 
Use the mapped warehouse/source connection after discovery, or emit an unmapped fallback and write wiki-only. - Distinguish fallback reasons precisely: if a non-Notion warehouse/dbt connection exists but `sl_discover` cannot find the named table/source, use `no_physical_table`; reserve `no_connection_mapping` for cases where there is no plausible non-Notion target connection at all. - If `sl_discover` resolves the table/source, do not call `emit_unmapped_fallback` for that table. Use the resolved source for `sl_refs`, overlay edits, or wiki-only documentation. -- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "orbit_analytics.customer"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`. +- When calling `emit_unmapped_fallback`, pass the table or source identifier as `tableRef` (e.g. `tableRef: "<schema>.<table>
"`) — the tool generates the canonical detail string from the reason code and `tableRef`. Use the optional `clarification` field only to add context that does not contradict the reason. Do not restate the reason in `clarification`. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. ## Tools -Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`. +Allowed: `read_raw_file`, `read_raw_span`, `wiki_search`, `wiki_read`, `wiki_write`, `discover_data`, `entity_details`, `sql_execution`, `sl_discover`, `sl_read_source`, `sl_write_source`, `sl_edit_source`, `sl_validate`, `context_evidence_search`, `context_evidence_read`, `context_evidence_neighbors`, `emit_unmapped_fallback`, `eviction_list`, `context_eviction_decision_write`. Not allowed: `context_candidate_write`, `context_candidate_mark`. diff --git a/packages/context/skills/sl/SKILL.md b/packages/context/skills/sl/SKILL.md index 2719b9d4..9cdb8b34 100644 --- a/packages/context/skills/sl/SKILL.md +++ b/packages/context/skills/sl/SKILL.md @@ -13,6 +13,10 @@ This skill covers two parts: Capture (when and how to add new patterns to the SL) is a separate concern handled by the memory-agent — see the `sl_capture` skill if you are running in capture mode. The research agent **reads** and **queries** the SL via the tools described here; it does not write to it. +For capture-time identifier verification, load `sl_capture`. Synthesis writer +skills must verify warehouse identifiers with `discover_data`, +`entity_details`, and `sql_execution` before emitting table or column names. 
+ --- ## Part 1 — Schema reference diff --git a/packages/context/skills/sl_capture/SKILL.md b/packages/context/skills/sl_capture/SKILL.md index ffb1780d..4bc383eb 100644 --- a/packages/context/skills/sl_capture/SKILL.md +++ b/packages/context/skills/sl_capture/SKILL.md @@ -174,6 +174,37 @@ Wiki-only is correct when the user is documenting *about* the measure (definition in business terms, owner, policy, glossary, examples of when to use it) without changing its SQL expression or filters. +Before sl_write_source, call entity_details on the target table to confirm +column names and types match the YAML being written. + +## Identifier Verification Protocol + +Before writing a wiki page or SL source on any topic: + +1. `discover_data({query: "<topic>"})` - see what wikis, SL sources, and raw + tables already exist. Prefer updating existing pages over creating new ones. + +Before emitting any `schema.table` or `schema.table.column` into a wiki body, +SL source, `tables:` frontmatter, `sl_refs`, or `emit_unmapped_fallback`: + +2. `entity_details({connectionName, targets: [{display: "<schema.table>"}]})` - + confirm the identifier resolves; inspect native types, FK/PK, and + sampleValues. +3. For literal values from the source, such as status codes or plan tiers, + check whether they appear in `entity_details` sampleValues for the relevant + column. If sampleValues is short or the sample may have missed real values, + run a `sql_execution` probe with the same warehouse connection name: + `sql_execution({connectionName, sql: "SELECT DISTINCT <column> FROM <schema.table> LIMIT 50"})`. +4. If the candidate identifier still does not resolve, do one of: + - Use `sql_execution({connectionName, sql: "SELECT 1 FROM <schema.table> LIMIT 0"})`. + If it errors, the identifier is fictional. + - Wrap the identifier in `[unverified - from <raw path>]` in the wiki body, + citing the exact raw path that mentioned it. + - When recording `emit_unmapped_fallback` with `no_physical_table`, include + the failing probe error in `clarification`. +5. Never copy `<schema>.<table>
` placeholder strings from these instructions + into output. + ## Tool sequence 1. `sl_discover` — see what source files exist. @@ -181,7 +212,7 @@ use it) without changing its SQL expression or filters. 3. `sl_read_source({ sourceName })` — read the raw YAML before editing. 4. For modifications: `sl_edit_source({ sourceName, old_string, new_string })` with exact-string replacements. `old_string` must match exactly and be unique in the file. 5. For new sources or full rewrites: `sl_write_source({ sourceName, content })` with the full YAML content. -6. For join discovery: `sql_execution({ sql })` to verify the join key exists in both tables and assess cardinality before declaring the join. +6. For join discovery: use `sql_execution({connectionName: "warehouse", sql: "SELECT count(*) FROM public.orders o JOIN public.customers c ON c.id = o.customer_id LIMIT 20"})` with the target warehouse connection name and dialect-correct table names to verify the join key exists in both tables and assess cardinality before declaring the join. 7. Cross-reference knowledge: author the edge once on the **wiki** side via `sl_refs: [source_name]` in the page's front-matter. The reverse edge (wiki pages that cite an SL source) is derived automatically by the reconciler — do not add a `knowledge_refs:` field to SL YAMLs. 8. `sl_validate` — run after writing or editing to surface schema issues, duplicate measure names, and cross-source validation errors. Read-only; the writes are already committed (the squash-at-end flow will collapse them into one commit). 
@@ -248,7 +279,8 @@ Prior turn: user asked to correlate LTV with protocol count; assistant joined `f sl_read_source({ sourceName: "fct_orders" }) → no joins section yet sql_execution({ - sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM fct_orders a JOIN fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1" + connectionName: "warehouse", + sql: "SELECT COUNT(*), COUNT(DISTINCT a.admin_user_id) FROM public.fct_orders a JOIN public.fct_mau_multiprotocol b ON a.admin_user_id = b.admin_user_id LIMIT 1" }) → confirms cardinality (many orders per MAU row = many_to_one) sl_edit_source({ diff --git a/packages/context/src/connections/dialects.test.ts b/packages/context/src/connections/dialects.test.ts new file mode 100644 index 00000000..6c9b6c41 --- /dev/null +++ b/packages/context/src/connections/dialects.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { getDialectForDriver } from './dialects.js'; + +describe('getDialectForDriver', () => { + it.each([ + ['postgres', '"public"."orders"'], + ['postgresql', '"public"."orders"'], + ['mysql', '`public`.`orders`'], + ['clickhouse', '`public`.`orders`'], + ['sqlite', '"orders"'], + ['snowflake', '"analytics"."public"."orders"'], + ['bigquery', '`analytics`.`public`.`orders`'], + ['sqlserver', '[analytics].[public].[orders]'], + ] as const)('formats table names for %s', (driver, expected) => { + const dialect = getDialectForDriver(driver); + expect( + dialect.formatTableName({ + catalog: driver === 'snowflake' || driver === 'bigquery' || driver === 'sqlserver' ? 'analytics' : null, + db: driver === 'sqlite' ? null : 'public', + name: 'orders', + }), + ).toBe(expected); + }); + + it('throws with a supported-driver list for unknown drivers', () => { + expect(() => getDialectForDriver('oracle')).toThrow( + 'Unsupported warehouse driver "oracle". 
Supported drivers: bigquery, clickhouse, mysql, postgres, postgresql, sqlite, sqlite3, snowflake, sqlserver', + ); + }); +}); diff --git a/packages/context/src/connections/dialects.ts b/packages/context/src/connections/dialects.ts new file mode 100644 index 00000000..afac4bd2 --- /dev/null +++ b/packages/context/src/connections/dialects.ts @@ -0,0 +1,102 @@ +import type { KtxSchemaDimensionType, KtxTableRef } from '../scan/types.js'; + +export type SupportedDriver = + | 'postgres' + | 'postgresql' + | 'mysql' + | 'sqlserver' + | 'snowflake' + | 'bigquery' + | 'clickhouse' + | 'sqlite' + | 'sqlite3'; + +export interface KtxDialect { + readonly type: SupportedDriver; + quoteIdentifier(identifier: string): string; + formatTableName(table: KtxTableRef): string; + mapToDimensionType(nativeType: string): KtxSchemaDimensionType; +} + +const supportedDrivers: SupportedDriver[] = [ + 'bigquery', + 'clickhouse', + 'mysql', + 'postgres', + 'postgresql', + 'sqlite', + 'sqlite3', + 'snowflake', + 'sqlserver', +]; + +function doubleQuoted(identifier: string): string { + return `"${identifier.replace(/"/g, '""')}"`; +} + +function backtickQuoted(identifier: string): string { + return `\`${identifier.replace(/`/g, '``')}\``; +} + +function bigQueryQuoted(identifier: string): string { + return `\`${identifier.replace(/`/g, '\\`')}\``; +} + +function bracketQuoted(identifier: string): string { + return `[${identifier.replace(/\]/g, ']]')}]`; +} + +function inferDimensionType(nativeType: string): KtxSchemaDimensionType { + const normalized = nativeType.toLowerCase().trim(); + if (normalized.includes('date') || normalized.includes('time')) { + return 'time'; + } + if ( + normalized.includes('int') || + normalized.includes('num') || + normalized.includes('dec') || + normalized.includes('float') || + normalized.includes('double') || + normalized.includes('real') + ) { + return 'number'; + } + if (normalized.includes('bool') || normalized === 'bit') { + return 'boolean'; + } + return 
'string'; +} + +function formatWithParts(table: KtxTableRef, quote: (identifier: string) => string, sqlite = false): string { + const parts = sqlite ? [table.name] : [table.catalog, table.db, table.name].filter((part): part is string => !!part); + return parts.map(quote).join('.'); +} + +function createDialect(type: SupportedDriver, quote: (identifier: string) => string, sqlite = false): KtxDialect { + return { + type, + quoteIdentifier: quote, + formatTableName: (table) => formatWithParts(table, quote, sqlite), + mapToDimensionType: inferDimensionType, + }; +} + +const dialects: Record = { + postgres: createDialect('postgres', doubleQuoted), + postgresql: createDialect('postgresql', doubleQuoted), + mysql: createDialect('mysql', backtickQuoted), + clickhouse: createDialect('clickhouse', backtickQuoted), + sqlite: createDialect('sqlite', doubleQuoted, true), + sqlite3: createDialect('sqlite3', doubleQuoted, true), + snowflake: createDialect('snowflake', doubleQuoted), + bigquery: createDialect('bigquery', bigQueryQuoted), + sqlserver: createDialect('sqlserver', bracketQuoted), +}; + +export function getDialectForDriver(driver: string): KtxDialect { + const normalized = driver.toLowerCase().trim(); + if (normalized in dialects) { + return dialects[normalized as SupportedDriver]; + } + throw new Error(`Unsupported warehouse driver "${driver}". 
Supported drivers: ${supportedDrivers.join(', ')}`); +} diff --git a/packages/context/src/connections/index.ts b/packages/context/src/connections/index.ts index 513818fa..0917a7ca 100644 --- a/packages/context/src/connections/index.ts +++ b/packages/context/src/connections/index.ts @@ -3,7 +3,9 @@ export type { KtxSqlQueryExecutionResult, KtxSqlQueryExecutorPort, } from './query-executor.js'; +export type { KtxDialect, SupportedDriver } from './dialects.js'; export { createDefaultLocalQueryExecutor, type DefaultLocalQueryExecutorOptions } from './local-query-executor.js'; +export { getDialectForDriver } from './dialects.js'; export { normalizeQueryRows } from './query-executor.js'; export { createPostgresQueryExecutor } from './postgres-query-executor.js'; export { assertReadOnlySql, limitSqlForExecution } from './read-only-sql.js'; diff --git a/packages/context/src/core/git.service.test.ts b/packages/context/src/core/git.service.test.ts index 14e93495..ba1d9e0f 100644 --- a/packages/context/src/core/git.service.test.ts +++ b/packages/context/src/core/git.service.test.ts @@ -379,5 +379,37 @@ describe('GitService', () => { await service.removeWorktree(wtDir).catch(() => undefined); await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); }); + + it('reports untracked files that would be overwritten by the squash merge', async () => { + const { commitHash: baseSha } = await writeAndCommit('seed.md', 'seed'); + const parent = await realpath(join(tempDir, '..')); + const wtDir = join(parent, `wt-${Date.now()}-untracked`); + await service.addWorktree(wtDir, 'session/untracked', baseSha); + + const scoped = service.forWorktree(wtDir); + await writeFile(join(wtDir, 'knowledge.md'), 'session version\n', 'utf-8'); + await scoped.commitFile('knowledge.md', 'session write', 'System User', 'system@example.com'); + await writeFile(join(tempDir, 'knowledge.md'), 'untracked local version\n', 'utf-8'); + + const result = await service.squashMergeIntoMain( + 
'session/untracked', + 'System User', + 'system@example.com', + 'Memory capture: 1 file [chat=untracked]', + ); + + expect(result.ok).toBe(false); + if (result.ok) { + throw new Error('unreachable'); + } + expect(result.conflict).toBe(true); + expect(result.conflictPaths).toEqual(['knowledge.md']); + + const status = await (service as unknown as { git: import('simple-git').SimpleGit }).git.status(); + expect(status.not_added).toContain('knowledge.md'); + + await service.removeWorktree(wtDir).catch(() => undefined); + await rm(wtDir, { recursive: true, force: true }).catch(() => undefined); + }); }); }); diff --git a/packages/context/src/core/git.service.ts b/packages/context/src/core/git.service.ts index 6539f9fd..8d05a089 100644 --- a/packages/context/src/core/git.service.ts +++ b/packages/context/src/core/git.service.ts @@ -31,6 +31,40 @@ export type SquashMergeResult = | { ok: true; squashSha: string; touchedPaths: string[] } | { ok: false; conflict: true; conflictPaths: string[] }; +function mergeErrorMessage(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + return String(error); +} + +function extractUntrackedOverwritePaths(message: string): string[] { + const marker = 'The following untracked working tree files would be overwritten by merge:'; + const markerIndex = message.indexOf(marker); + if (markerIndex === -1) { + return []; + } + + const afterMarker = message.slice(markerIndex + marker.length); + const abortIndex = afterMarker.indexOf('Please move or remove them before you merge.'); + const pathBlock = abortIndex === -1 ? 
afterMarker : afterMarker.slice(0, abortIndex); + return pathBlock + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.length > 0 && line !== 'Aborting') + .map((line) => line.replace(/^"(.+)"$/, '$1')); +} + +function mergeConflictPaths(unmergedPaths: string[], mergeError: unknown): string[] { + const paths = new Set(unmergedPaths); + if (mergeError !== null) { + for (const path of extractUntrackedOverwritePaths(mergeErrorMessage(mergeError))) { + paths.add(path); + } + } + return [...paths]; +} + export class GitService { private static readonly mutationQueues = new Map>(); @@ -639,10 +673,11 @@ export class GitService { } const unmergedOut = await this.git.raw(['diff', '--name-only', '--diff-filter=U']).catch(() => ''); - const conflictPaths = unmergedOut + const unmergedPaths = unmergedOut .split('\n') .map((l) => l.trim()) .filter(Boolean); + const conflictPaths = mergeConflictPaths(unmergedPaths, mergeError); if (conflictPaths.length > 0 || mergeError !== null) { // `merge --abort` only works for an in-progress merge; squash sets MERGE_MSG but not @@ -651,7 +686,7 @@ export class GitService { await this.git.raw(['reset', '--hard', 'HEAD']).catch(() => undefined); this.logger.warn( `squashMergeIntoMain: conflict merging ${branch} — aborted. conflictPaths=${conflictPaths.join(',')}` + - (mergeError ? ` error=${mergeError instanceof Error ? mergeError.message : String(mergeError)}` : ''), + (mergeError ? 
` error=${mergeErrorMessage(mergeError)}` : ''), ); return { ok: false, conflict: true, conflictPaths }; } diff --git a/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts b/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts index 22f35cfc..c7a334bf 100644 --- a/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts @@ -277,7 +277,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => { await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves .toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.'); - await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql/paid-order-lifecycle.md'), 'utf-8')) + await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql-paid-order-lifecycle.md'), 'utf-8')) .resolves.toContain('Paid Order Lifecycle'); const reloaded = await loadKtxProject({ projectDir: project.projectDir }); @@ -295,7 +295,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => { searchLocalKnowledgePages(reloaded, { query: 'paid order lifecycle', userId: 'local', limit: 5 }), ).resolves.toEqual([ expect.objectContaining({ - key: 'historic-sql/paid-order-lifecycle', + key: 'historic-sql-paid-order-lifecycle', summary: 'Paid Order Lifecycle', matchReasons: expect.arrayContaining(['lexical']), }), diff --git a/packages/context/src/ingest/adapters/historic-sql/post-processor.ts b/packages/context/src/ingest/adapters/historic-sql/post-processor.ts index 815b6798..8d89d397 100644 --- a/packages/context/src/ingest/adapters/historic-sql/post-processor.ts +++ b/packages/context/src/ingest/adapters/historic-sql/post-processor.ts @@ -10,7 +10,7 @@ async function commitProjectionChanges(workdir: string): Promise { const status = await git.status(); 
const paths = status.files .map((file) => file.path) - .filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql/')); + .filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql')); if (paths.length === 0) { return; } diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts index e6cb736a..f2a5b068 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts @@ -106,7 +106,7 @@ describe('projectHistoricSqlEvidence', () => { await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' }); await writeText( workdir, - 'knowledge/global/historic-sql/old-order-lifecycle.md', + 'knowledge/global/historic-sql-old-order-lifecycle.md', [ '---', YAML.stringify({ @@ -127,7 +127,7 @@ describe('projectHistoricSqlEvidence', () => { ); await writeText( workdir, - 'knowledge/global/historic-sql/retired-pattern.md', + 'knowledge/global/historic-sql-retired-pattern.md', [ '---', YAML.stringify({ @@ -164,15 +164,15 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.patternPagesWritten).toBe(1); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/old-order-lifecycle.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain( 'Order Lifecycle Analysis', ); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/retired-pattern.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( 
'stale_since: "2026-05-11T00:00:00.000Z"', ); }); - it('writes a reappearing pattern to the active slug instead of reusing an archived page key', async () => { + it('rewrites a reappearing archived pattern at the flat slug', async () => { const workdir = await tempWorkdir(); await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/manifest.json', { source: 'historic-sql', @@ -192,7 +192,7 @@ describe('projectHistoricSqlEvidence', () => { await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' }); await writeText( workdir, - 'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md', + 'knowledge/global/historic-sql-order-lifecycle-analysis.md', [ '---', YAML.stringify({ @@ -230,15 +230,10 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.patternPagesWritten).toBe(1); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain( - 'Order Lifecycle Analysis', - ); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/order-lifecycle-analysis.md'), 'utf-8')).resolves.toContain( - 'Archived body', - ); - await expect( - readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/order-lifecycle-analysis.md'), 'utf-8'), - ).rejects.toMatchObject({ code: 'ENOENT' }); + const page = await readFile(join(workdir, 'knowledge/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8'); + expect(page).toContain('Analysts compare order status with customer segment again.'); + expect(page).not.toContain('Archived body'); + expect(page).not.toContain('archived'); }); it('leaves already archived pattern pages stable when they are still absent', async () => { @@ -259,7 +254,7 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( 
workdir, - 'knowledge/global/historic-sql/_archived/retired-pattern.md', + 'knowledge/global/historic-sql-retired-pattern.md', [ '---', YAML.stringify({ @@ -284,12 +279,9 @@ describe('projectHistoricSqlEvidence', () => { expect(result.archivedPatternPages).toBe(0); expect(result.stalePatternPagesMarked).toBe(0); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/retired-pattern.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( 'Archived retired body', ); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/_archived/_archived/retired-pattern.md'), 'utf-8')).rejects.toMatchObject({ - code: 'ENOENT', - }); }); it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => { @@ -330,7 +322,7 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( workdir, - 'knowledge/global/historic-sql/legacy-template.md', + 'knowledge/global/historic-sql-legacy-template.md', [ '---', YAML.stringify({ @@ -365,7 +357,7 @@ describe('projectHistoricSqlEvidence', () => { commonJoins: [], staleSince: '2026-05-11T00:00:00.000Z', }); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql/legacy-template.md'), 'utf-8')).rejects.toMatchObject({ + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({ code: 'ENOENT', }); }); diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.ts b/packages/context/src/ingest/adapters/historic-sql/projection.ts index 366b98f3..25a317f3 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.ts @@ -37,7 +37,7 @@ interface HistoricSqlPatternPage { } function safeKnowledgeSlug(value: string): string { - return value.toLowerCase().replace(/[^a-z0-9/-]+/g, '-').replace(/^-+|-+$/g, 
''); + return value.toLowerCase().replace(/[^a-z0-9_-]+/g, '-').replace(/^-+|-+$/g, ''); } async function pathExists(path: string): Promise { @@ -159,7 +159,7 @@ function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean { function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean { const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : []; - return page.key.startsWith('_archived/') || tags.includes('archived'); + return tags.includes('archived'); } function stringArray(value: unknown): string[] { @@ -191,6 +191,9 @@ async function loadPatternPages(root: string): Promise const files = await walkFiles(root); const pages: HistoricSqlPatternPage[] = []; for (const file of files.filter((candidate) => candidate.endsWith('.md'))) { + if (file.includes('/')) { + continue; + } const key = file.replace(/\.md$/, ''); const path = join(root, file); const page = parseMarkdownPage(key, path, await readFile(path, 'utf-8')); @@ -201,6 +204,10 @@ async function loadPatternPages(root: string): Promise return pages; } +function historicSqlFlatKey(slug: string): string { + return `historic-sql-${safeKnowledgeSlug(slug)}`; +} + async function currentStagedTables(rawDir: string): Promise> { const tablesRoot = join(rawDir, 'tables'); const files = await walkFiles(tablesRoot); @@ -276,7 +283,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp } } - const wikiRoot = join(input.workdir, 'knowledge/global/historic-sql'); + const wikiRoot = join(input.workdir, 'knowledge/global'); await mkdir(wikiRoot, { recursive: true }); const allPages = await loadPatternPages(wikiRoot); const activePages = allPages.filter((page) => !isArchivedPatternPage(page)); @@ -286,7 +293,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp for (const pattern of patternEvidence) { const incomingSignals = [...pattern.pattern.tablesInvolved, ...pattern.pattern.constituentTemplateIds]; const reusable = 
patternPages.find((page) => overlapRatio(incomingSignals, existingPageSignals(page)) >= 0.6); - const key = reusable?.key ?? safeKnowledgeSlug(pattern.pattern.slug); + const key = reusable?.key ?? historicSqlFlatKey(pattern.pattern.slug); const pagePath = join(wikiRoot, `${key}.md`); const frontmatter = { summary: pattern.pattern.title, @@ -308,11 +315,12 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp for (const page of patternPages) { if (writtenKeys.has(page.key)) continue; if (shouldArchive(page.frontmatter.stale_since, manifest.fetchedAt, manifest.staleArchiveAfterDays)) { - const archivePath = join(wikiRoot, '_archived', `${page.key}.md`); const tags = [...new Set([...stringArray(page.frontmatter.tags), 'archived'])]; - await mkdir(dirname(archivePath), { recursive: true }); - await writeFile(archivePath, renderMarkdownPage({ ...page.frontmatter, tags }, page.content), 'utf-8'); - await rm(page.path, { force: true }); + await writeFile( + page.path, + renderMarkdownPage({ ...page.frontmatter, tags, archived_since: manifest.fetchedAt }, page.content), + 'utf-8', + ); result.archivedPatternPages += 1; continue; } diff --git a/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts b/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts index 32564da1..d22597b9 100644 --- a/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts +++ b/packages/context/src/ingest/adapters/lookml/lookml.adapter.test.ts @@ -15,6 +15,18 @@ describe('LookmlSourceAdapter validation sidecars', () => { afterEach(async () => rm(tmpRoot, { recursive: true, force: true })); + it('returns configured target warehouse connection ids', async () => { + const adapter = new LookmlSourceAdapter({ + homeDir: join(tmpRoot, 'home'), + targetConnectionIds: ['warehouse', 'analytics', 'warehouse'], + }); + + await expect(adapter.listTargetConnectionIds?.(join(tmpRoot, 'staged'))).resolves.toEqual([ + 'analytics', + 'warehouse', + ]); 
+ }); + it('writes a partial fetch report and marks mismatched chunks as SL-disallowed', async () => { const originRoot = join(tmpRoot, 'origin-src'); await mkdir(join(originRoot, 'views'), { recursive: true }); diff --git a/packages/context/src/ingest/adapters/lookml/lookml.adapter.ts b/packages/context/src/ingest/adapters/lookml/lookml.adapter.ts index 9978ddd4..3ea7b9a6 100644 --- a/packages/context/src/ingest/adapters/lookml/lookml.adapter.ts +++ b/packages/context/src/ingest/adapters/lookml/lookml.adapter.ts @@ -14,6 +14,11 @@ import { parseLookmlPullConfig } from './pull-config.js'; export interface LookmlSourceAdapterDeps { homeDir: string; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? [])].sort((left, right) => left.localeCompare(right)); } export class LookmlSourceAdapter implements SourceAdapter { @@ -43,6 +48,10 @@ export class LookmlSourceAdapter implements SourceAdapter { return readLookmlFetchReport(stagedDir); } + async listTargetConnectionIds(_stagedDir: string): Promise { + return uniqueSorted(this.deps.targetConnectionIds); + } + async chunk(stagedDir: string, diffSet?: DiffSet): Promise { const project = await parseLookmlStagedDir(stagedDir); const mismatchedModelNames = await readLookmlMismatchedModelNames(stagedDir); diff --git a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts index af2e409f..19bb6cdc 100644 --- a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.test.ts @@ -42,6 +42,15 @@ describe('MetricflowSourceAdapter', () => { expect(adapter.skillNames).toEqual(['metricflow_ingest']); }); + it('returns configured target warehouse connection ids', async () => { + const metricflow = new MetricflowSourceAdapter({ + homeDir: join(tmpRoot, 
'cache-home'), + targetConnectionIds: ['warehouse', 'analytics', 'warehouse'], + }); + + await expect(metricflow.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['analytics', 'warehouse']); + }); + it('detects a staged dir with a semantic_models YAML', async () => { await mkdir(join(stagedDir, 'models'), { recursive: true }); await writeFile( diff --git a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts index a465a973..c8182ed8 100644 --- a/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts +++ b/packages/context/src/ingest/adapters/metricflow/metricflow.adapter.ts @@ -9,6 +9,11 @@ import { parseMetricflowPullConfig } from './pull-config.js'; export interface MetricflowSourceAdapterDeps { homeDir: string; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? [])].sort((left, right) => left.localeCompare(right)); } export class MetricflowSourceAdapter implements SourceAdapter { @@ -30,6 +35,10 @@ export class MetricflowSourceAdapter implements SourceAdapter { }); } + async listTargetConnectionIds(_stagedDir: string): Promise { + return uniqueSorted(this.deps.targetConnectionIds); + } + async chunk(stagedDir: string, diffSet?: DiffSet): Promise { const project = await parseMetricFlowStagedDir(stagedDir); const chunk = await chunkMetricFlowProject(project, { diffSet }); diff --git a/packages/context/src/ingest/adapters/notion/chunk.ts b/packages/context/src/ingest/adapters/notion/chunk.ts index 7d85fb76..bdae8622 100644 --- a/packages/context/src/ingest/adapters/notion/chunk.ts +++ b/packages/context/src/ingest/adapters/notion/chunk.ts @@ -8,7 +8,7 @@ const MAX_NOTION_WORK_UNIT_CHARS = 40_000; export const NOTION_ORG_KNOWLEDGE_WARNING = 'Anything accessible to this Notion integration can become organization knowledge.'; const NOTION_SL_WRITE_GUIDANCE = - 'Write 
wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages for the same tables or sl_refs before creating a new page. Only write or edit SL sources after sl_discover/sl_read_source confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. Do not create SL sources under the Notion connection just because a page mentions a warehouse table.'; + 'Write wiki entries with wiki_write. Wiki keys must be flat slugs like orbit-company-overview, not orbit/company-overview. Search existing wiki pages, SL sources, and raw warehouse schema for the same tables or sl_refs with discover_data before creating a new page. Only write or edit SL sources after discover_data plus sl_discover/sl_read_source or entity_details confirms a mapped non-Notion target source; if no mapped target exists, emit_unmapped_fallback and keep the fact wiki-only. Notion dataSourceCount counts Notion databases/data sources only, not warehouse/dbt mappings. If a warehouse/dbt connection exists but the named table or source is absent, use reason no_physical_table rather than no_connection_mapping. 
Do not create SL sources under the Notion connection just because a page mentions a warehouse table.'; async function walk(root: string): Promise { const entries = await readdir(root, { withFileTypes: true, recursive: true }); @@ -117,7 +117,7 @@ export async function chunkNotionStagedDir(stagedDir: string, diffSet?: DiffSet) reconcileNotes: [ `Notion maxKnowledgeCreatesPerRun=${manifest.maxKnowledgeCreatesPerRun}`, `Notion maxKnowledgeUpdatesPerRun=${manifest.maxKnowledgeUpdatesPerRun}`, - 'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.', + 'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.', 'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.', ], contextReport: { diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts index de989d02..0f500d5e 100644 --- a/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.test.ts @@ -52,6 +52,14 @@ describe('NotionSourceAdapter', () => { expect(adapter.triageSupported).toBe(true); }); + it('returns configured target warehouse connection ids', async () => { + const adapter = new NotionSourceAdapter({ + targetConnectionIds: ['warehouse', 'warehouse', 'analytics'], + }); + + await expect(adapter.listTargetConnectionIds?.(stagedDir)).resolves.toEqual(['analytics', 'warehouse']); + }); + it('returns structural triage signals for a staged Notion page', async () => { await mkdir(join(stagedDir, 'pages', 'page-1'), { recursive: true }); await writeFile( @@ -242,6 +250,8 @@ describe('NotionSourceAdapter', () => { }); expect(result.workUnits[0].notes).toContain('Synthesize durable wiki and SL knowledge'); expect(result.workUnits[0].notes).toContain('emit_unmapped_fallback'); + 
expect(result.workUnits[0].notes).toContain('discover_data'); + expect(result.workUnits[0].notes).toContain('entity_details'); expect(result.workUnits[0].notes).toContain('use reason no_physical_table rather than no_connection_mapping'); expect(result.workUnits[0].notes).toContain('Do not create SL sources under the Notion connection'); expect(result.workUnits[0].notes).toContain( @@ -250,7 +260,7 @@ describe('NotionSourceAdapter', () => { expect(result.reconcileNotes).toEqual([ 'Notion maxKnowledgeCreatesPerRun=25', 'Notion maxKnowledgeUpdatesPerRun=20', - 'Notion dataSourceCount is Notion-only; use sl_discover for warehouse/dbt mapping decisions.', + 'Notion dataSourceCount is Notion-only; use discover_data/entity_details for warehouse/dbt mapping decisions.', 'Reconcile Notion wiki pages sharing tables/sl_refs before creating distinct artifacts.', ]); expect(result.contextReport).toEqual({ capped: false, warnings: [NOTION_ORG_KNOWLEDGE_WARNING] }); diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.ts index fba68cee..b903d4b3 100644 --- a/packages/context/src/ingest/adapters/notion/notion.adapter.ts +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.ts @@ -32,6 +32,11 @@ interface NotionPullSucceededContext { export interface NotionSourceAdapterDeps { onPullSucceeded?: (ctx: NotionPullSucceededContext) => Promise; logger?: NotionFetchLogger; + targetConnectionIds?: string[]; +} + +function uniqueSorted(values: readonly string[] | undefined): string[] { + return [...new Set(values ?? 
[])].sort((left, right) => left.localeCompare(right)); } export class NotionSourceAdapter implements SourceAdapter { @@ -73,6 +78,10 @@ export class NotionSourceAdapter implements SourceAdapter { return describeNotionScope(stagedDir); } + async listTargetConnectionIds(_stagedDir: string): Promise { + return uniqueSorted(this.deps.targetConnectionIds); + } + async getTriageSignals(stagedDir: string, externalId: string): Promise { const metadata = await this.findMetadataByExternalId(stagedDir, externalId); if (!metadata) { diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index a4513f63..6134fbe7 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -409,6 +409,38 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { ); }); + it('threads target warehouse connection names into WorkUnit and reconcile tool sessions', async () => { + const deps = makeDeps(); + const sessions: any[] = []; + deps.adapter.listTargetConnectionIds = vi.fn().mockResolvedValue(['warehouse']); + deps.toolsetFactory.createIngestWuToolset.mockImplementation((toolSession: any) => { + sessions.push(toolSession); + return { + toAiSdkTools: vi.fn().mockReturnValue({}), + getAllTools: vi.fn().mockReturnValue([]), + getToolNames: vi.fn().mockReturnValue([]), + }; + }); + deps.agentRunner.runLoop.mockResolvedValue({ stopReason: 'natural' }); + + const runner = buildRunner(deps); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/notion/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'notion', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + 
expect([...sessions[0].allowedConnectionNames].sort()).toEqual(['notion', 'warehouse']); + }); + it('reuses document evidence indexing and page triage for document WorkUnits', async () => { const deps = makeDeps(); deps.adapter.source = 'notion'; @@ -643,6 +675,14 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { }); } if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') { + await params.toolSet.record_verification_ledger.execute( + { + summary: 'Reconciliation emits no warehouse identifiers before fallback recording.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'ledger-1', messages: [] }, + ); await params.toolSet.emit_conflict_resolution.execute( { kind: 'near_duplicate', @@ -811,6 +851,14 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { { path: 'a.yml', startLine: 1, endLine: 2 }, { toolCallId: 'read-1', messages: [] }, ); + await params.toolSet.record_verification_ledger.execute( + { + summary: 'Wiki write contains no warehouse identifiers.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'ledger-1', messages: [] }, + ); await params.toolSet.wiki_write.execute( { key: 'knowledge/a.md', content: 'safe summary' }, { toolCallId: 'wiki-1', messages: [] }, @@ -850,9 +898,9 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { { unitKey: 'u1', path: '/tmp/ktx-test/run/wu-transcripts/j1/u1.jsonl', - toolCallCount: 2, + toolCallCount: 3, errorCount: 0, - toolNames: ['read_raw_span', 'wiki_write'], + toolNames: ['read_raw_span', 'record_verification_ledger', 'wiki_write'], }, ], }), @@ -864,6 +912,14 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { const deps = makeDeps(); deps.agentRunner.runLoop.mockImplementation(async (params: any) => { if (params.telemetryTags.operationName === 'ingest-bundle-wu') { + await params.toolSet.record_verification_ledger.execute( + { + summary: 'Unmapped fallback records an unsupported conversion metric without verified warehouse 
identifiers.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'ledger-1', messages: [] }, + ); await params.toolSet.emit_unmapped_fallback.execute( { rawPath: 'a.yml', @@ -920,6 +976,14 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { }); deps.agentRunner.runLoop.mockImplementation(async (params: any) => { if (params.telemetryTags.operationName === 'ingest-bundle-reconcile') { + await params.toolSet.record_verification_ledger.execute( + { + summary: 'Reconciliation records conflict, eviction, and fallback decisions without warehouse identifiers.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'ledger-1', messages: [] }, + ); await params.toolSet.emit_conflict_resolution.execute( { kind: 'near_duplicate', diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index 31b444bf..d8f47c2a 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -53,6 +53,7 @@ import type { UnresolvedCardInfo, WorkUnit, } from './types.js'; +import { repairWikiSlRefs, type WikiSlRefRepairResult } from './wiki-sl-ref-repair.js'; function workUnitToMemoryFlowPlannedWorkUnit(workUnit: WorkUnit): MemoryFlowPlannedWorkUnit { return { @@ -528,6 +529,7 @@ export class IngestBundleRunner { let sourceContextReport: { capped?: boolean; warnings?: string[] } | undefined; let parseArtifacts: unknown; let postProcessorOutcome: IngestReportPostProcessorOutcome | undefined; + let wikiSlRefRepairResult: WikiSlRefRepairResult | null = null; let reconcileNotes: string[] = []; let triageResult: PageTriageRunResult | null = null; if (overrideReport) { @@ -662,6 +664,7 @@ export class IngestBundleRunner { touchedSlSources: session.touchedSlSources, actions: sessionActions, allowedRawPaths: new Set(wu.rawFiles), + allowedConnectionNames: new Set(slConnectionIds), semanticLayerService: scopedSemanticLayerService, 
wikiService: scopedWikiService, configService: sessionWorktree.config, @@ -898,6 +901,7 @@ export class IngestBundleRunner { touchedSlSources: reconcileSession.touchedSlSources, actions: reconcileActions, allowedRawPaths: reconciliationAllowedRawPaths, + allowedConnectionNames: new Set(slConnectionIds), semanticLayerService: rcScopedSl, wikiService: rcScopedWiki, configService: sessionWorktree.config, @@ -1138,6 +1142,19 @@ export class IngestBundleRunner { } } + const repairConnectionIds = [ + ...new Set([ + ...slConnectionIds, + ...(postProcessorOutcome?.touchedSources ?? []).map((source) => source.connectionId), + ]), + ].sort(); + wikiSlRefRepairResult = await repairWikiSlRefs({ + wikiService: this.deps.wikiService.forWorktree(sessionWorktree.workdir), + semanticLayerService: this.deps.semanticLayerService.forWorktree(sessionWorktree.workdir), + configService: sessionWorktree.config, + connectionIds: repairConnectionIds, + }); + // Stage 6 — squash commit const stage6 = ctx?.startPhase(0.04); await stage6?.updateProgress(0.0, 'Saving changes'); @@ -1354,6 +1371,8 @@ export class IngestBundleRunner { provenanceRows: reportProvenanceRows, toolTranscripts: reportToolTranscripts, postProcessor: postProcessorOutcome, + wikiSlRefRepairs: wikiSlRefRepairResult.repairs, + wikiSlRefRepairWarnings: wikiSlRefRepairResult.warnings, ...(reportMemoryFlow ? { memoryFlow: reportMemoryFlow } : {}), context: contextReport ? 
{ diff --git a/packages/context/src/ingest/ingest-prompts.test.ts b/packages/context/src/ingest/ingest-prompts.test.ts index 43985ee9..8adcbfef 100644 --- a/packages/context/src/ingest/ingest-prompts.test.ts +++ b/packages/context/src/ingest/ingest-prompts.test.ts @@ -29,6 +29,18 @@ describe('ingest prompt assets', () => { expect(prompt).not.toMatch(forbiddenProductPattern()); }); + it('uses shipped warehouse verification tools in the WorkUnit prompt', async () => { + const prompt = await readFile( + new URL('../../prompts/memory_agent_bundle_ingest_work_unit.md', import.meta.url), + 'utf-8', + ); + + expect(prompt).toContain('discover_data'); + expect(prompt).toContain('entity_details'); + expect(prompt).not.toContain('wiki_sl_search'); + expect(prompt).not.toContain('sl_describe_table'); + }); + it('does not route historic-SQL through page-triage prompt examples', async () => { const prompt = await readFile(new URL('../../prompts/skills/page_triage_classifier.md', import.meta.url), 'utf-8'); diff --git a/packages/context/src/ingest/ingest-runtime-assets.test.ts b/packages/context/src/ingest/ingest-runtime-assets.test.ts index 2fafd69b..4b75fcdf 100644 --- a/packages/context/src/ingest/ingest-runtime-assets.test.ts +++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts @@ -91,4 +91,14 @@ describe('ingest runtime assets', () => { expect(body).toContain('cross-table'); expect(body).not.toMatch(forbiddenProductPattern()); }); + + it('packages identifier verification prompt assets', async () => { + const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8'); + expect(shared).toContain('## Identifier Verification Protocol'); + expect(shared).toContain('discover_data'); + expect(shared).toContain('entity_details'); + expect(shared).toContain('sql_execution'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM'); + 
}); }); diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts index f8dd7da7..7161743a 100644 --- a/packages/context/src/ingest/local-adapters.test.ts +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -498,6 +498,60 @@ describe('local ingest adapters', () => { await expect(adapter?.listTargetConnectionIds?.('/tmp/staged-dbt')).resolves.toEqual(['warehouse']); }); + it('passes primary warehouse connection ids to the local Notion adapter', async () => { + const adapters = createDefaultLocalIngestAdapters( + projectWithConnections({ + notion: { + driver: 'notion', + auth_token: 'secret', + crawl_mode: 'selected_roots', + root_page_ids: ['page-1'], + }, + warehouse: { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + docs: { + driver: 'dbt', + source_dir: './dbt', + }, + } as never), + ); + + const notion = adapters.find((adapter) => adapter.source === 'notion'); + + await expect(notion?.listTargetConnectionIds?.('/tmp/staged-notion')).resolves.toEqual(['warehouse']); + }); + + it('passes primary warehouse connection ids to local LookML and MetricFlow adapters', async () => { + const adapters = createDefaultLocalIngestAdapters( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'postgresql://readonly@db.example.test/analytics', + }, + lookml_docs: { + driver: 'lookml', + lookml: { + repoUrl: 'https://github.com/acme/lookml.git', + }, + }, + metrics_repo: { + driver: 'metricflow', + metricflow: { + repoUrl: 'https://github.com/acme/metrics.git', + }, + }, + } as never), + ); + + const lookml = adapters.find((adapter) => adapter.source === 'lookml'); + const metricflow = adapters.find((adapter) => adapter.source === 'metricflow'); + + await expect(lookml?.listTargetConnectionIds?.('/tmp/staged-lookml')).resolves.toEqual(['warehouse']); + await 
expect(metricflow?.listTargetConnectionIds?.('/tmp/staged-metricflow')).resolves.toEqual(['warehouse']); + }); + it('resolves MetricFlow auth_token_ref without writing literal tokens to config', async () => { const project = projectWithConnections({ metricflow_main: { diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts index 59daf6d1..533bd526 100644 --- a/packages/context/src/ingest/local-adapters.ts +++ b/packages/context/src/ingest/local-adapters.ts @@ -88,7 +88,10 @@ export function createDefaultLocalIngestAdapters( ...(options.databaseIntrospectionUrl ? { baseUrl: options.databaseIntrospectionUrl } : {}), }), }), - new LookmlSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }), + new LookmlSourceAdapter({ + homeDir: join(project.projectDir, '.ktx/cache'), + targetConnectionIds: primaryWarehouseConnectionIds(project), + }), new DbtSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache'), targetConnectionIds: primaryWarehouseConnectionIds(project), @@ -106,8 +109,12 @@ export function createDefaultLocalIngestAdapters( }, }, }), - new MetricflowSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }), + new MetricflowSourceAdapter({ + homeDir: join(project.projectDir, '.ktx/cache'), + targetConnectionIds: primaryWarehouseConnectionIds(project), + }), new NotionSourceAdapter({ + targetConnectionIds: primaryWarehouseConnectionIds(project), ...(options.logger ? 
{ logger: options.logger } : {}), }), ]; diff --git a/packages/context/src/ingest/local-bundle-ingest.test.ts b/packages/context/src/ingest/local-bundle-ingest.test.ts index f631e6ed..fe781b33 100644 --- a/packages/context/src/ingest/local-bundle-ingest.test.ts +++ b/packages/context/src/ingest/local-bundle-ingest.test.ts @@ -27,6 +27,18 @@ class LookerSlWritingAgentRunner extends AgentRunnerService { params.telemetryTags?.operationName === 'ingest-bundle-wu' && params.telemetryTags?.unitKey === 'looker-explore-ecommerce-orders' ) { + const ledger = params.toolSet.record_verification_ledger; + if (!ledger?.execute) { + throw new Error('record_verification_ledger tool was not available to the Looker WorkUnit'); + } + await ledger.execute( + { + summary: 'Test fixture verified Looker explore target identifiers before writing SL.', + verifiedIdentifiers: ['prod-warehouse', 'public.orders'], + unverifiedIdentifiers: [], + }, + { toolCallId: 'looker-verification-ledger', messages: [] }, + ); const slWrite = params.toolSet.sl_write_source; if (!slWrite?.execute) { throw new Error('sl_write_source tool was not available to the Looker WorkUnit'); @@ -63,6 +75,18 @@ class LookerSlWritingAgentRunner extends AgentRunnerService { class WikiWritingAgentRunner extends AgentRunnerService { override runLoop = vi.fn(async (params: any) => { if (params.telemetryTags?.operationName === 'ingest-bundle-wu') { + const ledger = params.toolSet.record_verification_ledger; + if (!ledger?.execute) { + throw new Error('record_verification_ledger tool was not available to the WorkUnit'); + } + await ledger.execute( + { + summary: 'Test fixture writes wiki-only context with no warehouse identifiers.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'wiki-verification-ledger', messages: [] }, + ); const wikiWrite = params.toolSet.wiki_write; if (!wikiWrite?.execute) { throw new Error('wiki_write tool was not available to the WorkUnit'); @@ -91,6 +115,18 @@ class 
WikiWritingAgentRunner extends AgentRunnerService { class WikiWritingWithRawPathAgentRunner extends AgentRunnerService { override runLoop = vi.fn(async (params: any) => { if (params.telemetryTags?.operationName === 'ingest-bundle-wu') { + const ledger = params.toolSet.record_verification_ledger; + if (!ledger?.execute) { + throw new Error('record_verification_ledger tool was not available to the WorkUnit'); + } + await ledger.execute( + { + summary: 'Test fixture writes wiki-only context with explicit raw provenance and no warehouse identifiers.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 'wiki-raw-path-verification-ledger', messages: [] }, + ); const wikiWrite = params.toolSet.wiki_write; if (!wikiWrite?.execute) { throw new Error('wiki_write tool was not available to the WorkUnit'); diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index af40d44f..e9be5a14 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { AgentRunnerService } from '../agent/index.js'; import { initKtxProject, type KtxLocalProject, loadKtxProject } from '../project/index.js'; -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { FakeSourceAdapter } from './adapters/fake/fake.adapter.js'; import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; @@ -12,6 +12,7 @@ type RuntimeWithConnectionDeps = { connections: { listEnabledConnections(ids: string[]): Promise>; getConnectionById(connectionId: string): Promise<{ id: string; name: string; connectionType: string } | null>; + executeQuery(connectionId: string, sql: string): Promise; }; }; }; @@ -113,6 +114,37 @@ 
describe('createLocalBundleIngestRuntime', () => { ]); }); + it('passes project connection config to local ingest query executors', async () => { + const agentRunner = new AgentRunnerService({ llmProvider: { getModel: () => ({}) as never } as any }); + const queryExecutor = { + execute: vi.fn(async () => ({ + headers: ['answer'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })), + }; + + const runtime = createLocalBundleIngestRuntime({ + project, + adapters: [new FakeSourceAdapter()], + agentRunner, + queryExecutor, + }); + const connections = (runtime.runner as unknown as RuntimeWithConnectionDeps).deps.connections; + + await expect(connections.executeQuery('warehouse', 'select 1')).resolves.toMatchObject({ + headers: ['answer'], + }); + expect(queryExecutor.execute).toHaveBeenCalledWith({ + connectionId: 'warehouse', + projectDir: project.projectDir, + connection: project.config.connections.warehouse, + sql: 'select 1', + }); + }); + it('accepts a debug LLM request file when constructing the default agent runner', async () => { await writeFile( join(project.projectDir, 'ktx.yaml'), diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index b453cae6..9eeda894 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -6,7 +6,7 @@ import type { Tool } from 'ai'; import YAML from 'yaml'; import type { AgentRunnerService } from '../agent/index.js'; import { AgentRunnerService as DefaultAgentRunnerService } from '../agent/index.js'; -import { localConnectionInfoFromConfig } from '../connections/index.js'; +import { localConnectionInfoFromConfig, type KtxSqlQueryExecutorPort } from '../connections/index.js'; import type { KtxEmbeddingPort, KtxLogger } from '../core/index.js'; import { noopLogger, SessionWorktreeService } from '../core/index.js'; import type { KtxSemanticLayerComputePort } from '../daemon/index.js'; 
@@ -56,6 +56,7 @@ import { buildKnowledgeSearchText, type KnowledgeEventPort, type KnowledgeIndexPort, + type KnowledgeIndexPageListing, KnowledgeWikiService, searchLocalKnowledgePages, SqliteKnowledgeIndex, @@ -77,6 +78,7 @@ import { ContextEvidenceIndexService, SqliteContextEvidenceStore } from './conte import { DiffSetService } from './diff-set.service.js'; import { IngestBundleRunner } from './ingest-bundle.runner.js'; import { PageTriageService } from './page-triage/index.js'; +import { createWarehouseVerificationTools } from './tools/warehouse-verification/index.js'; import type { IngestBundleRunnerDeps, IngestCommitMessagePort, @@ -103,7 +105,7 @@ export interface CreateLocalBundleIngestRuntimeOptions { llmDebugRequestFile?: string; memoryModel?: string; semanticLayerCompute?: KtxSemanticLayerComputePort; - queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; + queryExecutor?: KtxSqlQueryExecutorPort; jobIdFactory?: () => string; logger?: KtxLogger; } @@ -169,9 +171,7 @@ class LocalAuthorResolver implements GitAuthorResolverPort { class LocalConnectionCatalog implements SlConnectionCatalogPort { constructor( private readonly project: KtxLocalProject, - private readonly queryExecutor?: { - execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise; - }, + private readonly queryExecutor?: KtxSqlQueryExecutorPort, ) {} async listEnabledConnections(ids: string[]): Promise { @@ -192,7 +192,12 @@ class LocalConnectionCatalog implements SlConnectionCatalogPort { if (!this.queryExecutor) { throw new Error('Local ingest has no query executor configured'); } - return this.queryExecutor.execute({ connectionId, sql }); + return this.queryExecutor.execute({ + connectionId, + projectDir: this.project.projectDir, + connection: this.project.config.connections[connectionId], + sql, + }); } } @@ -347,15 +352,19 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { async listPagesForUser( userId: 
string, - ): Promise> { - const pages: Array<{ page_key: string; summary: string; scope: string; scope_id: string | null }> = []; + ): Promise { + const pages: KnowledgeIndexPageListing[] = []; for (const scope of [ { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' }, { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` }, ]) { const listed = await this.project.fileStore.listFiles(scope.dir, true); for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { - const pageKey = file.replace(/\.md$/, ''); + const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('knowledge/', '')}/${file}`); + if (!parsedPath || parsedPath.scope !== scope.scope) { + continue; + } + const pageKey = parsedPath.pageKey; const raw = await this.project.fileStore.readFile(`${scope.dir}/${file}`); const parsed = parseWiki(raw.content); pages.push({ @@ -363,6 +372,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { summary: parsed.summary, scope: scope.scope, scope_id: scope.scopeId, + tags: parseWikiTags(raw.content), }); } } @@ -432,13 +442,6 @@ function parseKnowledgeIndexPath(file: string): { scope: 'GLOBAL' | 'USER'; page const pageKey = segments[1].replace(/\.md$/, ''); return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? { scope: 'GLOBAL', pageKey } : null; } - if (segments.length >= 3 && segments[0] === 'global' && segments[1] === 'historic-sql') { - const historicPath = segments.slice(2).join('/').replace(/\.md$/, ''); - if (historicPath.split('/').every((segment) => /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment))) { - return { scope: 'GLOBAL', pageKey: `historic-sql/${historicPath}` }; - } - return null; - } if (segments.length === 3 && segments[0] === 'user') { const pageKey = segments[2].replace(/\.md$/, ''); return /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(pageKey) ? 
{ scope: 'USER', pageKey } : null; @@ -486,38 +489,47 @@ class LocalIngestToolsetFactory implements IngestToolsetFactoryPort { slSearchService: deps.slSearchService, authorResolver: deps.authorResolver, }; + const wikiSearchTool = new WikiSearchTool({ + search: async (input) => { + const results = await searchLocalKnowledgePages(deps.project, { + userId: input.userId, + query: input.query, + limit: input.limit, + embeddingService: deps.embedding, + }); + return { + results: results.slice(0, input.limit).map((result) => ({ + key: result.key, + path: result.path, + summary: result.summary, + score: result.score, + matchReasons: result.matchReasons, + lanes: result.lanes, + })), + totalFound: results.length, + }; + }, + }); + const slDiscoverTool = new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }); + const warehouseVerificationTools = createWarehouseVerificationTools({ + connections: deps.connections, + fallbackFileStore: deps.project.fileStore, + wikiSearchTool, + slDiscoverTool, + }); this.baseTools = [ new WikiReadTool(deps.wikiService, deps.knowledgeIndex), - new WikiSearchTool({ - search: async (input) => { - const results = await searchLocalKnowledgePages(deps.project, { - userId: input.userId, - query: input.query, - limit: input.limit, - embeddingService: deps.embedding, - }); - return { - results: results.slice(0, input.limit).map((result) => ({ - key: result.key, - path: result.path, - summary: result.summary, - score: result.score, - matchReasons: result.matchReasons, - lanes: result.lanes, - })), - totalFound: results.length, - }; - }, - }), - new WikiListTagsTool(deps.wikiService, deps.knowledgeIndex), + wikiSearchTool, + new WikiListTagsTool(deps.knowledgeIndex), new WikiWriteTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), new WikiRemoveTool(deps.wikiService, deps.knowledgeIndex, deps.knowledgeEvents), - new SlDiscoverTool(slDeps, { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }), + 
slDiscoverTool, new SlEditSourceTool(slDeps), new SlReadSourceTool(slDeps), new SlWriteSourceTool(slDeps), new SlValidateTool(slDeps), new SlRollbackTool(deps.slSourcesRepository, deps.connections, 0), + ...warehouseVerificationTools, ]; this.contextTools = [ new ContextEvidenceSearchTool(deps.contextStore, deps.embedding), diff --git a/packages/context/src/ingest/local-ingest.ts b/packages/context/src/ingest/local-ingest.ts index bc6294c4..2ec13184 100644 --- a/packages/context/src/ingest/local-ingest.ts +++ b/packages/context/src/ingest/local-ingest.ts @@ -3,11 +3,11 @@ import { cp, mkdir, rm } from 'node:fs/promises'; import { isAbsolute, resolve } from 'node:path'; import type { KtxLlmProvider } from '@ktx/llm'; import type { AgentRunnerService } from '../agent/index.js'; +import type { KtxSqlQueryExecutorPort } from '../connections/index.js'; import type { KtxLogger } from '../core/index.js'; import type { KtxSemanticLayerComputePort } from '../daemon/index.js'; import type { KtxLocalProject } from '../project/index.js'; import { ktxLocalStateDbPath } from '../project/index.js'; -import type { KtxQueryResult } from '../sl/index.js'; import { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js'; import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } from './local-adapters.js'; @@ -34,7 +34,7 @@ export interface RunLocalIngestOptions { llmDebugRequestFile?: string; memoryModel?: string; semanticLayerCompute?: KtxSemanticLayerComputePort; - queryExecutor?: { execute(input: { connectionId: string; sql: string; maxRows?: number }): Promise }; + queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; } @@ -172,7 +172,7 @@ async function runScheduledPullJob(options: { llmProvider?: KtxLlmProvider; memoryModel?: string; semanticLayerCompute?: KtxSemanticLayerComputePort; - queryExecutor?: { execute(input: { connectionId: 
string; sql: string; maxRows?: number }): Promise }; + queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; }): Promise { const runtime = createLocalBundleIngestRuntime(options); diff --git a/packages/context/src/ingest/reports.ts b/packages/context/src/ingest/reports.ts index 2c3020b4..cda4d7c1 100644 --- a/packages/context/src/ingest/reports.ts +++ b/packages/context/src/ingest/reports.ts @@ -9,6 +9,7 @@ import type { StageIndex, UnmappedFallbackRecord, } from './stages/stage-index.types.js'; +import type { WikiSlRefRepair } from './wiki-sl-ref-repair.js'; import type { IngestDiffSummary, SourceFetchReport, UnresolvedCardInfo } from './types.js'; export interface IngestReportWorkUnit { @@ -70,6 +71,8 @@ export interface IngestReportBody { provenanceRows: IngestReportProvenanceDetail[]; toolTranscripts: IngestReportToolTranscriptSummary[]; postProcessor?: IngestReportPostProcessorOutcome; + wikiSlRefRepairs?: WikiSlRefRepair[]; + wikiSlRefRepairWarnings?: string[]; memoryFlow?: MemoryFlowReplayInput; } diff --git a/packages/context/src/ingest/stages/build-reconcile-context.test.ts b/packages/context/src/ingest/stages/build-reconcile-context.test.ts index 7db0bb23..9ac95356 100644 --- a/packages/context/src/ingest/stages/build-reconcile-context.test.ts +++ b/packages/context/src/ingest/stages/build-reconcile-context.test.ts @@ -107,6 +107,7 @@ describe('buildReconcileToolSet', () => { 'eviction_list', 'load_skill', 'read_raw_span', + 'record_verification_ledger', 'sl_write_source', 'stage_diff', 'stage_list', @@ -114,4 +115,54 @@ describe('buildReconcileToolSet', () => { ].sort(), ); }); + + it('requires the verification ledger before reconciliation write tools run', async () => { + const slWrite = vi.fn().mockResolvedValue({ markdown: 'written', structured: { success: true } }); + const toolSet = buildReconcileToolSet({ + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + stageListTool: { stage_list: 
{ description: 'stage list', inputSchema: {} as any, execute: vi.fn() } } as any, + stageDiffTool: { stage_diff: { description: 'stage diff', inputSchema: {} as any, execute: vi.fn() } } as any, + evictionListTool: { + eviction_list: { description: 'eviction list', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitConflictResolutionTool: { + emit_conflict_resolution: { description: 'conflict', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitEvictionDecisionTool: { + emit_eviction_decision: { description: 'eviction', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitArtifactResolutionTool: { + emit_artifact_resolution: { description: 'resolution', inputSchema: {} as any, execute: vi.fn() }, + } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + readRawSpanTool: { read_raw_span: { description: 'raw span', inputSchema: {} as any, execute: vi.fn() } } as any, + toolsetTools: { sl_write_source: { description: 'sl write', inputSchema: {} as any, execute: slWrite } as any }, + }); + + const correction = await toolSet.sl_write_source.execute?.( + { connectionId: 'warehouse', sourceName: 'accounts' }, + { toolCallId: 't1' } as any, + ); + + expect(slWrite).not.toHaveBeenCalled(); + expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } }); + + await toolSet.record_verification_ledger.execute?.( + { + summary: 'Verified warehouse.accounts with entity_details.', + verifiedIdentifiers: ['warehouse.accounts'], + unverifiedIdentifiers: [], + }, + { toolCallId: 't2' } as any, + ); + const written = await toolSet.sl_write_source.execute?.( + { connectionId: 'warehouse', sourceName: 'accounts' }, + { toolCallId: 't3' } as any, + ); + + expect(slWrite).toHaveBeenCalledTimes(1); + expect(written).toMatchObject({ structured: { success: true } }); + }); }); diff --git 
a/packages/context/src/ingest/stages/build-reconcile-context.ts b/packages/context/src/ingest/stages/build-reconcile-context.ts index 30ff6341..9533acbd 100644 --- a/packages/context/src/ingest/stages/build-reconcile-context.ts +++ b/packages/context/src/ingest/stages/build-reconcile-context.ts @@ -1,5 +1,10 @@ import type { Tool, ToolSet } from 'ai'; import { buildCanonicalPinsPromptBlock, type CanonicalPin } from '../canonical-pins.js'; +import { + createVerificationLedgerState, + VERIFICATION_LEDGER_PROMPT, + withVerificationLedger, +} from '../tools/verification-ledger.tool.js'; import type { EvictionUnit } from '../types.js'; import type { StageIndex } from './stage-index.types.js'; @@ -12,6 +17,7 @@ export function buildReconcileSystemPrompt(params: { }): string { return [ params.baseFraming.trimEnd(), + VERIFICATION_LEDGER_PROMPT, params.skillsPrompt.trimEnd(), buildCanonicalPinsPromptBlock(params.canonicalPins), `\n\nsyncId: ${params.syncId}\nsource: ${params.sourceKey}\n`, @@ -188,16 +194,20 @@ export interface ReconcileToolSetInput { } export function buildReconcileToolSet(input: ReconcileToolSetInput): ToolSet { - return { - ...input.toolsetTools, - ...input.loadSkillTool, - ...input.stageListTool, - ...input.stageDiffTool, - ...input.evictionListTool, - ...input.emitConflictResolutionTool, - ...input.emitEvictionDecisionTool, - ...input.emitArtifactResolutionTool, - ...input.emitUnmappedFallbackTool, - ...input.readRawSpanTool, - }; + const state = createVerificationLedgerState(); + return withVerificationLedger( + { + ...input.toolsetTools, + ...input.loadSkillTool, + ...input.stageListTool, + ...input.stageDiffTool, + ...input.evictionListTool, + ...input.emitConflictResolutionTool, + ...input.emitEvictionDecisionTool, + ...input.emitArtifactResolutionTool, + ...input.emitUnmappedFallbackTool, + ...input.readRawSpanTool, + }, + state, + ); } diff --git a/packages/context/src/ingest/stages/build-wu-context.test.ts 
b/packages/context/src/ingest/stages/build-wu-context.test.ts index 13a3ff8f..db17154e 100644 --- a/packages/context/src/ingest/stages/build-wu-context.test.ts +++ b/packages/context/src/ingest/stages/build-wu-context.test.ts @@ -68,12 +68,45 @@ describe('buildWuToolSet', () => { 'load_skill', 'read_raw_file', 'read_raw_span', + 'record_verification_ledger', 'sl_write_source', 'wiki_search', ].sort(), ); }); + it('requires the verification ledger before write-capable tools run', async () => { + const wikiWrite = vi.fn().mockResolvedValue({ markdown: 'written', structured: { success: true } }); + const toolSet = buildWuToolSet({ + stagedDir: '/tmp/staged', + wu: { unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }, + loadSkillTool: { load_skill: { description: 'load', inputSchema: {} as any, execute: vi.fn() } } as any, + emitUnmappedFallbackTool: { + emit_unmapped_fallback: { description: 'fallback', inputSchema: {} as any, execute: vi.fn() }, + } as any, + toolsetTools: { wiki_write: { description: 'write', inputSchema: {} as any, execute: wikiWrite } as any }, + }); + + const correction = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't1' } as any); + + expect(wikiWrite).not.toHaveBeenCalled(); + expect(correction).toMatchObject({ structured: { success: false, reason: 'verification_ledger_required' } }); + expect(String((correction as any).markdown)).toContain('record_verification_ledger'); + + await toolSet.record_verification_ledger.execute?.( + { + summary: 'No warehouse identifiers will be emitted in this wiki write.', + verifiedIdentifiers: [], + unverifiedIdentifiers: [], + }, + { toolCallId: 't2' } as any, + ); + const written = await toolSet.wiki_write.execute?.({ key: 'customer-rules' }, { toolCallId: 't3' } as any); + + expect(wikiWrite).toHaveBeenCalledTimes(1); + expect(written).toMatchObject({ structured: { success: true } }); + }); + it('includes looker_query_to_sl only for Looker WorkUnits', 
() => { const toolSet = buildWuToolSet({ sourceKey: 'looker', @@ -93,6 +126,7 @@ describe('buildWuToolSet', () => { 'looker_query_to_sl', 'read_raw_file', 'read_raw_span', + 'record_verification_ledger', 'sl_write_source', 'wiki_search', ].sort(), diff --git a/packages/context/src/ingest/stages/build-wu-context.ts b/packages/context/src/ingest/stages/build-wu-context.ts index 6ba26fd7..bfa1bd9c 100644 --- a/packages/context/src/ingest/stages/build-wu-context.ts +++ b/packages/context/src/ingest/stages/build-wu-context.ts @@ -4,6 +4,11 @@ import { createLookerQueryToSlTool } from '../adapters/looker/tools/looker-query import type { IngestProvenanceRow } from '../ports.js'; import { createReadRawFileTool } from '../tools/read-raw-file.tool.js'; import { createReadRawSpanTool } from '../tools/read-raw-span.tool.js'; +import { + createVerificationLedgerState, + VERIFICATION_LEDGER_PROMPT, + withVerificationLedger, +} from '../tools/verification-ledger.tool.js'; import type { WorkUnit } from '../types.js'; const PEER_FILE_INDEX_PROMPT_LIMIT = 100; @@ -24,6 +29,7 @@ export function buildWuSystemPrompt(params: { }): string { const parts = [ params.baseFraming.trimEnd(), + VERIFICATION_LEDGER_PROMPT, params.skillsPrompt.trimEnd(), buildCanonicalPinsPromptBlock(params.canonicalPins ?? []), `\n\nsyncId: ${params.syncId}\nsource: ${params.sourceKey}\n`, @@ -100,15 +106,19 @@ function withoutWriteSlTools(toolset: ToolSet, wu: WorkUnit): ToolSet { export function buildWuToolSet(input: BuildWuToolSetInput): ToolSet { const allowedPaths = new Set([...input.wu.rawFiles, ...input.wu.dependencyPaths]); const lookerTools: ToolSet = input.sourceKey === 'looker' ? 
{ looker_query_to_sl: createLookerQueryToSlTool() } : {}; - return withoutWriteSlTools( - { - ...input.toolsetTools, - ...lookerTools, - ...input.loadSkillTool, - ...input.emitUnmappedFallbackTool, - read_raw_file: createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }), - read_raw_span: createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }), - }, - input.wu, + const state = createVerificationLedgerState(); + return withVerificationLedger( + withoutWriteSlTools( + { + ...input.toolsetTools, + ...lookerTools, + ...input.loadSkillTool, + ...input.emitUnmappedFallbackTool, + read_raw_file: createReadRawFileTool({ stagedDir: input.stagedDir, allowedPaths }), + read_raw_span: createReadRawSpanTool({ stagedDir: input.stagedDir, allowedPaths }), + }, + input.wu, + ), + state, ); } diff --git a/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts b/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts index aaba3509..33a8610e 100644 --- a/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts +++ b/packages/context/src/ingest/tools/emit-unmapped-fallback.tool.ts @@ -64,7 +64,7 @@ export function createEmitUnmappedFallbackTool(deps: EmitUnmappedFallbackDeps) { tableRef: z .string() .optional() - .describe('The fully-qualified table or source reference that triggered the fallback (e.g. "orbit_analytics.customer"). Used to generate canonical detail text.'), + .describe('The fully-qualified table or source reference that triggered the fallback (e.g. ".
"). Used to generate canonical detail text.'), clarification: z .string() .optional() diff --git a/packages/context/src/ingest/tools/tool-transcript-summary.test.ts b/packages/context/src/ingest/tools/tool-transcript-summary.test.ts index bc836e97..9e110789 100644 --- a/packages/context/src/ingest/tools/tool-transcript-summary.test.ts +++ b/packages/context/src/ingest/tools/tool-transcript-summary.test.ts @@ -36,6 +36,28 @@ describe('tool transcript summaries', () => { expect(summary.fatalErrorCount).toBe(0); }); + it('treats a suggested flat wiki key retry as recovery for an invalid nested key', () => { + const summary = createMutableToolTranscriptSummary('wu-1', '/tmp/wu-1.jsonl'); + + recordToolTranscriptEntry( + summary, + entry({ + input: { key: 'historic-sql/top-accounts-by-contract-arr' }, + output: { structured: { success: false, key: 'historic-sql/top-accounts-by-contract-arr' } }, + }), + ); + recordToolTranscriptEntry( + summary, + entry({ + input: { key: 'historic-sql-top-accounts-by-contract-arr' }, + output: { structured: { success: true, key: 'historic-sql-top-accounts-by-contract-arr' } }, + }), + ); + + expect(summary.errorCount).toBe(1); + expect(summary.fatalErrorCount).toBe(0); + }); + it('counts unrecovered wiki_remove structured failures as fatal transcript errors', () => { const summary = createMutableToolTranscriptSummary('reconcile', '/tmp/reconcile.jsonl'); diff --git a/packages/context/src/ingest/tools/tool-transcript-summary.ts b/packages/context/src/ingest/tools/tool-transcript-summary.ts index de7ee668..4af450f0 100644 --- a/packages/context/src/ingest/tools/tool-transcript-summary.ts +++ b/packages/context/src/ingest/tools/tool-transcript-summary.ts @@ -1,4 +1,5 @@ import type { ToolCallLogEntry } from './tool-call-logger.js'; +import { isFlatWikiKey, suggestFlatWikiKey } from '../../wiki/keys.js'; export interface MutableToolTranscriptSummary { unitKey: string; @@ -112,7 +113,10 @@ function structuredSuccess(output: unknown): boolean 
| null { function wikiTargetKey(entry: ToolCallLogEntry): string | null { const key = stringField(recordField(entry.output, 'structured'), 'key') ?? stringField(entry.input, 'key'); - return key ? `wiki:${key}` : null; + if (!key) { + return null; + } + return `wiki:${isFlatWikiKey(key) ? key : suggestFlatWikiKey(key)}`; } function slTargetKey(entry: ToolCallLogEntry): string | null { diff --git a/packages/context/src/ingest/tools/verification-ledger.tool.ts b/packages/context/src/ingest/tools/verification-ledger.tool.ts new file mode 100644 index 00000000..f99e79be --- /dev/null +++ b/packages/context/src/ingest/tools/verification-ledger.tool.ts @@ -0,0 +1,97 @@ +import { tool, type ToolExecuteFunction, type ToolExecutionOptions, type ToolSet } from 'ai'; +import { z } from 'zod'; + +const verificationLedgerInputSchema = z.object({ + summary: z.string().min(1).max(2000), + verifiedIdentifiers: z.array(z.string().min(1)).max(100).default([]), + unverifiedIdentifiers: z.array(z.string().min(1)).max(100).default([]), + notes: z.string().max(2000).optional(), +}); + +export interface VerificationLedgerEntry { + summary: string; + verifiedIdentifiers: string[]; + unverifiedIdentifiers: string[]; + notes?: string; +} + +export interface VerificationLedgerState { + entries: VerificationLedgerEntry[]; +} + +const WRITE_TOOL_NAMES = new Set([ + 'wiki_write', + 'wiki_remove', + 'sl_write_source', + 'sl_edit_source', + 'emit_unmapped_fallback', +]); + +export const VERIFICATION_LEDGER_PROMPT = ` +Before any write-capable tool call (wiki_write, wiki_remove, sl_write_source, sl_edit_source, emit_unmapped_fallback), call record_verification_ledger. +The ledger is a model-authored checkpoint, not a deterministic parser gate. Summarize the verification protocol from the loaded skill, list identifiers verified with discover_data/entity_details/sql_execution, and list anything intentionally left unverified. If the write contains no warehouse identifiers, say that explicitly. 
+If a write tool returns verification_ledger_required, complete the ledger and retry the write. +`; + +export function createVerificationLedgerState(): VerificationLedgerState { + return { entries: [] }; +} + +export function withVerificationLedger(tools: ToolSet, state: VerificationLedgerState): ToolSet { + const wrapped: ToolSet = {}; + for (const [name, original] of Object.entries(tools)) { + if (!WRITE_TOOL_NAMES.has(name) || typeof original.execute !== 'function') { + wrapped[name] = original; + continue; + } + const originalExecute = original.execute; + const guardedExecute: ToolExecuteFunction = async ( + input: unknown, + opts: ToolExecutionOptions, + ) => { + if (state.entries.length === 0) { + return verificationRequiredOutput(name); + } + return (originalExecute as ToolExecuteFunction)(input, opts); + }; + wrapped[name] = { ...original, execute: guardedExecute }; + } + wrapped.record_verification_ledger = createRecordVerificationLedgerTool(state); + return wrapped; +} + +function createRecordVerificationLedgerTool(state: VerificationLedgerState) { + return tool({ + description: + 'Record the pre-write verification ledger required by loaded ingest skills. Call this before wiki/SL/fallback writes to state what was verified, which tool calls support it, and what remains intentionally unverified.', + inputSchema: verificationLedgerInputSchema, + execute: async (input) => { + const entry = verificationLedgerInputSchema.parse(input); + state.entries.push(entry); + return { + markdown: + `Verification ledger recorded. Summary: ${entry.summary}\n` + + `Verified identifiers: ${entry.verifiedIdentifiers.length ? entry.verifiedIdentifiers.join(', ') : '(none)'}\n` + + `Unverified identifiers: ${ + entry.unverifiedIdentifiers.length ? 
entry.unverifiedIdentifiers.join(', ') : '(none)' + }`, + structured: { success: true, entry }, + }; + }, + }); +} + +function verificationRequiredOutput(toolName: string) { + return { + markdown: + `Pre-write verification required before calling ${toolName}. ` + + 'Call record_verification_ledger first. In the ledger, summarize the loaded skill protocol you followed, ' + + 'list identifiers verified via discover_data/entity_details/sql_execution, and list any identifiers intentionally left unverified. ' + + 'If the write contains no warehouse identifiers, say that explicitly in the ledger summary.', + structured: { + success: false, + reason: 'verification_ledger_required', + toolName, + }, + }; +} diff --git a/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts new file mode 100644 index 00000000..979873a4 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/discover-data.tool.test.ts @@ -0,0 +1,119 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import type { BaseTool, ToolContext } from '../../../tools/index.js'; +import { DiscoverDataTool } from './discover-data.tool.js'; +import type { WarehouseCatalogService } from './warehouse-catalog.service.js'; + +describe('DiscoverDataTool', () => { + const wikiSearchTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType }; + const slDiscoverTool = { call: vi.fn() } as unknown as BaseTool & { call: ReturnType }; + const catalog = { searchByName: vi.fn() } as unknown as WarehouseCatalogService & { + searchByName: ReturnType; + }; + const context: ToolContext = { + sourceId: 'ingest', + messageId: 'm1', + userId: 'system', + session: { allowedConnectionNames: new Set(['warehouse']) } as any, + }; + const tool = new DiscoverDataTool({ + wikiSearchTool, + slDiscoverTool, + catalogFactory: () => catalog, + }); + + beforeEach(() => { + 
wikiSearchTool.call.mockReset(); + slDiscoverTool.call.mockReset(); + catalog.searchByName.mockReset(); + wikiSearchTool.call.mockResolvedValue({ + markdown: '- orders wiki', + structured: { totalFound: 1, results: [{ key: 'orders' }] }, + }); + slDiscoverTool.call.mockResolvedValue({ + markdown: '- orders source', + structured: { totalSources: 1, sources: [{ sourceName: 'orders' }] }, + }); + catalog.searchByName.mockResolvedValue([ + { + kind: 'table', + connectionName: 'warehouse', + ref: { catalog: null, db: 'public', name: 'orders' }, + display: 'public.orders', + matchedOn: 'name', + }, + ]); + }); + + it('groups wiki, semantic layer, and raw schema hits with routing hints', async () => { + const result = await tool.call({ query: 'orders', connectionName: 'warehouse', limit: 5 }, context); + + expect(result.markdown).toContain('## Wiki Pages'); + expect(result.markdown).toContain('use `wiki_read(blockKey)` for full content'); + expect(result.markdown).toContain('## Semantic Layer Sources'); + expect(result.markdown).toContain('use `sl_read_source(sourceName)` for the YAML'); + expect(result.markdown).toContain('## Raw Warehouse Schema'); + expect(result.markdown).toContain('use `entity_details({connectionName, targets: [{display}]})`'); + expect(result.structured.raw?.hits).toHaveLength(1); + }); + + it('includes connectionName on raw schema hits so entity_details can follow up', async () => { + const multiConnectionContext: ToolContext = { + ...context, + session: { allowedConnectionNames: new Set(['warehouse', 'analytics']) } as any, + }; + catalog.searchByName.mockImplementation(async (connectionName: string, query: string) => [ + { + kind: 'table', + connectionName, + ref: { catalog: null, db: 'public', name: `${connectionName}_${query}` }, + display: `public.${connectionName}_${query}`, + matchedOn: 'name', + }, + ]); + + const result = await tool.call({ query: 'orders', limit: 10 }, multiConnectionContext); + + 
expect(catalog.searchByName).toHaveBeenCalledWith('analytics', 'orders', 10); + expect(catalog.searchByName).toHaveBeenCalledWith('warehouse', 'orders', 10); + expect(result.markdown).toContain('connectionName=analytics'); + expect(result.markdown).toContain('connectionName=warehouse'); + expect(result.markdown).toContain( + 'entity_details({connectionName: "analytics", targets: [{display: "public.analytics_orders"}]})', + ); + expect(result.structured.raw?.hits.map((hit) => hit.connectionName)).toEqual(['analytics', 'warehouse']); + }); + + it('refuses explicit out-of-scope connection names', async () => { + const result = await tool.call({ query: 'orders', connectionName: 'billing' }, context); + + expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.'); + expect(result.structured).toEqual({ wiki: null, sl: null, raw: null }); + expect(wikiSearchTool.call).not.toHaveBeenCalled(); + expect(slDiscoverTool.call).not.toHaveBeenCalled(); + expect(catalog.searchByName).not.toHaveBeenCalled(); + }); + + it('delegates sourceName inspect mode to sl_discover only', async () => { + slDiscoverTool.call.mockResolvedValueOnce({ + markdown: 'source detail', + structured: { sourceName: 'orders' }, + }); + + const result = await tool.call({ sourceName: 'orders', connectionName: 'warehouse' }, context); + + expect(slDiscoverTool.call).toHaveBeenCalledWith({ sourceName: 'orders', connectionId: 'warehouse' }, context); + expect(wikiSearchTool.call).not.toHaveBeenCalled(); + expect(catalog.searchByName).not.toHaveBeenCalled(); + expect(result.markdown).toContain('source detail'); + }); + + it('returns the empty-state message when all sections are empty', async () => { + wikiSearchTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalFound: 0, results: [] } }); + slDiscoverTool.call.mockResolvedValueOnce({ markdown: '', structured: { totalSources: 0, sources: [] } }); + catalog.searchByName.mockResolvedValueOnce([]); + + const 
/**
 * Reads the numeric `totalFound` field from an untyped tool result.
 *
 * @param structured - Arbitrary structured payload returned by a tool.
 * @returns The value of `totalFound` when the payload is a non-null object
 *   carrying that field as a number; `0` in every other case.
 */
function totalFound(structured: unknown): number {
  if (structured === null || typeof structured !== 'object') {
    return 0;
  }
  if (!('totalFound' in structured)) {
    return 0;
  }
  const count = structured.totalFound;
  return typeof count === 'number' ? count : 0;
}

/**
 * Reads the numeric `totalSources` field from an untyped tool result.
 *
 * @param structured - Arbitrary structured payload returned by a tool.
 * @returns The value of `totalSources` when the payload is a non-null object
 *   carrying that field as a number; `0` in every other case.
 */
function totalSources(structured: unknown): number {
  if (structured === null || typeof structured !== 'object') {
    return 0;
  }
  if (!('totalSources' in structured)) {
    return 0;
  }
  const count = structured.totalSources;
  return typeof count === 'number' ? count : 0;
}
null; +} + +export class DiscoverDataTool extends BaseTool { + readonly name = 'discover_data'; + + constructor(private readonly deps: DiscoverDataDeps) { + super(); + } + + get description(): string { + return 'Discover existing wiki pages, semantic layer sources, and raw warehouse schema hits before writing ingest output.'; + } + + get inputSchema() { + return discoverDataInputSchema; + } + + async call(input: DiscoverDataInput, context: ToolContext): Promise> { + const allowed = allowedConnectionNames(context); + if (input.connectionName && allowed && !allowed.has(input.connectionName)) { + return { + markdown: `Connection "${input.connectionName}" is not available to this ingest stage.`, + structured: { wiki: null, sl: null, raw: null }, + }; + } + + if (input.sourceName) { + const sl = await this.deps.slDiscoverTool.call( + { sourceName: input.sourceName, connectionId: input.connectionName }, + context, + ); + return { markdown: sl.markdown, structured: { wiki: null, sl: sl.structured, raw: null } }; + } + + const query = input.query?.trim() || ''; + const limit = input.limit ?? 
10; + const parts: string[] = []; + let wiki: unknown | null = null; + let sl: unknown | null = null; + let raw: DiscoverDataStructured['raw'] = null; + + if (query) { + const wikiResult = await this.deps.wikiSearchTool.call({ query, limit }, context); + if (totalFound(wikiResult.structured) > 0) { + parts.push('## Wiki Pages', '> use `wiki_read(blockKey)` for full content', wikiResult.markdown, ''); + wiki = wikiResult.structured; + } + } + + const slResult = await this.deps.slDiscoverTool.call( + { query: query || undefined, connectionId: input.connectionName }, + context, + ); + if (totalSources(slResult.structured) > 0) { + parts.push( + '## Semantic Layer Sources', + '> use `sl_read_source(sourceName)` for the YAML, or `entity_details` for warehouse-shape details', + slResult.markdown, + '', + ); + sl = slResult.structured; + } + + const catalog = this.deps.catalogFactory(context); + const connections = input.connectionName ? [input.connectionName] : [...(allowed ?? [])].sort(); + const rawHits: RawSchemaHit[] = []; + for (const connectionName of connections) { + rawHits.push(...(await catalog.searchByName(connectionName, query, limit))); + } + if (rawHits.length > 0) { + parts.push( + '## Raw Warehouse Schema', + '> use `entity_details({connectionName, targets: [{display}]})` for full DDL + sample values', + ); + parts.push( + rawHits + .slice(0, limit) + .map( + (hit) => + `- ${hit.kind}: ${hit.display} [connectionName=${hit.connectionName}] (matched on ${hit.matchedOn}) - ` + + `follow up with \`entity_details({connectionName: "${hit.connectionName}", targets: [{display: "${hit.display}"}]})\``, + ) + .join('\n'), + ); + raw = { hits: rawHits.slice(0, limit) }; + } + + if (parts.length === 0) { + return { + markdown: `No matches for "${query}" across wiki, semantic layer, or raw warehouse schema. 
Try broader terms; this concept may not exist yet.`, + structured: { wiki, sl, raw }, + }; + } + + return { markdown: parts.join('\n'), structured: { wiki, sl, raw } }; + } +} diff --git a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts new file mode 100644 index 00000000..9188bc68 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.test.ts @@ -0,0 +1,192 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKtxProject, type KtxLocalProject } from '../../../project/index.js'; +import type { ToolContext } from '../../../tools/index.js'; +import { EntityDetailsTool } from './entity-details.tool.js'; +import { WarehouseCatalogService } from './warehouse-catalog.service.js'; + +describe('EntityDetailsTool', () => { + let tempDir: string; + let project: KtxLocalProject; + let tool: EntityDetailsTool; + let context: ToolContext; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-entity-details-')); + project = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + await seedLiveDatabaseScan(); + tool = new EntityDetailsTool(() => new WarehouseCatalogService({ fileStore: project.fileStore })); + context = { + sourceId: 'ingest', + messageId: 'm1', + userId: 'system', + session: { + allowedConnectionNames: new Set(['warehouse']), + } as any, + }; + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-1') { + const root = `raw-sources/${connectionName}/live-database/${syncId}`; + await project.fileStore.writeFile( + `${root}/connection.json`, + JSON.stringify({ connectionId: 
connectionName, driver: 'postgres', extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2), + 'ktx', + 'ktx@example.com', + 'seed connection', + ); + await project.fileStore.writeFile( + `${root}/tables/orders.json`, + JSON.stringify( + { + catalog: null, + db: 'public', + name: 'orders', + kind: 'table', + comment: 'Customer orders', + estimatedRows: 12, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + { + name: 'status', + nativeType: 'text', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: 'Order status', + }, + ], + foreignKeys: [], + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed orders', + ); + await project.fileStore.writeFile( + `${root}/enrichment/relationship-profile.json`, + JSON.stringify( + { + connectionId: connectionName, + driver: 'postgres', + tables: [{ table: { catalog: null, db: 'public', name: 'orders' }, rowCount: 12 }], + columns: { + 'orders.status': { + table: { catalog: null, db: 'public', name: 'orders' }, + column: 'status', + rowCount: 12, + nullCount: 0, + distinctCount: 2, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + }, + }, + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed profile', + ); + } + + it('returns scoped table detail for a display target', async () => { + const result = await tool.call({ connectionName: 'warehouse', targets: [{ display: 'public.orders' }] }, context); + + expect(result.markdown).toContain('### public.orders'); + expect(result.markdown).toContain('- status (text, nullable=false)'); + expect(result.markdown).toContain('sample: ["paid","refunded"]'); + expect(result.structured.scanAvailable).toBe(true); + expect(result.structured.resolved).toHaveLength(1); + }); + + it('resolves display targets that include a column name', async () => { + const result = await tool.call( + { connectionName: 
'warehouse', targets: [{ display: 'public.orders.status' }] }, + context, + ); + + expect(result.markdown).toContain('### public.orders'); + expect(result.markdown).toContain('- status (text, nullable=false)'); + expect(result.markdown).not.toContain('- id (integer'); + expect(result.structured.resolved).toHaveLength(1); + expect(result.structured.resolved[0]?.columns.map((column) => column.name)).toEqual(['status']); + }); + + it('reports missing explicit columns instead of returning an empty column list', async () => { + const result = await tool.call( + { connectionName: 'warehouse', targets: [{ display: 'public.orders.plan_tier' }] }, + context, + ); + + expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier'); + expect(result.markdown).toContain('Available columns: id, status'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); + }); + + it('reports missing structured table targets in model-visible markdown', async () => { + const result = await tool.call( + { + connectionName: 'warehouse', + targets: [{ catalog: null, db: 'public', name: 'orderz' }], + }, + context, + ); + + expect(result.markdown).toContain('Not found in scan: public.orderz'); + expect(result.markdown).toContain('Closest matches: orders'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); + }); + + it('reports missing structured column targets in model-visible markdown', async () => { + const result = await tool.call( + { + connectionName: 'warehouse', + targets: [{ catalog: null, db: 'public', name: 'orders', column: 'plan_tier' }], + }, + context, + ); + + expect(result.markdown).toContain('Column not found in scan: public.orders.plan_tier'); + expect(result.markdown).toContain('Available columns: id, status'); + expect(result.structured.resolved).toHaveLength(0); + expect(result.structured.missing).toHaveLength(1); + }); + + it('returns a no-scan 
state distinct from not found', async () => { + const result = await tool.call( + { connectionName: 'empty', targets: [{ display: 'public.orders' }] }, + { ...context, session: { ...context.session!, allowedConnectionNames: new Set(['empty']) } }, + ); + + expect(result.markdown).toContain('No live-database scan available for connection "empty"; run `ktx scan` first.'); + expect(result.structured.scanAvailable).toBe(false); + }); + + it('refuses out-of-scope connections', async () => { + const result = await tool.call({ connectionName: 'billing', targets: [{ display: 'public.orders' }] }, context); + + expect(result.markdown).toContain('Connection "billing" is not available to this ingest stage.'); + expect(result.structured.scanAvailable).toBe(false); + }); +}); diff --git a/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts new file mode 100644 index 00000000..27cf55a0 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/entity-details.tool.ts @@ -0,0 +1,170 @@ +import { z } from 'zod'; +import type { KtxTableRef } from '../../../scan/types.js'; +import { BaseTool, type ToolContext, type ToolOutput } from '../../../tools/index.js'; +import { WarehouseCatalogService, type TableDetail } from './warehouse-catalog.service.js'; + +const targetSchema = z.union([ + z.object({ display: z.string().min(1) }), + z.object({ + catalog: z.string().nullable(), + db: z.string().nullable(), + name: z.string().min(1), + column: z.string().optional(), + }), +]); + +const entityDetailsInputSchema = z.object({ + connectionName: z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/), + targets: z.array(targetSchema).min(1).max(50), +}); + +type EntityDetailsInput = z.infer; +type EntityDetailsTarget = EntityDetailsInput['targets'][number]; + +export interface EntityDetailsStructured { + resolved: TableDetail[]; + missing: Array<{ target: unknown; candidates: 
/**
 * Produces the human-readable label for a requested target.
 *
 * Display targets are returned verbatim. Structured targets are rendered as a
 * dot-joined path of their non-empty `catalog`, `db`, `name` and `column`
 * parts (null, undefined and empty-string parts are dropped).
 */
function targetLabel(target: EntityDetailsTarget): string {
  if ('display' in target) {
    return target.display;
  }
  return [target.catalog, target.db, target.name, target.column].filter((part): part is string => !!part).join('.');
}

/**
 * Appends the "not found" lines for a target to the markdown buffer.
 *
 * @param parts - Markdown line buffer; mutated in place.
 * @param target - The target that could not be resolved.
 * @param candidates - Near-miss tables; when non-empty their bare table names
 *   are listed on a second line.
 */
function appendMissingTargetMarkdown(parts: string[], target: EntityDetailsTarget, candidates: KtxTableRef[]): void {
  parts.push(`Not found in scan: ${targetLabel(target)}`);
  if (candidates.length > 0) {
    parts.push(`Closest matches: ${candidates.map((candidate) => candidate.name).join(', ')}`);
  }
}

/**
 * Turns a requested target into a table reference plus near-miss candidates.
 *
 * Display targets are resolved entirely by the catalog. Structured targets are
 * passed through as the resolved reference unconditionally; the catalog is only
 * consulted (using the target's label) to collect candidates for the case
 * where the caller later finds the table missing.
 */
async function resolveTarget(
  catalog: WarehouseCatalogService,
  connectionName: string,
  target: EntityDetailsTarget,
): Promise<{ resolved: (KtxTableRef & { column?: string }) | null; candidates: KtxTableRef[] }> {
  if ('display' in target) {
    return catalog.resolveDisplayTarget(connectionName, target.display);
  }

  const candidateResolution = await catalog.resolveDisplayTarget(connectionName, targetLabel(target));
  return {
    resolved: {
      catalog: target.catalog,
      db: target.db,
      name: target.name,
      column: target.column,
    },
    candidates: candidateResolution.candidates,
  };
}

/**
 * Renders the sample-value suffix for a column line.
 *
 * @returns `' - sample: [...]'` with at most the first 10 values JSON-encoded,
 *   or an empty string when there are no sample values.
 */
function sampleText(values: string[]): string {
  return values.length > 0 ? ` - sample: ${JSON.stringify(values.slice(0, 10))}` : '';
}

/**
 * Appends the markdown section describing one table to the buffer.
 *
 * @param parts - Markdown line buffer; mutated in place.
 * @param detail - Table detail to render.
 * @param columnName - When given, only the column whose name matches exactly
 *   is listed. The "Native columns" count always reports
 *   `detail.columns.length`, not the filtered count.
 */
function appendTableMarkdown(parts: string[], detail: TableDetail, columnName?: string): void {
  const columns = columnName ? detail.columns.filter((column) => column.name === columnName) : detail.columns;
  parts.push(`### ${detail.display}`);
  parts.push(`Type: ${detail.kind} | Native columns: ${detail.columns.length}`);
  // Pick the text with the same truthiness rule that decides whether the line
  // is printed at all. Previously the guard used `||` but the rendering used
  // `??`, so an empty-string description combined with a real comment printed
  // a blank "Description: " line instead of falling back to the comment.
  const description = detail.description || detail.comment;
  if (description) {
    parts.push(`Description: ${description}`);
  }
  parts.push('', 'Columns:');
  for (const column of columns) {
    const pk = column.primaryKey ? ', PK' : '';
    parts.push(`- ${column.name} (${column.nativeType}, nullable=${column.nullable}${pk})${sampleText(column.sampleValues)}`);
  }
  parts.push('');
}

/**
 * Looks up a column of a table by name, ignoring case.
 *
 * @returns The first column whose lower-cased name equals the lower-cased
 *   `columnName`, or `null` when there is none.
 */
function findColumn(detail: TableDetail, columnName: string): TableDetail['columns'][number] | null {
  const normalized = columnName.toLowerCase();
  return detail.columns.find((column) => column.name.toLowerCase() === normalized) ?? null;
}
resolution.candidates }); + appendMissingTargetMarkdown(parts, target, resolution.candidates); + continue; + } + const detail = await catalog.getTable({ connectionName: input.connectionName, ...resolution.resolved }); + if (!detail) { + missing.push({ target, candidates: resolution.candidates }); + appendMissingTargetMarkdown(parts, target, resolution.candidates); + continue; + } + const requestedColumn = resolution.resolved.column; + if (requestedColumn) { + const column = findColumn(detail, requestedColumn); + if (!column) { + missing.push({ + target, + candidates: [{ catalog: detail.catalog, db: detail.db, name: detail.name }], + }); + parts.push(`Column not found in scan: ${detail.display}.${requestedColumn}`); + parts.push(`Available columns: ${detail.columns.map((candidate) => candidate.name).join(', ')}`); + continue; + } + const scopedDetail = { ...detail, columns: [column] }; + resolved.push(scopedDetail); + appendTableMarkdown(parts, scopedDetail, column.name); + continue; + } + + resolved.push(detail); + appendTableMarkdown(parts, detail); + } + + return { + markdown: parts.join('\n').trim(), + structured: { resolved, missing, scanAvailable: true }, + }; + } +} diff --git a/packages/context/src/ingest/tools/warehouse-verification/index.ts b/packages/context/src/ingest/tools/warehouse-verification/index.ts new file mode 100644 index 00000000..0901eace --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/index.ts @@ -0,0 +1,34 @@ +import type { KtxFileStorePort } from '../../../core/index.js'; +import type { SlConnectionCatalogPort } from '../../../sl/index.js'; +import type { BaseTool, ToolContext } from '../../../tools/index.js'; +import { DiscoverDataTool } from './discover-data.tool.js'; +import { EntityDetailsTool } from './entity-details.tool.js'; +import { SqlExecutionTool } from './sql-execution.tool.js'; +import { WarehouseCatalogService } from './warehouse-catalog.service.js'; + +export { DiscoverDataTool } from 
/**
 * Assembles the warehouse-verification tool set.
 *
 * @param deps.connections - Connection catalog handed to the SQL execution tool.
 * @param deps.fallbackFileStore - File store used for catalog reads when the
 *   tool context's session does not provide a `configService`.
 * @param deps.wikiSearchTool - Wiki search tool that `discover_data` delegates to.
 * @param deps.slDiscoverTool - Semantic-layer discovery tool that
 *   `discover_data` delegates to.
 * @returns The entity-details, SQL-execution and discover-data tools, in that order.
 */
export function createWarehouseVerificationTools(deps: {
  connections: SlConnectionCatalogPort;
  fallbackFileStore: KtxFileStorePort;
  wikiSearchTool: BaseTool;
  slDiscoverTool: BaseTool;
}): BaseTool[] {
  // A fresh catalog service is built per call so that the file store follows
  // whichever session is attached to the invoking tool context.
  function buildCatalog(toolContext: ToolContext) {
    const fileStore = toolContext.session?.configService ?? deps.fallbackFileStore;
    return new WarehouseCatalogService({ fileStore });
  }

  const entityDetails = new EntityDetailsTool(buildCatalog);
  const sqlExecution = new SqlExecutionTool(deps.connections);
  const discoverData = new DiscoverDataTool({
    wikiSearchTool: deps.wikiSearchTool,
    slDiscoverTool: deps.slDiscoverTool,
    catalogFactory: buildCatalog,
  });

  return [entityDetails, sqlExecution, discoverData];
}
connections.executeQuery.mockResolvedValue({ headers: ['status'], rows: [['paid']], totalRows: 1 }); + + const result = await tool.call( + { connectionName: 'warehouse', sql: 'select status from public.orders', rowLimit: 5 }, + context, + ); + + expect(connections.executeQuery).toHaveBeenCalledWith( + 'warehouse', + 'select * from (select status from public.orders) as ktx_query_result limit 5', + ); + expect(result.markdown).toContain('| status |'); + expect(result.structured.wrappedSql).toContain('limit 5'); + }); + + it.each(['insert into x values (1)', 'drop table x', 'vacuum'])('rejects mutating SQL: %s', async (sql) => { + connections.executeQuery.mockClear(); + + const result = await tool.call({ connectionName: 'warehouse', sql }, context); + + expect(result.markdown).toContain('Only read-only SELECT/WITH queries can be executed locally.'); + expect(connections.executeQuery).not.toHaveBeenCalled(); + }); + + it('surfaces connector errors verbatim', async () => { + connections.executeQuery.mockRejectedValue(new Error('relation "orbit_analytics.customer" does not exist')); + + const result = await tool.call( + { connectionName: 'warehouse', sql: 'select 1 from orbit_analytics.customer', rowLimit: 1 }, + context, + ); + + expect(result.markdown).toContain('relation "orbit_analytics.customer" does not exist'); + expect(result.structured.error).toContain('relation "orbit_analytics.customer" does not exist'); + }); +}); diff --git a/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts b/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts new file mode 100644 index 00000000..03375938 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/sql-execution.tool.ts @@ -0,0 +1,102 @@ +import { z } from 'zod'; +import { assertReadOnlySql, limitSqlForExecution } from '../../../connections/index.js'; +import type { SlConnectionCatalogPort } from '../../../sl/index.js'; +import { BaseTool, type 
/** `JSON.stringify` replacer that renders bigint values, which JSON cannot encode, as decimal strings. */
function bigintSafeReplacer(_key: string, value: unknown): unknown {
  return typeof value === 'bigint' ? value.toString() : value;
}

/**
 * Renders one result value as the text of a single markdown table cell.
 *
 * - `null` / `undefined` become an empty cell.
 * - Arrays and plain objects are JSON-encoded instead of collapsing to
 *   `[object Object]`.
 * - Everything else goes through `String(...)`.
 *
 * Line breaks are flattened to spaces and `|` is backslash-escaped so that a
 * value can never add or remove table columns or rows.
 */
function formatMarkdownCell(value: unknown): string {
  let text: string;
  if (value === null || value === undefined) {
    text = '';
  } else if (typeof value === 'object') {
    const proto = Object.getPrototypeOf(value);
    const isJsonContainer = Array.isArray(value) || proto === Object.prototype || proto === null;
    if (isJsonContainer) {
      // Safe even for null-prototype objects, which String(...) cannot convert.
      const fallback = Object.prototype.toString.call(value);
      try {
        text = JSON.stringify(value, bigintSafeReplacer) ?? fallback;
      } catch {
        // Circular structures and throwing toJSON implementations land here.
        text = fallback;
      }
    } else {
      text = String(value);
    }
  } else {
    text = String(value);
  }
  return text.replace(/\r\n|\r|\n/g, ' ').replace(/\|/g, '\\|');
}

/**
 * Formats a query result as a markdown table for model-visible output.
 *
 * @param headers - Column names. When empty, the function falls back to
 *   `'Query returned no rows.'` (no rows) or a JSON dump of the first 20 rows.
 * @param rows - Result rows; only the first 20 are rendered.
 * @param totalRows - Total row count reported for the query; when it exceeds
 *   the number of rendered rows a trailing `... +N more rows` line is added.
 * @returns The markdown text, lines joined with `\n`.
 */
function markdownTable(headers: string[], rows: unknown[][], totalRows: number): string {
  const visible = rows.slice(0, 20);
  if (headers.length === 0) {
    // The replacer keeps bigint cells from making JSON.stringify throw.
    return rows.length === 0 ? 'Query returned no rows.' : JSON.stringify(visible, bigintSafeReplacer);
  }
  const lines = [
    `| ${headers.map(formatMarkdownCell).join(' | ')} |`,
    `| ${headers.map(() => '---').join(' | ')} |`,
    ...visible.map((row) => `| ${row.map(formatMarkdownCell).join(' | ')} |`),
  ];
  if (totalRows > visible.length) {
    lines.push(`... +${totalRows - visible.length} more rows`);
  }
  return lines.join('\n');
}
error.message : String(error); + return { + markdown: `SQL execution failed: ${message}`, + structured: { headers: [], rows: [], rowCount: 0, truncated: false, sql, wrappedSql, error: message }, + }; + } + } +} diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts new file mode 100644 index 00000000..c2ab1f36 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.test.ts @@ -0,0 +1,196 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { initKtxProject, type KtxLocalProject } from '../../../project/index.js'; +import { WarehouseCatalogService } from './warehouse-catalog.service.js'; + +describe('WarehouseCatalogService', () => { + let tempDir: string; + let project: KtxLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-warehouse-catalog-')); + project = await initKtxProject({ projectDir: join(tempDir, 'project'), projectName: 'warehouse' }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + async function seedLiveDatabaseScan(connectionName = 'warehouse', syncId = 'sync-2', driver = 'postgres') { + const root = `raw-sources/${connectionName}/live-database/${syncId}`; + const tableRef = { + catalog: driver === 'bigquery' ? 'analytics' : null, + db: driver === 'sqlite' ? 
null : 'public', + name: 'orders', + }; + await project.fileStore.writeFile( + `${root}/connection.json`, + JSON.stringify({ connectionId: connectionName, driver, extractedAt: '2026-05-12T00:00:00.000Z' }, null, 2), + 'ktx', + 'ktx@example.com', + 'seed connection', + ); + await project.fileStore.writeFile( + `${root}/tables/orders.json`, + JSON.stringify( + { + catalog: tableRef.catalog, + db: tableRef.db, + name: tableRef.name, + kind: 'table', + comment: 'Customer orders', + estimatedRows: 12, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: 'Order id', + }, + { + name: 'status', + nativeType: 'text', + normalizedType: 'text', + dimensionType: 'string', + nullable: false, + primaryKey: false, + comment: 'Order status', + }, + ], + foreignKeys: [], + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed orders', + ); + await project.fileStore.writeFile( + `${root}/enrichment/relationship-profile.json`, + JSON.stringify( + { + connectionId: connectionName, + driver, + sqlAvailable: true, + queryCount: 3, + tables: [{ table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name }, rowCount: 12 }], + columns: { + 'orders.status': { + table: { catalog: tableRef.catalog, db: tableRef.db, name: tableRef.name }, + column: 'status', + nativeType: 'text', + normalizedType: 'text', + rowCount: 12, + nullCount: 0, + distinctCount: 2, + uniquenessRatio: 0.1667, + nullRate: 0, + sampleValues: ['paid', 'refunded'], + minTextLength: 4, + maxTextLength: 8, + }, + }, + warnings: [], + }, + null, + 2, + ), + 'ktx', + 'ktx@example.com', + 'seed profile', + ); + } + + it('finds the latest sync and merges table schema with relationship profile values', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-1'); + await seedLiveDatabaseScan('warehouse', 'sync-2'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + 
await expect(catalog.getLatestSyncId('warehouse')).resolves.toBe('sync-2'); + const detail = await catalog.getTable({ connectionName: 'warehouse', catalog: null, db: 'public', name: 'orders' }); + + expect(detail).toMatchObject({ + connectionName: 'warehouse', + display: 'public.orders', + rowCount: 12, + columns: [ + { name: 'id', nativeType: 'integer', primaryKey: true }, + { name: 'status', nativeType: 'text', sampleValues: ['paid', 'refunded'], distinctCount: 2 }, + ], + }); + }); + + it('returns scanAvailable=false when no live-database scan exists', async () => { + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + await expect(catalog.getTable({ connectionName: 'missing', catalog: null, db: 'public', name: 'orders' })).resolves.toBeNull(); + await expect(catalog.hasScan('missing')).resolves.toBe(false); + }); + + it('resolves postgres display strings and returns closest candidates for missing tables', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({ + resolved: { catalog: null, db: 'public', name: 'orders' }, + candidates: [], + dialect: 'postgres', + }); + await expect(catalog.resolveDisplay('warehouse', 'public.orderz')).resolves.toMatchObject({ + resolved: null, + candidates: [{ name: 'orders' }], + }); + }); + + it('treats two-part BigQuery identifiers as ambiguous instead of guessing', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplay('warehouse', 'public.orders')).resolves.toMatchObject({ + resolved: null, + dialect: 'bigquery', + }); + }); + + it('resolves postgres column display strings without treating the column as a table', async () => { + await seedLiveDatabaseScan(); + const catalog = new 
WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'public.orders.status')).resolves.toMatchObject({ + resolved: { catalog: null, db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'postgres', + }); + }); + + it('resolves BigQuery column display strings with four parts', async () => { + await seedLiveDatabaseScan('warehouse', 'sync-bigquery', 'bigquery'); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.resolveDisplayTarget('warehouse', 'analytics.public.orders.status')).resolves.toMatchObject({ + resolved: { catalog: 'analytics', db: 'public', name: 'orders', column: 'status' }, + candidates: [], + dialect: 'bigquery', + }); + }); + + it('searches table names, column names, comments, and descriptions', async () => { + await seedLiveDatabaseScan(); + const catalog = new WarehouseCatalogService({ fileStore: project.fileStore }); + + await expect(catalog.searchByName('warehouse', 'status', 10)).resolves.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + kind: 'column', + ref: expect.objectContaining({ db: 'public', name: 'orders', column: 'status' }), + matchedOn: 'name', + }), + ]), + ); + }); +}); diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts new file mode 100644 index 00000000..691f88e9 --- /dev/null +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts @@ -0,0 +1,452 @@ +import { getDialectForDriver } from '../../../connections/index.js'; +import type { KtxFileStorePort } from '../../../core/index.js'; +import type { + KtxConnectionDriver, + KtxSchemaColumn, + KtxSchemaForeignKey, + KtxSchemaTable, + KtxTableRef, +} from '../../../scan/types.js'; + +type CatalogDriver = KtxConnectionDriver | 'sqlite3'; + +export interface 
/** Dependencies injected into {@link WarehouseCatalogService}. */
export interface WarehouseCatalogServiceDeps {
  fileStore: KtxFileStorePort;
}

/** A schema column merged with the statistics found in the relationship profile (null when absent). */
export interface WarehouseColumnDetail extends KtxSchemaColumn {
  descriptions: Record<string, string>;
  rowCount: number | null;
  nullCount: number | null;
  distinctCount: number | null;
  nullRate: number | null;
  sampleValues: string[];
}

/** A table from the latest scan, merged with profile row counts and per-column statistics. */
export interface TableDetail {
  connectionName: string;
  catalog: string | null;
  db: string | null;
  name: string;
  display: string;
  kind: string;
  comment: string | null;
  description: string | null;
  rowCount: number | null;
  columns: WarehouseColumnDetail[];
  foreignKeys: KtxSchemaForeignKey[];
}

/** One search hit: either a table or a column, tagged with the field the query matched on. */
export type RawSchemaHit =
  | {
      kind: 'table';
      connectionName: string;
      ref: KtxTableRef;
      display: string;
      matchedOn: 'name' | 'db' | 'comment' | 'description';
    }
  | {
      kind: 'column';
      connectionName: string;
      ref: KtxTableRef & { column: string };
      display: string;
      matchedOn: 'name' | 'comment' | 'description';
    };

/** Result of resolving a display string to a table or a table column. */
export interface DisplayTargetResolution {
  resolved: (KtxTableRef & { column?: string }) | null;
  candidates: KtxTableRef[];
  dialect: string;
}

interface ConnectionArtifact {
  driver?: CatalogDriver;
}

interface RelationshipProfileColumn {
  table?: KtxTableRef;
  column?: string;
  rowCount?: number;
  nullCount?: number;
  distinctCount?: number;
  nullRate?: number;
  sampleValues?: unknown[];
}

interface RelationshipProfileArtifact {
  driver?: CatalogDriver;
  tables?: Array<{ table?: KtxTableRef; rowCount?: number }>;
  columns?: Record<string, RelationshipProfileColumn>;
}

interface ConnectionCatalog {
  connectionName: string;
  syncId: string;
  driver: CatalogDriver;
  tables: KtxSchemaTable[];
  profile: RelationshipProfileArtifact | null;
}

type DescribedColumn = KtxSchemaColumn & {
  description?: string | null;
  descriptions?: Record<string, string>;
};

type TableWithDescriptions = KtxSchemaTable & {
  description?: string | null;
  descriptions?: Record<string, string>;
  columns: DescribedColumn[];
};

/** Lower-cases an identifier; null/undefined become the empty string. */
function normalize(value: string | null | undefined): string {
  return (value ?? '').toLowerCase();
}

/** Case-insensitive comparison of catalog, db and name. */
function refsEqual(left: KtxTableRef, right: KtxTableRef): boolean {
  return (
    normalize(left.catalog) === normalize(right.catalog) &&
    normalize(left.db) === normalize(right.db) &&
    normalize(left.name) === normalize(right.name)
  );
}

/** `catalog.db.name`, normalized; missing parts contribute an empty segment. */
function refKey(ref: KtxTableRef): string {
  return [ref.catalog, ref.db, ref.name].map((part) => normalize(part)).join('.');
}

/** `catalog.db.name.column`, normalized. */
function columnKey(ref: KtxTableRef, column: string): string {
  return `${refKey(ref)}.${normalize(column)}`;
}

/** Parses JSON and casts it to `T`; the shape is NOT validated at runtime. */
function readJson<T>(content: string): T {
  return JSON.parse(content) as T;
}

/** Trims whitespace and strips one leading/trailing quote, backtick or square bracket. */
function cleanIdentifierPart(part: string): string {
  return part.trim().replace(/^["'`\[]|["'`\]]$/g, '');
}

/** Splits a dotted display string into cleaned, non-empty identifier parts. */
function splitDisplay(display: string): string[] {
  return display
    .trim()
    .split('.')
    .map(cleanIdentifierPart)
    .filter(Boolean);
}

function isSqliteDriver(driver: CatalogDriver): boolean {
  return driver === 'sqlite' || driver === 'sqlite3';
}

/** Drivers whose table display strings must always carry catalog, db and name. */
function usesThreePartNames(driver: CatalogDriver): boolean {
  return driver === 'bigquery' || driver === 'snowflake' || driver === 'sqlserver';
}

/** Renders a table ref for display: bare name for SQLite, otherwise the non-empty parts joined by dots. */
function formatDisplay(driver: CatalogDriver, table: KtxTableRef): string {
  if (isSqliteDriver(driver)) {
    return table.name;
  }
  return [table.catalog, table.db, table.name].filter((part): part is string => Boolean(part)).join('.');
}

/**
 * Parses a table display string according to the driver's naming depth.
 *
 * - SQLite accepts exactly one part.
 * - BigQuery / Snowflake / SQL Server accept exactly three parts.
 * - Every other driver accepts one, two (`db.name`) or three parts.
 *
 * @returns the table ref, or null when the part count does not fit the driver.
 */
function parseDisplay(driver: CatalogDriver, display: string): KtxTableRef | null {
  const parts = splitDisplay(display);
  // Parts are already filtered to non-empty strings, so truthiness doubles as the presence check.
  const [first, second, third] = parts;
  if (isSqliteDriver(driver)) {
    return parts.length === 1 && first ? { catalog: null, db: null, name: first } : null;
  }
  if (parts.length === 3 && first && second && third) {
    return { catalog: first, db: second, name: third };
  }
  if (usesThreePartNames(driver)) {
    return null;
  }
  if (parts.length === 2 && first && second) {
    return { catalog: null, db: first, name: second };
  }
  return parts.length === 1 && first ? { catalog: null, db: null, name: first } : null;
}

/** Number of dotted parts a table display string is expected to have for the driver. */
function expectedDisplayPartCount(driver: CatalogDriver): number {
  if (isSqliteDriver(driver)) {
    return 1;
  }
  return usesThreePartNames(driver) ? 3 : 2;
}

/**
 * Parses `<table display>.<column>`; the table portion must have exactly the
 * driver's expected part count.
 */
function parseColumnDisplay(driver: CatalogDriver, display: string): (KtxTableRef & { column: string }) | null {
  const parts = splitDisplay(display);
  if (parts.length !== expectedDisplayPartCount(driver) + 1) {
    return null;
  }
  const column = parts.at(-1);
  if (!column) {
    return null;
  }
  const table = parseDisplay(driver, parts.slice(0, -1).join('.'));
  return table ? { ...table, column } : null;
}

/**
 * Ranks tables by how closely their name resembles the LAST dotted part of `display`.
 *
 * Score: 100 for an exact (case-insensitive) match, 80 when either name contains the
 * other, otherwise the fraction of positions holding the same character. Tables with
 * score 0 are dropped; ties are broken by table name.
 */
function bestCandidates(tables: KtxSchemaTable[], display: string, limit = 5): KtxTableRef[] {
  const needle = normalize(splitDisplay(display).at(-1) ?? display);
  return tables
    .map((table) => {
      const name = normalize(table.name);
      let score = 0;
      if (name === needle) {
        score = 100;
      } else if (name.includes(needle) || needle.includes(name)) {
        score = 80;
      } else {
        const samePosition = [...name].filter((char, index) => needle[index] === char).length;
        score = samePosition / Math.max(name.length, needle.length, 1);
      }
      return { table, score };
    })
    .filter((entry) => entry.score > 0)
    .sort((left, right) => right.score - left.score || left.table.name.localeCompare(right.table.name))
    .slice(0, limit)
    .map(({ table }) => ({ catalog: table.catalog, db: table.db, name: table.name }));
}

/** First description whose trimmed text is non-empty, or null. */
function firstDescription(descriptions: Record<string, string> | undefined): string | null {
  return Object.values(descriptions ?? {}).find((value) => value.trim().length > 0) ?? null;
}

/** Which table field (checked in order name, db, comment, description) contains the query, if any. */
function matchedOnTable(table: TableWithDescriptions, query: string): RawSchemaHit['matchedOn'] | null {
  const q = normalize(query);
  if (!q) {
    return null;
  }
  if (normalize(table.name).includes(q)) {
    return 'name';
  }
  if (normalize(table.db).includes(q)) {
    return 'db';
  }
  if (normalize(table.comment).includes(q)) {
    return 'comment';
  }
  if (normalize(firstDescription(table.descriptions) ?? table.description).includes(q)) {
    return 'description';
  }
  return null;
}

/** Which column field (checked in order name, comment, description) contains the query, if any. */
function matchedOnColumn(column: DescribedColumn, query: string): 'name' | 'comment' | 'description' | null {
  const q = normalize(query);
  if (!q) {
    return null;
  }
  if (normalize(column.name).includes(q)) {
    return 'name';
  }
  if (normalize(column.comment).includes(q)) {
    return 'comment';
  }
  if (normalize(firstDescription(column.descriptions) ?? column.description).includes(q)) {
    return 'description';
  }
  return null;
}

/**
 * Decides whether a profile entry describes `table.columnName`.
 *
 * An entry that carries its own table ref is trusted over its key: it can only match
 * the table it names. The bare `table.column` key is used only when the entry does
 * not say which table (or which column) it belongs to, so same-named tables in
 * different schemas cannot pick up each other's statistics.
 */
function profileColumnMatches(
  key: string,
  value: RelationshipProfileColumn,
  table: KtxTableRef,
  columnName: string,
): boolean {
  const wanted = normalize(columnName);
  if (value.table) {
    if (!refsEqual(value.table, table)) {
      return false;
    }
    if (value.column !== undefined) {
      return normalize(value.column) === wanted;
    }
  }
  return normalize(key) === `${normalize(table.name)}.${wanted}`;
}

/**
 * Read-only view over the most recent live-database scan stored under
 * `raw-sources/<connection>/live-database/` in the file store.
 *
 * Catalogs are loaded lazily and memoized per connection for the lifetime of the
 * instance (including the "no scan found" result).
 */
export class WarehouseCatalogService {
  private readonly catalogs = new Map<string, Promise<ConnectionCatalog | null>>();

  constructor(private readonly deps: WarehouseCatalogServiceDeps) {}

  /** True when at least one scan (a `connection.json`) exists for the connection. */
  async hasScan(connectionName: string): Promise<boolean> {
    return (await this.loadCatalog(connectionName)) !== null;
  }

  /** Directory name of the scan that was selected as latest, or null without a scan. */
  async getLatestSyncId(connectionName: string): Promise<string | null> {
    return (await this.loadCatalog(connectionName))?.syncId ?? null;
  }

  /** Refs of every table in the latest scan; empty without a scan. */
  async listTables(connectionName: string): Promise<KtxTableRef[]> {
    const catalog = await this.loadCatalog(connectionName);
    return catalog?.tables.map((table) => ({ catalog: table.catalog, db: table.db, name: table.name })) ?? [];
  }

  /**
   * Returns the table merged with relationship-profile statistics.
   *
   * @returns null when the connection has no scan or the table is not in it.
   */
  async getTable(ref: { connectionName: string } & KtxTableRef): Promise<TableDetail | null> {
    const catalog = await this.loadCatalog(ref.connectionName);
    if (!catalog) {
      return null;
    }
    const table = catalog.tables.find((candidate) => refsEqual(candidate, ref)) as TableWithDescriptions | undefined;
    if (!table) {
      return null;
    }
    const profileTable = (catalog.profile?.tables ?? []).find(
      (candidate) => candidate.table && refsEqual(candidate.table, table),
    );
    const profileColumns = catalog.profile?.columns ?? {};
    // Hoisted so the fallback scan does not rebuild the entry list for every column.
    const profileEntries = Object.entries(profileColumns);

    return {
      connectionName: ref.connectionName,
      catalog: table.catalog,
      db: table.db,
      name: table.name,
      display: formatDisplay(catalog.driver, table),
      kind: table.kind,
      comment: table.comment,
      description: table.description ?? firstDescription(table.descriptions),
      rowCount: profileTable?.rowCount ?? table.estimatedRows ?? null,
      columns: table.columns.map((rawColumn) => {
        const column = rawColumn as DescribedColumn;
        const profileColumn =
          profileColumns[columnKey(table, column.name)] ??
          profileEntries.find(([key, value]) => profileColumnMatches(key, value, table, column.name))?.[1];
        return {
          ...column,
          descriptions: column.descriptions ?? {},
          rowCount: profileColumn?.rowCount ?? null,
          nullCount: profileColumn?.nullCount ?? null,
          distinctCount: profileColumn?.distinctCount ?? null,
          nullRate: profileColumn?.nullRate ?? null,
          sampleValues: (profileColumn?.sampleValues ?? []).map((value) => String(value)),
        };
      }),
      foreignKeys: table.foreignKeys,
    };
  }

  /**
   * Resolves a table display string. When it cannot be parsed for the driver or the
   * table is unknown, `resolved` is null and `candidates` lists similar table names.
   */
  async resolveDisplay(
    connectionName: string,
    display: string,
  ): Promise<{
    resolved: KtxTableRef | null;
    candidates: KtxTableRef[];
    dialect: string;
  }> {
    const catalog = await this.loadCatalog(connectionName);
    if (!catalog) {
      return { resolved: null, candidates: [], dialect: 'unknown' };
    }
    const dialect = getDialectForDriver(catalog.driver).type;
    const parsed = parseDisplay(catalog.driver, display);
    const table = parsed ? catalog.tables.find((candidate) => refsEqual(candidate, parsed)) : undefined;
    if (!table) {
      return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
    }
    return { resolved: { catalog: table.catalog, db: table.db, name: table.name }, candidates: [], dialect };
  }

  /**
   * Resolves a display string that may name a table or a table column. The table
   * interpretation is tried first; the column interpretation only when no table matches.
   */
  async resolveDisplayTarget(connectionName: string, display: string): Promise<DisplayTargetResolution> {
    const catalog = await this.loadCatalog(connectionName);
    if (!catalog) {
      return { resolved: null, candidates: [], dialect: 'unknown' };
    }

    const dialect = getDialectForDriver(catalog.driver).type;
    const tableResolution = await this.resolveDisplay(connectionName, display);
    if (tableResolution.resolved) {
      return tableResolution;
    }

    const parsedColumn = parseColumnDisplay(catalog.driver, display);
    if (!parsedColumn) {
      return { resolved: null, candidates: bestCandidates(catalog.tables, display), dialect };
    }

    const table = catalog.tables.find((candidate) => refsEqual(candidate, parsedColumn));
    if (!table) {
      // The last segment of `display` is the column here, so suggestions must be
      // ranked against the table segment instead.
      return { resolved: null, candidates: bestCandidates(catalog.tables, parsedColumn.name), dialect };
    }

    return {
      resolved: {
        catalog: table.catalog,
        db: table.db,
        name: table.name,
        column: parsedColumn.column,
      },
      candidates: [],
      dialect,
    };
  }

  /**
   * Case-insensitive substring search over table and column names, comments and
   * descriptions. Hits are returned in catalog order (each table, then its columns),
   * truncated to `limit`.
   */
  async searchByName(connectionName: string, query: string, limit: number): Promise<RawSchemaHit[]> {
    const catalog = await this.loadCatalog(connectionName);
    if (!catalog) {
      return [];
    }
    const cap = Math.max(0, limit);
    const hits: RawSchemaHit[] = [];
    for (const table of catalog.tables as TableWithDescriptions[]) {
      // Hits are never reordered, so scanning past the cap cannot change the result.
      if (hits.length >= cap) {
        break;
      }
      const tableMatch = matchedOnTable(table, query);
      if (tableMatch) {
        hits.push({
          kind: 'table',
          connectionName,
          ref: { catalog: table.catalog, db: table.db, name: table.name },
          display: formatDisplay(catalog.driver, table),
          matchedOn: tableMatch,
        });
      }
      for (const column of table.columns) {
        const columnMatch = matchedOnColumn(column, query);
        if (!columnMatch) {
          continue;
        }
        hits.push({
          kind: 'column',
          connectionName,
          ref: { catalog: table.catalog, db: table.db, name: table.name, column: column.name },
          display: `${formatDisplay(catalog.driver, table)}.${column.name}`,
          matchedOn: columnMatch,
        });
      }
    }
    return hits.slice(0, cap);
  }

  /**
   * Memoizes the catalog load per connection. A failed load is evicted so a
   * transient file-store error does not permanently poison the connection.
   */
  private loadCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
    const existing = this.catalogs.get(connectionName);
    if (existing) {
      return existing;
    }
    const pending = this.readCatalog(connectionName);
    this.catalogs.set(connectionName, pending);
    // Callers still observe the rejection through `pending`; this handler only evicts.
    pending.catch(() => {
      if (this.catalogs.get(connectionName) === pending) {
        this.catalogs.delete(connectionName);
      }
    });
    return pending;
  }

  /**
   * Reads the latest scan for a connection: `connection.json`, every `tables/*.json`,
   * and (best effort) `enrichment/relationship-profile.json`.
   *
   * NOTE(review): "latest" is the lexicographically last `connection.json` path, so
   * sync ids are presumably expected to sort chronologically as strings - confirm.
   */
  private async readCatalog(connectionName: string): Promise<ConnectionCatalog | null> {
    const root = `raw-sources/${connectionName}/live-database`;
    const listed = await this.deps.fileStore.listFiles(root);
    const connectionFiles = listed.files.filter((file) => file.endsWith('/connection.json')).sort();
    const latestConnectionPath = connectionFiles.at(-1);
    if (!latestConnectionPath) {
      return null;
    }
    const latestRoot = latestConnectionPath.slice(0, -'/connection.json'.length);
    const syncId = latestRoot.split('/').at(-1) ?? '';
    const connection = readJson<ConnectionArtifact>(
      (await this.deps.fileStore.readFile(latestConnectionPath)).content,
    );
    const tablesListing = await this.deps.fileStore.listFiles(`${latestRoot}/tables`);
    const tables: KtxSchemaTable[] = [];
    for (const tablePath of tablesListing.files.filter((file) => file.endsWith('.json')).sort()) {
      tables.push(readJson<KtxSchemaTable>((await this.deps.fileStore.readFile(tablePath)).content));
    }

    // The profile is optional enrichment: a missing or unreadable file degrades to "no statistics".
    let profile: RelationshipProfileArtifact | null = null;
    try {
      profile = readJson<RelationshipProfileArtifact>(
        (await this.deps.fileStore.readFile(`${latestRoot}/enrichment/relationship-profile.json`)).content,
      );
    } catch {
      profile = null;
    }

    return {
      connectionName,
      syncId,
      driver: connection.driver ?? profile?.driver ?? 'postgres',
      tables,
      profile,
    };
  }
}
string, + _scopeId: string | null, + key: string, + frontmatter: Record, + content: string, + ) => { + pages.set(`${scope}:${key}`, { pageKey: key, frontmatter, content }); + }, + ), + }; + const configService = { + listFiles: vi.fn(async () => ({ + files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-legacy.md'], + })), + }; + const semanticLayerService = { + loadAllSources: vi.fn(async () => [ + { + name: 'mart_customer_health', + grain: [], + columns: [], + joins: [], + measures: [{ name: 'high_risk_account_count', expr: 'count(*)' }], + segments: [{ name: 'high_risk', expr: "risk_level = 'high'" }], + }, + { + name: 'int_procurement_qualifying_actions', + grain: [], + columns: [], + joins: [], + measures: [], + }, + ]), + }; + + const result = await repairWikiSlRefs({ + wikiService: wikiService as never, + semanticLayerService: semanticLayerService as never, + configService: configService as never, + connectionIds: ['warehouse'], + }); + + expect(result.repairs).toEqual([ + { + pageKey: 'accounts-at-risk', + scope: 'GLOBAL', + scopeId: null, + removedRefs: ['mart_customer_health.medium_risk_account_count'], + }, + ]); + expect(wikiService.writePage).toHaveBeenCalledWith( + 'GLOBAL', + null, + 'accounts-at-risk', + expect.objectContaining({ + sl_refs: [ + 'mart_customer_health', + 'mart_customer_health.high_risk_account_count', + 'mart_customer_health.high_risk', + 'int_procurement_qualifying_actions', + ], + }), + 'Risk context.', + 'System User', + 'system@example.com', + 'Repair semantic-layer refs: accounts-at-risk', + ); + }); +}); diff --git a/packages/context/src/ingest/wiki-sl-ref-repair.ts b/packages/context/src/ingest/wiki-sl-ref-repair.ts new file mode 100644 index 00000000..7d3d48f3 --- /dev/null +++ b/packages/context/src/ingest/wiki-sl-ref-repair.ts @@ -0,0 +1,140 @@ +import type { KtxFileStorePort } from '../core/index.js'; +import type { SemanticLayerService, SemanticLayerSource } from '../sl/index.js'; +import { isFlatWikiKey } from 
const SYSTEM_AUTHOR = 'System User';
const SYSTEM_EMAIL = 'system@example.com';
/** Directory (relative to the file-store root) that holds wiki pages. */
const KNOWLEDGE_ROOT = 'knowledge';

/** One page whose `sl_refs` frontmatter was rewritten, with the refs that were dropped. */
export interface WikiSlRefRepair {
  pageKey: string;
  scope: 'GLOBAL' | 'USER';
  scopeId: string | null;
  removedRefs: string[];
}

export interface WikiSlRefRepairResult {
  repairs: WikiSlRefRepair[];
  warnings: string[];
}

interface WikiPath {
  scope: 'GLOBAL' | 'USER';
  scopeId: string | null;
  pageKey: string;
}

/**
 * Maps a listed markdown path to its wiki scope and page key.
 *
 * Accepts `global/<key>.md` and `user/<id>/<key>.md`, with or without a leading
 * `knowledge/` segment, because a listing may be reported relative to the listed
 * directory or to the store root. Nested paths and non-flat keys yield null.
 */
function parseKnowledgeFilePath(path: string): WikiPath | null {
  if (!path.endsWith('.md')) {
    return null;
  }
  const segments = path.split('/');
  if (segments[0] === KNOWLEDGE_ROOT) {
    segments.shift();
  }
  const [scopeDir, second, third] = segments;
  if (segments.length === 2 && scopeDir === 'global' && second !== undefined) {
    const pageKey = second.replace(/\.md$/, '');
    return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null;
  }
  if (segments.length === 3 && scopeDir === 'user' && second !== undefined && third !== undefined) {
    const pageKey = third.replace(/\.md$/, '');
    return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: second, pageKey } : null;
  }
  return null;
}

/** Every ref a page may legally point at for one source: the source itself, its measures and its segments. */
function entityRefsForSource(source: SemanticLayerSource): string[] {
  return [
    source.name,
    ...(source.measures ?? []).map((measure) => `${source.name}.${measure.name}`),
    ...(source.segments ?? []).map((segment) => `${source.name}.${segment.name}`),
  ];
}

/**
 * Collects the valid refs across all connections.
 *
 * `complete` is false when at least one connection failed to load; in that case
 * `refs` is only a partial picture and must not be used to delete anything.
 */
async function loadVisibleSlRefs(
  semanticLayerService: SemanticLayerService,
  connectionIds: string[],
): Promise<{ refs: Set<string>; warnings: string[]; complete: boolean }> {
  const refs = new Set<string>();
  const warnings: string[] = [];
  let complete = true;
  for (const connectionId of connectionIds) {
    try {
      for (const source of await semanticLayerService.loadAllSources(connectionId)) {
        for (const ref of entityRefsForSource(source)) {
          refs.add(ref);
        }
      }
    } catch (error) {
      complete = false;
      warnings.push(
        `Skipped wiki sl_refs repair for connection ${connectionId}: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
  }
  return { refs, warnings, complete };
}

/** De-duplicates and keeps only non-blank string entries (frontmatter is untrusted at runtime). */
function uniqueStringArray(value: string[] | undefined): string[] {
  return [...new Set((value ?? []).filter((entry) => typeof entry === 'string' && entry.trim().length > 0))];
}

/**
 * Removes `sl_refs` frontmatter entries that no longer name an existing
 * semantic-layer source, measure or segment, and writes the affected pages back.
 *
 * Nothing is removed when the sources of any connection could not be loaded:
 * refs that belong to that connection would otherwise look stale and be deleted.
 *
 * @returns the pages that were rewritten plus human-readable warnings.
 */
export async function repairWikiSlRefs(input: {
  wikiService: KnowledgeWikiService;
  semanticLayerService: SemanticLayerService;
  configService: KtxFileStorePort;
  connectionIds: string[];
}): Promise<WikiSlRefRepairResult> {
  const { refs: validRefs, warnings, complete } = await loadVisibleSlRefs(
    input.semanticLayerService,
    input.connectionIds,
  );
  if (!complete) {
    return {
      repairs: [],
      warnings: [
        ...warnings,
        'Skipped wiki sl_refs repair: semantic-layer sources could not be loaded for every connection.',
      ],
    };
  }
  const listFiles =
    typeof input.configService.listFiles === 'function'
      ? input.configService.listFiles.bind(input.configService)
      : null;
  if (!listFiles) {
    return {
      repairs: [],
      warnings: [...warnings, 'Skipped wiki sl_refs repair: config service cannot list wiki files.'],
    };
  }
  const listed = await listFiles(KNOWLEDGE_ROOT, true);
  const repairs: WikiSlRefRepair[] = [];

  // Sort a copy: the listing array belongs to the file store.
  for (const file of [...listed.files].sort()) {
    const parsedPath = parseKnowledgeFilePath(file);
    if (!parsedPath) {
      continue;
    }
    const page = await input.wikiService.readPage(parsedPath.scope, parsedPath.scopeId, parsedPath.pageKey);
    const refs = uniqueStringArray(page?.frontmatter.sl_refs);
    if (!page || refs.length === 0) {
      continue;
    }
    const keptRefs = refs.filter((ref) => validRefs.has(ref));
    const removedRefs = refs.filter((ref) => !validRefs.has(ref));
    if (removedRefs.length === 0) {
      continue;
    }

    const frontmatter: WikiFrontmatter = {
      ...page.frontmatter,
      sl_refs: keptRefs,
    };
    await input.wikiService.writePage(
      parsedPath.scope,
      parsedPath.scopeId,
      parsedPath.pageKey,
      frontmatter,
      page.content,
      SYSTEM_AUTHOR,
      SYSTEM_EMAIL,
      `Repair semantic-layer refs: ${parsedPath.pageKey}`,
    );
    repairs.push({ ...parsedPath, removedRefs });
  }

  return {
    repairs,
    warnings: [
      ...warnings,
      ...repairs.map(
        (repair) => `Removed invalid sl_refs from ${repair.pageKey}: ${repair.removedRefs.join(', ')}`,
      ),
    ],
  };
}
frontmatter.tags.filter((tag): tag is string => typeof tag === 'string') + : []; +} + function scoreText(text: string, query: string): number { const normalized = query.toLowerCase().trim(); if (!normalized) { diff --git a/packages/context/src/memory/memory-runtime-assets.test.ts b/packages/context/src/memory/memory-runtime-assets.test.ts index ef056d18..bd18e524 100644 --- a/packages/context/src/memory/memory-runtime-assets.test.ts +++ b/packages/context/src/memory/memory-runtime-assets.test.ts @@ -23,11 +23,42 @@ const expectedAdapterSkillHeadings: Record = { metabase_ingest: '# Metabase to KTX Semantic Layer', metricflow_ingest: '# MetricFlow to KTX Semantic Layer', }; +const verificationWriterSkills = [ + 'notion_synthesize', + 'dbt_ingest', + 'lookml_ingest', + 'looker_ingest', + 'metabase_ingest', + 'metricflow_ingest', + 'live_database_ingest', + 'historic_sql_table_digest', + 'historic_sql_patterns', + 'knowledge_capture', + 'sl_capture', +] as const; function forbiddenProductPattern() { return new RegExp([['Kae', 'lio'].join(''), ['kae', 'lio'].join(''), ['KAE', 'LIO_'].join('')].join('|')); } +function sqlExecutionCallBlocks(body: string): string[] { + const blocks: string[] = []; + const marker = 'sql_execution({'; + let offset = 0; + + while (offset < body.length) { + const start = body.indexOf(marker, offset); + if (start === -1) { + break; + } + const end = body.indexOf('})', start + marker.length); + blocks.push(body.slice(start, end === -1 ? 
start + marker.length : end + 2)); + offset = start + marker.length; + } + + return blocks; +} + describe('memory runtime assets', () => { it('packages every memory-agent base prompt referenced by promptNameFor()', async () => { const prompts = new PromptService({ promptsDir, partials: [] }); @@ -117,4 +148,50 @@ describe('memory runtime assets', () => { expect(body).toContain('Do not call `sl_write_source` or `sl_edit_source`'); expect(body).toContain('LookML writes target the run connection directly'); }); + + it('ships identifier verification protocol in every synthesis writer skill', async () => { + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + expect(body).toContain('## Identifier Verification Protocol'); + expect(body).toMatch(/discover_data|entity_details/); + } + }); + + it('does not ship stale warehouse verification tool names or fictional identifiers', async () => { + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + expect(body).not.toContain('orbit_analytics.customer'); + expect(body).not.toContain('wiki_sl_search'); + expect(body).not.toContain('sl_describe_table'); + } + }); + + it('ships only the KTX connectionName sql_execution call shape in writer guidance', async () => { + const shared = await readFile(join(skillsDir, '_shared', 'identifier-verification.md'), 'utf-8'); + const bodies = [{ name: '_shared/identifier-verification.md', body: shared }]; + + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT DISTINCT'); + expect(shared).toContain('sql_execution({connectionName, sql: "SELECT 1 FROM'); + + for (const skillName of verificationWriterSkills) { + const body = await readFile(join(skillsDir, skillName, 'SKILL.md'), 'utf-8'); + bodies.push({ name: `${skillName}/SKILL.md`, body }); + expect(body).toContain('sql_execution({connectionName'); + 
expect(body).not.toContain('sql_execution({ sql'); + expect(body).not.toContain('session shape'); + expect(body).not.toContain('connection is already pinned by the ingest session'); + } + + for (const { name, body } of bodies) { + const calls = sqlExecutionCallBlocks(body); + expect(calls.length, `${name} should contain sql_execution guidance`).toBeGreaterThan(0); + expect( + calls.filter((call) => !call.includes('connectionName')), + `${name} has sql_execution calls without connectionName`, + ).toEqual([]); + expect(body, `${name} has a connectionless multiline sql_execution call`).not.toMatch( + /sql_execution\(\{\s*sql\s*:/, + ); + } + }); }); diff --git a/packages/context/src/sl/tools/sl-warehouse-validation.ts b/packages/context/src/sl/tools/sl-warehouse-validation.ts index f9c5e4fd..a200dad9 100644 --- a/packages/context/src/sl/tools/sl-warehouse-validation.ts +++ b/packages/context/src/sl/tools/sl-warehouse-validation.ts @@ -90,7 +90,7 @@ export async function validateSingleSource( `writing it as-is drops the manifest's columns and joins. ` + `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` + `"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` + - `that inherits the manifest schema. Call sl_describe_table to see it first.`, + `that inherits the manifest schema. 
Call sl_read_source to inspect the existing source first.`, ); return { errors, warnings }; } diff --git a/packages/context/src/tools/tool-session.ts b/packages/context/src/tools/tool-session.ts index d9156258..023a8c8e 100644 --- a/packages/context/src/tools/tool-session.ts +++ b/packages/context/src/tools/tool-session.ts @@ -47,6 +47,7 @@ export interface ToolSession { touchedSlSources: TouchedSlSourceSet; actions: MemoryAction[]; allowedRawPaths?: ReadonlySet; + allowedConnectionNames?: ReadonlySet; semanticLayerService: SemanticLayerService; wikiService: KnowledgeWikiService; configService: KtxFileStorePort; diff --git a/packages/context/src/wiki/index.ts b/packages/context/src/wiki/index.ts index 6eae10f0..17d37399 100644 --- a/packages/context/src/wiki/index.ts +++ b/packages/context/src/wiki/index.ts @@ -12,6 +12,7 @@ export type { KnowledgeEventPort, KnowledgeGitDiffPort, KnowledgeIndexPort, + KnowledgeIndexPageListing, UpsertPageParams, WikiFileStorePort, } from './ports.js'; diff --git a/packages/context/src/wiki/knowledge-wiki.service.test.ts b/packages/context/src/wiki/knowledge-wiki.service.test.ts index ecbf954a..40056edc 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.test.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.test.ts @@ -113,13 +113,13 @@ describe('KnowledgeWikiService.syncFromCommit', () => { expect(call.deletes).toEqual([{ scope: 'GLOBAL', scopeId: null, pageKey: 'gone-page' }]); }); - it('indexes historic-SQL nested pages but skips other nested wiki paths from commit sync', async () => { + it('indexes only flat wiki pages and skips nested paths from commit sync', async () => { const { service, pagesRepository, gitService, logger } = makeService(); gitService.diffNameStatus.mockResolvedValue([ { status: 'A', path: 'knowledge/global/revenue-policy.md' }, + { status: 'A', path: 'knowledge/global/historic-sql-order-lifecycle.md' }, { status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' }, - { status: 
'A', path: 'knowledge/global/historic-sql/_archived/retired-pattern.md' }, { status: 'A', path: 'knowledge/global/orbit/company-overview.md' }, ]); gitService.getFileAtCommit.mockImplementation((path: string) => { @@ -138,26 +138,25 @@ describe('KnowledgeWikiService.syncFromCommit', () => { await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid'); expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after'); + expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/historic-sql/order-lifecycle.md', 'sha-after'); expect(logger.warn).toHaveBeenCalledWith( '[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md', ); + expect(logger.warn).toHaveBeenCalledWith( + '[knowledge.sync] skipping unparseable path: knowledge/global/historic-sql/order-lifecycle.md', + ); const call = pagesRepository.applyDiffTransactional.mock.calls[0][0]; expect(call.upserts).toEqual( expect.arrayContaining([ expect.objectContaining({ scope: 'GLOBAL', pageKey: 'revenue-policy', summary: 'revenue' }), expect.objectContaining({ scope: 'GLOBAL', - pageKey: 'historic-sql/order-lifecycle', + pageKey: 'historic-sql-order-lifecycle', summary: 'order lifecycle', }), - expect.objectContaining({ - scope: 'GLOBAL', - pageKey: 'historic-sql/_archived/retired-pattern', - summary: 'retired', - }), ]), ); - expect(call.upserts).toHaveLength(3); + expect(call.upserts).toHaveLength(2); }); it('is a no-op when the diff between shas has no knowledge changes', async () => { diff --git a/packages/context/src/wiki/knowledge-wiki.service.ts b/packages/context/src/wiki/knowledge-wiki.service.ts index 2ca32f79..fb152e83 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.ts @@ -11,10 +11,6 @@ const WIKI_PREFIX = 'knowledge'; export type { WikiFrontmatter }; -function isHistoricSqlPathSegment(segment: string): boolean { - return 
/^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment); -} - export class KnowledgeWikiService { private isWorktreeScoped = false; @@ -422,7 +418,6 @@ export class KnowledgeWikiService { * Parse a `knowledge//...` file path into its scope and page key. * `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' } * `knowledge/user//bar.md` → { scope: 'USER', scopeId: '', pageKey: 'bar' } - * `knowledge/global/historic-sql/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'historic-sql/foo' } */ function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null { if (!path.endsWith('.md')) { @@ -437,13 +432,6 @@ function parseKnowledgePath(path: string): { scope: string; scopeId: string | nu const pageKey = rest[1].replace(/\.md$/, ''); return isFlatWikiKey(pageKey) ? { scope: 'GLOBAL', scopeId: null, pageKey } : null; } - if (rest.length >= 3 && rest[0] === 'global' && rest[1] === 'historic-sql') { - const historicPath = rest.slice(2).join('/').replace(/\.md$/, ''); - if (historicPath.split('/').every(isHistoricSqlPathSegment)) { - return { scope: 'GLOBAL', scopeId: null, pageKey: `historic-sql/${historicPath}` }; - } - return null; - } if (rest.length === 3 && rest[0] === 'user') { const pageKey = rest[2].replace(/\.md$/, ''); return isFlatWikiKey(pageKey) ? { scope: 'USER', scopeId: rest[1], pageKey } : null; diff --git a/packages/context/src/wiki/local-knowledge.test.ts b/packages/context/src/wiki/local-knowledge.test.ts index 78da841f..5ad66eb1 100644 --- a/packages/context/src/wiki/local-knowledge.test.ts +++ b/packages/context/src/wiki/local-knowledge.test.ts @@ -244,4 +244,30 @@ describe('local knowledge helpers', () => { }), ).rejects.toThrow('Invalid wiki key "orbit/company-overview". 
Wiki keys must be flat; use "orbit-company-overview".'); }); + + it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => { + await writeLocalKnowledgePage(project, { + key: 'historic-sql-paid-orders', + scope: 'GLOBAL', + summary: 'Flat historic SQL page', + content: 'Flat page body.', + tags: ['historic-sql'], + }); + await project.fileStore.writeFile( + 'knowledge/global/historic-sql/paid-orders.md', + '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n', + 'Test', + 'test@example.com', + 'Write nested legacy page', + ); + + await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ + { + key: 'historic-sql-paid-orders', + path: 'knowledge/global/historic-sql-paid-orders.md', + scope: 'GLOBAL', + summary: 'Flat historic SQL page', + }, + ]); + }); }); diff --git a/packages/context/src/wiki/local-knowledge.ts b/packages/context/src/wiki/local-knowledge.ts index 007b006e..5d1314a8 100644 --- a/packages/context/src/wiki/local-knowledge.ts +++ b/packages/context/src/wiki/local-knowledge.ts @@ -80,26 +80,12 @@ function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, k return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`; } -function isHistoricSqlPathSegment(segment: string): boolean { - return /^[a-zA-Z0-9_][a-zA-Z0-9_-]*$/.test(segment); -} - function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null { const prefix = scope === 'GLOBAL' ? 
'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`; const key = path.slice(prefix.length).replace(/\.md$/, ''); if (isFlatWikiKey(key)) { return key; } - if ( - scope === 'GLOBAL' && - key.startsWith('historic-sql/') && - key - .slice('historic-sql/'.length) - .split('/') - .every(isHistoricSqlPathSegment) - ) { - return key; - } return null; } diff --git a/packages/context/src/wiki/ports.ts b/packages/context/src/wiki/ports.ts index 7fa48c29..075a6c20 100644 --- a/packages/context/src/wiki/ports.ts +++ b/packages/context/src/wiki/ports.ts @@ -13,6 +13,15 @@ export interface UpsertPageParams { sourceRunId?: string | null; } +export interface KnowledgeIndexPageListing { + id?: string; + page_key: string; + summary: string; + scope: string; + scope_id: string | null; + tags: string[]; +} + export interface KnowledgeIndexPort { upsertPage(params: UpsertPageParams): Promise; applyDiffTransactional(params: { @@ -32,9 +41,7 @@ export interface KnowledgeIndexPort { scopeId: string | null, pageKey: string, ): Promise<{ id?: string; page_key: string } | null | undefined>; - listPagesForUser( - userId: string, - ): Promise>; + listPagesForUser(userId: string): Promise; getUserPageCount(userId: string): Promise; incrementUsageCount(pageIds: string[]): Promise; searchRRF( diff --git a/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts b/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts index a47b5912..e4b5b7f3 100644 --- a/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts +++ b/packages/context/src/wiki/tools/wiki-list-tags.tool.test.ts @@ -8,22 +8,11 @@ describe('WikiListTagsTool', () => { it("returns distinct sorted tags across the user's visible pages", async () => { const pagesRepository = { listPagesForUser: vi.fn().mockResolvedValue([ - { scope: 'GLOBAL', scope_id: null, page_key: 'k1' }, - { scope: 'USER', scope_id: 'u', page_key: 'k2' }, + { scope: 'GLOBAL', scope_id: null, page_key: 'k1', tags: ['metrics', 
'finance'] }, + { scope: 'USER', scope_id: 'u', page_key: 'k2', tags: ['metrics'] }, ]), }; - const wikiService = { - readPage: vi.fn().mockImplementation((_scope, _scopeId, key) => { - if (key === 'k1') { - return Promise.resolve({ frontmatter: { tags: ['metrics', 'finance'] }, content: '' }); - } - if (key === 'k2') { - return Promise.resolve({ frontmatter: { tags: ['metrics'] }, content: '' }); - } - return Promise.resolve(null); - }), - }; - const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any); + const tool = new WikiListTagsTool(pagesRepository as any); const result = await tool.call({}, baseContext); expect(result.markdown).toContain('finance'); @@ -31,10 +20,23 @@ describe('WikiListTagsTool', () => { expect(result.structured.tags).toEqual(['finance', 'metrics']); }); + it('lists tags from historic-SQL indexed pages with flat wiki keys', async () => { + const pagesRepository = { + listPagesForUser: vi.fn().mockResolvedValue([ + { scope: 'GLOBAL', scope_id: null, page_key: 'company-overview', tags: ['notion'] }, + { scope: 'GLOBAL', scope_id: null, page_key: 'historic-sql-revenue-pattern', tags: ['historic-sql', 'pattern'] }, + ]), + }; + const tool = new WikiListTagsTool(pagesRepository as any); + + const result = await tool.call({}, baseContext); + + expect(result.structured.tags).toEqual(['historic-sql', 'notion', 'pattern']); + }); + it('returns a friendly message when no pages have tags', async () => { const pagesRepository = { listPagesForUser: vi.fn().mockResolvedValue([]) }; - const wikiService = { readPage: vi.fn() }; - const tool = new WikiListTagsTool(wikiService as any, pagesRepository as any); + const tool = new WikiListTagsTool(pagesRepository as any); const result = await tool.call({}, baseContext); expect(result.markdown).toMatch(/no tags/i); diff --git a/packages/context/src/wiki/tools/wiki-list-tags.tool.ts b/packages/context/src/wiki/tools/wiki-list-tags.tool.ts index cd3c5aac..3a31ee41 100644 --- 
a/packages/context/src/wiki/tools/wiki-list-tags.tool.ts +++ b/packages/context/src/wiki/tools/wiki-list-tags.tool.ts @@ -1,7 +1,5 @@ import { z } from 'zod'; import type { KnowledgeIndexPort } from '../ports.js'; -type BlockScope = 'GLOBAL' | 'USER'; -import { KnowledgeWikiService } from '../index.js'; import { BaseTool, type ToolContext, type ToolOutput } from '../../tools/index.js'; const wikiListTagsInputSchema = z.object({}); @@ -11,10 +9,7 @@ type WikiListTagsInput = z.infer; export class WikiListTagsTool extends BaseTool { readonly name = 'wiki_list_tags'; - constructor( - private readonly wikiService: KnowledgeWikiService, - private readonly pagesRepository: KnowledgeIndexPort, - ) { + constructor(private readonly pagesRepository: KnowledgeIndexPort) { super(); } @@ -33,10 +28,7 @@ Call before writing a new page so you can reuse existing tags consistently inste const pages = await this.pagesRepository.listPagesForUser(context.userId); const set = new Set(); for (const p of pages) { - const scope = p.scope as BlockScope; - const scopeId = scope === 'USER' ? p.scope_id : null; - const page = await this.wikiService.readPage(scope, scopeId, p.page_key); - for (const t of page?.frontmatter.tags ?? []) { + for (const t of p.tags) { set.add(t); } } diff --git a/packages/context/src/wiki/tools/wiki-write.tool.ts b/packages/context/src/wiki/tools/wiki-write.tool.ts index edd34f8f..70668950 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.ts @@ -150,6 +150,7 @@ export class WikiWriteTool extends BaseTool { Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits. For existing pages, you may provide only frontmatter fields such as summary, tags, refs, or sl_refs to update metadata while preserving content. tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set. 
+Keys must be flat file names, not directory paths. Use tags/source frontmatter for grouping. `; } diff --git a/packages/context/src/wiki/types.ts b/packages/context/src/wiki/types.ts index 317b17ab..bff57aa5 100644 --- a/packages/context/src/wiki/types.ts +++ b/packages/context/src/wiki/types.ts @@ -25,6 +25,7 @@ export interface WikiFrontmatter { usage?: HistoricSqlWikiUsageFrontmatter; fingerprints?: string[]; stale_since?: string; + archived_since?: string; } export interface WikiPage { diff --git a/python/ktx-sl/semantic_layer/generator.py b/python/ktx-sl/semantic_layer/generator.py index 4e1ec891..a5979299 100644 --- a/python/ktx-sl/semantic_layer/generator.py +++ b/python/ktx-sl/semantic_layer/generator.py @@ -687,6 +687,12 @@ class SqlGenerator: if isinstance(node, exp.AggFunc): if isinstance(node, exp.Count): count_arg = node.this + if isinstance(count_arg, exp.Star): + node.set( + "this", + _make_case(exp.Literal.number(1)), + ) + return node if ( isinstance(count_arg, exp.Distinct) and count_arg.expressions diff --git a/python/ktx-sl/tests/test_corner_case_regressions.py b/python/ktx-sl/tests/test_corner_case_regressions.py index cb99d446..92eeb2a5 100644 --- a/python/ktx-sl/tests/test_corner_case_regressions.py +++ b/python/ktx-sl/tests/test_corner_case_regressions.py @@ -243,6 +243,37 @@ def test_filtered_count_distinct_keeps_distinct_inside_count(): assert_valid_sql(result.sql) +def test_filtered_count_star_uses_case_one_not_case_star(): + engine = make_engine( + { + "accounts": { + "name": "accounts", + "table": "public.accounts", + "grain": ["id"], + "columns": [ + {"name": "id", "type": "number"}, + {"name": "risk_level", "type": "string"}, + ], + "measures": [ + { + "name": "high_risk_account_count", + "expr": "count(*)", + "filter": "risk_level = 'high'", + } + ], + } + } + ) + + result = engine.query( + {"measures": ["accounts.high_risk_account_count"], "dimensions": []} + ) + + assert "THEN *" not in result.sql + assert "COUNT(CASE WHEN 
accounts.risk_level = 'high' THEN 1 END)" in result.sql + assert_valid_sql(result.sql) + + def test_predefined_measure_via_alias_uses_real_table_and_alias_qualification(): engine = make_engine(_alias_measure_sources()) result = engine.query( From b75576279c8b3a82052ab04a577e16749e52b24b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 13:55:21 +0200 Subject: [PATCH 06/15] fix: store Metabase mappings in ktx.yaml (#61) * fix: store Metabase mappings in ktx.yaml * docs: note KTX has no public users * refactor: drop setup progress compatibility --- AGENTS.md | 3 + .../src/commands/connection-mapping.test.ts | 54 +- .../cli/src/commands/connection-mapping.ts | 239 ++++--- .../connection-metabase-setup.test.ts | 48 +- .../src/commands/connection-metabase-setup.ts | 92 +-- packages/cli/src/ingest.test-utils.ts | 42 +- packages/cli/src/ingest.test.ts | 43 +- packages/cli/src/setup-agents.ts | 3 +- packages/cli/src/setup-context.test.ts | 11 +- packages/cli/src/setup-context.ts | 6 +- packages/cli/src/setup-databases.test.ts | 20 +- packages/cli/src/setup-databases.ts | 9 +- packages/cli/src/setup-embeddings.test.ts | 15 +- packages/cli/src/setup-embeddings.ts | 28 +- packages/cli/src/setup-models.test.ts | 22 +- packages/cli/src/setup-models.ts | 19 +- packages/cli/src/setup-project.test.ts | 12 +- packages/cli/src/setup-project.ts | 9 +- packages/cli/src/setup-sources.test.ts | 21 +- packages/cli/src/setup-sources.ts | 7 +- packages/cli/src/setup.test.ts | 53 +- packages/cli/src/setup.ts | 3 +- .../ingest/adapters/metabase/fetch.test.ts | 33 +- .../src/ingest/adapters/metabase/fetch.ts | 17 +- .../metabase/local-metabase.adapter.ts | 5 +- .../metabase/local-source-state-store.test.ts | 329 ++------- .../metabase/local-source-state-store.ts | 623 +++++------------- .../adapters/metabase/source-state-port.ts | 2 + packages/context/src/ingest/index.ts | 12 +- .../context/src/ingest/local-adapters.test.ts | 2 +- 
packages/context/src/ingest/local-ingest.ts | 15 +- .../ingest/local-mapping-reconcile.test.ts | 15 +- .../src/ingest/local-mapping-reconcile.ts | 31 +- .../src/ingest/local-metabase-ingest.test.ts | 143 ++-- .../ingest/tools/verification-ledger.tool.ts | 2 +- .../tools/warehouse-verification/index.ts | 6 - .../warehouse-catalog.service.ts | 2 +- packages/context/src/package-exports.test.ts | 4 +- packages/context/src/project/config.test.ts | 8 +- packages/context/src/project/config.ts | 2 - packages/context/src/project/index.ts | 2 - .../context/src/project/setup-config.test.ts | 32 - packages/context/src/project/setup-config.ts | 22 - 43 files changed, 715 insertions(+), 1351 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 1e5480f2..4a235864 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -24,6 +24,9 @@ database migrations, ORPC contracts, or `python-service/` layout exist here. - **MUST**: Keep package/public API changes intentional. Do not add compatibility wrappers for old KTX names unless the user explicitly asks for a migration bridge. +- **MUST**: Treat KTX as having no public users unless the user says otherwise. + Legacy support is not necessary by default; prefer clean breaking changes over + compatibility shims, migration bridges, or preserved stale behavior. 
### Absolute Prohibitions diff --git a/packages/cli/src/commands/connection-mapping.test.ts b/packages/cli/src/commands/connection-mapping.test.ts index 7d76cc9d..825c3c4c 100644 --- a/packages/cli/src/commands/connection-mapping.test.ts +++ b/packages/cli/src/commands/connection-mapping.test.ts @@ -1,8 +1,8 @@ -import { mkdtemp, rm } from 'node:fs/promises'; +import { mkdtemp, readFile, rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { LocalMetabaseSourceStateReader } from '@ktx/context/ingest'; -import { initKtxProject, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; +import { LocalMetabaseDiscoveryCache } from '@ktx/context/ingest'; +import { initKtxProject, loadKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnectionMapping } from './connection-mapping.js'; @@ -79,19 +79,24 @@ describe('runKtxConnectionMapping', () => { it('sets, lists, disables, and clears local Metabase mappings', async () => { const io = makeIo(); - await expect( - runKtxConnectionMapping( - { - command: 'set', - projectDir, - connectionId: 'prod-metabase', - field: 'databaseMappings', - key: '1', - value: 'prod-warehouse', - }, - io.io, - ), - ).resolves.toBe(0); + const setCode = await runKtxConnectionMapping( + { + command: 'set', + projectDir, + connectionId: 'prod-metabase', + field: 'databaseMappings', + key: '1', + value: 'prod-warehouse', + }, + io.io, + ); + expect(setCode, io.stderr()).toBe(0); + + let config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['prod-metabase']?.mappings).toMatchObject({ + databaseMappings: { '1': 'prod-warehouse' }, + syncEnabled: { '1': true }, + }); const listIo = makeIo(); await expect( @@ -113,6 +118,12 @@ describe('runKtxConnectionMapping', () => { ), ).resolves.toBe(0); + 
config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['prod-metabase']?.mappings).toMatchObject({ + databaseMappings: { '1': 'prod-warehouse' }, + syncEnabled: { '1': false }, + }); + await expect( runKtxConnectionMapping( { @@ -124,6 +135,9 @@ describe('runKtxConnectionMapping', () => { makeIo().io, ), ).resolves.toBe(0); + + config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['prod-metabase']?.mappings).toBeUndefined(); }); it('lists Metabase yaml mapping bootstrap rows before any SQLite command writes', async () => { @@ -194,9 +208,11 @@ describe('runKtxConnectionMapping', () => { expect(io.stdout()).toContain('Discovery: 1 database'); expect(client.cleanup).toHaveBeenCalledTimes(1); - const store = new LocalMetabaseSourceStateReader({ dbPath: join(projectDir, '.ktx', 'db.sqlite') }); - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ - { metabaseDatabaseId: 1, metabaseDatabaseName: 'Analytics', source: 'refresh' }, + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections['prod-metabase']?.mappings).toBeUndefined(); + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: join(projectDir, '.ktx', 'db.sqlite') }); + await expect(discoveryCache.listDiscoveredDatabases('prod-metabase')).resolves.toMatchObject([ + { id: 1, name: 'Analytics', engine: 'postgres' }, ]); }); diff --git a/packages/cli/src/commands/connection-mapping.ts b/packages/cli/src/commands/connection-mapping.ts index b35bf40f..5bae8e6e 100644 --- a/packages/cli/src/commands/connection-mapping.ts +++ b/packages/cli/src/commands/connection-mapping.ts @@ -4,8 +4,9 @@ import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultLookerConnectionClientFactory, DefaultMetabaseConnectionClientFactory, + KtxYamlMetabaseSourceStateReader, LocalLookerRuntimeStore, - 
LocalMetabaseSourceStateReader, + LocalMetabaseDiscoveryCache, computeLookerMappingDrift, computeMetabaseMappingDrift, discoverLookerConnections, @@ -16,10 +17,18 @@ import { validateLookerMappings, validateMappingPhysicalMatch, type LookerMappingClient, + type LocalMetabaseMappingListRow, type MetabaseRuntimeClient, type MetabaseSyncMode, } from '@ktx/context/ingest'; -import { type KtxLocalProject, ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import { + type KtxLocalProject, + type KtxProjectConfig, + ktxLocalStateDbPath, + loadKtxProject, + parseMetabaseMappingBootstrap, + serializeKtxProjectConfig, +} from '@ktx/context/project'; import type { KtxCliIo } from '../index.js'; import { profileMark } from '../startup-profile.js'; @@ -84,6 +93,89 @@ function parseId(value: string, label: string): number { return parsed; } +interface MetabaseMappingsBlock { + databaseMappings: Record; + syncEnabled: Record; + syncMode: MetabaseSyncMode; + selections: { collections: number[]; items: number[] }; + defaultTagNames: string[]; +} + +function currentMetabaseMappings(project: KtxLocalProject, connectionId: string): MetabaseMappingsBlock { + const connection = project.config.connections[connectionId]; + if (!connection) { + throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); + } + const bootstrap = parseMetabaseMappingBootstrap(connectionId, connection); + return { + databaseMappings: { ...bootstrap.databaseMappings }, + syncEnabled: { ...bootstrap.syncEnabled }, + syncMode: bootstrap.syncMode, + selections: { + collections: [...bootstrap.selections.collections], + items: [...bootstrap.selections.items], + }, + defaultTagNames: [...bootstrap.defaultTagNames], + }; +} + +function hasMetabaseMappings(block: MetabaseMappingsBlock): boolean { + return ( + Object.keys(block.databaseMappings).length > 0 || + Object.keys(block.syncEnabled).length > 0 || + block.syncMode !== 'ALL' || + block.selections.collections.length > 0 || + 
block.selections.items.length > 0 || + block.defaultTagNames.length > 0 + ); +} + +function serializeMetabaseMappingsBlock(block: MetabaseMappingsBlock): Record | undefined { + if (!hasMetabaseMappings(block)) { + return undefined; + } + return { + databaseMappings: block.databaseMappings, + syncEnabled: block.syncEnabled, + syncMode: block.syncMode, + selections: block.selections, + defaultTagNames: block.defaultTagNames, + }; +} + +async function writeMetabaseMappings( + project: KtxLocalProject, + connectionId: string, + block: MetabaseMappingsBlock, + message: string, +): Promise { + const connection = project.config.connections[connectionId]; + if (!connection) { + throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); + } + const mappings = serializeMetabaseMappingsBlock(block); + const nextConnection = { ...connection }; + if (mappings) { + nextConnection.mappings = mappings; + } else { + delete nextConnection.mappings; + } + const nextConfig: KtxProjectConfig = { + ...project.config, + connections: { + ...project.config.connections, + [connectionId]: nextConnection, + }, + }; + await project.fileStore.writeFile( + 'ktx.yaml', + serializeKtxProjectConfig(nextConfig), + 'ktx', + 'ktx@example.com', + message, + ); +} + async function createDefaultMetabaseClient( project: KtxLocalProject, connectionId: string, @@ -149,9 +241,7 @@ function targetPhysicalInfo(project: KtxLocalProject, connectionId: string) { }; } -function renderMapping( - row: Awaited>[number], -): string { +function renderMapping(row: LocalMetabaseMappingListRow): string { const name = row.metabaseDatabaseName ?? 'unhydrated'; const target = row.targetConnectionId ?? '[unmapped]'; return `${row.metabaseDatabaseId} -> ${target} (${name}, sync: ${row.syncEnabled ? 
'on' : 'off'}, source: ${ @@ -255,92 +345,78 @@ export async function runKtxConnectionMapping( } assertMetabaseConnection(project, args.connectionId); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + const metabaseStateReader = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }); if (args.command === 'list') { - const rows = await store.listDatabaseMappings(args.connectionId); + const rows = await metabaseStateReader.listDatabaseMappings(args.connectionId); io.stdout.write(args.json ? `${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderMapping).join('\n')}\n`); return 0; } if (args.command === 'set') { + if (args.field !== 'databaseMappings') { + throw new Error('Metabase mapping set requires databaseMappings ='); + } assertTargetConnection(project, args.value); - await store.upsertDatabaseMapping({ - connectionId: args.connectionId, - metabaseDatabaseId: parseId(args.key, 'metabaseDatabaseId'), - targetConnectionId: args.value, - syncEnabled: true, - source: 'cli', - }); + const block = currentMetabaseMappings(project, args.connectionId); + const metabaseDatabaseId = String(parseId(args.key, 'metabaseDatabaseId')); + block.databaseMappings[metabaseDatabaseId] = args.value; + block.syncEnabled[metabaseDatabaseId] = true; + await writeMetabaseMappings(project, args.connectionId, block, `Set Metabase mapping ${args.connectionId}.${metabaseDatabaseId}`); io.stdout.write(`Set databaseMappings.${args.key} = ${args.value}\n`); return 0; } if (args.command === 'apply-bulk') { const payload = JSON.parse(await readFile(args.filePath, 'utf8')) as MetabaseBulkMappingPayload; - const existingState = await store.getSourceState(args.connectionId); - const existingRows = await store.listDatabaseMappings(args.connectionId); - const existingById = new Map(existingRows.map((row) => [row.metabaseDatabaseId, row])); + 
const block = currentMetabaseMappings(project, args.connectionId); const databaseMappings = payload.databaseMappings ?? {}; for (const targetConnectionId of Object.values(databaseMappings)) { if (targetConnectionId) { assertTargetConnection(project, targetConnectionId); } } - const mappingIds = new Set([ - ...existingRows.map((row) => row.metabaseDatabaseId), - ...Object.keys(databaseMappings).map((id) => parseId(id, 'metabaseDatabaseId')), - ...Object.keys(payload.syncEnabled ?? {}).map((id) => parseId(id, 'metabaseDatabaseId')), - ]); - await store.replaceSourceState({ - connectionId: args.connectionId, - syncMode: payload.syncMode ?? existingState.syncMode, - defaultTagNames: payload.defaultTagNames ?? existingState.defaultTagNames, - selections: - payload.selections === undefined - ? existingState.selections - : [ - ...(payload.selections.collections ?? []).map((id) => ({ - selectionType: 'collection' as const, - metabaseObjectId: id, - })), - ...(payload.selections.items ?? []).map((id) => ({ - selectionType: 'item' as const, - metabaseObjectId: id, - })), - ], - mappings: [...mappingIds] - .sort((a, b) => a - b) - .map((id) => { - const existing = existingById.get(id); - return { - metabaseDatabaseId: id, - metabaseDatabaseName: existing?.metabaseDatabaseName ?? null, - metabaseEngine: existing?.metabaseEngine ?? null, - metabaseHost: existing?.metabaseHost ?? null, - metabaseDbName: existing?.metabaseDbName ?? null, - targetConnectionId: databaseMappings[String(id)] ?? existing?.targetConnectionId ?? null, - syncEnabled: payload.syncEnabled?.[String(id)] ?? existing?.syncEnabled ?? false, - source: 'cli', - }; - }), - }); + for (const id of Object.keys(databaseMappings)) { + parseId(id, 'metabaseDatabaseId'); + block.databaseMappings[id] = databaseMappings[id] ?? null; + } + for (const [id, enabled] of Object.entries(payload.syncEnabled ?? 
{})) { + parseId(id, 'metabaseDatabaseId'); + block.syncEnabled[id] = enabled; + } + if (payload.syncMode !== undefined) { + block.syncMode = payload.syncMode; + } + if (payload.defaultTagNames !== undefined) { + block.defaultTagNames = payload.defaultTagNames; + } + if (payload.selections !== undefined) { + block.selections = { + collections: payload.selections.collections ?? [], + items: payload.selections.items ?? [], + }; + } + await writeMetabaseMappings(project, args.connectionId, block, `Apply Metabase mappings ${args.connectionId}`); io.stdout.write(`Applied bulk mappings for ${args.connectionId}\n`); return 0; } if (args.command === 'set-sync-enabled') { - await store.setMappingSyncEnabled({ - connectionId: args.connectionId, - metabaseDatabaseId: args.metabaseDatabaseId, - syncEnabled: args.enabled, - }); + const block = currentMetabaseMappings(project, args.connectionId); + block.syncEnabled[String(args.metabaseDatabaseId)] = args.enabled; + await writeMetabaseMappings( + project, + args.connectionId, + block, + `Set Metabase sync ${args.connectionId}.${args.metabaseDatabaseId}`, + ); io.stdout.write(`Set syncEnabled.${args.metabaseDatabaseId} = ${args.enabled}\n`); return 0; } if (args.command === 'sync-state-get') { - const state = await store.getSourceState(args.connectionId); + const state = await metabaseStateReader.getSourceState(args.connectionId); const payload = { syncMode: state.syncMode, selections: state.selections, @@ -351,15 +427,11 @@ export async function runKtxConnectionMapping( } if (args.command === 'sync-state-set') { - await store.setSyncState({ - connectionId: args.connectionId, - syncMode: args.syncMode, - defaultTagNames: args.tagNames, - selections: [ - ...args.collectionIds.map((id) => ({ selectionType: 'collection' as const, metabaseObjectId: id })), - ...args.itemIds.map((id) => ({ selectionType: 'item' as const, metabaseObjectId: id })), - ], - }); + const block = currentMetabaseMappings(project, args.connectionId); + 
block.syncMode = args.syncMode; + block.defaultTagNames = args.tagNames; + block.selections = { collections: args.collectionIds, items: args.itemIds }; + await writeMetabaseMappings(project, args.connectionId, block, `Set Metabase sync state ${args.connectionId}`); io.stdout.write(`Set sync state for ${args.connectionId}\n`); return 0; } @@ -368,15 +440,11 @@ export async function runKtxConnectionMapping( const client = await (deps.createMetabaseClient ?? createDefaultMetabaseClient)(project, args.connectionId); try { const discovered = await discoverMetabaseDatabases(client); - const existing = Object.fromEntries( - (await store.listDatabaseMappings(args.connectionId)).map((row) => [ - String(row.metabaseDatabaseId), - row.targetConnectionId, - ]), - ); + const block = currentMetabaseMappings(project, args.connectionId); + const existing = block.databaseMappings; const drift = computeMetabaseMappingDrift({ currentMappings: existing, discovered }); if (args.autoAccept) { - await store.refreshDiscoveredDatabases({ connectionId: args.connectionId, discovered }); + await discoveryCache.refreshDiscoveredDatabases({ connectionId: args.connectionId, discovered }); } io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); @@ -388,7 +456,9 @@ export async function runKtxConnectionMapping( } if (args.command === 'validate') { - const rows = await store.listDatabaseMappings(args.connectionId); + const rows = (await metabaseStateReader.listDatabaseMappings(args.connectionId)).filter( + (row) => row.source === 'ktx.yaml', + ); const failures = rows.flatMap((row) => { if (!row.targetConnectionId) { return []; @@ -412,7 +482,18 @@ export async function runKtxConnectionMapping( } const metabaseDatabaseId = args.metabaseDatabaseId ?? (args.mappingKey ? 
parseId(args.mappingKey, 'metabaseDatabaseId') : undefined); - await store.clearDatabaseMappings({ connectionId: args.connectionId, metabaseDatabaseId }); + const block = currentMetabaseMappings(project, args.connectionId); + if (metabaseDatabaseId === undefined) { + block.databaseMappings = {}; + block.syncEnabled = {}; + block.syncMode = 'ALL'; + block.selections = { collections: [], items: [] }; + block.defaultTagNames = []; + } else { + delete block.databaseMappings[String(metabaseDatabaseId)]; + delete block.syncEnabled[String(metabaseDatabaseId)]; + } + await writeMetabaseMappings(project, args.connectionId, block, `Clear Metabase mappings ${args.connectionId}`); io.stdout.write( metabaseDatabaseId ? `Cleared databaseMappings.${metabaseDatabaseId}\n` diff --git a/packages/cli/src/commands/connection-metabase-setup.test.ts b/packages/cli/src/commands/connection-metabase-setup.test.ts index 9d462bbd..7b7b7b84 100644 --- a/packages/cli/src/commands/connection-metabase-setup.test.ts +++ b/packages/cli/src/commands/connection-metabase-setup.test.ts @@ -1,7 +1,7 @@ import { mkdtemp, readFile, rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { LocalMetabaseSourceStateReader } from '@ktx/context/ingest'; +import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from '@ktx/context/ingest'; import { initKtxProject, ktxLocalStateDbPath, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -9,6 +9,12 @@ import { runKtxConnectionMetabaseSetup } from './connection-metabase-setup.js'; const CANCEL_PROMPT = Symbol('cancel'); +async function metabaseMappingRows(projectDir: string, connectionId = 'metabase') { + const project = await loadKtxProject({ projectDir }); + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + return new 
KtxYamlMetabaseSourceStateReader(project, { discoveryCache }).listDatabaseMappings(connectionId); +} + function createTestMetabaseSetupPromptAdapter(options: { selects?: Array; multiselects?: Array | typeof CANCEL_PROMPT>; @@ -238,10 +244,7 @@ describe('runKtxConnectionMetabaseSetup', () => { expect(config).toContain('driver: metabase'); expect(config).toContain('api_url: http://metabase.example.test:3000'); expect(config).toContain('api_key: mb_example'); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, metabaseDatabaseName: 'Analytics', @@ -294,10 +297,7 @@ describe('runKtxConnectionMetabaseSetup', () => { { createMetabaseClient: async () => metabaseClient as never }, ), ).resolves.toBe(0); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, ]); }); @@ -369,10 +369,7 @@ describe('runKtxConnectionMetabaseSetup', () => { { createMetabaseClient: async () => metabaseClient as never }, ), ).resolves.toBe(0); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, ]); }); @@ -659,10 +656,7 @@ 
describe('runKtxConnectionMetabaseSetup', () => { { createMetabaseClient: async () => metabaseClient as never }, ), ).resolves.toBe(0); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 1, targetConnectionId: 'orbit', syncEnabled: true }, { metabaseDatabaseId: 2, targetConnectionId: null, syncEnabled: false }, ]); @@ -785,10 +779,7 @@ describe('runKtxConnectionMetabaseSetup', () => { const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); expect(config).toContain('driver: metabase'); expect(io.stderr()).toContain(`ktx ingest run --connection-id metabase --adapter metabase --project-dir ${projectDir}`); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, targetConnectionId: 'orbit' }, ]); }); @@ -886,10 +877,7 @@ describe('runKtxConnectionMetabaseSetup', () => { expect(config).toContain('driver: metabase'); expect(config).toContain('api_url: http://metabase.example.test:3000'); expect(config).toContain(`api_key: ${interactiveMetabaseCredential}`); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, targetConnectionId: 'orbit', @@ -957,10 +945,7 @@ describe('runKtxConnectionMetabaseSetup', () => 
{ }, ), ).resolves.toBe(0); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toMatchObject([ + await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, { metabaseDatabaseId: 3, targetConnectionId: 'warehouse2', syncEnabled: false }, ]); @@ -1128,9 +1113,6 @@ describe('runKtxConnectionMetabaseSetup', () => { const afterConfig = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); expect(afterConfig).toBe(beforeConfig); - - const updatedProject = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await expect(store.listDatabaseMappings('metabase')).resolves.toEqual([]); + await expect(metabaseMappingRows(projectDir)).resolves.toEqual([]); }); }); diff --git a/packages/cli/src/commands/connection-metabase-setup.ts b/packages/cli/src/commands/connection-metabase-setup.ts index 2321ea3d..b0980c3b 100644 --- a/packages/cli/src/commands/connection-metabase-setup.ts +++ b/packages/cli/src/commands/connection-metabase-setup.ts @@ -16,7 +16,8 @@ import { localConnectionToWarehouseDescriptor } from '@ktx/context/connections'; import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory, - LocalMetabaseSourceStateReader, + KtxYamlMetabaseSourceStateReader, + LocalMetabaseDiscoveryCache, MetabaseClient, type MetabaseDatabase, type MetabaseRuntimeClient, @@ -29,6 +30,7 @@ import { type KtxProjectConnectionConfig, ktxLocalStateDbPath, loadKtxProject, + parseMetabaseMappingBootstrap, serializeKtxProjectConfig, } from '@ktx/context/project'; @@ -338,6 +340,33 @@ function noteMetabaseSetupSummary(options: { ); } +function metabaseMappingsBlockForSetup(options: { + connectionId: string; + connection: 
KtxProjectConnectionConfig; + mappings: MetabaseSetupMappingAssignment[]; + syncEnabledDatabaseIds: number[]; + syncMode: MetabaseSetupSyncMode; +}): Record { + const existing = parseMetabaseMappingBootstrap(options.connectionId, options.connection); + const databaseMappings = { ...existing.databaseMappings }; + const syncEnabled = { ...existing.syncEnabled }; + for (const mapping of options.mappings) { + const key = String(mapping.metabaseDatabaseId); + databaseMappings[key] = mapping.targetConnectionId; + syncEnabled[key] = false; + } + for (const metabaseDatabaseId of options.syncEnabledDatabaseIds) { + syncEnabled[String(metabaseDatabaseId)] = true; + } + return { + databaseMappings, + syncEnabled, + syncMode: options.syncMode, + selections: existing.selections, + defaultTagNames: existing.defaultTagNames, + }; +} + export async function runKtxConnectionMetabaseSetup( args: KtxConnectionMetabaseSetupArgs, io: KtxCliIo, @@ -674,54 +703,37 @@ export async function runKtxConnectionMetabaseSetup( } } + const finalConnectionConfig: KtxProjectConnectionConfig = { + ...transientConnectionConfig, + mappings: metabaseMappingsBlockForSetup({ + connectionId, + connection: transientConnectionConfig, + mappings: resolvedMappings, + syncEnabledDatabaseIds: resolvedSyncEnabledDatabaseIds, + syncMode: args.syncMode, + }), + }; + const finalConfig = { + ...configWithTransient, + connections: { + ...configWithTransient.connections, + [connectionId]: finalConnectionConfig, + }, + }; await project.fileStore.writeFile( 'ktx.yaml', - serializeKtxProjectConfig(configWithTransient), + serializeKtxProjectConfig(finalConfig), 'ktx', 'ktx@example.com', `Setup Metabase connection ${connectionId}`, ); const updatedProject = await loadKtxProject({ projectDir: args.projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(updatedProject) }); - - await store.refreshDiscoveredDatabases({ connectionId, discovered }); - - for (const mapping of 
resolvedMappings) { - await store.upsertDatabaseMapping({ - connectionId, - metabaseDatabaseId: mapping.metabaseDatabaseId, - targetConnectionId: mapping.targetConnectionId, - syncEnabled: false, - source: 'cli', - }); - } - - for (const metabaseDatabaseId of resolvedSyncEnabledDatabaseIds) { - await store.setMappingSyncEnabled({ - connectionId, - metabaseDatabaseId, - syncEnabled: true, - }); - } - - const existingSyncState = await store.getSourceState(connectionId); - await store.setSyncState({ + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(updatedProject) }); + await discoveryCache.refreshDiscoveredDatabases({ connectionId, discovered }); + const rows = await new KtxYamlMetabaseSourceStateReader(updatedProject, { discoveryCache }).listDatabaseMappings( connectionId, - syncMode: args.syncMode, - defaultTagNames: existingSyncState.defaultTagNames, - selections: existingSyncState.selections, - }); - - const unhydrated = await store.getUnhydratedSyncEnabledMappingIds(connectionId); - if (unhydrated.length > 0) { - io.stderr.write( - `Sync-enabled mappings are missing discovery metadata; run ktx connection mapping refresh ${connectionId} --auto-accept\n`, - ); - return 1; - } - - const rows = await store.listDatabaseMappings(connectionId); + ); const physicalFailures = rows.flatMap((row) => { if (!row.targetConnectionId) { return []; diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 9241fa34..3596d215 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -3,7 +3,8 @@ import { mkdir, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { AgentRunnerService, type RunLoopParams } from '@ktx/context/agent'; import { - LocalMetabaseSourceStateReader, + KtxYamlMetabaseSourceStateReader, + LocalMetabaseDiscoveryCache, MetabaseSourceAdapter, getLocalIngestStatus, type ChunkResult, @@ -493,6 +494,23 @@ export async 
function runPublicMetabaseSyncModeCase(tempDir: string, input: Sync ' driver: metabase', ' api_url: https://metabase.example.test', ' api_key: literal-test-key', + ' mappings:', + ' databaseMappings:', + ' "1": warehouse_a', + ' syncEnabled:', + ' "1": true', + ` syncMode: ${input.syncMode}`, + ' selections:', + ` collections: [${input.selections + .filter((selection) => selection.selectionType === 'collection') + .map((selection) => selection.metabaseObjectId) + .join(', ')}]`, + ` items: [${input.selections + .filter((selection) => selection.selectionType === 'item') + .map((selection) => selection.metabaseObjectId) + .join(', ')}]`, + ' defaultTagNames:', + ' - sync-mode-smoke', ' warehouse_a:', ' driver: postgres', ' url: postgresql://readonly@db.example.test/warehouse_a', @@ -507,29 +525,15 @@ export async function runPublicMetabaseSyncModeCase(tempDir: string, input: Sync ); const project = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); - await store.replaceSourceState({ + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + await discoveryCache.refreshDiscoveredDatabases({ connectionId: 'prod-metabase', - syncMode: input.syncMode, - defaultTagNames: ['sync-mode-smoke'], - selections: input.selections, - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: 'db.example.test', - metabaseDbName: 'warehouse_a', - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'refresh', - }, - ], + discovered: [{ id: 1, name: 'Warehouse A', engine: 'postgres', host: 'db.example.test', dbName: 'warehouse_a' }], }); const adapter = new MetabaseSourceAdapter({ clientFactory: new StaticMetabaseClientFactory(createSyncModeMetabaseClient()), - sourceStateReader: store, + sourceStateReader: new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }), }); const 
jobId = `metabase-sync-mode-${input.name}-child`; const io = makeIo(); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index de226bc4..24f8c1ca 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { LocalLookerRuntimeStore, - LocalMetabaseSourceStateReader, + LocalMetabaseDiscoveryCache, type LocalIngestResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, @@ -433,6 +433,16 @@ describe('runKtxIngest', () => { ' driver: metabase', ' api_url: https://metabase.example.test', ' api_key: literal-test-key', + ' mappings:', + ' databaseMappings:', + ' "1": warehouse_a', + ' "2": warehouse_b', + ' syncEnabled:', + ' "1": true', + ' "2": true', + ' syncMode: ALL', + ' defaultTagNames:', + ' - ktx', ' warehouse_a:', ' driver: postgres', ' url: postgresql://readonly@db.example.test/warehouse_a', @@ -449,33 +459,12 @@ describe('runKtxIngest', () => { 'utf-8', ); const project = await loadKtxProject({ projectDir }); - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); - await store.replaceSourceState({ + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + await discoveryCache.refreshDiscoveredDatabases({ connectionId: 'prod-metabase', - syncMode: 'ALL', - defaultTagNames: ['ktx'], - selections: [], - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: 'db.example.test', - metabaseDbName: 'warehouse_a', - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'refresh', - }, - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Warehouse B', - metabaseEngine: 'postgres', - metabaseHost: 'db.example.test', - metabaseDbName: 'warehouse_b', - targetConnectionId: 'warehouse_b', - syncEnabled: true, - source: 'refresh', - }, + discovered: [ + { id: 
1, name: 'Warehouse A', engine: 'postgres', host: 'db.example.test', dbName: 'warehouse_a' }, + { id: 2, name: 'Warehouse B', engine: 'postgres', host: 'db.example.test', dbName: 'warehouse_b' }, ], }); const adapter = new CliMetabaseSourceAdapter(); diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index b4202ed6..3c7829c7 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -6,7 +6,6 @@ import { loadKtxProject, markKtxSetupStateStepComplete, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { withMenuOptionsSpacing, withMultiselectNavigation } from './prompt-navigation.js'; @@ -364,7 +363,7 @@ async function installTarget(input: { async function markAgentsComplete(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(project.config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(project.config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'agents'); } diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 1ef044ae..9115d7a5 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -1,7 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { readKtxSetupState } from '@ktx/context/project'; +import { readKtxSetupState, writeKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { @@ -40,12 +40,6 @@ async function writeReadyProject(projectDir: string) { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', - ' - llm', - ' - embeddings', - ' - 
databases', - ' - sources', 'connections:', ' warehouse:', ' driver: postgres', @@ -71,6 +65,9 @@ async function writeReadyProject(projectDir: string) { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(projectDir, { + completed_steps: ['project', 'llm', 'embeddings', 'databases', 'sources'], + }); } async function writeScanReport( diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 04a572ac..94589bdc 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -5,11 +5,9 @@ import { cancel, isCancel, select } from '@clack/prompts'; import { type KtxLocalProject, loadKtxProject, - ktxSetupCompletedSteps, markKtxSetupStateStepComplete, readKtxSetupState, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { buildPublicIngestPlan } from './public-ingest.js'; @@ -470,7 +468,7 @@ async function defaultVerifyContextReady(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(project.config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(project.config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'context'); } @@ -704,7 +702,7 @@ export async function runKtxSetupContextStep( try { const project = await loadKtxProject({ projectDir: args.projectDir }); const existingState = await readKtxSetupContextState(args.projectDir); - const completedSteps = ktxSetupCompletedSteps(project.config, await readKtxSetupState(args.projectDir)); + const completedSteps = (await readKtxSetupState(args.projectDir)).completed_steps; if (completedSteps.includes('context') && existingState.status === 'completed') { return { status: 'ready', projectDir: args.projectDir, runId: existingState.runId ?? 
'setup-context-completed' }; } diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 46506ae7..65ee191a 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -1,7 +1,7 @@ import { mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join, resolve } from 'node:path'; -import { initKtxProject, parseKtxProjectConfig, readKtxSetupState } from '@ktx/context/project'; +import { initKtxProject, parseKtxProjectConfig, readKtxSetupState, writeKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { type KtxSetupDatabaseDriver, @@ -548,12 +548,11 @@ describe('setup databases step', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - databases', '', ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['databases'] }); const prompts = makePromptAdapter({ multiselectValues: [['back']], selectValues: ['continue'] }); const testConnection = vi.fn(async () => 0); const scanConnection = vi.fn(async () => 0); @@ -590,12 +589,11 @@ describe('setup databases step', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - databases', '', ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['databases'] }); const prompts = makePromptAdapter({ selectValues: ['add', 'url', 'continue'], multiselectValues: [['mysql']], @@ -706,12 +704,11 @@ describe('setup databases step', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - databases', '', ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['databases'] }); const io = makeIo(); const prompts = makePromptAdapter({ multiselectValues: [[]], @@ -1124,7 +1121,6 @@ describe('setup databases step', () => { }); 
expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], - completed_steps: [], }); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases'); expect(io.stdout()).toContain('Primary source ready'); @@ -1163,7 +1159,6 @@ describe('setup databases step', () => { }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], - completed_steps: [], }); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases'); }); @@ -1213,7 +1208,7 @@ describe('setup databases step', () => { expect(scanConnection).toHaveBeenCalledTimes(2); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'analytics']); - expect(config.setup?.completed_steps).toEqual([]); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases'); }); @@ -1239,7 +1234,7 @@ describe('setup databases step', () => { expect(result.status).toBe('failed'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' }); - expect(config.setup?.completed_steps ?? []).not.toContain('databases'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(io.stderr()).toContain('Structural scan failed for warehouse.'); }); @@ -1544,7 +1539,6 @@ describe('setup databases step', () => { expect(result.status).toBe('skipped'); expect(io.stdout()).toContain('KTX cannot work until you add a primary source.'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps ?? 
[]).not.toContain('databases'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); }); }); diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index f770c5c4..eceaf5bb 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -7,7 +7,6 @@ import { markKtxSetupStateStepComplete, serializeKtxProjectConfig, setKtxSetupDatabaseConnectionIds, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import type { KtxTableListEntry } from '@ktx/context/scan'; import type { KtxCliIo } from './cli-runtime.js'; @@ -1020,7 +1019,7 @@ async function writeConnectionConfig(input: { [input.connectionId]: input.connection, }, }; - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); const historicSql = typeof input.connection.historicSql === 'object' && @@ -1314,7 +1313,7 @@ async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); const config = setKtxSetupDatabaseConnectionIds(project.config, unique(connectionIds)); - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'databases'); } diff --git a/packages/cli/src/setup-embeddings.test.ts b/packages/cli/src/setup-embeddings.test.ts index 67ef83b3..e66aa05a 100644 --- a/packages/cli/src/setup-embeddings.test.ts +++ b/packages/cli/src/setup-embeddings.test.ts @@ -1,7 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { initKtxProject, parseKtxProjectConfig, readKtxSetupState } from '@ktx/context/project'; 
+import { initKtxProject, parseKtxProjectConfig, readKtxSetupState, writeKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { type KtxSetupEmbeddingsPromptAdapter, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; @@ -172,7 +172,7 @@ describe('setup embeddings step', () => { sentenceTransformers: { base_url: 'managed:local-embeddings', pathPrefix: '' }, }); expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings'); expect(spinnerEvents).toContainEqual( 'start:Testing local sentence-transformers embeddings (all-MiniLM-L6-v2, 384 dimensions). First run may take up to 60 seconds.', @@ -251,7 +251,7 @@ describe('setup embeddings step', () => { sentenceTransformers: { base_url: 'managed:local-embeddings', pathPrefix: '' }, }); expect(config.scan.enrichment.embeddings).toMatchObject(config.ingest.embeddings); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('embeddings'); }); @@ -301,7 +301,7 @@ describe('setup embeddings step', () => { expect(result.status).toBe('failed'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps ?? 
[]).not.toContain('embeddings'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(config.ingest.embeddings.backend).toBe('deterministic'); expect(io.stderr()).toContain('Local embedding health check failed: 401 invalid api key [redacted]'); expect(io.stderr()).toContain('Prepare the runtime with: ktx dev runtime start --feature local-embeddings'); @@ -413,7 +413,7 @@ describe('setup embeddings step', () => { expect(result.status).toBe('skipped'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps ?? []).not.toContain('embeddings'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(config.ingest.embeddings.backend).toBe('deterministic'); }); @@ -450,10 +450,6 @@ describe('setup embeddings step', () => { 'project: warehouse', 'setup:', ' database_connection_ids: []', - ' completed_steps:', - ' - project', - ' - llm', - ' - embeddings', 'connections: {}', 'ingest:', ' embeddings:', @@ -466,6 +462,7 @@ describe('setup embeddings step', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'llm', 'embeddings'] }); const healthCheck = vi.fn(async () => ({ ok: true as const })); await expect( diff --git a/packages/cli/src/setup-embeddings.ts b/packages/cli/src/setup-embeddings.ts index 1b6a2381..ba3333f1 100644 --- a/packages/cli/src/setup-embeddings.ts +++ b/packages/cli/src/setup-embeddings.ts @@ -4,12 +4,10 @@ import { resolveKtxConfigReference } from '@ktx/context/core'; import { type KtxProjectConfig, type KtxProjectEmbeddingConfig, - ktxSetupCompletedSteps, loadKtxProject, markKtxSetupStateStepComplete, readKtxSetupState, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import { type KtxEmbeddingConfig, type KtxEmbeddingHealthCheckResult, runKtxEmbeddingHealthCheck } from '@ktx/llm'; import type { 
KtxCliIo } from './cli-runtime.js'; @@ -110,7 +108,7 @@ function createPromptAdapter(): KtxSetupEmbeddingsPromptAdapter { async function hasCompletedEmbeddings(projectDir: string, config: KtxProjectConfig): Promise { return ( - ktxSetupCompletedSteps(config, await readKtxSetupState(projectDir)).includes('embeddings') && + (await readKtxSetupState(projectDir)).completed_steps.includes('embeddings') && config.ingest.embeddings.backend !== 'none' && config.ingest.embeddings.backend !== 'deterministic' && typeof config.ingest.embeddings.model === 'string' && @@ -184,22 +182,20 @@ function embeddingBackendDisplayName(backend: KtxSetupEmbeddingBackend): string async function persistEmbeddingConfig(projectDir: string, embeddings: KtxProjectEmbeddingConfig): Promise { const project = await loadKtxProject({ projectDir }); - const config = stripKtxSetupCompletedSteps( - { - ...project.config, - ingest: { - ...project.config.ingest, + const config = { + ...project.config, + ingest: { + ...project.config.ingest, + embeddings, + }, + scan: { + ...project.config.scan, + enrichment: { + ...project.config.scan.enrichment, embeddings, }, - scan: { - ...project.config.scan, - enrichment: { - ...project.config.scan.enrichment, - embeddings, - }, - }, }, - ); + }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'embeddings'); } diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index 82f82875..fb8acb47 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -1,7 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { initKtxProject, parseKtxProjectConfig, readKtxSetupState } from '@ktx/context/project'; +import { initKtxProject, parseKtxProjectConfig, readKtxSetupState, writeKtxSetupState } from 
'@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { BUNDLED_ANTHROPIC_MODELS, @@ -160,7 +160,7 @@ describe('setup Anthropic model step', () => { promptCaching: { enabled: true }, }); expect(config.scan.enrichment.mode).toBe('llm'); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm'); expect(io.stdout()).toContain('LLM ready: yes'); expect(io.stdout()).not.toContain('sk-ant-test'); @@ -199,7 +199,7 @@ describe('setup Anthropic model step', () => { }, models: { default: 'claude-sonnet-4-6' }, }); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm'); expect(io.stdout()).not.toContain('sk-ant-file'); }); @@ -516,8 +516,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('failed'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps ?? 
[]).not.toContain('llm'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(io.stderr()).toContain('Anthropic model health check failed: 401 invalid x-api-key [redacted]'); expect(io.stderr()).not.toContain('sk-ant-test'); }); @@ -553,7 +552,7 @@ describe('setup Anthropic model step', () => { expect(io.stderr()).toContain('Choose a different credential source or model, or Back.'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.llm.models.default).toBe('claude-sonnet-4-6'); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm'); expect(io.stderr()).not.toContain('sk-ant-test'); }); @@ -565,8 +564,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('skipped'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps ?? 
[]).not.toContain('llm'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); }); it('returns back without writing config when Back is selected', async () => { @@ -650,9 +648,6 @@ describe('setup Anthropic model step', () => { 'project: warehouse', 'setup:', ' database_connection_ids: []', - ' completed_steps:', - ' - project', - ' - llm', 'connections: {}', 'llm:', ' provider:', @@ -669,6 +664,7 @@ describe('setup Anthropic model step', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'llm'] }); const healthCheck = vi.fn(async () => ({ ok: true as const })); await expect( @@ -698,9 +694,6 @@ describe('setup Anthropic model step', () => { 'project: warehouse', 'setup:', ' database_connection_ids: []', - ' completed_steps:', - ' - project', - ' - llm', 'connections: {}', 'llm:', ' provider:', @@ -715,6 +708,7 @@ describe('setup Anthropic model step', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'llm'] }); const healthCheck = vi.fn(async () => ({ ok: true as const })); const io = makeIo(); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 6d3c6757..221dbd14 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -8,7 +8,6 @@ import { loadKtxProject, markKtxSetupStateStepComplete, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import { type KtxLlmConfig, type KtxLlmHealthCheckResult, runKtxLlmHealthCheck } from '@ktx/llm'; import type { KtxCliIo } from './cli-runtime.js'; @@ -362,19 +361,17 @@ async function chooseModel( async function persistLlmConfig(projectDir: string, credentialRef: string, model: string): Promise { const project = await loadKtxProject({ projectDir }); - const config = stripKtxSetupCompletedSteps( - { - ...project.config, - llm: buildProjectLlmConfig(project.config.llm, credentialRef, 
model), - scan: { - ...project.config.scan, - enrichment: { + const config = { + ...project.config, + llm: buildProjectLlmConfig(project.config.llm, credentialRef, model), + scan: { + ...project.config.scan, + enrichment: { ...project.config.scan.enrichment, - mode: 'llm', + mode: 'llm' as const, }, }, - }, - ); + }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'llm'); } diff --git a/packages/cli/src/setup-project.test.ts b/packages/cli/src/setup-project.test.ts index 9c01402c..70591077 100644 --- a/packages/cli/src/setup-project.test.ts +++ b/packages/cli/src/setup-project.test.ts @@ -59,8 +59,7 @@ describe('setup project step', () => { expect(result.status).toBe('ready'); expect(result.projectDir).toBe(projectDir); - const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(await readKtxSetupState(projectDir)).toEqual({ completed_steps: ['project'] }); await expect(stat(join(projectDir, '.git'))).resolves.toBeDefined(); await expect(readFile(join(projectDir, '.ktx/.gitignore'), 'utf-8')).resolves.toContain('secrets/'); @@ -68,7 +67,7 @@ describe('setup project step', () => { expect(testIo.stderr()).toBe(''); }); - it('loads an existing project with --existing and preserves existing setup metadata', async () => { + it('loads an existing project with --existing and drops config setup progress', async () => { const projectDir = join(tempDir, 'warehouse'); await initKtxProject({ projectDir, projectName: 'warehouse' }); await writeFile( @@ -94,9 +93,9 @@ describe('setup project step', () => { const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], - completed_steps: [], }); - 
expect(await readKtxSetupState(projectDir)).toEqual({ completed_steps: ['llm', 'project'] }); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); + expect(await readKtxSetupState(projectDir)).toEqual({ completed_steps: ['project'] }); }); it('creates a missing auto-mode project only when --yes is present in no-input mode', async () => { @@ -152,8 +151,7 @@ describe('setup project step', () => { }), ); expect(prompts.text).not.toHaveBeenCalled(); - const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - expect(config.setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(await readKtxSetupState(projectDir)).toEqual({ completed_steps: ['project'] }); }); diff --git a/packages/cli/src/setup-project.ts b/packages/cli/src/setup-project.ts index 18512b03..4b2f71d9 100644 --- a/packages/cli/src/setup-project.ts +++ b/packages/cli/src/setup-project.ts @@ -5,15 +5,11 @@ import { basename, join, resolve } from 'node:path'; import { cancel, isCancel, select, text } from '@clack/prompts'; import { initKtxProject, - ktxSetupCompletedSteps, type KtxLocalProject, loadKtxProject, markKtxSetupStateStepComplete, mergeKtxSetupGitignoreEntries, - readKtxSetupState, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, - writeKtxSetupState, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; @@ -170,10 +166,7 @@ async function normalizeSetupGitignore(projectDir: string): Promise { } async function persistProjectStep(project: KtxLocalProject): Promise { - const completedSteps = ktxSetupCompletedSteps(project.config, await readKtxSetupState(project.projectDir)); - const config = stripKtxSetupCompletedSteps(project.config); - await writeFile(project.configPath, 
serializeKtxProjectConfig(config), 'utf-8'); - await writeKtxSetupState(project.projectDir, { completed_steps: completedSteps }); + await writeFile(project.configPath, serializeKtxProjectConfig(project.config), 'utf-8'); await markKtxSetupStateStepComplete(project.projectDir, 'project'); await normalizeSetupGitignore(project.projectDir); return await loadKtxProject({ projectDir: project.projectDir }); diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index 76ba5d0f..27579bb3 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -102,7 +102,6 @@ describe('setup sources step', () => { }, setup: { ...config.setup, - completed_steps: config.setup?.completed_steps ?? [], database_connection_ids: ['warehouse'], }, }), @@ -137,7 +136,7 @@ describe('setup sources step', () => { projectDir, }); - expect((await readConfig()).setup?.completed_steps).toEqual(undefined); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(projectDir)).completed_steps).toContain('sources'); expect(io.stdout()).toContain('Context source setup skipped.'); }); @@ -171,7 +170,7 @@ describe('setup sources step', () => { source_dir: '/repo/dbt', project_name: 'analytics', }); - expect(config.setup?.completed_steps).toEqual([]); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(projectDir)).completed_steps).toContain('sources'); expect(runInitialIngest).toHaveBeenCalledWith(projectDir, 'analytics_dbt', io.io, { inputMode: 'disabled' }); }); @@ -190,7 +189,7 @@ describe('setup sources step', () => { source: 'metabase', sourceConnectionId: 'prod_metabase', sourceUrl: 'https://metabase.example.com', - sourceApiKeyRef: 'env:METABASE_API_KEY', + sourceApiKeyRef: 'env:METABASE_API_KEY', // pragma: allowlist secret sourceWarehouseConnectionId: 'warehouse', 
metabaseDatabaseId: 1, runInitialSourceIngest: false, @@ -204,7 +203,7 @@ describe('setup sources step', () => { expect((await readConfig()).connections.prod_metabase).toMatchObject({ driver: 'metabase', api_url: 'https://metabase.example.com', - api_key_ref: 'env:METABASE_API_KEY', + api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, @@ -225,7 +224,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', + sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'selected_roots', notionRootPageIds: ['page-1'], runInitialSourceIngest: false, @@ -256,7 +255,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', + sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'all_accessible', notionRootPageIds: ['page-1'], runInitialSourceIngest: false, @@ -480,7 +479,7 @@ describe('setup sources step', () => { ), ).resolves.toEqual({ status: 'failed', projectDir }); - expect((await readConfig()).setup?.completed_steps ?? 
[]).not.toContain('sources'); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(io.stderr()).toContain('No LookML files found'); }); @@ -766,7 +765,7 @@ describe('setup sources step', () => { connection: { driver: 'metabase', api_url: 'https://metabase.example.com', - api_key_ref: 'env:METABASE_API_KEY', + api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret mappings: { databaseMappings: { '1': 'warehouse' }, syncEnabled: { '1': true }, @@ -786,7 +785,7 @@ describe('setup sources step', () => { driver: 'looker', base_url: 'https://looker.example.com', client_id: 'client-id', - client_secret_ref: 'env:LOOKER_CLIENT_SECRET', + client_secret_ref: 'env:LOOKER_CLIENT_SECRET', // pragma: allowlist secret mappings: { connectionMappings: { warehouse: 'warehouse' } }, }, deps: { @@ -1032,7 +1031,7 @@ describe('setup sources step', () => { expect(testPrompts.multiselect).not.toHaveBeenCalled(); expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); - expect((await readConfig()).setup?.completed_steps ?? 
[]).not.toContain('sources'); + expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); }); it('auto-detects dbt_project.yml at the root of a local path', async () => { diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 6674ef75..edf83b7b 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -25,7 +25,6 @@ import { loadKtxProject, markKtxSetupStateStepComplete, serializeKtxProjectConfig, - stripKtxSetupCompletedSteps, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { runKtxConnectionMapping } from './commands/connection-mapping.js'; @@ -345,7 +344,7 @@ function fileRepoUrl(sourceDir: string): string { async function writeProjectConfig(projectDir: string, config: KtxProjectConfig): Promise { const project = await loadKtxProject({ projectDir }); - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); } async function writeSourceConnection( @@ -372,7 +371,7 @@ async function writeSourceConnection( : [...project.config.ingest.adapters, adapter], }, }; - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(config)), 'utf-8'); + await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); return async () => { const latest = await loadKtxProject({ projectDir }); const connections = { ...latest.config.connections }; @@ -411,7 +410,7 @@ async function ensureSourceAdapterEnabled(projectDir: string, source: KtxSetupSo async function markSourcesComplete(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - await writeFile(project.configPath, serializeKtxProjectConfig(stripKtxSetupCompletedSteps(project.config)), 'utf-8'); + await writeFile(project.configPath, 
serializeKtxProjectConfig(project.config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'sources'); } diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index bf9c381f..0cad3ebc 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -3,6 +3,7 @@ import { mkdir, mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { promisify } from 'node:util'; +import { writeKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js'; @@ -133,9 +134,6 @@ describe('setup status', () => { ' database_connection_ids:', ' - warehouse', ' - analytics', - ' completed_steps:', - ' - project', - ' - databases', 'connections:', ' warehouse:', ' driver: postgres', @@ -150,6 +148,7 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] }); await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ databases: [ @@ -167,8 +166,6 @@ describe('setup status', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', 'connections:', ' warehouse:', ' driver: postgres', @@ -178,6 +175,7 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project'] }); await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ databases: [{ connectionId: 'warehouse', ready: false }], @@ -190,9 +188,6 @@ describe('setup status', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', - ' - databases', 'connections:', ' warehouse:', ' driver: postgres', @@ -202,6 +197,7 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { 
completed_steps: ['project', 'databases'] }); await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ databases: [{ connectionId: 'warehouse', ready: true }], @@ -215,9 +211,6 @@ describe('setup status', () => { 'project: revenue', 'setup:', ' database_connection_ids: []', - ' completed_steps:', - ' - project', - ' - sources', 'connections:', ' docs:', ' driver: notion', @@ -230,6 +223,7 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'sources'] }); await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ sources: [{ connectionId: 'docs', type: 'notion', ready: true }], @@ -268,12 +262,6 @@ describe('setup status', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', - ' - llm', - ' - embeddings', - ' - databases', - ' - sources', 'connections:', ' warehouse:', ' driver: postgres', @@ -292,6 +280,9 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { + completed_steps: ['project', 'llm', 'embeddings', 'databases', 'sources'], + }); await writeKtxSetupContextState(tempDir, { runId: 'setup-context-local-abc123', status: 'running', @@ -324,10 +315,6 @@ describe('setup status', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', - ' - databases', - ' - sources', 'connections:', ' warehouse:', ' driver: postgres', @@ -354,6 +341,7 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases', 'sources'] }); await persistLocalBundleReport( tempDir, localFakeBundleReport('metabase-job-1', { @@ -1281,9 +1269,6 @@ describe('setup status', () => { 'setup:', ' database_connection_ids:', ' - warehouse', - ' completed_steps:', - ' - project', - ' - databases', 'connections:', ' warehouse:', ' driver: postgres', @@ -1296,6 +1281,7 @@ describe('setup status', 
() => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] }); await expect( runKtxSetup( @@ -1782,13 +1768,6 @@ describe('setup status', () => { [ 'project: revenue', 'setup:', - ' completed_steps:', - ' - project', - ' - llm', - ' - embeddings', - ' - sources', - ' - context', - ' - agents', ' database_connection_ids: []', 'connections: {}', 'llm:', @@ -1805,6 +1784,9 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { + completed_steps: ['project', 'llm', 'embeddings', 'sources', 'context', 'agents'], + }); await writeFile( join(tempDir, '.ktx/agents/install-manifest.json'), JSON.stringify( @@ -1893,12 +1875,6 @@ describe('setup status', () => { [ 'project: revenue', 'setup:', - ' completed_steps:', - ' - project', - ' - llm', - ' - embeddings', - ' - sources', - ' - context', ' database_connection_ids: []', 'connections: {}', 'llm:', @@ -1915,6 +1891,9 @@ describe('setup status', () => { ].join('\n'), 'utf-8', ); + await writeKtxSetupState(tempDir, { + completed_steps: ['project', 'llm', 'embeddings', 'sources', 'context'], + }); await writeKtxSetupContextState(tempDir, { runId: 'setup-context-local-ready', status: 'completed', diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index dec0f4d7..064da729 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -4,7 +4,6 @@ import { cancel, isCancel, select } from '@clack/prompts'; import { getLatestLocalIngestStatus, savedMemoryCountsForReport } from '@ktx/context/ingest'; import { ktxLocalStateDbPath, - ktxSetupCompletedSteps, loadKtxProject, readKtxSetupState, type KtxLocalProject, @@ -297,7 +296,7 @@ export async function readKtxSetupStatus(projectDir: string): Promise { ).rejects.toThrow(/mapping.*does not point to connection/); }); - it('throws when the matching mapping has a null metabaseDatabaseName (unhydrated)', async () => { + it('hydrates missing mapping metadata from 
Metabase instead of requiring a prior refresh', async () => { sourceStateReader.getSourceState.mockResolvedValue({ syncMode: 'ALL', selections: [], @@ -268,15 +274,22 @@ describe('fetchMetabaseBundle', () => { ], defaultTagNames: [], }); - await expect( - fetchMetabaseBundle({ - pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, - stagedDir, - ctx: makeFetchContext(), - clientFactory, - sourceStateReader, - }), - ).rejects.toThrow(/unhydrated.*ktx connection mapping refresh/); + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + + expect(clientFactory.__client.getDatabase).toHaveBeenCalledWith(42); + const databaseFile = JSON.parse(await readFile(join(stagedDir, 'databases/42.json'), 'utf-8')); + expect(databaseFile).toMatchObject({ + metabaseDatabaseId: 42, + metabaseDatabaseName: 'Analytics', + metabaseEngine: 'postgres', + targetConnectionId, + }); }); it('skips cards whose getResolvedSql returns null and records them in unresolved-cards.json', async () => { diff --git a/packages/context/src/ingest/adapters/metabase/fetch.ts b/packages/context/src/ingest/adapters/metabase/fetch.ts index 9ccb2be6..d4e8b59b 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch.ts @@ -97,15 +97,16 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr `mapping for database ${pullConfig.metabaseDatabaseId} does not point to connection ${params.ctx.connectionId} (points to ${mapping.targetConnectionId})`, ); } - if (mapping.metabaseDatabaseName === null) { - throw new IngestInputError( - `mapping for database ${pullConfig.metabaseDatabaseId} on Metabase connection ${pullConfig.metabaseConnectionId} is unhydrated; run \`ktx connection mapping refresh ${pullConfig.metabaseConnectionId}\` to populate metabaseDatabaseName before ingest.`, - ); - } - 
const mappingDatabaseName: string = mapping.metabaseDatabaseName; const client = await params.clientFactory.createClient(pullConfig, params.ctx); try { + let mappingDatabaseName = mapping.metabaseDatabaseName; + let mappingEngine = mapping.metabaseEngine; + if (mappingDatabaseName === null) { + const database = await client.getDatabase(pullConfig.metabaseDatabaseId); + mappingDatabaseName = database.name; + mappingEngine = database.engine ?? null; + } const stagedForScope: StagedSyncConfig = { metabaseConnectionId: pullConfig.metabaseConnectionId, metabaseDatabaseId: pullConfig.metabaseDatabaseId, @@ -118,7 +119,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr mapping: { metabaseDatabaseId: mapping.metabaseDatabaseId, metabaseDatabaseName: mappingDatabaseName, - metabaseEngine: mapping.metabaseEngine, + metabaseEngine: mappingEngine, targetConnectionId: mapping.targetConnectionId, }, }; @@ -233,7 +234,7 @@ export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Pr const databaseFile: StagedDatabaseFile = { metabaseDatabaseId: mapping.metabaseDatabaseId, metabaseDatabaseName: mappingDatabaseName, - metabaseEngine: mapping.metabaseEngine, + metabaseEngine: mappingEngine, targetConnectionId: mapping.targetConnectionId, }; await writeFile( diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index ec5e163e..a7ffc5de 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -12,7 +12,7 @@ import { type MetabaseClientRuntimeConfig, } from './client-port.js'; import type { MetabaseFetchLogger } from './fetch.js'; -import { LocalMetabaseSourceStateReader } from './local-source-state-store.js'; +import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from './local-source-state-store.js'; 
import { MetabaseSourceAdapter } from './metabase.adapter.js'; function stringField(value: unknown): string | null { @@ -62,7 +62,8 @@ export function createLocalMetabaseSourceAdapter( project: KtxLocalProject, options: CreateLocalMetabaseSourceAdapterOptions = {}, ): MetabaseSourceAdapter { - const sourceStateReader = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + const sourceStateReader = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }); const connectionFactory = new DefaultMetabaseConnectionClientFactory( (metabaseConnectionId) => metabaseRuntimeConfigFromLocalConnection( diff --git a/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts b/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts index f5aef74c..1139ea4d 100644 --- a/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts +++ b/packages/context/src/ingest/adapters/metabase/local-source-state-store.test.ts @@ -2,313 +2,112 @@ import { mkdtemp, rm } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; -import { LocalMetabaseSourceStateReader } from './local-source-state-store.js'; +import { buildDefaultKtxProjectConfig } from '../../../project/index.js'; +import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from './local-source-state-store.js'; -describe('LocalMetabaseSourceStateReader', () => { +describe('Metabase YAML source state and discovery cache', () => { let tempDir: string; - let store: LocalMetabaseSourceStateReader; + let discoveryCache: LocalMetabaseDiscoveryCache; beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-local-state-')); - store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.ktx', 
'db.sqlite') }); + tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-cache-')); + discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); }); afterEach(async () => { await rm(tempDir, { recursive: true, force: true }); }); - it('round-trips hydrated source state through SQLite', async () => { - await store.replaceSourceState({ - connectionId: 'prod-metabase', + function projectWithMetabaseMappings(mappings: Record) { + return { + config: { + ...buildDefaultKtxProjectConfig('metabase-cache-test'), + connections: { + 'prod-metabase': { + driver: 'metabase', + mappings, + }, + }, + }, + }; + } + + it('reads Metabase mapping intent from ktx.yaml config', async () => { + const reader = new KtxYamlMetabaseSourceStateReader( + projectWithMetabaseMappings({ + databaseMappings: { '2': 'warehouse' }, + syncEnabled: { '2': true }, + syncMode: 'ONLY', + selections: { collections: [12], items: [99] }, + defaultTagNames: ['analytics'], + }), + { discoveryCache }, + ); + + await expect(reader.getSourceState('prod-metabase')).resolves.toEqual({ syncMode: 'ONLY', - defaultTagNames: ['analytics', 'curated'], + defaultTagNames: ['analytics'], selections: [ - { selectionType: 'collection', metabaseObjectId: 10 }, + { selectionType: 'collection', metabaseObjectId: 12 }, { selectionType: 'item', metabaseObjectId: 99 }, ], mappings: [ { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Analytics', - metabaseEngine: 'postgres', - metabaseHost: 'warehouse.internal', - metabaseDbName: 'analytics', - targetConnectionId: 'warehouse', - syncEnabled: true, - source: 'cli', - }, - ], - }); - - await expect(store.getSourceState('prod-metabase')).resolves.toEqual({ - syncMode: 'ONLY', - defaultTagNames: ['analytics', 'curated'], - selections: [ - { selectionType: 'collection', metabaseObjectId: 10 }, - { selectionType: 'item', metabaseObjectId: 99 }, - ], - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Analytics', - metabaseEngine: 
'postgres', - targetConnectionId: 'warehouse', - syncEnabled: true, - }, - ], - }); - }); - - it('excludes unhydrated mappings from getSourceState and exposes them through the side accessor', async () => { - await store.replaceSourceState({ - connectionId: 'prod-metabase', - syncMode: 'ALL', - defaultTagNames: [], - selections: [], - mappings: [ - { - metabaseDatabaseId: 1, + metabaseDatabaseId: 2, metabaseDatabaseName: null, metabaseEngine: null, metabaseHost: null, metabaseDbName: null, targetConnectionId: 'warehouse', syncEnabled: true, - source: 'ktx.yaml', - }, - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Sandbox', - metabaseEngine: 'postgres', - metabaseHost: 'warehouse.internal', - metabaseDbName: 'sandbox', - targetConnectionId: 'warehouse', - syncEnabled: true, - source: 'refresh', }, ], }); - - const state = await store.getSourceState('prod-metabase'); - expect(state.mappings.map((mapping) => mapping.metabaseDatabaseId)).toEqual([2]); - await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); }); - it('defaults missing sync config to ALL with no tags or selections', async () => { - await store.replaceSourceState({ + it('enriches YAML mapping rows with recreatable discovery metadata', async () => { + await discoveryCache.refreshDiscoveredDatabases({ connectionId: 'prod-metabase', - mappings: [ - { - metabaseDatabaseId: 3, - metabaseDatabaseName: 'Warehouse', - metabaseEngine: 'postgres', - metabaseHost: null, - metabaseDbName: null, - targetConnectionId: null, - syncEnabled: false, - source: 'refresh', - }, - ], + discovered: [{ id: 2, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' }], }); + const reader = new KtxYamlMetabaseSourceStateReader( + projectWithMetabaseMappings({ + databaseMappings: { '2': 'warehouse' }, + syncEnabled: { '2': true }, + }), + { discoveryCache }, + ); - await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ - syncMode: 'ALL', - 
defaultTagNames: [], - selections: [], - }); - }); - - it('supports command-sized mapping writes and reads', async () => { - await store.upsertDatabaseMapping({ - connectionId: 'prod-metabase', - metabaseDatabaseId: 1, - targetConnectionId: 'prod-warehouse', - syncEnabled: true, - source: 'cli', - }); - await store.setSyncState({ - connectionId: 'prod-metabase', - syncMode: 'ONLY', - defaultTagNames: ['analytics'], - selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], - }); - - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ + await expect(reader.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ { - metabaseDatabaseId: 1, - metabaseDatabaseName: null, - metabaseEngine: null, - metabaseHost: null, - metabaseDbName: null, - targetConnectionId: 'prod-warehouse', - syncEnabled: true, - source: 'cli', - }, - ]); - await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); - await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ - syncMode: 'ONLY', - defaultTagNames: ['analytics'], - selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], - mappings: [], - }); - }); - - it('refreshes discovered database metadata while preserving user mapping intent', async () => { - await store.upsertDatabaseMapping({ - connectionId: 'prod-metabase', - metabaseDatabaseId: 1, - targetConnectionId: 'prod-warehouse', - syncEnabled: true, - source: 'cli', - }); - - await store.refreshDiscoveredDatabases({ - connectionId: 'prod-metabase', - discovered: [ - { id: 1, name: 'Analytics', engine: 'postgres', host: 'pg.internal', dbName: 'analytics' }, - { id: 2, name: 'Sandbox', engine: 'postgres', host: 'pg.internal', dbName: 'sandbox' }, - ], - }); - - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ - { - metabaseDatabaseId: 1, + metabaseDatabaseId: 2, metabaseDatabaseName: 'Analytics', metabaseEngine: 'postgres', metabaseHost: 
'pg.internal', metabaseDbName: 'analytics', - targetConnectionId: 'prod-warehouse', + targetConnectionId: 'warehouse', syncEnabled: true, - source: 'cli', + source: 'ktx.yaml', }, + ]); + }); + + it('lists discovered-only rows as refresh cache data without turning them into config state', async () => { + await discoveryCache.refreshDiscoveredDatabases({ + connectionId: 'prod-metabase', + discovered: [{ id: 7, name: 'Unmapped', engine: 'mysql', host: 'mysql.internal', dbName: 'sales' }], + }); + const reader = new KtxYamlMetabaseSourceStateReader(projectWithMetabaseMappings({}), { discoveryCache }); + + await expect(reader.getSourceState('prod-metabase')).resolves.toMatchObject({ mappings: [] }); + await expect(reader.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Sandbox', - metabaseEngine: 'postgres', - metabaseHost: 'pg.internal', - metabaseDbName: 'sandbox', + metabaseDatabaseId: 7, + metabaseDatabaseName: 'Unmapped', targetConnectionId: null, syncEnabled: false, source: 'refresh', }, ]); }); - - it('updates sync-enabled, clears scoped rows, and applies bulk state in one call', async () => { - await store.replaceSourceState({ - connectionId: 'prod-metabase', - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Analytics', - metabaseEngine: 'postgres', - metabaseHost: 'pg.internal', - metabaseDbName: 'analytics', - targetConnectionId: 'prod-warehouse', - syncEnabled: true, - source: 'refresh', - }, - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Sandbox', - metabaseEngine: 'postgres', - metabaseHost: 'pg.internal', - metabaseDbName: 'sandbox', - targetConnectionId: 'staging-warehouse', - syncEnabled: true, - source: 'refresh', - }, - ], - }); - - await store.setMappingSyncEnabled({ - connectionId: 'prod-metabase', - metabaseDatabaseId: 2, - syncEnabled: false, - }); - await store.clearDatabaseMappings({ connectionId: 'prod-metabase', metabaseDatabaseId: 1 }); - - await 
expect(store.listDatabaseMappings('prod-metabase')).resolves.toEqual([ - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Sandbox', - metabaseEngine: 'postgres', - metabaseHost: 'pg.internal', - metabaseDbName: 'sandbox', - targetConnectionId: 'staging-warehouse', - syncEnabled: false, - source: 'refresh', - }, - ]); - }); - - it('seeds unhydrated yaml intent without exposing it through getSourceState', async () => { - await store.applyYamlBootstrap({ - connectionId: 'prod-metabase', - syncMode: 'ALL', - defaultTagNames: ['ktx'], - selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], - mappings: [{ metabaseDatabaseId: 1, targetConnectionId: 'prod-warehouse', syncEnabled: true }], - }); - - await expect(store.getUnhydratedSyncEnabledMappingIds('prod-metabase')).resolves.toEqual([1]); - await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ - syncMode: 'ALL', - defaultTagNames: ['ktx'], - selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], - mappings: [], - }); - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: null, - targetConnectionId: 'prod-warehouse', - syncEnabled: true, - source: 'ktx.yaml', - }, - ]); - }); - - it('applies yaml target intent onto refresh metadata but does not overwrite cli rows', async () => { - await store.refreshDiscoveredDatabases({ - connectionId: 'prod-metabase', - discovered: [{ id: 1, name: 'Analytics', engine: 'postgres', host: 'db.test', dbName: 'analytics' }], - }); - await store.upsertDatabaseMapping({ - connectionId: 'prod-metabase', - metabaseDatabaseId: 2, - targetConnectionId: 'cli-warehouse', - syncEnabled: true, - source: 'cli', - }); - - await store.applyYamlBootstrap({ - connectionId: 'prod-metabase', - syncMode: 'EXCEPT', - defaultTagNames: [], - selections: [{ selectionType: 'item', metabaseObjectId: 99 }], - mappings: [ - { metabaseDatabaseId: 1, targetConnectionId: 
'yaml-warehouse', syncEnabled: true }, - { metabaseDatabaseId: 2, targetConnectionId: 'yaml-warehouse', syncEnabled: false }, - ], - }); - - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Analytics', - metabaseEngine: 'postgres', - targetConnectionId: 'yaml-warehouse', - syncEnabled: true, - source: 'ktx.yaml', - }, - { - metabaseDatabaseId: 2, - targetConnectionId: 'cli-warehouse', - syncEnabled: true, - source: 'cli', - }, - ]); - }); }); diff --git a/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts b/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts index 246d5f33..f8026db6 100644 --- a/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts +++ b/packages/context/src/ingest/adapters/metabase/local-source-state-store.ts @@ -1,17 +1,31 @@ import { mkdirSync } from 'node:fs'; import { dirname } from 'node:path'; import Database from 'better-sqlite3'; +import { + parseMetabaseMappingBootstrap, + type KtxLocalProject, + type MetabaseMappingBootstrap, +} from '../../../project/index.js'; +import type { DiscoveredMetabaseDatabase } from './mapping.js'; import type { MetabaseSourceState, MetabaseSourceStateReader, MetabaseSourceStateSelection } from './source-state-port.js'; -import type { MetabaseSyncMode } from './types.js'; -export type LocalMetabaseMappingSource = 'ktx.yaml' | 'cli' | 'refresh'; +export type LocalMetabaseMappingSource = 'ktx.yaml' | 'refresh'; -interface LocalMetabaseSourceStateStoreOptions { +interface LocalMetabaseDiscoveryCacheOptions { dbPath: string; now?: () => Date; } -export interface LocalMetabaseSourceStateMappingInput { +export interface RefreshLocalMetabaseDiscoveredDatabasesInput { + connectionId: string; + discovered: DiscoveredMetabaseDatabase[]; +} + +export interface LocalMetabaseDiscoveredDatabaseRow extends DiscoveredMetabaseDatabase { + updatedAt: string; +} + +export 
interface LocalMetabaseMappingListRow { metabaseDatabaseId: number; metabaseDatabaseName: string | null; metabaseEngine: string | null; @@ -22,443 +36,86 @@ export interface LocalMetabaseSourceStateMappingInput { source: LocalMetabaseMappingSource; } -export interface ReplaceLocalMetabaseSourceStateInput { - connectionId: string; - syncMode?: MetabaseSyncMode; - defaultTagNames?: string[]; - selections?: MetabaseSourceStateSelection[]; - mappings: LocalMetabaseSourceStateMappingInput[]; -} - -interface ApplyLocalMetabaseYamlBootstrapInput { - connectionId: string; - syncMode: MetabaseSyncMode; - defaultTagNames: string[]; - selections: MetabaseSourceStateSelection[]; - mappings: Array<{ - metabaseDatabaseId: number; - targetConnectionId: string | null; - syncEnabled: boolean; - }>; -} - -export interface LocalMetabaseMappingListRow extends LocalMetabaseSourceStateMappingInput {} - -export interface UpsertLocalMetabaseDatabaseMappingInput { - connectionId: string; - metabaseDatabaseId: number; - targetConnectionId: string | null; - syncEnabled: boolean; - source: LocalMetabaseMappingSource; -} - -export interface SetLocalMetabaseMappingSyncEnabledInput { - connectionId: string; - metabaseDatabaseId: number; - syncEnabled: boolean; -} - -export interface SetLocalMetabaseSyncStateInput { - connectionId: string; - syncMode: MetabaseSyncMode; - defaultTagNames: string[]; - selections: MetabaseSourceStateSelection[]; -} - -export interface RefreshLocalMetabaseDiscoveredDatabasesInput { - connectionId: string; - discovered: Array<{ - id: number; - name: string; - engine: string; - host: string | null; - dbName: string | null; - }>; -} - -export interface ClearLocalMetabaseMappingsInput { - connectionId: string; - metabaseDatabaseId?: number; -} - -interface SelectionRow { - selection_type: 'collection' | 'item'; - metabase_object_id: number; -} - -interface MappingRow { +interface DiscoveryRow { metabase_database_id: number; - metabase_database_name: string | null; - 
metabase_engine: string | null; - target_connection_id: string | null; - sync_enabled: number; + metabase_database_name: string; + metabase_engine: string; + metabase_host: string | null; + metabase_db_name: string | null; + updated_at: string; } -interface SyncConfigRow { - sync_mode: MetabaseSyncMode; - default_tag_names_json: string; +function selectionState(bootstrap: MetabaseMappingBootstrap): MetabaseSourceStateSelection[] { + return [ + ...bootstrap.selections.collections.map((id) => ({ selectionType: 'collection' as const, metabaseObjectId: id })), + ...bootstrap.selections.items.map((id) => ({ selectionType: 'item' as const, metabaseObjectId: id })), + ]; } -function parseDefaultTagNames(raw: string): string[] { - const parsed = JSON.parse(raw); - return Array.isArray(parsed) ? parsed.filter((value): value is string => typeof value === 'string') : []; +function configuredMappingIds(bootstrap: MetabaseMappingBootstrap): number[] { + return [...new Set([...Object.keys(bootstrap.databaseMappings), ...Object.keys(bootstrap.syncEnabled)].map(Number))].sort( + (left, right) => left - right, + ); } -export class LocalMetabaseSourceStateReader implements MetabaseSourceStateReader { +function discoveredRowToDatabase(row: DiscoveryRow): LocalMetabaseDiscoveredDatabaseRow { + return { + id: row.metabase_database_id, + name: row.metabase_database_name, + engine: row.metabase_engine, + host: row.metabase_host, + dbName: row.metabase_db_name, + updatedAt: row.updated_at, + }; +} + +function emptyMetabaseSourceState(): MetabaseSourceState { + return { + syncMode: 'ALL', + selections: [], + defaultTagNames: [], + mappings: [], + }; +} + +export class LocalMetabaseDiscoveryCache { private readonly db: Database.Database; private readonly now: () => Date; - constructor(options: LocalMetabaseSourceStateStoreOptions) { + constructor(options: LocalMetabaseDiscoveryCacheOptions) { mkdirSync(dirname(options.dbPath), { recursive: true }); this.db = new Database(options.dbPath); 
this.db.pragma('journal_mode = WAL'); this.db.pragma('foreign_keys = ON'); this.now = options.now ?? (() => new Date()); this.db.exec(` - CREATE TABLE IF NOT EXISTS local_metabase_sync_config ( - metabase_connection_id TEXT PRIMARY KEY, - sync_mode TEXT NOT NULL, - default_tag_names_json TEXT NOT NULL, - updated_at TEXT NOT NULL - ); - - CREATE TABLE IF NOT EXISTS local_metabase_selections ( - metabase_connection_id TEXT NOT NULL, - selection_type TEXT NOT NULL, - metabase_object_id INTEGER NOT NULL, - PRIMARY KEY (metabase_connection_id, selection_type, metabase_object_id) - ); - - CREATE TABLE IF NOT EXISTS local_metabase_database_mappings ( + CREATE TABLE IF NOT EXISTS local_metabase_discovered_databases ( metabase_connection_id TEXT NOT NULL, metabase_database_id INTEGER NOT NULL, - metabase_database_name TEXT, - metabase_engine TEXT, + metabase_database_name TEXT NOT NULL, + metabase_engine TEXT NOT NULL, metabase_host TEXT, metabase_db_name TEXT, - target_connection_id TEXT, - sync_enabled INTEGER NOT NULL DEFAULT 0, - source TEXT NOT NULL, updated_at TEXT NOT NULL, PRIMARY KEY (metabase_connection_id, metabase_database_id) ); `); } - async applyYamlBootstrap(input: ApplyLocalMetabaseYamlBootstrapInput): Promise { - const timestamp = this.now().toISOString(); - const apply = this.db.transaction(() => { - const syncConfigExists = this.db - .prepare('SELECT 1 FROM local_metabase_sync_config WHERE metabase_connection_id = ?') - .get(input.connectionId); - if (!syncConfigExists) { - this.db - .prepare( - ` - INSERT INTO local_metabase_sync_config ( - metabase_connection_id, - sync_mode, - default_tag_names_json, - updated_at - ) - VALUES (?, ?, ?, ?) - `, - ) - .run(input.connectionId, input.syncMode, JSON.stringify(input.defaultTagNames), timestamp); - - const insertSelection = this.db.prepare(` - INSERT INTO local_metabase_selections ( - metabase_connection_id, - selection_type, - metabase_object_id - ) - VALUES (?, ?, ?) 
- `); - for (const selection of input.selections) { - insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); - } - } - - const existing = this.db.prepare(` - SELECT target_connection_id, source - FROM local_metabase_database_mappings - WHERE metabase_connection_id = ? AND metabase_database_id = ? - `); - const insert = this.db.prepare(` - INSERT INTO local_metabase_database_mappings ( - metabase_connection_id, - metabase_database_id, - metabase_database_name, - metabase_engine, - metabase_host, - metabase_db_name, - target_connection_id, - sync_enabled, - source, - updated_at - ) - VALUES (?, ?, NULL, NULL, NULL, NULL, ?, ?, 'ktx.yaml', ?) - `); - const updateRefreshRow = this.db.prepare(` - UPDATE local_metabase_database_mappings - SET target_connection_id = ?, - sync_enabled = ?, - source = 'ktx.yaml', - updated_at = ? - WHERE metabase_connection_id = ? - AND metabase_database_id = ? - AND source = 'refresh' - AND target_connection_id IS NULL - `); - - for (const mapping of input.mappings) { - const row = existing.get(input.connectionId, mapping.metabaseDatabaseId) as - | { target_connection_id: string | null; source: LocalMetabaseMappingSource } - | undefined; - if (!row) { - insert.run( - input.connectionId, - mapping.metabaseDatabaseId, - mapping.targetConnectionId, - mapping.syncEnabled ? 1 : 0, - timestamp, - ); - continue; - } - if (row.source === 'refresh' && row.target_connection_id === null) { - updateRefreshRow.run( - mapping.targetConnectionId, - mapping.syncEnabled ? 1 : 0, - timestamp, - input.connectionId, - mapping.metabaseDatabaseId, - ); - } - } - }); - - apply(); - } - - async replaceSourceState(input: ReplaceLocalMetabaseSourceStateInput): Promise { - const timestamp = this.now().toISOString(); - const syncMode = input.syncMode ?? 'ALL'; - const selections = input.selections ?? []; - const defaultTagNames = input.defaultTagNames ?? 
[]; - - const replace = this.db.transaction(() => { - this.db - .prepare( - ` - INSERT INTO local_metabase_sync_config ( - metabase_connection_id, - sync_mode, - default_tag_names_json, - updated_at - ) - VALUES (?, ?, ?, ?) - ON CONFLICT(metabase_connection_id) DO UPDATE SET - sync_mode = excluded.sync_mode, - default_tag_names_json = excluded.default_tag_names_json, - updated_at = excluded.updated_at - `, - ) - .run(input.connectionId, syncMode, JSON.stringify(defaultTagNames), timestamp); - - this.db.prepare('DELETE FROM local_metabase_selections WHERE metabase_connection_id = ?').run(input.connectionId); - const insertSelection = this.db.prepare(` - INSERT INTO local_metabase_selections ( - metabase_connection_id, - selection_type, - metabase_object_id - ) - VALUES (?, ?, ?) - `); - for (const selection of selections) { - insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); - } - - this.db - .prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ?') - .run(input.connectionId); - const insertMapping = this.db.prepare(` - INSERT INTO local_metabase_database_mappings ( - metabase_connection_id, - metabase_database_id, - metabase_database_name, - metabase_engine, - metabase_host, - metabase_db_name, - target_connection_id, - sync_enabled, - source, - updated_at - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - `); - for (const mapping of input.mappings) { - insertMapping.run( - input.connectionId, - mapping.metabaseDatabaseId, - mapping.metabaseDatabaseName, - mapping.metabaseEngine, - mapping.metabaseHost, - mapping.metabaseDbName, - mapping.targetConnectionId, - mapping.syncEnabled ? 
1 : 0, - mapping.source, - timestamp, - ); - } - }); - - replace(); - } - - async listDatabaseMappings(connectionId: string): Promise { - const rows = this.db - .prepare( - ` - SELECT - metabase_database_id, - metabase_database_name, - metabase_engine, - metabase_host, - metabase_db_name, - target_connection_id, - sync_enabled, - source - FROM local_metabase_database_mappings - WHERE metabase_connection_id = ? - ORDER BY metabase_database_id - `, - ) - .all(connectionId) as Array<{ - metabase_database_id: number; - metabase_database_name: string | null; - metabase_engine: string | null; - metabase_host: string | null; - metabase_db_name: string | null; - target_connection_id: string | null; - sync_enabled: number; - source: LocalMetabaseMappingSource; - }>; - - return rows.map((row) => ({ - metabaseDatabaseId: row.metabase_database_id, - metabaseDatabaseName: row.metabase_database_name, - metabaseEngine: row.metabase_engine, - metabaseHost: row.metabase_host, - metabaseDbName: row.metabase_db_name, - targetConnectionId: row.target_connection_id, - syncEnabled: row.sync_enabled === 1, - source: row.source, - })); - } - - async upsertDatabaseMapping(input: UpsertLocalMetabaseDatabaseMappingInput): Promise { - const timestamp = this.now().toISOString(); - this.db - .prepare( - ` - INSERT INTO local_metabase_database_mappings ( - metabase_connection_id, - metabase_database_id, - metabase_database_name, - metabase_engine, - metabase_host, - metabase_db_name, - target_connection_id, - sync_enabled, - source, - updated_at - ) - VALUES (?, ?, NULL, NULL, NULL, NULL, ?, ?, ?, ?) - ON CONFLICT(metabase_connection_id, metabase_database_id) DO UPDATE SET - target_connection_id = excluded.target_connection_id, - sync_enabled = excluded.sync_enabled, - source = excluded.source, - updated_at = excluded.updated_at - `, - ) - .run( - input.connectionId, - input.metabaseDatabaseId, - input.targetConnectionId, - input.syncEnabled ? 
1 : 0, - input.source, - timestamp, - ); - } - - async setMappingSyncEnabled(input: SetLocalMetabaseMappingSyncEnabledInput): Promise { - const timestamp = this.now().toISOString(); - this.db - .prepare( - ` - UPDATE local_metabase_database_mappings - SET sync_enabled = ?, updated_at = ? - WHERE metabase_connection_id = ? AND metabase_database_id = ? - `, - ) - .run(input.syncEnabled ? 1 : 0, timestamp, input.connectionId, input.metabaseDatabaseId); - } - - async setSyncState(input: SetLocalMetabaseSyncStateInput): Promise { - const timestamp = this.now().toISOString(); - const write = this.db.transaction(() => { - this.db - .prepare( - ` - INSERT INTO local_metabase_sync_config ( - metabase_connection_id, - sync_mode, - default_tag_names_json, - updated_at - ) - VALUES (?, ?, ?, ?) - ON CONFLICT(metabase_connection_id) DO UPDATE SET - sync_mode = excluded.sync_mode, - default_tag_names_json = excluded.default_tag_names_json, - updated_at = excluded.updated_at - `, - ) - .run(input.connectionId, input.syncMode, JSON.stringify(input.defaultTagNames), timestamp); - - this.db.prepare('DELETE FROM local_metabase_selections WHERE metabase_connection_id = ?').run(input.connectionId); - const insertSelection = this.db.prepare(` - INSERT INTO local_metabase_selections ( - metabase_connection_id, - selection_type, - metabase_object_id - ) - VALUES (?, ?, ?) 
- `); - for (const selection of input.selections) { - insertSelection.run(input.connectionId, selection.selectionType, selection.metabaseObjectId); - } - }); - - write(); - } - async refreshDiscoveredDatabases(input: RefreshLocalMetabaseDiscoveredDatabasesInput): Promise { const timestamp = this.now().toISOString(); const refresh = this.db.transaction(() => { const upsert = this.db.prepare(` - INSERT INTO local_metabase_database_mappings ( + INSERT INTO local_metabase_discovered_databases ( metabase_connection_id, metabase_database_id, metabase_database_name, metabase_engine, metabase_host, metabase_db_name, - target_connection_id, - sync_enabled, - source, updated_at ) - VALUES (?, ?, ?, ?, ?, ?, NULL, 0, 'refresh', ?) + VALUES (?, ?, ?, ?, ?, ?, ?) ON CONFLICT(metabase_connection_id, metabase_database_id) DO UPDATE SET metabase_database_name = excluded.metabase_database_name, metabase_engine = excluded.metabase_engine, @@ -483,78 +140,116 @@ export class LocalMetabaseSourceStateReader implements MetabaseSourceStateReader refresh(); } - async clearDatabaseMappings(input: ClearLocalMetabaseMappingsInput): Promise { - if (input.metabaseDatabaseId === undefined) { - this.db.prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ?').run(input.connectionId); - return; - } - this.db - .prepare('DELETE FROM local_metabase_database_mappings WHERE metabase_connection_id = ? AND metabase_database_id = ?') - .run(input.connectionId, input.metabaseDatabaseId); - } - - async getUnhydratedSyncEnabledMappingIds(connectionId: string): Promise { + async listDiscoveredDatabases(connectionId: string): Promise { const rows = this.db - .prepare( - ` - SELECT metabase_database_id - FROM local_metabase_database_mappings - WHERE metabase_connection_id = ? 
- AND sync_enabled = 1 - AND target_connection_id IS NOT NULL - AND metabase_database_name IS NULL - ORDER BY metabase_database_id - `, - ) - .all(connectionId) as Array<{ metabase_database_id: number }>; - return rows.map((row) => row.metabase_database_id); - } - - async getSourceState(connectionId: string): Promise { - const config = this.db - .prepare('SELECT sync_mode, default_tag_names_json FROM local_metabase_sync_config WHERE metabase_connection_id = ?') - .get(connectionId) as SyncConfigRow | undefined; - const selections = this.db - .prepare( - ` - SELECT selection_type, metabase_object_id - FROM local_metabase_selections - WHERE metabase_connection_id = ? - ORDER BY selection_type, metabase_object_id - `, - ) - .all(connectionId) as SelectionRow[]; - const mappings = this.db .prepare( ` SELECT metabase_database_id, metabase_database_name, metabase_engine, - target_connection_id, - sync_enabled - FROM local_metabase_database_mappings + metabase_host, + metabase_db_name, + updated_at + FROM local_metabase_discovered_databases WHERE metabase_connection_id = ? - AND metabase_database_name IS NOT NULL ORDER BY metabase_database_id `, ) - .all(connectionId) as MappingRow[]; + .all(connectionId) as DiscoveryRow[]; + return rows.map(discoveredRowToDatabase); + } - return { - syncMode: config?.sync_mode ?? 'ALL', - defaultTagNames: config ? 
parseDefaultTagNames(config.default_tag_names_json) : [], - selections: selections.map((selection) => ({ - selectionType: selection.selection_type, - metabaseObjectId: selection.metabase_object_id, - })), - mappings: mappings.map((mapping) => ({ - metabaseDatabaseId: mapping.metabase_database_id, - metabaseDatabaseName: mapping.metabase_database_name, - metabaseEngine: mapping.metabase_engine, - targetConnectionId: mapping.target_connection_id, - syncEnabled: mapping.sync_enabled === 1, - })), - }; + async getDiscoveredDatabase( + connectionId: string, + metabaseDatabaseId: number, + ): Promise { + const row = this.db + .prepare( + ` + SELECT + metabase_database_id, + metabase_database_name, + metabase_engine, + metabase_host, + metabase_db_name, + updated_at + FROM local_metabase_discovered_databases + WHERE metabase_connection_id = ? AND metabase_database_id = ? + `, + ) + .get(connectionId, metabaseDatabaseId) as DiscoveryRow | undefined; + return row ? discoveredRowToDatabase(row) : null; + } +} + +export class KtxYamlMetabaseSourceStateReader implements MetabaseSourceStateReader { + constructor( + private readonly project: Pick, + private readonly options: { discoveryCache?: LocalMetabaseDiscoveryCache } = {}, + ) {} + + async getSourceState(connectionId: string): Promise { + const connection = this.project.config.connections[connectionId]; + if (!connection || String(connection.driver ?? '').toLowerCase() !== 'metabase') { + return emptyMetabaseSourceState(); + } + + const bootstrap = parseMetabaseMappingBootstrap(connectionId, connection); + const discovered = new Map( + (await this.options.discoveryCache?.listDiscoveredDatabases(connectionId))?.map((database) => [database.id, database]) ?? 
+ [], + ); + + return { + syncMode: bootstrap.syncMode, + selections: selectionState(bootstrap), + defaultTagNames: bootstrap.defaultTagNames, + mappings: configuredMappingIds(bootstrap).map((id) => { + const metadata = discovered.get(id); + return { + metabaseDatabaseId: id, + metabaseDatabaseName: metadata?.name ?? null, + metabaseEngine: metadata?.engine ?? null, + metabaseHost: metadata?.host ?? null, + metabaseDbName: metadata?.dbName ?? null, + targetConnectionId: bootstrap.databaseMappings[String(id)] ?? null, + syncEnabled: bootstrap.syncEnabled[String(id)] ?? false, + }; + }), + }; + } + + async listDatabaseMappings(connectionId: string): Promise { + const state = await this.getSourceState(connectionId); + const configuredRows: LocalMetabaseMappingListRow[] = state.mappings.map((mapping) => ({ + metabaseDatabaseId: mapping.metabaseDatabaseId, + metabaseDatabaseName: mapping.metabaseDatabaseName, + metabaseEngine: mapping.metabaseEngine, + metabaseHost: mapping.metabaseHost ?? null, + metabaseDbName: mapping.metabaseDbName ?? null, + targetConnectionId: mapping.targetConnectionId, + syncEnabled: mapping.syncEnabled, + source: 'ktx.yaml', + })); + + const configuredIds = new Set(configuredRows.map((row) => row.metabaseDatabaseId)); + const discoveredRows = + (await this.options.discoveryCache?.listDiscoveredDatabases(connectionId))?.filter( + (database) => !configuredIds.has(database.id), + ) ?? 
[]; + return [ + ...configuredRows, + ...discoveredRows.map((database) => ({ + metabaseDatabaseId: database.id, + metabaseDatabaseName: database.name, + metabaseEngine: database.engine, + metabaseHost: database.host, + metabaseDbName: database.dbName, + targetConnectionId: null, + syncEnabled: false, + source: 'refresh' as const, + })), + ].sort((left, right) => left.metabaseDatabaseId - right.metabaseDatabaseId); } } diff --git a/packages/context/src/ingest/adapters/metabase/source-state-port.ts b/packages/context/src/ingest/adapters/metabase/source-state-port.ts index 7c872f8d..16de9369 100644 --- a/packages/context/src/ingest/adapters/metabase/source-state-port.ts +++ b/packages/context/src/ingest/adapters/metabase/source-state-port.ts @@ -9,6 +9,8 @@ export interface MetabaseSourceStateMapping { metabaseDatabaseId: number; metabaseDatabaseName: string | null; metabaseEngine: string | null; + metabaseHost?: string | null; + metabaseDbName?: string | null; targetConnectionId: string | null; syncEnabled: boolean; } diff --git a/packages/context/src/ingest/index.ts b/packages/context/src/ingest/index.ts index d2336ae9..3c238d98 100644 --- a/packages/context/src/ingest/index.ts +++ b/packages/context/src/ingest/index.ts @@ -240,17 +240,15 @@ export { createLocalMetabaseSourceAdapter, metabaseRuntimeConfigFromLocalConnection, } from './adapters/metabase/local-metabase.adapter.js'; -export { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +export { + KtxYamlMetabaseSourceStateReader, + LocalMetabaseDiscoveryCache, +} from './adapters/metabase/local-source-state-store.js'; export type { - ClearLocalMetabaseMappingsInput, + LocalMetabaseDiscoveredDatabaseRow, LocalMetabaseMappingListRow, LocalMetabaseMappingSource, - LocalMetabaseSourceStateMappingInput, - ReplaceLocalMetabaseSourceStateInput, RefreshLocalMetabaseDiscoveredDatabasesInput, - SetLocalMetabaseMappingSyncEnabledInput, - SetLocalMetabaseSyncStateInput, - 
UpsertLocalMetabaseDatabaseMappingInput, } from './adapters/metabase/local-source-state-store.js'; export { metabaseLocalConnectionIdSchema, metabasePullConfigSchema, parseMetabasePullConfig } from './adapters/metabase/types.js'; export type { MetabasePullConfig, MetabaseSyncMode } from './adapters/metabase/types.js'; diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts index 7161743a..ad3b23f4 100644 --- a/packages/context/src/ingest/local-adapters.test.ts +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -480,7 +480,7 @@ describe('local ingest adapters', () => { }), config: { ...project.config, - setup: { database_connection_ids: ['warehouse'], completed_steps: [] }, + setup: { database_connection_ids: ['warehouse'] }, connections: { warehouse: { driver: 'postgres', diff --git a/packages/context/src/ingest/local-ingest.ts b/packages/context/src/ingest/local-ingest.ts index 2ec13184..6056f6ed 100644 --- a/packages/context/src/ingest/local-ingest.ts +++ b/packages/context/src/ingest/local-ingest.ts @@ -9,10 +9,9 @@ import type { KtxSemanticLayerComputePort } from '../daemon/index.js'; import type { KtxLocalProject } from '../project/index.js'; import { ktxLocalStateDbPath } from '../project/index.js'; import { planMetabaseFanoutChildren } from './adapters/metabase/fanout-planner.js'; -import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from './adapters/metabase/local-source-state-store.js'; import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } from './local-adapters.js'; import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; -import { seedLocalMappingStateFromKtxYaml } from './local-mapping-reconcile.js'; import type { MemoryFlowEventSink } from './memory-flow/types.js'; import { buildSyncId } from './raw-sources-paths.js'; import 
type { IngestReportBody, IngestReportSnapshot } from './reports.js'; @@ -364,16 +363,10 @@ export async function runLocalMetabaseIngest( const metabaseConnectionId = safeSegment('metabase connection id', options.metabaseConnectionId); assertConfigured(options.project, 'metabase', metabaseConnectionId); - await seedLocalMappingStateFromKtxYaml(options.project, metabaseConnectionId); const adapter = findAdapter(options.adapters, 'metabase'); - const sourceStateReader = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(options.project) }); - - const unhydrated = await sourceStateReader.getUnhydratedSyncEnabledMappingIds(metabaseConnectionId); - if (unhydrated.length > 0) { - throw new Error( - `Metabase mappings ${unhydrated.join(', ')} are not hydrated; run \`ktx connection mapping refresh ${metabaseConnectionId}\` before local Metabase ingest.`, - ); - } + const sourceStateReader = new KtxYamlMetabaseSourceStateReader(options.project, { + discoveryCache: new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(options.project) }), + }); const state = await sourceStateReader.getSourceState(metabaseConnectionId); const childPlans = planMetabaseFanoutChildren({ diff --git a/packages/context/src/ingest/local-mapping-reconcile.test.ts b/packages/context/src/ingest/local-mapping-reconcile.test.ts index c0f8dcac..4a5d2740 100644 --- a/packages/context/src/ingest/local-mapping-reconcile.test.ts +++ b/packages/context/src/ingest/local-mapping-reconcile.test.ts @@ -4,7 +4,6 @@ import { join } from 'node:path'; import { afterEach, describe, expect, it } from 'vitest'; import { ktxLocalStateDbPath, type KtxLocalProject } from '../project/index.js'; import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; -import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; import { seedLocalMappingStateFromKtxYaml } from './local-mapping-reconcile.js'; describe('local mapping yaml reconciliation 
bridge', () => { @@ -23,7 +22,7 @@ describe('local mapping yaml reconciliation bridge', () => { } as KtxLocalProject; } - it('seeds Metabase local state from ktx.yaml mapping intent', async () => { + it('does not copy Metabase mapping intent into local SQLite state', async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-metabase-yaml-seed-')); const project = projectWithConnections({ 'prod-metabase': { @@ -39,17 +38,7 @@ describe('local mapping yaml reconciliation bridge', () => { 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, }); - await seedLocalMappingStateFromKtxYaml(project, 'prod-metabase'); - - const store = new LocalMetabaseSourceStateReader({ dbPath: ktxLocalStateDbPath(project) }); - await expect(store.listDatabaseMappings('prod-metabase')).resolves.toMatchObject([ - { metabaseDatabaseId: 1, targetConnectionId: 'prod-warehouse', syncEnabled: true, source: 'ktx.yaml' }, - ]); - await expect(store.getSourceState('prod-metabase')).resolves.toMatchObject({ - syncMode: 'ONLY', - selections: [{ selectionType: 'collection', metabaseObjectId: 12 }], - defaultTagNames: ['ktx'], - }); + await expect(seedLocalMappingStateFromKtxYaml(project, 'prod-metabase')).resolves.toBeUndefined(); }); it('seeds Looker local mappings from ktx.yaml mapping intent', async () => { diff --git a/packages/context/src/ingest/local-mapping-reconcile.ts b/packages/context/src/ingest/local-mapping-reconcile.ts index a1bae2fc..1a58af95 100644 --- a/packages/context/src/ingest/local-mapping-reconcile.ts +++ b/packages/context/src/ingest/local-mapping-reconcile.ts @@ -3,29 +3,8 @@ import { parseConnectionMappingBootstrap, type KtxLocalProject, type LookerMappingBootstrap, - type MetabaseMappingBootstrap, } from '../project/index.js'; import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; -import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; - -function 
metabaseSelections(bootstrap: MetabaseMappingBootstrap) { - return [ - ...bootstrap.selections.collections.map((id) => ({ selectionType: 'collection' as const, metabaseObjectId: id })), - ...bootstrap.selections.items.map((id) => ({ selectionType: 'item' as const, metabaseObjectId: id })), - ]; -} - -function metabaseMappings(bootstrap: MetabaseMappingBootstrap) { - const ids = new Set([...Object.keys(bootstrap.databaseMappings), ...Object.keys(bootstrap.syncEnabled)]); - return [...ids] - .map((id) => Number(id)) - .sort((a, b) => a - b) - .map((id) => ({ - metabaseDatabaseId: id, - targetConnectionId: bootstrap.databaseMappings[String(id)] ?? null, - syncEnabled: bootstrap.syncEnabled[String(id)] ?? false, - })); -} function lookerMappings(bootstrap: LookerMappingBootstrap) { return Object.entries(bootstrap.connectionMappings) @@ -44,20 +23,12 @@ export async function seedLocalMappingStateFromKtxYaml(project: KtxLocalProject, return; } - const dbPath = ktxLocalStateDbPath(project); if (bootstrap.adapter === 'metabase') { - await new LocalMetabaseSourceStateReader({ dbPath }).applyYamlBootstrap({ - connectionId, - syncMode: bootstrap.syncMode, - defaultTagNames: bootstrap.defaultTagNames, - selections: metabaseSelections(bootstrap), - mappings: metabaseMappings(bootstrap), - }); return; } if (bootstrap.adapter === 'looker') { - await new LocalLookerRuntimeStore({ dbPath }).applyYamlBootstrap({ + await new LocalLookerRuntimeStore({ dbPath: ktxLocalStateDbPath(project) }).applyYamlBootstrap({ lookerConnectionId: connectionId, mappings: lookerMappings(bootstrap), }); diff --git a/packages/context/src/ingest/local-metabase-ingest.test.ts b/packages/context/src/ingest/local-metabase-ingest.test.ts index da00c7ec..fe3bd80b 100644 --- a/packages/context/src/ingest/local-metabase-ingest.test.ts +++ b/packages/context/src/ingest/local-metabase-ingest.test.ts @@ -4,7 +4,7 @@ import { join } from 'node:path'; import { AgentRunnerService } from '../agent/index.js'; import { 
afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { initKtxProject, type KtxLocalProject } from '../project/index.js'; -import { LocalMetabaseSourceStateReader } from './adapters/metabase/local-source-state-store.js'; +import { LocalMetabaseDiscoveryCache } from './adapters/metabase/local-source-state-store.js'; import { getLocalIngestStatus, runLocalMetabaseIngest } from './local-ingest.js'; import type { ChunkResult, FetchContext, SourceAdapter } from './types.js'; @@ -94,33 +94,19 @@ describe('runLocalMetabaseIngest', () => { }); async function seedMetabaseState(): Promise { - const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); - await store.replaceSourceState({ - connectionId: 'prod-metabase', + project.config.connections['prod-metabase'].mappings = { + databaseMappings: { '1': 'warehouse_a', '2': 'warehouse_b' }, + syncEnabled: { '1': true, '2': true }, syncMode: 'ALL', defaultTagNames: ['ktx'], - selections: [], - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: 'localhost', - metabaseDbName: 'a', - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'refresh', - }, - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Warehouse B', - metabaseEngine: 'postgres', - metabaseHost: 'localhost', - metabaseDbName: 'b', - targetConnectionId: 'warehouse_b', - syncEnabled: true, - source: 'refresh', - }, + selections: { collections: [], items: [] }, + }; + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); + await discoveryCache.refreshDiscoveredDatabases({ + connectionId: 'prod-metabase', + discovered: [ + { id: 1, name: 'Warehouse A', engine: 'postgres', host: 'localhost', dbName: 'a' }, + { id: 2, name: 'Warehouse B', engine: 'postgres', host: 'localhost', dbName: 'b' }, ], }); } @@ -151,22 +137,10 @@ describe('runLocalMetabaseIngest', () => { }); 
it('throws before runner work when there are no sync-enabled mapped rows', async () => { - const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); - await store.replaceSourceState({ - connectionId: 'prod-metabase', - mappings: [ - { - metabaseDatabaseId: 1, - metabaseDatabaseName: 'Warehouse A', - metabaseEngine: 'postgres', - metabaseHost: null, - metabaseDbName: null, - targetConnectionId: null, - syncEnabled: true, - source: 'refresh', - }, - ], - }); + project.config.connections['prod-metabase'].mappings = { + databaseMappings: { '1': null }, + syncEnabled: { '1': true }, + }; await expect( runLocalMetabaseIngest({ @@ -178,59 +152,28 @@ describe('runLocalMetabaseIngest', () => { ).rejects.toThrow('no sync-enabled mappings with a target connection'); }); - it('throws with refresh guidance for unhydrated sync-enabled rows', async () => { - const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); - await store.replaceSourceState({ - connectionId: 'prod-metabase', - mappings: [ - { - metabaseDatabaseId: 7, - metabaseDatabaseName: null, - metabaseEngine: null, - metabaseHost: null, - metabaseDbName: null, - targetConnectionId: 'warehouse_a', - syncEnabled: true, - source: 'ktx.yaml', - }, - ], + it('seeds yaml-only Metabase mappings before the unhydrated fan-out preflight', async () => { + project.config.connections['prod-metabase'].mappings = { + databaseMappings: { '1': 'warehouse_a' }, + syncEnabled: { '1': true }, + }; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner: new TestAgentRunner(), + jobIdFactory: () => 'metabase-child-1', }); - await expect( - runLocalMetabaseIngest({ - project, - adapters: [new FakeMetabaseSourceAdapter()], + expect(result.status).toBe('all_succeeded'); + expect(result.children).toMatchObject([ + { metabaseConnectionId: 'prod-metabase', - 
agentRunner: new TestAgentRunner(), - }), - ).rejects.toThrow('run `ktx connection mapping refresh prod-metabase`'); - }); - - it('seeds yaml-only Metabase mappings before the unhydrated fan-out preflight', async () => { - const project = { - projectDir: tempDir, - config: { - ingest: { adapters: ['metabase'] }, - connections: { - 'prod-metabase': { - driver: 'metabase', - mappings: { - databaseMappings: { '1': 'prod-warehouse' }, - syncEnabled: { '1': true }, - }, - }, - 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, - }, + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', }, - } as never; - - await expect( - runLocalMetabaseIngest({ - project, - adapters: [new FakeMetabaseSourceAdapter()], - metabaseConnectionId: 'prod-metabase', - }), - ).rejects.toThrow('run `ktx connection mapping refresh prod-metabase`'); + ]); }); it('rejects source-dir uploads through the Metabase fan-out runner', async () => { @@ -266,15 +209,15 @@ describe('runLocalMetabaseIngest', () => { it('captures fetch-time child failures and continues later mappings', async () => { await seedMetabaseState(); project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' }; - const store = new LocalMetabaseSourceStateReader({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); - await store.upsertDatabaseMapping({ - connectionId: 'prod-metabase', - metabaseDatabaseId: 3, - targetConnectionId: 'warehouse_c', - syncEnabled: true, - source: 'cli', - }); - await store.refreshDiscoveredDatabases({ + project.config.connections['prod-metabase'].mappings = { + databaseMappings: { '1': 'warehouse_a', '2': 'warehouse_b', '3': 'warehouse_c' }, + syncEnabled: { '1': true, '2': true, '3': true }, + syncMode: 'ALL', + defaultTagNames: ['ktx'], + selections: { collections: [], items: [] }, + }; + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: join(tempDir, '.ktx', 'db.sqlite') }); + await 
discoveryCache.refreshDiscoveredDatabases({ connectionId: 'prod-metabase', discovered: [ { id: 1, name: 'Warehouse A', engine: 'postgres', host: 'localhost', dbName: 'a' }, diff --git a/packages/context/src/ingest/tools/verification-ledger.tool.ts b/packages/context/src/ingest/tools/verification-ledger.tool.ts index f99e79be..ac880607 100644 --- a/packages/context/src/ingest/tools/verification-ledger.tool.ts +++ b/packages/context/src/ingest/tools/verification-ledger.tool.ts @@ -8,7 +8,7 @@ const verificationLedgerInputSchema = z.object({ notes: z.string().max(2000).optional(), }); -export interface VerificationLedgerEntry { +interface VerificationLedgerEntry { summary: string; verifiedIdentifiers: string[]; unverifiedIdentifiers: string[]; diff --git a/packages/context/src/ingest/tools/warehouse-verification/index.ts b/packages/context/src/ingest/tools/warehouse-verification/index.ts index 0901eace..e6ac2c1c 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/index.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/index.ts @@ -6,12 +6,6 @@ import { EntityDetailsTool } from './entity-details.tool.js'; import { SqlExecutionTool } from './sql-execution.tool.js'; import { WarehouseCatalogService } from './warehouse-catalog.service.js'; -export { DiscoverDataTool } from './discover-data.tool.js'; -export { EntityDetailsTool } from './entity-details.tool.js'; -export { SqlExecutionTool } from './sql-execution.tool.js'; -export { WarehouseCatalogService } from './warehouse-catalog.service.js'; -export type { RawSchemaHit, TableDetail, WarehouseColumnDetail } from './warehouse-catalog.service.js'; - export function createWarehouseVerificationTools(deps: { connections: SlConnectionCatalogPort; fallbackFileStore: KtxFileStorePort; diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts index 691f88e9..a1edf807 
100644 --- a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts @@ -14,7 +14,7 @@ export interface WarehouseCatalogServiceDeps { fileStore: KtxFileStorePort; } -export interface WarehouseColumnDetail extends KtxSchemaColumn { +interface WarehouseColumnDetail extends KtxSchemaColumn { descriptions: Record; rowCount: number | null; nullCount: number | null; diff --git a/packages/context/src/package-exports.test.ts b/packages/context/src/package-exports.test.ts index 4fd7e502..9689744e 100644 --- a/packages/context/src/package-exports.test.ts +++ b/packages/context/src/package-exports.test.ts @@ -199,7 +199,9 @@ describe('@ktx/context package exports', () => { expect(ingest.stagedSyncConfigSchema).toBeDefined(); expect(ingest.stagedLookerScopeFileSchema).toBeDefined(); expect(ingest.stagedLookerFetchReportSchema).toBeDefined(); - expect(ingest.LocalMetabaseSourceStateReader).toBeTypeOf('function'); + expect('LocalMetabaseSourceStateReader' in ingest).toBe(false); + expect(ingest.KtxYamlMetabaseSourceStateReader).toBeTypeOf('function'); + expect(ingest.LocalMetabaseDiscoveryCache).toBeTypeOf('function'); expect(ingest.createLocalMetabaseSourceAdapter).toBeTypeOf('function'); expect(ingest.metabaseRuntimeConfigFromLocalConnection).toBeTypeOf('function'); expect(ingest.IngestMetabaseClientFactory).toBeTypeOf('function'); diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index 1be70322..cad7945c 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -81,16 +81,13 @@ describe('KTX project config', () => { }); }); - it('parses and serializes setup wizard metadata', () => { + it('parses and serializes setup warehouse metadata without setup progress', () => { const config = parseKtxProjectConfig(` project: revenue setup: database_connection_ids: 
- warehouse - analytics - completed_steps: - - project - - llm connections: warehouse: driver: postgres @@ -99,13 +96,12 @@ connections: expect(config.setup).toEqual({ database_connection_ids: ['warehouse', 'analytics'], - completed_steps: ['project', 'llm'], }); const serialized = serializeKtxProjectConfig(config); expect(serialized).toContain('setup:'); expect(serialized).toContain('database_connection_ids:'); - expect(serialized).toContain('completed_steps:'); + expect(serialized).not.toContain('completed_steps:'); }); it('parses global direct Anthropic LLM config', () => { diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index f1aa9d71..5da193f2 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -75,7 +75,6 @@ export interface KtxProjectConnectionConfig { export interface KtxProjectSetupConfig { database_connection_ids: string[]; - completed_steps?: string[]; } export interface KtxProjectConfig { @@ -508,7 +507,6 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig { ? 
{ setup: { database_connection_ids: stringArray(setup.database_connection_ids, []), - completed_steps: stringArray(setup.completed_steps, []), }, } : {}), diff --git a/packages/context/src/project/index.ts b/packages/context/src/project/index.ts index 8fd171d4..8ea92bf6 100644 --- a/packages/context/src/project/index.ts +++ b/packages/context/src/project/index.ts @@ -27,12 +27,10 @@ export { initKtxProject, loadKtxProject } from './project.js'; export type { KtxSetupStep } from './setup-config.js'; export { KTX_SETUP_STEPS, - ktxSetupCompletedSteps, ktxSetupStatePath, markKtxSetupStateStepComplete, mergeKtxSetupGitignoreEntries, readKtxSetupState, setKtxSetupDatabaseConnectionIds, - stripKtxSetupCompletedSteps, writeKtxSetupState, } from './setup-config.js'; diff --git a/packages/context/src/project/setup-config.test.ts b/packages/context/src/project/setup-config.test.ts index 46912d43..92c02707 100644 --- a/packages/context/src/project/setup-config.test.ts +++ b/packages/context/src/project/setup-config.test.ts @@ -4,12 +4,10 @@ import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { buildDefaultKtxProjectConfig } from './config.js'; import { - ktxSetupCompletedSteps, markKtxSetupStateStepComplete, mergeKtxSetupGitignoreEntries, readKtxSetupState, setKtxSetupDatabaseConnectionIds, - stripKtxSetupCompletedSteps, } from './setup-config.js'; describe('KTX setup config helpers', () => { @@ -48,36 +46,6 @@ describe('KTX setup config helpers', () => { expect(config.setup).toBeUndefined(); }); - it('strips setup completed steps while preserving database connection ids', () => { - const config = { - ...buildDefaultKtxProjectConfig('warehouse'), - setup: { - database_connection_ids: ['warehouse'], - completed_steps: ['project', 'databases'], - }, - }; - - expect(stripKtxSetupCompletedSteps(config).setup).toEqual({ - database_connection_ids: ['warehouse'], - }); - }); - - it('combines legacy config setup steps with 
local state for reads', () => { - const config = { - ...buildDefaultKtxProjectConfig('warehouse'), - setup: { - database_connection_ids: ['warehouse'], - completed_steps: ['project', 'databases'], - }, - }; - - expect(ktxSetupCompletedSteps(config, { completed_steps: ['databases', 'sources'] })).toEqual([ - 'project', - 'databases', - 'sources', - ]); - }); - it('merges setup-local gitignore entries without removing existing lines', () => { expect(mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n')).toBe( ['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'), diff --git a/packages/context/src/project/setup-config.ts b/packages/context/src/project/setup-config.ts index a426caf7..b2c8e161 100644 --- a/packages/context/src/project/setup-config.ts +++ b/packages/context/src/project/setup-config.ts @@ -64,27 +64,6 @@ export async function markKtxSetupStateStepComplete(projectDir: string, step: Kt return nextState; } -export function ktxSetupCompletedSteps(config: KtxProjectConfig, state: KtxSetupState): KtxSetupStep[] { - return uniqueSetupSteps([...(config.setup?.completed_steps ?? []), ...state.completed_steps]); -} - -export function stripKtxSetupCompletedSteps(config: KtxProjectConfig): KtxProjectConfig { - if (!config.setup) { - return config; - } - const databaseConnectionIds = config.setup.database_connection_ids ?? []; - if (databaseConnectionIds.length === 0) { - const { setup: _setup, ...withoutSetup } = config; - return withoutSetup; - } - return { - ...config, - setup: { - database_connection_ids: [...databaseConnectionIds], - }, - }; -} - export function setKtxSetupDatabaseConnectionIds( config: KtxProjectConfig, connectionIds: string[], @@ -95,7 +74,6 @@ export function setKtxSetupDatabaseConnectionIds( ...config, setup: { database_connection_ids: uniqueConnectionIds, - ...(config.setup?.completed_steps ? 
{ completed_steps: [...config.setup.completed_steps] } : {}), }, }; } From d4d8ad17240c96e6038993acccf63910dc4f8650 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 14:27:29 +0200 Subject: [PATCH 07/15] fix: show setup destination paths (#63) --- packages/cli/src/setup-project.test.ts | 19 ++++++++++++++++--- packages/cli/src/setup-project.ts | 9 +++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/setup-project.test.ts b/packages/cli/src/setup-project.test.ts index 70591077..e20b9544 100644 --- a/packages/cli/src/setup-project.test.ts +++ b/packages/cli/src/setup-project.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { initKtxProject, parseKtxProjectConfig, readKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { gray } from './io/symbols.js'; import { type KtxSetupProjectPromptAdapter, runKtxSetupProjectStep } from './setup-project.js'; function makeIo(options: { stdoutIsTty?: boolean } = {}) { @@ -37,6 +38,12 @@ function makePromptAdapter(options: { choice?: string; choices?: string[]; textV } satisfies KtxSetupProjectPromptAdapter; } +function defaultSubfolderLabel(parentDir: string): string { + const childName = 'ktx-project'; + const childDir = join(parentDir, childName); + return `New subfolder (${gray(childDir.slice(0, -childName.length))}${childName})`; +} + describe('setup project step', () => { let tempDir: string; @@ -143,8 +150,11 @@ describe('setup project step', () => { expect.objectContaining({ message: 'Where should KTX create the project?', options: [ - expect.objectContaining({ value: 'current', label: 'Current directory' }), - expect.objectContaining({ value: 'new-default', label: 'New subfolder (./ktx-project)' }), + expect.objectContaining({ value: 'current', label: `Current directory (${projectDir})` }), + expect.objectContaining({ + value: 'new-default', + 
label: defaultSubfolderLabel(projectDir), + }), expect.objectContaining({ value: 'new-custom', label: 'Custom path' }), expect.objectContaining({ value: 'exit', label: 'Exit' }), ], @@ -174,7 +184,10 @@ describe('setup project step', () => { expect.objectContaining({ message: 'Where should KTX create the project?', options: expect.arrayContaining([ - expect.objectContaining({ value: 'new-default', label: 'New subfolder (./ktx-project)' }), + expect.objectContaining({ + value: 'new-default', + label: defaultSubfolderLabel(startDir), + }), ]), }), ); diff --git a/packages/cli/src/setup-project.ts b/packages/cli/src/setup-project.ts index 4b2f71d9..a6b4ca71 100644 --- a/packages/cli/src/setup-project.ts +++ b/packages/cli/src/setup-project.ts @@ -12,6 +12,7 @@ import { serializeKtxProjectConfig, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; +import { gray } from './io/symbols.js'; import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; @@ -321,6 +322,10 @@ export async function runKtxSetupProjectStep( const prompts = deps.prompts ?? 
createClackSetupProjectPromptAdapter(); const defaultProjectDir = join(projectDir, DEFAULT_NEW_PROJECT_FOLDER_NAME); + const defaultProjectDirLabel = [ + gray(defaultProjectDir.slice(0, -DEFAULT_NEW_PROJECT_FOLDER_NAME.length)), + DEFAULT_NEW_PROJECT_FOLDER_NAME, + ].join(''); io.stdout.write( '│ Use Up/Down to move, Enter to confirm the current selection, choose Back to return to the previous step, Ctrl+C to exit.\n', ); @@ -328,8 +333,8 @@ export async function runKtxSetupProjectStep( const choice = await prompts.select({ message: 'Where should KTX create the project?', options: [ - { value: 'current', label: 'Current directory' }, - { value: 'new-default', label: 'New subfolder (./ktx-project)' }, + { value: 'current', label: `Current directory (${projectDir})` }, + { value: 'new-default', label: `New subfolder (${defaultProjectDirLabel})` }, { value: 'new-custom', label: 'Custom path' }, ...(args.allowBack ? [{ value: 'back', label: 'Back' }] : []), ...(args.allowBack ? [] : [{ value: 'exit', label: 'Exit' }]), From 4973ca562f560c3a5bb4bdafbe42012a06a16a3e Mon Sep 17 00:00:00 2001 From: Luca Martial <48870843+luca-martial@users.noreply.github.com> Date: Wed, 13 May 2026 08:42:38 -0400 Subject: [PATCH 08/15] Restore Vertex AI LLM setup (#56) * feat(context): resolve Vertex AI config references * feat(cli): restore Vertex AI LLM setup --------- Co-authored-by: Andrey Avtomonov --- packages/cli/src/commands/setup-commands.ts | 30 + packages/cli/src/index.test.ts | 41 ++ packages/cli/src/setup-models.test.ts | 359 ++++++++++- packages/cli/src/setup-models.ts | 600 +++++++++++++++++- packages/cli/src/setup.test.ts | 64 +- packages/cli/src/setup.ts | 13 +- packages/context/src/llm/local-config.test.ts | 46 ++ packages/context/src/llm/local-config.ts | 19 +- 8 files changed, 1113 insertions(+), 59 deletions(-) diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 90251ae1..6a215651 100644 --- 
a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -2,6 +2,7 @@ import { type Command, InvalidArgumentError, Option } from '@commander-js/extra- import type { KtxCliCommandContext } from '../cli-program.js'; import { resolveCommandProjectDir } from '../cli-program.js'; import type { KtxSetupDatabaseDriver } from '../setup-databases.js'; +import type { KtxSetupLlmBackend } from '../setup-models.js'; import type { KtxSetupSourceType } from '../setup-sources.js'; async function runSetupArgs( @@ -27,6 +28,13 @@ function embeddingBackend(value: string): 'openai' | 'sentence-transformers' { throw new InvalidArgumentError(`invalid choice '${value}'`); } +function llmBackend(value: string): KtxSetupLlmBackend { + if (value === 'anthropic' || value === 'vertex') { + return value; + } + throw new InvalidArgumentError(`invalid choice '${value}'`); +} + function databaseDriver(value: string): KtxSetupDatabaseDriver { if ( value === 'sqlite' || @@ -93,9 +101,12 @@ function shouldShowSetupEntryMenu( skipAgents?: boolean; yes?: boolean; input?: boolean; + llmBackend?: KtxSetupLlmBackend; anthropicApiKeyEnv?: string; anthropicApiKeyFile?: string; anthropicModel?: string; + vertexProject?: string; + vertexLocation?: string; skipLlm?: boolean; embeddingBackend?: string; embeddingApiKeyEnv?: string; @@ -166,9 +177,12 @@ function shouldShowSetupEntryMenu( 'skipAgents', 'yes', 'input', + 'llmBackend', 'anthropicApiKeyEnv', 'anthropicApiKeyFile', 'anthropicModel', + 'vertexProject', + 'vertexLocation', 'skipLlm', 'embeddingBackend', 'embeddingApiKeyEnv', @@ -227,9 +241,12 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo .option('--skip-agents', 'Leave agent integration incomplete for now', false) .option('--yes', 'Accept safe defaults in non-interactive setup', false) .option('--no-input', 'Disable interactive terminal input') + .addOption(new Option('--llm-backend ', 'LLM backend').argParser(llmBackend)) 
.option('--anthropic-api-key-env ', 'Environment variable containing the Anthropic API key') .option('--anthropic-api-key-file ', 'File containing the Anthropic API key') .option('--anthropic-model ', 'Anthropic model ID to validate and save') + .option('--vertex-project ', 'Google Vertex AI project ID, env:NAME, or file:/path') + .option('--vertex-location ', 'Google Vertex AI location, env:NAME, or file:/path') .addOption(new Option('--skip-llm', 'Leave LLM setup incomplete for now').hideHelp().default(false)) .addOption(new Option('--embedding-backend ', 'Embedding backend').argParser(embeddingBackend)) .option('--embedding-api-key-env ', 'Environment variable containing the embedding provider API key') @@ -325,6 +342,16 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo context.setExitCode(1); return; } + if (options.llmBackend === 'vertex' && (options.anthropicApiKeyEnv || options.anthropicApiKeyFile)) { + context.io.stderr.write('Anthropic API key flags are only valid with --llm-backend anthropic.\n'); + context.setExitCode(1); + return; + } + if (options.llmBackend === 'anthropic' && (options.vertexProject || options.vertexLocation)) { + context.io.stderr.write('Vertex AI flags are only valid with --llm-backend vertex.\n'); + context.setExitCode(1); + return; + } if (options.embeddingApiKeyEnv && options.embeddingApiKeyFile) { context.io.stderr.write( 'Choose only one embedding credential source: --embedding-api-key-env or --embedding-api-key-file.\n', @@ -364,9 +391,12 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo inputMode: options.input === false ? 'disabled' : 'auto', yes: options.yes === true, cliVersion: context.packageInfo.version, + ...(options.llmBackend ? { llmBackend: options.llmBackend } : {}), ...(options.anthropicApiKeyEnv ? { anthropicApiKeyEnv: options.anthropicApiKeyEnv } : {}), ...(options.anthropicApiKeyFile ? 
{ anthropicApiKeyFile: options.anthropicApiKeyFile } : {}), ...(options.anthropicModel ? { anthropicModel: options.anthropicModel } : {}), + ...(options.vertexProject ? { vertexProject: options.vertexProject } : {}), + ...(options.vertexLocation ? { vertexLocation: options.vertexLocation } : {}), skipLlm: options.skipLlm === true, ...(options.embeddingBackend ? { embeddingBackend: options.embeddingBackend } : {}), ...(options.embeddingApiKeyEnv ? { embeddingApiKeyEnv: options.embeddingApiKeyEnv } : {}), diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 1e69c590..58848dab 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -853,6 +853,47 @@ describe('runKtxCli', () => { ); }); + it('dispatches Vertex AI setup flags to the setup runner', async () => { + const setup = vi.fn(async () => 0); + const setupIo = makeIo(); + + await expect( + runKtxCli( + [ + '--project-dir', + tempDir, + 'setup', + '--no-input', + '--llm-backend', + 'vertex', + '--vertex-project', + 'local-gcp-project', + '--vertex-location', + 'us-east5', + '--anthropic-model', + 'claude-sonnet-4-6', + ], + setupIo.io, + { setup }, + ), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: tempDir, + inputMode: 'disabled', + cliVersion: '0.0.0-private', + llmBackend: 'vertex', + vertexProject: 'local-gcp-project', + vertexLocation: 'us-east5', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }), + setupIo.io, + ); + }); + it('rejects conflicting Anthropic credential setup flags', async () => { const setup = vi.fn(async () => 0); const setupIo = makeIo(); diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index fb8acb47..2e83ade2 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -7,8 +7,10 @@ import { BUNDLED_ANTHROPIC_MODELS, fetchAnthropicModels, type 
KtxSetupModelPromptAdapter, + runKtxSetupGcloudApplicationDefaultAuth, runKtxSetupAnthropicModelStep, } from './setup-models.js'; +import type { KtxCliIo } from './cli-runtime.js'; function makeIo() { let stdout = ''; @@ -33,6 +35,7 @@ function makeIo() { } function makePromptAdapter(options: { + providerChoice?: string; selectValues?: string[]; credentialChoice?: string; modelChoice?: string; @@ -43,8 +46,20 @@ function makePromptAdapter(options: { const selectValues = [...(options.selectValues ?? [])]; const textValues = [...(options.textValues ?? [])]; const passwordValues = [...(options.passwordValues ?? [])]; + let providerPromptCount = 0; return { select: vi.fn(async ({ message }) => { + if (message.includes('LLM provider')) { + providerPromptCount += 1; + const nextProviderChoice = selectValues[0]; + if (nextProviderChoice === 'anthropic' || nextProviderChoice === 'vertex' || nextProviderChoice === 'back') { + return selectValues.shift() ?? nextProviderChoice; + } + if (options.credentialChoice === 'back' && providerPromptCount > 1) { + return 'back'; + } + return options.providerChoice ?? 'anthropic'; + } const nextValue = selectValues.shift(); if (nextValue) { return nextValue; @@ -55,7 +70,10 @@ function makePromptAdapter(options: { return options.modelChoice ?? 'claude-sonnet-4-6'; }), text: vi.fn(async () => textValues.shift() ?? ''), - password: vi.fn(async () => (passwordValues.length > 0 ? passwordValues.shift() : options.passwordValue ?? 'sk-ant-pasted')), + password: vi.fn( + async () => + passwordValues.length > 0 ? passwordValues.shift() : options.passwordValue ?? 
'sk-ant-pasted', // pragma: allowlist secret + ), cancel: vi.fn(), }; } @@ -89,7 +107,7 @@ describe('setup Anthropic model step', () => { ), ); - await expect(fetchAnthropicModels('sk-ant-test', fetchModels)).resolves.toEqual([ + await expect(fetchAnthropicModels('sk-ant-test', fetchModels)).resolves.toEqual([ // pragma: allowlist secret { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, { id: 'claude-opus-4-6', label: 'Claude Opus 4.6', recommended: false }, { id: 'claude-haiku-4-5', label: 'Claude Haiku 4.5', recommended: false }, @@ -107,7 +125,7 @@ describe('setup Anthropic model step', () => { makeIo().io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => [ { id: 'claude-sonnet-4', label: 'Claude Sonnet 4', recommended: true }, { id: 'claude-opus-4', label: 'Claude Opus 4', recommended: false }, @@ -132,19 +150,58 @@ describe('setup Anthropic model step', () => { ); }); + it('offers Vertex AI as an Anthropic model provider option', async () => { + const prompts = makePromptAdapter({ providerChoice: 'back' }); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { prompts, env: {} }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which LLM provider should KTX use?'), + options: expect.arrayContaining([ + { value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' }, + { value: 'back', label: 'Back' }, + ]), + }), + ); + }); + + it('returns from Anthropic credential Back to provider selection', async () => { + const prompts = makePromptAdapter({ selectValues: ['anthropic', 'back', 'back'] }); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { prompts, 
env: {} }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenNthCalledWith( + 3, + expect.objectContaining({ + message: expect.stringContaining('Which LLM provider should KTX use?'), + }), + ); + }); + it('configures env credentials, selected model, prompt caching, and llm completion state', async () => { const io = makeIo(); const result = await runKtxSetupAnthropicModelStep( { projectDir: tempDir, inputMode: 'disabled', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, }, io.io, { - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret healthCheck: vi.fn(async () => ({ ok: true as const })), }, ); @@ -154,7 +211,7 @@ describe('setup Anthropic model step', () => { expect(config.llm).toMatchObject({ provider: { backend: 'anthropic', - anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret }, models: { default: 'claude-sonnet-4-6' }, promptCaching: { enabled: true }, @@ -166,10 +223,258 @@ describe('setup Anthropic model step', () => { expect(io.stdout()).not.toContain('sk-ant-test'); }); + it('configures Vertex AI provider, selected model, prompt caching, and llm completion state', async () => { + const io = makeIo(); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKtxSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + llmBackend: 'vertex', + vertexProject: 'local-gcp-project', + vertexLocation: 'us-east5', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { env: {}, healthCheck }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'vertex', + vertex: { project: 'local-gcp-project', location: 'us-east5' }, + modelSlots: { default: 
'claude-sonnet-4-6' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.llm).toMatchObject({ + provider: { + backend: 'vertex', + vertex: { project: 'local-gcp-project', location: 'us-east5' }, + }, + models: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }); + expect(config.scan.enrichment.mode).toBe('llm'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); + expect((await readKtxSetupState(tempDir)).completed_steps).toContain('llm'); + expect(io.stdout()).toContain('LLM ready: yes (claude-sonnet-4-6)'); + }); + + it('can run gcloud auth for Vertex AI and infer project and default location', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['vertex', 'gcloud', 'local-gcp-project', 'claude-sonnet-4-6'] }); + const runGcloudAuth = vi.fn(async () => ({ ok: true as const })); + const readGcloudProject = vi.fn(async () => 'local-gcp-project'); + const listGcloudProjects = vi.fn(async () => [ + { projectId: 'local-gcp-project', name: 'Local project' }, + { projectId: 'other-gcp-project', name: 'Other project' }, + ]); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: {}, + runGcloudAuth, + readGcloudProject, + listGcloudProjects, + healthCheck, + }, + ); + + expect(result.status).toBe('ready'); + expect(runGcloudAuth).toHaveBeenCalledWith(io.io); + expect(readGcloudProject).toHaveBeenCalled(); + expect(listGcloudProjects).toHaveBeenCalled(); + expect(prompts.text).not.toHaveBeenCalled(); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex 
AI?'), + options: [ + { value: 'local-gcp-project', label: 'local-gcp-project - Local project (current gcloud project)' }, + { value: 'other-gcp-project', label: 'other-gcp-project - Other project' }, + { value: 'manual', label: 'Enter a project ID manually' }, + { value: 'back', label: 'Back' }, + ], + }), + ); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'vertex', + vertex: { project: 'local-gcp-project', location: 'us-east5' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.llm.provider).toMatchObject({ + backend: 'vertex', + vertex: { project: 'local-gcp-project', location: 'us-east5' }, + }); + }); + + it('lets users choose a different visible gcloud project for Vertex AI', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['vertex', 'existing', 'other-gcp-project', 'claude-sonnet-4-6'] }); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: {}, + readGcloudProject: vi.fn(async () => 'current-gcp-project'), + listGcloudProjects: vi.fn(async () => [ + { projectId: 'current-gcp-project', name: 'Current project' }, + { projectId: 'other-gcp-project', name: 'Other project' }, + ]), + healthCheck, + }, + ); + + expect(result.status).toBe('ready'); + expect(healthCheck).toHaveBeenCalledWith({ + backend: 'vertex', + vertex: { project: 'other-gcp-project', location: 'us-east5' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.llm.provider).toMatchObject({ + backend: 'vertex', + vertex: { project: 
'other-gcp-project', location: 'us-east5' }, + }); + }); + + it('allows manual Vertex AI project entry when gcloud project listing is empty', async () => { + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['vertex', 'existing', 'manual', 'claude-sonnet-4-6'], + textValues: ['manual-gcp-project'], + }); + const healthCheck = vi.fn(async () => ({ ok: true as const })); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + io.io, + { + prompts, + env: {}, + readGcloudProject: vi.fn(async () => undefined), + listGcloudProjects: vi.fn(async () => []), + healthCheck, + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which Google Cloud project should KTX use for Vertex AI?'), + options: [ + { value: 'manual', label: 'Enter a project ID manually' }, + { value: 'back', label: 'Back' }, + ], + }), + ); + expect(prompts.text).toHaveBeenCalledWith( + expect.objectContaining({ + message: 'Google Cloud project ID\n│ Press Escape to go back.\n│', + }), + ); + expect(healthCheck).toHaveBeenCalledWith( + expect.objectContaining({ + vertex: { project: 'manual-gcp-project', location: 'us-east5' }, + }), + ); + }); + + it('returns from Vertex AI project selection Back to provider selection', async () => { + const prompts = makePromptAdapter({ selectValues: ['vertex', 'existing', 'back', 'back'] }); + + const result = await runKtxSetupAnthropicModelStep( + { projectDir: tempDir, inputMode: 'auto', skipLlm: false }, + makeIo().io, + { + prompts, + env: {}, + readGcloudProject: vi.fn(async () => 'current-gcp-project'), + listGcloudProjects: vi.fn(async () => [{ projectId: 'current-gcp-project', name: 'Current project' }]), + }, + ); + + expect(result.status).toBe('back'); + expect(prompts.select).toHaveBeenNthCalledWith( + 4, + expect.objectContaining({ + message: 
expect.stringContaining('Which LLM provider should KTX use?'), + }), + ); + }); + + it('runs only gcloud application-default login for Vertex AI auth', async () => { + const io = makeIo(); + const runGcloud = vi.fn(async () => ({ ok: true as const })); + + await expect(runKtxSetupGcloudApplicationDefaultAuth(io.io, runGcloud)).resolves.toEqual({ ok: true }); + + expect(runGcloud).toHaveBeenCalledTimes(1); + expect(runGcloud).toHaveBeenCalledWith(['auth', 'application-default', 'login'], expect.anything()); + expect(runGcloud).not.toHaveBeenCalledWith(['auth', 'login'], expect.anything()); + expect(io.stdout()).toContain('gcloud auth application-default login'); + expect(io.stdout()).not.toContain('gcloud auth login'); + }); + + it('indents gcloud auth output inside the setup gutter', async () => { + const io = makeIo(); + const runGcloud = vi.fn(async (_args: string[], commandIo: KtxCliIo) => { + commandIo.stdout.write('Your browser has been opened to visit:\n\n https://accounts.example/auth\n'); + commandIo.stderr.write('Credentials saved to file: [/tmp/application_default_credentials.json]\n'); + return { ok: true as const }; + }); + + await expect(runKtxSetupGcloudApplicationDefaultAuth(io.io, runGcloud)).resolves.toEqual({ ok: true }); + + expect(io.stdout()).toContain('│ Your browser has been opened to visit:'); + expect(io.stdout()).toContain('│ https://accounts.example/auth'); + expect(io.stderr()).toContain('│ Credentials saved to file: [/tmp/application_default_credentials.json]'); + expect(io.stdout()).not.toContain('\nYour browser has been opened'); + }); + + it('explains common Vertex AI Forbidden health-check causes', async () => { + const io = makeIo(); + + const result = await runKtxSetupAnthropicModelStep( + { + projectDir: tempDir, + inputMode: 'disabled', + llmBackend: 'vertex', + vertexProject: 'kaelio-orbit-looker-20260430', + vertexLocation: 'us-east5', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }, + io.io, + { + env: {}, + 
healthCheck: vi.fn(async () => ({ ok: false as const, message: 'Forbidden' })), + }, + ); + + expect(result.status).toBe('failed'); + expect(io.stderr()).toContain('project kaelio-orbit-looker-20260430'); + expect(io.stderr()).toContain('Vertex AI API is enabled'); + expect(io.stderr()).toContain('Anthropic Claude model access'); + expect(io.stderr()).toContain('roles/aiplatform.user'); + }); + it('resolves --anthropic-api-key-file for health checks and stores a file reference', async () => { const io = makeIo(); const secretPath = join(tempDir, 'anthropic-api-key'); - await writeFile(secretPath, 'sk-ant-file', 'utf-8'); + await writeFile(secretPath, 'sk-ant-file', 'utf-8'); // pragma: allowlist secret const healthCheck = vi.fn(async () => ({ ok: true as const })); const result = await runKtxSetupAnthropicModelStep( @@ -187,7 +492,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('ready'); expect(healthCheck).toHaveBeenCalledWith( expect.objectContaining({ - anthropic: { apiKey: 'sk-ant-file' }, + anthropic: { apiKey: 'sk-ant-file' }, // pragma: allowlist secret modelSlots: { default: 'claude-sonnet-4-6' }, }), ); @@ -195,7 +500,7 @@ describe('setup Anthropic model step', () => { expect(config.llm).toMatchObject({ provider: { backend: 'anthropic', - anthropic: { api_key: `file:${secretPath}` }, + anthropic: { api_key: `file:${secretPath}` }, // pragma: allowlist secret }, models: { default: 'claude-sonnet-4-6' }, }); @@ -249,11 +554,11 @@ describe('setup Anthropic model step', () => { { projectDir: tempDir, inputMode: 'disabled', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret skipLlm: false, }, io.io, - { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, healthCheck }, + { env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, healthCheck }, // pragma: allowlist secret ); expect(result.status).toBe('missing-input'); @@ -267,7 +572,7 @@ describe('setup Anthropic model step', () => { const 
prompts = makePromptAdapter({ credentialChoice: 'paste', modelChoice: 'claude-sonnet-4-6', - passwordValue: 'sk-ant-pasted', + passwordValue: 'sk-ant-pasted', // pragma: allowlist secret }); const result = await runKtxSetupAnthropicModelStep( @@ -282,7 +587,7 @@ describe('setup Anthropic model step', () => { ); expect(result.status).toBe('ready'); - await expect(readFile(join(tempDir, '.ktx/secrets/anthropic-api-key'), 'utf-8')).resolves.toBe('sk-ant-pasted\n'); + await expect(readFile(join(tempDir, '.ktx/secrets/anthropic-api-key'), 'utf-8')).resolves.toBe('sk-ant-pasted\n'); // pragma: allowlist secret if (process.platform !== 'win32') { expect((await stat(join(tempDir, '.ktx/secrets/anthropic-api-key'))).mode & 0o777).toBe(0o600); } @@ -295,7 +600,7 @@ describe('setup Anthropic model step', () => { it('opens pasted key entry directly and tells users Escape goes back', async () => { const prompts = makePromptAdapter({ selectValues: ['paste', 'claude-sonnet-4-6'], - passwordValue: 'sk-ant-pasted', + passwordValue: 'sk-ant-pasted', // pragma: allowlist secret }); const result = await runKtxSetupAnthropicModelStep( @@ -370,7 +675,7 @@ describe('setup Anthropic model step', () => { makeIo().io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), }, ); @@ -401,7 +706,7 @@ describe('setup Anthropic model step', () => { io.io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => [{ id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }]), healthCheck: vi.fn(async () => ({ ok: true as const })), }, @@ -424,7 +729,7 @@ describe('setup Anthropic model step', () => { await expect( runKtxSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'auto', skipLlm: false }, 
io.io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => { throw new Error('network unavailable'); }), @@ -444,7 +749,7 @@ describe('setup Anthropic model step', () => { io.io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => { throw new Error('network unavailable'); }), @@ -504,13 +809,13 @@ describe('setup Anthropic model step', () => { { projectDir: tempDir, inputMode: 'disabled', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, }, io.io, { - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret healthCheck: vi.fn(async () => ({ ok: false as const, message: '401 invalid x-api-key [redacted]' })), }, ); @@ -536,7 +841,7 @@ describe('setup Anthropic model step', () => { io.io, { prompts, - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret listModels: vi.fn(async () => [ { id: 'claude-haiku-3-5', label: 'Claude Haiku 3.5', recommended: false }, { id: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', recommended: true }, @@ -547,7 +852,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('ready'); expect(healthCheck).toHaveBeenCalledTimes(2); - expect(prompts.select).toHaveBeenCalledTimes(4); + expect(prompts.select).toHaveBeenCalledTimes(5); expect(io.stderr()).toContain('Anthropic model health check failed: model not found'); expect(io.stderr()).toContain('Choose a different credential source or model, or Back.'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); @@ -583,7 +888,7 @@ describe('setup Anthropic model step', () => { it('returns 
from model selection Back to credential selection instead of exiting setup', async () => { const prompts = makePromptAdapter({ selectValues: ['paste', 'back', 'back'], - passwordValue: 'sk-ant-pasted', + passwordValue: 'sk-ant-pasted', // pragma: allowlist secret }); const result = await runKtxSetupAnthropicModelStep( @@ -599,7 +904,7 @@ describe('setup Anthropic model step', () => { expect(result.status).toBe('back'); expect(prompts.select).toHaveBeenNthCalledWith( - 3, + 4, expect.objectContaining({ message: expect.stringContaining('How should KTX find your Anthropic API key?'), }), @@ -635,7 +940,7 @@ describe('setup Anthropic model step', () => { const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.llm.provider).toMatchObject({ backend: 'anthropic', - anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret }); }); @@ -653,7 +958,7 @@ describe('setup Anthropic model step', () => { ' provider:', ' backend: anthropic', ' anthropic:', - ' api_key: env:ANTHROPIC_API_KEY', + ' api_key: env:ANTHROPIC_API_KEY', // pragma: allowlist secret ' models:', ' default: claude-sonnet-4-6', 'ingest:', @@ -669,7 +974,7 @@ describe('setup Anthropic model step', () => { const healthCheck = vi.fn(async () => ({ ok: true as const })); await expect( runKtxSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'disabled', skipLlm: false }, makeIo().io, { - env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, + env: { ANTHROPIC_API_KEY: 'sk-ant-test' }, // pragma: allowlist secret healthCheck, }), ).resolves.toMatchObject({ status: 'ready' }); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 221dbd14..37ebdeec 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -1,4 +1,6 @@ +import { execFile, spawn } from 'node:child_process'; import { writeFile } from 'node:fs/promises'; +import { promisify } from 
'node:util'; import { cancel, isCancel, password, select, text } from '@clack/prompts'; import { resolveLocalKtxLlmConfig } from '@ktx/context'; import { resolveKtxConfigReference } from '@ktx/context/core'; @@ -18,9 +20,12 @@ import { envCredentialReference, writeProjectLocalSecretReference } from './setu export interface KtxSetupModelArgs { projectDir: string; inputMode: 'auto' | 'disabled'; + llmBackend?: KtxSetupLlmBackend; anthropicApiKeyEnv?: string; anthropicApiKeyFile?: string; anthropicModel?: string; + vertexProject?: string; + vertexLocation?: string; forcePrompt?: boolean; showPromptInstructions?: boolean; skipLlm: boolean; @@ -39,6 +44,8 @@ export interface AnthropicModelChoice { recommended: boolean; } +export type KtxSetupLlmBackend = 'anthropic' | 'vertex'; + export interface KtxSetupModelPromptAdapter { select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; text(options: { message: string; placeholder?: string }): Promise; @@ -52,6 +59,9 @@ export interface KtxSetupModelDeps { prompts?: KtxSetupModelPromptAdapter; listModels?: (apiKey: string) => Promise; healthCheck?: (config: KtxLlmConfig) => Promise; + runGcloudAuth?: (io: KtxCliIo) => Promise; + readGcloudProject?: () => Promise; + listGcloudProjects?: () => Promise; } export const BUNDLED_ANTHROPIC_MODEL_REGISTRY_VERSION = '2026-05-07'; @@ -78,6 +88,16 @@ const ANTHROPIC_MODEL_PROMPT_CONTEXT = 'KTX uses this as the default model for ingest agents that turn schemas, SQL, BI metadata, and docs ' + 'into semantic-layer sources and wiki context.'; +const VERTEX_AUTH_PROMPT_CONTEXT = + 'KTX can use Google Cloud Application Default Credentials for local Vertex AI access. This opens the normal ' + + 'gcloud browser login flow and does not store Google credentials in ktx.yaml.'; +const VERTEX_PROJECT_PROMPT_CONTEXT = + 'KTX stores the selected Google Cloud project ID in ktx.yaml and uses Application Default Credentials for ' + + 'access. 
Project visibility depends on the signed-in Google account and organization permissions.'; +const DEFAULT_VERTEX_LOCATION = 'us-east5'; + +const execFileAsync = promisify(execFile); + type AnthropicModelDiscoveryErrorReason = 'authentication' | 'http' | 'empty-response'; export class AnthropicModelDiscoveryError extends Error { @@ -103,6 +123,27 @@ type ChooseModelResult = | { status: 'ready'; model: string } | { status: 'back' | 'missing-input' | 'invalid-credential' }; +type ChooseBackendResult = + | { status: 'ready'; backend: KtxSetupLlmBackend; prompted: boolean } + | { status: 'back' }; + +type VertexConfigChoice = + | { + status: 'ready'; + refs: { project?: string; location: string }; + values: { project?: string; location: string }; + } + | { status: 'back' | 'missing-input' }; + +type VertexAuthChoice = { status: 'ready' } | { status: 'back' | 'missing-input' }; + +export type GcloudAuthResult = { ok: true } | { ok: false; message: string }; +interface GcloudProjectChoice { + projectId: string; + name?: string; +} +type GcloudCommandRunner = (args: string[], io: KtxCliIo) => Promise; + function createPromptAdapter(): KtxSetupModelPromptAdapter { return { async select(options) { @@ -131,6 +172,122 @@ function createPromptAdapter(): KtxSetupModelPromptAdapter { }; } +function createIndentedCommandIo(io: KtxCliIo): KtxCliIo { + const indentedWriter = (write: (chunk: string) => void) => { + let atLineStart = true; + return (chunk: string) => { + for (const char of chunk) { + if (atLineStart) { + write('│ '); + atLineStart = false; + } + write(char); + if (char === '\n') { + atLineStart = true; + } + } + }; + }; + + return { + stdout: { + isTTY: io.stdout.isTTY, + columns: io.stdout.columns, + write: indentedWriter((chunk) => io.stdout.write(chunk)), + }, + stderr: { + write: indentedWriter((chunk) => io.stderr.write(chunk)), + }, + }; +} + +function runInteractiveGcloud(args: string[], io: KtxCliIo): Promise { + return new Promise((resolve) => { + let settled 
= false; + const child = spawn('gcloud', args, { stdio: ['inherit', 'pipe', 'pipe'] }); + child.stdout?.on('data', (chunk: Buffer) => { + io.stdout.write(chunk.toString('utf8')); + }); + child.stderr?.on('data', (chunk: Buffer) => { + io.stderr.write(chunk.toString('utf8')); + }); + child.on('error', (error: NodeJS.ErrnoException) => { + if (settled) { + return; + } + settled = true; + if (error.code === 'ENOENT') { + resolve({ ok: false, message: 'gcloud CLI was not found on PATH.' }); + return; + } + resolve({ ok: false, message: error.message }); + }); + child.on('close', (code, signal) => { + if (settled) { + return; + } + settled = true; + if (code === 0) { + resolve({ ok: true }); + return; + } + resolve({ + ok: false, + message: signal ? `gcloud exited after signal ${signal}.` : `gcloud exited with code ${code ?? 'unknown'}.`, + }); + }); + }); +} + +export async function runKtxSetupGcloudApplicationDefaultAuth( + io: KtxCliIo, + runGcloud: GcloudCommandRunner = runInteractiveGcloud, +): Promise { + io.stdout.write('│ Running gcloud auth application-default login...\n'); + return await runGcloud(['auth', 'application-default', 'login'], createIndentedCommandIo(io)); +} + +async function defaultReadGcloudProject(): Promise { + try { + const { stdout } = await execFileAsync('gcloud', ['config', 'get-value', 'project'], { encoding: 'utf8' }); + const value = stdout.trim(); + return value && value !== '(unset)' ? 
value : undefined; + } catch { + return undefined; + } +} + +async function defaultListGcloudProjects(): Promise { + try { + const { stdout } = await execFileAsync('gcloud', ['projects', 'list', '--format=json(projectId,name)'], { + encoding: 'utf8', + }); + const parsed = JSON.parse(stdout.trim() || '[]') as unknown; + if (!Array.isArray(parsed)) { + return []; + } + + return parsed + .map((item): GcloudProjectChoice | undefined => { + if (!item || typeof item !== 'object') { + return undefined; + } + const record = item as { projectId?: unknown; name?: unknown }; + if (typeof record.projectId !== 'string' || !record.projectId.trim()) { + return undefined; + } + const name = typeof record.name === 'string' && record.name.trim() ? record.name.trim() : undefined; + return { + projectId: record.projectId.trim(), + ...(name ? { name } : {}), + }; + }) + .filter((project): project is GcloudProjectChoice => Boolean(project)); + } catch { + return []; + } +} + export async function fetchAnthropicModels( apiKey: string, fetchFn: typeof fetch = fetch, @@ -195,20 +352,33 @@ function hasUsableConfiguredLlm(config: KtxProjectConfig): boolean { function buildProjectLlmConfig( existing: KtxProjectLlmConfig, - credentialRef: string, + provider: + | { backend: 'anthropic'; credentialRef: string } + | { backend: 'vertex'; vertex: { project?: string; location: string } }, model: string, ): KtxProjectLlmConfig { + if (provider.backend === 'vertex') { + return { + provider: { + backend: 'vertex', + vertex: provider.vertex, + }, + models: { ...existing.models, default: model }, + promptCaching: { ...(existing.promptCaching ?? {}), enabled: true, vertexFallbackTo5m: true }, + }; + } + return { provider: { backend: 'anthropic', - anthropic: { api_key: credentialRef }, + anthropic: { api_key: provider.credentialRef }, }, models: { ...existing.models, default: model }, promptCaching: { ...(existing.promptCaching ?? 
{}), enabled: true }, }; } -function buildHealthConfig(credentialValue: string, model: string): KtxLlmConfig { +function buildAnthropicHealthConfig(credentialValue: string, model: string): KtxLlmConfig { return { backend: 'anthropic', anthropic: { apiKey: credentialValue }, @@ -217,6 +387,28 @@ function buildHealthConfig(credentialValue: string, model: string): KtxLlmConfig }; } +function buildVertexHealthConfig(vertex: { project?: string; location: string }, model: string): KtxLlmConfig { + return { + backend: 'vertex', + vertex, + modelSlots: { default: model }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }; +} + +function formatVertexHealthFailure(message: string, vertex: { project?: string; location: string }): string { + const trimmed = message.trim() || 'unknown error'; + if (!/(forbidden|permission|permission_denied|403)/i.test(trimmed)) { + return trimmed; + } + + return ( + `${trimmed}. Check that Vertex AI API is enabled for project ${vertex.project ?? '(unknown)'}, ` + + `Anthropic Claude model access is enabled for location ${vertex.location}, and that your Application Default ` + + 'Credentials principal has Vertex AI User (roles/aiplatform.user) or equivalent permissions.' 
+ ); +} + async function chooseCredentialRef( args: KtxSetupModelArgs, io: KtxCliIo, @@ -298,6 +490,266 @@ async function chooseCredentialRef( } } +function requestedBackend(args: KtxSetupModelArgs): KtxSetupLlmBackend | undefined { + if (args.llmBackend) { + return args.llmBackend; + } + if (args.vertexProject || args.vertexLocation) { + return 'vertex'; + } + if (args.anthropicApiKeyEnv || args.anthropicApiKeyFile || args.anthropicModel) { + return 'anthropic'; + } + return undefined; +} + +async function chooseBackend( + args: KtxSetupModelArgs, + io: KtxCliIo, + deps: KtxSetupModelDeps, +): Promise { + const explicit = requestedBackend(args); + if (explicit) { + return { status: 'ready', backend: explicit, prompted: false }; + } + if (args.inputMode === 'disabled') { + return { status: 'ready', backend: 'anthropic', prompted: false }; + } + + const prompts = deps.prompts ?? createPromptAdapter(); + if (args.showPromptInstructions !== false) { + io.stdout.write( + '│ Use Up/Down to move, Enter to confirm the current selection, choose Back to return to the previous step, Ctrl+C to exit.\n', + ); + } + const choice = await prompts.select({ + message: 'Which LLM provider should KTX use?', + options: [ + { value: 'anthropic', label: 'Anthropic API' }, + { value: 'vertex', label: 'Google Vertex AI for Anthropic Claude' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + return { status: 'ready', backend: choice === 'vertex' ? 'vertex' : 'anthropic', prompted: true }; +} + +async function chooseVertexAuth( + args: KtxSetupModelArgs, + io: KtxCliIo, + deps: KtxSetupModelDeps, +): Promise { + if (args.inputMode === 'disabled' || args.vertexProject || args.vertexLocation) { + return { status: 'ready' }; + } + + const prompts = deps.prompts ?? 
createPromptAdapter(); + const choice = await prompts.select({ + message: `How should KTX authenticate with Google Vertex AI?\n\n${VERTEX_AUTH_PROMPT_CONTEXT}`, + options: [ + { value: 'gcloud', label: 'Run gcloud Application Default Credentials login' }, + { value: 'existing', label: 'Use existing gcloud/Application Default Credentials' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + if (choice !== 'gcloud') { + return { status: 'ready' }; + } + + const result = await (deps.runGcloudAuth ?? runKtxSetupGcloudApplicationDefaultAuth)(io); + if (!result.ok) { + io.stderr.write(`gcloud authentication failed: ${result.message}\n`); + return { status: 'missing-input' }; + } + return { status: 'ready' }; +} + +function resolveProvidedVertexRef( + label: 'project' | 'location', + ref: string, + env: NodeJS.ProcessEnv, + io: KtxCliIo, +): { status: 'ready'; ref: string; value: string } | { status: 'missing-input' } { + let value: string | undefined; + try { + value = resolveKtxConfigReference(ref, env); + } catch { + value = undefined; + } + if (!value) { + io.stderr.write(`Missing Vertex AI ${label}: ${ref} could not be resolved.\n`); + return { status: 'missing-input' }; + } + return { status: 'ready', ref, value }; +} + +function normalizeGcloudProjectId(projectId: string | undefined): string | undefined { + const trimmed = projectId?.trim(); + return trimmed ? trimmed : undefined; +} + +function orderGcloudProjects(projects: GcloudProjectChoice[], currentProject: string | undefined): GcloudProjectChoice[] { + const ordered: GcloudProjectChoice[] = []; + const seen = new Set(); + const addProject = (project: GcloudProjectChoice) => { + const projectId = normalizeGcloudProjectId(project.projectId); + if (!projectId || seen.has(projectId)) { + return; + } + seen.add(projectId); + const name = normalizeGcloudProjectId(project.name); + ordered.push({ + projectId, + ...(name ? 
{ name } : {}), + }); + }; + + if (currentProject) { + addProject(projects.find((project) => project.projectId.trim() === currentProject) ?? { projectId: currentProject }); + } + for (const project of projects) { + addProject(project); + } + return ordered; +} + +function formatGcloudProjectLabel(project: GcloudProjectChoice, currentProject: string | undefined): string { + const name = project.name && project.name !== project.projectId ? ` - ${project.name}` : ''; + const current = project.projectId === currentProject ? ' (current gcloud project)' : ''; + return `${project.projectId}${name}${current}`; +} + +async function chooseInteractiveVertexProject( + currentProject: string | undefined, + io: KtxCliIo, + deps: KtxSetupModelDeps, +): Promise<{ status: 'ready'; ref: string; value: string } | { status: 'back' | 'missing-input' }> { + const prompts = deps.prompts ?? createPromptAdapter(); + let projects: GcloudProjectChoice[] = []; + try { + projects = await (deps.listGcloudProjects ?? defaultListGcloudProjects)(); + } catch { + io.stderr.write('Could not list Google Cloud projects with gcloud. Enter a project ID manually or choose Back.\n'); + } + + const orderedProjects = orderGcloudProjects(projects, currentProject); + if (orderedProjects.length === 0) { + io.stdout.write('│ gcloud did not return any visible Google Cloud projects. 
Enter a project ID manually or choose Back.\n'); + } + + const choice = await prompts.select({ + message: `Which Google Cloud project should KTX use for Vertex AI?\n\n${VERTEX_PROJECT_PROMPT_CONTEXT}`, + options: [ + ...orderedProjects.map((project) => ({ + value: project.projectId, + label: formatGcloudProjectLabel(project, currentProject), + })), + { value: 'manual', label: 'Enter a project ID manually' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + if (choice === 'manual') { + const manual = await prompts.text({ + message: withTextInputNavigation('Google Cloud project ID'), + placeholder: currentProject ?? orderedProjects[0]?.projectId, + }); + if (manual === undefined) { + return { status: 'back' }; + } + const project = normalizeGcloudProjectId(manual); + return project ? { status: 'ready', ref: project, value: project } : { status: 'missing-input' }; + } + + return { status: 'ready', ref: choice, value: choice }; +} + +async function chooseVertexConfig( + args: KtxSetupModelArgs, + io: KtxCliIo, + deps: KtxSetupModelDeps, +): Promise { + const env = deps.env ?? process.env; + let projectRef: string | undefined; + let projectValue: string | undefined; + let gcloudProject: string | undefined; + + if (args.vertexProject) { + const project = resolveProvidedVertexRef('project', args.vertexProject, env, io); + if (project.status !== 'ready') { + return { status: project.status }; + } + projectRef = project.ref; + projectValue = project.value; + } else if (env.GOOGLE_VERTEX_PROJECT?.trim()) { + projectRef = envCredentialReference('GOOGLE_VERTEX_PROJECT'); + projectValue = env.GOOGLE_VERTEX_PROJECT.trim(); + } else { + gcloudProject = normalizeGcloudProjectId(await (deps.readGcloudProject ?? 
defaultReadGcloudProject)()); + if (args.inputMode === 'disabled') { + if (gcloudProject) { + projectRef = gcloudProject; + projectValue = gcloudProject; + } + } else { + const project = await chooseInteractiveVertexProject(gcloudProject, io, deps); + if (project.status !== 'ready') { + return { status: project.status }; + } + projectRef = project.ref; + projectValue = project.value; + } + } + + let locationRef: string | undefined; + let locationValue: string | undefined; + if (args.vertexLocation) { + const location = resolveProvidedVertexRef('location', args.vertexLocation, env, io); + if (location.status !== 'ready') { + return { status: location.status }; + } + locationRef = location.ref; + locationValue = location.value; + } else if (env.GOOGLE_VERTEX_LOCATION?.trim()) { + locationRef = envCredentialReference('GOOGLE_VERTEX_LOCATION'); + locationValue = env.GOOGLE_VERTEX_LOCATION.trim(); + } else { + locationRef = DEFAULT_VERTEX_LOCATION; + locationValue = DEFAULT_VERTEX_LOCATION; + } + + if (!projectRef || !projectValue) { + io.stderr.write( + 'Missing Vertex AI project: run `gcloud config set project PROJECT_ID`, pass --vertex-project, or set GOOGLE_VERTEX_PROJECT.\n', + ); + return { status: 'missing-input' }; + } + + if (!locationRef || !locationValue) { + io.stderr.write('Missing Vertex AI location: pass --vertex-location.\n'); + return { status: 'missing-input' }; + } + + return { + status: 'ready', + refs: { + ...(projectRef ? { project: projectRef } : {}), + location: locationRef, + }, + values: { + ...(projectValue ? 
{ project: projectValue } : {}), + location: locationValue, + }, + }; +} + async function chooseModel( args: KtxSetupModelArgs, credentialValue: string, @@ -359,28 +811,73 @@ async function chooseModel( return { status: 'ready', model: choice }; } -async function persistLlmConfig(projectDir: string, credentialRef: string, model: string): Promise { +async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: KtxSetupModelDeps): Promise { + if (args.anthropicModel) { + return { status: 'ready', model: args.anthropicModel }; + } + if (args.inputMode === 'disabled') { + io.stderr.write('Missing Anthropic model: pass --anthropic-model.\n'); + return { status: 'missing-input' }; + } + + const selectableModels = BUNDLED_ANTHROPIC_MODELS.filter(isSelectableAnthropicModel); + const prompts = deps.prompts ?? createPromptAdapter(); + const choice = await prompts.select({ + message: `Which Anthropic model should KTX use?\n\n${ANTHROPIC_MODEL_PROMPT_CONTEXT}`, + options: [ + ...selectableModels.map((model) => ({ + value: model.id, + label: `${model.label || model.id}${model.recommended ? ' (recommended)' : ''}`, + })), + { value: 'manual', label: 'Enter a model ID manually' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') { + return { status: 'back' }; + } + if (choice === 'manual') { + const manual = await prompts.text({ + message: withTextInputNavigation('Anthropic model ID'), + placeholder: selectableModels.find((model) => model.recommended)?.id ?? selectableModels[0]?.id, + }); + if (manual === undefined) { + return { status: 'back' }; + } + return manual.trim() ? 
{ status: 'ready', model: manual.trim() } : { status: 'missing-input' }; + } + return { status: 'ready', model: choice }; +} + +async function persistLlmConfig( + projectDir: string, + provider: + | { backend: 'anthropic'; credentialRef: string } + | { backend: 'vertex'; vertex: { project?: string; location: string } }, + model: string, +): Promise { const project = await loadKtxProject({ projectDir }); const config = { ...project.config, - llm: buildProjectLlmConfig(project.config.llm, credentialRef, model), + llm: buildProjectLlmConfig(project.config.llm, provider, model), scan: { ...project.config.scan, enrichment: { - ...project.config.scan.enrichment, - mode: 'llm' as const, - }, + ...project.config.scan.enrichment, + mode: 'llm' as const, }, - }; + }, + }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'llm'); } -function buildInteractiveRetryArgs(args: KtxSetupModelArgs): KtxSetupModelArgs { +function buildInteractiveRetryArgs(args: KtxSetupModelArgs, backend?: KtxSetupLlmBackend): KtxSetupModelArgs { return { projectDir: args.projectDir, inputMode: args.inputMode, - ...(args.showPromptInstructions !== undefined ? { showPromptInstructions: args.showPromptInstructions } : {}), + ...(backend ?? args.llmBackend ? { llmBackend: backend ?? 
args.llmBackend } : {}), + showPromptInstructions: false, skipLlm: args.skipLlm, }; } @@ -399,9 +896,12 @@ export async function runKtxSetupAnthropicModelStep( if ( args.forcePrompt !== true && hasUsableConfiguredLlm(project.config) && + !args.llmBackend && !args.anthropicApiKeyEnv && !args.anthropicApiKeyFile && - !args.anthropicModel + !args.anthropicModel && + !args.vertexProject && + !args.vertexLocation ) { io.stdout.write(`│ LLM ready: yes (${project.config.llm.models.default})\n`); return { status: 'ready', projectDir: args.projectDir }; @@ -411,31 +911,91 @@ export async function runKtxSetupAnthropicModelStep( let attemptArgs = args; while (true) { - const credential = await chooseCredentialRef(attemptArgs, io, deps); + const backendChoice = await chooseBackend(attemptArgs, io, deps); + if (backendChoice.status !== 'ready') { + return { status: backendChoice.status, projectDir: args.projectDir }; + } + + const backendArgs = backendChoice.prompted + ? ({ ...attemptArgs, llmBackend: backendChoice.backend, showPromptInstructions: false } satisfies KtxSetupModelArgs) + : attemptArgs; + + if (backendChoice.backend === 'vertex') { + const auth = await chooseVertexAuth(backendArgs, io, deps); + if (auth.status === 'back' && backendChoice.prompted) { + attemptArgs = buildInteractiveRetryArgs(args); + continue; + } + if (auth.status !== 'ready') { + return { status: auth.status, projectDir: args.projectDir }; + } + + const vertex = await chooseVertexConfig(backendArgs, io, deps); + if (vertex.status === 'back' && backendChoice.prompted) { + attemptArgs = buildInteractiveRetryArgs(args); + continue; + } + if (vertex.status !== 'ready') { + return { status: vertex.status, projectDir: args.projectDir }; + } + + const model = await chooseVertexModel(backendArgs, io, deps); + if (model.status === 'back' && !backendArgs.vertexLocation) { + attemptArgs = buildInteractiveRetryArgs(args, backendChoice.backend); + continue; + } + if (model.status === 'invalid-credential') { + 
return { status: 'failed', projectDir: args.projectDir }; + } + if (model.status !== 'ready') { + return { status: model.status, projectDir: args.projectDir }; + } + + const health = await healthCheck(buildVertexHealthConfig(vertex.values, model.model)); + if (health.ok) { + await persistLlmConfig(args.projectDir, { backend: 'vertex', vertex: vertex.refs }, model.model); + io.stdout.write(`│ LLM ready: yes (${model.model})\n`); + return { status: 'ready', projectDir: args.projectDir }; + } + + io.stderr.write(`Vertex AI Anthropic model health check failed: ${formatVertexHealthFailure(health.message, vertex.values)}\n`); + if (args.inputMode === 'disabled') { + return { status: 'failed', projectDir: args.projectDir }; + } + io.stderr.write('Choose a different Vertex AI project, location, or model, or Back.\n'); + attemptArgs = buildInteractiveRetryArgs(args, backendChoice.backend); + continue; + } + + const credential = await chooseCredentialRef(backendArgs, io, deps); + if (credential.status === 'back' && backendChoice.prompted) { + attemptArgs = buildInteractiveRetryArgs(args); + continue; + } if (credential.status !== 'ready') { return { status: credential.status, projectDir: args.projectDir }; } - const model = await chooseModel(attemptArgs, credential.value, io, deps); + const model = await chooseModel(backendArgs, credential.value, io, deps); if (model.status === 'invalid-credential') { if (args.inputMode === 'disabled') { return { status: 'failed', projectDir: args.projectDir }; } io.stderr.write('Choose a different credential source or Back.\n'); - attemptArgs = buildInteractiveRetryArgs(args); + attemptArgs = buildInteractiveRetryArgs(args, backendChoice.backend); continue; } - if (model.status === 'back' && !attemptArgs.anthropicApiKeyEnv && !attemptArgs.anthropicApiKeyFile) { - attemptArgs = buildInteractiveRetryArgs(args); + if (model.status === 'back' && !backendArgs.anthropicApiKeyEnv && !backendArgs.anthropicApiKeyFile) { + attemptArgs = 
buildInteractiveRetryArgs(args, backendChoice.backend); continue; } if (model.status !== 'ready') { return { status: model.status, projectDir: args.projectDir }; } - const health = await healthCheck(buildHealthConfig(credential.value, model.model)); + const health = await healthCheck(buildAnthropicHealthConfig(credential.value, model.model)); if (health.ok) { - await persistLlmConfig(args.projectDir, credential.ref, model.model); + await persistLlmConfig(args.projectDir, { backend: 'anthropic', credentialRef: credential.ref }, model.model); io.stdout.write(`│ LLM ready: yes (${model.model})\n`); return { status: 'ready', projectDir: args.projectDir }; } @@ -445,6 +1005,6 @@ export async function runKtxSetupAnthropicModelStep( return { status: 'failed', projectDir: args.projectDir }; } io.stderr.write('Choose a different credential source or model, or Back.\n'); - attemptArgs = buildInteractiveRetryArgs(args); + attemptArgs = buildInteractiveRetryArgs(args, backendChoice.backend); } } diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 0cad3ebc..dd134fce 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -73,7 +73,7 @@ describe('setup status', () => { ' provider:', ' backend: anthropic', ' anthropic:', - ' api_key: env:ANTHROPIC_API_KEY', + ' api_key: env:ANTHROPIC_API_KEY', // pragma: allowlist secret ' models:', ' default: claude-sonnet-4-6', 'ingest:', @@ -144,7 +144,7 @@ describe('setup status', () => { ' model: text-embedding-3-small', ' dimensions: 1536', ' openai:', - ' api_key: env:OPENAI_API_KEY', + ' api_key: env:OPENAI_API_KEY', // pragma: allowlist secret ].join('\n'), 'utf-8', ); @@ -908,7 +908,7 @@ describe('setup status', () => { inputMode: 'disabled', yes: false, cliVersion: '0.2.0', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, skipEmbeddings: true, @@ -925,7 +925,51 
@@ describe('setup status', () => { expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + }), + testIo.io, + ); + }); + + it('passes Vertex AI model setup args after project selection succeeds', async () => { + const testIo = makeIo(); + const model = vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir })); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'new', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: false, + cliVersion: '0.2.0', + llmBackend: 'vertex', + vertexProject: 'local-gcp-project', + vertexLocation: 'us-east5', + anthropicModel: 'claude-sonnet-4-6', + skipLlm: false, + skipEmbeddings: true, + databaseSchemas: [], + skipDatabases: true, + skipSources: true, + }, + testIo.io, + { model }, + ), + ).resolves.toBe(0); + + expect(model).toHaveBeenCalledWith( + expect.objectContaining({ + projectDir: tempDir, + inputMode: 'disabled', + llmBackend: 'vertex', + vertexProject: 'local-gcp-project', + vertexLocation: 'us-east5', anthropicModel: 'claude-sonnet-4-6', skipLlm: false, }), @@ -949,11 +993,11 @@ describe('setup status', () => { inputMode: 'disabled', yes: true, cliVersion: '0.2.0', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, embeddingBackend: 'openai', - embeddingApiKeyEnv: 'OPENAI_API_KEY', + embeddingApiKeyEnv: 'OPENAI_API_KEY', // pragma: allowlist secret skipEmbeddings: false, databaseSchemas: [], skipDatabases: true, @@ -971,7 +1015,7 @@ describe('setup status', () => { cliVersion: '0.2.0', runtimeInstallPolicy: 'auto', embeddingBackend: 'openai', - embeddingApiKeyEnv: 'OPENAI_API_KEY', + embeddingApiKeyEnv: 'OPENAI_API_KEY', // pragma: allowlist secret 
skipEmbeddings: false, }), testIo.io, @@ -1169,11 +1213,11 @@ describe('setup status', () => { inputMode: 'disabled', yes: false, cliVersion: '0.2.0', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, embeddingBackend: 'openai', - embeddingApiKeyEnv: 'OPENAI_API_KEY', + embeddingApiKeyEnv: 'OPENAI_API_KEY', // pragma: allowlist secret skipEmbeddings: false, databaseDrivers: ['postgres'], databaseConnectionId: 'warehouse', @@ -2020,7 +2064,7 @@ describe('setup status', () => { inputMode: 'disabled', yes: false, cliVersion: '0.2.0', - anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', + anthropicApiKeyEnv: 'ANTHROPIC_API_KEY', // pragma: allowlist secret anthropicModel: 'claude-sonnet-4-6', skipLlm: false, skipEmbeddings: false, diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 064da729..1ab48f0b 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -24,7 +24,12 @@ import { runKtxSetupDatabasesStep, } from './setup-databases.js'; import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; -import { type KtxSetupModelDeps, isKtxSetupLlmConfigReady, runKtxSetupAnthropicModelStep } from './setup-models.js'; +import { + type KtxSetupLlmBackend, + type KtxSetupModelDeps, + isKtxSetupLlmConfigReady, + runKtxSetupAnthropicModelStep, +} from './setup-models.js'; import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js'; import { isKtxPreAgentSetupReady, @@ -65,9 +70,12 @@ export type KtxSetupArgs = inputMode: 'auto' | 'disabled'; yes: boolean; cliVersion: string; + llmBackend?: KtxSetupLlmBackend; anthropicApiKeyEnv?: string; anthropicApiKeyFile?: string; anthropicModel?: string; + vertexProject?: string; + vertexLocation?: string; skipLlm: boolean; embeddingBackend?: 'openai' | 'sentence-transformers'; embeddingApiKeyEnv?: string; @@ -578,9 +586,12 @@ async 
function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup { projectDir: projectResult.projectDir, inputMode: args.inputMode, + ...(args.llmBackend ? { llmBackend: args.llmBackend } : {}), ...(args.anthropicApiKeyEnv ? { anthropicApiKeyEnv: args.anthropicApiKeyEnv } : {}), ...(args.anthropicApiKeyFile ? { anthropicApiKeyFile: args.anthropicApiKeyFile } : {}), ...(args.anthropicModel ? { anthropicModel: args.anthropicModel } : {}), + ...(args.vertexProject ? { vertexProject: args.vertexProject } : {}), + ...(args.vertexLocation ? { vertexLocation: args.vertexLocation } : {}), forcePrompt: forcePromptSteps.has('models') || runOnly === 'models', showPromptInstructions, skipLlm: args.skipLlm || !shouldRunModels, diff --git a/packages/context/src/llm/local-config.test.ts b/packages/context/src/llm/local-config.test.ts index ffb00b36..5d114e12 100644 --- a/packages/context/src/llm/local-config.test.ts +++ b/packages/context/src/llm/local-config.test.ts @@ -37,6 +37,52 @@ describe('local KTX LLM config', () => { }); }); + it('resolves Vertex AI env references into a KtxLlmConfig', () => { + const config: KtxProjectLlmConfig = { + provider: { + backend: 'vertex', + vertex: { project: 'env:GOOGLE_VERTEX_PROJECT', location: 'env:GOOGLE_VERTEX_LOCATION' }, + }, + models: { default: 'env:KTX_MODEL' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }; + + expect( + resolveLocalKtxLlmConfig(config, { + GOOGLE_VERTEX_PROJECT: 'local-gcp-project', + GOOGLE_VERTEX_LOCATION: 'us-east5', + KTX_MODEL: 'claude-sonnet-4-6', + }), + ).toEqual({ + backend: 'vertex', + vertex: { project: 'local-gcp-project', location: 'us-east5' }, + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: true, vertexFallbackTo5m: true }, + }); + }); + + it('ignores inactive Vertex AI references for non-Vertex backends', () => { + const config: KtxProjectLlmConfig = { + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, 
// pragma: allowlist secret + vertex: { location: 'env:MISSING_VERTEX_LOCATION' }, + }, + models: { default: 'claude-sonnet-4-6' }, + }; + + expect( + resolveLocalKtxLlmConfig(config, { + ANTHROPIC_API_KEY: 'sk-ant-test', // pragma: allowlist secret + }), + ).toEqual({ + backend: 'anthropic', + anthropic: { apiKey: 'sk-ant-test' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: undefined, + }); + }); + it('returns null when the local LLM backend is disabled', () => { expect( createLocalKtxLlmProviderFromConfig({ diff --git a/packages/context/src/llm/local-config.ts b/packages/context/src/llm/local-config.ts index 76f1905f..2709c4b7 100644 --- a/packages/context/src/llm/local-config.ts +++ b/packages/context/src/llm/local-config.ts @@ -67,16 +67,33 @@ function resolvedProviderConfig( }; } +function resolvedVertexConfig( + config: { project?: string; location?: string } | undefined, + env: NodeJS.ProcessEnv, +): { project?: string; location: string } | undefined { + if (!config) { + return undefined; + } + + const project = resolveOptional(config.project, env); + const location = resolveRequired(config.location, env, 'llm.provider.vertex.location is required'); + return { + ...(project ? { project } : {}), + location, + }; +} + export function resolveLocalKtxLlmConfig(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): KtxLlmConfig | null { if (config.provider.backend === 'none') { return null; } const modelSlots = resolveModelSlots(config.models, env); + const vertex = config.provider.backend === 'vertex' ? resolvedVertexConfig(config.provider.vertex, env) : undefined; const anthropic = resolvedProviderConfig(config.provider.anthropic, env); const gateway = resolvedProviderConfig(config.provider.gateway, env); return { backend: config.provider.backend, - ...(config.provider.vertex ? { vertex: config.provider.vertex } : {}), + ...(vertex ? { vertex } : {}), ...(anthropic ? { anthropic } : {}), ...(gateway ? 
{ gateway } : {}), modelSlots, From e1e9c4af9155915b87b5703d0927ce78f6d727ae Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 15:04:50 +0200 Subject: [PATCH 09/15] fix(cli): clean up connection commands (#62) * fix(cli): clean up connection commands * test(cli): update connection smoke coverage * Fix setup output formatting * fix notion setup picker exit --- .../docs/cli-reference/ktx-connection.mdx | 135 +- .../docs/getting-started/quickstart.mdx | 2 +- packages/cli/src/cli-program.ts | 3 - packages/cli/src/cli-runtime.ts | 4 - packages/cli/src/command-schemas.ts | 25 - .../cli/src/commands/connection-commands.ts | 308 +---- .../src/commands/connection-mapping.test.ts | 345 ----- .../cli/src/commands/connection-mapping.ts | 507 -------- .../commands/connection-metabase-commands.ts | 132 -- .../connection-metabase-setup.test.ts | 1118 ----------------- .../src/commands/connection-metabase-setup.ts | 798 ------------ .../commands/connection-notion-commands.ts | 92 -- .../src/commands/connection-notion.test.ts | 513 -------- packages/cli/src/connection.test.ts | 541 +------- packages/cli/src/connection.ts | 335 +---- packages/cli/src/index.test.ts | 386 +----- ...est.ts => notion-page-picker-tree.test.ts} | 2 +- ...ion-tree.ts => notion-page-picker-tree.ts} | 0 ...st.tsx => notion-page-picker-tui.test.tsx} | 6 +- ...ion-tui.tsx => notion-page-picker-tui.tsx} | 6 +- packages/cli/src/notion-page-picker.test.ts | 308 +++++ ...ection-notion.ts => notion-page-picker.ts} | 200 ++- packages/cli/src/print-command-tree.test.ts | 8 +- packages/cli/src/setup-databases.test.ts | 42 + packages/cli/src/setup-databases.ts | 55 +- packages/cli/src/setup-interrupt.test.ts | 22 + packages/cli/src/setup-interrupt.ts | 7 + packages/cli/src/setup-sources.test.ts | 128 +- packages/cli/src/setup-sources.ts | 161 ++- packages/cli/src/source-mapping.ts | 225 ++++ packages/cli/src/standalone-smoke.test.ts | 20 +- .../src/ingest/memory-flow/known-errors.ts | 2 +- 
.../src/ingest/memory-flow/summary.test.ts | 2 +- 33 files changed, 1096 insertions(+), 5342 deletions(-) delete mode 100644 packages/cli/src/commands/connection-mapping.test.ts delete mode 100644 packages/cli/src/commands/connection-mapping.ts delete mode 100644 packages/cli/src/commands/connection-metabase-commands.ts delete mode 100644 packages/cli/src/commands/connection-metabase-setup.test.ts delete mode 100644 packages/cli/src/commands/connection-metabase-setup.ts delete mode 100644 packages/cli/src/commands/connection-notion-commands.ts delete mode 100644 packages/cli/src/commands/connection-notion.test.ts rename packages/cli/src/{commands/connection-notion-tree.test.ts => notion-page-picker-tree.test.ts} (99%) rename packages/cli/src/{commands/connection-notion-tree.ts => notion-page-picker-tree.ts} (100%) rename packages/cli/src/{commands/connection-notion-tui.test.tsx => notion-page-picker-tui.test.tsx} (98%) rename packages/cli/src/{commands/connection-notion-tui.tsx => notion-page-picker-tui.tsx} (98%) create mode 100644 packages/cli/src/notion-page-picker.test.ts rename packages/cli/src/{commands/connection-notion.ts => notion-page-picker.ts} (51%) create mode 100644 packages/cli/src/source-mapping.ts diff --git a/docs-site/content/docs/cli-reference/ktx-connection.mdx b/docs-site/content/docs/cli-reference/ktx-connection.mdx index 31a79736..0cec3eae 100644 --- a/docs-site/content/docs/cli-reference/ktx-connection.mdx +++ b/docs-site/content/docs/cli-reference/ktx-connection.mdx @@ -1,9 +1,11 @@ --- title: "ktx connection" -description: "Add, list, test, and map data sources." +description: "List and test configured data sources." --- -Manage database and source connections in your KTX project. Connections define how KTX reaches your data warehouse, BI tools, and context sources. +Inspect configured connections in your KTX project. Connections define how KTX +reaches your data warehouse, BI tools, and context sources. 
Use `ktx setup` to +add, remove, or reconfigure connections. ## Command signature @@ -17,96 +19,23 @@ ktx connection [options] |-----------|-------------| | `list` | List configured connections | | `test ` | Test a configured connection | -| `add ` | Add or replace a configured connection | -| `remove ` | Remove a configured connection from `ktx.yaml` | -| `map ` | Refresh and validate BI-to-warehouse mappings | -| `mapping list ` | List Metabase database mappings | -| `mapping set ` | Set a Metabase or Looker warehouse mapping | -| `mapping apply-bulk ` | Apply mappings from JSON | -| `mapping set-sync-enabled ` | Enable or disable sync for one Metabase database | -| `mapping sync-state get ` | Read sync-state selection | -| `mapping sync-state set ` | Write sync-state selection | -| `mapping refresh ` | Refresh Metabase database mappings | -| `mapping validate ` | Validate Metabase database mappings | -| `mapping clear [dbId]` | Clear Metabase database mappings | -| `metabase setup` | Guided setup for a Metabase connection | -| `notion pick ` | Pick Notion root pages for a configured Notion connection | ## Options -### `connection add` +The `connection` command has command-level options for listing and testing +existing connections. 
-| Flag | Description | Default | -|------|-------------|---------| -| `--url ` | Connection URL, `env:NAME`, or `file:/path` reference | — | -| `--schema ` | Schema to include; repeatable | — | -| `--readonly` | Mark the connection as read-only | `false` | -| `--force` | Replace an existing connection | `false` | -| `--allow-literal-credentials` | Allow writing a literal credential URL to `ktx.yaml` | `false` | - -#### Notion-specific options for `connection add` - -| Flag | Description | Default | -|------|-------------|---------| -| `--token-env ` | Environment variable containing Notion auth token | — | -| `--token-file ` | File containing Notion auth token | — | -| `--crawl-mode ` | Notion crawl mode (`all_accessible` or `selected_roots`) | `selected_roots` | -| `--root-page-id ` | Root page to crawl; repeatable | — | -| `--root-database-id ` | Root database to crawl; repeatable | — | -| `--root-data-source-id ` | Root data source to crawl; repeatable | — | -| `--max-pages ` | Maximum pages per run | — | -| `--max-knowledge-creates ` | Maximum knowledge creates per run | — | -| `--max-knowledge-updates ` | Maximum knowledge updates per run | — | - -### `connection remove` - -| Flag | Description | Default | -|------|-------------|---------| -| `--force` | Remove without prompting | `false` | -| `--no-input` | Disable interactive terminal input | — | - -### `connection map` +### `connection list` | Flag | Description | Default | |------|-------------|---------| | `--json` | Print JSON output | `false` | -### `connection mapping` subcommands - -| Flag | Subcommand | Description | Default | -|------|-----------|-------------|---------| -| `--json` | `list`, `sync-state get` | Print JSON output | `false` | -| `--file ` | `apply-bulk` | JSON mapping file (required) | — | -| `--enabled ` | `set-sync-enabled` | `true` or `false` (required) | — | -| `--mode ` | `sync-state set` | `ALL`, `ONLY`, or `EXCEPT` (required) | — | -| `--collections ` | `sync-state set` | 
Comma-separated collection ids | — | -| `--items ` | `sync-state set` | Comma-separated item ids | — | -| `--tag-names ` | `sync-state set` | Comma-separated tag names | — | -| `--auto-accept` | `refresh` | Accept refresh changes without prompting | `false` | - -### `connection metabase setup` +### `connection test` | Flag | Description | Default | |------|-------------|---------| -| `--id ` | KTX connection id to write | — | -| `--url ` | Metabase API URL | — | -| `--api-key ` | Metabase API key | — | -| `--mint-api-key` | Mint a Metabase API key with credentials | `false` | -| `--username ` | Metabase admin username for API-key minting | — | -| `--password ` | Metabase admin password for API-key minting | — | -| `--map ` | Assign a Metabase database id to a warehouse connection; repeatable | — | -| `--sync ` | Enable sync for a discovered database; repeatable | — | -| `--sync-mode ` | Metabase sync selection mode (`ALL`, `ONLY`, or `EXCEPT`) | `ALL` | -| `--run-ingest` | Run ingest after setup | `false` | -| `--yes` | Confirm and apply setup changes without prompting | `false` | -| `--no-input` | Disable interactive terminal input | — | - -### `connection notion pick` - -| Flag | Description | Default | -|------|-------------|---------| -| `--no-input` | Disable interactive terminal input | — | -| `--root-page-id ` | Root page UUID to crawl; repeatable (required with `--no-input`) | — | +| `--json` | Print JSON output | `false` | ## Examples @@ -114,43 +43,20 @@ ktx connection [options] # List all configured connections ktx connection list -# Add a Postgres connection using an environment variable -ktx connection add postgres my-warehouse --url "env:DATABASE_URL" - -# Add a Postgres connection with specific schemas -ktx connection add postgres analytics --url "env:PG_URL" --schema public --schema analytics - -# Add a read-only Snowflake connection -ktx connection add snowflake sf-prod --url "env:SNOWFLAKE_URL" --readonly - # Test a connection ktx connection test 
my-warehouse - -# Remove a connection -ktx connection remove old-warehouse - -# Add a Notion source connection -ktx connection add notion my-notion \ - --token-env NOTION_TOKEN \ - --crawl-mode selected_roots \ - --root-page-id abc123def456... - -# Run guided Metabase setup -ktx connection metabase setup --url https://metabase.example.com - -# Map a BI database to a warehouse connection -ktx connection mapping set metabase-prod databaseMappings 1=my-warehouse - -# Refresh Metabase mappings -ktx connection mapping refresh metabase-prod --auto-accept - -# Pick Notion root pages interactively -ktx connection notion pick my-notion ``` +## Setup-managed connections + +Run `ktx setup` when you need to add or reconfigure a connection. Interactive +setup includes the rich Notion page picker for selected root pages and the +Metabase mapping prompts for BI-to-warehouse mappings. + ## Output -Interactive commands render prompts and status text. Commands with `--json` return machine-readable JSON suitable for scripts and agents. +Commands with `--json` return machine-readable JSON suitable for scripts and +agents. ```json { @@ -168,7 +74,6 @@ Interactive commands render prompts and status text. Commands with `--json` retu | Error | Cause | Recovery | |-------|-------|----------| -| Connection test fails | Credentials, network access, database, warehouse, or schema is invalid | Verify the same URL with the database's native client, then rerun `ktx connection add ... 
--force` | -| Literal credentials rejected | KTX avoids writing raw secrets to `ktx.yaml` by default | Use `env:NAME` or `file:/path/to/secret`; use `--allow-literal-credentials` only for local throwaway projects | -| Mapping validation fails | BI database mappings do not point at valid warehouse connections | Run `ktx connection mapping refresh --auto-accept`, then set invalid mappings explicitly | -| Notion pick cannot run non-interactively | `--no-input` was used without root page or database ids | Pass `--root-page-id`, `--root-database-id`, or `--root-data-source-id` with `--no-input` | +| Connection test fails | Credentials, network access, database, warehouse, or schema is invalid | Verify the same URL with the database's native client, then rerun `ktx setup` and reconfigure the connection | +| Mapping validation fails during setup | BI database mappings do not point at valid warehouse connections | Rerun `ktx setup` and update the source mapping selections | +| Notion page picker cannot run | The terminal is non-interactive or Notion discovery failed | Rerun interactive `ktx setup`, or use non-interactive setup flags with explicit root page ids | diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 6aef2b14..d71a0754 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -240,7 +240,7 @@ Agent integration ready: yes (claude-code:project) | LLM health check fails | Missing, invalid, or unauthorized Anthropic API key | Export `ANTHROPIC_API_KEY` or rerun `ktx setup` and choose the file-backed secret option | | OpenAI embedding check fails | `OPENAI_API_KEY` is missing when OpenAI embeddings are selected | Export `OPENAI_API_KEY`, or rerun setup and choose local sentence-transformers embeddings | | Local embeddings hang or fail | The managed Python runtime cannot start or the local model runtime is unavailable | 
Install `uv`, run `ktx dev runtime status`, then run `ktx dev runtime install --feature local-embeddings --yes` and rerun setup | -| Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx connection add ... --force` or rerun setup | +| Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx setup` and reconfigure the connection | | `KTX context built: no` in `ktx status` | Setup saved configuration but did not build context | Run `ktx setup` and choose to build context now | | Agent integration is incomplete | Setup skipped the agents step or the target was not installed | Run `ktx setup --agents --target codex --project` using the target you need | diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 7d6a98f3..dbe73a72 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -178,9 +178,6 @@ function shouldSuppressProjectDirLine(path: string[], options: Record= 0) { const demoCommand = path[demoIndex + 1]; diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index f303309a..5e2430cf 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -1,7 +1,5 @@ import { createRequire } from 'node:module'; -import type { KtxConnectionMetabaseSetupArgs } from './commands/connection-metabase-setup.js'; -import type { KtxConnectionNotionArgs } from './commands/connection-notion.js'; import type { KtxConnectionArgs } from './connection.js'; import type { KtxDoctorArgs } from './doctor.js'; import type { KtxIngestArgs } from './ingest.js'; @@ -30,8 +28,6 @@ export interface KtxCliIo { export interface KtxCliDeps { setup?: (args: KtxSetupArgs, io: KtxCliIo) => Promise; connection?: (args: KtxConnectionArgs, io: KtxCliIo) => 
Promise; - connectionNotion?: (args: KtxConnectionNotionArgs, io: KtxCliIo) => Promise; - connectionMetabaseSetup?: (args: KtxConnectionMetabaseSetupArgs, io: KtxCliIo) => Promise; doctor?: (args: KtxDoctorArgs, io: KtxCliIo) => Promise; ingest?: (args: KtxIngestArgs, io: KtxCliIo) => Promise; runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise; diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index cb11f2eb..5caece1f 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -1,33 +1,8 @@ import { z } from 'zod'; const projectDirSchema = z.string().min(1); -const safeConnectionIdSchema = z.string().regex(/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/, 'Unsafe connection id'); const stringArraySchema = z.array(z.string()); -export const connectionAddCommandSchema = z.object({ - command: z.literal('add'), - projectDir: projectDirSchema, - driver: z.string().min(1), - connectionId: safeConnectionIdSchema, - url: z.string().optional(), - schemas: stringArraySchema, - readonly: z.boolean(), - force: z.boolean(), - allowLiteralCredentials: z.boolean(), - notion: z - .object({ - authTokenRef: z.string().min(1), - crawlMode: z.enum(['all_accessible', 'selected_roots']), - rootPageIds: stringArraySchema, - rootDatabaseIds: stringArraySchema, - rootDataSourceIds: stringArraySchema, - maxPagesPerRun: z.number().int().positive().optional(), - maxKnowledgeCreatesPerRun: z.number().int().nonnegative().optional(), - maxKnowledgeUpdatesPerRun: z.number().int().nonnegative().optional(), - }) - .optional(), -}); - export const wikiWriteCommandSchema = z.object({ command: z.literal('write'), projectDir: projectDirSchema, diff --git a/packages/cli/src/commands/connection-commands.ts b/packages/cli/src/commands/connection-commands.ts index 4ce75057..d814ffe9 100644 --- a/packages/cli/src/commands/connection-commands.ts +++ b/packages/cli/src/commands/connection-commands.ts @@ -1,61 +1,19 @@ -import { type Command, 
InvalidArgumentError, Option } from '@commander-js/extra-typings'; -import { - collectOption, - type KtxCliCommandContext, - parseBooleanStringOption, - parseNonEmptyAssignmentOption, - parseNonNegativeIntegerOption, - parsePositiveIntegerOption, - parseSafeConnectionIdOption, - resolveCommandProjectDir, -} from '../cli-program.js'; -import { connectionAddCommandSchema } from '../command-schemas.js'; +import { type Command } from '@commander-js/extra-typings'; +import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; import type { KtxConnectionArgs } from '../connection.js'; import { profileMark } from '../startup-profile.js'; -import type { KtxConnectionMappingArgs } from './connection-mapping.js'; -import { registerConnectionMetabaseCommands } from './connection-metabase-commands.js'; -import { registerConnectionNotionCommands } from './connection-notion-commands.js'; profileMark('module:commands/connection-commands'); -const CRAWL_MODE_CHOICES = ['all_accessible', 'selected_roots'] as const; -const SYNC_MODE_CHOICES = ['ALL', 'ONLY', 'EXCEPT'] as const; - -function parseCsvIds(value: string): number[] { - return value - .split(',') - .filter(Boolean) - .map((item) => parsePositiveIntegerOption(item)); -} - -function parseCsvStrings(value: string): string[] { - return value - .split(',') - .map((item) => item.trim()) - .filter(Boolean); -} - -function parseMappingFieldOption(value: string): 'databaseMappings' | 'connectionMappings' { - if (value === 'databaseMappings' || value === 'connectionMappings') { - return value; - } - throw new InvalidArgumentError('must be databaseMappings or connectionMappings'); -} - async function runConnectionArgs(context: KtxCliCommandContext, args: KtxConnectionArgs): Promise { const runner = context.deps.connection ?? 
(await import('../connection.js')).runKtxConnection; context.setExitCode(await runner(args, context.io)); } -async function runMappingArgs(context: KtxCliCommandContext, args: KtxConnectionMappingArgs): Promise { - const { runKtxConnectionMapping } = await import('./connection-mapping.js'); - context.setExitCode(await runKtxConnectionMapping(args, context.io)); -} - export function registerConnectionCommands(program: Command, context: KtxCliCommandContext, commandName = 'connection'): void { const connection = program .command(commandName) - .description('Add, list, test, and map data sources') + .description('List and test configured connections') .showHelpAfterError() .addHelpText( 'after', @@ -83,264 +41,4 @@ export function registerConnectionCommands(program: Command, context: KtxCliComm connectionId, }); }); - - connection - .command('add') - .description('Add or replace a configured connection') - .argument('', 'Connection driver') - .argument('', 'KTX connection id') - .option('--url ', 'Connection URL, env:NAME, or file:/path reference') - .option('--schema ', 'Schema to include; repeatable', collectOption, []) - .option('--readonly', 'Mark the connection as read-only', false) - .option('--force', 'Replace an existing connection', false) - .option('--allow-literal-credentials', 'Allow writing a literal credential URL to ktx.yaml', false) - .addOption(new Option('--token-env ', 'Environment variable containing Notion auth token').conflicts('tokenFile')) - .addOption(new Option('--token-file ', 'File containing Notion auth token').conflicts('tokenEnv')) - .addOption( - new Option('--crawl-mode ', 'Notion crawl mode: all_accessible or selected_roots') - .choices(CRAWL_MODE_CHOICES) - .default('selected_roots'), - ) - .option('--root-page-id ', 'Root page to crawl; repeatable', collectOption, []) - .option('--root-database-id ', 'Root database to crawl; repeatable', collectOption, []) - .option('--root-data-source-id ', 'Root data source to crawl; repeatable', 
collectOption, []) - .option('--max-pages ', 'Maximum pages per run', parsePositiveIntegerOption) - .option('--max-knowledge-creates ', 'Maximum knowledge creates per run', parseNonNegativeIntegerOption) - .option('--max-knowledge-updates ', 'Maximum knowledge updates per run', parseNonNegativeIntegerOption) - .action(async (driver: string, connectionId: string, options, command) => { - const notion = - driver === 'notion' - ? { - authTokenRef: options.tokenEnv - ? `env:${options.tokenEnv}` - : options.tokenFile - ? `file:${options.tokenFile}` - : '', - crawlMode: options.crawlMode, - rootPageIds: options.rootPageId, - rootDatabaseIds: options.rootDatabaseId, - rootDataSourceIds: options.rootDataSourceId, - maxPagesPerRun: options.maxPages, - maxKnowledgeCreatesPerRun: options.maxKnowledgeCreates, - maxKnowledgeUpdatesPerRun: options.maxKnowledgeUpdates, - } - : undefined; - - if (driver === 'notion' && !notion?.authTokenRef) { - throw new Error('connection add notion requires --token-env NAME or --token-file PATH'); - } - if ( - driver === 'notion' && - notion?.crawlMode === 'selected_roots' && - notion.rootPageIds.length + notion.rootDatabaseIds.length + notion.rootDataSourceIds.length === 0 - ) { - throw new Error('connection add notion selected_roots requires at least one root id'); - } - - const args = connectionAddCommandSchema.parse({ - command: 'add', - projectDir: resolveCommandProjectDir(command), - driver, - connectionId, - url: options.url, - schemas: options.schema.filter(Boolean), - readonly: options.readonly === true, - force: options.force === true, - allowLiteralCredentials: options.allowLiteralCredentials === true, - notion, - }); - - await runConnectionArgs(context, args); - }); - - connection - .command('remove') - .description('Remove a configured connection from ktx.yaml') - .argument('', 'KTX connection id') - .option('--force', 'Remove without prompting', false) - .option('--no-input', 'Disable interactive terminal input') - .action(async 
(connectionId: string, options: { force?: boolean; input?: boolean }, command) => { - await runConnectionArgs(context, { - command: 'remove', - projectDir: resolveCommandProjectDir(command), - connectionId, - force: options.force === true, - ...(options.input === false ? { inputMode: 'disabled' } : {}), - }); - }); - - connection - .command('map') - .description('Refresh and validate BI-to-warehouse mappings') - .argument('', 'Source BI connection id') - .option('--json', 'Print JSON output', false) - .action(async (sourceConnectionId: string, options: { json?: boolean }, command) => { - await runConnectionArgs(context, { - command: 'map', - projectDir: resolveCommandProjectDir(command), - sourceConnectionId, - json: options.json === true, - }); - }); - - registerConnectionMappingCommands(connection, context); - registerConnectionMetabaseCommands(connection, context); - registerConnectionNotionCommands(connection, context); -} - -function registerConnectionMappingCommands(connection: Command, context: KtxCliCommandContext): void { - const mapping = connection - .command('mapping') - .description('Manage Metabase warehouse mappings') - .showHelpAfterError() - .addHelpText( - 'after', - '\nProject directory defaults to KTX_PROJECT_DIR when set, otherwise the current working directory.\n', - ); - - mapping - .command('list') - .description('List Metabase database mappings') - .argument('', 'Metabase connection id') - .option('--json', 'Print JSON output where supported', false) - .action(async (connectionId: string, options: { json?: boolean }, command) => { - await runMappingArgs(context, { - command: 'list', - projectDir: resolveCommandProjectDir(command), - connectionId, - json: options.json === true, - }); - }); - - mapping - .command('set') - .description('Set a Metabase or Looker warehouse mapping') - .argument('', 'Source connection id', parseSafeConnectionIdOption) - .argument('', 'Mapping field', parseMappingFieldOption) - .argument('', 'Mapping assignment 
such as 1=prod-warehouse', parseNonEmptyAssignmentOption) - .action( - async ( - connectionId: string, - field: 'databaseMappings' | 'connectionMappings', - assignment: { key: string; value: string }, - _options: unknown, - command, - ) => { - await runMappingArgs(context, { - command: 'set', - projectDir: resolveCommandProjectDir(command), - connectionId, - field, - key: assignment.key, - value: assignment.value, - }); - }, - ); - - mapping - .command('apply-bulk') - .description('Apply mappings from JSON') - .argument('', 'Metabase connection id') - .requiredOption('--file ', 'JSON mapping file') - .action(async (connectionId: string, options: { file: string }, command) => { - await runMappingArgs(context, { - command: 'apply-bulk', - projectDir: resolveCommandProjectDir(command), - connectionId, - filePath: options.file, - }); - }); - - mapping - .command('set-sync-enabled') - .description('Enable or disable sync for one Metabase database') - .argument('', 'Metabase connection id') - .argument('', 'Metabase database id', parsePositiveIntegerOption) - .requiredOption('--enabled ', 'true or false', parseBooleanStringOption) - .action( - async (connectionId: string, metabaseDatabaseId: number, options: { enabled: boolean }, command) => { - await runMappingArgs(context, { - command: 'set-sync-enabled', - projectDir: resolveCommandProjectDir(command), - connectionId, - metabaseDatabaseId, - enabled: options.enabled, - }); - }, - ); - - const syncState = mapping.command('sync-state').description('Manage Metabase sync-state selection'); - syncState - .command('get') - .description('Read sync-state selection') - .argument('', 'Metabase connection id') - .option('--json', 'Print JSON output where supported', false) - .action(async (connectionId: string, options: { json?: boolean }, command) => { - await runMappingArgs(context, { - command: 'sync-state-get', - projectDir: resolveCommandProjectDir(command), - connectionId, - json: options.json === true, - }); - }); - - 
syncState - .command('set') - .description('Write sync-state selection') - .argument('', 'Metabase connection id') - .addOption(new Option('--mode ', 'ALL, ONLY, or EXCEPT').choices(SYNC_MODE_CHOICES).makeOptionMandatory()) - .option('--collections ', 'Comma-separated collection ids', parseCsvIds, []) - .option('--items ', 'Comma-separated item ids', parseCsvIds, []) - .option('--tag-names ', 'Comma-separated tag names', parseCsvStrings, []) - .action(async (connectionId: string, options, command) => { - await runMappingArgs(context, { - command: 'sync-state-set', - projectDir: resolveCommandProjectDir(command), - connectionId, - syncMode: options.mode, - collectionIds: options.collections, - itemIds: options.items, - tagNames: options.tagNames, - }); - }); - - mapping - .command('refresh') - .description('Refresh Metabase database mappings') - .argument('', 'Metabase connection id') - .option('--auto-accept', 'Accept refresh changes without prompting', false) - .action(async (connectionId: string, options: { autoAccept?: boolean }, command) => { - await runMappingArgs(context, { - command: 'refresh', - projectDir: resolveCommandProjectDir(command), - connectionId, - autoAccept: options.autoAccept === true, - }); - }); - - mapping - .command('validate') - .description('Validate Metabase database mappings') - .argument('', 'Metabase connection id') - .action(async (connectionId: string, _options: unknown, command) => { - await runMappingArgs(context, { - command: 'validate', - projectDir: resolveCommandProjectDir(command), - connectionId, - }); - }); - - mapping - .command('clear') - .description('Clear Metabase database mappings') - .argument('', 'Metabase connection id') - .argument('[metabaseDatabaseId]', 'Metabase database id', parsePositiveIntegerOption) - .action(async (connectionId: string, metabaseDatabaseId: number | undefined, _options: unknown, command) => { - await runMappingArgs(context, { - command: 'clear', - projectDir: 
resolveCommandProjectDir(command), - connectionId, - ...(metabaseDatabaseId ? { metabaseDatabaseId } : {}), - }); - }); } diff --git a/packages/cli/src/commands/connection-mapping.test.ts b/packages/cli/src/commands/connection-mapping.test.ts deleted file mode 100644 index 825c3c4c..00000000 --- a/packages/cli/src/commands/connection-mapping.test.ts +++ /dev/null @@ -1,345 +0,0 @@ -import { mkdtemp, readFile, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { LocalMetabaseDiscoveryCache } from '@ktx/context/ingest'; -import { initKtxProject, loadKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { runKtxConnectionMapping } from './connection-mapping.js'; - -function makeIo() { - let stdout = ''; - let stderr = ''; - return { - io: { - stdout: { - write: (chunk: string) => { - stdout += chunk; - }, - }, - stderr: { - write: (chunk: string) => { - stderr += chunk; - }, - }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -describe('runKtxConnectionMapping', () => { - let tempDir: string; - let projectDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-metabase-mapping-')); - projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'mapping' }); - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig({ - ...project.config, - connections: { - 'prod-metabase': { - driver: 'metabase', - api_url: 'https://metabase.example.com', - api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret - }, - 'prod-warehouse': { - driver: 'postgres', - url: 'env:WAREHOUSE_URL', - readonly: true, - }, - }, - }), - 'ktx', - 'ktx@example.com', - 'Seed Metabase mapping test connections', - ); - }); - - async function 
replaceConnections(connections: Record) { - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig({ - ...project.config, - connections, - }), - 'ktx', - 'ktx@example.com', - 'Replace mapping test connections', - ); - } - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - it('sets, lists, disables, and clears local Metabase mappings', async () => { - const io = makeIo(); - const setCode = await runKtxConnectionMapping( - { - command: 'set', - projectDir, - connectionId: 'prod-metabase', - field: 'databaseMappings', - key: '1', - value: 'prod-warehouse', - }, - io.io, - ); - expect(setCode, io.stderr()).toBe(0); - - let config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections['prod-metabase']?.mappings).toMatchObject({ - databaseMappings: { '1': 'prod-warehouse' }, - syncEnabled: { '1': true }, - }); - - const listIo = makeIo(); - await expect( - runKtxConnectionMapping({ command: 'list', projectDir, connectionId: 'prod-metabase', json: false }, listIo.io), - ).resolves.toBe(0); - expect(listIo.stdout()).toContain('1 -> prod-warehouse'); - expect(listIo.stdout()).toContain('unhydrated'); - - await expect( - runKtxConnectionMapping( - { - command: 'set-sync-enabled', - projectDir, - connectionId: 'prod-metabase', - metabaseDatabaseId: 1, - enabled: false, - }, - makeIo().io, - ), - ).resolves.toBe(0); - - config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections['prod-metabase']?.mappings).toMatchObject({ - databaseMappings: { '1': 'prod-warehouse' }, - syncEnabled: { '1': false }, - }); - - await expect( - runKtxConnectionMapping( - { - command: 'clear', - projectDir, - connectionId: 'prod-metabase', - metabaseDatabaseId: 1, - }, - makeIo().io, - ), - ).resolves.toBe(0); - - config = parseKtxProjectConfig(await readFile(join(projectDir, 
'ktx.yaml'), 'utf-8')); - expect(config.connections['prod-metabase']?.mappings).toBeUndefined(); - }); - - it('lists Metabase yaml mapping bootstrap rows before any SQLite command writes', async () => { - const projectDir = await mkdtemp(join(tmpdir(), 'ktx-cli-yaml-mapping-')); - await initKtxProject({ projectDir, projectName: 'yaml-mapping' }); - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig({ - ...project.config, - connections: { - 'prod-metabase': { - driver: 'metabase', - mappings: { - databaseMappings: { '1': 'prod-warehouse' }, - syncEnabled: { '1': true }, - }, - }, - 'prod-warehouse': { driver: 'postgres', url: 'postgresql://readonly@db.test/analytics' }, - }, - }), - 'ktx', - 'ktx@example.com', - 'Seed yaml mappings', - ); - const io = makeIo(); - - await expect( - runKtxConnectionMapping( - { command: 'list', projectDir, connectionId: 'prod-metabase', json: false }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('1 -> prod-warehouse'); - expect(io.stdout()).toContain('source: ktx.yaml'); - }); - - it('refreshes Metabase discovery metadata through the injected runtime client', async () => { - const client = { - getDatabases: vi.fn().mockResolvedValue([ - { - id: 1, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ]), - cleanup: vi.fn(), - }; - const io = makeIo(); - - await expect( - runKtxConnectionMapping( - { - command: 'refresh', - projectDir, - connectionId: 'prod-metabase', - autoAccept: true, - }, - io.io, - { - createMetabaseClient: async () => client as never, - }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Discovery: 1 database'); - expect(client.cleanup).toHaveBeenCalledTimes(1); - const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - 
expect(config.connections['prod-metabase']?.mappings).toBeUndefined(); - const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: join(projectDir, '.ktx', 'db.sqlite') }); - await expect(discoveryCache.listDiscoveredDatabases('prod-metabase')).resolves.toMatchObject([ - { id: 1, name: 'Analytics', engine: 'postgres' }, - ]); - }); - - it('sets and lists Looker connection mappings', async () => { - await replaceConnections({ - 'prod-looker': { - driver: 'looker', - base_url: 'https://looker.example.test', - client_id: 'id', - }, - 'prod-warehouse': { - driver: 'postgres', - url: 'postgresql://readonly@db.example.test/analytics', - }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionMapping( - { - command: 'set', - projectDir, - connectionId: 'prod-looker', - field: 'connectionMappings', - key: 'analytics', - value: 'prod-warehouse', - }, - io.io, - ), - ).resolves.toBe(0); - await expect( - runKtxConnectionMapping({ command: 'list', projectDir, connectionId: 'prod-looker', json: false }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('analytics -> prod-warehouse'); - }); - - it('keeps driver-specific mapping field validation in the runner', async () => { - await replaceConnections({ - 'prod-looker': { driver: 'looker', base_url: 'https://looker.example.com' }, - warehouse: { driver: 'postgres', url: 'env:WAREHOUSE_URL' }, - }); - - const io = makeIo(); - await expect( - runKtxConnectionMapping( - { - command: 'set', - projectDir, - connectionId: 'prod-looker', - field: 'databaseMappings', - key: '1', - value: 'warehouse', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Looker mapping set requires connectionMappings'); - }); - - it('refreshes Looker mapping metadata and reports drift', async () => { - await replaceConnections({ - 'prod-looker': { - driver: 'looker', - base_url: 'https://looker.example.test', - client_id: 'id', - }, - 'prod-warehouse': { - driver: 'postgres', - url: 
'postgresql://readonly@db.example.test/analytics', - }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionMapping( - { command: 'refresh', projectDir, connectionId: 'prod-looker', autoAccept: true }, - io.io, - { - createLookerClient: async () => ({ - listLookerConnections: async () => [ - { - name: 'analytics', - host: 'db.example.test', - database: 'analytics', - schema: null, - dialect: 'postgres', - }, - ], - cleanup: async () => {}, - }), - }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Discovery: 1 connection'); - expect(io.stdout()).toContain('Unmapped discovered: 1'); - }); - - it('validates Looker mappings through the canonical local warehouse descriptor', async () => { - const projectDir = await mkdtemp(join(tmpdir(), 'ktx-cli-descriptor-validation-')); - await initKtxProject({ projectDir, projectName: 'descriptor-validation' }); - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig({ - ...project.config, - connections: { - 'prod-looker': { - driver: 'looker', - mappings: { connectionMappings: { analytics: 'prod-warehouse' } }, - }, - 'prod-warehouse': { driver: 'postgresql', url: 'postgresql://readonly@db.test/analytics' }, - }, - }), - 'ktx', - 'ktx@example.com', - 'Seed descriptor validation', - ); - const io = makeIo(); - - await expect( - runKtxConnectionMapping({ command: 'validate', projectDir, connectionId: 'prod-looker' }, io.io), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Mapping validation passed: prod-looker'); - expect(io.stderr()).toBe(''); - }); -}); diff --git a/packages/cli/src/commands/connection-mapping.ts b/packages/cli/src/commands/connection-mapping.ts deleted file mode 100644 index 5bae8e6e..00000000 --- a/packages/cli/src/commands/connection-mapping.ts +++ /dev/null @@ -1,507 +0,0 @@ -import { readFile } from 'node:fs/promises'; -import { localConnectionToWarehouseDescriptor } from 
'@ktx/context/connections'; -import { - DEFAULT_METABASE_CLIENT_CONFIG, - DefaultLookerConnectionClientFactory, - DefaultMetabaseConnectionClientFactory, - KtxYamlMetabaseSourceStateReader, - LocalLookerRuntimeStore, - LocalMetabaseDiscoveryCache, - computeLookerMappingDrift, - computeMetabaseMappingDrift, - discoverLookerConnections, - discoverMetabaseDatabases, - lookerCredentialsFromLocalConnection, - metabaseRuntimeConfigFromLocalConnection, - seedLocalMappingStateFromKtxYaml, - validateLookerMappings, - validateMappingPhysicalMatch, - type LookerMappingClient, - type LocalMetabaseMappingListRow, - type MetabaseRuntimeClient, - type MetabaseSyncMode, -} from '@ktx/context/ingest'; -import { - type KtxLocalProject, - type KtxProjectConfig, - ktxLocalStateDbPath, - loadKtxProject, - parseMetabaseMappingBootstrap, - serializeKtxProjectConfig, -} from '@ktx/context/project'; -import type { KtxCliIo } from '../index.js'; -import { profileMark } from '../startup-profile.js'; - -profileMark('module:commands/connection-mapping'); - -export type KtxConnectionMappingArgs = - | { command: 'list'; projectDir: string; connectionId: string; json: boolean } - | { - command: 'set'; - projectDir: string; - connectionId: string; - field: 'databaseMappings' | 'connectionMappings'; - key: string; - value: string; - } - | { command: 'apply-bulk'; projectDir: string; connectionId: string; filePath: string } - | { - command: 'set-sync-enabled'; - projectDir: string; - connectionId: string; - metabaseDatabaseId: number; - enabled: boolean; - } - | { command: 'sync-state-get'; projectDir: string; connectionId: string; json: boolean } - | { - command: 'sync-state-set'; - projectDir: string; - connectionId: string; - syncMode: MetabaseSyncMode; - collectionIds: number[]; - itemIds: number[]; - tagNames: string[]; - } - | { command: 'refresh'; projectDir: string; connectionId: string; autoAccept: boolean } - | { command: 'validate'; projectDir: string; connectionId: string } - | { 
command: 'clear'; projectDir: string; connectionId: string; metabaseDatabaseId?: number; mappingKey?: string }; - -interface KtxConnectionMappingDeps { - createMetabaseClient?: ( - project: KtxLocalProject, - connectionId: string, - ) => Promise>; - createLookerClient?: ( - project: KtxLocalProject, - connectionId: string, - ) => Promise & { cleanup?(): Promise }>; -} - -interface MetabaseBulkMappingPayload { - databaseMappings?: Record; - syncEnabled?: Record; - syncMode?: MetabaseSyncMode; - selections?: { collections?: number[]; items?: number[] }; - defaultTagNames?: string[]; -} - -function parseId(value: string, label: string): number { - const parsed = Number(value); - if (!Number.isInteger(parsed) || parsed < 1) { - throw new Error(`${label} must be a positive integer`); - } - return parsed; -} - -interface MetabaseMappingsBlock { - databaseMappings: Record; - syncEnabled: Record; - syncMode: MetabaseSyncMode; - selections: { collections: number[]; items: number[] }; - defaultTagNames: string[]; -} - -function currentMetabaseMappings(project: KtxLocalProject, connectionId: string): MetabaseMappingsBlock { - const connection = project.config.connections[connectionId]; - if (!connection) { - throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); - } - const bootstrap = parseMetabaseMappingBootstrap(connectionId, connection); - return { - databaseMappings: { ...bootstrap.databaseMappings }, - syncEnabled: { ...bootstrap.syncEnabled }, - syncMode: bootstrap.syncMode, - selections: { - collections: [...bootstrap.selections.collections], - items: [...bootstrap.selections.items], - }, - defaultTagNames: [...bootstrap.defaultTagNames], - }; -} - -function hasMetabaseMappings(block: MetabaseMappingsBlock): boolean { - return ( - Object.keys(block.databaseMappings).length > 0 || - Object.keys(block.syncEnabled).length > 0 || - block.syncMode !== 'ALL' || - block.selections.collections.length > 0 || - block.selections.items.length > 0 || - 
block.defaultTagNames.length > 0 - ); -} - -function serializeMetabaseMappingsBlock(block: MetabaseMappingsBlock): Record | undefined { - if (!hasMetabaseMappings(block)) { - return undefined; - } - return { - databaseMappings: block.databaseMappings, - syncEnabled: block.syncEnabled, - syncMode: block.syncMode, - selections: block.selections, - defaultTagNames: block.defaultTagNames, - }; -} - -async function writeMetabaseMappings( - project: KtxLocalProject, - connectionId: string, - block: MetabaseMappingsBlock, - message: string, -): Promise { - const connection = project.config.connections[connectionId]; - if (!connection) { - throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); - } - const mappings = serializeMetabaseMappingsBlock(block); - const nextConnection = { ...connection }; - if (mappings) { - nextConnection.mappings = mappings; - } else { - delete nextConnection.mappings; - } - const nextConfig: KtxProjectConfig = { - ...project.config, - connections: { - ...project.config.connections, - [connectionId]: nextConnection, - }, - }; - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(nextConfig), - 'ktx', - 'ktx@example.com', - message, - ); -} - -async function createDefaultMetabaseClient( - project: KtxLocalProject, - connectionId: string, -): Promise> { - const factory = new DefaultMetabaseConnectionClientFactory( - (metabaseConnectionId) => - metabaseRuntimeConfigFromLocalConnection(metabaseConnectionId, project.config.connections[metabaseConnectionId]), - DEFAULT_METABASE_CLIENT_CONFIG, - ); - return factory.createClient(connectionId); -} - -async function createDefaultLookerClient( - project: KtxLocalProject, - connectionId: string, -): Promise & { cleanup?(): Promise }> { - const factory = new DefaultLookerConnectionClientFactory({ - async resolve(lookerConnectionId) { - return lookerCredentialsFromLocalConnection(lookerConnectionId, project.config.connections[lookerConnectionId]); - }, - }); - 
return factory.createClient(connectionId) as unknown as Pick & { - cleanup?(): Promise; - }; -} - -function isLookerConnection(project: KtxLocalProject, connectionId: string): boolean { - return String(project.config.connections[connectionId]?.driver ?? '').toLowerCase() === 'looker'; -} - -function assertLookerConnection(project: KtxLocalProject, connectionId: string): void { - if (!isLookerConnection(project, connectionId)) { - throw new Error(`Connection "${connectionId}" is not a Looker connection`); - } -} - -function assertMetabaseConnection(project: KtxLocalProject, connectionId: string): void { - const connection = project.config.connections[connectionId]; - if (!connection || String(connection.driver).toLowerCase() !== 'metabase') { - throw new Error(`Connection "${connectionId}" is not a Metabase connection`); - } -} - -function assertTargetConnection(project: KtxLocalProject, connectionId: string): void { - if (!project.config.connections[connectionId]) { - throw new Error(`Target connection "${connectionId}" does not exist`); - } -} - -function targetPhysicalInfo(project: KtxLocalProject, connectionId: string) { - const descriptor = localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId]); - if (!descriptor) { - return { connection_type: 'UNKNOWN' }; - } - return { - connection_type: descriptor.connection_type, - host: descriptor.host ?? null, - database: descriptor.database ?? null, - account: descriptor.account ?? null, - project_id: descriptor.project_id ?? null, - dataset_id: descriptor.dataset_id ?? null, - ...descriptor.connection_params, - }; -} - -function renderMapping(row: LocalMetabaseMappingListRow): string { - const name = row.metabaseDatabaseName ?? 'unhydrated'; - const target = row.targetConnectionId ?? '[unmapped]'; - return `${row.metabaseDatabaseId} -> ${target} (${name}, sync: ${row.syncEnabled ? 
'on' : 'off'}, source: ${ - row.source - })`; -} - -function renderLookerMapping(row: Awaited>[number]): string { - const target = row.ktxConnectionId ?? '[unmapped]'; - const metadata = [row.lookerDialect, row.lookerHost, row.lookerDatabase].filter(Boolean).join(', '); - return `${row.lookerConnectionName} -> ${target}${metadata ? ` (${metadata}, source: ${row.source})` : ` (source: ${row.source})`}`; -} - -export async function runKtxConnectionMapping( - args: KtxConnectionMappingArgs, - io: KtxCliIo = process, - deps: KtxConnectionMappingDeps = {}, -): Promise { - try { - const project = await loadKtxProject({ projectDir: args.projectDir }); - await seedLocalMappingStateFromKtxYaml(project, args.connectionId); - if (isLookerConnection(project, args.connectionId)) { - assertLookerConnection(project, args.connectionId); - const store = new LocalLookerRuntimeStore({ dbPath: ktxLocalStateDbPath(project) }); - - if (args.command === 'list') { - const rows = await store.listConnectionMappings(args.connectionId); - io.stdout.write(args.json ? `${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderLookerMapping).join('\n')}\n`); - return 0; - } - - if (args.command === 'set') { - if (args.field !== 'connectionMappings') { - throw new Error('Looker mapping set requires connectionMappings ='); - } - assertTargetConnection(project, args.value); - await store.upsertConnectionMapping({ - lookerConnectionId: args.connectionId, - lookerConnectionName: args.key, - ktxConnectionId: args.value, - source: 'cli', - }); - io.stdout.write(`Set connectionMappings.${args.key} = ${args.value}\n`); - return 0; - } - - if (args.command === 'refresh') { - const client = await (deps.createLookerClient ?? 
createDefaultLookerClient)(project, args.connectionId); - try { - const discovered = await discoverLookerConnections(client); - const drift = computeLookerMappingDrift({ - storedMappings: await store.readMappings(args.connectionId), - discovered, - }); - if (args.autoAccept) { - await store.refreshDiscoveredConnections({ lookerConnectionId: args.connectionId, discovered }); - } - io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 'connection' : 'connections'}\n`); - io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); - io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); - return 0; - } finally { - await client.cleanup?.(); - } - } - - if (args.command === 'validate') { - const knownKtxConnectionIds = new Set(Object.keys(project.config.connections)); - const knownConnectionTypes = new Map( - Object.entries(project.config.connections).map(([id, _config]) => [id, targetPhysicalInfo(project, id).connection_type]), - ); - const validation = validateLookerMappings({ - mappings: await store.readMappings(args.connectionId), - knownKtxConnectionIds, - knownConnectionTypes, - }); - if (!validation.ok) { - for (const error of validation.errors) { - io.stderr.write(`${error.key}: ${error.reason}\n`); - } - return 1; - } - io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); - return 0; - } - - if (args.command === 'clear') { - await store.clearConnectionMappings({ - lookerConnectionId: args.connectionId, - lookerConnectionName: args.mappingKey ?? (args.metabaseDatabaseId ? String(args.metabaseDatabaseId) : undefined), - }); - io.stdout.write( - args.mappingKey - ? 
`Cleared connectionMappings.${args.mappingKey}\n` - : `Cleared mappings for ${args.connectionId}\n`, - ); - return 0; - } - - throw new Error(`Looker connection mapping does not support ${args.command}`); - } - - assertMetabaseConnection(project, args.connectionId); - const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); - const metabaseStateReader = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }); - - if (args.command === 'list') { - const rows = await metabaseStateReader.listDatabaseMappings(args.connectionId); - io.stdout.write(args.json ? `${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderMapping).join('\n')}\n`); - return 0; - } - - if (args.command === 'set') { - if (args.field !== 'databaseMappings') { - throw new Error('Metabase mapping set requires databaseMappings ='); - } - assertTargetConnection(project, args.value); - const block = currentMetabaseMappings(project, args.connectionId); - const metabaseDatabaseId = String(parseId(args.key, 'metabaseDatabaseId')); - block.databaseMappings[metabaseDatabaseId] = args.value; - block.syncEnabled[metabaseDatabaseId] = true; - await writeMetabaseMappings(project, args.connectionId, block, `Set Metabase mapping ${args.connectionId}.${metabaseDatabaseId}`); - io.stdout.write(`Set databaseMappings.${args.key} = ${args.value}\n`); - return 0; - } - - if (args.command === 'apply-bulk') { - const payload = JSON.parse(await readFile(args.filePath, 'utf8')) as MetabaseBulkMappingPayload; - const block = currentMetabaseMappings(project, args.connectionId); - const databaseMappings = payload.databaseMappings ?? {}; - for (const targetConnectionId of Object.values(databaseMappings)) { - if (targetConnectionId) { - assertTargetConnection(project, targetConnectionId); - } - } - for (const id of Object.keys(databaseMappings)) { - parseId(id, 'metabaseDatabaseId'); - block.databaseMappings[id] = databaseMappings[id] ?? 
null; - } - for (const [id, enabled] of Object.entries(payload.syncEnabled ?? {})) { - parseId(id, 'metabaseDatabaseId'); - block.syncEnabled[id] = enabled; - } - if (payload.syncMode !== undefined) { - block.syncMode = payload.syncMode; - } - if (payload.defaultTagNames !== undefined) { - block.defaultTagNames = payload.defaultTagNames; - } - if (payload.selections !== undefined) { - block.selections = { - collections: payload.selections.collections ?? [], - items: payload.selections.items ?? [], - }; - } - await writeMetabaseMappings(project, args.connectionId, block, `Apply Metabase mappings ${args.connectionId}`); - io.stdout.write(`Applied bulk mappings for ${args.connectionId}\n`); - return 0; - } - - if (args.command === 'set-sync-enabled') { - const block = currentMetabaseMappings(project, args.connectionId); - block.syncEnabled[String(args.metabaseDatabaseId)] = args.enabled; - await writeMetabaseMappings( - project, - args.connectionId, - block, - `Set Metabase sync ${args.connectionId}.${args.metabaseDatabaseId}`, - ); - io.stdout.write(`Set syncEnabled.${args.metabaseDatabaseId} = ${args.enabled}\n`); - return 0; - } - - if (args.command === 'sync-state-get') { - const state = await metabaseStateReader.getSourceState(args.connectionId); - const payload = { - syncMode: state.syncMode, - selections: state.selections, - defaultTagNames: state.defaultTagNames, - }; - io.stdout.write(args.json ? 
`${JSON.stringify(payload, null, 2)}\n` : `${payload.syncMode}\n`); - return 0; - } - - if (args.command === 'sync-state-set') { - const block = currentMetabaseMappings(project, args.connectionId); - block.syncMode = args.syncMode; - block.defaultTagNames = args.tagNames; - block.selections = { collections: args.collectionIds, items: args.itemIds }; - await writeMetabaseMappings(project, args.connectionId, block, `Set Metabase sync state ${args.connectionId}`); - io.stdout.write(`Set sync state for ${args.connectionId}\n`); - return 0; - } - - if (args.command === 'refresh') { - const client = await (deps.createMetabaseClient ?? createDefaultMetabaseClient)(project, args.connectionId); - try { - const discovered = await discoverMetabaseDatabases(client); - const block = currentMetabaseMappings(project, args.connectionId); - const existing = block.databaseMappings; - const drift = computeMetabaseMappingDrift({ currentMappings: existing, discovered }); - if (args.autoAccept) { - await discoveryCache.refreshDiscoveredDatabases({ connectionId: args.connectionId, discovered }); - } - io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); - io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); - io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); - return 0; - } finally { - await client.cleanup(); - } - } - - if (args.command === 'validate') { - const rows = (await metabaseStateReader.listDatabaseMappings(args.connectionId)).filter( - (row) => row.source === 'ktx.yaml', - ); - const failures = rows.flatMap((row) => { - if (!row.targetConnectionId) { - return []; - } - const reason = validateMappingPhysicalMatch( - { metabaseEngine: row.metabaseEngine, metabaseDbName: row.metabaseDbName, metabaseHost: row.metabaseHost }, - project.config.connections[row.targetConnectionId] - ? 
targetPhysicalInfo(project, row.targetConnectionId) - : { connection_type: 'UNKNOWN' }, - ); - return reason ? [`${row.metabaseDatabaseId}: ${reason}`] : []; - }); - if (failures.length > 0) { - for (const failure of failures) { - io.stderr.write(`${failure}\n`); - } - return 1; - } - io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); - return 0; - } - - const metabaseDatabaseId = args.metabaseDatabaseId ?? (args.mappingKey ? parseId(args.mappingKey, 'metabaseDatabaseId') : undefined); - const block = currentMetabaseMappings(project, args.connectionId); - if (metabaseDatabaseId === undefined) { - block.databaseMappings = {}; - block.syncEnabled = {}; - block.syncMode = 'ALL'; - block.selections = { collections: [], items: [] }; - block.defaultTagNames = []; - } else { - delete block.databaseMappings[String(metabaseDatabaseId)]; - delete block.syncEnabled[String(metabaseDatabaseId)]; - } - await writeMetabaseMappings(project, args.connectionId, block, `Clear Metabase mappings ${args.connectionId}`); - io.stdout.write( - metabaseDatabaseId - ? `Cleared databaseMappings.${metabaseDatabaseId}\n` - : `Cleared mappings for ${args.connectionId}\n`, - ); - return 0; - } catch (error) { - io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); - return 1; - } -} diff --git a/packages/cli/src/commands/connection-metabase-commands.ts b/packages/cli/src/commands/connection-metabase-commands.ts deleted file mode 100644 index c20b8e86..00000000 --- a/packages/cli/src/commands/connection-metabase-commands.ts +++ /dev/null @@ -1,132 +0,0 @@ -import { type Command, Option } from '@commander-js/extra-typings'; - -import { - type KtxCliCommandContext, - parseNonEmptyAssignmentOption, - parsePositiveIntegerOption, - parseSafeConnectionIdOption, - resolveCommandProjectDir, -} from '../cli-program.js'; -import { - type KtxConnectionMetabaseSetupArgs, - type MetabaseSetupMappingAssignment, - type MetabaseSetupSyncMode, - runKtxConnectionMetabaseSetup, -} from './connection-metabase-setup.js'; - -const SYNC_MODE_CHOICES = ['ALL', 'ONLY', 'EXCEPT'] as const satisfies readonly MetabaseSetupSyncMode[]; - -interface ConnectionMetabaseSetupOptions { - id?: string; - url?: string; - apiKey?: string; - mintApiKey?: boolean; - username?: string; - password?: string; - map: MetabaseSetupMappingAssignment[]; - sync: number[]; - syncMode: MetabaseSetupSyncMode; - runIngest?: boolean; - yes?: boolean; - input?: boolean; -} - -function collectPositiveIntegerOption(value: string, previous: number[] = []): number[] { - return [...previous, parsePositiveIntegerOption(value)]; -} - -function parseMappingAssignment(value: string): MetabaseSetupMappingAssignment { - const assignment = parseNonEmptyAssignmentOption(value); - return { - metabaseDatabaseId: parsePositiveIntegerOption(assignment.key), - targetConnectionId: parseSafeConnectionIdOption(assignment.value), - }; -} - -function collectMappingOption( - value: string, - previous: MetabaseSetupMappingAssignment[] = [], -): MetabaseSetupMappingAssignment[] { - return [...previous, parseMappingAssignment(value)]; -} - -async function runMetabaseSetupArgs( - context: KtxCliCommandContext, - args: KtxConnectionMetabaseSetupArgs, -): Promise { - const 
runner = context.deps.connectionMetabaseSetup ?? runKtxConnectionMetabaseSetup; - context.setExitCode(await runner(args, context.io)); -} - -export function registerConnectionMetabaseCommands(connection: Command, context: KtxCliCommandContext): void { - const metabase = connection - .command('metabase') - .description('Configure Metabase connections') - .showHelpAfterError() - .addHelpText( - 'after', - '\nProject directory defaults to KTX_PROJECT_DIR when set, otherwise the current working directory.\n', - ); - - metabase.action(() => { - metabase.outputHelp(); - context.setExitCode(0); - }); - - metabase - .command('setup') - .description('Guided setup for a Metabase connection') - .option('--id ', 'KTX connection id to write', parseSafeConnectionIdOption) - .option('--url ', 'Metabase API URL') - .addOption(new Option('--api-key ', 'Metabase API key').conflicts('mintApiKey')) - .option('--mint-api-key', 'Mint a Metabase API key with credentials', false) - .option('--username ', 'Metabase admin username for API-key minting') - .option('--password ', 'Metabase admin password for API-key minting') - .addHelpText( - 'after', - '\nGuided equivalent of:\n' + - ' ktx connection mapping refresh --auto-accept\n' + - ' ktx connection mapping set databaseMappings =\n' + - ' ktx connection mapping set-sync-enabled --enabled true\n' + - ' ktx ingest run --connection-id --adapter metabase\n', - ) - .option( - '--map ', - 'Assign a Metabase database id to a warehouse connection; repeatable', - collectMappingOption, - [], - ) - .option( - '--sync ', - 'Enable Metabase sync for a discovered database; repeatable', - collectPositiveIntegerOption, - [], - ) - .addOption( - new Option('--sync-mode ', 'Metabase sync selection mode') - .choices(SYNC_MODE_CHOICES) - .default('ALL' satisfies MetabaseSetupSyncMode), - ) - .option('--run-ingest', 'Run ingest after setup', false) - .option('--yes', 'Confirm and apply setup changes without prompting', false) - .option('--no-input', 'Disable 
interactive terminal input') - .showHelpAfterError() - .action(async (options: ConnectionMetabaseSetupOptions, command) => { - await runMetabaseSetupArgs(context, { - command: 'setup', - projectDir: resolveCommandProjectDir(command), - connectionId: options.id, - url: options.url, - apiKey: options.apiKey, - mintApiKey: options.mintApiKey === true, - metabaseUsername: options.username, - metabasePassword: options.password, - mappings: options.map, - syncEnabledDatabaseIds: options.sync, - syncMode: options.syncMode ?? 'ALL', - runIngest: options.runIngest === true, - yes: options.yes === true, - inputMode: options.input === false ? 'disabled' : 'auto', - }); - }); -} diff --git a/packages/cli/src/commands/connection-metabase-setup.test.ts b/packages/cli/src/commands/connection-metabase-setup.test.ts deleted file mode 100644 index 7b7b7b84..00000000 --- a/packages/cli/src/commands/connection-metabase-setup.test.ts +++ /dev/null @@ -1,1118 +0,0 @@ -import { mkdtemp, readFile, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { KtxYamlMetabaseSourceStateReader, LocalMetabaseDiscoveryCache } from '@ktx/context/ingest'; -import { initKtxProject, ktxLocalStateDbPath, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; - -import { runKtxConnectionMetabaseSetup } from './connection-metabase-setup.js'; - -const CANCEL_PROMPT = Symbol('cancel'); - -async function metabaseMappingRows(projectDir: string, connectionId = 'metabase') { - const project = await loadKtxProject({ projectDir }); - const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); - return new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }).listDatabaseMappings(connectionId); -} - -function createTestMetabaseSetupPromptAdapter(options: { - selects?: Array; - multiselects?: Array | typeof CANCEL_PROMPT>; - 
texts?: Array; - passwords?: Array; - confirms?: Array; - events?: string[]; -}) { - const selects = [...(options.selects ?? [])]; - const multiselects = [...(options.multiselects ?? [])]; - const texts = [...(options.texts ?? [])]; - const passwords = [...(options.passwords ?? [])]; - const confirms = [...(options.confirms ?? [])]; - const events = options.events ?? []; - - const cancelWithError = () => { - throw new Error('Setup cancelled.'); - }; - - return { - intro(title?: string): void { - events.push(`intro:${title ?? ''}`); - }, - outro(message?: string): void { - events.push(`outro:${message ?? ''}`); - }, - note(message: string, title: string): void { - events.push(`note:${title}:${message}`); - }, - log: { - info(message: string): void { - events.push(`log.info:${message}`); - }, - step(message: string): void { - events.push(`log.step:${message}`); - }, - success(message: string): void { - events.push(`log.success:${message}`); - }, - warn(message: string): void { - events.push(`log.warn:${message}`); - }, - error(message: string): void { - events.push(`log.error:${message}`); - }, - }, - spinner() { - return { - start(message: string): void { - events.push(`spinner.start:${message}`); - }, - stop(message: string): void { - events.push(`spinner.stop:${message}`); - }, - error(message: string): void { - events.push(`spinner.error:${message}`); - }, - }; - }, - async select(): Promise { - const next = selects.shift(); - if (next === CANCEL_PROMPT) { - cancelWithError(); - } - return next as T; - }, - async multiselect(options?: { message: string }): Promise { - events.push(`multiselect:${options?.message ?? ''}`); - const next = multiselects.shift(); - if (next === CANCEL_PROMPT) { - cancelWithError(); - } - return (next ?? []) as Value[]; - }, - async text(): Promise { - const next = texts.shift(); - if (next === CANCEL_PROMPT) { - cancelWithError(); - } - return (next ?? 
'').toString(); - }, - async password(): Promise { - const next = passwords.shift(); - if (next === CANCEL_PROMPT) { - cancelWithError(); - } - return (next ?? '').toString(); - }, - async confirm(): Promise { - const next = confirms.shift(); - if (next === CANCEL_PROMPT) { - cancelWithError(); - } - return next === true; - }, - cancel(): void { - return; - }, - }; -} - -function makeIo(options: { isTTY?: boolean; stdinIsTTY?: boolean } = {}) { - let stdout = ''; - let stderr = ''; - return { - io: { - stdin: { - isTTY: options.stdinIsTTY, - }, - stdout: { - isTTY: options.isTTY, - write: (chunk: string) => { - stdout += chunk; - }, - }, - stderr: { - write: (chunk: string) => { - stderr += chunk; - }, - }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -describe('runKtxConnectionMetabaseSetup', () => { - const fakeMetabaseCredential = 'mb_example'; - const existingMetabaseCredential = 'mb_existing'; - const fakeAdminCredential = 'admin-secret-value-123'; - - let tempDir: string; - let projectDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-metabase-setup-')); - projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'metabase-setup' }); - }); - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - async function writeConnections(connections: Record) { - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig({ - ...project.config, - connections, - }), - 'ktx', - 'ktx@example.com', - 'Seed Metabase setup test connections', - ); - } - - function makeMetabaseClient(options: { - testConnectionSuccess: boolean; - databases: Array<{ - id: number; - name: string; - engine: string; - details?: { host?: string; dbname?: string }; - is_sample?: boolean; - }>; - }) { - return { - testConnection: vi.fn().mockResolvedValue({ success: options.testConnectionSuccess }), - 
getDatabases: vi.fn().mockResolvedValue(options.databases), - cleanup: vi.fn().mockResolvedValue(undefined), - }; - } - - it('covers the headless happy path', async () => { - await writeConnections({ - orbit: { - driver: 'postgres', - url: 'postgresql://readonly@pg.internal/analytics', - readonly: true, - }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [2], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Connection: metabase'); - expect(io.stdout()).toContain('Discovered 1 database'); - expect(io.stdout()).toContain(`ktx ingest run --connection-id metabase --adapter metabase --project-dir ${projectDir}`); - expect(io.stdout()).not.toContain('mb_example'); - expect(io.stderr()).not.toContain('mb_example'); - - const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(config).toContain('driver: metabase'); - expect(config).toContain('api_url: http://metabase.example.test:3000'); - expect(config).toContain('api_key: mb_example'); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { - metabaseDatabaseId: 2, - metabaseDatabaseName: 'Analytics', - targetConnectionId: 'orbit', - syncEnabled: true, - }, - ]); - }); - - it('auto-maps and enables sync in --no-input --yes when deterministic', async () => { - await writeConnections({ - 
orbit: { - driver: 'postgres', - url: 'postgresql://readonly@pg.internal/analytics', - readonly: true, - }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(0); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, - ]); - }); - - it('fails in --no-input when mapping/sync are missing and --yes is false', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: false, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toMatch(/--map/i); - expect(io.stderr()).toMatch(/--sync/i); - }); - - it('enables sync for explicitly mapped databases in --no-input --yes 
when --sync is omitted', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(0); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, - ]); - }); - - it('fails in no-input mode when the Metabase URL is missing', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('missing Metabase URL'); - }); - - it('fails in no-input mode when the Metabase API key is missing', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 
'http://metabase.example.test:3000', - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('missing Metabase API key'); - }); - - it('names missing minting flags before rejecting minting', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - - const missingUsernameIo = makeIo(); - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - mintApiKey: true, - metabasePassword: fakeAdminCredential, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - missingUsernameIo.io, - ), - ).resolves.toBe(1); - expect(missingUsernameIo.stderr()).toContain('--username'); - - const missingPasswordIo = makeIo(); - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - mintApiKey: true, - metabaseUsername: 'user', - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - missingPasswordIo.io, - ), - ).resolves.toBe(1); - expect(missingPasswordIo.stderr()).toContain('--password'); - - const mintedMetabaseCredential = 'mb_minted'; - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const createMetabaseClient = vi.fn(async () => metabaseClient as never); - const mintMetabaseApiKey = vi.fn(async () => mintedMetabaseCredential); - const mintingIo = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { 
- command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - mintApiKey: true, - metabaseUsername: 'user', - metabasePassword: fakeAdminCredential, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [2], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - mintingIo.io, - { createMetabaseClient, mintMetabaseApiKey }, - ), - ).resolves.toBe(0); - - expect(mintMetabaseApiKey).toHaveBeenCalledTimes(1); - expect(mintMetabaseApiKey).toHaveBeenCalledWith( - expect.objectContaining({ - url: 'http://metabase.example.test:3000', - username: 'user', - password: fakeAdminCredential, - }), - expect.anything(), - ); - - expect(createMetabaseClient).toHaveBeenCalledTimes(1); - expect(mintingIo.stdout()).not.toContain(mintedMetabaseCredential); - expect(mintingIo.stderr()).not.toContain(mintedMetabaseCredential); - expect(mintingIo.stdout()).not.toContain(fakeAdminCredential); - expect(mintingIo.stderr()).not.toContain(fakeAdminCredential); - - const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(config).toContain('driver: metabase'); - expect(config).toContain('api_url: http://metabase.example.test:3000'); - expect(config).toContain(`api_key: ${mintedMetabaseCredential}`); - }); - - it('requires at least one warehouse connection', async () => { - await writeConnections({}); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Add a warehouse connection first'); - }); - - it('fails in --no-input --yes when a deterministic warehouse mapping cannot be 
derived', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - warehouse2: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toMatch(/--map/i); - expect(io.stderr()).toMatch(/--sync/i); - }); - - it('auto-enables sync in --no-input --yes from explicit mappings even when multiple databases are discovered', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 1, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - { - id: 2, - name: 'Finance', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'finance' }, - is_sample: false, - }, - ], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 1, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [], - 
syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(0); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { metabaseDatabaseId: 1, targetConnectionId: 'orbit', syncEnabled: true }, - { metabaseDatabaseId: 2, targetConnectionId: null, syncEnabled: false }, - ]); - }); - - it('suggests updating api_key or using minting when authentication fails', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const metabaseClient = makeMetabaseClient({ testConnectionSuccess: false, databases: [] }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('connections.metabase.api_key'); - expect(io.stderr()).toContain('--mint-api-key'); - expect(io.stderr()).not.toContain('mb_example'); - }); - - it('fails when Metabase returns no usable databases', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [{ id: 1, name: 'Sample', engine: 'h2', is_sample: true }], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - 
syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('no usable databases'); - }); - - it('preserves setup writes when --run-ingest fails and reports the debug command', async () => { - await writeConnections({ - orbit: { - driver: 'postgres', - url: 'postgresql://readonly@pg.internal/analytics', - readonly: true, - }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [2], - syncMode: 'ALL', - runIngest: true, - yes: true, - inputMode: 'disabled', - }, - io.io, - { - createMetabaseClient: async () => metabaseClient as never, - runPublicIngest: vi.fn(async () => 1), - }, - ), - ).resolves.toBe(1); - - const config = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(config).toContain('driver: metabase'); - expect(io.stderr()).toContain(`ktx ingest run --connection-id metabase --adapter metabase --project-dir ${projectDir}`); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { metabaseDatabaseId: 2, targetConnectionId: 'orbit' }, - ]); - }); - - it('reuses existing connection id and values when --id, --url, and --api-key are omitted', async () => { - await writeConnections({ - 'prod-metabase': { - driver: 'metabase', - api_url: 'http://metabase.example.test:3000', - api_key: existingMetabaseCredential, - }, - orbit: { driver: 'postgres', url: 
'postgresql://readonly@pg.internal/analytics' }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [{ id: 2, name: 'Analytics', engine: 'postgres', is_sample: false }], - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [2], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - { createMetabaseClient: async () => metabaseClient as never }, - ), - ).resolves.toBe(0); - - expect(io.stdout()).toContain('Connection: prod-metabase'); - expect(io.stdout()).not.toContain('mb_existing'); - expect(io.stderr()).not.toContain('mb_existing'); - }); - - it('covers interactive happy path when URL/key/mapping/sync are missing but deterministic', async () => { - await writeConnections({ - orbit: { - driver: 'postgres', - url: 'postgresql://readonly@pg.internal/analytics', - readonly: true, - }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true }); - const interactiveMetabaseCredential = 'mb_interactive_fixture'; - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: false, - inputMode: 'auto', - }, - io.io, - { - createMetabaseClient: async () => metabaseClient as never, - prompts: createTestMetabaseSetupPromptAdapter({ - texts: ['http://metabase.example.test:3000'], - selects: ['paste'], - passwords: [interactiveMetabaseCredential], - confirms: [true], - }), - }, - ), - ).resolves.toBe(0); - - const config = await 
readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(config).toContain('driver: metabase'); - expect(config).toContain('api_url: http://metabase.example.test:3000'); - expect(config).toContain(`api_key: ${interactiveMetabaseCredential}`); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { - metabaseDatabaseId: 2, - targetConnectionId: 'orbit', - syncEnabled: true, - }, - ]); - - expect(io.stdout()).not.toContain(interactiveMetabaseCredential); - expect(io.stderr()).not.toContain(interactiveMetabaseCredential); - }); - - it('guides interactive setup for multiple databases and warehouses', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics', readonly: true }, - warehouse2: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/finance', readonly: true }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - { - id: 3, - name: 'Finance', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'finance' }, - is_sample: false, - }, - ], - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true }); - const interactiveMetabaseCredential = 'mb_interactive_multi'; - const events: string[] = []; - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: false, - inputMode: 'auto', - }, - io.io, - { - createMetabaseClient: async () => metabaseClient as never, - prompts: createTestMetabaseSetupPromptAdapter({ - texts: ['http://metabase.example.test:3000'], - selects: ['paste', 'orbit', 'warehouse2'], - passwords: [interactiveMetabaseCredential], - multiselects: [[2, 3], [2]], - confirms: [true], - events, - }), - }, - ), - 
).resolves.toBe(0); - await expect(metabaseMappingRows(projectDir)).resolves.toMatchObject([ - { metabaseDatabaseId: 2, targetConnectionId: 'orbit', syncEnabled: true }, - { metabaseDatabaseId: 3, targetConnectionId: 'warehouse2', syncEnabled: false }, - ]); - - expect(io.stdout()).not.toContain(interactiveMetabaseCredential); - expect(io.stderr()).not.toContain(interactiveMetabaseCredential); - expect(events).toContain( - 'multiselect:Select Metabase databases to configure\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', - ); - expect(events).toContain( - 'multiselect:Enable sync for which databases?\nUse Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', - ); - }); - - it('emits guided progress via the interaction toolkit in interactive mode', async () => { - await writeConnections({ - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics', readonly: true }, - }); - - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true }); - const interactiveMetabaseCredential = 'mb_interaction_toolkit'; - const events: string[] = []; - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: false, - inputMode: 'auto', - }, - io.io, - { - createMetabaseClient: async () => metabaseClient as never, - prompts: createTestMetabaseSetupPromptAdapter({ - events, - texts: ['http://metabase.example.test:3000'], - selects: ['paste'], - passwords: [interactiveMetabaseCredential], - confirms: [true], - }), - }, - ), - ).resolves.toBe(0); - - expect(events).toContain('intro:KTX 
Metabase setup'); - expect(events.some((event) => event.startsWith('spinner.start:Testing Metabase connection'))).toBe(true); - expect(events.some((event) => event.startsWith('spinner.stop:Metabase reachable'))).toBe(true); - expect(events.some((event) => event.startsWith('spinner.start:Discovering Metabase databases'))).toBe(true); - expect(events.some((event) => event.startsWith('log.success:Discovered 1 database'))).toBe(true); - expect(events.some((event) => event.startsWith('note:Summary:'))).toBe(true); - expect(events).toContain('outro:Metabase setup complete'); - - expect(events.join('\n')).not.toContain(interactiveMetabaseCredential); - expect(io.stdout()).not.toContain(interactiveMetabaseCredential); - expect(io.stderr()).not.toContain(interactiveMetabaseCredential); - }); - - it('fails in --no-input when multiple Metabase connections exist and --id is omitted', async () => { - await writeConnections({ - metabase1: { - driver: 'metabase', - api_url: 'http://metabase.example.test:3000', - api_key: existingMetabaseCredential, - }, - metabase2: { - driver: 'metabase', - api_url: 'http://metabase.example.test:3000', - api_key: existingMetabaseCredential, - }, - orbit: { driver: 'postgres', url: 'postgresql://readonly@pg.internal/analytics' }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toMatch(/--id/i); - }); - - it('treats prompt cancellation as a clean exit without writes', async () => { - await writeConnections({ - orbit: { - driver: 'postgres', - url: 'postgresql://readonly@pg.internal/analytics', - readonly: true, - }, - }); - - const beforeConfig = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - const metabaseClient = makeMetabaseClient({ - testConnectionSuccess: 
true, - databases: [ - { - id: 2, - name: 'Analytics', - engine: 'postgres', - details: { host: 'pg.internal', dbname: 'analytics' }, - is_sample: false, - }, - ], - }); - const io = makeIo({ isTTY: true, stdinIsTTY: true }); - const cancelMetabaseCredential = 'mb_cancel_fixture'; - - await expect( - runKtxConnectionMetabaseSetup( - { - command: 'setup', - projectDir, - mintApiKey: false, - mappings: [], - syncEnabledDatabaseIds: [], - syncMode: 'ALL', - runIngest: false, - yes: false, - inputMode: 'auto', - }, - io.io, - { - createMetabaseClient: async () => metabaseClient as never, - prompts: createTestMetabaseSetupPromptAdapter({ - texts: ['http://metabase.example.test:3000'], - selects: ['paste'], - passwords: [cancelMetabaseCredential], - confirms: [CANCEL_PROMPT], - }), - }, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Setup cancelled.'); - expect(io.stderr()).not.toContain(cancelMetabaseCredential); - - const afterConfig = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(afterConfig).toBe(beforeConfig); - await expect(metabaseMappingRows(projectDir)).resolves.toEqual([]); - }); -}); diff --git a/packages/cli/src/commands/connection-metabase-setup.ts b/packages/cli/src/commands/connection-metabase-setup.ts deleted file mode 100644 index b0980c3b..00000000 --- a/packages/cli/src/commands/connection-metabase-setup.ts +++ /dev/null @@ -1,798 +0,0 @@ -import type { Option as ClackOption } from '@clack/prompts'; -import { - cancel, - confirm, - intro, - isCancel, - log, - multiselect, - note, - outro, - password, - select, - text, -} from '@clack/prompts'; -import { localConnectionToWarehouseDescriptor } from '@ktx/context/connections'; -import { - DEFAULT_METABASE_CLIENT_CONFIG, - DefaultMetabaseConnectionClientFactory, - KtxYamlMetabaseSourceStateReader, - LocalMetabaseDiscoveryCache, - MetabaseClient, - type MetabaseDatabase, - type MetabaseRuntimeClient, - type MetabaseSyncMode, - metabaseRuntimeConfigFromLocalConnection, - 
validateMappingPhysicalMatch, -} from '@ktx/context/ingest'; -import { - type KtxLocalProject, - type KtxProjectConnectionConfig, - ktxLocalStateDbPath, - loadKtxProject, - parseMetabaseMappingBootstrap, - serializeKtxProjectConfig, -} from '@ktx/context/project'; - -import { createClackSpinner, type KtxCliSpinner } from '../clack.js'; -import type { KtxCliIo } from '../cli-runtime.js'; -import { withMenuOptionsSpacing, withMultiselectNavigation } from '../prompt-navigation.js'; -import { type KtxPublicIngestArgs, runKtxPublicIngest } from '../public-ingest.js'; - -export type KtxMetabaseSetupInputMode = 'auto' | 'disabled'; - -export type MetabaseSetupSyncMode = MetabaseSyncMode; - -type MetabaseSetupPromptOption = ClackOption; - -export interface MetabaseSetupLogger { - info(message: string): void; - step(message: string): void; - success(message: string): void; - warn(message: string): void; - error(message: string): void; -} - -export interface MetabaseSetupPromptAdapter { - intro(title?: string): void; - outro(message?: string): void; - note(message: string, title: string): void; - log: MetabaseSetupLogger; - spinner(): KtxCliSpinner; - select(options: { message: string; options: Array> }): Promise; - multiselect(options: { - message: string; - options: Array>; - initialValues?: Value[]; - required?: boolean; - maxItems?: number; - }): Promise; - text(options: { message: string; placeholder?: string }): Promise; - password(options: { message: string }): Promise; - confirm(options: { message: string; initialValue?: boolean }): Promise; - cancel(message: string): void; -} - -type KtxMetabaseSetupInteractiveIo = KtxCliIo & { - stdin?: { isTTY?: boolean }; -}; - -export interface MetabaseSetupMappingAssignment { - metabaseDatabaseId: number; - targetConnectionId: string; -} - -export interface MintMetabaseApiKeyArgs { - url: string; - username: string; - password: string; -} - -export type MintMetabaseApiKey = (args: MintMetabaseApiKeyArgs, io: KtxCliIo) => 
Promise; - -export interface KtxConnectionMetabaseSetupArgs { - command: 'setup'; - projectDir: string; - connectionId?: string; - url?: string; - apiKey?: string; - mintApiKey: boolean; - metabaseUsername?: string; - metabasePassword?: string; - mappings: MetabaseSetupMappingAssignment[]; - syncEnabledDatabaseIds: number[]; - syncMode: MetabaseSetupSyncMode; - runIngest: boolean; - yes: boolean; - inputMode: KtxMetabaseSetupInputMode; -} - -export interface KtxConnectionMetabaseSetupDeps { - createMetabaseClient?: ( - project: KtxLocalProject, - connectionId: string, - ) => Promise>; - mintMetabaseApiKey?: MintMetabaseApiKey; - prompts?: MetabaseSetupPromptAdapter; - runPublicIngest?: (args: Extract, io: KtxCliIo) => Promise; -} - -function isMetabaseConnection(connection: KtxProjectConnectionConfig | undefined): boolean { - return ( - String(connection?.driver ?? '') - .trim() - .toLowerCase() === 'metabase' - ); -} - -function stringField(value: unknown): string | undefined { - return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; -} - -function uniqueSorted(values: number[]): number[] { - return [...new Set(values)].sort((a, b) => a - b); -} - -function resolveMetabaseUrl(connection: KtxProjectConnectionConfig | undefined): string | undefined { - return stringField(connection?.api_url) ?? stringField(connection?.apiUrl) ?? stringField(connection?.url); -} - -function resolveLiteralMetabaseApiKey(connection: KtxProjectConnectionConfig | undefined): string | undefined { - return stringField(connection?.api_key) ?? 
stringField(connection?.apiKey); -} - -function listMetabaseConnectionIds(project: KtxLocalProject): string[] { - return Object.entries(project.config.connections) - .filter(([_connectionId, connection]) => isMetabaseConnection(connection)) - .map(([connectionId]) => connectionId) - .sort(); -} - -function listWarehouseConnectionIds(project: KtxLocalProject): string[] { - return Object.entries(project.config.connections) - .filter(([connectionId, connection]) => localConnectionToWarehouseDescriptor(connectionId, connection) != null) - .map(([connectionId]) => connectionId) - .sort(); -} - -function redactSecrets(message: string, secrets: string[]): string { - let result = message; - for (const secret of secrets) { - if (!secret) { - continue; - } - result = result.split(secret).join('[redacted]'); - } - return result; -} - -async function createDefaultMetabaseClient( - project: KtxLocalProject, - connectionId: string, -): Promise> { - const factory = new DefaultMetabaseConnectionClientFactory( - (metabaseConnectionId) => - metabaseRuntimeConfigFromLocalConnection(metabaseConnectionId, project.config.connections[metabaseConnectionId]), - DEFAULT_METABASE_CLIENT_CONFIG, - ); - return factory.createClient(connectionId); -} - -async function defaultMintMetabaseApiKey(args: MintMetabaseApiKeyArgs): Promise { - const loginClient = new MetabaseClient({ apiUrl: args.url, apiKey: '' }, DEFAULT_METABASE_CLIENT_CONFIG); - const sessionId = await loginClient.createSession(args.username, args.password); - const sessionClient = new MetabaseClient( - { apiUrl: args.url, apiKey: sessionId, authHeaderName: 'X-Metabase-Session' }, - DEFAULT_METABASE_CLIENT_CONFIG, - ); - const groups = await sessionClient.getPermissionGroups(); - const adminGroup = groups.find((group) => group.name === 'Administrators'); - - if (!adminGroup) { - throw new Error('Metabase Administrators group was not found; create an API key manually and pass --api-key'); - } - - const mintedKey = await 
sessionClient.createApiKey({ - groupId: adminGroup.id, - name: `KTX CLI ${new Date().toISOString()}`, - }); - const trimmedKey = stringField(mintedKey); - if (!trimmedKey) { - throw new Error('Metabase API key minting returned an empty key'); - } - return trimmedKey; -} - -function ensureNotCancelled(value: T | symbol, prompts: Pick): T { - if (isCancel(value)) { - prompts.cancel('Setup cancelled.'); - throw new Error('Setup cancelled.'); - } - return value as T; -} - -export function createClackMetabaseSetupPromptAdapter(): MetabaseSetupPromptAdapter { - return { - intro(title?: string): void { - intro(title); - }, - outro(message?: string): void { - outro(message); - }, - note(message: string, title: string): void { - note(message, title); - }, - log: { - info(message: string): void { - log.info(message); - }, - step(message: string): void { - log.step(message); - }, - success(message: string): void { - log.success(message); - }, - warn(message: string): void { - log.warn(message); - }, - error(message: string): void { - log.error(message); - }, - }, - spinner(): KtxCliSpinner { - return createClackSpinner(); - }, - async select(options: { - message: string; - options: Array>; - }): Promise { - return ensureNotCancelled(await select(withMenuOptionsSpacing(options)), this); - }, - async multiselect(options: { - message: string; - options: Array>; - initialValues?: Value[]; - required?: boolean; - maxItems?: number; - }): Promise { - return ensureNotCancelled(await multiselect(withMenuOptionsSpacing(options)), this); - }, - async text(options: { message: string; placeholder?: string }): Promise { - return ensureNotCancelled(await text(options), this); - }, - async password(options: { message: string }): Promise { - return ensureNotCancelled(await password(options), this); - }, - async confirm(options: { message: string; initialValue?: boolean }): Promise { - return ensureNotCancelled(await confirm(options), this); - }, - cancel(message: string): void { - 
cancel(message); - }, - }; -} - -function isInteractiveMetabaseSetupIo( - args: Pick, - io: KtxMetabaseSetupInteractiveIo, -): boolean { - return args.inputMode !== 'disabled' && io.stdin?.isTTY === true && io.stdout.isTTY === true; -} - -function normalizeDiscoveredDatabases(databases: MetabaseDatabase[]): Array<{ - id: number; - name: string; - engine: string; - host: string | null; - dbName: string | null; -}> { - return databases - .filter((database) => database.is_sample !== true) - .map((database) => ({ - id: database.id, - name: database.name, - engine: stringField(database.engine) ?? 'unknown', - host: stringField(database.details?.host) ?? null, - dbName: stringField(database.details?.dbname) ?? null, - })); -} - -function targetPhysicalInfo(project: KtxLocalProject, connectionId: string) { - const descriptor = localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId]); - if (!descriptor) { - return { connection_type: 'UNKNOWN' }; - } - return { - connection_type: descriptor.connection_type, - host: descriptor.host ?? null, - database: descriptor.database ?? null, - account: descriptor.account ?? null, - project_id: descriptor.project_id ?? null, - dataset_id: descriptor.dataset_id ?? 
null, - ...descriptor.connection_params, - }; -} - -function noteMetabaseSetupSummary(options: { - prompts: MetabaseSetupPromptAdapter; - connectionId: string; - url: string; - mappings: MetabaseSetupMappingAssignment[]; - syncEnabledDatabaseIds: number[]; -}): void { - const mappingLines = options.mappings - .map((mapping) => ` ${mapping.metabaseDatabaseId} -> ${mapping.targetConnectionId}`) - .join('\n'); - const syncLines = options.syncEnabledDatabaseIds.map((id) => ` ${id}`).join('\n'); - - options.prompts.note( - [ - `Connection: ${options.connectionId}`, - `URL: ${options.url}`, - '', - 'Mappings:', - mappingLines || ' (none)', - '', - 'Sync enabled:', - syncLines || ' (none)', - ].join('\n'), - 'Summary', - ); -} - -function metabaseMappingsBlockForSetup(options: { - connectionId: string; - connection: KtxProjectConnectionConfig; - mappings: MetabaseSetupMappingAssignment[]; - syncEnabledDatabaseIds: number[]; - syncMode: MetabaseSetupSyncMode; -}): Record { - const existing = parseMetabaseMappingBootstrap(options.connectionId, options.connection); - const databaseMappings = { ...existing.databaseMappings }; - const syncEnabled = { ...existing.syncEnabled }; - for (const mapping of options.mappings) { - const key = String(mapping.metabaseDatabaseId); - databaseMappings[key] = mapping.targetConnectionId; - syncEnabled[key] = false; - } - for (const metabaseDatabaseId of options.syncEnabledDatabaseIds) { - syncEnabled[String(metabaseDatabaseId)] = true; - } - return { - databaseMappings, - syncEnabled, - syncMode: options.syncMode, - selections: existing.selections, - defaultTagNames: existing.defaultTagNames, - }; -} - -export async function runKtxConnectionMetabaseSetup( - args: KtxConnectionMetabaseSetupArgs, - io: KtxCliIo, - deps: KtxConnectionMetabaseSetupDeps = {}, -): Promise { - let apiKeyForRedaction = args.apiKey; - let passwordForRedaction = args.metabasePassword; - const interactiveIo = io as KtxMetabaseSetupInteractiveIo; - const isInteractive = 
isInteractiveMetabaseSetupIo(args, interactiveIo); - const prompts = deps.prompts ?? (isInteractive ? createClackMetabaseSetupPromptAdapter() : undefined); - - try { - if (isInteractive && prompts) { - prompts.intro('KTX Metabase setup'); - } - - const project = await loadKtxProject({ projectDir: args.projectDir }); - const existingMetabaseConnectionIds = listMetabaseConnectionIds(project); - let connectionId: string; - - if (args.connectionId) { - connectionId = args.connectionId; - } else if (existingMetabaseConnectionIds.length === 1) { - const onlyMetabaseConnectionId = existingMetabaseConnectionIds[0]; - if (!onlyMetabaseConnectionId) { - throw new Error('No Metabase connection id was resolved'); - } - connectionId = onlyMetabaseConnectionId; - } else if (existingMetabaseConnectionIds.length > 1) { - if (!isInteractive || !prompts) { - throw new Error( - `Multiple Metabase connections found (${existingMetabaseConnectionIds.join(', ')}); select one with --id`, - ); - } - connectionId = await prompts.select({ - message: 'Select the Metabase connection to configure', - options: existingMetabaseConnectionIds.map((id) => ({ value: id, label: id })), - }); - } else { - connectionId = 'metabase'; - } - - const existingConnection = project.config.connections[connectionId]; - const warehouseConnectionIds = listWarehouseConnectionIds(project); - - if (warehouseConnectionIds.length === 0) { - throw new Error('Add a warehouse connection first'); - } - - let url = args.url ?? resolveMetabaseUrl(existingConnection); - let apiKey = args.apiKey ?? 
resolveLiteralMetabaseApiKey(existingConnection); - apiKeyForRedaction = apiKey; - - if (!url && isInteractive && prompts) { - url = stringField( - await prompts.text({ - message: 'Metabase API URL', - placeholder: 'http://localhost:3000', - }), - ); - } - - if (args.inputMode === 'disabled' && !url) { - throw new Error('missing Metabase URL'); - } - - if (!args.apiKey && !args.mintApiKey && apiKey && isInteractive && prompts && !args.yes) { - const reuse = await prompts.confirm({ - message: `Reuse the existing Metabase API key from connections.${connectionId}?`, - initialValue: true, - }); - if (!reuse) { - apiKey = undefined; - apiKeyForRedaction = undefined; - } - } - - if (args.mintApiKey) { - let username = stringField(args.metabaseUsername); - let metabasePassword = stringField(args.metabasePassword); - - if (isInteractive && prompts) { - if (!username) { - username = stringField(await prompts.text({ message: 'Metabase admin username' })); - } - if (!metabasePassword) { - metabasePassword = stringField(await prompts.password({ message: 'Metabase admin password' })); - } - } - - if (!username) { - throw new Error('--mint-api-key requires --username'); - } - if (!metabasePassword) { - throw new Error('--mint-api-key requires --password'); - } - if (!url) { - throw new Error('Metabase URL is required (use --url)'); - } - - passwordForRedaction = metabasePassword; - apiKey = await (deps.mintMetabaseApiKey ?? 
defaultMintMetabaseApiKey)( - { url, username, password: metabasePassword }, - io, - ); - apiKeyForRedaction = apiKey; - } - - if (!apiKey && isInteractive && prompts) { - const credentialMode = await prompts.select({ - message: 'Metabase credentials', - options: [ - { value: 'paste', label: 'Paste API key' }, - { value: 'mint', label: 'Mint API key' }, - ], - }); - - if (credentialMode === 'paste') { - apiKey = stringField(await prompts.password({ message: 'Metabase API key' })); - apiKeyForRedaction = apiKey; - } else { - const username = stringField(await prompts.text({ message: 'Metabase admin username' })); - const metabasePassword = stringField(await prompts.password({ message: 'Metabase admin password' })); - if (!username) { - throw new Error('Metabase username is required'); - } - if (!metabasePassword) { - throw new Error('Metabase password is required'); - } - if (!url) { - throw new Error('Metabase URL is required (use --url)'); - } - - passwordForRedaction = metabasePassword; - apiKey = await (deps.mintMetabaseApiKey ?? defaultMintMetabaseApiKey)( - { url, username, password: metabasePassword }, - io, - ); - apiKeyForRedaction = apiKey; - } - } - - if (args.inputMode === 'disabled' && !apiKey) { - throw new Error('missing Metabase API key'); - } - - if (!url) { - throw new Error('Metabase URL is required (use --url)'); - } - if (!apiKey) { - throw new Error('Metabase API key is required (use --api-key)'); - } - - const transientConnectionConfig: KtxProjectConnectionConfig = { - ...(existingConnection ?? 
{}), - driver: 'metabase', - api_url: url, - api_key: apiKey, - }; - const configWithTransient = { - ...project.config, - connections: { - ...project.config.connections, - [connectionId]: transientConnectionConfig, - }, - }; - const discoveryProject: KtxLocalProject = { ...project, config: configWithTransient }; - - for (const mapping of args.mappings) { - if (!configWithTransient.connections[mapping.targetConnectionId]) { - throw new Error(`Target connection "${mapping.targetConnectionId}" does not exist`); - } - } - - const client = await (deps.createMetabaseClient ?? createDefaultMetabaseClient)(discoveryProject, connectionId); - try { - const authSpinner = isInteractive && prompts ? prompts.spinner() : undefined; - authSpinner?.start('Testing Metabase connection'); - const testResult = await client.testConnection(); - if (!testResult.success) { - authSpinner?.error('Metabase authentication failed'); - throw new Error( - `Metabase authentication failed. Replace connections.${connectionId}.api_key or use --mint-api-key.`, - ); - } - authSpinner?.stop('Metabase reachable'); - - const discoverySpinner = isInteractive && prompts ? prompts.spinner() : undefined; - discoverySpinner?.start('Discovering Metabase databases'); - const discovered = normalizeDiscoveredDatabases(await client.getDatabases()); - discoverySpinner?.stop(`Discovered ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}`); - if (isInteractive && prompts) { - prompts.log.success( - `Discovered ${discovered.length} ${discovered.length === 1 ? 
'database' : 'databases'}`, - ); - } - if (discovered.length === 0) { - throw new Error('Metabase auth worked but no usable databases were returned'); - } - - let resolvedMappings = args.mappings; - let resolvedSyncEnabledDatabaseIds = args.syncEnabledDatabaseIds; - - if (resolvedSyncEnabledDatabaseIds.length === 0 && args.yes && resolvedMappings.length > 0) { - resolvedSyncEnabledDatabaseIds = uniqueSorted(resolvedMappings.map((mapping) => mapping.metabaseDatabaseId)); - } - - if (resolvedMappings.length === 0 && resolvedSyncEnabledDatabaseIds.length === 0) { - const onlyDiscoveredDatabase = discovered.length === 1 ? discovered[0] : undefined; - const compatibleWarehouses = onlyDiscoveredDatabase - ? warehouseConnectionIds.filter((warehouseConnectionId) => { - const mismatchReason = validateMappingPhysicalMatch( - { - metabaseEngine: onlyDiscoveredDatabase.engine, - metabaseDbName: onlyDiscoveredDatabase.dbName, - metabaseHost: onlyDiscoveredDatabase.host, - }, - targetPhysicalInfo(project, warehouseConnectionId), - ); - return !mismatchReason; - }) - : []; - const onlyWarehouseConnectionId = compatibleWarehouses[0]; - - if (onlyDiscoveredDatabase && compatibleWarehouses.length === 1 && onlyWarehouseConnectionId) { - if (args.yes) { - resolvedMappings = [ - { metabaseDatabaseId: onlyDiscoveredDatabase.id, targetConnectionId: onlyWarehouseConnectionId }, - ]; - resolvedSyncEnabledDatabaseIds = [onlyDiscoveredDatabase.id]; - } else if (isInteractive && prompts) { - const proposedMappings = [ - { metabaseDatabaseId: onlyDiscoveredDatabase.id, targetConnectionId: onlyWarehouseConnectionId }, - ]; - const proposedSyncEnabledDatabaseIds = [onlyDiscoveredDatabase.id]; - noteMetabaseSetupSummary({ - prompts, - connectionId, - url, - mappings: proposedMappings, - syncEnabledDatabaseIds: proposedSyncEnabledDatabaseIds, - }); - const confirmed = await prompts.confirm({ - message: `Map Metabase database "${onlyDiscoveredDatabase.name}" (${onlyDiscoveredDatabase.id}) to 
"${onlyWarehouseConnectionId}" and enable sync?`, - initialValue: true, - }); - if (!confirmed) { - prompts.cancel('Setup cancelled.'); - throw new Error('Setup cancelled.'); - } - resolvedMappings = proposedMappings; - resolvedSyncEnabledDatabaseIds = proposedSyncEnabledDatabaseIds; - } else { - throw new Error('Metabase mapping/sync is required in --no-input mode; pass --map and --sync'); - } - } else if (isInteractive && prompts) { - const selectedDatabaseIds = await prompts.multiselect({ - message: withMultiselectNavigation('Select Metabase databases to configure'), - options: discovered.map((database) => ({ - value: database.id, - label: `${database.id}: ${database.name}`, - hint: [database.engine, database.host, database.dbName].filter(Boolean).join(' • '), - })), - required: true, - }); - - resolvedMappings = []; - for (const databaseId of selectedDatabaseIds) { - const database = discovered.find((candidate) => candidate.id === databaseId); - if (!database) { - throw new Error(`Selected database id ${databaseId} was not discovered`); - } - - const existingMapping = args.mappings.find((mapping) => mapping.metabaseDatabaseId === databaseId); - if (existingMapping) { - resolvedMappings.push(existingMapping); - continue; - } - - const targetConnectionId = await prompts.select({ - message: `Map Metabase database ${database.id} ("${database.name}") to which KTX connection?`, - options: warehouseConnectionIds.map((warehouseId) => ({ value: warehouseId, label: warehouseId })), - }); - resolvedMappings.push({ metabaseDatabaseId: databaseId, targetConnectionId }); - } - - const syncIds = await prompts.multiselect({ - message: withMultiselectNavigation('Enable sync for which databases?'), - options: selectedDatabaseIds.map((id) => ({ value: id, label: String(id) })), - initialValues: selectedDatabaseIds, - required: true, - }); - resolvedSyncEnabledDatabaseIds = uniqueSorted(syncIds); - - if (!args.yes) { - noteMetabaseSetupSummary({ - prompts, - connectionId, - url, - 
mappings: resolvedMappings, - syncEnabledDatabaseIds: resolvedSyncEnabledDatabaseIds, - }); - const confirmed = await prompts.confirm({ - message: 'Write changes to ktx.yaml and enable sync?', - initialValue: true, - }); - if (!confirmed) { - prompts.cancel('Setup cancelled.'); - throw new Error('Setup cancelled.'); - } - } - } else if (args.inputMode === 'disabled') { - throw new Error('Metabase mapping/sync is required in --no-input mode; pass --map and --sync'); - } - } - - if ( - args.inputMode === 'disabled' && - resolvedMappings.length > 0 && - resolvedSyncEnabledDatabaseIds.length === 0 - ) { - throw new Error('Metabase sync selection is required in --no-input mode; pass --sync '); - } - - const discoveredIds = new Set(discovered.map((database) => database.id)); - for (const mapping of resolvedMappings) { - if (!discoveredIds.has(mapping.metabaseDatabaseId)) { - throw new Error(`Mapped database id ${mapping.metabaseDatabaseId} was not discovered`); - } - } - for (const syncId of resolvedSyncEnabledDatabaseIds) { - if (!discoveredIds.has(syncId)) { - throw new Error(`Sync database id ${syncId} was not discovered`); - } - } - - const finalConnectionConfig: KtxProjectConnectionConfig = { - ...transientConnectionConfig, - mappings: metabaseMappingsBlockForSetup({ - connectionId, - connection: transientConnectionConfig, - mappings: resolvedMappings, - syncEnabledDatabaseIds: resolvedSyncEnabledDatabaseIds, - syncMode: args.syncMode, - }), - }; - const finalConfig = { - ...configWithTransient, - connections: { - ...configWithTransient.connections, - [connectionId]: finalConnectionConfig, - }, - }; - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(finalConfig), - 'ktx', - 'ktx@example.com', - `Setup Metabase connection ${connectionId}`, - ); - - const updatedProject = await loadKtxProject({ projectDir: args.projectDir }); - const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(updatedProject) }); - await 
discoveryCache.refreshDiscoveredDatabases({ connectionId, discovered }); - const rows = await new KtxYamlMetabaseSourceStateReader(updatedProject, { discoveryCache }).listDatabaseMappings( - connectionId, - ); - const physicalFailures = rows.flatMap((row) => { - if (!row.targetConnectionId) { - return []; - } - const reason = validateMappingPhysicalMatch( - { metabaseEngine: row.metabaseEngine, metabaseDbName: row.metabaseDbName, metabaseHost: row.metabaseHost }, - updatedProject.config.connections[row.targetConnectionId] - ? targetPhysicalInfo(updatedProject, row.targetConnectionId) - : { connection_type: 'UNKNOWN' }, - ); - return reason ? [`${row.metabaseDatabaseId}: ${reason}`] : []; - }); - if (physicalFailures.length > 0) { - for (const failure of physicalFailures) { - io.stderr.write(`${failure}\n`); - } - return 1; - } - - io.stdout.write(`Connection: ${connectionId}\n`); - io.stdout.write(`Discovered ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); - io.stdout.write( - `Next: ktx ingest run --connection-id ${connectionId} --adapter metabase --project-dir ${args.projectDir}\n`, - ); - - if (args.runIngest) { - const ingestRunner = deps.runPublicIngest ?? runKtxPublicIngest; - const exitCode = await ingestRunner( - { - command: 'run', - projectDir: args.projectDir, - targetConnectionId: connectionId, - all: false, - json: false, - inputMode: 'disabled', - }, - io, - ); - if (exitCode !== 0) { - io.stderr.write( - `Ingest failed; re-run: ktx ingest run --connection-id ${connectionId} --adapter metabase --project-dir ${args.projectDir}\n`, - ); - return 1; - } - } - - if (isInteractive && prompts) { - prompts.outro('Metabase setup complete'); - } - - return 0; - } finally { - await client.cleanup(); - } - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - io.stderr.write( - `${redactSecrets(message, [apiKeyForRedaction ?? '', passwordForRedaction ?? '', args.apiKey ?? 
''])}\n`, - ); - return 1; - } -} diff --git a/packages/cli/src/commands/connection-notion-commands.ts b/packages/cli/src/commands/connection-notion-commands.ts deleted file mode 100644 index 8f021ad9..00000000 --- a/packages/cli/src/commands/connection-notion-commands.ts +++ /dev/null @@ -1,92 +0,0 @@ -import { type Command, InvalidArgumentError } from '@commander-js/extra-typings'; -import { collectOption, type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; -import type { KtxConnectionNotionArgs } from './connection-notion.js'; - -interface NotionPickOptions { - input?: boolean; - rootPageId: string[]; -} - -function parseSafeConnectionId(value: string): string { - if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) { - throw new InvalidArgumentError(`Unsafe connection id: ${value}`); - } - return value; -} - -function uniqueInOrder(values: string[]): string[] { - const seen = new Set(); - const result: string[] = []; - for (const value of values) { - if (!seen.has(value)) { - seen.add(value); - result.push(value); - } - } - return result; -} - -function normalizeNotionPageId(value: string): string { - const trimmed = value.trim(); - const compact = trimmed.includes('-') ? 
trimmed.replace(/-/g, '') : trimmed; - if (!/^[0-9a-fA-F]{32}$/.test(compact)) { - throw new Error(`Invalid Notion page UUID: ${value}`); - } - const lower = compact.toLowerCase(); - return `${lower.slice(0, 8)}-${lower.slice(8, 12)}-${lower.slice(12, 16)}-${lower.slice(16, 20)}-${lower.slice(20)}`; -} - -function buildPickArgs(connectionId: string, projectDir: string, options: NotionPickOptions): KtxConnectionNotionArgs { - if (options.input !== false) { - return { - command: 'pick', - projectDir, - connectionId, - mode: 'interactive', - }; - } - - const rootPageIds = uniqueInOrder(options.rootPageId.map(normalizeNotionPageId)); - if (rootPageIds.length === 0) { - throw new Error('connection notion pick --no-input requires at least one --root-page-id'); - } - return { - command: 'pick', - projectDir, - connectionId, - mode: 'non-interactive', - rootPageIds, - }; -} - -async function runConnectionNotionArgs(context: KtxCliCommandContext, args: KtxConnectionNotionArgs): Promise { - const runner = context.deps.connectionNotion ?? 
(await import('./connection-notion.js')).runKtxConnectionNotion; - context.setExitCode(await runner(args, context.io)); -} - -export function registerConnectionNotionCommands(connect: Command, context: KtxCliCommandContext): void { - const notion = connect - .command('notion') - .description('Configure Notion source selection') - .showHelpAfterError() - .addHelpText( - 'after', - '\nProject directory defaults to KTX_PROJECT_DIR when set, otherwise the current working directory.\n', - ); - - notion.action(() => { - notion.outputHelp(); - context.setExitCode(0); - }); - - notion - .command('pick') - .description('Pick Notion root pages for a configured Notion connection') - .argument('', 'Notion connection id', parseSafeConnectionId) - .option('--no-input', 'Disable interactive terminal input') - .option('--root-page-id ', 'Root page UUID to crawl; repeatable with --no-input', collectOption, []) - .showHelpAfterError() - .action(async (connectionId: string, options: NotionPickOptions, command) => { - await runConnectionNotionArgs(context, buildPickArgs(connectionId, resolveCommandProjectDir(command), options)); - }); -} diff --git a/packages/cli/src/commands/connection-notion.test.ts b/packages/cli/src/commands/connection-notion.test.ts deleted file mode 100644 index 3315e1cc..00000000 --- a/packages/cli/src/commands/connection-notion.test.ts +++ /dev/null @@ -1,513 +0,0 @@ -import { mkdtemp, readFile, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { - initKtxProject, - loadKtxProject, - serializeKtxProjectConfig, - type KtxProjectConfig, -} from '@ktx/context/project'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { - applyNotionPickerWriteback, - discoverNotionPickerPages, - notionPickerPageFromSearchResult, - normalizeNotionPageId, - resolveNotionWorkspaceLabel, - runKtxConnectionNotion, - type NotionPickerApi, - type PickerRenderInput, - type PickerRenderResult, -} 
from './connection-notion.js'; - -function makeIo() { - let stdout = ''; - let stderr = ''; - return { - io: { - stdout: { - write: (chunk: string) => { - stdout += chunk; - }, - }, - stderr: { - write: (chunk: string) => { - stderr += chunk; - }, - }, - }, - stdout: () => stdout, - stderr: () => stderr, - }; -} - -type FakeNotionSearchPage = Record & { id: string; object: 'page' }; - -const PAGE_IDS = { - engineering: '11111111-1111-1111-1111-111111111111', - architecture: '22222222-2222-2222-2222-222222222222', - stale: '99999999-9999-9999-9999-999999999999', -}; - -function notionPage(id: string, title: string, parentId: string | null = null): FakeNotionSearchPage { - return { - object: 'page', - id, - archived: false, - parent: parentId ? { type: 'page_id', page_id: parentId } : { type: 'workspace', workspace: true }, - properties: { - title: { - type: 'title', - title: [{ plain_text: title }], - }, - }, - }; -} - -function fakeNotionApi(pages: FakeNotionSearchPage[]): NotionPickerApi { - return { - search: vi.fn(async (_filterValue, startCursor) => { - if (startCursor === 'page-2') { - return { results: pages.slice(2), hasMore: false, nextCursor: null }; - } - return { - results: pages.slice(0, 2), - hasMore: pages.length > 2, - nextCursor: pages.length > 2 ? 
'page-2' : null, - }; - }), - retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot', bot: { workspace_name: 'Design Workspace' } })), - }; -} - -describe('normalizeNotionPageId', () => { - it('accepts dashed and compact UUIDs', () => { - expect(normalizeNotionPageId('11111111222233334444555555555555')).toBe( - '11111111-2222-3333-4444-555555555555', - ); - expect(normalizeNotionPageId('AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE')).toBe( - 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', - ); - }); -}); - -describe('runKtxConnectionNotion', () => { - let tempDir: string; - - beforeEach(async () => { - tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-notion-pick-')); - }); - - afterEach(async () => { - await rm(tempDir, { recursive: true, force: true }); - }); - - async function writeProjectConfig(projectDir: string, config: KtxProjectConfig): Promise { - const project = await loadKtxProject({ projectDir }); - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(config), - 'ktx', - 'ktx@example.com', - 'seed test config', - ); - } - - it('rejects unsafe connection ids before loading a project', async () => { - const io = makeIo(); - const loadProject = vi.fn(async () => { - throw new Error('loadProject should not be called'); - }); - - await expect( - runKtxConnectionNotion( - { - command: 'pick', - projectDir: '/tmp/project', - connectionId: '../evil', - mode: 'interactive', - }, - io.io, - { loadProject }, - ), - ).resolves.toBe(1); - - expect(loadProject).not.toHaveBeenCalled(); - expect(io.stderr()).toContain('Unsafe connection id: ../evil'); - }); - - it('writes selected root_page_ids while preserving every other Notion connection field', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - 'notion-main': { - driver: 'notion', - auth_token_ref: 'env:NOTION_TOKEN', - 
crawl_mode: 'all_accessible', - root_page_ids: ['99999999-9999-9999-9999-999999999999'], - root_database_ids: ['database-1'], - root_data_source_ids: ['data-source-1'], - max_pages_per_run: 12, - max_knowledge_creates_per_run: 2, - max_knowledge_updates_per_run: 7, - last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}', - unknown_future_field: 'keep-me', - }, - }, - }); - const io = makeIo(); - - await expect( - runKtxConnectionNotion( - { - command: 'pick', - projectDir, - connectionId: 'notion-main', - mode: 'non-interactive', - rootPageIds: [ - '11111111-2222-3333-4444-555555555555', - '66666666-7777-8888-9999-aaaaaaaaaaaa', - ], - }, - io.io, - ), - ).resolves.toBe(0); - - const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(yaml).toContain('crawl_mode: selected_roots'); - expect(yaml).toContain('root_page_ids:'); - expect(yaml).toContain('11111111-2222-3333-4444-555555555555'); - expect(yaml).toContain('66666666-7777-8888-9999-aaaaaaaaaaaa'); - expect(yaml).toContain('root_database_ids:'); - expect(yaml).toContain('database-1'); - expect(yaml).toContain('root_data_source_ids:'); - expect(yaml).toContain('data-source-1'); - expect(yaml).toContain('last_successful_cursor: \'{"phase":"all_accessible_pages","cursor":"cursor-1"}\''); - expect(yaml).toContain('unknown_future_field: keep-me'); - expect(io.stdout()).toContain('Connection: notion-main'); - expect(io.stdout()).toContain('rootPageIds: 2'); - expect(io.stdout()).toContain('crawlMode: selected_roots'); - }); - - it('rejects empty writeback, missing connections, and non-Notion connections', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - warehouse: { - driver: 'postgres', - url: 'env:DATABASE_URL', - readonly: true, - }, - }, - }); - const project = await loadKtxProject({ 
projectDir }); - - await expect(applyNotionPickerWriteback(project, 'warehouse', [])).rejects.toThrow( - 'connection notion pick requires at least one root page id', - ); - await expect( - applyNotionPickerWriteback(project, 'missing', ['11111111-2222-3333-4444-555555555555']), - ).rejects.toThrow('Connection "missing" not found'); - await expect( - applyNotionPickerWriteback(project, 'warehouse', ['11111111-2222-3333-4444-555555555555']), - ).rejects.toThrow('Connection "warehouse" is not a Notion connection'); - }); - - it('extracts picker page inputs from Notion search results', () => { - expect(notionPickerPageFromSearchResult(notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering))) - .toEqual({ - id: PAGE_IDS.architecture, - title: 'Architecture', - archived: false, - parentId: PAGE_IDS.engineering, - }); - - expect( - notionPickerPageFromSearchResult({ - object: 'page', - id: PAGE_IDS.engineering.replaceAll('-', ''), - archived: true, - parent: { type: 'workspace', workspace: true }, - properties: {}, - }), - ).toEqual({ - id: PAGE_IDS.engineering, - title: 'Untitled', - archived: true, - parentId: null, - }); - }); - - it('discovers visible pages up to the cap and reports cap state', async () => { - const api = fakeNotionApi([ - notionPage(PAGE_IDS.engineering, 'Engineering'), - notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering), - notionPage('33333333-3333-3333-3333-333333333333', 'Onboarding', PAGE_IDS.engineering), - ]); - - await expect(discoverNotionPickerPages(api, { cap: 2 })).resolves.toEqual({ - pages: [ - { id: PAGE_IDS.engineering, title: 'Engineering', archived: false, parentId: null }, - { id: PAGE_IDS.architecture, title: 'Architecture', archived: false, parentId: PAGE_IDS.engineering }, - ], - cappedAtCount: 2, - warnings: [], - }); - expect(api.search).toHaveBeenCalledTimes(1); - }); - - it('keeps partial discovery results when Notion search fails after at least one page', async () => { - const api: 
NotionPickerApi = { - search: vi - .fn() - .mockResolvedValueOnce({ - results: [notionPage(PAGE_IDS.engineering, 'Engineering')], - hasMore: true, - nextCursor: 'cursor-2', - }) - .mockRejectedValueOnce(new Error('rate limit after first page')), - retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot' })), - }; - - await expect(discoverNotionPickerPages(api)).resolves.toEqual({ - pages: [{ id: PAGE_IDS.engineering, title: 'Engineering', archived: false, parentId: null }], - cappedAtCount: null, - warnings: ['Notion search stopped early: rate limit after first page'], - }); - }); - - it('uses the Notion workspace name when available and falls back to the connection id', async () => { - await expect(resolveNotionWorkspaceLabel(fakeNotionApi([]), 'notion-main')).resolves.toBe('Design Workspace'); - await expect( - resolveNotionWorkspaceLabel( - { - search: vi.fn(), - retrieveBotUser: vi.fn(async () => { - throw new Error('users.me unavailable'); - }), - }, - 'notion-main', - ), - ).resolves.toBe('notion-main'); - }); - - it('runs interactive discovery, warns about stale roots, renders the TUI, and saves selected roots', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - 'notion-main': { - driver: 'notion', - auth_token_ref: 'env:NOTION_TOKEN', - crawl_mode: 'all_accessible', - root_page_ids: [PAGE_IDS.stale], - root_database_ids: ['database-1'], - root_data_source_ids: ['data-source-1'], - max_pages_per_run: 12, - max_knowledge_creates_per_run: 2, - max_knowledge_updates_per_run: 7, - last_successful_cursor: null, - }, - }, - }); - const api = fakeNotionApi([ - notionPage(PAGE_IDS.engineering, 'Engineering'), - notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering), - ]); - const renderPicker = vi.fn(async (input): Promise => { - 
expect(input.connectionId).toBe('notion-main'); - expect(input.workspaceLabel).toBe('Design Workspace'); - expect(input.currentCrawlMode).toBe('all_accessible'); - expect(input.cappedAtCount).toBeNull(); - expect(input.initialState.preLoadWarnings).toEqual(['1 stored root_page_ids no longer visible']); - return { kind: 'save', rootPageIds: [PAGE_IDS.engineering] }; - }); - const io = makeIo(); - - await expect( - runKtxConnectionNotion( - { - command: 'pick', - projectDir, - connectionId: 'notion-main', - mode: 'interactive', - }, - io.io, - { - env: { NOTION_TOKEN: 'ntn_test_token' }, - createNotionApi: vi.fn(() => api), - renderPicker, - }, - ), - ).resolves.toBe(0); - - const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(yaml).toContain('crawl_mode: selected_roots'); - expect(yaml).toContain(PAGE_IDS.engineering); - expect(yaml).not.toContain(PAGE_IDS.stale); - expect(io.stderr()).toContain('1 stored root_page_ids no longer visible'); - expect(io.stdout()).toContain('Connection: notion-main'); - expect(io.stdout()).toContain('rootPageIds: 1'); - }); - - it('uses inline Notion auth_token for interactive discovery', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - 'notion-main': { - driver: 'notion', - auth_token: 'ntn_inline_token', - crawl_mode: 'selected_roots', - root_page_ids: [PAGE_IDS.engineering], - root_database_ids: [], - root_data_source_ids: [], - max_pages_per_run: 12, - max_knowledge_creates_per_run: 2, - max_knowledge_updates_per_run: 7, - last_successful_cursor: null, - }, - }, - }); - const api = fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')]); - const createNotionApi = vi.fn((authToken: string) => { - expect(authToken).toBe('ntn_inline_token'); - return api; - }); - const io = makeIo(); - - await expect( - 
runKtxConnectionNotion( - { - command: 'pick', - projectDir, - connectionId: 'notion-main', - mode: 'interactive', - }, - io.io, - { - createNotionApi, - renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), - }, - ), - ).resolves.toBe(0); - - expect(createNotionApi).toHaveBeenCalledOnce(); - expect(io.stdout()).toContain('No changes saved.'); - }); - - it('passes partial-discovery warnings into the TUI banner state', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - 'notion-main': { - driver: 'notion', - auth_token_ref: 'env:NOTION_TOKEN', - crawl_mode: 'selected_roots', - root_page_ids: [PAGE_IDS.engineering], - root_database_ids: [], - root_data_source_ids: [], - max_pages_per_run: 12, - max_knowledge_creates_per_run: 2, - max_knowledge_updates_per_run: 7, - last_successful_cursor: null, - }, - }, - }); - const api: NotionPickerApi = { - search: vi - .fn() - .mockResolvedValueOnce({ - results: [notionPage(PAGE_IDS.engineering, 'Engineering')], - hasMore: true, - nextCursor: 'cursor-2', - }) - .mockRejectedValueOnce(new Error('rate limit after first page')), - retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot', bot: { workspace_name: 'Design Workspace' } })), - }; - let renderInput: PickerRenderInput | undefined; - const renderPicker = vi.fn(async (input: PickerRenderInput): Promise => { - renderInput = input; - return { kind: 'quit' }; - }); - const io = makeIo(); - - await expect( - runKtxConnectionNotion( - { - command: 'pick', - projectDir, - connectionId: 'notion-main', - mode: 'interactive', - }, - io.io, - { - env: { NOTION_TOKEN: 'ntn_test_token' }, - createNotionApi: vi.fn(() => api), - renderPicker, - }, - ), - ).resolves.toBe(0); - - expect(renderPicker).toHaveBeenCalledOnce(); - if (!renderInput) { - throw new Error('renderPicker was not called'); - } - 
expect(renderInput.initialState.preLoadWarnings).toEqual(['Notion search stopped early: rate limit after first page']); - expect(renderInput.initialState.tree.map((node) => node.title)).toEqual(['Engineering']); - expect(io.stderr()).toContain('Notion search stopped early: rate limit after first page'); - expect(io.stdout()).toContain('No changes saved.'); - }); - - it('quits interactive mode without writing when the TUI returns quit', async () => { - const projectDir = join(tempDir, 'project'); - const initialized = await initKtxProject({ projectDir, projectName: 'warehouse' }); - await writeProjectConfig(projectDir, { - ...initialized.config, - connections: { - 'notion-main': { - driver: 'notion', - auth_token_ref: 'env:NOTION_TOKEN', - crawl_mode: 'selected_roots', - root_page_ids: [PAGE_IDS.engineering], - root_database_ids: [], - root_data_source_ids: [], - max_pages_per_run: 12, - max_knowledge_creates_per_run: 2, - max_knowledge_updates_per_run: 7, - last_successful_cursor: null, - }, - }, - }); - const before = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - const io = makeIo(); - - await expect( - runKtxConnectionNotion( - { - command: 'pick', - projectDir, - connectionId: 'notion-main', - mode: 'interactive', - }, - io.io, - { - env: { NOTION_TOKEN: 'ntn_test_token' }, - createNotionApi: vi.fn(() => fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')])), - renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), - }, - ), - ).resolves.toBe(0); - - await expect(readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).resolves.toBe(before); - expect(io.stdout()).toContain('No changes saved.'); - }); -}); diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index 4b6cacf1..57ed8742 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -1,4 +1,4 @@ -import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { mkdtemp, readFile, rm, writeFile } 
from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import type { MetabaseRuntimeClient } from '@ktx/context/ingest'; @@ -6,18 +6,13 @@ import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from import type { KtxConnectionDriver, KtxScanConnector, KtxSchemaSnapshot } from '@ktx/context/scan'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnection } from './connection.js'; -import { runKtxCli, type KtxCliIo } from './index.js'; -function makeIo(options: { stdoutIsTty?: boolean; stdinIsTty?: boolean } = {}) { +function makeIo() { let stdout = ''; let stderr = ''; return { io: { - stdin: { - isTTY: options.stdinIsTty, - }, stdout: { - isTTY: options.stdoutIsTty, write: (chunk: string) => { stdout += chunk; }, @@ -87,491 +82,49 @@ describe('runKtxConnection', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('adds and lists env-referenced connections without resolving secrets', async () => { + async function writeConnections( + projectDir: string, + connections: ReturnType['connections'], + ): Promise { + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig({ ...config, connections }), 'utf-8'); + } + + it('lists configured connections without resolving secrets', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); + await writeConnections(projectDir, { + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', readonly: true }, + docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' }, + }); const io = makeIo(); - await expect( - runKtxConnection( - { - command: 'add', - projectDir, - driver: 'postgres', - connectionId: 'warehouse', - url: 'env:DATABASE_URL', - schemas: ['public'], - readonly: true, - force: false, - 
allowLiteralCredentials: false, - }, - io.io, - ), - ).resolves.toBe(0); + await expect(runKtxConnection({ command: 'list', projectDir }, io.io)).resolves.toBe(0); - expect(io.stdout()).toContain('Connection: warehouse'); - await expect(readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).resolves.toContain('url: env:DATABASE_URL'); - - const listIo = makeIo(); - await expect(runKtxConnection({ command: 'list', projectDir }, listIo.io)).resolves.toBe(0); - expect(listIo.stdout()).toContain('warehouse'); - expect(listIo.stdout()).toContain('postgres'); - }); - - it('removes a configured connection from ktx.yaml without deleting local artifacts when forced', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'sqlite', - connectionId: 'warehouse', - url: undefined, - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - makeIo().io, - ); - const artifactPath = join(projectDir, '.ktx', 'artifacts', 'warehouse.txt'); - await mkdir(join(projectDir, '.ktx', 'artifacts'), { recursive: true }); - await writeFile(artifactPath, 'keep me', 'utf-8'); - - const io = makeIo(); - - await expect( - runKtxConnection( - { - command: 'remove', - projectDir, - connectionId: 'warehouse', - force: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(0); - - const parsed = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - expect(parsed.connections.warehouse).toBeUndefined(); - await expect(readFile(artifactPath, 'utf-8')).resolves.toBe('keep me'); - expect(io.stdout()).toContain('Connection removed from ktx.yaml.'); - expect(io.stdout()).toContain( - 'Ingested artifacts from this connection remain in .ktx/. 
Run ktx dev artifacts to inspect.', - ); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).toContain('postgres'); + expect(io.stdout()).toContain('docs'); + expect(io.stdout()).toContain('notion'); expect(io.stderr()).toBe(''); }); - it('requires --force when removing in non-interactive mode', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'sqlite', - connectionId: 'warehouse', - url: undefined, - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - makeIo().io, - ); - const io = makeIo(); - - await expect( - runKtxConnection( - { - command: 'remove', - projectDir, - connectionId: 'warehouse', - force: false, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('connection remove warehouse requires --force when input is disabled or not interactive'); - }); - - it('returns a clear error when removing an unknown connection', async () => { + it('prints an empty-state message that points at setup instead of removed connection add', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); const io = makeIo(); - await expect( - runKtxConnection( - { - command: 'remove', - projectDir, - connectionId: 'missing', - force: true, - inputMode: 'disabled', - }, - io.io, - ), - ).resolves.toBe(1); + await expect(runKtxConnection({ command: 'list', projectDir }, io.io)).resolves.toBe(0); - expect(io.stderr()).toContain('Connection "missing" is not configured in ktx.yaml'); - }); - - it('asks for confirmation before removing in an interactive terminal', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'sqlite', - 
connectionId: 'warehouse', - url: undefined, - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - makeIo().io, - ); - const io = makeIo({ stdoutIsTty: true, stdinIsTty: true }); - const prompts = { - confirm: vi.fn(async () => true), - cancel: vi.fn(), - }; - - await expect( - runKtxConnection( - { - command: 'remove', - projectDir, - connectionId: 'warehouse', - force: false, - }, - io.io, - { prompts }, - ), - ).resolves.toBe(0); - - expect(prompts.confirm).toHaveBeenCalledWith({ - message: 'Remove connection "warehouse" from ktx.yaml? Ingested artifacts will remain in .ktx/.', - initialValue: false, - }); - }); - - it('runs public connect map as refresh, validate, and list over the low-level mapping runner', async () => { - const io = makeIo(); - const runMapping = vi.fn(async (argv: string[], mappingIo: KtxCliIo) => { - if (argv[0] === 'refresh') { - mappingIo.stdout.write('Discovery: 1 database\n'); - mappingIo.stdout.write('Unmapped discovered: 1\n'); - mappingIo.stdout.write('Stale mappings: 0\n'); - return 0; - } - if (argv[0] === 'validate') { - mappingIo.stdout.write('Mapping validation passed: prod-metabase\n'); - return 0; - } - if (argv[0] === 'list') { - mappingIo.stdout.write('1 -> [unmapped] (Analytics, sync: on, source: refresh)\n'); - return 0; - } - return 1; - }); - - await expect( - runKtxConnection( - { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-metabase', json: false }, - io.io, - { runMapping }, - ), - ).resolves.toBe(0); - - expect(runMapping).toHaveBeenNthCalledWith( - 1, - ['refresh', 'prod-metabase', '--auto-accept', '--project-dir', '/tmp/project'], - expect.any(Object), - ); - expect(runMapping).toHaveBeenNthCalledWith( - 2, - ['validate', 'prod-metabase', '--project-dir', '/tmp/project'], - expect.any(Object), - ); - expect(runMapping).toHaveBeenNthCalledWith( - 3, - ['list', 'prod-metabase', '--project-dir', '/tmp/project'], - expect.any(Object), - ); - 
expect(io.stdout()).toContain('Mapping: prod-metabase'); - expect(io.stdout()).toContain('Discovery: 1 database'); - expect(io.stdout()).toContain('Mappings:'); - expect(io.stdout()).toContain('1 -> [unmapped]'); - expect(io.stdout()).toContain('Next:'); - expect(io.stdout()).toContain('ktx ingest run --connection-id prod-metabase --adapter '); - expect(io.stdout()).toContain('ktx connection mapping'); - expect(io.stderr()).toBe(''); - }); - - it('prints stable JSON for public connect map without leaking low-level stdout', async () => { - const io = makeIo(); - const runMapping = vi.fn(async (argv: string[], mappingIo: KtxCliIo) => { - if (argv[0] === 'refresh') { - mappingIo.stdout.write('Discovery: 1 connection\nUnmapped discovered: 0\nStale mappings: 0\n'); - return 0; - } - if (argv[0] === 'validate') { - mappingIo.stdout.write('Mapping validation passed: prod-looker\n'); - return 0; - } - if (argv[0] === 'list') { - expect(argv).toContain('--json'); - mappingIo.stdout.write( - `${JSON.stringify( - [ - { - lookerConnectionName: 'analytics', - ktxConnectionId: 'prod-warehouse', - source: 'ktx.yaml', - }, - ], - null, - 2, - )}\n`, - ); - return 0; - } - return 1; - }); - - await expect( - runKtxConnection( - { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-looker', json: true }, - io.io, - { runMapping }, - ), - ).resolves.toBe(0); - - const parsed = JSON.parse(io.stdout()) as { - connectionId: string; - refresh: { ok: boolean; output: string[] }; - validation: { ok: boolean; output: string[] }; - mappings: Array<{ lookerConnectionName: string; ktxConnectionId: string; source: string }>; - }; - expect(parsed).toEqual({ - connectionId: 'prod-looker', - refresh: { - ok: true, - output: ['Discovery: 1 connection', 'Unmapped discovered: 0', 'Stale mappings: 0'], - }, - validation: { - ok: true, - output: ['Mapping validation passed: prod-looker'], - }, - mappings: [ - { - lookerConnectionName: 'analytics', - ktxConnectionId: 'prod-warehouse', 
- source: 'ktx.yaml', - }, - ], - }); - expect(io.stderr()).toBe(''); - }); - - it('returns the refresh failure when public connect map cannot discover source metadata', async () => { - const io = makeIo(); - const runMapping = vi.fn(async (argv: string[], mappingIo: KtxCliIo) => { - if (argv[0] === 'refresh') { - mappingIo.stderr.write('Metabase API key is not configured\n'); - return 1; - } - return 0; - }); - - await expect( - runKtxConnection( - { command: 'map', projectDir: '/tmp/project', sourceConnectionId: 'prod-metabase', json: false }, - io.io, - { runMapping }, - ), - ).resolves.toBe(1); - - expect(runMapping).toHaveBeenCalledTimes(1); - expect(io.stdout()).toBe(''); - expect(io.stderr()).toContain('Metabase API key is not configured'); - }); - - it('rejects literal credential URLs unless explicitly allowed', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const io = makeIo(); - - await expect( - runKtxConnection( - { - command: 'add', - projectDir, - driver: 'postgres', - connectionId: 'warehouse', - url: 'postgres://localhost:5432/warehouse', - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - io.io, - ), - ).resolves.toBe(1); - - expect(io.stderr()).toContain('Literal credential URLs require --allow-literal-credentials'); - }); - - it('warns before writing explicitly allowed literal credential URLs without echoing the URL', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const io = makeIo(); - const literalUrl = 'postgres://localhost:5432/warehouse'; - - await expect( - runKtxConnection( - { - command: 'add', - projectDir, - driver: 'postgres', - connectionId: 'warehouse', - url: literalUrl, - schemas: ['public'], - readonly: true, - force: false, - allowLiteralCredentials: true, - }, - io.io, - ), - ).resolves.toBe(0); - - expect(io.stderr()).toContain( 
- 'Warning: writing a literal credential URL to ktx.yaml for connection "warehouse". Prefer env:NAME or file:/path references.', - ); - expect(io.stderr()).not.toContain(literalUrl); - await expect(readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).resolves.toContain(literalUrl); - }); - - it('adds a Notion connection without writing token values', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - const io = makeIo(); - - await expect( - runKtxConnection( - { - command: 'add', - projectDir, - driver: 'notion', - connectionId: 'notion-main', - url: undefined, - schemas: [], - readonly: false, - force: false, - allowLiteralCredentials: false, - notion: { - authTokenRef: 'env:NOTION_TOKEN', - crawlMode: 'all_accessible', - rootPageIds: [], - rootDatabaseIds: [], - rootDataSourceIds: [], - maxPagesPerRun: 50, - maxKnowledgeCreatesPerRun: 4, - maxKnowledgeUpdatesPerRun: 12, - }, - }, - io.io, - ), - ).resolves.toBe(0); - - const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(yaml).toContain('driver: notion'); - expect(yaml).toContain('auth_token_ref: env:NOTION_TOKEN'); - expect(yaml).toContain('crawl_mode: all_accessible'); - expect(yaml).toContain('max_pages_per_run: 50'); - expect(yaml).not.toContain('ntn_'); - expect(io.stdout()).toContain('Connection: notion-main'); - expect(io.stdout()).toContain('Driver: notion'); - }); - - it('runs connection notion pick --no-input through the public connection entrypoint', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'notion', - connectionId: 'notion-main', - url: undefined, - schemas: [], - readonly: false, - force: false, - allowLiteralCredentials: false, - notion: { - authTokenRef: 'env:NOTION_TOKEN', - crawlMode: 'all_accessible', - rootPageIds: [], - rootDatabaseIds: 
['database-1'], - rootDataSourceIds: ['data-source-1'], - maxPagesPerRun: 50, - maxKnowledgeCreatesPerRun: 4, - maxKnowledgeUpdatesPerRun: 12, - }, - }, - makeIo().io, - ); - const io = makeIo(); - - await expect( - runKtxCli( - [ - 'connection', - 'notion', - 'pick', - 'notion-main', - '--project-dir', - projectDir, - '--no-input', - '--root-page-id', - '11111111222233334444555555555555', - ], - io.io, - ), - ).resolves.toBe(0); - - const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(yaml).toContain('crawl_mode: selected_roots'); - expect(yaml).toContain('11111111-2222-3333-4444-555555555555'); - expect(yaml).toContain('database-1'); - expect(yaml).toContain('data-source-1'); - expect(io.stdout()).toContain('Connection: notion-main'); + expect(io.stdout()).toContain('No connections configured. Run `ktx setup` to add one.'); + expect(io.stdout()).not.toContain('ktx connection add'); }); it('tests a configured connection through the native scan connector', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'sqlite', - connectionId: 'warehouse', - url: undefined, - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - makeIo().io, - ); + await writeConnections(projectDir, { + warehouse: { driver: 'sqlite', readonly: true }, + }); const { connector, introspect, cleanup } = nativeConnector('sqlite', ['customers', 'orders']); const createScanConnector = vi.fn(async () => connector); const io = makeIo(); @@ -602,22 +155,13 @@ describe('runKtxConnection', () => { it('tests a configured Metabase connection through the Metabase runtime client', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); - const projectConfig = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); - await 
writeFile( - join(projectDir, 'ktx.yaml'), - serializeKtxProjectConfig({ - ...projectConfig, - connections: { - ...projectConfig.connections, - prod_metabase: { - driver: 'metabase', - api_url: 'http://metabase.example.test', - api_key: 'mb_test', - }, - }, - }), - 'utf-8', - ); + await writeConnections(projectDir, { + prod_metabase: { + driver: 'metabase', + api_url: 'http://metabase.example.test', + api_key: 'mb_test', + }, + }); const testConnection = vi.fn(async () => ({ success: true as const })); const getDatabases = vi.fn(async () => [ { id: 1, name: 'Analytics', engine: 'postgres', details: {}, is_sample: false }, @@ -657,20 +201,9 @@ describe('runKtxConnection', () => { it('cleans up the native scan connector when connection testing fails', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); - await runKtxConnection( - { - command: 'add', - projectDir, - driver: 'sqlite', - connectionId: 'warehouse', - url: undefined, - schemas: [], - readonly: true, - force: false, - allowLiteralCredentials: false, - }, - makeIo().io, - ); + await writeConnections(projectDir, { + warehouse: { driver: 'sqlite', readonly: true }, + }); const cleanup = vi.fn(async () => undefined); const connector: KtxScanConnector = { id: 'sqlite:warehouse', diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index b199239a..cf0b512b 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -1,108 +1,24 @@ -import { cancel, confirm, isCancel } from '@clack/prompts'; import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory, type MetabaseRuntimeClient, metabaseRuntimeConfigFromLocalConnection, } from '@ktx/context/ingest'; -import { type KtxLocalProject, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; +import { type KtxLocalProject, loadKtxProject } from '@ktx/context/project'; import type { KtxScanConnector } from 
'@ktx/context/scan'; -import type { KtxConnectionMappingArgs } from './commands/connection-mapping.js'; import type { KtxCliIo } from './index.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { profileMark } from './startup-profile.js'; profileMark('module:connection'); -interface KtxNotionConnectionCliConfig { - authTokenRef: string; - crawlMode: 'all_accessible' | 'selected_roots'; - rootPageIds: string[]; - rootDatabaseIds: string[]; - rootDataSourceIds: string[]; - maxPagesPerRun?: number; - maxKnowledgeCreatesPerRun?: number; - maxKnowledgeUpdatesPerRun?: number; -} - -type KtxConnectionInputMode = 'disabled'; - export type KtxConnectionArgs = | { command: 'list'; projectDir: string } - | { - command: 'add'; - projectDir: string; - driver: string; - connectionId: string; - url?: string; - schemas: string[]; - readonly: boolean; - force: boolean; - allowLiteralCredentials: boolean; - notion?: KtxNotionConnectionCliConfig; - } - | { command: 'test'; projectDir: string; connectionId: string } - | { - command: 'remove'; - projectDir: string; - connectionId: string; - force: boolean; - inputMode?: KtxConnectionInputMode; - } - | { - command: 'map'; - projectDir: string; - sourceConnectionId: string; - json: boolean; - }; - -interface KtxConnectionPromptAdapter { - confirm(options: { message: string; initialValue?: boolean }): Promise; - cancel(message: string): void; -} - -interface KtxConnectionIo extends KtxCliIo { - stdin?: { isTTY?: boolean }; -} + | { command: 'test'; projectDir: string; connectionId: string }; interface KtxConnectionDeps { createScanConnector?: typeof createKtxCliScanConnector; createMetabaseClient?: typeof createDefaultMetabaseClient; - runMapping?: (argv: string[], io: KtxCliIo) => Promise; - prompts?: KtxConnectionPromptAdapter; -} - -function assertSafeConnectionId(connectionId: string): void { - if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { - throw new Error(`Unsafe connection id: 
${connectionId}`); - } -} - -function isCredentialReference(value: string): boolean { - return value.startsWith('env:') || value.startsWith('file:'); -} - -function literalCredentialWarning(connectionId: string): string { - return `Warning: writing a literal credential URL to ktx.yaml for connection "${connectionId}". Prefer env:NAME or file:/path references.`; -} - -function createClackConnectionPromptAdapter(): KtxConnectionPromptAdapter { - return { - async confirm(options: { message: string; initialValue?: boolean }): Promise { - const value = await confirm(options); - return isCancel(value) ? false : value; - }, - cancel(message: string): void { - cancel(message); - }, - }; -} - -function isInteractiveConnectionIo( - args: Extract, - io: KtxConnectionIo, -): boolean { - return args.inputMode !== 'disabled' && io.stdin?.isTTY === true && io.stdout.isTTY === true; } async function cleanupConnector(connector: KtxScanConnector | null): Promise { @@ -186,166 +102,17 @@ async function testMetabaseConnection( } } -interface BufferedIo extends KtxCliIo { - stdoutText(): string; - stderrText(): string; -} - -function createBufferedIo(): BufferedIo { - let stdout = ''; - let stderr = ''; - return { - stdout: { - write(chunk: string) { - stdout += chunk; - }, - }, - stderr: { - write(chunk: string) { - stderr += chunk; - }, - }, - stdoutText() { - return stdout; - }, - stderrText() { - return stderr; - }, - }; -} - -function splitOutputLines(output: string): string[] { - return output - .split('\n') - .map((line) => line.trim()) - .filter(Boolean); -} - -async function runLowLevelMapping( - args: KtxConnectionMappingArgs, - argv: string[], - io: KtxCliIo, - deps: KtxConnectionDeps, -): Promise { - if (deps.runMapping) { - return await deps.runMapping(argv, io); - } - - const { runKtxConnectionMapping } = await import('./commands/connection-mapping.js'); - return await runKtxConnectionMapping(args, io); -} - -function parseMappingListJson(output: string): unknown[] { - 
const trimmed = output.trim(); - if (!trimmed) { - return []; - } - const parsed = JSON.parse(trimmed) as unknown; - return Array.isArray(parsed) ? parsed : []; -} - -async function runPublicConnectionMap( - args: Extract, - io: KtxCliIo, - deps: KtxConnectionDeps, -): Promise { - const refreshIo = createBufferedIo(); - const refreshArgs: KtxConnectionMappingArgs = { - command: 'refresh', - projectDir: args.projectDir, - connectionId: args.sourceConnectionId, - autoAccept: true, - }; - const refreshCode = await runLowLevelMapping( - refreshArgs, - ['refresh', args.sourceConnectionId, '--auto-accept', '--project-dir', args.projectDir], - refreshIo, - deps, - ); - if (refreshCode !== 0) { - io.stderr.write( - refreshIo.stderrText() || - refreshIo.stdoutText() || - `Failed to refresh mapping metadata for ${args.sourceConnectionId}\n`, - ); - return refreshCode; - } - - const validationIo = createBufferedIo(); - const validationArgs: KtxConnectionMappingArgs = { - command: 'validate', - projectDir: args.projectDir, - connectionId: args.sourceConnectionId, - }; - const validationCode = await runLowLevelMapping( - validationArgs, - ['validate', args.sourceConnectionId, '--project-dir', args.projectDir], - validationIo, - deps, - ); - if (validationCode !== 0) { - io.stderr.write( - validationIo.stderrText() || validationIo.stdoutText() || `Mapping validation failed for ${args.sourceConnectionId}\n`, - ); - return validationCode; - } - - const listIo = createBufferedIo(); - const listArgv = ['list', args.sourceConnectionId, '--project-dir', args.projectDir]; - const listArgs: KtxConnectionMappingArgs = { - command: 'list', - projectDir: args.projectDir, - connectionId: args.sourceConnectionId, - json: args.json, - }; - const listCode = await runLowLevelMapping(listArgs, args.json ? 
[...listArgv, '--json'] : listArgv, listIo, deps); - if (listCode !== 0) { - io.stderr.write(listIo.stderrText() || listIo.stdoutText() || `Failed to list mappings for ${args.sourceConnectionId}\n`); - return listCode; - } - - if (args.json) { - io.stdout.write( - `${JSON.stringify( - { - connectionId: args.sourceConnectionId, - refresh: { ok: true, output: splitOutputLines(refreshIo.stdoutText()) }, - validation: { ok: true, output: splitOutputLines(validationIo.stdoutText()) }, - mappings: parseMappingListJson(listIo.stdoutText()), - }, - null, - 2, - )}\n`, - ); - return 0; - } - - io.stdout.write(`Mapping: ${args.sourceConnectionId}\n`); - io.stdout.write(refreshIo.stdoutText()); - io.stdout.write(validationIo.stdoutText()); - io.stdout.write('\nMappings:\n'); - io.stdout.write(listIo.stdoutText().trim() ? listIo.stdoutText() : 'No mappings found.\n'); - io.stdout.write('\nNext:\n'); - io.stdout.write(` ktx ingest run --connection-id ${args.sourceConnectionId} --adapter \n`); - io.stdout.write(` ktx connection mapping list ${args.sourceConnectionId}\n`); - return 0; -} - export async function runKtxConnection( args: KtxConnectionArgs, - io: KtxConnectionIo = process, + io: KtxCliIo = process, deps: KtxConnectionDeps = {}, ): Promise { try { - if (args.command === 'map') { - return await runPublicConnectionMap(args, io, deps); - } - const project = await loadKtxProject({ projectDir: args.projectDir }); if (args.command === 'list') { const entries = Object.entries(project.config.connections).sort(([a], [b]) => a.localeCompare(b)); if (entries.length === 0) { - io.stdout.write('No connections configured. Run `ktx connection add --driver ` to add one.\n'); + io.stdout.write('No connections configured. 
Run `ktx setup` to add one.\n'); return 0; } const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length)); @@ -360,100 +127,6 @@ export async function runKtxConnection( return 0; } - if (args.command === 'add') { - assertSafeConnectionId(args.connectionId); - const hasLiteralCredentialUrl = !!args.url && !isCredentialReference(args.url); - if (hasLiteralCredentialUrl && !args.allowLiteralCredentials) { - throw new Error('Literal credential URLs require --allow-literal-credentials'); - } - if (hasLiteralCredentialUrl) { - io.stderr.write(`${literalCredentialWarning(args.connectionId)}\n`); - } - if (project.config.connections[args.connectionId] && !args.force) { - throw new Error(`Connection "${args.connectionId}" already exists; pass --force to replace it`); - } - const connectionConfig = - args.driver === 'notion' && args.notion - ? { - driver: 'notion', - auth_token_ref: args.notion.authTokenRef, - crawl_mode: args.notion.crawlMode, - root_page_ids: args.notion.rootPageIds, - root_database_ids: args.notion.rootDatabaseIds, - root_data_source_ids: args.notion.rootDataSourceIds, - ...(args.notion.maxPagesPerRun !== undefined ? { max_pages_per_run: args.notion.maxPagesPerRun } : {}), - ...(args.notion.maxKnowledgeCreatesPerRun !== undefined - ? { max_knowledge_creates_per_run: args.notion.maxKnowledgeCreatesPerRun } - : {}), - ...(args.notion.maxKnowledgeUpdatesPerRun !== undefined - ? { max_knowledge_updates_per_run: args.notion.maxKnowledgeUpdatesPerRun } - : {}), - } - : { - driver: args.driver, - ...(args.url ? { url: args.url } : {}), - ...(args.schemas.length > 0 ? 
{ schemas: args.schemas } : {}), - readonly: args.readonly, - }; - const nextConfig = { - ...project.config, - connections: { - ...project.config.connections, - [args.connectionId]: connectionConfig, - }, - }; - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(nextConfig), - 'ktx', - 'ktx@example.com', - `Update KTX connection: ${args.connectionId}`, - ); - io.stdout.write(`Connection: ${args.connectionId}\n`); - io.stdout.write(`Driver: ${args.driver}\n`); - return 0; - } - - if (args.command === 'remove') { - if (!project.config.connections[args.connectionId]) { - throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`); - } - - if (!args.force) { - if (!isInteractiveConnectionIo(args, io)) { - throw new Error( - `connection remove ${args.connectionId} requires --force when input is disabled or not interactive`, - ); - } - - const prompts = deps.prompts ?? createClackConnectionPromptAdapter(); - const confirmed = await prompts.confirm({ - message: `Remove connection "${args.connectionId}" from ktx.yaml? Ingested artifacts will remain in .ktx/.`, - initialValue: false, - }); - if (!confirmed) { - prompts.cancel('Connection removal cancelled.'); - return 1; - } - } - - const { [args.connectionId]: _removedConnection, ...connections } = project.config.connections; - const nextConfig = { - ...project.config, - connections, - }; - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(nextConfig), - 'ktx', - 'ktx@example.com', - `Remove KTX connection: ${args.connectionId}`, - ); - io.stdout.write('Connection removed from ktx.yaml.\n'); - io.stdout.write('Ingested artifacts from this connection remain in .ktx/. 
Run ktx dev artifacts to inspect.\n'); - return 0; - } - if (normalizedConnectionDriver(project, args.connectionId) === 'metabase') { const result = await testMetabaseConnection( project, diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 58848dab..9064143a 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -1291,16 +1291,9 @@ describe('runKtxCli', () => { runKtxCli(['--project-dir', tempDir, 'connection', 'list'], makeIo().io, { connection }), ).resolves.toBe(0); - const removeIo = makeIo(); + const testIo = makeIo(); await expect( - runKtxCli(['--project-dir', tempDir, 'connection', 'remove', 'warehouse', '--force', '--no-input'], removeIo.io, { - connection, - }), - ).resolves.toBe(0); - - const mapIo = makeIo(); - await expect( - runKtxCli(['--project-dir', tempDir, 'connection', 'map', 'prod-metabase', '--json'], mapIo.io, { + runKtxCli(['--project-dir', tempDir, 'connection', 'test', 'warehouse'], testIo.io, { connection, }), ).resolves.toBe(0); @@ -1309,21 +1302,9 @@ describe('runKtxCli', () => { expect(connection).toHaveBeenNthCalledWith( 2, { - command: 'remove', + command: 'test', projectDir: tempDir, connectionId: 'warehouse', - force: true, - inputMode: 'disabled', - }, - expect.anything(), - ); - expect(connection).toHaveBeenNthCalledWith( - 3, - { - command: 'map', - projectDir: tempDir, - sourceConnectionId: 'prod-metabase', - json: true, }, expect.anything(), ); @@ -1331,168 +1312,35 @@ describe('runKtxCli', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('prints help for connection metabase setup', async () => { + it('prints only list and test in connection help', async () => { const helpIo = makeIo(); - await expect(runKtxCli(['connection', 'metabase', 'setup', '--help'], helpIo.io)).resolves.toBe(0); + await expect(runKtxCli(['connection', '--help'], helpIo.io)).resolves.toBe(0); - expect(helpIo.stdout()).toContain('Usage: ktx connection metabase setup'); - for 
(const option of [ - '--id ', - '--url ', - '--api-key ', - '--username ', - '--password ', - '--mint-api-key', - '--map ', - '--sync ', - '--sync-mode ', - '--run-ingest', - '--yes', - '--no-input', - ]) { - expect(helpIo.stdout()).toContain(option); - } - expect(helpIo.stdout()).toContain('Guided equivalent of:'); - for (const line of [ - 'ktx connection mapping refresh --auto-accept', - 'ktx connection mapping set databaseMappings =', - 'ktx connection mapping set-sync-enabled --enabled true', - 'ktx ingest run --connection-id --adapter metabase', - ]) { - expect(helpIo.stdout()).toContain(line); + expect(helpIo.stdout()).toContain('Usage: ktx connection'); + expect(helpIo.stdout()).toContain('list'); + expect(helpIo.stdout()).toContain('test '); + for (const removed of ['add', 'remove', 'map', 'mapping', 'metabase', 'notion']) { + expect(helpIo.stdout()).not.toMatch(new RegExp(`\\b${removed}\\b`)); } expect(helpIo.stderr()).toBe(''); }); - it('dispatches connection metabase setup through Commander', async () => { - const connectionMetabaseSetup = vi.fn(async () => 0); - const fakeMetabaseCredential = 'mb_example'; - const setupIo = makeIo(); - - await expect( - runKtxCli( - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--id', - 'metabase', - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--map', - '2=orbit', - '--sync', - '2', - '--yes', - '--no-input', - ], - setupIo.io, - { connectionMetabaseSetup }, - ), - ).resolves.toBe(0); - - expect(connectionMetabaseSetup).toHaveBeenCalledWith( - { - command: 'setup', - projectDir: tempDir, - connectionId: 'metabase', - url: 'http://metabase.example.test:3000', - apiKey: fakeMetabaseCredential, - mintApiKey: false, - mappings: [{ metabaseDatabaseId: 2, targetConnectionId: 'orbit' }], - syncEnabledDatabaseIds: [2], - syncMode: 'ALL', - runIngest: false, - yes: true, - inputMode: 'disabled', - }, - expect.anything(), - ); - 
expect(setupIo.stderr()).toBe(`Project: ${tempDir}\n`); - }); - - it('validates connection metabase setup option values before runner dispatch', async () => { - const connectionMetabaseSetup = vi.fn(async () => 0); - + it('rejects removed connection subcommands', async () => { for (const argv of [ - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--map', - 'nope=orbit', - ], - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--map', - '2=../orbit', - ], - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--sync', - 'nope', - ], - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--sync-mode', - 'BAD', - ], - [ - 'connection', - 'metabase', - 'setup', - '--project-dir', - tempDir, - '--url', - 'http://metabase.example.test:3000', - '--api-key', - 'mb_example', - '--mint-api-key', - '--api-key', - 'also_bad', - ], + ['connection', 'add', 'postgres', 'warehouse'], + ['connection', 'remove', 'warehouse'], + ['connection', 'map', 'prod-metabase'], + ['connection', 'mapping'], + ['connection', 'metabase'], + ['connection', 'notion'], ]) { const testIo = makeIo(); - await expect(runKtxCli(argv, testIo.io, { connectionMetabaseSetup })).resolves.toBe(1); - expect(testIo.stderr()).toMatch(/map|sync|sync-mode|conflict|cannot be used|invalid|integer|choices/i); - } - expect(connectionMetabaseSetup).not.toHaveBeenCalled(); + await expect(runKtxCli(argv, testIo.io)).resolves.toBe(1); + + expect(testIo.stderr()).toMatch(/unknown command|error:/); + } }); it('rejects commands removed from the May 6 root surface', async () => { @@ -1510,153 +1358,6 @@ 
describe('runKtxCli', () => { } }); - it('dispatches connection add options through Commander', async () => { - const testIo = makeIo(); - const connection = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'connection', - 'add', - 'notion', - 'notion-main', - '--project-dir', - tempDir, - '--token-env', - 'NOTION_TOKEN', - '--crawl-mode', - 'selected_roots', - '--root-page-id', - 'page-1', - '--root-database-id', - 'database-1', - '--max-pages', - '80', - ], - testIo.io, - { connection }, - ), - ).resolves.toBe(0); - - expect(connection).toHaveBeenCalledWith( - { - command: 'add', - projectDir: tempDir, - driver: 'notion', - connectionId: 'notion-main', - url: undefined, - schemas: [], - readonly: false, - force: false, - allowLiteralCredentials: false, - notion: { - authTokenRef: 'env:NOTION_TOKEN', - crawlMode: 'selected_roots', - rootPageIds: ['page-1'], - rootDatabaseIds: ['database-1'], - rootDataSourceIds: [], - maxPagesPerRun: 80, - maxKnowledgeCreatesPerRun: undefined, - maxKnowledgeUpdatesPerRun: undefined, - }, - }, - testIo.io, - ); - expect(testIo.stderr()).toBe(`Project: ${tempDir}\n`); - }); - - it('prints generated connection notion pick help without invoking execution', async () => { - const helpCases = [ - ['connection', 'notion', '--help'], - ['connection', 'notion', 'pick', '--help'], - ['connection', 'notion', 'pick', 'notion-main', '--help'], - ]; - - for (const argv of helpCases) { - const testIo = makeIo(); - const connectionNotion = vi.fn(async () => 0); - - await expect(runKtxCli(argv, testIo.io, { connectionNotion })).resolves.toBe(0); - - expect(testIo.stdout()).toContain('Usage: ktx connection notion'); - expect(testIo.stdout()).toContain('pick'); - expect(testIo.stderr()).toBe(''); - expect(connectionNotion).not.toHaveBeenCalled(); - } - }); - - it('dispatches connection notion pick through Commander', async () => { - const testIo = makeIo(); - const connectionNotion = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ 
- '--project-dir', - tempDir, - 'connection', - 'notion', - 'pick', - 'notion-main', - '--no-input', - '--root-page-id', - '11111111222233334444555555555555', - '--root-page-id', - '11111111-2222-3333-4444-555555555555', - ], - testIo.io, - { connectionNotion }, - ), - ).resolves.toBe(0); - - expect(connectionNotion).toHaveBeenCalledWith( - { - command: 'pick', - projectDir: tempDir, - connectionId: 'notion-main', - mode: 'non-interactive', - rootPageIds: ['11111111-2222-3333-4444-555555555555'], - }, - testIo.io, - ); - expect(testIo.stderr()).toBe(`Project: ${tempDir}\n`); - }); - - it('ignores connection notion pick root page flags in interactive mode', async () => { - const testIo = makeIo(); - const connectionNotion = vi.fn(async () => 0); - - await expect( - runKtxCli(['connection', 'notion', 'pick', 'notion-main', '--root-page-id', 'not-a-uuid'], testIo.io, { - connectionNotion, - }), - ).resolves.toBe(0); - - expect(connectionNotion).toHaveBeenCalledWith( - { - command: 'pick', - projectDir: expect.any(String), - connectionId: 'notion-main', - mode: 'interactive', - }, - testIo.io, - ); - expect(testIo.stderr()).toBe(''); - }); - - it('rejects connection notion pick no-input mode without root page ids', async () => { - const testIo = makeIo(); - const connectionNotion = vi.fn(async () => 0); - - await expect( - runKtxCli(['connection', 'notion', 'pick', 'notion-main', '--no-input'], testIo.io, { connectionNotion }), - ).resolves.toBe(1); - - expect(connectionNotion).not.toHaveBeenCalled(); - expect(testIo.stderr()).toContain('connection notion pick --no-input requires at least one --root-page-id'); - }); - it('writes basic debug dispatch information when --debug is set', async () => { const testIo = makeIo(); const connection = vi.fn().mockResolvedValue(0); @@ -1817,51 +1518,6 @@ describe('runKtxCli', () => { expect(ingest).not.toHaveBeenCalled(); }); - it('rejects mutually exclusive credential and scan mode options before invoking runners', async () => { - 
const connection = vi.fn(async () => 0); - const scan = vi.fn(async () => 0); - - const tokenIo = makeIo(); - await expect( - runKtxCli( - [ - 'connection', - 'add', - 'notion', - 'notion-main', - '--token-env', - 'NOTION_TOKEN', - '--token-file', - '/tmp/notion-token', - '--root-page-id', - '11111111111111111111111111111111', - ], - tokenIo.io, - { connection }, - ), - ).resolves.toBe(1); - expect(tokenIo.stderr()).toMatch(/conflict|cannot be used/i); - - expect(connection).not.toHaveBeenCalled(); - expect(scan).not.toHaveBeenCalled(); - }); - - it('validates connection mapping set syntax before runner domain validation', async () => { - const badFieldIo = makeIo(); - await expect( - runKtxCli(['connection', 'mapping', 'set', 'prod-metabase', 'invalidMappings', '1=warehouse'], badFieldIo.io), - ).resolves.toBe(1); - expect(badFieldIo.stderr()).toContain('databaseMappings or connectionMappings'); - - for (const assignment of ['missing-equals', '=warehouse', '1=']) { - const testIo = makeIo(); - await expect( - runKtxCli(['connection', 'mapping', 'set', 'prod-metabase', 'databaseMappings', assignment], testIo.io), - ).resolves.toBe(1); - expect(testIo.stderr()).toContain('non-empty ='); - } - }); - it('does not expose root init after setup owns project creation', async () => { const testIo = makeIo(); diff --git a/packages/cli/src/commands/connection-notion-tree.test.ts b/packages/cli/src/notion-page-picker-tree.test.ts similarity index 99% rename from packages/cli/src/commands/connection-notion-tree.test.ts rename to packages/cli/src/notion-page-picker-tree.test.ts index ed1126d4..94b46b57 100644 --- a/packages/cli/src/commands/connection-notion-tree.test.ts +++ b/packages/cli/src/notion-page-picker-tree.test.ts @@ -14,7 +14,7 @@ import { TRANSIENT_HINT_DURATION_MS, visibleNodeIds, type NotionPickerPageInput, -} from './connection-notion-tree.js'; +} from './notion-page-picker-tree.js'; const IDS = { engineering: '11111111-1111-1111-1111-111111111111', diff --git 
a/packages/cli/src/commands/connection-notion-tree.ts b/packages/cli/src/notion-page-picker-tree.ts similarity index 100% rename from packages/cli/src/commands/connection-notion-tree.ts rename to packages/cli/src/notion-page-picker-tree.ts diff --git a/packages/cli/src/commands/connection-notion-tui.test.tsx b/packages/cli/src/notion-page-picker-tui.test.tsx similarity index 98% rename from packages/cli/src/commands/connection-notion-tui.test.tsx rename to packages/cli/src/notion-page-picker-tui.test.tsx index dc394688..2d4dffc3 100644 --- a/packages/cli/src/commands/connection-notion-tui.test.tsx +++ b/packages/cli/src/notion-page-picker-tui.test.tsx @@ -2,7 +2,7 @@ import { render as renderInkTest } from 'ink-testing-library'; import { act, type ReactNode } from 'react'; import { afterEach, describe, expect, it, vi } from 'vitest'; -import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './connection-notion-tree.js'; +import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './notion-page-picker-tree.js'; import { NotionPickerApp, notionPickerCommandForInkInput, @@ -13,7 +13,7 @@ import { windowOffset, type NotionPickerInkInstance, type NotionPickerInkRenderOptions, -} from './connection-notion-tui.js'; +} from './notion-page-picker-tui.js'; const IDS = { engineering: '11111111-1111-1111-1111-111111111111', @@ -378,7 +378,7 @@ describe('renderNotionPickerTui', () => { }, ), ).resolves.toEqual({ kind: 'quit' }); - expect(stderr).toContain('Use --no-input --root-page-id for scripted mode'); + expect(stderr).toContain('Use --no-input --notion-root-page-id for scripted mode'); expect(stderr).not.toContain('secret'); }); }); diff --git a/packages/cli/src/commands/connection-notion-tui.tsx b/packages/cli/src/notion-page-picker-tui.tsx similarity index 98% rename from packages/cli/src/commands/connection-notion-tui.tsx rename to packages/cli/src/notion-page-picker-tui.tsx index b2a47036..30af7522 100644 --- 
a/packages/cli/src/commands/connection-notion-tui.tsx +++ b/packages/cli/src/notion-page-picker-tui.tsx @@ -9,8 +9,8 @@ import { visibleNodeIds, type PickerCommand, type PickerState, -} from './connection-notion-tree.js'; -import type { KtxCliIo } from '../index.js'; +} from './notion-page-picker-tree.js'; +import type { KtxCliIo } from './cli-runtime.js'; const COLOR_THEME = { text: 'white', @@ -331,7 +331,7 @@ export async function renderNotionPickerTui( return result; } catch (error) { io.stderr.write( - `Notion picker requires a TTY. Use --no-input --root-page-id for scripted mode. ${sanitizeNotionPickerTuiError(error)}\n`, + `Notion picker requires a TTY. Use --no-input --notion-root-page-id for scripted mode. ${sanitizeNotionPickerTuiError(error)}\n`, ); return { kind: 'quit' }; } diff --git a/packages/cli/src/notion-page-picker.test.ts b/packages/cli/src/notion-page-picker.test.ts new file mode 100644 index 00000000..77710716 --- /dev/null +++ b/packages/cli/src/notion-page-picker.test.ts @@ -0,0 +1,308 @@ +import { describe, expect, it, vi } from 'vitest'; +import { + discoverNotionPickerPages, + notionPickerPageFromSearchResult, + normalizeNotionPageId, + pickNotionRootPages, + resolveNotionWorkspaceLabel, + type NotionPickerApi, + type PickerRenderInput, + type PickerRenderResult, +} from './notion-page-picker.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: true, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +type FakeNotionSearchPage = Record & { id: string; object: 'page' }; + +const PAGE_IDS = { + engineering: '11111111-1111-1111-1111-111111111111', + architecture: '22222222-2222-2222-2222-222222222222', + stale: '99999999-9999-9999-9999-999999999999', +}; + +function notionPage(id: string, title: string, parentId: string | null = null): 
FakeNotionSearchPage { + return { + object: 'page', + id, + archived: false, + parent: parentId ? { type: 'page_id', page_id: parentId } : { type: 'workspace', workspace: true }, + properties: { + title: { + type: 'title', + title: [{ plain_text: title }], + }, + }, + }; +} + +function fakeNotionApi(pages: FakeNotionSearchPage[]): NotionPickerApi { + return { + search: vi.fn(async (_filterValue, startCursor) => { + if (startCursor === 'page-2') { + return { results: pages.slice(2), hasMore: false, nextCursor: null }; + } + return { + results: pages.slice(0, 2), + hasMore: pages.length > 2, + nextCursor: pages.length > 2 ? 'page-2' : null, + }; + }), + retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot', bot: { workspace_name: 'Design Workspace' } })), + }; +} + +describe('normalizeNotionPageId', () => { + it('accepts dashed and compact UUIDs', () => { + expect(normalizeNotionPageId('11111111222233334444555555555555')).toBe( + '11111111-2222-3333-4444-555555555555', + ); + expect(normalizeNotionPageId('AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE')).toBe( + 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', + ); + }); +}); + +describe('Notion page picker helpers', () => { + it('extracts picker page inputs from Notion search results', () => { + expect(notionPickerPageFromSearchResult(notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering))) + .toEqual({ + id: PAGE_IDS.architecture, + title: 'Architecture', + archived: false, + parentId: PAGE_IDS.engineering, + }); + + expect( + notionPickerPageFromSearchResult({ + object: 'page', + id: PAGE_IDS.engineering.replaceAll('-', ''), + archived: true, + parent: { type: 'workspace', workspace: true }, + properties: {}, + }), + ).toEqual({ + id: PAGE_IDS.engineering, + title: 'Untitled', + archived: true, + parentId: null, + }); + }); + + it('discovers visible pages up to the cap and reports cap state', async () => { + const api = fakeNotionApi([ + notionPage(PAGE_IDS.engineering, 'Engineering'), + 
notionPage(PAGE_IDS.architecture, 'Architecture', PAGE_IDS.engineering), + notionPage('33333333-3333-3333-3333-333333333333', 'Onboarding', PAGE_IDS.engineering), + ]); + + await expect(discoverNotionPickerPages(api, { cap: 2 })).resolves.toEqual({ + pages: [ + { id: PAGE_IDS.engineering, title: 'Engineering', archived: false, parentId: null }, + { id: PAGE_IDS.architecture, title: 'Architecture', archived: false, parentId: PAGE_IDS.engineering }, + ], + cappedAtCount: 2, + warnings: [], + }); + expect(api.search).toHaveBeenCalledTimes(1); + }); + + it('keeps partial discovery results when Notion search fails after at least one page', async () => { + const api: NotionPickerApi = { + search: vi + .fn() + .mockResolvedValueOnce({ + results: [notionPage(PAGE_IDS.engineering, 'Engineering')], + hasMore: true, + nextCursor: 'cursor-2', + }) + .mockRejectedValueOnce(new Error('rate limit after first page')), + retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot' })), + }; + + await expect(discoverNotionPickerPages(api)).resolves.toEqual({ + pages: [{ id: PAGE_IDS.engineering, title: 'Engineering', archived: false, parentId: null }], + cappedAtCount: null, + warnings: ['Notion search stopped early: rate limit after first page'], + }); + }); + + it('uses the Notion workspace name when available and falls back to the connection id', async () => { + await expect(resolveNotionWorkspaceLabel(fakeNotionApi([]), 'notion-main')).resolves.toBe('Design Workspace'); + await expect( + resolveNotionWorkspaceLabel( + { + search: vi.fn(), + retrieveBotUser: vi.fn(async () => { + throw new Error('users.me unavailable'); + }), + }, + 'notion-main', + ), + ).resolves.toBe('notion-main'); + }); +}); + +describe('pickNotionRootPages', () => { + it('discovers visible pages, warns about stale roots, renders the TUI, and returns selected roots', async () => { + const api = fakeNotionApi([ + notionPage(PAGE_IDS.engineering, 'Engineering'), + notionPage(PAGE_IDS.architecture, 'Architecture', 
PAGE_IDS.engineering), + ]); + const renderPicker = vi.fn(async (input: PickerRenderInput): Promise => { + expect(input.connectionId).toBe('notion-main'); + expect(input.workspaceLabel).toBe('Design Workspace'); + expect(input.currentCrawlMode).toBe('all_accessible'); + expect(input.cappedAtCount).toBeNull(); + expect(input.initialState.preLoadWarnings).toEqual(['1 stored root_page_ids no longer visible']); + return { kind: 'save', rootPageIds: [PAGE_IDS.engineering] }; + }); + const io = makeIo(); + + await expect( + pickNotionRootPages( + { + connectionId: 'notion-main', + connection: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'all_accessible', + root_page_ids: [PAGE_IDS.stale], + }, + }, + io.io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => api), + renderPicker, + }, + ), + ).resolves.toEqual({ kind: 'selected', rootPageIds: [PAGE_IDS.engineering] }); + + expect(io.stderr()).toContain('1 stored root_page_ids no longer visible'); + expect(io.stdout()).toBe(''); + }); + + it('uses inline Notion auth_token for discovery', async () => { + const api = fakeNotionApi([notionPage(PAGE_IDS.engineering, 'Engineering')]); + const createNotionApi = vi.fn((authToken: string) => { + expect(authToken).toBe('ntn_inline_token'); + return api; + }); + + await expect( + pickNotionRootPages( + { + connectionId: 'notion-main', + connection: { + driver: 'notion', + auth_token: 'ntn_inline_token', + crawl_mode: 'selected_roots', + root_page_ids: [PAGE_IDS.engineering], + }, + }, + makeIo().io, + { + createNotionApi, + renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), + }, + ), + ).resolves.toEqual({ kind: 'back' }); + + expect(createNotionApi).toHaveBeenCalledOnce(); + }); + + it('passes partial-discovery warnings into the TUI banner state', async () => { + const api: NotionPickerApi = { + search: vi + .fn() + .mockResolvedValueOnce({ + results: [notionPage(PAGE_IDS.engineering, 'Engineering')], + hasMore: 
true, + nextCursor: 'cursor-2', + }) + .mockRejectedValueOnce(new Error('rate limit after first page')), + retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot', bot: { workspace_name: 'Design Workspace' } })), + }; + let renderInput: PickerRenderInput | undefined; + const renderPicker = vi.fn(async (input: PickerRenderInput): Promise => { + renderInput = input; + return { kind: 'quit' }; + }); + const io = makeIo(); + + await expect( + pickNotionRootPages( + { + connectionId: 'notion-main', + connection: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: [PAGE_IDS.engineering], + }, + }, + io.io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => api), + renderPicker, + }, + ), + ).resolves.toEqual({ kind: 'back' }); + + expect(renderPicker).toHaveBeenCalledOnce(); + if (!renderInput) { + throw new Error('renderPicker was not called'); + } + expect(renderInput.initialState.preLoadWarnings).toEqual(['Notion search stopped early: rate limit after first page']); + expect(renderInput.initialState.tree.map((node) => node.title)).toEqual(['Engineering']); + expect(io.stderr()).toContain('Notion search stopped early: rate limit after first page'); + }); + + it('returns unavailable when discovery cannot load any pages', async () => { + await expect( + pickNotionRootPages( + { + connectionId: 'notion-main', + connection: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: [], + }, + }, + makeIo().io, + { + env: { NOTION_TOKEN: 'ntn_test_token' }, + createNotionApi: vi.fn(() => ({ + search: vi.fn(async () => { + throw new Error('Notion API unavailable'); + }), + retrieveBotUser: vi.fn(async () => ({ name: 'Notion bot' })), + })), + renderPicker: vi.fn(async (): Promise => ({ kind: 'quit' })), + }, + ), + ).resolves.toEqual({ kind: 'unavailable', message: 'Notion API unavailable' }); + }); +}); diff --git 
a/packages/cli/src/commands/connection-notion.ts b/packages/cli/src/notion-page-picker.ts similarity index 51% rename from packages/cli/src/commands/connection-notion.ts rename to packages/cli/src/notion-page-picker.ts index e0f68c0b..807c0fc0 100644 --- a/packages/cli/src/commands/connection-notion.ts +++ b/packages/cli/src/notion-page-picker.ts @@ -1,51 +1,40 @@ -import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from '@ktx/context/connections'; +import { resolveNotionConnectionAuthToken } from '@ktx/context/connections'; import { type NotionApi, type NotionBotInfo, NotionClient } from '@ktx/context/ingest'; -import { - type KtxLocalProject, - type KtxProjectConnectionConfig, - loadKtxProject, - serializeKtxProjectConfig, -} from '@ktx/context/project'; -import type { KtxCliIo } from '../index.js'; -import { profileMark } from '../startup-profile.js'; -import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './connection-notion-tree.js'; +import type { KtxProjectConnectionConfig } from '@ktx/context/project'; +import type { KtxCliIo } from './cli-runtime.js'; +import { profileMark } from './startup-profile.js'; +import { buildInitialState, buildPickerTree, type NotionPickerPageInput } from './notion-page-picker-tree.js'; import { type NotionPickerTuiIo, type PickerRenderInput, type PickerRenderResult, renderNotionPickerTui, -} from './connection-notion-tui.js'; +} from './notion-page-picker-tui.js'; -profileMark('module:commands/connection-notion'); +profileMark('module:notion-page-picker'); -export type KtxConnectionNotionArgs = - | { - command: 'pick'; - projectDir: string; - connectionId: string; - mode: 'interactive'; - } - | { - command: 'pick'; - projectDir: string; - connectionId: string; - mode: 'non-interactive'; - rootPageIds: string[]; - }; +export interface PickNotionRootPagesArgs { + connectionId: string; + connection: KtxProjectConnectionConfig; +} export type NotionPickerApi = Pick; export type { 
PickerRenderInput, PickerRenderResult }; -interface KtxConnectionNotionDeps { +export type NotionRootPagePickResult = + | { kind: 'selected'; rootPageIds: string[] } + | { kind: 'back' } + | { kind: 'unavailable'; message: string }; + +export interface NotionRootPagePickerDeps { env?: Record; - loadProject?: typeof loadKtxProject; createNotionApi?: (authToken: string) => NotionPickerApi; renderPicker?: (input: PickerRenderInput, io: NotionPickerTuiIo) => Promise; } const NOTION_PICKER_PAGE_CAP = 5000; -function assertSafeConnectionId(connectionId: string): void { +function assertSafeNotionPickerConnectionId(connectionId: string): void { if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { throw new Error(`Unsafe connection id: ${connectionId}`); } @@ -168,111 +157,74 @@ export async function resolveNotionWorkspaceLabel(api: NotionPickerApi, connecti } } -function notionConnection(project: KtxLocalProject, connectionId: string): KtxProjectConnectionConfig { - const connection = project.config.connections[connectionId]; - if (!connection) { - throw new Error(`Connection "${connectionId}" not found`); - } +function assertNotionConnection(connection: KtxProjectConnectionConfig, connectionId: string): void { if (connection.driver !== 'notion') { throw new Error(`Connection "${connectionId}" is not a Notion connection`); } - return connection; } -export async function applyNotionPickerWriteback( - project: KtxLocalProject, - connectionId: string, - rootPageIds: string[], -): Promise { - if (rootPageIds.length === 0) { - throw new Error('connection notion pick requires at least one root page id'); - } - - const existing = notionConnection(project, connectionId); - const nextConfig = { - ...project.config, - connections: { - ...project.config.connections, - [connectionId]: { - ...existing, - crawl_mode: 'selected_roots', - root_page_ids: rootPageIds, - }, - }, - }; - - await project.fileStore.writeFile( - 'ktx.yaml', - serializeKtxProjectConfig(nextConfig), - 'ktx', - 
'ktx@example.com', - `Pick Notion roots: ${connectionId} (${rootPageIds.length} pages)`, - ); +function stringArray(value: unknown): string[] { + return Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string' && item.trim().length > 0) : []; } -export async function runKtxConnectionNotion( - args: KtxConnectionNotionArgs, +function notionCrawlMode(connection: KtxProjectConnectionConfig): 'all_accessible' | 'selected_roots' { + return connection.crawl_mode === 'all_accessible' ? 'all_accessible' : 'selected_roots'; +} + +export async function pickNotionRootPages( + args: PickNotionRootPagesArgs, io: KtxCliIo = process, - deps: KtxConnectionNotionDeps = {}, -): Promise { + deps: NotionRootPagePickerDeps = {}, +): Promise { try { - assertSafeConnectionId(args.connectionId); - const loadProject = deps.loadProject ?? loadKtxProject; - - if (args.mode === 'interactive') { - const project = await loadProject({ projectDir: args.projectDir }); - const rawConnection = notionConnection(project, args.connectionId); - const notion = parseNotionConnectionConfig(rawConnection); - const authToken = await resolveNotionConnectionAuthToken(notion, { env: deps.env }); - const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken); - const discovery = await discoverNotionPickerPages(api); - const tree = buildPickerTree(discovery.pages); - const initialState = buildInitialState({ - tree, - existingRootPageIds: notion.root_page_ids, - currentCrawlMode: notion.crawl_mode, - }); - const preLoadWarnings = [...discovery.warnings, ...initialState.preLoadWarnings]; - const renderState = - preLoadWarnings.length > 0 - ? { - ...initialState, - preLoadWarnings, - } - : initialState; - for (const warning of preLoadWarnings) { - io.stderr.write(`${warning}\n`); - } - const workspaceLabel = await resolveNotionWorkspaceLabel(api, args.connectionId); - const result = await (deps.renderPicker ?? 
renderNotionPickerTui)( - { - initialState: renderState, - connectionId: args.connectionId, - workspaceLabel, - cappedAtCount: discovery.cappedAtCount, - currentCrawlMode: notion.crawl_mode, - }, - io as NotionPickerTuiIo, - ); - if (result.kind === 'quit') { - io.stdout.write('No changes saved.\n'); - return 0; - } - await applyNotionPickerWriteback(project, args.connectionId, result.rootPageIds); - io.stdout.write(`Connection: ${args.connectionId}\n`); - io.stdout.write(`rootPageIds: ${result.rootPageIds.length}\n`); - io.stdout.write('crawlMode: selected_roots\n'); - return 0; + assertSafeNotionPickerConnectionId(args.connectionId); + assertNotionConnection(args.connection, args.connectionId); + const crawlMode = notionCrawlMode(args.connection); + const authToken = await resolveNotionConnectionAuthToken( + { + auth_token: typeof args.connection.auth_token === 'string' ? args.connection.auth_token : null, + auth_token_ref: typeof args.connection.auth_token_ref === 'string' ? args.connection.auth_token_ref : null, + }, + { env: deps.env }, + ); + const api = deps.createNotionApi ? deps.createNotionApi(authToken) : new NotionClient(authToken); + const discovery = await discoverNotionPickerPages(api); + const tree = buildPickerTree(discovery.pages); + const initialState = buildInitialState({ + tree, + existingRootPageIds: stringArray(args.connection.root_page_ids), + currentCrawlMode: crawlMode, + }); + const preLoadWarnings = [...discovery.warnings, ...initialState.preLoadWarnings]; + const renderState = + preLoadWarnings.length > 0 + ? 
{ + ...initialState, + preLoadWarnings, + } + : initialState; + for (const warning of preLoadWarnings) { + io.stderr.write(`${warning}\n`); } - - const project = await loadProject({ projectDir: args.projectDir }); - await applyNotionPickerWriteback(project, args.connectionId, args.rootPageIds); - io.stdout.write(`Connection: ${args.connectionId}\n`); - io.stdout.write(`rootPageIds: ${args.rootPageIds.length}\n`); - io.stdout.write('crawlMode: selected_roots\n'); - return 0; + const workspaceLabel = await resolveNotionWorkspaceLabel(api, args.connectionId); + const result = await (deps.renderPicker ?? renderNotionPickerTui)( + { + initialState: renderState, + connectionId: args.connectionId, + workspaceLabel, + cappedAtCount: discovery.cappedAtCount, + currentCrawlMode: crawlMode, + }, + io as NotionPickerTuiIo, + ); + if (result.kind === 'quit') { + return { kind: 'back' }; + } + if (result.rootPageIds.length === 0) { + return { kind: 'unavailable', message: 'Notion picker did not return any selected pages.' }; + } + return { kind: 'selected', rootPageIds: result.rootPageIds }; } catch (error) { - io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); - return 1; + return { kind: 'unavailable', message: error instanceof Error ? 
error.message : String(error) }; } } diff --git a/packages/cli/src/print-command-tree.test.ts b/packages/cli/src/print-command-tree.test.ts index c50ee9a3..1385d37d 100644 --- a/packages/cli/src/print-command-tree.test.ts +++ b/packages/cli/src/print-command-tree.test.ts @@ -16,7 +16,13 @@ describe('renderKtxCommandTree', () => { expect(topLevel).toContain(expected); } - expect(output).toContain('│ ├── test '); + expect(output).toContain('│ └── test '); + expect(output).not.toContain('│ ├── add'); + expect(output).not.toContain('│ ├── remove'); + expect(output).not.toContain('│ ├── map'); + expect(output).not.toContain('│ ├── mapping'); + expect(output).not.toContain('│ ├── metabase'); + expect(output).not.toContain('│ ├── notion'); }); it('ends with a single trailing newline', () => { diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 65ee191a..2999d365 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -1236,6 +1236,48 @@ describe('setup databases step', () => { expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' }); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect(io.stderr()).toContain('Structural scan failed for warehouse.'); + expect(io.stderr()).toContain('│ Structural scan failed for warehouse.'); + expect(io.stderr()).not.toMatch(/^Structural scan failed for warehouse\./m); + }); + + it('prints the native SQLite rebuild command when scanning hits a Node ABI mismatch', async () => { + const io = makeIo(); + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async (_projectDir: 
string, _connectionId: string, commandIo: KtxCliIo) => { + commandIo.stderr.write( + [ + "The module '/workspace/node_modules/better-sqlite3/build/Release/better_sqlite3.node'", + 'was compiled against a different Node.js version using', + 'NODE_MODULE_VERSION 147. This version of Node.js requires', + 'NODE_MODULE_VERSION 137. Please try re-compiling or re-installing', + 'the module (for instance, using `npm rebuild` or `npm install`).', + '', + ].join('\n'), + ); + return 1; + }), + }, + ); + + expect(result.status).toBe('failed'); + expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); + expect(io.stderr()).toContain('│ Native SQLite is built for a different Node.js ABI.'); + expect(io.stderr()).toContain('Fix: pnpm run native:rebuild'); + expect(io.stderr()).toContain(`Retry: ktx scan --project-dir ${tempDir} warehouse`); + expect(io.stderr()).not.toContain('npm rebuild'); + expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m); }); it('writes Historic SQL config for supported Snowflake databases after validation succeeds', async () => { diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index eceaf5bb..58ee61d9 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -951,6 +951,36 @@ function flushBufferedCommandOutput(io: KtxCliIo, bufferedIo: BufferedCommandIo) } } +function writePrefixedLines(write: (chunk: string) => void, output: string): void { + for (const line of output.split(/\r?\n/)) { + if (line.length > 0) { + write(`│ ${line}\n`); + } + } +} + +function flushPrefixedBufferedCommandOutput(io: KtxCliIo, bufferedIo: BufferedCommandIo): void { + writePrefixedLines((chunk) => io.stdout.write(chunk), bufferedIo.stdoutText()); + writePrefixedLines((chunk) => io.stderr.write(chunk), bufferedIo.stderrText()); +} + +function nativeSqliteAbiMismatchDetail(output: string): string | null { + const mentionsBetterSqlite = 
/\bbetter-sqlite3\b|better_sqlite3/i.test(output); + const mentionsAbiMismatch = /compiled against a different Node\.js version|NODE_MODULE_VERSION/i.test(output); + if (!mentionsBetterSqlite || !mentionsAbiMismatch) { + return null; + } + + const versionMatch = output.match( + /compiled against[\s\S]*?NODE_MODULE_VERSION\s+(\d+)[\s\S]*?requires[\s\S]*?NODE_MODULE_VERSION\s+(\d+)/i, + ); + if (!versionMatch) { + return 'better-sqlite3 native module could not load for the current Node.js runtime.'; + } + + return `better-sqlite3 was compiled for NODE_MODULE_VERSION ${versionMatch[1]}, but this Node.js requires ${versionMatch[2]}.`; +} + function readOutputValue(output: string, label: string): string | undefined { const escapedLabel = label.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); const match = new RegExp(`^\\s*${escapedLabel}:\\s*(.+?)\\s*$`, 'im').exec(output); @@ -1445,9 +1475,28 @@ async function validateAndScanConnection(input: { const scanIo = createBufferedCommandIo(); const scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); if (scanCode !== 0) { - flushBufferedCommandOutput(input.io, scanIo); - input.io.stderr.write(`Structural scan failed for ${input.connectionId}.\n`); - input.io.stderr.write(`Debug command: ktx scan --project-dir ${input.projectDir} ${input.connectionId}\n`); + const nativeSqliteDetail = nativeSqliteAbiMismatchDetail(`${scanIo.stderrText()}\n${scanIo.stdoutText()}`); + if (nativeSqliteDetail) { + writePrefixedLines( + (chunk) => input.io.stderr.write(chunk), + [ + `Structural scan failed for ${input.connectionId}.`, + 'Native SQLite is built for a different Node.js ABI.', + `Detail: ${nativeSqliteDetail}`, + 'Fix: pnpm run native:rebuild', + `Retry: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + ].join('\n'), + ); + } else { + flushPrefixedBufferedCommandOutput(input.io, scanIo); + writePrefixedLines( + (chunk) => input.io.stderr.write(chunk), + [ + `Structural scan failed for 
${input.connectionId}.`, + `Debug command: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + ].join('\n'), + ); + } return false; } const scanOutput = scanIo.stdoutText(); diff --git a/packages/cli/src/setup-interrupt.test.ts b/packages/cli/src/setup-interrupt.test.ts index d8e6350a..62917db6 100644 --- a/packages/cli/src/setup-interrupt.test.ts +++ b/packages/cli/src/setup-interrupt.test.ts @@ -17,9 +17,11 @@ function makeTracker(ctrlCValues: boolean[]): SetupInterruptTracker { describe('setup interrupt confirmation', () => { const originalIsTTY = process.stdin.isTTY; + const originalRef = process.stdin.ref; afterEach(() => { Object.defineProperty(process.stdin, 'isTTY', { configurable: true, value: originalIsTTY }); + Object.defineProperty(process.stdin, 'ref', { configurable: true, value: originalRef }); }); it('fails before opening a prompt when interactive setup has no tty', async () => { @@ -33,6 +35,26 @@ describe('setup interrupt confirmation', () => { expect(prompt).not.toHaveBeenCalled(); }); + it('refs stdin before opening a real interactive prompt', async () => { + const calls: string[] = []; + Object.defineProperty(process.stdin, 'isTTY', { configurable: true, value: true }); + Object.defineProperty(process.stdin, 'ref', { + configurable: true, + value: vi.fn(() => { + calls.push('ref'); + return process.stdin; + }), + }); + const prompt = vi.fn(async () => { + calls.push('prompt'); + return 'continued'; + }); + + await expect(withSetupInterruptConfirmation(prompt)).resolves.toBe('continued'); + + expect(calls).toEqual(['ref', 'prompt']); + }); + it('asks before exiting on Ctrl+C and reruns the active prompt when declined', async () => { const prompt = vi.fn(async () => (prompt.mock.calls.length === 1 ? 
CANCEL : 'continued')); const confirmExit = vi.fn(async () => false); diff --git a/packages/cli/src/setup-interrupt.ts b/packages/cli/src/setup-interrupt.ts index 5773c336..9baa0f1f 100644 --- a/packages/cli/src/setup-interrupt.ts +++ b/packages/cli/src/setup-interrupt.ts @@ -23,6 +23,10 @@ interface SetupInterruptOptions { const NON_INTERACTIVE_SETUP_MESSAGE = 'Interactive setup requires a terminal. Re-run this command in a TTY, or pass --no-input with the required options.'; +function refSetupInput(input: NodeJS.ReadStream = stdin): void { + input.ref?.(); +} + function createSetupInterruptTracker(input: NodeJS.ReadStream = stdin): SetupInterruptTracker { let ctrlCPressed = false; const onKeypress = (char: string | undefined, key: Key) => { @@ -73,6 +77,9 @@ export async function withSetupInterruptConfirmation( const confirmExit = options.confirmExit ?? defaultConfirmExit; while (true) { + if (!options.tracker) { + refSetupInput(); + } const value = await tracker.track(prompt); if (!isCancel(value)) { return value; diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index 27579bb3..93ad854b 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -136,7 +136,6 @@ describe('setup sources step', () => { projectDir, }); - expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(projectDir)).completed_steps).toContain('sources'); expect(io.stdout()).toContain('Context source setup skipped.'); }); @@ -170,7 +169,6 @@ describe('setup sources step', () => { source_dir: '/repo/dbt', project_name: 'analytics', }); - expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); expect((await readKtxSetupState(projectDir)).completed_steps).toContain('sources'); expect(runInitialIngest).toHaveBeenCalledWith(projectDir, 'analytics_dbt', io.io, { inputMode: 'disabled' }); }); @@ -178,7 
+176,10 @@ describe('setup sources step', () => { it('writes Metabase config and validates mapping through existing mapping path', async () => { await addPrimarySource(); const validateMetabase = vi.fn(async () => ({ ok: true as const, detail: 'user=admin@example.com' })); - const runMapping = vi.fn(async () => 0); + const runMapping = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => { + commandIo.stdout.write('Mapping validated — 1 mapping configured\n'); + return 0; + }); const io = makeIo(); await expect( @@ -210,7 +211,16 @@ describe('setup sources step', () => { syncMode: 'ALL', }, }); - expect(runMapping).toHaveBeenCalledWith(projectDir, 'prod_metabase', io.io); + expect(runMapping).toHaveBeenCalledWith( + projectDir, + 'prod_metabase', + expect.objectContaining({ + stdout: expect.objectContaining({ write: expect.any(Function) }), + stderr: expect.objectContaining({ write: expect.any(Function) }), + }), + ); + expect(io.stdout()).toContain('│ Mapping validated — 1 mapping configured'); + expect(io.stdout()).not.toMatch(/^Mapping validated — 1 mapping configured$/m); }); it('writes Notion config with the full default knowledge create budget', async () => { @@ -273,6 +283,105 @@ describe('setup sources step', () => { }); }); + it('uses the rich Notion picker for interactive selected root setup', async () => { + await addPrimarySource(); + const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' })); + const pickNotionRootPages = vi.fn(async (input: Parameters>[0]) => { + expect(input.connectionId).toBe('notion-main'); + expect(input.connection).toMatchObject({ + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: [], + }); + return { kind: 'selected' as const, rootPageIds: ['11111111-2222-3333-4444-555555555555'] }; + }); + const testPrompts = prompts({ + multiselect: [['notion']], + select: ['env', 'selected_roots', 'done'], + text: ['notion-main'], + 
}); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { prompts: testPrompts, validateNotion, pickNotionRootPages }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] }); + + expect(pickNotionRootPages).toHaveBeenCalledOnce(); + expect(testPrompts.select).toHaveBeenCalledWith({ + message: 'Which Notion pages should KTX ingest?', + options: [ + { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, + { value: 'all_accessible', label: 'All pages the integration can access' }, + { value: 'back', label: 'Back' }, + ], + }); + expect((await readConfig()).connections['notion-main']).toMatchObject({ + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'selected_roots', + root_page_ids: ['11111111-2222-3333-4444-555555555555'], + }); + }); + + it('backs out of the Notion picker without writing selected_roots when the picker quits', async () => { + await addPrimarySource(); + const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })); + const pickNotionRootPages = vi.fn(async () => ({ kind: 'back' as const })); + const testPrompts = prompts({ + multiselect: [['notion']], + select: ['env', 'selected_roots', 'all_accessible', 'done'], + text: ['notion-main'], + }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + makeIo().io, + { prompts: testPrompts, validateNotion, pickNotionRootPages }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] }); + + expect(pickNotionRootPages).toHaveBeenCalledOnce(); + expect((await readConfig()).connections['notion-main']).toMatchObject({ + driver: 'notion', + crawl_mode: 'all_accessible', + }); + expect((await readConfig()).connections['notion-main']?.root_page_ids).toBeUndefined(); + }); + + 
it('surfaces Notion picker failures and returns to the page-mode step', async () => { + await addPrimarySource(); + const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=0' })); + const pickNotionRootPages = vi.fn(async () => ({ + kind: 'unavailable' as const, + message: 'Notion picker requires a TTY', + })); + const testPrompts = prompts({ + multiselect: [['notion']], + select: ['env', 'selected_roots', 'all_accessible', 'done'], + text: ['notion-main'], + }); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts, validateNotion, pickNotionRootPages }, + ), + ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['notion-main'] }); + + expect(io.stderr()).toContain('Notion picker requires a TTY'); + expect((await readConfig()).connections['notion-main']).toMatchObject({ + driver: 'notion', + crawl_mode: 'all_accessible', + }); + }); + it('defaults interactive Metabase and Looker source setup to the only warehouse connection', async () => { await addPrimarySource(); const cases: Array<{ @@ -455,7 +564,14 @@ describe('setup sources step', () => { ), ).resolves.toEqual({ status: 'failed', projectDir }); - expect(runMapping).toHaveBeenCalledWith(projectDir, 'metabase-main', io.io); + expect(runMapping).toHaveBeenCalledWith( + projectDir, + 'metabase-main', + expect.objectContaining({ + stdout: expect.objectContaining({ write: expect.any(Function) }), + stderr: expect.objectContaining({ write: expect.any(Function) }), + }), + ); expect(io.stderr()).toContain('1: Metabase database does not match KTX connection database'); expect(io.stderr()).not.toContain('Metabase mapping validation failed'); }); @@ -479,7 +595,7 @@ describe('setup sources step', () => { ), ).resolves.toEqual({ status: 'failed', projectDir }); - expect(await readFile(join(projectDir, 'ktx.yaml'), 
'utf-8')).not.toContain('completed_steps:'); + expect((await readKtxSetupState(projectDir)).completed_steps).not.toContain('sources'); expect(io.stderr()).toContain('No LookML files found'); }); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index edf83b7b..313dfbe0 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -27,8 +27,8 @@ import { serializeKtxProjectConfig, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; -import { runKtxConnectionMapping } from './commands/connection-mapping.js'; -import { runKtxConnection } from './connection.js'; +import { pickNotionRootPages } from './notion-page-picker.js'; +import { runKtxSourceMapping } from './source-mapping.js'; import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxPublicIngest } from './public-ingest.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; @@ -94,6 +94,7 @@ export interface KtxSetupSourcesDeps { validateLooker?: (projectDir: string, connectionId: string) => Promise; validateLookml?: (connection: KtxProjectConnectionConfig) => Promise; validateNotion?: (connection: KtxProjectConnectionConfig) => Promise; + pickNotionRootPages?: typeof pickNotionRootPages; discoverMetabaseDatabases?: (args: { sourceUrl: string; sourceApiKeyRef: string; @@ -527,7 +528,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC driver: 'notion', auth_token_ref: credentialRef(args.sourceApiKeyRef, 'Notion token ref'), crawl_mode: crawlMode, - root_page_ids: rootPageIds, + ...(rootPageIds.length > 0 ? 
{ root_page_ids: rootPageIds } : {}), root_database_ids: [], root_data_source_ids: [], max_pages_per_run: 1000, @@ -613,7 +614,7 @@ async function defaultValidateMetricflow(connection: KtxProjectConnectionConfig) } async function defaultValidateLooker(projectDir: string, connectionId: string): Promise { - const code = await runKtxConnectionMapping( + const code = await runKtxSourceMapping( { command: 'refresh', projectDir, connectionId, autoAccept: true }, { stdout: { write() {} }, stderr: { write() {} } }, ); @@ -656,6 +657,47 @@ interface MappingJsonOutput { mappings: unknown[]; } +function splitOutputLines(output: string): string[] { + return output + .split('\n') + .map((line) => line.trim()) + .filter(Boolean); +} + +function writeSetupPrefixedLines(write: (chunk: string) => void, output: string): void { + for (const line of output.split(/\r?\n/)) { + if (line.length > 0) { + write(`│ ${line}\n`); + } + } +} + +function createSetupPrefixedIo(io: KtxCliIo): KtxCliIo { + return { + stdout: { + isTTY: io.stdout.isTTY, + columns: io.stdout.columns, + write(chunk: string) { + writeSetupPrefixedLines((line) => io.stdout.write(line), chunk); + }, + }, + stderr: { + write(chunk: string) { + writeSetupPrefixedLines((line) => io.stderr.write(line), chunk); + }, + }, + }; +} + +function parseMappingListJson(output: string): unknown[] { + const trimmed = output.trim(); + if (!trimmed) { + return []; + } + const parsed = JSON.parse(trimmed) as unknown; + return Array.isArray(parsed) ? parsed : []; +} + function summarizeMappingResult(parsed: MappingJsonOutput): string { const mappingCount = parsed.mappings.length; const mappingNoun = mappingCount === 1 ? 
'mapping' : 'mappings'; @@ -663,22 +705,51 @@ function summarizeMappingResult(parsed: MappingJsonOutput): string { } async function defaultRunMapping(projectDir: string, connectionId: string, io: KtxCliIo): Promise { - let captured = ''; - const captureIo: KtxCliIo = { - stdout: { write(chunk: string) { captured += chunk; } }, - stderr: io.stderr, + const outputs = { + refresh: '', + validation: '', + list: '', }; - const code = await runKtxConnection( - { command: 'map', projectDir, sourceConnectionId: connectionId, json: true }, - captureIo, + const refreshCode = await runKtxSourceMapping( + { command: 'refresh', projectDir, connectionId, autoAccept: true }, + { + stdout: { write(chunk: string) { outputs.refresh += chunk; } }, + stderr: io.stderr, + }, ); - if (code !== 0) return code; - try { - const parsed = JSON.parse(captured.trim()) as MappingJsonOutput; - io.stdout.write(`${summarizeMappingResult(parsed)}\n`); - } catch { - io.stdout.write(captured); + if (refreshCode !== 0) { + return refreshCode; } + + const validationCode = await runKtxSourceMapping( + { command: 'validate', projectDir, connectionId }, + { + stdout: { write(chunk: string) { outputs.validation += chunk; } }, + stderr: io.stderr, + }, + ); + if (validationCode !== 0) { + return validationCode; + } + + const listCode = await runKtxSourceMapping( + { command: 'list', projectDir, connectionId, json: true }, + { + stdout: { write(chunk: string) { outputs.list += chunk; } }, + stderr: io.stderr, + }, + ); + if (listCode !== 0) { + return listCode; + } + + const parsed: MappingJsonOutput = { + connectionId, + refresh: { ok: true, output: splitOutputLines(outputs.refresh) }, + validation: { ok: true, output: splitOutputLines(outputs.validation) }, + mappings: parseMappingListJson(outputs.list), + }; + io.stdout.write(`${summarizeMappingResult(parsed)}\n`); return 0; } @@ -926,6 +997,8 @@ async function promptForInteractiveSource( args: KtxSetupSourcesArgs, source: KtxSetupSourceType, prompts: 
KtxSetupSourcesPromptAdapter, + io: KtxCliIo, + deps: KtxSetupSourcesDeps, defaultConnectionId = `${source}-main`, testGitRepo: KtxSetupSourcesDeps['testGitRepo'] = testRepoConnection, discoverMetabaseDatabaseList?: KtxSetupSourcesDeps['discoverMetabaseDatabases'], @@ -1197,7 +1270,7 @@ async function promptForInteractiveSource( const crawlMode = await prompts.select({ message: 'Which Notion pages should KTX ingest?', options: [ - { value: 'selected_roots', label: 'Specific pages and their subpages (you\'ll paste page IDs)' }, + { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, { value: 'all_accessible', label: 'All pages the integration can access' }, { value: 'back', label: 'Back' }, ], @@ -1212,15 +1285,29 @@ async function promptForInteractiveSource( ...(state.notionCrawlMode === 'selected_roots' ? [ async (currentState: SourcePromptState) => { - const roots = await promptText(prompts, { - message: 'Notion page IDs to ingest (each page includes all its subpages)', - placeholder: 'page-id-1, page-id-2', - }); - if (roots === undefined) return 'back'; - currentState.notionRootPageIds = roots - .split(',') - .map((root) => root.trim()) - .filter(Boolean); + const connectionId = currentState.sourceConnectionId ?? 'notion-main'; + const result = await (deps.pickNotionRootPages ?? pickNotionRootPages)( + { + connectionId, + connection: { + driver: 'notion', + auth_token_ref: credentialRef(currentState.sourceApiKeyRef, 'Notion token ref'), + crawl_mode: 'selected_roots', + root_page_ids: currentState.notionRootPageIds ?? 
[], + root_database_ids: [], + root_data_source_ids: [], + }, + }, + io, + ); + if (result.kind === 'back') { + return 'back'; + } + if (result.kind === 'unavailable') { + io.stderr.write(`${result.message}\n`); + return 'back'; + } + currentState.notionRootPageIds = result.rootPageIds; return 'next'; }, ] @@ -1258,7 +1345,9 @@ async function chooseInteractiveSourceConnection(input: { source: KtxSetupSourceType; connections: Record; prompts: KtxSetupSourcesPromptAdapter; + io: KtxCliIo; testGitRepo?: KtxSetupSourcesDeps['testGitRepo']; + pickNotionRootPages?: KtxSetupSourcesDeps['pickNotionRootPages']; discoverMetabaseDatabases?: KtxSetupSourcesDeps['discoverMetabaseDatabases']; }): Promise { const existingIds = existingConnectionIdsBySource(input.connections, input.source); @@ -1270,6 +1359,11 @@ async function chooseInteractiveSourceConnection(input: { input.args, input.source, input.prompts, + input.io, + { + pickNotionRootPages: input.pickNotionRootPages, + discoverMetabaseDatabases: input.discoverMetabaseDatabases, + }, defaultConnectionId, input.testGitRepo, input.discoverMetabaseDatabases, @@ -1302,6 +1396,11 @@ async function chooseInteractiveSourceConnection(input: { input.args, input.source, input.prompts, + input.io, + { + pickNotionRootPages: input.pickNotionRootPages, + discoverMetabaseDatabases: input.discoverMetabaseDatabases, + }, defaultConnectionId, input.testGitRepo, input.discoverMetabaseDatabases, @@ -1416,7 +1515,9 @@ export async function runKtxSetupSourcesStep( source, connections: (await loadKtxProject({ projectDir: args.projectDir })).config.connections, prompts, + io, testGitRepo: deps.testGitRepo, + pickNotionRootPages: deps.pickNotionRootPages, discoverMetabaseDatabases: deps.discoverMetabaseDatabases, }); if (sourceChoice === 'back') { @@ -1448,7 +1549,11 @@ export async function runKtxSetupSourcesStep( } if (source === 'metabase' || source === 'looker') { prompts.log?.(`Validating ${sourceLabel(source)} mapping…`); - const mappingCode 
= await (deps.runMapping ?? defaultRunMapping)(args.projectDir, connectionId, io); + const mappingCode = await (deps.runMapping ?? defaultRunMapping)( + args.projectDir, + connectionId, + createSetupPrefixedIo(io), + ); if (mappingCode !== 0) { await rollback?.(); return { status: 'failed', projectDir: args.projectDir }; diff --git a/packages/cli/src/source-mapping.ts b/packages/cli/src/source-mapping.ts new file mode 100644 index 00000000..3f8e8782 --- /dev/null +++ b/packages/cli/src/source-mapping.ts @@ -0,0 +1,225 @@ +import { localConnectionToWarehouseDescriptor } from '@ktx/context/connections'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultLookerConnectionClientFactory, + DefaultMetabaseConnectionClientFactory, + KtxYamlMetabaseSourceStateReader, + LocalLookerRuntimeStore, + LocalMetabaseDiscoveryCache, + computeLookerMappingDrift, + computeMetabaseMappingDrift, + discoverLookerConnections, + discoverMetabaseDatabases, + lookerCredentialsFromLocalConnection, + metabaseRuntimeConfigFromLocalConnection, + seedLocalMappingStateFromKtxYaml, + validateLookerMappings, + validateMappingPhysicalMatch, + type LookerMappingClient, + type LocalMetabaseMappingListRow, + type MetabaseRuntimeClient, +} from '@ktx/context/ingest'; +import { type KtxLocalProject, ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; +import type { KtxCliIo } from './cli-runtime.js'; +import { profileMark } from './startup-profile.js'; + +profileMark('module:source-mapping'); + +export type KtxSourceMappingArgs = + | { command: 'list'; projectDir: string; connectionId: string; json: boolean } + | { command: 'refresh'; projectDir: string; connectionId: string; autoAccept: boolean } + | { command: 'validate'; projectDir: string; connectionId: string }; + +interface KtxSourceMappingDeps { + createMetabaseClient?: ( + project: KtxLocalProject, + connectionId: string, + ) => Promise>; + createLookerClient?: ( + project: KtxLocalProject, + connectionId: string, + ) => Promise & 
{ cleanup?(): Promise }>; +} + +async function createDefaultMetabaseClient( + project: KtxLocalProject, + connectionId: string, +): Promise> { + const factory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection(metabaseConnectionId, project.config.connections[metabaseConnectionId]), + DEFAULT_METABASE_CLIENT_CONFIG, + ); + return factory.createClient(connectionId); +} + +async function createDefaultLookerClient( + project: KtxLocalProject, + connectionId: string, +): Promise & { cleanup?(): Promise }> { + const factory = new DefaultLookerConnectionClientFactory({ + async resolve(lookerConnectionId) { + return lookerCredentialsFromLocalConnection(lookerConnectionId, project.config.connections[lookerConnectionId]); + }, + }); + return factory.createClient(connectionId) as unknown as Pick & { + cleanup?(): Promise; + }; +} + +function isLookerConnection(project: KtxLocalProject, connectionId: string): boolean { + return String(project.config.connections[connectionId]?.driver ?? '').toLowerCase() === 'looker'; +} + +function assertMetabaseConnection(project: KtxLocalProject, connectionId: string): void { + const connection = project.config.connections[connectionId]; + if (!connection || String(connection.driver).toLowerCase() !== 'metabase') { + throw new Error(`Connection "${connectionId}" is not a Metabase connection`); + } +} + +function targetPhysicalInfo(project: KtxLocalProject, connectionId: string) { + const descriptor = localConnectionToWarehouseDescriptor(connectionId, project.config.connections[connectionId]); + if (!descriptor) { + return { connection_type: 'UNKNOWN' }; + } + return { + connection_type: descriptor.connection_type, + host: descriptor.host ?? null, + database: descriptor.database ?? null, + account: descriptor.account ?? null, + project_id: descriptor.project_id ?? null, + dataset_id: descriptor.dataset_id ?? 
null, + ...descriptor.connection_params, + }; +} + +function renderMapping(row: LocalMetabaseMappingListRow): string { + const name = row.metabaseDatabaseName ?? 'unhydrated'; + const target = row.targetConnectionId ?? '[unmapped]'; + return `${row.metabaseDatabaseId} -> ${target} (${name}, sync: ${row.syncEnabled ? 'on' : 'off'}, source: ${ + row.source + })`; +} + +function renderLookerMapping(row: Awaited>[number]): string { + const target = row.ktxConnectionId ?? '[unmapped]'; + const metadata = [row.lookerDialect, row.lookerHost, row.lookerDatabase].filter(Boolean).join(', '); + return `${row.lookerConnectionName} -> ${target}${metadata ? ` (${metadata}, source: ${row.source})` : ` (source: ${row.source})`}`; +} + +export async function runKtxSourceMapping( + args: KtxSourceMappingArgs, + io: KtxCliIo = process, + deps: KtxSourceMappingDeps = {}, +): Promise { + try { + const project = await loadKtxProject({ projectDir: args.projectDir }); + await seedLocalMappingStateFromKtxYaml(project, args.connectionId); + if (isLookerConnection(project, args.connectionId)) { + const store = new LocalLookerRuntimeStore({ dbPath: ktxLocalStateDbPath(project) }); + + if (args.command === 'list') { + const rows = await store.listConnectionMappings(args.connectionId); + io.stdout.write(args.json ? `${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderLookerMapping).join('\n')}\n`); + return 0; + } + + if (args.command === 'refresh') { + const client = await (deps.createLookerClient ?? createDefaultLookerClient)(project, args.connectionId); + try { + const discovered = await discoverLookerConnections(client); + const drift = computeLookerMappingDrift({ + storedMappings: await store.readMappings(args.connectionId), + discovered, + }); + if (args.autoAccept) { + await store.refreshDiscoveredConnections({ lookerConnectionId: args.connectionId, discovered }); + } + io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 
'connection' : 'connections'}\n`); + io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); + io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); + return 0; + } finally { + await client.cleanup?.(); + } + } + + const knownKtxConnectionIds = new Set(Object.keys(project.config.connections)); + const knownConnectionTypes = new Map( + Object.entries(project.config.connections).map(([id]) => [id, targetPhysicalInfo(project, id).connection_type]), + ); + const validation = validateLookerMappings({ + mappings: await store.readMappings(args.connectionId), + knownKtxConnectionIds, + knownConnectionTypes, + }); + if (!validation.ok) { + for (const error of validation.errors) { + io.stderr.write(`${error.key}: ${error.reason}\n`); + } + return 1; + } + io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); + return 0; + } + + assertMetabaseConnection(project, args.connectionId); + const discoveryCache = new LocalMetabaseDiscoveryCache({ dbPath: ktxLocalStateDbPath(project) }); + const store = new KtxYamlMetabaseSourceStateReader(project, { discoveryCache }); + + if (args.command === 'list') { + const rows = await store.listDatabaseMappings(args.connectionId); + io.stdout.write(args.json ? `${JSON.stringify(rows, null, 2)}\n` : `${rows.map(renderMapping).join('\n')}\n`); + return 0; + } + + if (args.command === 'refresh') { + const client = await (deps.createMetabaseClient ?? 
createDefaultMetabaseClient)(project, args.connectionId); + try { + const discovered = await discoverMetabaseDatabases(client); + const existing = Object.fromEntries( + (await store.listDatabaseMappings(args.connectionId)).map((row) => [ + String(row.metabaseDatabaseId), + row.targetConnectionId, + ]), + ); + const drift = computeMetabaseMappingDrift({ currentMappings: existing, discovered }); + if (args.autoAccept) { + await discoveryCache.refreshDiscoveredDatabases({ connectionId: args.connectionId, discovered }); + } + io.stdout.write(`Discovery: ${discovered.length} ${discovered.length === 1 ? 'database' : 'databases'}\n`); + io.stdout.write(`Unmapped discovered: ${drift.unmappedDiscovered.length}\n`); + io.stdout.write(`Stale mappings: ${drift.staleMappings.length}\n`); + return 0; + } finally { + await client.cleanup(); + } + } + + const rows = await store.listDatabaseMappings(args.connectionId); + const failures = rows.flatMap((row) => { + if (!row.targetConnectionId) { + return []; + } + const reason = validateMappingPhysicalMatch( + { metabaseEngine: row.metabaseEngine, metabaseDbName: row.metabaseDbName, metabaseHost: row.metabaseHost }, + project.config.connections[row.targetConnectionId] + ? targetPhysicalInfo(project, row.targetConnectionId) + : { connection_type: 'UNKNOWN' }, + ); + return reason ? [`${row.metabaseDatabaseId}: ${reason}`] : []; + }); + if (failures.length > 0) { + for (const failure of failures) { + io.stderr.write(`${failure}\n`); + } + return 1; + } + io.stdout.write(`Mapping validation passed: ${args.connectionId}\n`); + return 0; + } catch (error) { + io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); + return 1; + } +} diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index c2b7386c..c6fefd96 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -307,7 +307,7 @@ describe('standalone built ktx CLI smoke', () => { }); }); - it('adds a redacted Notion connection through the built binary', async () => { + it('rejects the removed connection add command through the built binary', async () => { const projectDir = join(tempDir, 'notion-project'); const init = await runSetupNewProject(projectDir); expectProjectStderr(init, projectDir); @@ -327,23 +327,17 @@ describe('standalone built ktx CLI smoke', () => { '5', ]); - expectProjectStderr(add, projectDir); - expect(add.stdout).toContain('Connection: notion-main'); - expect(add.stdout).toContain('Driver: notion'); + expect(add.code).toBe(1); + expect(add.stdout).toBe(''); + expect(add.stderr).toContain("unknown command 'add'"); const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); - expect(yaml).toContain('driver: notion'); - expect(yaml).toContain('auth_token_ref: env:NOTION_TOKEN'); - expect(yaml).toContain('crawl_mode: all_accessible'); - expect(yaml).toContain('max_pages_per_run: 5'); + expect(yaml).not.toContain('driver: notion'); + expect(yaml).not.toContain('auth_token_ref: env:NOTION_TOKEN'); expect(yaml).not.toContain('ntn_'); const parsed = parseKtxProjectConfig(yaml); - expect(parsed.connections['notion-main']).toMatchObject({ - driver: 'notion', - auth_token_ref: 'env:NOTION_TOKEN', - crawl_mode: 'all_accessible', - }); + expect(parsed.connections['notion-main']).toBeUndefined(); }); }); diff --git a/packages/context/src/ingest/memory-flow/known-errors.ts b/packages/context/src/ingest/memory-flow/known-errors.ts index 8273ed86..f9998f89 100644 --- a/packages/context/src/ingest/memory-flow/known-errors.ts +++ 
b/packages/context/src/ingest/memory-flow/known-errors.ts @@ -23,6 +23,6 @@ export function formatNotionAuthorizationExpiredDetail(unitKey: string): string export function notionAuthorizationFixSuggestions(connectionId: string): string[] { return [ `Refresh the Notion token referenced by auth_token_ref for ${connectionId}. If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.`, - `Run ktx connection notion pick ${connectionId} to confirm Notion access, then rerun ktx ingest ${connectionId}.`, + `Run ktx setup and reconfigure the Notion source to confirm page access, then rerun ktx ingest ${connectionId}.`, ]; } diff --git a/packages/context/src/ingest/memory-flow/summary.test.ts b/packages/context/src/ingest/memory-flow/summary.test.ts index 0720dbae..a22ca1ff 100644 --- a/packages/context/src/ingest/memory-flow/summary.test.ts +++ b/packages/context/src/ingest/memory-flow/summary.test.ts @@ -85,7 +85,7 @@ describe('formatMemoryFlowFinalSummary', () => { '- Refresh the Notion token referenced by auth_token_ref for notion-main. 
If it uses env:NAME, export a fresh token in that variable; if it uses file:/path, replace that file.', ); expect(summary).toContain( - '- Run ktx connection notion pick notion-main to confirm Notion access, then rerun ktx ingest notion-main.', + '- Run ktx setup and reconfigure the Notion source to confirm page access, then rerun ktx ingest notion-main.', ); expect(summary).not.toContain('error_uri'); }); From c202202e6b368cdd56303d1c9de8138d2d0ed9ea Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 15:41:10 +0200 Subject: [PATCH 10/15] feat(cli): clean up wiki and sl commands (#65) * feat(cli): clean up wiki and sl commands * test(scripts): update package artifact CLI smoke assertion --- .../content/docs/cli-reference/ktx-sl.mdx | 34 +--- .../content/docs/cli-reference/ktx-wiki.mdx | 63 +------ .../docs/getting-started/quickstart.mdx | 6 +- .../content/docs/guides/serving-agents.mdx | 8 +- .../content/docs/guides/writing-context.mdx | 93 +++------- .../docs/integrations/agent-clients.mdx | 10 +- packages/cli/src/command-schemas.ts | 13 -- .../cli/src/commands/knowledge-commands.ts | 49 +----- packages/cli/src/commands/sl-commands.ts | 78 ++++----- packages/cli/src/example-smoke.test.ts | 16 +- packages/cli/src/index.test.ts | 50 ++++++ packages/cli/src/knowledge.test.ts | 162 ++++++------------ packages/cli/src/knowledge.ts | 59 +------ packages/cli/src/setup-agents.ts | 4 +- packages/cli/src/sl.test.ts | 130 ++++++-------- packages/cli/src/sl.ts | 109 ++++++------ scripts/examples-docs.test.mjs | 3 +- scripts/package-artifacts.mjs | 9 +- scripts/package-artifacts.test.mjs | 2 +- 19 files changed, 312 insertions(+), 586 deletions(-) diff --git a/docs-site/content/docs/cli-reference/ktx-sl.mdx b/docs-site/content/docs/cli-reference/ktx-sl.mdx index f5a31b27..b3e5305f 100644 --- a/docs-site/content/docs/cli-reference/ktx-sl.mdx +++ b/docs-site/content/docs/cli-reference/ktx-sl.mdx @@ -1,6 +1,6 @@ --- title: "ktx sl" -description: "List, read, 
validate, query, or write semantic-layer sources." +description: "List, search, validate, or query semantic-layer sources." --- Interact with your project's semantic layer. Semantic sources are YAML definitions that describe your tables, columns, measures, joins, and grain — the vocabulary agents use to generate correct SQL. @@ -16,9 +16,8 @@ ktx sl [options] | Subcommand | Description | |-----------|-------------| | `list` | List semantic-layer sources | -| `read ` | Read a semantic-layer source | +| `search ` | Search semantic-layer sources | | `validate ` | Validate a semantic-layer source against the database schema | -| `write ` | Write a semantic-layer source | | `query` | Compile or execute a semantic-layer query | ## Options @@ -28,16 +27,17 @@ ktx sl [options] | Flag | Description | Default | |------|-------------|---------| | `--connection-id ` | Filter by KTX connection id | — | -| `--query ` | Search source names and descriptions | — | | `--output ` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` | | `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` | -### `sl read` +### `sl search` | Flag | Description | Default | |------|-------------|---------| -| `--connection-id ` | KTX connection id (required) | — | -| `--json` | Print JSON output | `false` | +| `--connection-id ` | Filter by KTX connection id | — | +| `--limit ` | Maximum search results | — | +| `--output ` | Output mode: `pretty` (default in TTY), `plain` (TSV), or `json` | `pretty` | +| `--json` | Shortcut for `--output=json` (overrides `--output`) | `false` | ### `sl validate` @@ -45,13 +45,6 @@ ktx sl [options] |------|-------------|---------| | `--connection-id ` | KTX connection id (required) | — | -### `sl write` - -| Flag | Description | Default | -|------|-------------|---------| -| `--connection-id ` | KTX connection id (required) | — | -| `--yaml ` | Semantic-layer source YAML content (required) | — | - ### `sl query` | Flag | 
Description | Default | @@ -82,20 +75,11 @@ ktx sl list --connection-id my-warehouse ktx sl list --json # Search sources as JSON -ktx sl list --json --query "revenue" - -# Read a source definition -ktx sl read orders --connection-id my-warehouse - -# Read a source definition as JSON -ktx sl read orders --connection-id my-warehouse --json +ktx sl search "revenue" --json # Validate a source against the live schema ktx sl validate orders --connection-id my-warehouse -# Write a new source from YAML -ktx sl write customers --connection-id my-warehouse --yaml "$(cat sources/customers.yaml)" - # Compile a query and view the generated SQL ktx sl query \ --connection-id my-warehouse \ @@ -159,5 +143,5 @@ Semantic-layer commands return human-readable output by default. Use `--json` or |-------|-------|----------| | Source not found | Source name or connection id is wrong | Run `ktx sl list --json` and retry with an exact source name and connection id | | Validation fails | YAML references missing columns, invalid joins, or invalid SQL expressions | Fix the source YAML and rerun `ktx sl validate` | -| Query compile fails | Measure, dimension, filter, or segment name is invalid | Read the source with `ktx sl read`, then retry using declared fields | +| Query compile fails | Measure, dimension, filter, or segment name is invalid | Search sources with `ktx sl search`, inspect the source YAML in your project files, then retry using declared fields | | Execution returns too many rows | `--max-rows` is missing or too high | Add `--max-rows` with a bounded value before executing | diff --git a/docs-site/content/docs/cli-reference/ktx-wiki.mdx b/docs-site/content/docs/cli-reference/ktx-wiki.mdx index 7e45420e..8e27b5ff 100644 --- a/docs-site/content/docs/cli-reference/ktx-wiki.mdx +++ b/docs-site/content/docs/cli-reference/ktx-wiki.mdx @@ -1,6 +1,6 @@ --- title: "ktx wiki" -description: "List, read, search, or write knowledge pages." +description: "List or search knowledge pages." 
--- Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data. @@ -16,9 +16,7 @@ ktx wiki [options] | Subcommand | Description | |-----------|-------------| | `list` | List local wiki pages | -| `read ` | Read one local wiki page | | `search ` | Search local wiki pages | -| `write ` | Write one local wiki page | ## Options @@ -29,13 +27,6 @@ ktx wiki [options] | `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | -### `wiki read` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output | `false` | -| `--user-id ` | Local user id | `local` | - ### `wiki search` | Flag | Description | Default | @@ -44,18 +35,6 @@ ktx wiki [options] | `--user-id ` | Local user id | `local` | | `--limit ` | Maximum search results | — | -### `wiki write` - -| Flag | Description | Default | -|------|-------------|---------| -| `--user-id ` | Local user id | `local` | -| `--scope ` | Scope: `global` or `user` | `global` | -| `--summary ` | Wiki page summary (required) | — | -| `--content ` | Wiki page content (required) | — | -| `--tag ` | Wiki tag; repeatable | — | -| `--ref ` | Wiki ref; repeatable | — | -| `--sl-ref ` | Semantic-layer ref; repeatable | — | - ## Examples ```bash @@ -65,48 +44,16 @@ ktx wiki list # List all wiki pages as JSON ktx wiki list --json -# Read a specific wiki page -ktx wiki read revenue-definitions - -# Read a specific wiki page as JSON -ktx wiki read revenue-definitions --json - # Search wiki pages ktx wiki search "monthly recurring revenue" # Search wiki pages as JSON ktx wiki search "monthly recurring revenue" --json --limit 10 - -# Write a global knowledge page -ktx wiki write revenue-definitions \ - --summary "Canonical revenue metric definitions" \ - --content "## MRR\nMonthly Recurring Revenue is calculated as..." 
- -# Write a user-scoped knowledge page -ktx wiki write my-notes \ - --scope user \ - --summary "Personal analysis notes" \ - --content "Things to check when revenue numbers look off..." - -# Write a page with tags and references -ktx wiki write churn-rules \ - --summary "Churn calculation business rules" \ - --content "A customer is considered churned when..." \ - --tag finance \ - --tag retention \ - --sl-ref customers \ - --sl-ref subscriptions - -# Write a page with external references -ktx wiki write data-freshness \ - --summary "Data pipeline SLAs and freshness guarantees" \ - --content "The orders table refreshes every 15 minutes..." \ - --ref "https://wiki.example.com/data-pipelines" ``` ## Output -Wiki commands print local knowledge pages and search results. Agents should search first, then read the most relevant page by key. +Wiki commands print local knowledge pages and search results. ```json { @@ -127,7 +74,5 @@ Wiki commands print local knowledge pages and search results. Agents should sear | Error | Cause | Recovery | |-------|-------|----------| -| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing | -| Read fails for a key | The page key is wrong or scoped to a different user | Run `ktx wiki list` or search again to get the exact key | -| Write fails due to missing fields | `--summary` or `--content` was omitted | Pass both fields, and keep the summary short enough for search results | -| Agent writes duplicate pages | It did not search existing pages first | Always run `ktx wiki search` before `ktx wiki write` | +| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms or run ingest to capture more context | +| A page is missing | The page has not been created by ingest or memory capture yet | Run ingest, then search again with `ktx wiki search` | diff --git 
a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index d71a0754..59a512cb 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -208,9 +208,9 @@ KTX writes project state as plain files so agents can inspect and edit changes i |------|------------|---------| | `ktx.yaml` | `ktx setup` | Main project configuration: connections, LLM settings, embeddings, and context sources | | `.ktx/secrets/*` | `ktx setup` when file-backed secrets are selected | Local secret files referenced from `ktx.yaml`; do not commit these | -| `semantic-layer//*.yaml` | context build, ingestion, or `ktx sl write` | Semantic source definitions agents use for SQL generation | -| `knowledge/global/*.md` | ingestion or `ktx wiki write --scope global` | Shared business context and metric definitions | -| `knowledge/user//*.md` | `ktx wiki write --scope user` | User-scoped notes for one agent/user context | +| `semantic-layer//*.yaml` | context build, ingestion, or direct file edits | Semantic source definitions agents use for SQL generation | +| `knowledge/global/*.md` | ingestion, memory capture, or direct file edits | Shared business context and metric definitions | +| `knowledge/user//*.md` | memory capture or direct file edits | User-scoped notes for one agent/user context | | `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling public `ktx` commands | ## Verify it worked diff --git a/docs-site/content/docs/guides/serving-agents.mdx b/docs-site/content/docs/guides/serving-agents.mdx index b6f073b8..0de6934e 100644 --- a/docs-site/content/docs/guides/serving-agents.mdx +++ b/docs-site/content/docs/guides/serving-agents.mdx @@ -26,10 +26,7 @@ ktx status --json # List sources ktx sl list --json ktx sl list --json --connection-id my-postgres -ktx sl list --json --query "revenue" - -# 
Read a source -ktx sl read orders --json --connection-id my-postgres +ktx sl search "revenue" --json # Run a query from a JSON file ktx sl query --json \ @@ -44,9 +41,6 @@ ktx sl query --json \ ```bash # Search knowledge pages ktx wiki search "revenue recognition" --json --limit 10 - -# Read a specific page -ktx wiki read order-status-definitions --json ``` ## Setting Up Your Agent diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index 3f0e3fbd..9e08fcc7 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -10,11 +10,11 @@ After building context through scanning and ingestion, you'll want to refine it Agents should refine context in this order: 1. `ktx sl list --json` — discover available sources and connection ids. -2. `ktx sl read --connection-id ` — inspect the current YAML. -3. Edit the source YAML directly or use `ktx sl write`. +2. `ktx sl search --json` — find source candidates for a concept. +3. Edit the source YAML directly in `semantic-layer//`. 4. `ktx sl validate --connection-id ` — verify columns, joins, and table references. 5. `ktx sl query ... --format sql` — compile a representative query without executing it. -6. `ktx wiki search ...` and `ktx wiki write ...` — add business context that does not belong in schema YAML. +6. `ktx wiki search ...` — check business context captured by ingest or memory. ## Semantic Sources @@ -33,13 +33,14 @@ ktx sl list --connection-id my-postgres ktx sl list --json ``` -### Reading a source +### Searching sources ```bash -ktx sl read orders --connection-id my-postgres +ktx sl search "revenue" --connection-id my-postgres --json ``` -This prints the full YAML definition for the source. +Search returns ranked source summaries. To inspect or edit a source, open the +YAML file under `semantic-layer//`. 
### The source schema @@ -147,25 +148,10 @@ Column visibility controls what agents see: | `internal` | Available for joins and measures but not shown to agents | | `hidden` | Excluded entirely — useful for ETL columns | -### Writing a source +### Editing a source -```bash -ktx sl write orders --connection-id my-postgres --yaml ' -name: orders -table: public.orders -grain: [order_id] -columns: - - name: order_id - type: string - - name: total_amount - type: number -measures: - - name: total_revenue - expr: SUM(total_amount) -' -``` - -You can also edit source files directly — they live at `semantic-layer//.yaml` in your project directory. +Edit source files directly. They live at +`semantic-layer//.yaml` in your project directory. ### Validating sources @@ -225,11 +211,10 @@ The query planner is grain-aware — it understands the cardinality of joins and ### Workflow: edit and validate a source -1. `ktx sl read orders --connection-id my-postgres > /tmp/orders.yaml` — capture the current definition. -2. Edit `/tmp/orders.yaml` to add columns, measures, joins, or descriptions. -3. `ktx sl write orders --connection-id my-postgres --yaml "$(cat /tmp/orders.yaml)"` — write the updated source. -4. `ktx sl validate orders --connection-id my-postgres` — check the definition against the live schema. -5. `ktx sl query --connection-id my-postgres --measure total_revenue --dimension order_date --format sql` — compile a representative query. +1. Open `semantic-layer/my-postgres/orders.yaml`. +2. Edit the file to add columns, measures, joins, or descriptions. +3. `ktx sl validate orders --connection-id my-postgres` — check the definition against the live schema. +4. `ktx sl query --connection-id my-postgres --measure total_revenue --dimension order_date --format sql` — compile a representative query. If validation fails, fix the YAML before asking an agent to use the source. 
Common validation failures are missing columns, invalid join targets, and measure expressions that reference fields outside the source. @@ -260,42 +245,16 @@ knowledge/ - **Global pages** apply across all connections — business definitions, metric standards, company terminology. - **User-scoped pages** are private to a user ID — personal notes, local gotchas, or context you do not want shared globally. -### Writing pages +### Editing pages -```bash -ktx wiki write order-status-definitions \ - --scope global \ - --summary "Business definitions for order status values" \ - --content "## Order Statuses - -- **pending**: Order placed but not yet processed -- **confirmed**: Payment received, awaiting fulfillment -- **shipped**: Order dispatched to carrier -- **delivered**: Order received by customer -- **cancelled**: Order cancelled before shipment - -Orders in pending status for more than 48 hours are flagged for review." \ - --tag orders \ - --tag definitions \ - --sl-ref orders -``` - -Write flags: - -| Flag | Description | -|------|-------------| -| `--scope ` | `global` (default) or `user` | -| `--summary ` | Short description for search results (required) | -| `--content ` | Full Markdown content (required) | -| `--tag ` | Categorization tag (repeatable) | -| `--ref ` | Reference to external resources (repeatable) | -| `--sl-ref ` | Link to a semantic source (repeatable) | +Create and edit knowledge pages directly as Markdown files in the `knowledge/` +directory. Ingest and memory capture also create these pages automatically. 
Knowledge page fields: | Field | Required | Description | |-------|----------|-------------| -| Key | Yes | Stable page identifier passed to `ktx wiki read` | +| Key | Yes | Stable page identifier used as the Markdown filename | | Summary | Yes | Short text shown in search results | | Content | Yes | Full Markdown business context | | Scope | No | `global` for shared context or `user` for user-scoped notes | @@ -303,20 +262,12 @@ Knowledge page fields: | External refs | No | Links or identifiers for source-of-truth systems | | Semantic-layer refs | No | Source names the page explains or constrains | -You can also create and edit knowledge pages directly as Markdown files in the `knowledge/` directory. - ### Listing pages ```bash ktx wiki list ``` -### Reading a page - -```bash -ktx wiki read order-status-definitions -``` - ### Searching ```bash @@ -328,9 +279,9 @@ Search uses both full-text matching and semantic similarity — it finds relevan ### Workflow: add searchable business context 1. Search first: `ktx wiki search "order status definitions"`. -2. If no page already covers the rule, write a page with `ktx wiki write`. -3. Include a concise `--summary`; agents see this before loading full content. -4. Add `--tag` values for the business area and `--sl-ref` values for related semantic sources. +2. If no page already covers the rule, create or edit a Markdown file under `knowledge/global/`. +3. Include concise frontmatter; agents see the summary before loading full content. +4. Add `tags` values for the business area and `sl_refs` values for related semantic sources. 5. Search again with the user's likely wording to confirm the page is discoverable. 
## Common errors @@ -341,4 +292,4 @@ Search uses both full-text matching and semantic similarity — it finds relevan | Query compilation double-counts a measure | Join relationship or grain is missing or wrong | Add `grain` and explicit `relationship` values, then validate and recompile | | Agent cannot find a metric | Measure name or description does not match business terminology | Add a measure description and a knowledge page with common synonyms | | Knowledge search misses a page | Summary and tags do not include likely user wording | Rewrite the summary and add relevant tags, then search again | -| `ktx sl write` changes are hard to review | Large YAML was passed inline | Edit the source file directly or write from a temporary file, then review the git diff | +| Semantic-layer changes are hard to review | The YAML edit is too large or unfocused | Split the change into smaller source-file edits, then review the git diff | diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index 8a055fda..61538140 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -34,11 +34,9 @@ description: Use local KTX semantic context and wiki knowledge for this project. 
Available commands: - `ktx status --json --project-dir /path/to/project` - `ktx sl list --json --project-dir /path/to/project` -- `ktx sl list --json --project-dir /path/to/project --query ''` -- `ktx sl read '' --json --project-dir /path/to/project --connection-id ''` +- `ktx sl search '' --json --project-dir /path/to/project --connection-id ''` - `ktx sl query --json --project-dir /path/to/project --connection-id '' --query-file '' --execute --max-rows 100` - `ktx wiki search '' --json --project-dir /path/to/project --limit 10` -- `ktx wiki read '' --json --project-dir /path/to/project` ``` ### Workflow tips @@ -127,12 +125,8 @@ All supported agent clients call the same KTX CLI commands: |---------|-------------| | `ktx status --json` | Return project setup and context readiness | | `ktx wiki search --json` | Search knowledge pages | -| `ktx wiki read --json` | Read a knowledge page | -| `ktx wiki write ` | Write or update a knowledge page | | `ktx sl list --json` | List semantic-layer sources | -| `ktx sl list --query --json` | Search semantic-layer sources | -| `ktx sl read --json --connection-id ` | Read a semantic source definition | -| `ktx sl write --connection-id ` | Write or update a semantic source | +| `ktx sl search --json` | Search semantic-layer sources | | `ktx sl validate --connection-id ` | Validate semantic source definitions | | `ktx sl query --json` | Execute a semantic-layer query when semantic compute is configured | diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index 5caece1f..e1365d86 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -3,19 +3,6 @@ import { z } from 'zod'; const projectDirSchema = z.string().min(1); const stringArraySchema = z.array(z.string()); -export const wikiWriteCommandSchema = z.object({ - command: z.literal('write'), - projectDir: projectDirSchema, - key: z.string().min(1), - scope: z.enum(['GLOBAL', 'USER']), - userId: 
z.string().min(1), - summary: z.string().min(1), - content: z.string().min(1), - tags: stringArraySchema, - refs: stringArraySchema, - slRefs: stringArraySchema, -}); - const orderBySchema = z.union([ z.string().min(1), z.object({ diff --git a/packages/cli/src/commands/knowledge-commands.ts b/packages/cli/src/commands/knowledge-commands.ts index f8d716f7..382ebf0a 100644 --- a/packages/cli/src/commands/knowledge-commands.ts +++ b/packages/cli/src/commands/knowledge-commands.ts @@ -1,11 +1,9 @@ -import { type Command, Option } from '@commander-js/extra-typings'; +import { type Command } from '@commander-js/extra-typings'; import { - collectOption, type KtxCliCommandContext, parsePositiveIntegerOption, resolveCommandProjectDir, } from '../cli-program.js'; -import { wikiWriteCommandSchema } from '../command-schemas.js'; import type { KtxKnowledgeArgs } from '../knowledge.js'; import { profileMark } from '../startup-profile.js'; @@ -19,7 +17,7 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void { const wiki = program .command('wiki') - .description('List, read, search, or write local wiki pages') + .description('List or search local wiki pages') .showHelpAfterError() .addHelpText( 'after', @@ -40,22 +38,6 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon }); }); - wiki - .command('read') - .description('Read one local wiki page') - .argument('', 'Wiki page key') - .option('--json', 'Print JSON output', false) - .option('--user-id ', 'Local user id', 'local') - .action(async (key: string, options: { userId: string; json?: boolean }, command) => { - await runKnowledgeArgs(context, { - command: 'read', - projectDir: resolveCommandProjectDir(command), - key, - userId: options.userId, - json: options.json, - }); - }); - wiki .command('search') .description('Search local wiki pages') @@ -73,31 +55,4 @@ export function 
registerWikiCommands(program: Command, context: KtxCliCommandCon ...(options.limit !== undefined ? { limit: options.limit } : {}), }); }); - - wiki - .command('write') - .description('Write one local wiki page') - .argument('', 'Wiki page key') - .option('--user-id ', 'Local user id', 'local') - .addOption(new Option('--scope ', 'global or user').choices(['global', 'user']).default('global')) - .requiredOption('--summary ', 'Wiki summary') - .requiredOption('--content ', 'Wiki content') - .option('--tag ', 'Wiki tag; repeatable', collectOption, []) - .option('--ref ', 'Wiki ref; repeatable', collectOption, []) - .option('--sl-ref ', 'Semantic-layer ref; repeatable', collectOption, []) - .action(async (key: string, options, command) => { - const args = wikiWriteCommandSchema.parse({ - command: 'write', - projectDir: resolveCommandProjectDir(command), - key, - scope: options.scope === 'user' ? 'USER' : 'GLOBAL', - userId: options.userId, - summary: options.summary, - content: options.content, - tags: options.tag, - refs: options.ref, - slRefs: options.slRef, - }); - await runKnowledgeArgs(context, args); - }); } diff --git a/packages/cli/src/commands/sl-commands.ts b/packages/cli/src/commands/sl-commands.ts index e1b985a3..d23674cd 100644 --- a/packages/cli/src/commands/sl-commands.ts +++ b/packages/cli/src/commands/sl-commands.ts @@ -41,7 +41,7 @@ async function runSlArgs(context: KtxCliCommandContext, args: KtxSlArgs): Promis export function registerSlCommands(program: Command, context: KtxCliCommandContext, commandName = 'sl'): void { const sl = program .command(commandName) - .description('List, read, validate, query, or write local semantic-layer sources') + .description('List, search, validate, or query local semantic-layer sources') .showHelpAfterError() .addHelpText( 'after', @@ -51,7 +51,31 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte sl.command('list') .description('List semantic-layer sources') 
.option('--connection-id ', 'KTX connection id') - .option('--query ', 'Search source names and descriptions') + .addOption( + new Option('--output ', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([ + 'pretty', + 'plain', + 'json', + ]), + ) + .option('--json', 'Shortcut for --output=json (overrides --output)', false) + .action( + async (options: { connectionId?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, command) => { + await runSlArgs(context, { + command: 'list', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + output: options.output, + json: options.json, + }); + }, + ); + + sl.command('search') + .description('Search semantic-layer sources') + .argument('', 'Search query') + .option('--connection-id ', 'KTX connection id') + .option('--limit ', 'Maximum search results', parsePositiveIntegerOption) .addOption( new Option('--output ', 'Output mode: pretty (default in TTY), plain (TSV), or json').choices([ 'pretty', @@ -62,35 +86,22 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte .option('--json', 'Shortcut for --output=json (overrides --output)', false) .action( async ( - options: { connectionId?: string; query?: string; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, + query: string, + options: { connectionId?: string; limit?: number; output?: 'pretty' | 'plain' | 'json'; json?: boolean }, command, ) => { - await runSlArgs(context, { - command: 'list', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connectionId, - query: options.query, - output: options.output, - json: options.json, - }); + await runSlArgs(context, { + command: 'search', + projectDir: resolveCommandProjectDir(command), + connectionId: options.connectionId, + query, + ...(options.limit !== undefined ? 
{ limit: options.limit } : {}), + output: options.output, + json: options.json, + }); }, ); - sl.command('read') - .description('Read a semantic-layer source') - .argument('', 'Semantic-layer source name') - .requiredOption('--connection-id ', 'KTX connection id') - .option('--json', 'Print JSON output', false) - .action(async (sourceName: string, options: { connectionId: string; json?: boolean }, command) => { - await runSlArgs(context, { - command: 'read', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connectionId, - sourceName, - json: options.json, - }); - }); - sl.command('validate') .description('Validate a semantic-layer source') .argument('', 'Semantic-layer source name') @@ -104,21 +115,6 @@ export function registerSlCommands(program: Command, context: KtxCliCommandConte }); }); - sl.command('write') - .description('Write a semantic-layer source') - .argument('', 'Semantic-layer source name') - .requiredOption('--connection-id ', 'KTX connection id') - .requiredOption('--yaml ', 'Semantic-layer source YAML') - .action(async (sourceName: string, options: { connectionId: string; yaml: string }, command) => { - await runSlArgs(context, { - command: 'write', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connectionId, - sourceName, - yaml: options.yaml, - }); - }); - sl.command('query') .description('Compile or execute a semantic-layer query') .option('--connection-id ', 'KTX connection id') diff --git a/packages/cli/src/example-smoke.test.ts b/packages/cli/src/example-smoke.test.ts index 221c20f2..f1670544 100644 --- a/packages/cli/src/example-smoke.test.ts +++ b/packages/cli/src/example-smoke.test.ts @@ -79,12 +79,6 @@ describe('standalone local warehouse example', () => { parseJsonOutput<{ data: { items: Array<{ key: string; summary: string }> } }>(knowledgeList.stdout).data.items, ).toContainEqual(expect.objectContaining({ key: 'revenue', summary: 'Paid order value after refunds' })); - const 
knowledgeRead = await runBuiltCli(['wiki', 'read', 'revenue', '--json', '--project-dir', projectDir]); - expect(knowledgeRead).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ data: { content: string } }>(knowledgeRead.stdout).data.content).toContain( - 'Revenue is paid order amount after refund adjustments.', - ); - const slList = await runBuiltCli(['sl', 'list', '--json', '--project-dir', projectDir, '--connection-id', 'warehouse']); expect(slList).toMatchObject({ code: 0, stderr: '' }); expect( @@ -93,9 +87,9 @@ describe('standalone local warehouse example', () => { ).data.items, ).toContainEqual(expect.objectContaining({ connectionId: 'warehouse', name: 'orders', columnCount: 3 })); - const slRead = await runBuiltCli([ + const slSearch = await runBuiltCli([ 'sl', - 'read', + 'search', 'orders', '--json', '--connection-id', @@ -103,8 +97,10 @@ describe('standalone local warehouse example', () => { '--project-dir', projectDir, ]); - expect(slRead).toMatchObject({ code: 0, stderr: '' }); - expect(parseJsonOutput<{ data: { yaml: string } }>(slRead.stdout).data.yaml).toContain('name: orders'); + expect(slSearch).toMatchObject({ code: 0, stderr: '' }); + expect( + parseJsonOutput<{ data: { items: Array<{ connectionId: string; name: string }> } }>(slSearch.stdout).data.items, + ).toContainEqual(expect.objectContaining({ connectionId: 'warehouse', name: 'orders' })); const ingest = await runBuiltCli([ 'ingest', diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 9064143a..817653f6 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -139,6 +139,56 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(''); }); + it('rejects removed public wiki and sl read/write commands', async () => { + const sl = vi.fn(async () => 0); + const knowledge = vi.fn(async () => 0); + + for (const argv of [ + ['--project-dir', tempDir, 'wiki', 'read', 'revenue'], + ['--project-dir', tempDir, 'wiki', 
'write', 'revenue', '--summary', 'Revenue', '--content', 'Revenue.'], + ['--project-dir', tempDir, 'sl', 'read', 'orders', '--connection-id', 'warehouse'], + ['--project-dir', tempDir, 'sl', 'write', 'orders', '--connection-id', 'warehouse', '--yaml', 'name: orders'], + ]) { + const io = makeIo(); + await expect(runKtxCli(argv, io.io, { knowledge, sl })).resolves.toBe(1); + expect(io.stderr()).toMatch(/unknown command|error:/); + } + + expect(knowledge).not.toHaveBeenCalled(); + expect(sl).not.toHaveBeenCalled(); + }); + + it('routes sl search and rejects the old sl list --query flag', async () => { + const sl = vi.fn(async () => 0); + + const searchIo = makeIo(); + await expect( + runKtxCli( + ['--project-dir', tempDir, 'sl', 'search', 'revenue', '--connection-id', 'warehouse', '--limit', '5', '--json'], + searchIo.io, + { sl }, + ), + ).resolves.toBe(0); + expect(sl).toHaveBeenCalledWith( + { + command: 'search', + projectDir: tempDir, + connectionId: 'warehouse', + query: 'revenue', + limit: 5, + json: true, + output: undefined, + }, + searchIo.io, + ); + + const listIo = makeIo(); + await expect( + runKtxCli(['--project-dir', tempDir, 'sl', 'list', '--query', 'revenue'], listIo.io, { sl }), + ).resolves.toBe(1); + expect(listIo.stderr()).toContain("unknown option '--query'"); + }); + it('routes runtime management commands with the CLI package version', async () => { const runtime = vi.fn(async () => 0); const installIo = makeIo(); diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts index db794289..1982fe1c 100644 --- a/packages/cli/src/knowledge.test.ts +++ b/packages/cli/src/knowledge.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { initKtxProject } from '@ktx/context/project'; import type { KtxEmbeddingPort } from '@ktx/context'; +import { type LocalKnowledgeScope, writeLocalKnowledgePage } from '@ktx/context/wiki'; import { afterEach, beforeEach, describe, expect, it } from 
'vitest'; import { runKtxKnowledge } from './knowledge.js'; @@ -40,6 +41,29 @@ class FakeEmbeddingPort implements KtxEmbeddingPort { } } +async function seedKnowledgePage(input: { + projectDir: string; + key: string; + summary: string; + content: string; + scope?: LocalKnowledgeScope; + tags?: string[]; + refs?: string[]; + slRefs?: string[]; +}): Promise { + const project = await initKtxProject({ projectDir: input.projectDir, projectName: 'warehouse' }); + await writeLocalKnowledgePage(project, { + key: input.key, + scope: input.scope ?? 'GLOBAL', + userId: 'local', + summary: input.summary, + content: input.content, + tags: input.tags ?? [], + refs: input.refs ?? [], + slRefs: input.slRefs ?? [], + }); +} + describe('runKtxKnowledge', () => { let tempDir: string; @@ -51,36 +75,16 @@ describe('runKtxKnowledge', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('writes, reads, lists, and searches knowledge pages', async () => { + it('lists and searches knowledge pages', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - const writeIo = makeIo(); - await expect( - runKtxKnowledge( - { - command: 'write', - projectDir, - key: 'metrics-revenue', - scope: 'GLOBAL', - userId: 'local', - summary: 'Revenue', - content: 'Revenue is paid order value.', - tags: ['finance'], - refs: [], - slRefs: ['orders'], - }, - writeIo.io, - ), - ).resolves.toBe(0); - expect(writeIo.stdout()).toContain('Wrote knowledge/global/metrics-revenue.md'); - - const readIo = makeIo(); - await expect( - runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local' }, readIo.io), - ).resolves.toBe(0); - expect(readIo.stdout()).toContain('# metrics-revenue'); - expect(readIo.stdout()).toContain('Revenue is paid order value.'); + await seedKnowledgePage({ + projectDir, + key: 'metrics-revenue', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + 
slRefs: ['orders'], + }); const listIo = makeIo(); await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0); @@ -93,27 +97,16 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toContain('metrics-revenue'); }); - it('prints wiki list, search, and read as public JSON envelopes', async () => { + it('prints wiki list and search as public JSON envelopes', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - await expect( - runKtxKnowledge( - { - command: 'write', - projectDir, - key: 'metrics-revenue', - scope: 'GLOBAL', - userId: 'local', - summary: 'Revenue', - content: 'Revenue is paid order value.', - tags: ['finance'], - refs: [], - slRefs: ['orders'], - }, - makeIo().io, - ), - ).resolves.toBe(0); + await seedKnowledgePage({ + projectDir, + key: 'metrics-revenue', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + slRefs: ['orders'], + }); const listIo = makeIo(); await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe( @@ -137,48 +130,6 @@ describe('runKtxKnowledge', () => { data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] }, meta: { command: 'wiki search' }, }); - - const readIo = makeIo(); - await expect( - runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io), - ).resolves.toBe(0); - expect(JSON.parse(readIo.stdout())).toMatchObject({ - kind: 'wiki.page', - data: { - key: 'metrics-revenue', - summary: 'Revenue', - content: 'Revenue is paid order value.', - }, - }); - }); - - it('rejects slash-delimited write keys with a flat-key suggestion', async () => { - const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - const writeIo = makeIo(); - await expect( - 
runKtxKnowledge( - { - command: 'write', - projectDir, - key: 'orbit/company-overview', - scope: 'GLOBAL', - userId: 'local', - summary: 'Orbit', - content: 'Orbit overview.', - tags: [], - refs: [], - slRefs: [], - }, - writeIo.io, - ), - ).resolves.toBe(1); - - expect(writeIo.stderr()).toContain( - 'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".', - ); - expect(writeIo.stdout()).toBe(''); }); it('explains empty search results for a project without wiki pages', async () => { @@ -192,30 +143,19 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toBe(''); expect(searchIo.stderr()).toContain('No local wiki pages found'); - expect(searchIo.stderr()).toContain('ktx wiki write'); + expect(searchIo.stderr()).toContain('Run ingest'); + expect(searchIo.stderr()).not.toContain('ktx wiki write'); }); it('uses configured embeddings for semantic wiki search', async () => { const projectDir = join(tempDir, 'semantic-project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - await expect( - runKtxKnowledge( - { - command: 'write', - projectDir, - key: 'active-contract-arr-open-tickets', - scope: 'GLOBAL', - userId: 'local', - summary: 'Active Contract ARR Ranked by Open Support Ticket Count', - content: 'Accounts ranked by annual recurring contract value and support ticket load.', - tags: ['historic-sql'], - refs: [], - slRefs: [], - }, - makeIo().io, - ), - ).resolves.toBe(0); + await seedKnowledgePage({ + projectDir, + key: 'active-contract-arr-open-tickets', + summary: 'Active Contract ARR Ranked by Open Support Ticket Count', + content: 'Accounts ranked by annual recurring contract value and support ticket load.', + tags: ['historic-sql'], + }); const searchIo = makeIo(); await expect( diff --git a/packages/cli/src/knowledge.ts b/packages/cli/src/knowledge.ts index 5c5df1ea..0d1e194b 100644 --- a/packages/cli/src/knowledge.ts +++ b/packages/cli/src/knowledge.ts @@ -4,31 +4,12 @@ import { 
type KtxEmbeddingPort, } from '@ktx/context'; import { loadKtxProject } from '@ktx/context/project'; -import { - type LocalKnowledgeScope, - listLocalKnowledgePages, - readLocalKnowledgePage, - searchLocalKnowledgePages, - writeLocalKnowledgePage, -} from '@ktx/context/wiki'; +import { listLocalKnowledgePages, searchLocalKnowledgePages } from '@ktx/context/wiki'; import { writeJsonResult } from './io/print-list.js'; export type KtxKnowledgeArgs = | { command: 'list'; projectDir: string; userId: string; json?: boolean } - | { command: 'read'; projectDir: string; key: string; userId: string; json?: boolean } - | { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number } - | { - command: 'write'; - projectDir: string; - key: string; - scope: LocalKnowledgeScope; - userId: string; - summary: string; - content: string; - tags: string[]; - refs: string[]; - slRefs: string[]; - }; + | { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number }; interface KtxKnowledgeIo { stdout: { write(chunk: string): void }; @@ -75,25 +56,6 @@ export async function runKtxKnowledge( } return 0; } - if (args.command === 'read') { - const page = await readLocalKnowledgePage(project, { key: args.key, userId: args.userId }); - if (!page) { - throw new Error(`Knowledge page "${args.key}" was not found`); - } - if (args.json) { - writeJsonResult(io, { - kind: 'wiki.page', - data: page, - meta: { command: 'wiki read' }, - }); - return 0; - } - io.stdout.write(`# ${page.key}\n\n`); - io.stdout.write(`Scope: ${page.scope}\n`); - io.stdout.write(`Summary: ${page.summary}\n\n`); - io.stdout.write(`${page.content}\n`); - return 0; - } if (args.command === 'search') { const results = await searchLocalKnowledgePages(project, { query: args.query, @@ -113,7 +75,7 @@ export async function runKtxKnowledge( const pages = await listLocalKnowledgePages(project, { userId: args.userId }); if (pages.length === 0) { 
io.stderr.write( - `No local wiki pages found in ${project.projectDir}. Create one with \`ktx wiki write --summary --content \` or run ingest.\n`, + `No local wiki pages found in ${project.projectDir}. Run ingest to capture wiki context, then retry the search.\n`, ); } else { io.stderr.write( @@ -127,19 +89,8 @@ export async function runKtxKnowledge( } return 0; } - - const write = await writeLocalKnowledgePage(project, { - key: args.key, - scope: args.scope, - userId: args.userId, - summary: args.summary, - content: args.content, - tags: args.tags, - refs: args.refs, - slRefs: args.slRefs, - }); - io.stdout.write(`Wrote ${write.path}\n`); - return 0; + const _exhaustive: never = args; + throw new Error(`Unsupported wiki command: ${JSON.stringify(_exhaustive)}`); } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index 3c7829c7..da9486f5 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -138,8 +138,7 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '', `- \`${ktxCommandLine(input.launcher, ['status', ...projectDirArgs])}\``, `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'list', ...projectDirArgs, '--query', ''])}\``, - `- \`${ktxCommandLine(input.launcher, ['sl', 'read', '', ...projectDirArgs, '--connection-id', ''])}\``, + `- \`${ktxCommandLine(input.launcher, ['sl', 'search', '', ...projectDirArgs, '--connection-id', ''])}\``, `- \`${ktxCommandLine(input.launcher, [ 'sl', 'query', @@ -153,7 +152,6 @@ function cliInstructionContent(input: { projectDir: string; launcher: KtxCliLaun '100', ])}\``, `- \`${ktxCommandLine(input.launcher, ['wiki', 'search', '', ...projectDirArgs, '--limit', '10'])}\``, - `- \`${ktxCommandLine(input.launcher, ['wiki', 'read', '', 
...projectDirArgs])}\``, '', 'Use semantic-layer queries before direct database access. Do not print secrets or credential references.', '', diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts index 8d360c58..48c7f4c7 100644 --- a/packages/cli/src/sl.test.ts +++ b/packages/cli/src/sl.test.ts @@ -38,6 +38,22 @@ function makeIo() { }; } +async function seedSlSource(input: { + projectDir: string; + connectionId?: string; + sourceName?: string; + yaml?: string; +}): Promise { + const project = await initKtxProject({ projectDir: input.projectDir, projectName: 'warehouse' }); + await project.fileStore.writeFile( + `semantic-layer/${input.connectionId ?? 'warehouse'}/${input.sourceName ?? 'orders'}.yaml`, + input.yaml ?? ORDERS_YAML, + 'ktx', + 'ktx@example.com', + 'Add semantic-layer source', + ); +} + describe('runKtxSl', () => { let tempDir: string; @@ -49,24 +65,9 @@ describe('runKtxSl', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('writes, validates, reads, and lists semantic-layer sources', async () => { + it('validates, lists, and searches semantic-layer sources', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - const writeIo = makeIo(); - await expect( - runKtxSl( - { - command: 'write', - projectDir, - connectionId: 'warehouse', - sourceName: 'orders', - yaml: ORDERS_YAML, - }, - writeIo.io, - ), - ).resolves.toBe(0); - expect(writeIo.stdout()).toContain('Wrote semantic-layer/warehouse/orders.yaml'); + await seedSlSource({ projectDir }); const validateIo = makeIo(); await expect( @@ -74,62 +75,49 @@ describe('runKtxSl', () => { ).resolves.toBe(0); expect(validateIo.stdout()).toContain('Valid semantic-layer source: warehouse/orders'); - const readIo = makeIo(); - await expect(runKtxSl({ command: 'read', projectDir, connectionId: 'warehouse', sourceName: 'orders' }, readIo.io)) - .resolves.toBe(0); - expect(readIo.stdout()).toContain('name: 
orders'); - const listIo = makeIo(); await expect(runKtxSl({ command: 'list', projectDir, connectionId: 'warehouse' }, listIo.io)).resolves.toBe(0); expect(listIo.stdout()).toContain('warehouse\torders\tcolumns=1\tmeasures=0\tjoins=0'); + + const searchIo = makeIo(); + await expect( + runKtxSl({ command: 'search', projectDir, connectionId: 'warehouse', query: 'order', json: true }, searchIo.io), + ).resolves.toBe(0); + expect(JSON.parse(searchIo.stdout())).toMatchObject({ + kind: 'list', + data: { + items: [ + expect.objectContaining({ + connectionId: 'warehouse', + name: 'orders', + score: expect.any(Number), + }), + ], + }, + meta: { command: 'sl search' }, + }); }); - it('prints semantic-layer reads and searched lists as public JSON envelopes', async () => { + it('prints semantic-layer list and search as public JSON envelopes', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - await expect( - runKtxSl( - { - command: 'write', - projectDir, - connectionId: 'warehouse', - sourceName: 'orders', - yaml: [ - 'name: orders', - 'table: public.orders', - 'description: Paid order facts', - 'grain: [order_id]', - 'columns:', - ' - name: order_id', - ' type: string', - '', - ].join('\n'), - }, - makeIo().io, - ), - ).resolves.toBe(0); - - const readIo = makeIo(); - await expect( - runKtxSl( - { command: 'read', projectDir, connectionId: 'warehouse', sourceName: 'orders', json: true }, - readIo.io, - ), - ).resolves.toBe(0); - expect(JSON.parse(readIo.stdout())).toMatchObject({ - kind: 'sl.source', - data: { - connectionId: 'warehouse', - name: 'orders', - yaml: expect.stringContaining('name: orders'), - }, + await seedSlSource({ + projectDir, + yaml: [ + 'name: orders', + 'table: public.orders', + 'description: Paid order facts', + 'grain: [order_id]', + 'columns:', + ' - name: order_id', + ' type: string', + '', + ].join('\n'), }); const listIo = makeIo(); await expect( runKtxSl( - { command: 
'list', projectDir, connectionId: 'warehouse', query: 'paid', json: true }, + { command: 'search', projectDir, connectionId: 'warehouse', query: 'paid', json: true }, listIo.io, ), ).resolves.toBe(0); @@ -145,7 +133,7 @@ describe('runKtxSl', () => { }), ], }, - meta: { command: 'sl list' }, + meta: { command: 'sl search' }, }); }); @@ -566,13 +554,7 @@ joins: [] it('emits sl list as a JSON envelope when output=json', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - const writeIo = makeIo(); - await runKtxSl( - { command: 'write', projectDir, connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }, - writeIo.io, - ); + await seedSlSource({ projectDir }); const listIo = makeIo(); const code = await runKtxSl( @@ -604,13 +586,7 @@ joins: [] it('emits sl list with grouping and Clack-style framing when output=pretty', async () => { const projectDir = join(tempDir, 'project'); - await initKtxProject({ projectDir, projectName: 'warehouse' }); - - const writeIo = makeIo(); - await runKtxSl( - { command: 'write', projectDir, connectionId: 'warehouse', sourceName: 'orders', yaml: ORDERS_YAML }, - writeIo.io, - ); + await seedSlSource({ projectDir }); const listIo = makeIo(); const code = await runKtxSl( diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts index ebf3eca7..baff239b 100644 --- a/packages/cli/src/sl.ts +++ b/packages/cli/src/sl.ts @@ -13,10 +13,10 @@ import { readLocalSlSource, searchLocalSlSources, validateLocalSlSource, - writeLocalSlSource, + type LocalSlSourceSearchResult, + type LocalSlSourceSummary, type SemanticLayerQueryInput, } from '@ktx/context/sl'; -import { writeJsonResult } from './io/print-list.js'; import { createManagedPythonSemanticLayerComputePort, type KtxManagedPythonInstallPolicy, @@ -28,10 +28,17 @@ profileMark('module:sl'); type SlQueryFormat = 'json' | 'sql'; export type KtxSlArgs = - | { command: 'list'; projectDir: string; connectionId?: 
string; query?: string; output?: string; json?: boolean } - | { command: 'read'; projectDir: string; connectionId: string; sourceName: string; json?: boolean } + | { command: 'list'; projectDir: string; connectionId?: string; output?: string; json?: boolean } + | { + command: 'search'; + projectDir: string; + connectionId?: string; + query: string; + limit?: number; + output?: string; + json?: boolean; + } | { command: 'validate'; projectDir: string; connectionId: string; sourceName: string } - | { command: 'write'; projectDir: string; connectionId: string; sourceName: string; yaml: string } | { command: 'query'; projectDir: string; @@ -73,6 +80,35 @@ function slSearchEmbeddingService(project: KtxLocalProject, deps: KtxSlDeps): Kt return provider ? new KtxIngestEmbeddingPortAdapter(provider) : null; } +async function printSlSources(input: { + rows: ReadonlyArray; + command: 'sl list' | 'sl search'; + output?: string; + json?: boolean; + io: KtxSlIo; + emptyMessage: string; +}): Promise { + const { resolveOutputMode } = await import('./io/mode.js'); + const { printList } = await import('./io/print-list.js'); + const mode = resolveOutputMode({ explicit: input.output, json: input.json, io: input.io }); + printList({ + rows: input.rows, + columns: [ + { key: 'connectionId', label: 'CONNECTION', plain: '' }, + { key: 'name', label: 'NAME', plain: '' }, + { key: 'columnCount', label: 'COLS', plain: 'columns=', dim: true }, + { key: 'measureCount', label: 'MEASURES', plain: 'measures=', dim: true }, + { key: 'joinCount', label: 'JOINS', plain: 'joins=', dim: true }, + { key: 'description', label: 'DESCRIPTION', plain: false, optional: true, dim: true }, + ], + groupBy: 'connectionId', + emptyMessage: input.emptyMessage, + command: input.command, + mode, + io: input.io, + }); +} + async function readSlQueryFile(path: string): Promise { const parsed = JSON.parse(await readFile(path, 'utf-8')) as unknown; if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { 
@@ -85,51 +121,32 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx try { const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); if (args.command === 'list') { - const sources = args.query - ? await searchLocalSlSources(project, { - connectionId: args.connectionId, - query: args.query, - embeddingService: slSearchEmbeddingService(project, deps), - }) - : await listLocalSlSources(project, { connectionId: args.connectionId }); - const { resolveOutputMode } = await import('./io/mode.js'); - const { printList } = await import('./io/print-list.js'); - const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); - printList({ + const sources = await listLocalSlSources(project, { connectionId: args.connectionId }); + await printSlSources({ rows: sources, - columns: [ - { key: 'connectionId', label: 'CONNECTION', plain: '' }, - { key: 'name', label: 'NAME', plain: '' }, - { key: 'columnCount', label: 'COLS', plain: 'columns=', dim: true }, - { key: 'measureCount', label: 'MEASURES', plain: 'measures=', dim: true }, - { key: 'joinCount', label: 'JOINS', plain: 'joins=', dim: true }, - { key: 'description', label: 'DESCRIPTION', plain: false, optional: true, dim: true }, - ], - groupBy: 'connectionId', emptyMessage: `No semantic-layer sources found in ${project.projectDir}`, command: 'sl list', - mode, + output: args.output, + json: args.json, io, }); return 0; } - if (args.command === 'read') { - const source = await readLocalSlSource(project, { + if (args.command === 'search') { + const sources = await searchLocalSlSources(project, { connectionId: args.connectionId, - sourceName: args.sourceName, + query: args.query, + embeddingService: slSearchEmbeddingService(project, deps), + limit: args.limit, + }); + await printSlSources({ + rows: sources, + emptyMessage: `No semantic-layer sources matched "${args.query}" in ${project.projectDir}`, + command: 'sl search', + output: args.output, 
+ json: args.json, + io, }); - if (!source) { - throw new Error(`Semantic-layer source "${args.connectionId}/${args.sourceName}" was not found`); - } - if (args.json) { - writeJsonResult(io, { - kind: 'sl.source', - data: source, - meta: { command: 'sl read' }, - }); - return 0; - } - io.stdout.write(source.yaml); return 0; } if (args.command === 'validate') { @@ -178,14 +195,8 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx io.stdout.write(`${JSON.stringify(result, null, 2)}\n`); return 0; } - - const write = await writeLocalSlSource(project, { - connectionId: args.connectionId, - sourceName: args.sourceName, - yaml: args.yaml, - }); - io.stdout.write(`Wrote ${write.path}\n`); - return 0; + const _exhaustive: never = args; + throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`); } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 793566ed..62a25cf7 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -154,10 +154,9 @@ describe('standalone example docs', () => { for (const command of [ 'ktx status --json', 'ktx sl list --json', - 'ktx sl read orders --json', + 'ktx sl search "revenue" --json', 'ktx sl query --json', 'ktx wiki search "revenue recognition" --json', - 'ktx wiki read order-status-definitions --json', ]) { assert.match(servingAgents, new RegExp(command.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); } diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index ab8d7adf..7e184dde 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -729,23 +729,22 @@ try { 'exec', 'ktx', 'sl', - 'list', + 'search', + 'orders', '--json', '--connection-id', 'warehouse', - '--query', - 'orders', '--project-dir', projectDir, ]); - const slSearchJson = parseJsonResult('ktx sl list', slSearch); + const 
slSearchJson = parseJsonResult('ktx sl search', slSearch); assert.equal(slSearchJson.kind, 'list'); assert.equal(slSearchJson.data.items.length, 1); assert.equal(slSearchJson.data.items[0].connectionId, 'warehouse'); assert.equal(slSearchJson.data.items[0].name, 'orders'); assert.equal(typeof slSearchJson.data.items[0].score, 'number'); requireIncludes(slSearchJson.data.items[0].matchReasons, 'lexical', 'sl search match reasons'); - process.stdout.write('ktx sl list hybrid metadata verified\\n'); + process.stdout.write('ktx sl search hybrid metadata verified\\n'); const slQuery = await run('pnpm', ['exec', 'ktx', 'sl', 'query', '--connection-id', diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 9fe5a2c1..7694ddc3 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -459,7 +459,7 @@ describe('verification snippets', () => { assert.match(source, /knowledge', 'global', 'revenue\.md'/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'wiki',\s*'search'/); assert.match(source, /semantic-layer', 'warehouse', 'orders\.yaml'/); - assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'sl',\s*'list'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'sl',\s*'search',\s*'orders'/); assert.match(source, /orders\.order_count/); assert.match(source, /node:sqlite/); assert.match(source, /driver: sqlite/); From e3534252665d69267520c9fb8765719449e2d3cd Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 15:49:22 +0200 Subject: [PATCH 11/15] fix(cli): show project dir in context build step (#67) --- packages/cli/src/context-build-view.test.ts | 10 ++++++++++ packages/cli/src/context-build-view.ts | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index 647357a7..3df1f6d7 100644 --- a/packages/cli/src/context-build-view.test.ts +++ 
b/packages/cli/src/context-build-view.test.ts @@ -168,6 +168,15 @@ describe('renderContextBuildView', () => { expect(output).toContain('(0/1 · 1m05s)'); }); + it('renders project directory when provided', () => { + const state = initViewState([ + { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + ]); + + const output = renderContextBuildView(state, { styled: false, projectDir: '/tmp/project' }); + expect(output).toContain('Project: /tmp/project'); + }); + it('renders dynamic separator matching header width', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, @@ -448,6 +457,7 @@ describe('runContextBuild', () => { const output = io.stdout(); expect(output).toContain('Building KTX context'); + expect(output).toContain('Project: /tmp/project'); expect(output).toContain('Primary sources:'); expect(output).toContain('warehouse'); expect(output).toContain('Context sources:'); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 7457f9b5..e021b144 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -204,6 +204,7 @@ export function renderContextBuildView( '', header, separator, + ...(options.projectDir ? 
[` Project: ${options.projectDir}`] : []), ...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width), ...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width), '', @@ -684,7 +685,7 @@ export async function runContextBuild( } if (!repainter) { - io.stdout.write(renderContextBuildView(state, { styled: false })); + io.stdout.write(renderContextBuildView(state, { ...viewOpts, styled: false })); } else { paint(false); } From 97da9919e9f8c1f761d09017027b4563c8c58441 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 15:55:00 +0200 Subject: [PATCH 12/15] refactor: remove legacy compatibility paths (#64) * refactor: remove legacy compatibility paths * fix: support legacy metabase native queries * test: use canonical semantic layer descriptions * Rename CLI description * Recover setup scan from SQLite ABI mismatch * Remove legacy product name from CLI help --- .../semantic-layer/warehouse/orders.yaml | 3 +- .../global/orbit-customers-source.md | 2 +- .../global/sales-ops-cs-handoff-process.md | 2 +- packages/cli/src/cli-program.ts | 2 +- packages/cli/src/commands/setup-commands.ts | 12 +- packages/cli/src/index.test.ts | 4 +- packages/cli/src/ingest.test-utils.ts | 12 +- packages/cli/src/ingest.test.ts | 3 +- packages/cli/src/local-adapters.ts | 2 - .../cli/src/local-scan-connectors.test.ts | 9 +- packages/cli/src/local-scan-connectors.ts | 2 +- packages/cli/src/next-steps.test.ts | 25 ---- packages/cli/src/public-ingest.ts | 2 +- packages/cli/src/setup-databases.test.ts | 62 ++++++++- packages/cli/src/setup-databases.ts | 121 ++++++++++++++++-- packages/cli/src/setup-sources.ts | 10 +- packages/cli/src/setup.ts | 2 - packages/cli/src/sl.test.ts | 2 +- .../memory_agent_bundle_ingest_reconcile.md | 4 +- .../context/skills/ingest_triage/SKILL.md | 4 +- .../context/skills/metabase_ingest/SKILL.md | 2 +- .../context/skills/metricflow_ingest/SKILL.md | 6 +- packages/context/skills/sl/SKILL.md | 
3 +- packages/context/skills/sl_capture/SKILL.md | 11 +- .../historic-sql/historic-sql.adapter.test.ts | 4 +- .../historic-sql/historic-sql.adapter.ts | 7 - .../adapters/historic-sql/projection.test.ts | 17 ++- .../adapters/historic-sql/projection.ts | 14 +- .../adapters/historic-sql/types.test.ts | 12 +- .../src/ingest/adapters/historic-sql/types.ts | 24 +--- .../adapters/looker/local-looker.adapter.ts | 8 +- .../adapters/metabase/client-port.test.ts | 11 +- .../ingest/adapters/metabase/client-port.ts | 2 +- .../ingest/adapters/metabase/client.test.ts | 41 +++++- .../src/ingest/adapters/metabase/client.ts | 30 +---- .../metabase/local-metabase.adapter.test.ts | 13 -- .../metabase/local-metabase.adapter.ts | 6 +- .../metricflow/import-semantic-models.ts | 31 ----- .../src/ingest/ingest-bundle.runner.test.ts | 3 +- .../context/src/ingest/local-adapters.test.ts | 12 +- packages/context/src/ingest/local-adapters.ts | 24 ++-- .../src/ingest/report-snapshot.test.ts | 9 +- .../context/src/ingest/report-snapshot.ts | 25 +--- packages/context/src/ingest/reports.ts | 3 +- .../src/ingest/stages/stage-index.types.ts | 2 +- .../tools/emit-eviction-decision.tool.ts | 2 +- .../emit-reconciliation-records.tool.test.ts | 8 +- .../src/ingest/tools/eviction-list.tool.ts | 2 +- .../warehouse-catalog.service.ts | 16 +-- .../src/ingest/wiki-sl-ref-repair.test.ts | 2 +- .../src/mcp/local-project-ports.test.ts | 3 +- .../context/src/memory/capture-signals.ts | 3 +- packages/context/src/memory/types.ts | 2 +- packages/context/src/project/config.test.ts | 38 +++--- packages/context/src/project/config.ts | 39 ++---- .../src/scan/relationship-benchmarks.test.ts | 4 +- .../src/scan/relationship-diagnostics.test.ts | 2 +- .../src/search/backend-conformance.test.ts | 3 +- .../src/sl/description-normalization.ts | 5 - packages/context/src/sl/local-sl.test.ts | 6 +- .../src/sl/pglite-sl-search-prototype.test.ts | 9 +- packages/context/src/sl/schemas.ts | 6 +- 
.../src/sl/semantic-layer.service.test.ts | 12 +- .../context/src/sl/semantic-layer.service.ts | 42 +----- .../src/sl/tools/sl-edit-source.tool.ts | 2 +- .../src/sl/tools/sl-warehouse-validation.ts | 2 +- .../src/sl/tools/sl-write-source.tool.test.ts | 6 +- .../src/sl/tools/sl-write-source.tool.ts | 2 +- packages/context/src/tools/tool-session.ts | 2 +- .../context/src/wiki/local-knowledge.test.ts | 25 ---- .../data.sqlite | Bin .../expected-links.yaml | 0 .../fixture.yaml | 4 +- .../snapshot.json | 2 +- python/ktx-sl/semantic_layer/loader.py | 11 +- python/ktx-sl/semantic_layer/manifest.py | 47 ++----- .../ktx-sl/sources/b2b_saas/churn_risk.yaml | 13 +- .../ktx-sl/sources/ecommerce/churn_risk.yaml | 7 +- python/ktx-sl/tests/test_manifest.py | 20 +-- ...d-evidence-fusion-adversarial-fixtures.mjs | 8 +- scripts/check-boundaries.mjs | 6 +- scripts/check-boundaries.test.mjs | 2 +- scripts/examples-docs.test.mjs | 1 - 83 files changed, 442 insertions(+), 527 deletions(-) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/data.sqlite (100%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/expected-links.yaml (100%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/fixture.yaml (50%) rename packages/context/test/fixtures/relationship-benchmarks/{abbreviated_legacy_no_declared_constraints => abbreviated_old_no_declared_constraints}/snapshot.json (98%) diff --git a/examples/local-warehouse/semantic-layer/warehouse/orders.yaml b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml index ffcca12b..8ffbe973 100644 --- a/examples/local-warehouse/semantic-layer/warehouse/orders.yaml +++ b/examples/local-warehouse/semantic-layer/warehouse/orders.yaml @@ -1,6 +1,7 @@ name: 
orders table: public.orders -description: Orders placed through the storefront. +descriptions: + user: Orders placed through the storefront. grain: - id columns: diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md index e98c1663..2c9f2c65 100644 --- a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md +++ b/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md @@ -57,4 +57,4 @@ Always join through `customer.id`. Do not join on `email`. - **Join key:** Always use `customer.id`, never `email`. - **Timezone:** `created_at` and `last_seen_at` are UTC. Confirm whether a question expects UTC or a local business day before filtering. - **Paying vs. all:** `free` customers must be excluded from paying-customer follow-ups. Use `paying_customer_count`, not `customer_count`. -- **plan_tier values:** `free`, `pro`, `enterprise`. Note: `pro_plus` is a legacy alias for `growth` in the account/contract layer (see `orbit-plan-segment-normalization`), but `plan_tier` on this table uses `pro` not `pro_plus`. +- **plan_tier values:** `free`, `pro`, `enterprise`. Note: use the canonical plan names from the account/contract layer (see `orbit-plan-segment-normalization`); `plan_tier` on this table uses `pro` rather than `growth`. diff --git a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md index d547d026..65693ee6 100644 --- a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md +++ b/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md @@ -27,7 +27,7 @@ Sales Ops must complete the handoff **before the first implementation call**. 
Cu | Field | Notes | |---|---| -| Current plan | Starter / Growth / Enterprise — use canonical plan name, not legacy aliases | +| Current plan | Starter / Growth / Enterprise — use canonical plan name | | Account segment | self_serve / commercial / enterprise (see `orbit-plan-segment-normalization`) | | Contract shape | Term, ARR, any discounts or custom terms | | Renewal contact | Named person on the customer side responsible for renewal | diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index dbe73a72..69437aec 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -216,7 +216,7 @@ export function resolveCommandProjectDirOverride(command: CommandWithGlobalOptio function createBaseProgram(info: KtxCliPackageInfo, io: KtxCliIo): Command { return new Command() .name('ktx') - .description('Standalone KTX developer CLI') + .description('KTX data agent context layer CLI') .option('--project-dir ', 'KTX project directory (default: KTX_PROJECT_DIR, nearest ktx.yaml, or cwd)') .option('--debug', 'Enable diagnostic logging to stderr') .version(`${info.name} ${info.version}`, '-v, --version', 'Show CLI version') diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 6a215651..1688724d 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -121,7 +121,6 @@ function shouldShowSetupEntryMenu( disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPattern?: string[]; historicSqlRedactionPattern?: string[]; skipDatabases?: boolean; @@ -194,7 +193,6 @@ function shouldShowSetupEntryMenu( 'disableHistoricSql', 'historicSqlWindowDays', 'historicSqlMinExecutions', - 'historicSqlMinCalls', 'skipDatabases', 'source', 'sourceConnectionId', @@ -283,11 +281,6 @@ export function registerSetupCommands(program: Command, 
context: KtxCliCommandCo .option('--disable-historic-sql', 'Disable Historic SQL for the selected database', false) .option('--historic-sql-window-days ', 'Historic SQL query-history window', positiveInteger) .option('--historic-sql-min-executions ', 'Minimum Historic SQL executions for a template', positiveInteger) - .option( - '--historic-sql-min-calls ', - 'Alias for --historic-sql-min-executions', - positiveInteger, - ) .option( '--historic-sql-service-account-pattern ', 'Historic SQL service-account regex; repeatable', @@ -379,7 +372,6 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo const mode = options.new ? 'new' : options.existing ? 'existing' : 'auto'; const resolvedAgentScope = options.global ? 'global' : options.agentScope; - const historicSqlMinExecutions = options.historicSqlMinExecutions ?? options.historicSqlMinCalls; await runSetupArgs(context, { command: 'run', projectDir: resolveCommandProjectDir(command), @@ -410,7 +402,9 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo ...(options.enableHistoricSql ? { enableHistoricSql: true } : {}), ...(options.disableHistoricSql ? { disableHistoricSql: true } : {}), ...(options.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: options.historicSqlWindowDays } : {}), - ...(historicSqlMinExecutions !== undefined ? { historicSqlMinExecutions } : {}), + ...(options.historicSqlMinExecutions !== undefined + ? { historicSqlMinExecutions: options.historicSqlMinExecutions } + : {}), ...(options.historicSqlServiceAccountPattern.length > 0 ? 
{ historicSqlServiceAccountPatterns: options.historicSqlServiceAccountPattern } : {}), diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 817653f6..f41f4b6a 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -123,12 +123,12 @@ describe('runKtxCli', () => { await expect(runKtxCli(['--help'], testIo.io)).resolves.toBe(0); expect(testIo.stdout()).toContain('Usage: ktx [options] [command]'); + expect(testIo.stdout()).toContain('KTX data agent context layer CLI'); for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']) { expect(testIo.stdout()).toContain(`${command}`); } for (const removed of ['demo', 'init', 'connect', 'ask', 'knowledge', 'agent', 'completion', 'serve']) { - expect(testIo.stdout()).not.toContain(`${removed} [`); - expect(testIo.stdout()).not.toContain(`${removed} `); + expect(testIo.stdout()).not.toMatch(new RegExp(`^\\s+${removed}(?:\\s|\\[|$)`, 'm')); } expect(testIo.stdout()).toContain('--project-dir '); expect(testIo.stdout()).toContain('KTX_PROJECT_DIR'); diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 3596d215..73190b0d 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -376,7 +376,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 12, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 101 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 101 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -390,7 +390,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 12, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 102 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 
'select 102 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -404,7 +404,7 @@ const SYNC_MODE_METABASE_CARDS: MetabaseCard[] = [ collection_id: 13, archived: false, result_metadata: [], - dataset_query: { type: 'native', database: 1, native: { query: 'select 103 as id' } }, + dataset_query: { type: 'native', database: 1, stages: [{ 'lib/type': 'mbql.stage/native', native: 'select 103 as id' }] }, parameters: [], dashboard_count: 0, }, @@ -454,11 +454,11 @@ function createSyncModeMetabaseClient(): MetabaseRuntimeClient { }, getAllCards: async () => SYNC_MODE_METABASE_CARDS.map(metabaseCardSummary), convertMbqlToNative: async () => ({ query: 'select 1' }), - getNativeSql: (card) => card.dataset_query?.native?.query ?? null, + getNativeSql: (card) => card.dataset_query?.stages?.[0]?.native ?? null, getTemplateTags: () => ({}), - getCardSql: async (card) => card.dataset_query?.native?.query ?? null, + getCardSql: async (card) => card.dataset_query?.stages?.[0]?.native ?? null, getResolvedSql: async (card) => ({ - resolvedSql: card.dataset_query?.native?.query ?? `select ${card.id} as id`, + resolvedSql: card.dataset_query?.stages?.[0]?.native ?? 
`select ${card.id} as id`, templateTags: [], resolutionStatus: 'resolved', }), diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 24f8c1ca..c847d53a 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -705,7 +705,6 @@ describe('runKtxIngest', () => { patternPagesWritten: 30, stalePatternPagesMarked: 2, archivedPatternPages: 3, - legacyPagesDeleted: 4, }, errors: [], warnings: [], @@ -739,7 +738,7 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); expect(io.stdout()).toContain('Adapter: historic-sql\n'); - expect(io.stdout()).toContain('Saved memory: 39 wiki, 57 SL\n'); + expect(io.stdout()).toContain('Saved memory: 35 wiki, 57 SL\n'); }); it('returns a non-zero code when local ingest reports failed work units', async () => { diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts index 8557674c..9a6915c2 100644 --- a/packages/cli/src/local-adapters.ts +++ b/packages/cli/src/local-adapters.ts @@ -1,4 +1,3 @@ -import { join } from 'node:path'; import { createBigQueryLiveDatabaseIntrospection, isKtxBigQueryConnectionConfig, @@ -298,7 +297,6 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli const base = { sqlAnalysis: ktxCliHistoricSqlAnalysis(options), - postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'), }; if (dialect === 'postgres') { diff --git a/packages/cli/src/local-scan-connectors.test.ts b/packages/cli/src/local-scan-connectors.test.ts index 0fe57518..087e978d 100644 --- a/packages/cli/src/local-scan-connectors.test.ts +++ b/packages/cli/src/local-scan-connectors.test.ts @@ -62,10 +62,7 @@ describe('createKtxCliScanConnector', () => { expect(connector.driver).toBe('sqlite'); }); - it.each([ - ['maxBytesBilled', ' maxBytesBilled: 123456789', 123456789], - ['max_bytes_billed', ' max_bytes_billed: "987654321"', '987654321'], - ])('passes BigQuery %s from standalone config', async (_label, 
byteCapLine, expectedMaxBytesBilled) => { + it('passes BigQuery max_bytes_billed from standalone config', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); await writeFile( join(tempDir, 'ktx.yaml'), @@ -76,7 +73,7 @@ describe('createKtxCliScanConnector', () => { ' driver: bigquery', ' dataset_id: analytics', ' readonly: true', - byteCapLine, + ' max_bytes_billed: "987654321"', '', ].join('\n'), 'utf-8', @@ -90,7 +87,7 @@ describe('createKtxCliScanConnector', () => { expect(bigQueryMock.constructorInputs).toEqual([ expect.objectContaining({ connectionId: 'warehouse', - maxBytesBilled: expectedMaxBytesBilled, + maxBytesBilled: '987654321', }), ]); }); diff --git a/packages/cli/src/local-scan-connectors.ts b/packages/cli/src/local-scan-connectors.ts index d3377b0c..3058b96e 100644 --- a/packages/cli/src/local-scan-connectors.ts +++ b/packages/cli/src/local-scan-connectors.ts @@ -6,7 +6,7 @@ const SUPPORTED_DRIVERS = 'sqlite, postgres, mysql, clickhouse, sqlserver, bigqu function bigQueryMaxBytesBilled( connection: KtxLocalProject['config']['connections'][string], ): number | string | undefined { - const raw = connection.maxBytesBilled ?? connection.max_bytes_billed; + const raw = connection.max_bytes_billed; if (typeof raw === 'number') { return Number.isFinite(raw) && raw > 0 ? 
raw : undefined; } diff --git a/packages/cli/src/next-steps.test.ts b/packages/cli/src/next-steps.test.ts index b4706d72..facb4eb8 100644 --- a/packages/cli/src/next-steps.test.ts +++ b/packages/cli/src/next-steps.test.ts @@ -6,8 +6,6 @@ import { formatSetupNextStepLines, } from './next-steps.js'; -const command = (...parts: string[]) => parts.join(' '); - describe('KTX demo next steps', () => { it('uses supported context-build commands before agent usage', () => { expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([ @@ -57,29 +55,6 @@ describe('KTX demo next steps', () => { expect(rendered).not.toContain('Optional MCP:'); }); - it('does not advertise removed Commander migration commands', () => { - const rendered = formatNextStepLines().join('\n'); - - expect(rendered).toContain('ktx status --json'); - expect(rendered).not.toContain('ktx agent'); - expect(rendered).toContain('ktx sl list'); - expect(rendered).toContain('ktx wiki list'); - - for (const removed of [ - command('ktx', 'ask'), - command('ktx', 'mcp'), - command('ktx', 'connect'), - command('ktx', 'knowledge'), - command('dev', 'model'), - command('dev', 'knowledge'), - command('ktx', 'ingest', 'run'), - command('ktx', 'ingest', 'replay'), - command('ktx', 'serve', '--mcp', 'stdio', '--user-id', 'local'), - ]) { - expect(rendered).not.toContain(removed); - } - }); - it('keeps setup next steps focused on building context when the build is not ready', () => { const rendered = formatSetupNextStepLines({ setupReady: true, diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index f8296177..71d93e4a 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -92,7 +92,7 @@ function normalizedDriver(connection: KtxProjectConnectionConfig): string { } function sourceDirForConnection(connection: KtxProjectConnectionConfig): string | undefined { - const value = connection.source_dir ?? 
connection.sourceDir; + const value = connection.source_dir; return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; } diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 2999d365..95d1e3fb 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -64,8 +64,6 @@ function textInputPrompt(message: string): string { return `${title}\n│\n│ ${bodyLines.join('\n│ ')}\n│ Press Escape to go back.\n│`; } -const legacyHistoricSqlServiceAccountPatternsKey = ['serviceAccount', 'UserPatterns'].join(''); - describe('setup databases step', () => { let tempDir: string; @@ -1255,6 +1253,7 @@ describe('setup databases step', () => { io.io, { testConnection: vi.fn(async () => 0), + rebuildNativeSqlite: vi.fn(async () => 1), scanConnection: vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => { commandIo.stderr.write( [ @@ -1280,6 +1279,60 @@ describe('setup databases step', () => { expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m); }); + it('rebuilds native SQLite once and retries setup scanning after a Node ABI mismatch', async () => { + const io = makeIo(); + const scanConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => { + if (scanConnection.mock.calls.length === 1) { + commandIo.stderr.write( + [ + "The module '/workspace/node_modules/better-sqlite3/build/Release/better_sqlite3.node'", + 'was compiled against a different Node.js version using', + 'NODE_MODULE_VERSION 147. This version of Node.js requires', + 'NODE_MODULE_VERSION 137. 
Please try re-compiling or re-installing', + 'the module (for instance, using `npm rebuild` or `npm install`).', + '', + ].join('\n'), + ); + return 1; + } + + commandIo.stdout.write('What changed\n'); + commandIo.stdout.write(' Semantic layer comparison found 0 changes across 56 tables\n'); + commandIo.stdout.write(' New tables: 0\n'); + commandIo.stdout.write(' Changed tables: 0\n'); + commandIo.stdout.write(' Removed tables: 0\n'); + commandIo.stdout.write(' Unchanged tables: 56\n'); + return 0; + }); + const rebuildNativeSqlite = vi.fn(async () => 0); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection, + rebuildNativeSqlite, + }, + ); + + expect(result.status).toBe('ready'); + expect(rebuildNativeSqlite).toHaveBeenCalledOnce(); + expect(rebuildNativeSqlite).toHaveBeenCalledWith(expect.anything()); + expect(scanConnection).toHaveBeenCalledTimes(2); + expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); + expect(io.stderr()).toContain('Rebuilding Native SQLite with pnpm run native:rebuild…'); + expect(io.stdout()).toContain('◇ Scan complete for warehouse'); + }); + it('writes Historic SQL config for supported Snowflake databases after validation succeeds', async () => { const io = makeIo(); const result = await runKtxSetupDatabasesStep( @@ -1325,7 +1378,6 @@ describe('setup databases step', () => { redactionPatterns: ['(?i)secret'], }, }); - expect(config.connections.snowflake.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); }); @@ -1373,10 +1425,8 @@ describe('setup databases step', () => { }, }, }); - 
expect(config.connections.warehouse.historicSql).not.toHaveProperty('minCalls'); expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays'); expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns'); - expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); expect(config.ingest.workUnits.maxConcurrency).toBe(6); expect(io.stdout()).toContain('Historic SQL probe...'); @@ -1430,7 +1480,6 @@ describe('setup databases step', () => { redactionPatterns: [], }, }); - expect(config.connections.analytics.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); }); @@ -1480,7 +1529,6 @@ describe('setup databases step', () => { }, }, }); - expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); }); it('prints a non-blocking Postgres Historic SQL probe failure after connection test succeeds', async () => { diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 58ee61d9..f697dd75 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -1,4 +1,8 @@ -import { writeFile } from 'node:fs/promises'; +import { execFile as execFileCallback } from 'node:child_process'; +import { readFile, writeFile } from 'node:fs/promises'; +import { delimiter, dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { promisify } from 'node:util'; import { cancel, confirm, isCancel, multiselect, password, select, text } from '@clack/prompts'; import type { HistoricSqlDialect } from '@ktx/context/ingest'; import { @@ -17,6 +21,7 @@ import { withSetupInterruptConfirmation } from './setup-interrupt.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; const 
HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY = 6; +const execFileAsync = promisify(execFileCallback); export type KtxSetupDatabaseDriver = | 'sqlite' @@ -39,7 +44,6 @@ export interface KtxSetupDatabasesArgs { disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPatterns?: string[]; historicSqlRedactionPatterns?: string[]; skipDatabases: boolean; @@ -82,6 +86,7 @@ export interface KtxSetupDatabasesDeps { prompts?: KtxSetupDatabasesPromptAdapter; testConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; scanConnection?: (projectDir: string, connectionId: string, io: KtxCliIo) => Promise; + rebuildNativeSqlite?: (io: KtxCliIo) => Promise; listSchemas?: (projectDir: string, connectionId: string) => Promise; listTables?: (projectDir: string, connectionId: string) => Promise; historicSqlProbe?: KtxSetupHistoricSqlProbe; @@ -856,14 +861,13 @@ async function maybeApplyHistoricSqlConfig(input: { dialect, filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns), }; - delete common[['serviceAccount', 'UserPatterns'].join('')]; if (dialect === 'postgres') { return { ...input.connection, historicSql: { ...common, - minExecutions: input.args.historicSqlMinExecutions ?? input.args.historicSqlMinCalls ?? 5, + minExecutions: input.args.historicSqlMinExecutions ?? 5, }, }; } @@ -959,6 +963,81 @@ function writePrefixedLines(write: (chunk: string) => void, output: string): voi } } +function envWithCurrentNodeFirst(env: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv { + return { + ...env, + PATH: `${dirname(process.execPath)}${delimiter}${env.PATH ?? ''}`, + }; +} + +function errorTextProperty(error: unknown, property: 'stderr' | 'stdout'): string { + if (typeof error !== 'object' || error === null || !(property in error)) { + return ''; + } + const value = (error as Record)[property]; + return typeof value === 'string' ? 
value : ''; +} + +function commandFailureOutput(error: unknown): string { + const stderr = errorTextProperty(error, 'stderr'); + const stdout = errorTextProperty(error, 'stdout'); + const message = error instanceof Error ? error.message : String(error); + return [stderr.trim(), stdout.trim(), message.trim()].filter((line) => line.length > 0).join('\n'); +} + +type PackageJsonScriptStatus = 'has-script' | 'exists' | 'missing'; + +async function packageJsonScriptStatus( + packageJsonPath: string, + scriptName: string, +): Promise { + try { + const parsed = JSON.parse(await readFile(packageJsonPath, 'utf-8')) as unknown; + if (typeof parsed !== 'object' || parsed === null || !('scripts' in parsed)) { + return 'exists'; + } + const scripts = (parsed as { scripts?: unknown }).scripts; + return typeof scripts === 'object' && scripts !== null && scriptName in scripts ? 'has-script' : 'exists'; + } catch { + return 'missing'; + } +} + +async function nativeSqliteRebuildCommand(): Promise<{ cwd: string; args: string[] }> { + let dir = dirname(fileURLToPath(import.meta.url)); + let packageRoot: string | undefined; + while (true) { + const status = await packageJsonScriptStatus(join(dir, 'package.json'), 'native:rebuild'); + if (status === 'has-script') { + return { cwd: dir, args: ['run', 'native:rebuild'] }; + } + if (status === 'exists') { + packageRoot ??= dir; + } + + const parent = dirname(dir); + if (parent === dir) { + return { cwd: packageRoot ?? 
process.cwd(), args: ['rebuild', 'better-sqlite3'] }; + } + dir = parent; + } +} + +async function defaultRebuildNativeSqlite(io: KtxCliIo): Promise { + const command = await nativeSqliteRebuildCommand(); + try { + await execFileAsync('pnpm', command.args, { + cwd: command.cwd, + env: envWithCurrentNodeFirst(), + maxBuffer: 1024 * 1024 * 16, + }); + return 0; + } catch (error) { + writePrefixedLines((chunk) => io.stderr.write(chunk), commandFailureOutput(error)); + return typeof (error as { code?: unknown })?.code === 'number' ? (error as { code: number }).code : 1; + } +} + function flushPrefixedBufferedCommandOutput(io: KtxCliIo, bufferedIo: BufferedCommandIo): void { writePrefixedLines((chunk) => io.stdout.write(chunk), bufferedIo.stdoutText()); writePrefixedLines((chunk) => io.stderr.write(chunk), bufferedIo.stderrText()); @@ -1472,8 +1551,8 @@ async function validateAndScanConnection(input: { writeSetupSection(input.io, `Scanning ${input.connectionId}`, [ 'Running structural scan…', ]); - const scanIo = createBufferedCommandIo(); - const scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); + let scanIo = createBufferedCommandIo(); + let scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); if (scanCode !== 0) { const nativeSqliteDetail = nativeSqliteAbiMismatchDetail(`${scanIo.stderrText()}\n${scanIo.stdoutText()}`); if (nativeSqliteDetail) { @@ -1483,10 +1562,32 @@ async function validateAndScanConnection(input: { `Structural scan failed for ${input.connectionId}.`, 'Native SQLite is built for a different Node.js ABI.', `Detail: ${nativeSqliteDetail}`, - 'Fix: pnpm run native:rebuild', - `Retry: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + 'Rebuilding Native SQLite with pnpm run native:rebuild…', ].join('\n'), ); + const rebuildNativeSqlite = input.deps.rebuildNativeSqlite ?? 
defaultRebuildNativeSqlite; + const rebuildCode = await rebuildNativeSqlite(input.io); + if (rebuildCode === 0) { + writePrefixedLines( + (chunk) => input.io.stderr.write(chunk), + 'Native SQLite rebuild complete. Retrying structural scan…', + ); + const retryScanIo = createBufferedCommandIo(); + scanCode = await scanConnection(input.projectDir, input.connectionId, retryScanIo); + scanIo = retryScanIo; + } + if (scanCode !== 0) { + writePrefixedLines( + (chunk) => input.io.stderr.write(chunk), + [ + rebuildCode === 0 + ? `Structural scan still failed for ${input.connectionId} after rebuilding Native SQLite.` + : `Native SQLite rebuild failed for ${input.connectionId}.`, + 'Fix: pnpm run native:rebuild', + `Retry: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + ].join('\n'), + ); + } } else { flushPrefixedBufferedCommandOutput(input.io, scanIo); writePrefixedLines( @@ -1497,7 +1598,9 @@ async function validateAndScanConnection(input: { ].join('\n'), ); } - return false; + if (scanCode !== 0) { + return false; + } } const scanOutput = scanIo.stdoutText(); const reportPath = readOutputValue(scanOutput, 'Report'); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 313dfbe0..6ab71106 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -544,8 +544,8 @@ function sourcePathFromFileRepoUrl(repoUrl: string, subpath?: string): string { } function repoAuthToken(connection: KtxProjectConnectionConfig | Record): string | null { - const ref = stringField(connection.auth_token_ref) ?? stringField(connection.authTokenRef); - const literal = stringField(connection.authToken) ?? stringField(connection.auth_token); + const ref = stringField(connection.auth_token_ref); + const literal = stringField(connection.auth_token); return literal ?? resolveKtxConfigReference(ref, process.env) ?? 
null; } @@ -563,8 +563,8 @@ async function collectYamlFilesRecursive(sourceRoot: string): Promise { - let sourceDir = stringField(connection.source_dir) ?? stringField(connection.sourceDir); - const repoUrl = stringField(connection.repo_url) ?? stringField(connection.repoUrl); + let sourceDir = stringField(connection.source_dir); + const repoUrl = stringField(connection.repo_url); if (!sourceDir && repoUrl?.startsWith('file:')) { sourceDir = sourcePathFromFileRepoUrl(repoUrl, stringField(connection.path)); } @@ -624,7 +624,7 @@ async function defaultValidateLooker(projectDir: string, connectionId: string): } async function defaultValidateLookml(connection: KtxProjectConnectionConfig): Promise { - const repoUrl = stringField(connection.repoUrl) ?? stringField(connection.repo_url); + const repoUrl = stringField(connection.repoUrl); if (!repoUrl) { return { ok: false, message: 'LookML setup requires repoUrl.' }; } diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 1ab48f0b..0dc0d7cd 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -90,7 +90,6 @@ export type KtxSetupArgs = disableHistoricSql?: boolean; historicSqlWindowDays?: number; historicSqlMinExecutions?: number; - historicSqlMinCalls?: number; historicSqlServiceAccountPatterns?: string[]; historicSqlRedactionPatterns?: string[]; skipDatabases: boolean; @@ -636,7 +635,6 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup ...(args.historicSqlMinExecutions !== undefined ? { historicSqlMinExecutions: args.historicSqlMinExecutions } : {}), - ...(args.historicSqlMinCalls !== undefined ? { historicSqlMinCalls: args.historicSqlMinCalls } : {}), ...(args.historicSqlServiceAccountPatterns ? 
{ historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns } : {}), diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts index 48c7f4c7..ff4132b4 100644 --- a/packages/cli/src/sl.test.ts +++ b/packages/cli/src/sl.test.ts @@ -129,7 +129,7 @@ describe('runKtxSl', () => { connectionId: 'warehouse', name: 'orders', score: expect.any(Number), - matchReasons: expect.arrayContaining(['token']), + matchReasons: expect.any(Array), }), ], }, diff --git a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md index 5d2316fd..30b52537 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md @@ -1,5 +1,5 @@ -You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) decide what happens to each evicted artifact (remove vs retain with a deprecation marker), (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport. +You are the reconciliation agent for a multi-file ingest bundle. Stage 3 WorkUnits have already run against this job's session worktree; your input is the deterministic Stage Index listing every write each WU made, plus an Eviction Set listing raw files present in the prior sync but absent in this one. Your job is to (a) remove artifacts produced by deleted raw files, (b) sweep the Stage Index for any cross-WU conflicts the individual WUs missed, and (c) emit conflict + eviction records that the runner will fold into the final IngestReport. @@ -12,7 +12,7 @@ Parsimonious. 
Stage 3 WUs already loaded `ingest_triage` and handled conflicts t 3. If the system prompt includes ``, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names. 4. Sweep both exact-key conflicts and near-duplicate writes. Compare WUs that wrote overlapping SL source names, overlapping wiki keys, the same `tables:` or `sl_refs:` action details, or obviously equivalent topic titles under different wiki keys. Call `stage_diff` to see the actual difference, and use `wiki_read`/`sl_read_source` when two different keys appear to describe the same table, metric, or source-of-truth mapping. If they're the same content, leave one canonical artifact and record the duplicate as subsumed. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision. 5. For any `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call you make during reconciliation, include `rawPaths` with only the raw paths that directly caused that reconciliation action. -6. Call `eviction_list()` for deleted raw paths. For each eviction: if inbound refs are empty, remove the artifact (`sl_delete`, `wiki_remove`) and include that evicted raw path in `rawPaths`; if inbound refs exist, retain with a deprecation marker and include that evicted raw path in `rawPaths`. Then call `emit_eviction_decision` for every removed or retained artifact. +6. Call `eviction_list()` for deleted raw paths. For each listed artifact, remove it (`sl_delete`, `wiki_remove`) and include the evicted raw path in `rawPaths`. Then call `emit_eviction_decision` with `action: "removed"` for every removed artifact. 7. 
If the Stage 4 sweep discovers a raw file whose only honest outcome is standalone SQL, wiki-only capture, or a human flag, call `emit_unmapped_fallback` with the raw path, reason, and fallback kind. 8. Use `read_raw_span` to zoom into specific raw files when you need to resolve what two contested measures or wiki pages actually describe. 9. Exit when you've processed every item. diff --git a/packages/context/skills/ingest_triage/SKILL.md b/packages/context/skills/ingest_triage/SKILL.md index 1ac3d108..df13ed83 100644 --- a/packages/context/skills/ingest_triage/SKILL.md +++ b/packages/context/skills/ingest_triage/SKILL.md @@ -32,8 +32,8 @@ Apply the rules below before every write that could collide with an existing art | Definitional contradiction | Same name, substantively different formulas (different aggregation, different filters, different columns) | **Rename + capture**: disambiguate ALL variants with suffix derived from the domain (`churn_risk_engagement_based`, `churn_risk_billing_based`) and write a unified wiki page listing every variant with provenance. The contested name does NOT land in the SL. **Always flag.** | 5. **Eviction (Stage 4 only)**: for each entry in `eviction_list()`: - - `inbound_refs: []` → remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages). - - `inbound_refs: [...]` → retain the artifact, set `deprecated: true` on SL sources (via `sl_edit_source`), write a wiki note "origin file removed in ; preserved because referenced by: …". Flag in the IngestReport so the user can plan migration. + - Remove the artifact (`sl_delete` for SL sources, `wiki_remove` for wiki pages). + - Record the removal with `emit_eviction_decision` and `action: "removed"`. 
## Why same-ingest vs re-ingest differs diff --git a/packages/context/skills/metabase_ingest/SKILL.md b/packages/context/skills/metabase_ingest/SKILL.md index f5aa00e2..d35166dc 100644 --- a/packages/context/skills/metabase_ingest/SKILL.md +++ b/packages/context/skills/metabase_ingest/SKILL.md @@ -98,7 +98,7 @@ measures: expr: "" ``` -Overlay shape: `name:` plus any of `measures:`, `segments:`, `description:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: [""]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule. +Overlay shape: `name:` plus any of `measures:`, `segments:`, `descriptions:`, `joins:`, `disable_joins:`. Never include `sql:`, `table:`, `grain:`, or `columns:` on a manifest-backed name — those would shadow the manifest's schema and drop its joins. Overlay `joins:` are merged additively with the manifest's joins (deduped by `to` + `on`); use `disable_joins: [""]` to suppress a specific manifest join. After the overlay exists, use `sl_edit_source` for further tweaks. See `sl_capture` skill for the canonical overlay rule. **Join discovery:** When your card's SQL references warehouse tables (e.g. in `FROM` or `JOIN` clauses), call `sl_discover({ query: '
' })` before writing. The matching manifest entry's `name` is the value you use in `joins: [- to: ]` only when the card output exposes a local key that matches the target source grain (for example `account_id = mart_account_segments.account_id`). Do not declare a KTX join just because the card SQL joins that table internally. If the output only exposes display fields such as `account_name`, keep the SQL source self-contained or project the key before adding the join. Use `many_to_one` for FK-to-dimension joins, `one_to_many` for the reverse. diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md index 47187ffb..6ed4b916 100644 --- a/packages/context/skills/metricflow_ingest/SKILL.md +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -177,7 +177,8 @@ semantic_models: # KTX overlay at /orders.yaml: # name: orders -description: Order fact table. +descriptions: + user: Order fact table. measures: - {name: order_count, expr: "count(order_id)"} - {name: gross_amount, expr: "sum(amount)"} @@ -221,7 +222,8 @@ metrics: # # name: orders_ext -description: Extended order fact including refund handling; `revenue` = gross - refund. +descriptions: + user: Extended order fact including refund handling; `revenue` = gross - refund. measures: - {name: order_count, expr: "count(order_id)"} - {name: gross_amount, expr: "sum(amount)"} diff --git a/packages/context/skills/sl/SKILL.md b/packages/context/skills/sl/SKILL.md index 9cdb8b34..f7077c33 100644 --- a/packages/context/skills/sl/SKILL.md +++ b/packages/context/skills/sl/SKILL.md @@ -29,7 +29,8 @@ Enrich a manifest-backed table with measures, computed columns, joins, and segme ```yaml name: fct_orders # must match an existing manifest table -description: "Overlay adding business measures to the orders fact table." +descriptions: + user: "Overlay adding business measures to the orders fact table." 
measures: - name: total_revenue expr: sum(amount) diff --git a/packages/context/skills/sl_capture/SKILL.md b/packages/context/skills/sl_capture/SKILL.md index 4bc383eb..a40111ea 100644 --- a/packages/context/skills/sl_capture/SKILL.md +++ b/packages/context/skills/sl_capture/SKILL.md @@ -100,13 +100,13 @@ measures: **Extract repeated filter bundles into named segments.** If the same predicate appears on multiple measures of the same source, lift it to a `segments[]` entry and have each measure reference it. One edit updates every measure that depends on it. -**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`description:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name. +**Never write a standalone file on a manifest-backed name.** If `sl_discover({ tableName })` finds an existing schema for that name, you MUST write an overlay (`name:` + `measures:`/`segments:`/`descriptions:` only — no `sql:`, `table:`, `grain:`, `columns:`, `joins:`). A standalone with `sql:` or `table:` on a manifest-backed name clobbers the inherited columns and joins; `sl_write_source` and `sl_validate` both reject this shape with a clear fix hint. Always run `sl_discover` before your first write on any existing name. **Prefer overlay decomposition over standalone SQL sources.** Before reaching for `source_type: sql`, check whether the metric decomposes into measures on existing overlays (including cross-source derived measures). 
Use `source_type: sql` only when: - The metric requires per-user/per-entity derivation that cannot be expressed as a single `expr` (e.g., `EXISTS` over a time-windowed subset), OR - The metric requires multi-step CTEs whose intermediate grain is not a column in any existing source. -When an `sql` source is unavoidable, note in its `description` which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`). +When an `sql` source is unavoidable, note in its `descriptions` map which SL gap forced the choice so it can be retired once the primitive ships. It must target a name NOT in the manifest — pick a distinct one (e.g. `mrr_waterfall_rollup`, not `fct_orders`). ## Slim standalone sources via `inherits_columns_from` @@ -116,7 +116,8 @@ Discover the manifest key with `sl_discover` — pass the bare name (`CONSIGNMEN ```yaml name: aav_consignments -description: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel. +descriptions: + user: AAV consignments — filtered view of MARTS.CONSIGNMENTS for the auto-auction-vaulting channel. source_type: sql sql: | SELECT CONSIGNED_ITEM_ID, CASH_ADV_AMOUNT, ALT_VALUE_COMBINED, my_derived_flag @@ -127,10 +128,10 @@ sql: | inherits_columns_from: CONSIGNMENTS grain: [CONSIGNED_ITEM_ID] columns: - - { name: CONSIGNED_ITEM_ID } # type/description inherited from manifest + - { name: CONSIGNED_ITEM_ID } # type/descriptions inherited from manifest - { name: CASH_ADV_AMOUNT } - { name: ALT_VALUE_COMBINED } - - { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", description: "Computed locally — has any cash advance." } + - { name: my_derived_flag, type: boolean, expr: "CASH_ADV_AMOUNT > 0", descriptions: { user: "Computed locally — has any cash advance." 
} } measures: - name: total_cash_advance expr: sum(CASH_ADV_AMOUNT) diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts index c2c679e5..36461bb2 100644 --- a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.test.ts @@ -13,7 +13,7 @@ async function tempDir(): Promise { const sqlAnalysis: SqlAnalysisPort = { async analyzeForFingerprint() { - throw new Error('legacy analyzeForFingerprint must not be used'); + throw new Error('analyzeForFingerprint must not be used'); }, async analyzeBatch() { return new Map(); @@ -66,7 +66,7 @@ describe('HistoricSqlSourceAdapter', () => { }; const batchSqlAnalysis: SqlAnalysisPort = { async analyzeForFingerprint() { - throw new Error('legacy analyzeForFingerprint must not be used'); + throw new Error('analyzeForFingerprint must not be used'); }, async analyzeBatch() { return new Map([ diff --git a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts index aee051e7..be2fc9f0 100644 --- a/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts +++ b/packages/context/src/ingest/adapters/historic-sql/historic-sql.adapter.ts @@ -1,5 +1,3 @@ -import { rm } from 'node:fs/promises'; -import { join } from 'node:path'; import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter } from '../../types.js'; import { chunkHistoricSqlUnifiedStagedDir, describeHistoricSqlUnifiedScope } from './chunk-unified.js'; import { detectHistoricSqlStagedDir } from './detect.js'; @@ -28,11 +26,6 @@ export class HistoricSqlSourceAdapter implements SourceAdapter { pullConfig, now: this.deps.now?.(), }); - if (this.deps.legacyPostgresBaselineRootDir) { - await rm(join(this.deps.legacyPostgresBaselineRootDir, 
ctx.connectionId, ['pgss', 'baseline.json'].join('-')), { - force: true, - }); - } } chunk(stagedDir: string, diffSet?: DiffSet): Promise { diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts index f2a5b068..95adf13f 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts @@ -284,7 +284,7 @@ describe('projectHistoricSqlEvidence', () => { ); }); - it('marks missing table usage stale and deletes legacy historic SQL query pages', async () => { + it('marks missing table usage stale without deleting old query pages', async () => { const workdir = await tempWorkdir(); await writeText( workdir, @@ -322,22 +322,22 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( workdir, - 'knowledge/global/historic-sql-legacy-template.md', + 'knowledge/global/historic-sql-old-template.md', [ '---', YAML.stringify({ - summary: 'Legacy template page', + summary: 'Old template page', tags: ['historic-sql', 'query-pattern'], refs: [], sl_refs: ['orders'], usage_mode: 'auto', source: 'historic-sql', tables: ['public.orders'], - fingerprints: ['legacy:1'], + fingerprints: ['old:1'], }).trimEnd(), '---', '', - 'Legacy body', + 'Old body', '', ].join('\n'), ); @@ -345,7 +345,6 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.staleTablesMarked).toBe(1); - expect(result.legacyPagesDeleted).toBe(1); expect(result.touchedSources).toEqual([{ connectionId: 'warehouse', sourceName: 'orders' }]); const shard = YAML.parse(await readFile(join(workdir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')); expect(shard.tables.orders.usage).toEqual({ @@ -357,8 +356,8 @@ describe('projectHistoricSqlEvidence', () => { commonJoins: [], staleSince: 
'2026-05-11T00:00:00.000Z', }); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-legacy-template.md'), 'utf-8')).rejects.toMatchObject({ - code: 'ENOENT', - }); + await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain( + 'Old body', + ); }); }); diff --git a/packages/context/src/ingest/adapters/historic-sql/projection.ts b/packages/context/src/ingest/adapters/historic-sql/projection.ts index 25a317f3..7d4da94f 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.ts @@ -1,4 +1,4 @@ -import { access, mkdir, readdir, readFile, rename, rm, writeFile } from 'node:fs/promises'; +import { access, mkdir, readdir, readFile, rename, writeFile } from 'node:fs/promises'; import { dirname, join, relative } from 'node:path'; import YAML from 'yaml'; import { rawSourcesDirForSync } from '../../raw-sources-paths.js'; @@ -20,7 +20,6 @@ export interface HistoricSqlProjectionResult { patternPagesWritten: number; stalePatternPagesMarked: number; archivedPatternPages: number; - legacyPagesDeleted: number; touchedSources: Array<{ connectionId: string; sourceName: string }>; warnings: string[]; } @@ -152,11 +151,6 @@ function isHistoricPatternPage(page: HistoricSqlPatternPage): boolean { ); } -function isLegacyQueryPage(page: HistoricSqlPatternPage): boolean { - const tags = Array.isArray(page.frontmatter.tags) ? page.frontmatter.tags : []; - return page.frontmatter.source === 'historic-sql' && tags.includes('query-pattern') && !tags.includes('pattern'); -} - function isArchivedPatternPage(page: HistoricSqlPatternPage): boolean { const tags = Array.isArray(page.frontmatter.tags) ? 
page.frontmatter.tags : []; return tags.includes('archived'); @@ -228,7 +222,6 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp patternPagesWritten: 0, stalePatternPagesMarked: 0, archivedPatternPages: 0, - legacyPagesDeleted: 0, touchedSources: [], warnings: [], }; @@ -333,10 +326,5 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp result.stalePatternPagesMarked += 1; } - for (const page of allPages.filter(isLegacyQueryPage)) { - await rm(page.path, { force: true }); - result.legacyPagesDeleted += 1; - } - return result; } diff --git a/packages/context/src/ingest/adapters/historic-sql/types.test.ts b/packages/context/src/ingest/adapters/historic-sql/types.test.ts index 076e5d8e..f5a6f853 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.test.ts @@ -8,7 +8,7 @@ import { } from './types.js'; describe('historic-sql unified contracts', () => { - it('parses minExecutions and accepts minCalls as a one-release alias', () => { + it('parses minExecutions and service-account filters', () => { expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minExecutions: 9 })).toMatchObject({ dialect: 'postgres', minExecutions: 9, @@ -18,7 +18,15 @@ describe('historic-sql unified contracts', () => { staleArchiveAfterDays: 90, }); - expect(historicSqlUnifiedPullConfigSchema.parse({ dialect: 'postgres', minCalls: 7 }).minExecutions).toBe(7); + const parsed = historicSqlUnifiedPullConfigSchema.parse({ + dialect: 'postgres', + minExecutions: 7, + filters: { + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + }, + }); + expect(parsed.minExecutions).toBe(7); + expect(parsed.filters.serviceAccounts).toEqual({ patterns: ['^svc_'], mode: 'exclude' }); }); it('validates aggregate templates from warehouse readers', () => { diff --git a/packages/context/src/ingest/adapters/historic-sql/types.ts 
b/packages/context/src/ingest/adapters/historic-sql/types.ts index a827e8ae..07711d52 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.ts @@ -8,26 +8,7 @@ export type HistoricSqlDialect = z.infer; const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']); -function isRecord(value: unknown): value is Record { - return typeof value === 'object' && value !== null && !Array.isArray(value); -} - -export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => { - if (!isRecord(value)) { - return value; - } - const next: Record = { ...value }; - if (next.minExecutions === undefined && typeof next.minCalls === 'number') { - next.minExecutions = next.minCalls; - } - if (!next.filters && Array.isArray(next.serviceAccountUserPatterns)) { - next.filters = { - serviceAccounts: { patterns: next.serviceAccountUserPatterns, mode: 'exclude' }, - dropTrivialProbes: true, - }; - } - return next; -}, z.object({ +export const historicSqlUnifiedPullConfigSchema = z.object({ dialect: historicSqlDialectSchema, windowDays: z.number().int().positive().default(90), minExecutions: z.number().int().nonnegative().default(5), @@ -48,7 +29,7 @@ export const historicSqlUnifiedPullConfigSchema = z.preprocess((value) => { }).default({ dropTrivialProbes: true }), redactionPatterns: z.array(z.string()).default([]), staleArchiveAfterDays: z.number().int().positive().default(90), -})); +}); export type HistoricSqlUnifiedPullConfig = z.infer; @@ -157,6 +138,5 @@ export interface HistoricSqlSourceAdapterDeps { sqlAnalysis: SqlAnalysisPort; reader: HistoricSqlReader; queryClient: unknown; - legacyPostgresBaselineRootDir?: string; now?: () => Date; } diff --git a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts index a29fecd1..47299373 100644 --- a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts 
+++ b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts @@ -26,13 +26,11 @@ export function lookerCredentialsFromLocalConnection( if (!connection || String(connection.driver).toLowerCase() !== 'looker') { throw new Error(`Connection "${connectionId}" is not a Looker connection`); } - const baseUrl = stringField(connection.base_url) ?? stringField(connection.baseUrl) ?? stringField(connection.url); - const clientId = stringField(connection.client_id) ?? stringField(connection.clientId); + const baseUrl = stringField(connection.base_url); + const clientId = stringField(connection.client_id); const clientSecret = stringField(connection.client_secret) ?? - stringField(connection.clientSecret) ?? - (stringField(connection.client_secret_ref) ? resolveEnvReference(String(connection.client_secret_ref), env) : null) ?? - (stringField(connection.clientSecretRef) ? resolveEnvReference(String(connection.clientSecretRef), env) : null); + (stringField(connection.client_secret_ref) ? 
resolveEnvReference(String(connection.client_secret_ref), env) : null); if (!baseUrl) { throw new Error(`Connection "${connectionId}" is missing Looker base_url`); diff --git a/packages/context/src/ingest/adapters/metabase/client-port.test.ts b/packages/context/src/ingest/adapters/metabase/client-port.test.ts index 9686e552..8f775b56 100644 --- a/packages/context/src/ingest/adapters/metabase/client-port.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client-port.test.ts @@ -87,10 +87,13 @@ it('allows the concrete client result shapes used by the relocated Metabase clie const datasetQuery: MetabaseDatasetQuery = { type: 'native', database: 42, - native: { - query: 'SELECT * FROM orders WHERE created_at > {{ created_at }}', - 'template-tags': { created_at: templateTag }, - }, + stages: [ + { + 'lib/type': 'mbql.stage/native', + native: 'SELECT * FROM orders WHERE created_at > {{ created_at }}', + 'template-tags': { created_at: templateTag }, + }, + ], }; const card: MetabaseCard = { id: 1, diff --git a/packages/context/src/ingest/adapters/metabase/client-port.ts b/packages/context/src/ingest/adapters/metabase/client-port.ts index 7aa1f3ed..a5fdb6ce 100644 --- a/packages/context/src/ingest/adapters/metabase/client-port.ts +++ b/packages/context/src/ingest/adapters/metabase/client-port.ts @@ -117,7 +117,7 @@ interface MetabaseNativeStage { } interface MetabaseLegacyNativeQuery { - query: string; + query?: string; 'template-tags'?: Record; } diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts index 1ee3fe93..1c0fdfa9 100644 --- a/packages/context/src/ingest/adapters/metabase/client.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -32,10 +32,22 @@ function nativeCard(query: string, templateTags: Record = {}): MetabaseCard { + return { + id: 1, + name: 'Legacy native card', + type: 'model', + query_type: 'native', + database_id: 6, + dataset_query: 
{ + type: 'native', + database: 6, + native: { query, 'template-tags': templateTags }, }, }; } @@ -277,6 +289,25 @@ describe('getDummyValueForWidgetType', () => { }); }); +describe('MetabaseClient legacy native dataset query support', () => { + it('reads SQL and template tags from dataset_query.native', async () => { + const client = new MetabaseClient(runtime, fastRetryConfig); + const card = legacyNativeCard('SELECT * FROM orders WHERE status = {{ status }}', { + status: { + name: 'status', + type: 'text', + default: 'paid', + }, + }); + + expect(client.getNativeSql(card)).toBe('SELECT * FROM orders WHERE status = {{ status }}'); + expect(client.getTemplateTags(card)).toEqual({ + status: expect.objectContaining({ name: 'status', type: 'text' }), + }); + await expect(client.getCardSql(card)).resolves.toBe('SELECT * FROM orders WHERE status = {{ status }}'); + }); +}); + describe('MetabaseClient.getResolvedSql', () => { function makeClient(setup?: (client: MetabaseClient) => void): MetabaseClient { const client = new MetabaseClient({ apiUrl: 'http://test', apiKey: 'k' }); @@ -318,7 +349,7 @@ describe('MetabaseClient.getResolvedSql', () => { dataset_query: { type: 'native', database: 6, - native: { query: 'SELECT a, b FROM base' }, + stages: [{ 'lib/type': 'mbql.stage/native', native: 'SELECT a, b FROM base' }], }, }); const client = makeClient((client) => { diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts index 2b70bc79..1962bfe0 100644 --- a/packages/context/src/ingest/adapters/metabase/client.ts +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -150,7 +150,7 @@ function injectNativeSql(datasetQuery: MetabaseDatasetQuery, sql: string): Metab stages[0] = { ...stages[0], native: sql }; return { ...datasetQuery, stages }; } - if (datasetQuery?.native) { + if (datasetQuery?.native?.query !== undefined) { return { ...datasetQuery, native: { ...datasetQuery.native, query: sql } 
}; } return datasetQuery; @@ -370,36 +370,12 @@ export class MetabaseClient implements MetabaseRuntimeClient { }); } - /** - * Extract native SQL from card, handling both pMBQL (v57+) and legacy formats. - * - pMBQL format: dataset_query.stages[0].native - * - Legacy format: dataset_query.native.query - */ getNativeSql(card: MetabaseCard): string | null { - // pMBQL format (v57+): stages[0].native - const pMbqlSql = card.dataset_query?.stages?.[0]?.native; - if (pMbqlSql) { - return pMbqlSql; - } - - // Legacy format: native.query - return card.dataset_query?.native?.query ?? null; + return card.dataset_query?.stages?.[0]?.native ?? card.dataset_query?.native?.query ?? null; } - /** - * Extract template tags from card, handling both pMBQL and legacy formats. - * - pMBQL format: dataset_query.stages[0]['template-tags'] - * - Legacy format: dataset_query.native['template-tags'] - */ getTemplateTags(card: MetabaseCard): Record { - // pMBQL format: stages[0]['template-tags'] - const pMbqlTags = card.dataset_query?.stages?.[0]?.['template-tags']; - if (pMbqlTags) { - return pMbqlTags; - } - - // Legacy format: native['template-tags'] - return card.dataset_query?.native?.['template-tags'] ?? {}; + return card.dataset_query?.stages?.[0]?.['template-tags'] ?? card.dataset_query?.native?.['template-tags'] ?? 
{}; } async getCardSql(card: MetabaseCard): Promise { diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts index 0c854f6d..7cbe913b 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.test.ts @@ -48,19 +48,6 @@ describe('metabaseRuntimeConfigFromLocalConnection', () => { }); }); - it('accepts url as the local api URL alias', () => { - const connection: KtxProjectConnectionConfig = { - driver: 'metabase', - url: 'https://metabase.example.com', - api_key: 'literal-test-key', // pragma: allowlist secret - }; - - expect(metabaseRuntimeConfigFromLocalConnection('prod-metabase', connection)).toEqual({ - apiUrl: 'https://metabase.example.com', - apiKey: 'literal-test-key', // pragma: allowlist secret - }); - }); - it('rejects proxy-bearing local Metabase connections', () => { const connection: KtxProjectConnectionConfig = { driver: 'metabase', diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index a7ffc5de..8d8d5f06 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -37,9 +37,9 @@ export function metabaseRuntimeConfigFromLocalConnection( ); } - const apiUrl = stringField(connection.api_url) ?? stringField(connection.apiUrl) ?? stringField(connection.url); - const literalApiKey = stringField(connection.api_key) ?? stringField(connection.apiKey); - const apiKeyRef = stringField(connection.api_key_ref) ?? 
stringField(connection.apiKeyRef); + const apiUrl = stringField(connection.api_url); + const literalApiKey = stringField(connection.api_key); + const apiKeyRef = stringField(connection.api_key_ref); const apiKey = literalApiKey ?? (apiKeyRef ? resolveKtxConfigReference(apiKeyRef, env) : null); if (!apiUrl) { diff --git a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts index bfdd824f..13127a3d 100644 --- a/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts +++ b/packages/context/src/ingest/adapters/metricflow/import-semantic-models.ts @@ -14,7 +14,6 @@ import { getMetricflowAvailableColumnNames, mapCrossModelMetricToSource, resolveMetricflowSemanticModelSourceName, - toKebabCaseMetricflowName, type MetricflowHostTable, type MetricflowSemanticModelImportContext, } from './semantic-models.js'; @@ -129,16 +128,6 @@ export async function importMetricflowSemanticModels( { skipValidation: true }, ); - const legacyWarning = await legacyKebabSourceWarning( - semanticLayerService, - input.connectionId, - context.model.modelRef, - context.sourceName, - ); - if (legacyWarning) { - warnings.push(legacyWarning); - } - if (existing) { sourcesUpdated++; } else { @@ -234,26 +223,6 @@ async function resolveManifestSource( return null; } -async function legacyKebabSourceWarning( - semanticLayerService: MetricflowSemanticLayerWriter, - connectionId: string, - modelRef: string, - sourceName: string, -): Promise { - const kebabName = toKebabCaseMetricflowName(modelRef); - if (kebabName === sourceName) { - return null; - } - const legacy = await semanticLayerService.loadSource(connectionId, kebabName); - if (!legacy) { - return null; - } - return ( - `MetricFlow sync: legacy kebab-case source '${kebabName}' still exists alongside the new source ` + - `'${sourceName}' (modelRef '${modelRef}'). 
Migrate persisted references before deleting the old file.` - ); -} - async function repairSourcesAfterPartialImportFailures(input: { semanticLayerService: MetricflowSemanticLayerWriter; connectionId: string; diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index 6134fbe7..b337a3f0 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -1518,7 +1518,6 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { patternPagesWritten: 3, stalePatternPagesMarked: 1, archivedPatternPages: 1, - legacyPagesDeleted: 1, }, warnings: [], errors: [], @@ -1551,7 +1550,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { expect(memoryFlow.snapshot().events).toContainEqual( expect.objectContaining({ type: 'saved', - wikiCount: 6, + wikiCount: 5, slCount: 3, }), ); diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts index ad3b23f4..a962763d 100644 --- a/packages/context/src/ingest/local-adapters.test.ts +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -105,7 +105,6 @@ describe('local ingest adapters', () => { return { headers: [], rows: [] }; }, }, - postgresBaselineRootDir: join(project.projectDir, '.ktx/cache/historic-sql'), }, }); @@ -181,9 +180,12 @@ describe('local ingest adapters', () => { historicSql: { enabled: true, dialect: 'postgres', - minCalls: 7, + minExecutions: 7, maxTemplatesPerRun: 123, - serviceAccountUserPatterns: ['^svc_'], + filters: { + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + dropTrivialProbes: true, + }, }, }, }); @@ -385,7 +387,7 @@ describe('local ingest adapters', () => { connections: { 'prod-lookml': { driver: 'lookml', - repo_url: 'https://github.com/acme/looker.git', + repoUrl: 'https://github.com/acme/looker.git', branch: 'main', path: 'models', auth_token_ref: 'env:GITHUB_TOKEN', @@ -410,7 
+412,7 @@ describe('local ingest adapters', () => { }); }); - it('rejects local LookML scheduled pulls when repo_url is missing', async () => { + it('rejects local LookML scheduled pulls when repoUrl is missing', async () => { const lookmlProject = { projectDir: tempDir, config: { connections: { 'prod-lookml': { driver: 'lookml' } } }, diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts index 533bd526..0bf5fd42 100644 --- a/packages/context/src/ingest/local-adapters.ts +++ b/packages/context/src/ingest/local-adapters.ts @@ -50,7 +50,6 @@ export interface DefaultLocalIngestAdaptersOptions { reader?: HistoricSqlReader; queryClient?: unknown; postgresQueryClient?: KtxPostgresQueryClient; - postgresBaselineRootDir?: string; now?: () => Date; }; looker?: { @@ -129,7 +128,6 @@ export function createDefaultLocalIngestAdapters( sqlAnalysis: options.historicSql.sqlAnalysis, reader: options.historicSql.reader ?? new PostgresPgssReader(), queryClient, - legacyPostgresBaselineRootDir: options.historicSql.postgresBaselineRootDir, now: options.historicSql.now, }), ); @@ -163,11 +161,11 @@ function stringField(value: unknown): string | null { function localLookmlPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { const mappings = isRecord(connection?.mappings) ? connection.mappings : {}; - const authTokenRef = stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef); - const literalAuthToken = stringField(connection?.authToken) ?? stringField(connection?.auth_token); + const authTokenRef = stringField(connection?.auth_token_ref); + const literalAuthToken = stringField(connection?.auth_token); return pullConfigFromIntegrationConfig({ - repoUrl: stringField(connection?.repoUrl) ?? stringField(connection?.repo_url) ?? null, + repoUrl: stringField(connection?.repoUrl) ?? 
null, branch: stringField(connection?.branch), path: stringField(connection?.path), authToken: literalAuthToken ?? resolveKtxConfigReference(authTokenRef ?? undefined, env) ?? null, @@ -176,27 +174,21 @@ function localLookmlPullConfigFromConnection(connection: Record } function localDbtPullConfigFromConnection(connection: Record | undefined, env: NodeJS.ProcessEnv) { - const sourceDir = stringField(connection?.source_dir) ?? stringField(connection?.sourceDir); - const repoUrl = stringField(connection?.repo_url) ?? stringField(connection?.repoUrl); + const sourceDir = stringField(connection?.source_dir); + const repoUrl = stringField(connection?.repo_url); if (sourceDir) { return { sourceDir, ...(stringField(connection?.profiles_path) ? { profilesPath: stringField(connection?.profiles_path) } : {}), - ...(stringField(connection?.profilesPath) ? { profilesPath: stringField(connection?.profilesPath) } : {}), ...(stringField(connection?.target) ? { target: stringField(connection?.target) } : {}), ...(stringField(connection?.project_name) ? { projectName: stringField(connection?.project_name) } : {}), - ...(stringField(connection?.projectName) ? { projectName: stringField(connection?.projectName) } : {}), }; } if (!repoUrl) { return undefined; } const authToken = - stringField(connection?.authToken) ?? - resolveKtxConfigReference( - stringField(connection?.auth_token_ref) ?? stringField(connection?.authTokenRef) ?? undefined, - env, - ); + stringField(connection?.auth_token) ?? resolveKtxConfigReference(stringField(connection?.auth_token_ref) ?? undefined, env); return { repoUrl, ...(stringField(connection?.branch) ? { branch: stringField(connection?.branch) } : {}), @@ -280,8 +272,8 @@ export async function localPullConfigForAdapter( ? (metricflow as Record) : null; const authToken = - typeof metricflowConfig?.authToken === 'string' - ? metricflowConfig.authToken + typeof metricflowConfig?.auth_token === 'string' + ? 
metricflowConfig.auth_token : resolveKtxConfigReference( typeof metricflowConfig?.auth_token_ref === 'string' ? metricflowConfig.auth_token_ref : undefined, options.looker?.env ?? process.env, diff --git a/packages/context/src/ingest/report-snapshot.test.ts b/packages/context/src/ingest/report-snapshot.test.ts index 13d3eff7..c949a3cc 100644 --- a/packages/context/src/ingest/report-snapshot.test.ts +++ b/packages/context/src/ingest/report-snapshot.test.ts @@ -22,7 +22,7 @@ function validReportSnapshot() { { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, ], - touchedSlSources: ['warehouse.orders'], + touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'orders' }], }, ], failedWorkUnits: [], @@ -106,7 +106,7 @@ describe('parseIngestReportSnapshot', () => { expect(snapshot.body.toolTranscripts).toHaveLength(1); }); - it('parses target-aware actions and normalizes legacy touched source strings', () => { + it('parses target-aware actions and touched source objects', () => { const report = validReportSnapshot(); report.body.workUnits[0] = { ...report.body.workUnits[0], @@ -119,8 +119,7 @@ describe('parseIngestReportSnapshot', () => { targetConnectionId: 'warehouse-1', }, ], - // Legacy report shape: bare strings are normalized to the report connection ID. 
- touchedSlSources: ['looker__b2b__sales_pipeline'], + touchedSlSources: [{ connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' }], } as never; const snapshot = parseIngestReportSnapshot(report); @@ -135,7 +134,7 @@ describe('parseIngestReportSnapshot', () => { }, ]); expect(snapshot.body.workUnits[0]?.touchedSlSources).toEqual([ - { connectionId: 'warehouse', sourceName: 'looker__b2b__sales_pipeline' }, + { connectionId: 'warehouse-1', sourceName: 'looker__b2b__sales_pipeline' }, ]); }); diff --git a/packages/context/src/ingest/report-snapshot.ts b/packages/context/src/ingest/report-snapshot.ts index 76565ad9..de377dd5 100644 --- a/packages/context/src/ingest/report-snapshot.ts +++ b/packages/context/src/ingest/report-snapshot.ts @@ -1,5 +1,4 @@ import * as z from 'zod'; -import type { TouchedSlSource } from '../tools/index.js'; import { memoryFlowReplayInputSchema } from './memory-flow/schema.js'; import type { IngestReportSnapshot } from './reports.js'; @@ -24,8 +23,6 @@ const touchedSlSourceSchema = z.object({ sourceName: z.string().min(1), }); -const touchedSlSourceInputSchema = z.union([z.string(), touchedSlSourceSchema]); - const conflictResolvedSchema = z .object({ unitKey: z.string().optional(), @@ -42,7 +39,7 @@ const evictionAppliedSchema = z rawPath: z.string(), artifactKind: z.enum(['sl', 'wiki']), artifactKey: z.string(), - action: z.enum(['removed', 'retained_deprecated']), + action: z.literal('removed'), reason: z.string(), }) .passthrough(); @@ -147,7 +144,7 @@ export const ingestReportSnapshotSchema = z status: z.enum(['success', 'failed']), reason: z.string().optional(), actions: z.array(ingestActionSchema), - touchedSlSources: z.array(touchedSlSourceInputSchema), + touchedSlSources: z.array(touchedSlSourceSchema), slDisallowed: z.boolean().optional(), slDisallowedReason: z.enum(['lookml_connection_mismatch']).optional(), }), @@ -171,26 +168,10 @@ export const ingestReportSnapshotSchema = z }) .passthrough(); -function 
normalizeTouchedSlSources(connectionId: string, value: Array): TouchedSlSource[] { - return value.map((entry) => - typeof entry === 'string' - ? { connectionId, sourceName: entry } - : { connectionId: entry.connectionId, sourceName: entry.sourceName }, - ); -} - export function parseIngestReportSnapshot(value: unknown): IngestReportSnapshot { const result = ingestReportSnapshotSchema.safeParse(value); if (!result.success) { throw new Error(`Invalid ingest report snapshot: ${z.prettifyError(result.error)}`); } - const snapshot = result.data as IngestReportSnapshot; - snapshot.body.workUnits = snapshot.body.workUnits.map((workUnit) => ({ - ...workUnit, - touchedSlSources: normalizeTouchedSlSources( - snapshot.connectionId, - workUnit.touchedSlSources as Array, - ), - })); - return snapshot; + return result.data as IngestReportSnapshot; } diff --git a/packages/context/src/ingest/reports.ts b/packages/context/src/ingest/reports.ts index cda4d7c1..672c5bfb 100644 --- a/packages/context/src/ingest/reports.ts +++ b/packages/context/src/ingest/reports.ts @@ -111,8 +111,7 @@ export function postProcessorSavedMemoryCounts( wikiCount: numericResultField(record, 'patternPagesWritten') + numericResultField(record, 'stalePatternPagesMarked') + - numericResultField(record, 'archivedPatternPages') + - numericResultField(record, 'legacyPagesDeleted'), + numericResultField(record, 'archivedPatternPages'), slCount: numericResultField(record, 'tableUsageMerged') + numericResultField(record, 'staleTablesMarked'), }; } diff --git a/packages/context/src/ingest/stages/stage-index.types.ts b/packages/context/src/ingest/stages/stage-index.types.ts index c8d7e4b3..7de26bc8 100644 --- a/packages/context/src/ingest/stages/stage-index.types.ts +++ b/packages/context/src/ingest/stages/stage-index.types.ts @@ -25,7 +25,7 @@ export interface EvictionAppliedRecord { rawPath: string; artifactKind: 'sl' | 'wiki'; artifactKey: string; - action: 'removed' | 'retained_deprecated'; + action: 'removed'; 
reason: string; } diff --git a/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts b/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts index f44214ea..28a32a5b 100644 --- a/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts +++ b/packages/context/src/ingest/tools/emit-eviction-decision.tool.ts @@ -22,7 +22,7 @@ export function createEmitEvictionDecisionTool(deps: EmitEvictionDecisionDeps) { rawPath: z.string().min(1), artifactKind: z.enum(['sl', 'wiki']), artifactKey: z.string().min(1), - action: z.enum(['removed', 'retained_deprecated']), + action: z.literal('removed'), reason: z.string().min(1), }), execute: async (input): Promise => { diff --git a/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts b/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts index a3e7b34f..9178c989 100644 --- a/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts +++ b/packages/context/src/ingest/tools/emit-reconciliation-records.tool.test.ts @@ -88,14 +88,14 @@ describe('reconciliation emit tools', () => { await executeTool(tool, { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', - action: 'retained_deprecated', + artifactKey: 'orders/old', + action: 'removed', reason: 'first pass', }); await executeTool(tool, { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', + artifactKey: 'orders/old', action: 'removed', reason: 'second pass after checking references', }); @@ -104,7 +104,7 @@ describe('reconciliation emit tools', () => { { rawPath: 'views/old_orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'orders/legacy', + artifactKey: 'orders/old', action: 'removed', reason: 'second pass after checking references', }, diff --git a/packages/context/src/ingest/tools/eviction-list.tool.ts b/packages/context/src/ingest/tools/eviction-list.tool.ts index 1e2ca3a0..4ed08d63 100644 --- 
a/packages/context/src/ingest/tools/eviction-list.tool.ts +++ b/packages/context/src/ingest/tools/eviction-list.tool.ts @@ -12,7 +12,7 @@ export interface EvictionListDeps { export function createEvictionListTool(deps: EvictionListDeps) { return tool({ description: - 'List every artifact that the most recent completed sync produced from a now-deleted raw file. Use this to decide whether to remove (no inbound refs) or retain with deprecation (has inbound refs). Inbound refs are NOT currently computed — treat every retained entry as a candidate and ask the user via the IngestReport. After deciding, record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.', + 'List every artifact that the most recent completed sync produced from a now-deleted raw file. Remove each listed artifact and record the decision with context_eviction_decision_write so the ingest report lists every deleted-source decision.', inputSchema: z.object({}), execute: async () => { if (deps.deletedRawPaths.length === 0) { diff --git a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts index a1edf807..b916107c 100644 --- a/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts +++ b/packages/context/src/ingest/tools/warehouse-verification/warehouse-catalog.service.ts @@ -88,9 +88,8 @@ interface ConnectionCatalog { } type TableWithDescriptions = KtxSchemaTable & { - description?: string | null; descriptions?: Record; - columns: Array }>; + columns: Array }>; }; function normalize(value: string | null | undefined): string { @@ -220,14 +219,14 @@ function matchedOnTable(table: TableWithDescriptions, query: string): RawSchemaH if (normalize(table.comment).includes(q)) { return 'comment'; } - if (normalize(firstDescription(table.descriptions) ?? 
table.description).includes(q)) { + if (normalize(firstDescription(table.descriptions)).includes(q)) { return 'description'; } return null; } function matchedOnColumn( - column: KtxSchemaColumn & { description?: string | null; descriptions?: Record }, + column: KtxSchemaColumn & { descriptions?: Record }, query: string, ): 'name' | 'comment' | 'description' | null { const q = normalize(query); @@ -240,7 +239,7 @@ function matchedOnColumn( if (normalize(column.comment).includes(q)) { return 'comment'; } - if (normalize(firstDescription(column.descriptions) ?? column.description).includes(q)) { + if (normalize(firstDescription(column.descriptions)).includes(q)) { return 'description'; } return null; @@ -285,13 +284,10 @@ export class WarehouseCatalogService { display: formatDisplay(catalog.driver, table), kind: table.kind, comment: table.comment, - description: table.description ?? firstDescription(table.descriptions), + description: firstDescription(table.descriptions), rowCount: profileTable?.rowCount ?? table.estimatedRows ?? null, columns: table.columns.map((rawColumn) => { - const column = rawColumn as KtxSchemaColumn & { - description?: string | null; - descriptions?: Record; - }; + const column = rawColumn as KtxSchemaColumn & { descriptions?: Record }; const profileColumn = profileColumns[columnKey(table, column.name)] ?? 
Object.entries(profileColumns).find( diff --git a/packages/context/src/ingest/wiki-sl-ref-repair.test.ts b/packages/context/src/ingest/wiki-sl-ref-repair.test.ts index 958386c7..68f2b349 100644 --- a/packages/context/src/ingest/wiki-sl-ref-repair.test.ts +++ b/packages/context/src/ingest/wiki-sl-ref-repair.test.ts @@ -40,7 +40,7 @@ describe('repairWikiSlRefs', () => { }; const configService = { listFiles: vi.fn(async () => ({ - files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-legacy.md'], + files: ['global/accounts-at-risk.md', 'global/historic-sql/nested-old.md'], })), }; const semanticLayerService = { diff --git a/packages/context/src/mcp/local-project-ports.test.ts b/packages/context/src/mcp/local-project-ports.test.ts index 85a3c2c7..e3812960 100644 --- a/packages/context/src/mcp/local-project-ports.test.ts +++ b/packages/context/src/mcp/local-project-ports.test.ts @@ -576,7 +576,8 @@ describe('createLocalProjectMcpContextPorts', () => { sourceName: 'orders', yaml: [ 'name: orders', - 'description: Revenue facts', + 'descriptions:', + ' user: Revenue facts', 'table: public.orders', 'grain:', ' - order_id', diff --git a/packages/context/src/memory/capture-signals.ts b/packages/context/src/memory/capture-signals.ts index 8860474a..856df30b 100644 --- a/packages/context/src/memory/capture-signals.ts +++ b/packages/context/src/memory/capture-signals.ts @@ -114,7 +114,6 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number { case 'external_ingest': return 30; case 'backfill': - case 'sql-review-migration': return 25; } } @@ -122,7 +121,7 @@ export function stepBudgetFor(sourceType: MemoryAgentSourceType): number { export function promptNameFor(sourceType: MemoryAgentSourceType): string { return sourceType === 'external_ingest' ? 'memory_agent_external_ingest' - : sourceType === 'backfill' || sourceType === 'sql-review-migration' + : sourceType === 'backfill' ? 
'memory_agent_backfill' : 'memory_agent_research'; } diff --git a/packages/context/src/memory/types.ts b/packages/context/src/memory/types.ts index aa50cd8c..207eb238 100644 --- a/packages/context/src/memory/types.ts +++ b/packages/context/src/memory/types.ts @@ -16,7 +16,7 @@ import type { import type { ToolContext, ToolSession, TouchedSlSourceSet } from '../tools/index.js'; import type { KnowledgeIndexPort, KnowledgeWikiService } from '../wiki/index.js'; -export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill' | 'sql-review-migration'; +export type MemoryAgentSourceType = 'research' | 'external_ingest' | 'backfill'; export interface MemoryAgentInput { userId: string; diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index cad7945c..5f13729e 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -216,15 +216,15 @@ project: demo scan: relationships: enabled: false - llm_proposals: false - validation_required_for_manifest: true - accept_threshold: 0.91 - review_threshold: 0.61 - max_llm_tables_per_batch: 12 - max_candidates_per_column: 7 - profile_sample_rows: 500 - validation_concurrency: 2 - validation_budget: 0 + llmProposals: false + validationRequiredForManifest: true + acceptThreshold: 0.91 + reviewThreshold: 0.61 + maxLlmTablesPerBatch: 12 + maxCandidatesPerColumn: 7 + profileSampleRows: 500 + validationConcurrency: 2 + validationBudget: 0 `); expect(config.scan.relationships).toEqual({ @@ -256,7 +256,7 @@ scan: project: demo scan: relationships: - validation_budget: all + validationBudget: all `); expect(config.scan.relationships.validationBudget).toBe('all'); @@ -268,13 +268,13 @@ scan: project: demo scan: relationships: - accept_threshold: 2 - review_threshold: -1 - max_llm_tables_per_batch: 0 - max_candidates_per_column: -4 - profile_sample_rows: 0 - validation_concurrency: 0 - validation_budget: 1.5 + acceptThreshold: 2 + 
reviewThreshold: -1 + maxLlmTablesPerBatch: 0 + maxCandidatesPerColumn: -4 + profileSampleRows: 0 + validationConcurrency: 0 + validationBudget: 1.5 `); expect(config.scan.relationships).toMatchObject({ @@ -293,13 +293,13 @@ scan: project: demo scan: relationships: - validation_budget: infinite + validationBudget: infinite `); expect(config.scan.relationships).not.toHaveProperty('validationBudget'); }); - it('rejects legacy local LLM and embedding fields', () => { + it('rejects unsupported local LLM and embedding fields', () => { expect(() => parseKtxProjectConfig(` project: demo diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index 5da193f2..b00d0148 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -211,7 +211,7 @@ function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): Kt throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`); } -function rejectLegacyProvider(section: string, value: unknown): void { +function rejectUnsupportedProvider(section: string, value: unknown): void { if (value !== undefined) { throw new Error(`Unsupported ${section}.provider: use ${section}.backend`); } @@ -276,7 +276,7 @@ function parseProjectLlmProviderConfig( defaults: KtxProjectLlmProviderConfig, section: string, ): KtxProjectLlmProviderConfig { - rejectLegacyProvider(section, raw.provider); + rejectUnsupportedProvider(section, raw.provider); const vertex = isRecord(raw.vertex) ? 
{ @@ -309,7 +309,7 @@ function parseProjectEmbeddingConfig( defaults: KtxProjectEmbeddingConfig, section: string, ): KtxProjectEmbeddingConfig { - rejectLegacyProvider(section, raw.provider); + rejectUnsupportedProvider(section, raw.provider); const openai = optionalProviderConfig(raw.openai); const sentenceTransformers = isRecord(raw.sentenceTransformers) @@ -339,36 +339,21 @@ function parseScanRelationshipConfig( raw: Record, defaults: KtxScanRelationshipConfig, ): KtxScanRelationshipConfig { - const validationBudget = validationBudgetConfigValue( - raw.validation_budget ?? raw.validationBudget, - defaults.validationBudget, - ); + const validationBudget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget); return { enabled: booleanValue(raw.enabled, defaults.enabled), - llmProposals: booleanValue(raw.llm_proposals ?? raw.llmProposals, defaults.llmProposals), + llmProposals: booleanValue(raw.llmProposals, defaults.llmProposals), validationRequiredForManifest: booleanValue( - raw.validation_required_for_manifest ?? raw.validationRequiredForManifest, + raw.validationRequiredForManifest, defaults.validationRequiredForManifest, ), - acceptThreshold: ratioConfigValue(raw.accept_threshold ?? raw.acceptThreshold, defaults.acceptThreshold), - reviewThreshold: ratioConfigValue(raw.review_threshold ?? raw.reviewThreshold, defaults.reviewThreshold), - maxLlmTablesPerBatch: positiveIntegerConfigValue( - raw.max_llm_tables_per_batch ?? raw.maxLlmTablesPerBatch, - defaults.maxLlmTablesPerBatch, - ), - maxCandidatesPerColumn: positiveIntegerConfigValue( - raw.max_candidates_per_column ?? raw.maxCandidatesPerColumn, - defaults.maxCandidatesPerColumn, - ), - profileSampleRows: positiveIntegerConfigValue( - raw.profile_sample_rows ?? raw.profileSampleRows, - defaults.profileSampleRows, - ), - validationConcurrency: positiveIntegerConfigValue( - raw.validation_concurrency ?? 
raw.validationConcurrency, - defaults.validationConcurrency, - ), + acceptThreshold: ratioConfigValue(raw.acceptThreshold, defaults.acceptThreshold), + reviewThreshold: ratioConfigValue(raw.reviewThreshold, defaults.reviewThreshold), + maxLlmTablesPerBatch: positiveIntegerConfigValue(raw.maxLlmTablesPerBatch, defaults.maxLlmTablesPerBatch), + maxCandidatesPerColumn: positiveIntegerConfigValue(raw.maxCandidatesPerColumn, defaults.maxCandidatesPerColumn), + profileSampleRows: positiveIntegerConfigValue(raw.profileSampleRows, defaults.profileSampleRows), + validationConcurrency: positiveIntegerConfigValue(raw.validationConcurrency, defaults.validationConcurrency), ...(validationBudget !== undefined ? { validationBudget } : {}), }; } diff --git a/packages/context/src/scan/relationship-benchmarks.test.ts b/packages/context/src/scan/relationship-benchmarks.test.ts index b4e5c782..aff025aa 100644 --- a/packages/context/src/scan/relationship-benchmarks.test.ts +++ b/packages/context/src/scan/relationship-benchmarks.test.ts @@ -33,7 +33,7 @@ const EXPECTED_LINKS: KtxRelationshipBenchmarkExpectedLinks = { }; const CHECKED_IN_FIXTURE_ORIGINS = { - abbreviated_legacy_no_declared_constraints: 'synthetic', + abbreviated_old_no_declared_constraints: 'synthetic', adventureworks_oltp_with_declared_metadata: 'public', adventureworkslt_with_declared_metadata: 'public', analytical_warehouse_no_naming_convention: 'synthetic', @@ -606,7 +606,7 @@ describe('relationship benchmarks', () => { const byId = new Map(fixtures.map((fixture) => [fixture.id, fixture])); const adversarialIds = [ 'non_english_naming_no_declared_constraints', - 'abbreviated_legacy_no_declared_constraints', + 'abbreviated_old_no_declared_constraints', 'analytical_warehouse_no_naming_convention', 'mixed_case_within_schema_no_declared_constraints', 'polymorphic_partial_overlap_no_declared_constraints', diff --git a/packages/context/src/scan/relationship-diagnostics.test.ts 
b/packages/context/src/scan/relationship-diagnostics.test.ts index 3f3bad1b..7c1dbb76 100644 --- a/packages/context/src/scan/relationship-diagnostics.test.ts +++ b/packages/context/src/scan/relationship-diagnostics.test.ts @@ -141,7 +141,7 @@ describe('relationship diagnostics artifacts', () => { ); }); - it('adapts legacy relationship updates into the richer artifact shape', () => { + it('adapts relationship updates into the artifact shape', () => { const artifacts = buildKtxRelationshipArtifacts({ connectionId: 'warehouse', relationshipUpdate: { diff --git a/packages/context/src/search/backend-conformance.test.ts b/packages/context/src/search/backend-conformance.test.ts index d2d8e3bf..95858486 100644 --- a/packages/context/src/search/backend-conformance.test.ts +++ b/packages/context/src/search/backend-conformance.test.ts @@ -45,7 +45,8 @@ const ORDERS_YAML = [ const FINANCE_ORDERS_YAML = [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', diff --git a/packages/context/src/sl/description-normalization.ts b/packages/context/src/sl/description-normalization.ts index 5a1b5ab6..ef657fdd 100644 --- a/packages/context/src/sl/description-normalization.ts +++ b/packages/context/src/sl/description-normalization.ts @@ -28,16 +28,11 @@ function hasDescriptions(descriptions: DescriptionMap): boolean { function withDescriptionMap(record: Record, fallback: string | null): Record { const descriptions = cleanDescriptionMap(record.descriptions); - const flatDescription = cleanText(record.description); - if (flatDescription && !descriptions.user) { - descriptions.user = flatDescription; - } if (!hasDescriptions(descriptions) && fallback) { descriptions.ktx = fallback; } const next = { ...record }; - delete next.description; if (hasDescriptions(descriptions)) { next.descriptions = descriptions; } else { diff --git 
a/packages/context/src/sl/local-sl.test.ts b/packages/context/src/sl/local-sl.test.ts index aa48546b..b7d56e22 100644 --- a/packages/context/src/sl/local-sl.test.ts +++ b/packages/context/src/sl/local-sl.test.ts @@ -29,7 +29,8 @@ const ORDERS_YAML = [ const SUPPORT_YAML = [ 'name: tickets', - 'description: Support tickets grouped by priority.', + 'descriptions:', + ' user: Support tickets grouped by priority.', 'table: public.tickets', 'grain:', ' - ticket_id', @@ -278,7 +279,8 @@ describe('local semantic-layer helpers', () => { sourceName: 'orders', yaml: [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', diff --git a/packages/context/src/sl/pglite-sl-search-prototype.test.ts b/packages/context/src/sl/pglite-sl-search-prototype.test.ts index 1d0ece25..0c599dca 100644 --- a/packages/context/src/sl/pglite-sl-search-prototype.test.ts +++ b/packages/context/src/sl/pglite-sl-search-prototype.test.ts @@ -10,7 +10,8 @@ import { searchLocalSlSourcesWithPglitePrototype } from './pglite-sl-search-prot const ORDERS_YAML = [ 'name: orders', - 'description: Orders with paid revenue and refund status.', + 'descriptions:', + ' user: Orders with paid revenue and refund status.', 'table: public.orders', 'grain:', ' - order_id', @@ -29,7 +30,8 @@ const ORDERS_YAML = [ const FINANCE_ORDERS_YAML = [ 'name: orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: finance.orders', 'grain:', ' - order_id', @@ -43,7 +45,8 @@ const FINANCE_ORDERS_YAML = [ const CUSTOMERS_YAML = [ 'name: customers', - 'description: Customer lifecycle accounts by region.', + 'descriptions:', + ' user: Customer lifecycle accounts by region.', 'table: public.customers', 'grain:', ' - customer_id', diff --git a/packages/context/src/sl/schemas.ts 
b/packages/context/src/sl/schemas.ts index a42ecc87..a57359d4 100644 --- a/packages/context/src/sl/schemas.ts +++ b/packages/context/src/sl/schemas.ts @@ -80,14 +80,13 @@ const joinDeclarationSchema = z.object({ const sourceColumnSchema = z.object({ name: unqualifiedNameSchema, - // type/description optional on standalone sources: compose-time enrichment fills them + // type/descriptions optional on standalone sources: compose-time enrichment fills them // from the manifest entry named in `inherits_columns_from`. If the agent does not set // `inherits_columns_from`, or the column is not in the manifest, type must be present // — surfaced by sl_validate. type: z.enum(columnTypeValues).optional(), role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), expr: z.string().optional(), constraints: sourceKeyedColumnConstraintsSchema.optional(), @@ -102,7 +101,6 @@ const overlayColumnSchema = z type: z.enum(columnTypeValues).optional(), role: z.enum(columnRoleValues).optional(), visibility: z.enum(columnVisibilityValues).optional(), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), expr: z.string().optional(), }) @@ -114,7 +112,6 @@ const overlayColumnSchema = z export const sourceDefinitionSchema = z .object({ name: z.string().min(1), - description: z.string().optional(), descriptions: descriptionsSchema.optional(), // Accepted for documentation parity with the Python spec; behavior is driven // by the `table` / `sql` fields, not by this discriminator. 
@@ -150,7 +147,6 @@ export const sourceDefinitionSchema = z export const sourceOverlaySchema = z .object({ name: z.string().min(1), - description: z.string().optional(), descriptions: z.record(z.string(), z.string()).optional(), grain: z.array(unqualifiedNameSchema).optional(), columns: z.array(overlayColumnSchema).optional(), diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 308cc5aa..179904d5 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -98,7 +98,7 @@ describe('composeOverlay', () => { ...baseTable, segments: [{ name: 'pre_existing', expr: 'is_paid = true' }], }; - const overlay = { name: 'fct_labs', description: 'no segments here' }; + const overlay = { name: 'fct_labs', descriptions: { user: 'no segments here' } }; const composed = composeOverlay(baseWithSegments, overlay); expect(composed.segments).toEqual([{ name: 'pre_existing', expr: 'is_paid = true' }]); }); @@ -128,7 +128,7 @@ describe('composeOverlay', () => { it('still handles existing known keys without regression', () => { const overlay = { name: 'fct_labs', - description: 'patient lab orders', + descriptions: { user: 'patient lab orders' }, exclude_columns: ['admin_user_id'], columns: [{ name: 'is_byol', type: 'boolean', expr: "lab_type = 'byol'" }], measures: [{ name: 'count_all', expr: 'count(*)' }], @@ -675,19 +675,21 @@ describe('loadAllSources — standalone enrichment via inherits_columns_from', ( expect(aav?.columns).toEqual([{ name: 'FOO', type: 'string' }]); }); - it('normalizes legacy flat source and column descriptions when loading standalone files', async () => { + it('loads standalone source and column description maps', async () => { const standalonePath = 'semantic-layer/conn-1/orders.yaml'; configService.listFiles.mockResolvedValue({ files: [standalonePath] }); configService.readFile.mockResolvedValue({ content: [ 'name: 
orders', - 'description: Finance orders used for invoice reconciliation.', + 'descriptions:', + ' user: Finance orders used for invoice reconciliation.', 'table: public.orders', 'grain: [id]', 'columns:', ' - name: id', ' type: string', - ' description: Stable order identifier.', + ' descriptions:', + ' user: Stable order identifier.', ].join('\n'), }); diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index 0616851d..7d13d10a 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -113,7 +113,7 @@ export class SemanticLayerService { `standalone source '${source.name}' shadows an existing manifest entry and ` + `will drop the manifest's columns and joins. Rewrite as an overlay: remove ` + `"sql:", "table:", "grain:", "columns:", "joins:"; keep only "name:" plus ` + - `"measures:"/"segments:"/"description:"`; + `"measures:"/"segments:"/"descriptions:"`; warnings.push(msg); this.logger.warn(`[writeSource] ${msg}. Saving anyway.`); } @@ -935,16 +935,12 @@ export class SemanticLayerService { string, { descriptions?: Record; - description?: string; - db_description?: string; columns?: Array<{ name: string; type: string; pk?: boolean; nullable?: boolean; descriptions?: Record; - description?: string; - db_description?: string; }>; } >; @@ -952,12 +948,12 @@ export class SemanticLayerService { if (shard?.tables) { for (const [tableName, entry] of Object.entries(shard.tables)) { tables.set(tableName, { - descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description) ?? {}, + descriptions: entry.descriptions ?? {}, }); for (const col of entry.columns ?? []) { columns.set(`${tableName}.${col.name}`, { type: col.type, - descriptions: migrateDescriptions(col.descriptions, col.description, col.db_description) ?? {}, + descriptions: col.descriptions ?? 
{}, nullable: col.nullable, pk: col.pk, }); @@ -1055,11 +1051,7 @@ interface ManifestColumnEntry { type: string; pk?: boolean; nullable?: boolean; - // New format: descriptions map descriptions?: Record; - // Legacy format: flat fields (read-only backwards compat) - description?: string; - db_description?: string; constraints?: { dbt?: { not_null?: boolean; unique?: boolean } }; enum_values?: { dbt?: string[] }; tests?: { @@ -1077,11 +1069,7 @@ interface ManifestJoinEntry { export interface ManifestTableEntry { table: string; - // New format: descriptions map descriptions?: Record; - // Legacy format: flat fields (read-only backwards compat) - description?: string; - db_description?: string; columns: ManifestColumnEntry[]; joins?: ManifestJoinEntry[]; tags?: { dbt?: string[] }; @@ -1089,31 +1077,12 @@ export interface ManifestTableEntry { usage?: TableUsageOutput; } -/** Migrate legacy flat description/db_description fields to a descriptions map. */ -function migrateDescriptions( - descriptions?: Record, - description?: string, - dbDescription?: string, -): Record | undefined { - if (descriptions && Object.keys(descriptions).length > 0) { - return descriptions; - } - const result: Record = {}; - if (description) { - result.ai = description; - } - if (dbDescription) { - result.db = dbDescription; - } - return Object.keys(result).length > 0 ? result : undefined; -} - export function projectManifestEntry(name: string, entry: ManifestTableEntry): SemanticLayerSource { const columns = entry.columns.map((c) => ({ name: c.name, type: c.type, role: c.type === 'time' ? 
'time' : undefined, - descriptions: migrateDescriptions(c.descriptions, c.description, c.db_description), + descriptions: c.descriptions, constraints: c.constraints, enum_values: c.enum_values, tests: c.tests, @@ -1126,7 +1095,7 @@ export function projectManifestEntry(name: string, entry: ManifestTableEntry): S return { name, table: entry.table, - descriptions: migrateDescriptions(entry.descriptions, entry.description, entry.db_description), + descriptions: entry.descriptions, grain, columns, joins: (entry.joins ?? []).map((j) => ({ to: j.to, on: j.on, relationship: j.relationship, source: j.source })), @@ -1359,7 +1328,6 @@ export function findDanglingSegmentRefs(source: Record): string const COMPOSE_KNOWN_KEYS = new Set([ 'name', - 'description', 'descriptions', 'grain', 'columns', diff --git a/packages/context/src/sl/tools/sl-edit-source.tool.ts b/packages/context/src/sl/tools/sl-edit-source.tool.ts index 27b582d5..30972707 100644 --- a/packages/context/src/sl/tools/sl-edit-source.tool.ts +++ b/packages/context/src/sl/tools/sl-edit-source.tool.ts @@ -127,7 +127,7 @@ If no source exists yet, use sl_write_source instead — this tool will reject t ` - name: `, ` expr: ""`, ` description: ""`, - `Overlay shape: "name:" plus any of "measures:", "segments:", "description:". Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`, + `Overlay shape: "name:" plus any of "measures:", "segments:", "descriptions:". 
Do NOT include "sql:", "table:", "grain:", "columns:", or "joins:" — those are inherited from the manifest.`, ].join('\n'), ], sourceName, diff --git a/packages/context/src/sl/tools/sl-warehouse-validation.ts b/packages/context/src/sl/tools/sl-warehouse-validation.ts index a200dad9..e0d48721 100644 --- a/packages/context/src/sl/tools/sl-warehouse-validation.ts +++ b/packages/context/src/sl/tools/sl-warehouse-validation.ts @@ -89,7 +89,7 @@ export async function validateSingleSource( `${sourceName}.yaml: standalone source shadows an existing manifest entry — ` + `writing it as-is drops the manifest's columns and joins. ` + `Remove "sql:", "table:", "grain:", "columns:", and "joins:" and keep only ` + - `"name:" plus "measures:"/"segments:"/"description:" to write an overlay ` + + `"name:" plus "measures:"/"segments:"/"descriptions:" to write an overlay ` + `that inherits the manifest schema. Call sl_read_source to inspect the existing source first.`, ); return { errors, warnings }; diff --git a/packages/context/src/sl/tools/sl-write-source.tool.test.ts b/packages/context/src/sl/tools/sl-write-source.tool.test.ts index 1502c177..d9c58225 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.test.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.test.ts @@ -176,7 +176,7 @@ describe('SlWriteSourceTool — session gating', () => { expect((session.semanticLayerService as any).writeSource).toHaveBeenCalled(); }); - it('normalizes flat source and column descriptions before writing', async () => { + it('writes source and column description maps', async () => { const { tool, semanticLayerService } = makeTool(); const result = await tool.call( { @@ -184,10 +184,10 @@ describe('SlWriteSourceTool — session gating', () => { sourceName: 'orders', source: { name: 'orders', - description: 'Finance orders used for invoice reconciliation.', + descriptions: { user: 'Finance orders used for invoice reconciliation.' 
}, table: 'public.orders', grain: ['id'], - columns: [{ name: 'id', type: 'string', description: 'Stable order identifier.' }], + columns: [{ name: 'id', type: 'string', descriptions: { user: 'Stable order identifier.' } }], measures: [], joins: [], } as any, diff --git a/packages/context/src/sl/tools/sl-write-source.tool.ts b/packages/context/src/sl/tools/sl-write-source.tool.ts index 34b6f8c4..e7efb357 100644 --- a/packages/context/src/sl/tools/sl-write-source.tool.ts +++ b/packages/context/src/sl/tools/sl-write-source.tool.ts @@ -318,7 +318,7 @@ Do NOT join back to a table that the SQL already aggregates from if the grain co ` Writing standalone would drop the manifest's columns and joins, leaving only what you list here.`, `To add measures/segments on top of the manifest, rewrite this YAML as an overlay:`, ` - Remove "sql:", "table:", "grain:", "columns:", and "joins:".`, - ` - Keep only "name:", plus "measures:", "segments:", and/or "description:".`, + ` - Keep only "name:", plus "measures:", "segments:", and/or "descriptions:".`, ` - The manifest's schema is inherited automatically.`, `If you really need a different base table, use a different source name.`, ].join('\n'); diff --git a/packages/context/src/tools/tool-session.ts b/packages/context/src/tools/tool-session.ts index 023a8c8e..05da85d9 100644 --- a/packages/context/src/tools/tool-session.ts +++ b/packages/context/src/tools/tool-session.ts @@ -23,7 +23,7 @@ interface EvictionDecisionRecord { rawPath: string; artifactKind: 'wiki' | 'sl'; artifactKey: string; - action: 'removed' | 'retained_deprecated' | 'retained_supported'; + action: 'removed'; reason: string; } diff --git a/packages/context/src/wiki/local-knowledge.test.ts b/packages/context/src/wiki/local-knowledge.test.ts index 5ad66eb1..54bd3771 100644 --- a/packages/context/src/wiki/local-knowledge.test.ts +++ b/packages/context/src/wiki/local-knowledge.test.ts @@ -245,29 +245,4 @@ describe('local knowledge helpers', () => { 
).rejects.toThrow('Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".'); }); - it('ignores nested historic-SQL legacy paths when listing local knowledge pages', async () => { - await writeLocalKnowledgePage(project, { - key: 'historic-sql-paid-orders', - scope: 'GLOBAL', - summary: 'Flat historic SQL page', - content: 'Flat page body.', - tags: ['historic-sql'], - }); - await project.fileStore.writeFile( - 'knowledge/global/historic-sql/paid-orders.md', - '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n', - 'Test', - 'test@example.com', - 'Write nested legacy page', - ); - - await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ - { - key: 'historic-sql-paid-orders', - path: 'knowledge/global/historic-sql-paid-orders.md', - scope: 'GLOBAL', - summary: 'Flat historic SQL page', - }, - ]); - }); }); diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/data.sqlite similarity index 100% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/data.sqlite rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/data.sqlite diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/expected-links.yaml similarity index 100% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/expected-links.yaml rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/expected-links.yaml diff --git 
a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml similarity index 50% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml index 275a1008..6a9b3810 100644 --- a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/fixture.yaml +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/fixture.yaml @@ -1,5 +1,5 @@ -id: abbreviated_legacy_no_declared_constraints -name: Abbreviated legacy naming fixture with no declared constraints +id: abbreviated_old_no_declared_constraints +name: Abbreviated old naming fixture with no declared constraints tier: row_bearing origin: synthetic thresholdEligible: false diff --git a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json similarity index 98% rename from packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json rename to packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json index ac3840e2..b4cb7a92 100644 --- a/packages/context/test/fixtures/relationship-benchmarks/abbreviated_legacy_no_declared_constraints/snapshot.json +++ b/packages/context/test/fixtures/relationship-benchmarks/abbreviated_old_no_declared_constraints/snapshot.json @@ -1,5 +1,5 @@ { - "connectionId": "abbreviated_legacy_no_declared_constraints", + "connectionId": "abbreviated_old_no_declared_constraints", "driver": "sqlite", "extractedAt": 
"2026-05-07T00:00:00.000Z", "scope": {}, diff --git a/python/ktx-sl/semantic_layer/loader.py b/python/ktx-sl/semantic_layer/loader.py index c6956c19..02b5b2c4 100644 --- a/python/ktx-sl/semantic_layer/loader.py +++ b/python/ktx-sl/semantic_layer/loader.py @@ -59,9 +59,7 @@ class SourceLoader: f"Duplicate source name '{name}' in manifest shard {path}" ) sources[name] = project_manifest_entry(name, entry) - description_sources[name] = _description_sources( - entry.descriptions, entry.description, entry.db_description - ) + description_sources[name] = _description_sources(entry.descriptions) # 2. Load files outside _schema/ for path in sorted(self.sources_dir.rglob("*.yaml")): @@ -138,11 +136,6 @@ class SourceLoader: source = deepcopy(base) description_sources = dict(base_description_sources or {}) - # Overlay description semantics match the server: `description` writes the - # `user` source key, and `descriptions` merges keyed sources before a single - # visible description is resolved from the full map. 
- if overlay.get("description"): - description_sources["user"] = overlay["description"] if overlay.get("descriptions"): description_sources.update( { @@ -151,7 +144,7 @@ class SourceLoader: if text } ) - if overlay.get("description") or overlay.get("descriptions"): + if overlay.get("descriptions"): source.description = _resolve_description( description_sources or None, ) diff --git a/python/ktx-sl/semantic_layer/manifest.py b/python/ktx-sl/semantic_layer/manifest.py index 3023cf58..432019e8 100644 --- a/python/ktx-sl/semantic_layer/manifest.py +++ b/python/ktx-sl/semantic_layer/manifest.py @@ -76,31 +76,17 @@ def map_column_type(db_type: str) -> str: _DEFAULT_PRIORITY = ["user", "ai", "dbt", "db"] -def _description_sources( - descriptions: dict[str, str] | None, - description: str | None = None, - db_description: str | None = None, -) -> dict[str, str] | None: +def _description_sources(descriptions: dict[str, str] | None) -> dict[str, str] | None: """Normalize multi-source descriptions to a keyed map.""" if descriptions: result = {source: text for source, text in descriptions.items() if text} if result: return result - - result: dict[str, str] = {} - if description: - result["ai"] = description - if db_description: - result["db"] = db_description - return result or None + return None -def _resolve_description( - descriptions: dict[str, str] | None, - description: str | None = None, - db_description: str | None = None, -) -> str | None: - """Resolve a single description from a multi-source map or legacy flat fields.""" +def _resolve_description(descriptions: dict[str, str] | None) -> str | None: + """Resolve a single description from a multi-source map.""" if descriptions: for source in _DEFAULT_PRIORITY: if text := descriptions.get(source): @@ -109,11 +95,6 @@ def _resolve_description( for text in descriptions.values(): if text: return text - # Legacy flat fields - if description: - return description - if db_description: - return db_description return None @@ 
-123,18 +104,13 @@ class ManifestColumn(BaseModel): pk: bool = False nullable: bool = True descriptions: dict[str, str] | None = None - # Legacy flat fields (backwards-compatible YAML parsing) - description: str | None = None - db_description: str | None = None constraints: dict | None = None enum_values: dict[str, list[str]] | None = None tests: SourceColumnTests | None = None @property def resolved_description(self) -> str | None: - return _resolve_description( - self.descriptions, self.description, self.db_description - ) + return _resolve_description(self.descriptions) class ManifestJoin(BaseModel): @@ -147,9 +123,6 @@ class ManifestJoin(BaseModel): class ManifestEntry(BaseModel): table: str descriptions: dict[str, str] | None = None - # Legacy flat fields (backwards-compatible YAML parsing) - description: str | None = None - db_description: str | None = None columns: list[ManifestColumn] joins: list[ManifestJoin] = [] default_time_dimension: DefaultTimeDimensionDbt | None = None @@ -158,9 +131,7 @@ class ManifestEntry(BaseModel): @property def resolved_description(self) -> str | None: - return _resolve_description( - self.descriptions, self.description, self.db_description - ) + return _resolve_description(self.descriptions) class Manifest(BaseModel): @@ -178,6 +149,8 @@ def validate_overlay(data: dict) -> list[str]: Returns a list of error messages (empty if valid). 
""" errors: list[str] = [] + if "description" in data: + errors.append("Overlay must use 'descriptions' for source descriptions") if "table" in data: errors.append("Overlay must not contain 'table' (owned by manifest)") if "sql" in data: @@ -185,6 +158,10 @@ def validate_overlay(data: dict) -> list[str]: "Overlay must not contain 'sql' (that makes it a standalone source)" ) for col in data.get("columns", []): + if "description" in col: + errors.append( + f"Overlay column '{col.get('name', '?')}' must use 'descriptions'" + ) if "type" in col and "expr" not in col: errors.append( f"Overlay column '{col.get('name', '?')}' specifies 'type' without 'expr' " diff --git a/python/ktx-sl/sources/b2b_saas/churn_risk.yaml b/python/ktx-sl/sources/b2b_saas/churn_risk.yaml index 2fae793e..602d263a 100644 --- a/python/ktx-sl/sources/b2b_saas/churn_risk.yaml +++ b/python/ktx-sl/sources/b2b_saas/churn_risk.yaml @@ -1,10 +1,11 @@ name: churn_risk -description: | - Per-account churn risk scoring for B2B SaaS customers. Combines signals from - subscriptions (cancellation history), support tickets (severity, SLA breaches), - product usage (adoption decline), contracts (renewal proximity), CSM activities - (engagement recency), and invoices (payment issues) into a weighted composite - risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account. +descriptions: + user: | + Per-account churn risk scoring for B2B SaaS customers. Combines signals from + subscriptions (cancellation history), support tickets (severity, SLA breaches), + product usage (adoption decline), contracts (renewal proximity), CSM activities + (engagement recency), and invoices (payment issues) into a weighted composite + risk_score (0-1) and risk_tier (High/Medium/Low). One row per customer account. 
sql: | WITH sub_signals AS ( SELECT diff --git a/python/ktx-sl/sources/ecommerce/churn_risk.yaml b/python/ktx-sl/sources/ecommerce/churn_risk.yaml index 32e919ed..7a009a59 100644 --- a/python/ktx-sl/sources/ecommerce/churn_risk.yaml +++ b/python/ktx-sl/sources/ecommerce/churn_risk.yaml @@ -1,7 +1,8 @@ name: churn_risk -description: | - Customer churn risk score combining tenure, - usage trends, and support burden. +descriptions: + user: | + Customer churn risk score combining tenure, + usage trends, and support burden. sql: | SELECT c.id AS customer_id, diff --git a/python/ktx-sl/tests/test_manifest.py b/python/ktx-sl/tests/test_manifest.py index e025c3da..1007fc89 100644 --- a/python/ktx-sl/tests/test_manifest.py +++ b/python/ktx-sl/tests/test_manifest.py @@ -95,7 +95,7 @@ class TestProjectManifestEntry: def orders_entry(self) -> ManifestEntry: return ManifestEntry( table="public.orders", - description="Customer orders", + descriptions={"user": "Customer orders"}, columns=[ ManifestColumn(name="id", type="integer", pk=True), ManifestColumn(name="customer_id", type="integer"), @@ -202,7 +202,7 @@ class TestValidateOverlay: def test_validate_overlay_valid(self): data = { "name": "orders", - "description": "Revenue-bearing orders", + "descriptions": {"user": "Revenue-bearing orders"}, "grain": ["id"], "measures": [{"name": "revenue", "expr": "sum(total)"}], "columns": [ @@ -259,7 +259,7 @@ def _manifest_tables() -> dict: "tables": { "orders": { "table": "public.orders", - "description": "Customer orders", + "descriptions": {"user": "Customer orders"}, "columns": [ {"name": "id", "type": "integer", "pk": True}, {"name": "customer_id", "type": "integer"}, @@ -278,7 +278,7 @@ def _manifest_tables() -> dict: }, "customers": { "table": "public.customers", - "description": "Customer accounts", + "descriptions": {"user": "Customer accounts"}, "columns": [ {"name": "id", "type": "integer", "pk": True}, {"name": "name", "type": "varchar"}, @@ -329,12 +329,12 @@ class 
TestTwoTierLoading: assert sources["regions"].table == "public.regions" assert sources["regions"].is_table_source - def test_overlay_descriptions_do_not_promote_base_description_to_user_source( + def test_overlay_descriptions_do_not_promote_base_map_to_user_source( self, tmp_path: Path ): standalone = { "name": "regions", - "description": "Standalone description", + "descriptions": {"ai": "Standalone description"}, "table": "public.regions", "grain": ["id"], "columns": [ @@ -376,7 +376,7 @@ class TestTwoTierLoading: overlay = { "name": "orders", - "description": "Revenue-bearing orders", + "descriptions": {"user": "Revenue-bearing orders"}, "grain": ["id"], "measures": [{"name": "revenue", "expr": "sum(total)"}], } @@ -394,11 +394,11 @@ class TestTwoTierLoading: assert len(orders.measures) == 1 assert orders.measures[0].name == "revenue" - def test_overlay_description_override(self, tmp_path: Path): + def test_overlay_description_map_override(self, tmp_path: Path): schema_dir = tmp_path / "_schema" _write_yaml(schema_dir / "public.yaml", _manifest_tables()) - overlay = {"name": "orders", "description": "Overridden description"} + overlay = {"name": "orders", "descriptions": {"user": "Overridden description"}} _write_yaml(tmp_path / "orders.yaml", overlay) _write_yaml(tmp_path / "customers.yaml", {"name": "customers"}) @@ -426,7 +426,7 @@ class TestTwoTierLoading: sources = loader.load_all() assert sources["orders"].description == "Customer orders" - def test_overlay_descriptions_map_overrides_lower_priority_db_description( + def test_overlay_descriptions_map_overrides_lower_priority_db_source( self, tmp_path: Path ): schema_dir = tmp_path / "_schema" diff --git a/scripts/build-evidence-fusion-adversarial-fixtures.mjs b/scripts/build-evidence-fusion-adversarial-fixtures.mjs index 6994d5c2..282a6477 100644 --- a/scripts/build-evidence-fusion-adversarial-fixtures.mjs +++ b/scripts/build-evidence-fusion-adversarial-fixtures.mjs @@ -129,10 +129,10 @@ function 
nonEnglishFixture() { }; } -function abbreviatedLegacyFixture() { +function abbreviatedOldNamingFixture() { return { - id: 'abbreviated_legacy_no_declared_constraints', - name: 'Abbreviated legacy naming fixture with no declared constraints', + id: 'abbreviated_old_no_declared_constraints', + name: 'Abbreviated old naming fixture with no declared constraints', tier: 'row_bearing', sql: [ 'CREATE TABLE cust (cust_id TEXT NOT NULL, nm TEXT NOT NULL, stat_cd TEXT NOT NULL);', @@ -480,7 +480,7 @@ function scaleFixture() { const fixtures = [ nonEnglishFixture(), - abbreviatedLegacyFixture(), + abbreviatedOldNamingFixture(), analyticalWarehouseFixture(), mixedCaseFixture(), polymorphicFixture(), diff --git a/scripts/check-boundaries.mjs b/scripts/check-boundaries.mjs index 53455abd..9f2953e7 100644 --- a/scripts/check-boundaries.mjs +++ b/scripts/check-boundaries.mjs @@ -46,15 +46,15 @@ const llmBoundaryPatterns = [ pattern: /\bembedMany\b/, }, { - label: 'legacy context LLM provider port', + label: 'context-owned LLM provider port', pattern: /\bLlmProviderPort\b/, }, { - label: 'legacy scan LLM provider port', + label: 'scan-owned LLM provider port', pattern: /\bKtxScanLlmPort\b/, }, { - label: 'legacy gateway LLM provider helper', + label: 'context-owned gateway LLM provider helper', pattern: /\bcreateGatewayLlmProvider\b/, }, ]; diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index db8afafe..9d5bf6f9 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -92,7 +92,7 @@ describe('scanFileContent', () => { ); }); - it('rejects context-owned LLM provider construction after @ktx/llm migration', () => { + it('rejects context-owned LLM provider construction outside @ktx/llm', () => { const violations = [ ...scanFileContent( 'packages/context/src/agent/local-llm-provider.ts', diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 62a25cf7..79e26f74 100644 --- 
a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -128,7 +128,6 @@ describe('standalone example docs', () => { .join('|'), ), ); - assert.doesNotMatch(readme, /--historic-sql-min-calls/); }); it('lists every workspace package in the contributor docs', async () => { From d7147f9ca1349138c92b4286930a083c7561d777 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 16:05:58 +0200 Subject: [PATCH 13/15] feat: rename project wiki directory (#66) * feat: rename project wiki directory * test: fix wiki skill ordering expectations * Show configured context sources in setup --- README.md | 6 +- .../content/docs/cli-reference/ktx-wiki.mdx | 65 ++++++- .../content/docs/concepts/context-as-code.mdx | 16 +- .../docs/concepts/the-context-layer.mdx | 14 +- .../docs/getting-started/introduction.mdx | 2 +- .../docs/getting-started/quickstart.mdx | 8 +- .../content/docs/guides/building-context.mdx | 8 +- .../content/docs/guides/serving-agents.mdx | 6 +- .../content/docs/guides/writing-context.mdx | 27 +-- .../docs/integrations/agent-clients.mdx | 4 +- .../docs/integrations/context-sources.mdx | 14 +- docs-site/lib/llm-docs.ts | 8 +- examples/local-warehouse/ktx.yaml | 2 +- .../{knowledge => wiki}/global/revenue.md | 0 .../assets/demo/orbit/links/provenance.json | 32 ++-- packages/cli/assets/demo/orbit/manifest.json | 2 +- .../demo/orbit/replay.memory-flow.v1.json | 82 ++++----- .../orbit/{knowledge => wiki}/global/.gitkeep | 0 .../global/customer-communication-policy.md | 0 .../global/new-hire-onboarding-policy.md | 0 .../global/orbit-activation-kpi-glossary.md | 0 ...orbit-activation-policy-change-jan-2026.md | 0 .../orbit-arr-contract-first-definition.md | 0 .../global/orbit-company-overview.md | 0 .../orbit-customer-health-risk-definition.md | 0 .../orbit-customer-stakeholder-needs.md | 0 .../global/orbit-customers-source.md | 0 .../global/orbit-dbt-exposures.md | 0 .../global/orbit-dbt-project-overview.md | 0 
.../global/orbit-how-we-work.md | 0 .../global/orbit-known-product-gaps.md | 0 .../global/orbit-mart-account-activity.md | 0 .../global/orbit-mart-account-segments.md | 0 .../global/orbit-mart-arr-daily.md | 0 .../global/orbit-mart-nrr-quarterly.md | 0 .../global/orbit-mart-procurement-activity.md | 0 .../orbit-mart-retention-movement-breakout.md | 0 .../global/orbit-mart-revenue-daily.md | 0 .../orbit-metabase-sql-library-patterns.md | 0 ...orbit-nrr-discount-expiration-treatment.md | 0 .../orbit-plan-segment-normalization.md | 0 .../orbit-procurement-qualifying-actions.md | 0 .../global/orbit-product-design-principles.md | 0 .../global/orbit-product-review-checklist.md | 0 ...bit-revenue-gross-to-net-reconciliation.md | 0 .../global/sales-ops-cs-handoff-process.md | 0 packages/cli/scripts/build-demo-assets.mjs | 88 +++++----- packages/cli/src/command-schemas.ts | 13 ++ .../cli/src/commands/knowledge-commands.ts | 49 +++++- packages/cli/src/demo-assets.test.ts | 6 +- packages/cli/src/demo-assets.ts | 6 +- packages/cli/src/index.test.ts | 68 +++++++- packages/cli/src/ingest.test-utils.ts | 6 +- packages/cli/src/knowledge.test.ts | 162 ++++++++++++------ packages/cli/src/knowledge.ts | 59 ++++++- packages/cli/src/memory-flow-hud.tsx | 4 +- .../cli/src/memory-flow-interactive.test.ts | 4 +- packages/cli/src/memory-flow-tui.test.tsx | 12 +- packages/cli/src/setup-context.test.ts | 8 +- packages/cli/src/setup-context.ts | 2 +- packages/cli/src/setup-demo-tour.ts | 4 +- packages/cli/src/setup-sources.test.ts | 26 +++ packages/cli/src/setup-sources.ts | 27 ++- .../context/prompts/memory_agent_backfill.md | 2 +- .../memory_agent_bundle_ingest_reconcile.md | 2 +- .../memory_agent_bundle_ingest_work_unit.md | 2 +- .../prompts/memory_agent_external_ingest.md | 2 +- .../context/prompts/memory_agent_research.md | 2 +- .../context/skills/metricflow_ingest/SKILL.md | 4 +- .../SKILL.md | 6 +- .../src/ingest/action-identity.test.ts | 4 +- .../src/ingest/adapters/dbt/dbt.adapter.ts 
| 2 +- .../local-ingest-acceptance.test.ts | 2 +- .../adapters/historic-sql/post-processor.ts | 2 +- .../adapters/historic-sql/projection.test.ts | 20 +-- .../adapters/historic-sql/projection.ts | 2 +- .../src/ingest/ingest-bundle.runner.test.ts | 16 +- .../src/ingest/ingest-bundle.runner.ts | 6 +- .../src/ingest/ingest-runtime-assets.test.ts | 4 +- .../src/ingest/local-bundle-runtime.ts | 14 +- .../ingest/memory-flow/acceptance-fixtures.ts | 16 +- .../src/ingest/memory-flow/events.test.ts | 10 +- .../ingest/memory-flow/interaction.test.ts | 14 +- .../memory-flow/interactive-render.test.ts | 8 +- .../src/ingest/memory-flow/render.test.ts | 4 +- .../src/ingest/memory-flow/schema.test.ts | 8 +- .../src/ingest/memory-flow/view-model.test.ts | 20 +-- .../src/ingest/report-snapshot.test.ts | 8 +- .../ingest/sqlite-bundle-ingest-store.test.ts | 8 +- .../context/src/ingest/wiki-sl-ref-repair.ts | 2 +- packages/context/src/mcp/context-tools.ts | 20 +-- .../src/mcp/local-project-ports.test.ts | 4 +- packages/context/src/mcp/server.test.ts | 24 +-- .../context/src/memory/capture-signals.ts | 4 +- .../context/src/memory/local-memory.test.ts | 10 +- packages/context/src/memory/local-memory.ts | 6 +- .../src/memory/memory-agent.service.ts | 6 +- .../context/src/memory/memory-runs.test.ts | 4 +- .../src/memory/memory-runtime-assets.test.ts | 6 +- packages/context/src/project/config.test.ts | 2 +- packages/context/src/project/config.ts | 2 +- .../src/project/local-git-file-store.test.ts | 22 +-- packages/context/src/project/project.test.ts | 6 +- packages/context/src/project/project.ts | 2 +- .../skills/skills-registry.service.test.ts | 28 +-- .../src/skills/skills-registry.service.ts | 2 +- .../src/wiki/knowledge-wiki.service.test.ts | 22 +-- .../src/wiki/knowledge-wiki.service.ts | 28 +-- .../context/src/wiki/local-knowledge.test.ts | 35 +++- packages/context/src/wiki/local-knowledge.ts | 10 +- .../src/wiki/sqlite-knowledge-index.test.ts | 30 ++-- 
.../src/wiki/tools/wiki-remove.tool.ts | 2 +- .../src/wiki/tools/wiki-search.tool.test.ts | 4 +- .../context/src/wiki/tools/wiki-write.tool.ts | 2 +- scripts/package-artifacts.mjs | 8 +- scripts/package-artifacts.test.mjs | 4 +- 116 files changed, 839 insertions(+), 484 deletions(-) rename examples/local-warehouse/{knowledge => wiki}/global/revenue.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/.gitkeep (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/customer-communication-policy.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/new-hire-onboarding-policy.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-activation-kpi-glossary.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-activation-policy-change-jan-2026.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-arr-contract-first-definition.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-company-overview.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-customer-health-risk-definition.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-customer-stakeholder-needs.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-customers-source.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-dbt-exposures.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-dbt-project-overview.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-how-we-work.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-known-product-gaps.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-account-activity.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-account-segments.md (100%) rename 
packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-arr-daily.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-nrr-quarterly.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-procurement-activity.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-retention-movement-breakout.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-mart-revenue-daily.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-metabase-sql-library-patterns.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-nrr-discount-expiration-treatment.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-plan-segment-normalization.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-procurement-qualifying-actions.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-product-design-principles.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-product-review-checklist.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/orbit-revenue-gross-to-net-reconciliation.md (100%) rename packages/cli/assets/demo/orbit/{knowledge => wiki}/global/sales-ops-cs-handoff-process.md (100%) rename packages/context/skills/{knowledge_capture => wiki_capture}/SKILL.md (97%) diff --git a/README.md b/README.md index b52a31f6..563525e5 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ reviewable project files that agents can use while planning, querying, and updating analytics work. A KTX project is a directory of plain files — YAML semantic sources, Markdown -knowledge pages, and SQLite state — that you commit to git and review in PRs, +wiki pages, and SQLite state — that you commit to git and review in PRs, just like dbt models. 
## Who KTX is for @@ -105,7 +105,7 @@ my-project/ │ ├── orders.yaml # Semantic source definitions │ ├── customers.yaml │ └── order_items.yaml -├── knowledge/ +├── wiki/ │ ├── global/ │ │ ├── revenue.md # Business definitions and rules │ │ └── segment-classification.md @@ -118,7 +118,7 @@ my-project/ └── db.sqlite # Local state (git-ignored) ``` -Semantic sources and knowledge pages are committed to git. The `.ktx/` directory +Semantic sources and wiki pages are committed to git. The `.ktx/` directory holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the next run. diff --git a/docs-site/content/docs/cli-reference/ktx-wiki.mdx b/docs-site/content/docs/cli-reference/ktx-wiki.mdx index 8e27b5ff..1d57a93f 100644 --- a/docs-site/content/docs/cli-reference/ktx-wiki.mdx +++ b/docs-site/content/docs/cli-reference/ktx-wiki.mdx @@ -1,9 +1,9 @@ --- title: "ktx wiki" -description: "List or search knowledge pages." +description: "List, read, search, or write wiki pages." --- -Manage knowledge pages in your KTX project. Knowledge pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data. +Manage wiki pages in your KTX project. Wiki pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data. 
## Command signature @@ -16,7 +16,9 @@ ktx wiki [options] | Subcommand | Description | |-----------|-------------| | `list` | List local wiki pages | +| `read ` | Read one local wiki page | | `search ` | Search local wiki pages | +| `write ` | Write one local wiki page | ## Options @@ -27,6 +29,13 @@ ktx wiki [options] | `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | +### `wiki read` + +| Flag | Description | Default | +|------|-------------|---------| +| `--json` | Print JSON output | `false` | +| `--user-id ` | Local user id | `local` | + ### `wiki search` | Flag | Description | Default | @@ -35,6 +44,18 @@ ktx wiki [options] | `--user-id ` | Local user id | `local` | | `--limit ` | Maximum search results | — | +### `wiki write` + +| Flag | Description | Default | +|------|-------------|---------| +| `--user-id ` | Local user id | `local` | +| `--scope ` | Scope: `global` or `user` | `global` | +| `--summary ` | Wiki page summary (required) | — | +| `--content ` | Wiki page content (required) | — | +| `--tag ` | Wiki tag; repeatable | — | +| `--ref ` | Wiki ref; repeatable | — | +| `--sl-ref ` | Semantic-layer ref; repeatable | — | + ## Examples ```bash @@ -44,16 +65,48 @@ ktx wiki list # List all wiki pages as JSON ktx wiki list --json +# Read a specific wiki page +ktx wiki read revenue-definitions + +# Read a specific wiki page as JSON +ktx wiki read revenue-definitions --json + # Search wiki pages ktx wiki search "monthly recurring revenue" # Search wiki pages as JSON ktx wiki search "monthly recurring revenue" --json --limit 10 + +# Write a global wiki page +ktx wiki write revenue-definitions \ + --summary "Canonical revenue metric definitions" \ + --content "## MRR\nMonthly Recurring Revenue is calculated as..." + +# Write a user-scoped wiki page +ktx wiki write my-notes \ + --scope user \ + --summary "Personal analysis notes" \ + --content "Things to check when revenue numbers look off..." 
+ +# Write a page with tags and references +ktx wiki write churn-rules \ + --summary "Churn calculation business rules" \ + --content "A customer is considered churned when..." \ + --tag finance \ + --tag retention \ + --sl-ref customers \ + --sl-ref subscriptions + +# Write a page with external references +ktx wiki write data-freshness \ + --summary "Data pipeline SLAs and freshness guarantees" \ + --content "The orders table refreshes every 15 minutes..." \ + --ref "https://wiki.example.com/data-pipelines" ``` ## Output -Wiki commands print local knowledge pages and search results. +Wiki commands print local wiki pages and search results. Agents should search first, then read the most relevant page by key. ```json { @@ -74,5 +127,7 @@ Wiki commands print local knowledge pages and search results. | Error | Cause | Recovery | |-------|-------|----------| -| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms or run ingest to capture more context | -| A page is missing | The page has not been created by ingest or memory capture yet | Run ingest, then search again with `ktx wiki search` | +| Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing | +| Read fails for a key | The page key is wrong or scoped to a different user | Run `ktx wiki list` or search again to get the exact key | +| Write fails due to missing fields | `--summary` or `--content` was omitted | Pass both fields, and keep the summary short enough for search results | +| Agent writes duplicate pages | It did not search existing pages first | Always run `ktx wiki search` before `ktx wiki write` | diff --git a/docs-site/content/docs/concepts/context-as-code.mdx b/docs-site/content/docs/concepts/context-as-code.mdx index 3c43082e..51141b85 100644 --- a/docs-site/content/docs/concepts/context-as-code.mdx +++ 
b/docs-site/content/docs/concepts/context-as-code.mdx @@ -7,9 +7,9 @@ description: Treat analytics context like code — version it, review it, merge dbt proved that analytics transformations belong in version control. Before dbt, SQL lived in BI tools, scheduling systems, and spreadsheets — scattered, unreviewed, impossible to audit. "Analytics as code" changed that: put your models in git, review them in PRs, deploy them by merging. -KTX applies the same principle to analytics context. Metric definitions, business rules, join relationships, knowledge pages — these are artifacts that determine whether an agent produces correct results. They change over time. They need review. They need history. They need to be treated like code. +KTX applies the same principle to analytics context. Metric definitions, business rules, join relationships, wiki pages — these are artifacts that determine whether an agent produces correct results. They change over time. They need review. They need history. They need to be treated like code. -A KTX project is a git repository. Semantic sources are YAML files. Knowledge pages are Markdown files. Changes are commits. Updates are pull requests. Deployment is a merge. The entire lifecycle of your analytics context follows the same workflow your team already uses for dbt models, application code, and infrastructure. +A KTX project is a git repository. Semantic sources are YAML files. Wiki pages are Markdown files. Changes are commits. Updates are pull requests. Deployment is a merge. The entire lifecycle of your analytics context follows the same workflow your team already uses for dbt models, application code, and infrastructure. ## Auto-ingestion @@ -19,9 +19,9 @@ An ingestion run works like this: 1. **Adapters extract metadata.** Each configured source — dbt, LookML, Metabase, MetricFlow, Notion, or your live database — provides structured metadata about models, metrics, dimensions, questions, and documentation. -2. 
**The LLM agent reconciles.** KTX doesn't blindly overwrite existing context. An LLM agent compares incoming metadata against your current semantic sources and knowledge pages. It decides what to create, what to update, and what to leave alone. If your dbt project added a new model, the agent writes a new semantic source. If a Metabase question references a metric you've already defined, the agent skips the duplicate. +2. **The LLM agent reconciles.** KTX doesn't blindly overwrite existing context. An LLM agent compares incoming metadata against your current semantic sources and wiki pages. It decides what to create, what to update, and what to leave alone. If your dbt project added a new model, the agent writes a new semantic source. If a Metabase question references a metric you've already defined, the agent skips the duplicate. -3. **Files are written.** New and updated YAML sources and Markdown knowledge pages are written to the project directory. Every decision is recorded in the session transcript. +3. **Files are written.** New and updated YAML sources and Markdown wiki pages are written to the project directory. Every decision is recorded in the session transcript. This reconciliation step is what separates auto-ingestion from a simple sync. A naive import would overwrite your hand-tuned metric definitions every time dbt's manifest changes. KTX's agent-driven approach merges intelligently: it respects your edits, fills gaps, and flags conflicts for human review. @@ -43,7 +43,7 @@ dbt / Looker / Metabase / Notion | | + 3 new sources | ~ 2 updated joins - | + 1 knowledge page + | + 1 wiki page v open PR | @@ -57,7 +57,7 @@ dbt / Looker / Metabase / Notion agents see updated context ``` -A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. 
+A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 wiki page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest run --connection-id --adapter --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. @@ -69,9 +69,9 @@ This workflow gives you the same review guarantees you have for dbt models. No s Context improves over time through two feedback channels. -**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest. +**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a wiki page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest. -**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. 
These signals feed back into the context: knowledge pages can note known data quality issues, and source definitions can be tightened with better filters, join paths, or grain declarations. +**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: wiki pages can note known data quality issues, and source definitions can be tightened with better filters, join paths, or grain declarations. Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration. diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index 70480f48..d9021a8e 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -30,7 +30,7 @@ A context layer is the infrastructure that gives agents the business knowledge t KTX organizes context into four pillars: - Semantic sources -- Knowledge pages +- Wiki pages - Scan artifacts - Provenance @@ -67,7 +67,7 @@ measures: expr: count(id) ``` -**Knowledge pages** are Markdown documents that capture business definitions, rules, and operating context — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. +**Wiki pages** are Markdown documents that capture business definitions, rules, and operating context — the kind of context that doesn't fit in a schema definition. 
Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. ```markdown --- @@ -97,13 +97,13 @@ Together, these four pillars give agents enough context to produce analytics art ## How KTX compares -KTX is a context layer with an agent-native semantic layer at its core. MetricFlow, Cube, and Malloy model metrics, dimensions, joins, and generated SQL. KTX covers that semantic-layer work, then adds the context agents need to use and maintain it: knowledge pages, schema scans, provenance, ingestion, validation, and agent-facing CLI commands. +KTX is a context layer with an agent-native semantic layer at its core. MetricFlow, Cube, and Malloy model metrics, dimensions, joins, and generated SQL. KTX covers that semantic-layer work, then adds the context agents need to use and maintain it: wiki pages, schema scans, provenance, ingestion, validation, and agent-facing CLI commands. The workflow is the difference. Traditional semantic layers are powerful, but they are usually built and maintained through manual modeling work, product-specific runtimes, or language-specific workflows. They are not agent-native by default, which makes them harder for agents to inspect, edit, validate, and review in a tight loop. KTX is designed for agents that need to read context, change semantic files, inspect generated SQL, and leave a reviewable git diff. 
| | KTX semantic layer | MetricFlow | Cube | Malloy | |---|---|---|---|---| -| **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations | +| **Model surface** | Plain YAML sources plus Markdown wiki pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations | | **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines | | **Agent edit loop** | First-class. Agents can patch small files, save imperfect drafts, run validation, query through the CLI, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model | | **Fan-out safety** | Explicit `grain` plus relationship metadata. KTX detects `one_to_many` fan-out, identifies chasm traps, pre-aggregates independent fact measures into CTEs, and rejects unsafe filters | Dataflow query planning for metric requests, multi-hop joins, metric time, and metric types | Runtime planner, modeled joins, primary keys, views, multi-fact views, and pre-aggregations | Symmetric aggregates and path-based aggregation in the language | @@ -111,7 +111,7 @@ The workflow is the difference. 
Traditional semantic layers are powerful, but th | **Context around semantics** | Built in: wiki pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing CLI commands | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application | | **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL | -If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow or LookML, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. +If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and wiki pages. If you already use MetricFlow or LookML, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. ## The plain-files philosophy @@ -125,7 +125,7 @@ my-project/ │ ├── orders.yaml # Semantic source definitions │ ├── customers.yaml │ └── order_items.yaml -├── knowledge/ +├── wiki/ │ ├── global/ │ │ ├── revenue.md # Business definitions and rules │ │ └── segment-classification.md @@ -140,7 +140,7 @@ my-project/ └── cache/ # Runtime cache (git-ignored) ``` -Semantic sources and knowledge pages are committed to git. 
The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run. +Semantic sources and wiki pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run. This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth. diff --git a/docs-site/content/docs/getting-started/introduction.mdx b/docs-site/content/docs/getting-started/introduction.mdx index a9d98d3e..70ca9a84 100644 --- a/docs-site/content/docs/getting-started/introduction.mdx +++ b/docs-site/content/docs/getting-started/introduction.mdx @@ -88,5 +88,5 @@ Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server. | Set up a new KTX project | [Quickstart](/docs/getting-started/quickstart) | | Explain what problem KTX solves | [The Context Layer](/docs/concepts/the-context-layer) | | Scan a database and ingest metadata | [Building Context](/docs/guides/building-context) | -| Edit semantic sources or knowledge pages | [Writing Context](/docs/guides/writing-context) | +| Edit semantic sources or wiki pages | [Writing Context](/docs/guides/writing-context) | | Look up exact command flags | [CLI Reference](/docs/cli-reference/ktx-setup) | diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 59a512cb..7aba00fd 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -146,7 +146,7 @@ This is where KTX does the heavy lifting. 
It runs an enriched scan of your datab │ ○ Leave context unbuilt and exit setup ``` -The build scans each primary source with LLM enrichment, detects table relationships, and runs ingestion agents that reconcile metadata from your context sources into semantic-layer YAML files and knowledge pages. +The build scans each primary source with LLM enrichment, detects table relationships, and runs ingestion agents that reconcile metadata from your context sources into semantic-layer YAML files and wiki pages. For a small database (under 50 tables), this takes a few minutes. Larger warehouses can take longer. You can press d to detach and let it run in the background: @@ -209,8 +209,8 @@ KTX writes project state as plain files so agents can inspect and edit changes i | `ktx.yaml` | `ktx setup` | Main project configuration: connections, LLM settings, embeddings, and context sources | | `.ktx/secrets/*` | `ktx setup` when file-backed secrets are selected | Local secret files referenced from `ktx.yaml`; do not commit these | | `semantic-layer//*.yaml` | context build, ingestion, or direct file edits | Semantic source definitions agents use for SQL generation | -| `knowledge/global/*.md` | ingestion, memory capture, or direct file edits | Shared business context and metric definitions | -| `knowledge/user//*.md` | memory capture or direct file edits | User-scoped notes for one agent/user context | +| `wiki/global/*.md` | ingestion, memory capture, `ktx wiki write --scope global`, or direct file edits | Shared business context and metric definitions | +| `wiki/user//*.md` | memory capture, `ktx wiki write --scope user`, or direct file edits | User-scoped notes for one agent/user context | | `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling public `ktx` commands | ## Verify it worked @@ -247,6 +247,6 @@ Agent integration ready: yes (claude-code:project) ## Next steps - **Build more context** — learn 
about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide. -- **Refine your semantic layer** — the [Writing Context](/docs/guides/writing-context) guide covers source YAML, measures, joins, and knowledge pages. +- **Refine your semantic layer** — the [Writing Context](/docs/guides/writing-context) guide covers source YAML, measures, joins, and wiki pages. - **Understand the architecture** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why a context layer is more than a semantic layer. - **Connect more agents** — see the [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool setup details. diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index 25d873d9..c3821a52 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -53,7 +53,7 @@ Relationship scans run with `ktx scan --mode relationships`. Thi ## Ingestion -Ingestion pulls semantic context from your existing analytics tools — dbt projects, Looker models, Metabase questions, and more — and writes it into your KTX project as semantic sources and knowledge pages. +Ingestion pulls semantic context from your existing analytics tools — dbt projects, Looker models, Metabase questions, and more — and writes it into your KTX project as semantic sources and wiki pages. ### How it works @@ -61,7 +61,7 @@ Each ingest run follows this flow: 1. An **adapter** extracts metadata from your tool (dbt manifest, LookML files, Metabase API, etc.) 2. An **LLM agent** reconciles the extracted metadata with your existing context — it merges intelligently rather than overwriting -3. **Semantic sources** (YAML) and **knowledge pages** (Markdown) are written to your project directory +3. 
**Semantic sources** (YAML) and **wiki pages** (Markdown) are written to your project directory ### Running an ingest @@ -113,7 +113,7 @@ See [Context Sources](/docs/integrations/context-sources) for adapter-specific s ### What gets generated -A typical dbt ingest produces semantic sources and knowledge pages in your project: +A typical dbt ingest produces semantic sources and wiki pages in your project: **Semantic source** (`semantic-layer/my-postgres/orders.yaml`): @@ -149,7 +149,7 @@ joins: relationship: many_to_one ``` -**Knowledge page** (`knowledge/global/order-status-definitions.md`): +**Wiki page** (`wiki/global/order-status-definitions.md`): ```markdown --- diff --git a/docs-site/content/docs/guides/serving-agents.mdx b/docs-site/content/docs/guides/serving-agents.mdx index 0de6934e..4a93ae43 100644 --- a/docs-site/content/docs/guides/serving-agents.mdx +++ b/docs-site/content/docs/guides/serving-agents.mdx @@ -36,10 +36,10 @@ ktx sl query --json \ --max-rows 100 ``` -**Knowledge:** +**Wiki:** ```bash -# Search knowledge pages +# Search wiki pages ktx wiki search "revenue recognition" --json --limit 10 ``` @@ -56,4 +56,4 @@ configuration. For manual setup or per-tool details, see the [Agent Clients](/docs/integrations/agent-clients) integration page. After configuration, the agent can immediately call KTX commands to list -sources, search knowledge, and query your semantic layer. +sources, search wiki pages, and query your semantic layer. diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index 9e08fcc7..b6ca3597 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -1,9 +1,9 @@ --- title: Writing Context -description: Write and refine semantic sources and knowledge pages. +description: Write and refine semantic sources and wiki pages. 
--- -After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add knowledge pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works. +After building context through scanning and ingestion, you'll want to refine it — edit semantic sources to match your business logic, add wiki pages that capture tribal knowledge, and query your data through the semantic layer to verify everything works. ## Agent workflow summary @@ -218,20 +218,20 @@ The query planner is grain-aware — it understands the cardinality of joins and If validation fails, fix the YAML before asking an agent to use the source. Common validation failures are missing columns, invalid join targets, and measure expressions that reference fields outside the source. -## Knowledge Pages +## Wiki Pages -Knowledge pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it. +Wiki pages are Markdown files that capture business context — definitions, rules, gotchas, and anything an agent needs to understand beyond what the schema tells it. ### What they are -When an agent asks "what counts as an active user?" or "why do revenue numbers differ between the dashboard and the SQL query?", the answer isn't in the schema. It's tribal knowledge that lives in Slack threads, Notion pages, or someone's head. Knowledge pages make that context searchable and available to agents. +When an agent asks "what counts as an active user?" or "why do revenue numbers differ between the dashboard and the SQL query?", the answer isn't in the schema. It's tribal knowledge that lives in Slack threads, Notion pages, or someone's head. Wiki pages make that context searchable and available to agents. 
### Organization -Knowledge pages are organized by scope: +Wiki pages are organized by scope: ``` -knowledge/ +wiki/ ├── global/ # Cross-cutting definitions │ ├── order-status-definitions.md │ ├── revenue-recognition-rules.md @@ -247,10 +247,11 @@ knowledge/ ### Editing pages -Create and edit knowledge pages directly as Markdown files in the `knowledge/` -directory. Ingest and memory capture also create these pages automatically. +Create and edit wiki pages directly as Markdown files in the `wiki/` +directory, or with `ktx wiki write`. Ingest and memory capture also create +these pages automatically. -Knowledge page fields: +Wiki page fields: | Field | Required | Description | |-------|----------|-------------| @@ -279,7 +280,7 @@ Search uses both full-text matching and semantic similarity — it finds relevan ### Workflow: add searchable business context 1. Search first: `ktx wiki search "order status definitions"`. -2. If no page already covers the rule, create or edit a Markdown file under `knowledge/global/`. +2. If no page already covers the rule, create or edit a Markdown file under `wiki/global/`. 3. Include concise frontmatter; agents see the summary before loading full content. 4. Add `tags` values for the business area and `sl_refs` values for related semantic sources. 5. Search again with the user's likely wording to confirm the page is discoverable. 
@@ -290,6 +291,6 @@ Search uses both full-text matching and semantic similarity — it finds relevan |------------------|--------------|----------| | `ktx sl validate` reports a missing column | YAML references a column that is absent from the scanned table | Run a fresh scan or update the YAML to match the warehouse schema | | Query compilation double-counts a measure | Join relationship or grain is missing or wrong | Add `grain` and explicit `relationship` values, then validate and recompile | -| Agent cannot find a metric | Measure name or description does not match business terminology | Add a measure description and a knowledge page with common synonyms | -| Knowledge search misses a page | Summary and tags do not include likely user wording | Rewrite the summary and add relevant tags, then search again | +| Agent cannot find a metric | Measure name or description does not match business terminology | Add a measure description and a wiki page with common synonyms | +| Wiki search misses a page | Summary and tags do not include likely user wording | Rewrite the summary and add relevant tags, then search again | | Semantic-layer changes are hard to review | The YAML edit is too large or unfocused | Split the change into smaller source-file edits, then review the git diff | diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index 61538140..95786f52 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -124,7 +124,9 @@ All supported agent clients call the same KTX CLI commands: | Command | Description | |---------|-------------| | `ktx status --json` | Return project setup and context readiness | -| `ktx wiki search --json` | Search knowledge pages | +| `ktx wiki search --json` | Search wiki pages | +| `ktx wiki read --json` | Read a wiki page | +| `ktx wiki write ` | Write or update a wiki page | | `ktx sl list --json` 
| List semantic-layer sources | | `ktx sl search --json` | Search semantic-layer sources | | `ktx sl validate --connection-id ` | Validate semantic source definitions | diff --git a/docs-site/content/docs/integrations/context-sources.mdx b/docs-site/content/docs/integrations/context-sources.mdx index 904e3f95..5b85bff2 100644 --- a/docs-site/content/docs/integrations/context-sources.mdx +++ b/docs-site/content/docs/integrations/context-sources.mdx @@ -15,7 +15,7 @@ Agents should configure and ingest context sources in this order: 2. Store tokens as `env:NAME` or `file:/path/to/secret`. 3. Run `ktx ingest run --connection-id --adapter ` for one source or `ktx ingest run --connection-id --adapter `. 4. Check progress with `ktx ingest status --json`. -5. Review generated `semantic-layer/` YAML and `knowledge/` Markdown files in git. +5. Review generated `semantic-layer/` YAML and `wiki/` Markdown files in git. 6. Validate changed semantic sources with `ktx sl validate`. ## Shared source fields @@ -233,7 +233,7 @@ Generate an API key in Metabase: **Admin > Settings > Authentication > API Keys* ### What gets ingested - Semantic sources generated from SQL queries in questions -- Knowledge pages for dashboards (purpose, key metrics, relationships) +- Wiki pages for dashboards (purpose, key metrics, relationships) - Work units per dashboard and per question ### Warehouse mapping @@ -290,7 +290,7 @@ Generate API credentials in Looker: **Admin > Users > Edit > API Keys**. ### What gets ingested - Semantic sources from explore field definitions -- Knowledge pages for dashboards (purpose, audience, key metrics) +- Wiki pages for dashboards (purpose, audience, key metrics) - Triage signals for automated content classification - Work units per explore and per dashboard @@ -310,11 +310,11 @@ Find Looker connection names in **Admin > Database > Connections**. ## Notion -Ingests pages and databases from a Notion workspace as knowledge pages. 
Useful for capturing business definitions, data dictionaries, and team documentation that agents need for context. +Ingests pages and databases from a Notion workspace as wiki pages. Useful for capturing business definitions, data dictionaries, and team documentation that agents need for context. ### What it provides -- Knowledge pages synthesized from Notion content +- Wiki pages synthesized from Notion content - Page hierarchy and relationships - Database schemas (when Notion databases describe data sources) - Semantic clustering for organized ingestion @@ -364,7 +364,7 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in ### What gets ingested -- Knowledge pages synthesized from Notion content (not raw copies) +- Wiki pages synthesized from Notion content (not raw copies) - Domain context extracted and organized by topic - Triage signals for classifying page relevance - Work units clustered by semantic similarity for efficient processing @@ -381,6 +381,6 @@ Create an integration at [notion.so/my-integrations](https://www.notion.so/my-in |------------------|--------------|----------| | Adapter cannot read source files | `source_dir`, `repo_url`, `branch`, or `path` is wrong | Verify the path locally or clone the repo manually with the same credentials | | Private repo/API authentication fails | Token env var or secret file is missing | Export the env var or update `auth_token_ref` to a readable file | -| Ingest creates duplicate context | Existing source names or knowledge pages do not match imported terminology | Review the diff, rename duplicates, and add knowledge pages with canonical names | +| Ingest creates duplicate context | Existing source names or wiki pages do not match imported terminology | Review the diff, rename duplicates, and add wiki pages with canonical names | | Notion ingest skips pages | Integration lacks access or root ids are missing | Share pages with the Notion integration and set `root_page_ids` or use 
`all_accessible` carefully | | Generated semantic sources fail validation | Tool metadata does not match the live warehouse schema | Map BI/source databases to primary warehouse connections and rerun validation | diff --git a/docs-site/lib/llm-docs.ts b/docs-site/lib/llm-docs.ts index 69aac698..cbf9ba9e 100644 --- a/docs-site/lib/llm-docs.ts +++ b/docs-site/lib/llm-docs.ts @@ -47,7 +47,7 @@ export function buildLlmsTxt() { > Agent-native context layer for analytics engineering and database agents. -KTX provides semantic-layer files, warehouse scans, knowledge pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins. +KTX provides semantic-layer files, warehouse scans, wiki pages, provenance, and agent-facing tools that help coding agents answer analytics questions without inventing metrics or joins. ## Agent Entry Points @@ -60,7 +60,7 @@ ${link("/docs/ai-resources/agent-instructions", "Agent Instructions", "Suggested ${link("/docs/getting-started/introduction", "Introduction", "What KTX is and who it is for")} ${link("/docs/getting-started/quickstart", "Quickstart", "Set up KTX and build your first context")} -${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and knowledge pages")} +${link("/docs/guides/writing-context", "Writing Context", "Write semantic sources and wiki pages")} ## Machine-Readable Documentation @@ -68,13 +68,13 @@ ${link("/docs/guides/writing-context", "Writing Context", "Write semantic source - [Markdown access guide](${absoluteUrl("/docs/ai-resources/markdown-access.md")}): How to fetch llms.txt, llms-full.txt, and per-page Markdown - [Quickstart markdown](${absoluteUrl("/docs/getting-started/quickstart.md")}): Human setup walkthrough - [Semantic-layer CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-sl.md")}): Semantic-layer commands and JSON output -- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Knowledge 
page commands and JSON output +- [Wiki CLI markdown](${absoluteUrl("/docs/cli-reference/ktx-wiki.md")}): Wiki page commands and JSON output ## CLI Reference ${link("/docs/cli-reference/ktx-setup", "ktx setup", "Interactive project setup")} ${link("/docs/cli-reference/ktx-sl", "ktx sl", "Semantic-layer commands")} -${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Knowledge page commands")} +${link("/docs/cli-reference/ktx-wiki", "ktx wiki", "Wiki page commands")} ${link("/docs/cli-reference/ktx-connection", "ktx connection", "Connection management commands")} ## Integrations diff --git a/examples/local-warehouse/ktx.yaml b/examples/local-warehouse/ktx.yaml index 7ca51365..00ccffbd 100644 --- a/examples/local-warehouse/ktx.yaml +++ b/examples/local-warehouse/ktx.yaml @@ -19,7 +19,7 @@ agent: max_iterations: 20 default_toolset: - sl_query - - knowledge_search + - wiki_search - sl_read_source memory: auto_commit: true diff --git a/examples/local-warehouse/knowledge/global/revenue.md b/examples/local-warehouse/wiki/global/revenue.md similarity index 100% rename from examples/local-warehouse/knowledge/global/revenue.md rename to examples/local-warehouse/wiki/global/revenue.md diff --git a/packages/cli/assets/demo/orbit/links/provenance.json b/packages/cli/assets/demo/orbit/links/provenance.json index 8b9e0b63..67dbb213 100644 --- a/packages/cli/assets/demo/orbit/links/provenance.json +++ b/packages/cli/assets/demo/orbit/links/provenance.json @@ -2,7 +2,7 @@ { "id": "link-001", "artifactKind": "wiki", - "artifactKey": "knowledge/global/arr-contract-first.md", + "artifactKey": "wiki/global/arr-contract-first.md", "sourceKind": "warehouse", "sourcePath": "contracts", "relationship": "describes", @@ -11,7 +11,7 @@ { "id": "link-002", "artifactKind": "wiki", - "artifactKey": "knowledge/global/arr-contract-first.md", + "artifactKey": "wiki/global/arr-contract-first.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/arr-and-contract-reporting-notes.md", 
"relationship": "derived_from", @@ -20,7 +20,7 @@ { "id": "link-003", "artifactKind": "wiki", - "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "artifactKey": "wiki/global/revenue-gross-to-net.md", "sourceKind": "warehouse", "sourcePath": "invoices", "relationship": "describes", @@ -29,7 +29,7 @@ { "id": "link-004", "artifactKind": "wiki", - "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "artifactKey": "wiki/global/revenue-gross-to-net.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/revenue-reporting-policy.md", "relationship": "derived_from", @@ -38,7 +38,7 @@ { "id": "link-005", "artifactKind": "wiki", - "artifactKey": "knowledge/global/discount-expiration.md", + "artifactKey": "wiki/global/discount-expiration.md", "sourceKind": "warehouse", "sourcePath": "arr_movements", "relationship": "describes", @@ -47,7 +47,7 @@ { "id": "link-006", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "sourceKind": "warehouse", "sourcePath": "arr_movements", "relationship": "describes", @@ -56,7 +56,7 @@ { "id": "link-007", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/retention-and-nrr-definition-notes.md", "relationship": "derived_from", @@ -65,7 +65,7 @@ { "id": "link-008", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "sourceKind": "bi", "sourcePath": "raw-sources/bi/account_retention.view.lkml", "relationship": "derived_from", @@ -74,7 +74,7 @@ { "id": "link-009", "artifactKind": "wiki", - "artifactKey": "knowledge/global/segment-classification.md", + "artifactKey": "wiki/global/segment-classification.md", "sourceKind": "warehouse", "sourcePath": "plans", "relationship": "describes", @@ -83,7 +83,7 @@ { "id": "link-010", "artifactKind": 
"wiki", - "artifactKey": "knowledge/global/segment-classification.md", + "artifactKey": "wiki/global/segment-classification.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/sales-ops-segmentation-guide.md", "relationship": "derived_from", @@ -92,7 +92,7 @@ { "id": "link-011", "artifactKind": "wiki", - "artifactKey": "knowledge/global/activation-policy.md", + "artifactKey": "wiki/global/activation-policy.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/activation-policy-decision-record.md", "relationship": "derived_from", @@ -101,7 +101,7 @@ { "id": "link-012", "artifactKind": "wiki", - "artifactKey": "knowledge/global/procurement-workflows.md", + "artifactKey": "wiki/global/procurement-workflows.md", "sourceKind": "warehouse", "sourcePath": "purchase_requests", "relationship": "describes", @@ -110,7 +110,7 @@ { "id": "link-013", "artifactKind": "wiki", - "artifactKey": "knowledge/global/customer-health-scoring.md", + "artifactKey": "wiki/global/customer-health-scoring.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/customer-health-playbook.md", "relationship": "derived_from", @@ -119,7 +119,7 @@ { "id": "link-014", "artifactKind": "wiki", - "artifactKey": "knowledge/global/customer-health-scoring.md", + "artifactKey": "wiki/global/customer-health-scoring.md", "sourceKind": "warehouse", "sourcePath": "support_tickets", "relationship": "describes", @@ -128,7 +128,7 @@ { "id": "link-015", "artifactKind": "wiki", - "artifactKey": "knowledge/global/support-escalation.md", + "artifactKey": "wiki/global/support-escalation.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/support-escalation-runbook.md", "relationship": "derived_from", @@ -137,7 +137,7 @@ { "id": "link-016", "artifactKind": "wiki", - "artifactKey": "knowledge/global/internal-test-exclusion.md", + "artifactKey": "wiki/global/internal-test-exclusion.md", "sourceKind": "notion", "sourcePath": "raw-sources/notion/analyst-onboarding.md", "relationship": 
"derived_from", diff --git a/packages/cli/assets/demo/orbit/manifest.json b/packages/cli/assets/demo/orbit/manifest.json index 1fcb3bef..102c57aa 100644 --- a/packages/cli/assets/demo/orbit/manifest.json +++ b/packages/cli/assets/demo/orbit/manifest.json @@ -47,7 +47,7 @@ "sourceCount": 46 }, "knowledge": { - "path": "knowledge/global", + "path": "wiki/global", "pageCount": 28 }, "links": { diff --git a/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json b/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json index af4c1aa9..7bc4da18 100644 --- a/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json +++ b/packages/cli/assets/demo/orbit/replay.memory-flow.v1.json @@ -71,7 +71,7 @@ "type": "work_unit_started", "unitKey": "revenue-and-contracts", "skills": [ - "knowledge_capture", + "wiki_capture", "sl_capture" ], "stepBudget": 40 @@ -81,21 +81,21 @@ "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/arr-contract-first.md" + "key": "wiki/global/arr-contract-first.md" }, { "type": "candidate_action", "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/revenue-gross-to-net.md" + "key": "wiki/global/revenue-gross-to-net.md" }, { "type": "candidate_action", "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/discount-expiration.md" + "key": "wiki/global/discount-expiration.md" }, { "type": "candidate_action", @@ -127,7 +127,7 @@ "type": "work_unit_started", "unitKey": "retention-and-segments", "skills": [ - "knowledge_capture", + "wiki_capture", "sl_capture" ], "stepBudget": 40 @@ -137,14 +137,14 @@ "unitKey": "retention-and-segments", "target": "wiki", "action": "created", - "key": "knowledge/global/nrr-retention.md" + "key": "wiki/global/nrr-retention.md" }, { "type": "candidate_action", "unitKey": "retention-and-segments", "target": "wiki", "action": "created", - "key": 
"knowledge/global/segment-classification.md" + "key": "wiki/global/segment-classification.md" }, { "type": "candidate_action", @@ -162,7 +162,7 @@ "type": "work_unit_started", "unitKey": "procurement-and-activation", "skills": [ - "knowledge_capture", + "wiki_capture", "sl_capture" ], "stepBudget": 40 @@ -172,14 +172,14 @@ "unitKey": "procurement-and-activation", "target": "wiki", "action": "created", - "key": "knowledge/global/activation-policy.md" + "key": "wiki/global/activation-policy.md" }, { "type": "candidate_action", "unitKey": "procurement-and-activation", "target": "wiki", "action": "created", - "key": "knowledge/global/procurement-workflows.md" + "key": "wiki/global/procurement-workflows.md" }, { "type": "candidate_action", @@ -197,7 +197,7 @@ "type": "work_unit_started", "unitKey": "support-and-health", "skills": [ - "knowledge_capture", + "wiki_capture", "sl_capture" ], "stepBudget": 40 @@ -207,14 +207,14 @@ "unitKey": "support-and-health", "target": "wiki", "action": "created", - "key": "knowledge/global/customer-health-scoring.md" + "key": "wiki/global/customer-health-scoring.md" }, { "type": "candidate_action", "unitKey": "support-and-health", "target": "wiki", "action": "created", - "key": "knowledge/global/support-escalation.md" + "key": "wiki/global/support-escalation.md" }, { "type": "candidate_action", @@ -232,7 +232,7 @@ "type": "work_unit_started", "unitKey": "governance-and-exclusions", "skills": [ - "knowledge_capture" + "wiki_capture" ], "stepBudget": 40 }, @@ -241,7 +241,7 @@ "unitKey": "governance-and-exclusions", "target": "wiki", "action": "created", - "key": "knowledge/global/internal-test-exclusion.md" + "key": "wiki/global/internal-test-exclusion.md" }, { "type": "work_unit_finished", @@ -321,7 +321,7 @@ "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/arr-contract-first.md", + "key": "wiki/global/arr-contract-first.md", "summary": "ARR follows contract precedence with 
cancellation and discount caveats.", "rawFiles": [ "contracts", @@ -334,7 +334,7 @@ "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/revenue-gross-to-net.md", + "key": "wiki/global/revenue-gross-to-net.md", "summary": "Invoice, refund, and revenue dashboard evidence reconcile gross to net revenue.", "rawFiles": [ "invoices", @@ -346,7 +346,7 @@ "unitKey": "revenue-and-contracts", "target": "wiki", "action": "created", - "key": "knowledge/global/discount-expiration.md", + "key": "wiki/global/discount-expiration.md", "summary": "Discount expiration is separated from organic contraction for retention reporting.", "rawFiles": [ "contracts", @@ -394,7 +394,7 @@ "unitKey": "retention-and-segments", "target": "wiki", "action": "created", - "key": "knowledge/global/nrr-retention.md", + "key": "wiki/global/nrr-retention.md", "summary": "NRR uses parent-account rollups and quarterly ARR movement windows.", "rawFiles": [ "accounts", @@ -407,7 +407,7 @@ "unitKey": "retention-and-segments", "target": "wiki", "action": "created", - "key": "knowledge/global/segment-classification.md", + "key": "wiki/global/segment-classification.md", "summary": "Segment labels come from plan mapping and sales-ops policy notes.", "rawFiles": [ "accounts", @@ -432,7 +432,7 @@ "unitKey": "procurement-and-activation", "target": "wiki", "action": "created", - "key": "knowledge/global/activation-policy.md", + "key": "wiki/global/activation-policy.md", "summary": "Activation policy changed on January 15, 2026 and is encoded for agents.", "rawFiles": [ "purchase_requests", @@ -445,7 +445,7 @@ "unitKey": "procurement-and-activation", "target": "wiki", "action": "created", - "key": "knowledge/global/procurement-workflows.md", + "key": "wiki/global/procurement-workflows.md", "summary": "Procurement requester activity and approval events explain product usage.", "rawFiles": [ "purchase_requests", @@ -468,7 +468,7 @@ "unitKey": "support-and-health", 
"target": "wiki", "action": "created", - "key": "knowledge/global/customer-health-scoring.md", + "key": "wiki/global/customer-health-scoring.md", "summary": "Customer health combines support severity, ARR exposure, and product usage.", "rawFiles": [ "support_tickets", @@ -480,7 +480,7 @@ "unitKey": "support-and-health", "target": "wiki", "action": "created", - "key": "knowledge/global/support-escalation.md", + "key": "wiki/global/support-escalation.md", "summary": "Escalation tiers map ticket severity to SLA expectations.", "rawFiles": [ "support_tickets", @@ -503,7 +503,7 @@ "unitKey": "governance-and-exclusions", "target": "wiki", "action": "created", - "key": "knowledge/global/internal-test-exclusion.md", + "key": "wiki/global/internal-test-exclusion.md", "summary": "Canonical metrics exclude internal and test accounts across source families.", "rawFiles": [ "raw-sources/notion/analyst-onboarding.md" @@ -515,97 +515,97 @@ { "rawPath": "contracts", "artifactKind": "wiki", - "artifactKey": "knowledge/global/arr-contract-first.md", + "artifactKey": "wiki/global/arr-contract-first.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/arr-and-contract-reporting-notes.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/arr-contract-first.md", + "artifactKey": "wiki/global/arr-contract-first.md", "actionType": "wiki_written" }, { "rawPath": "invoices", "artifactKind": "wiki", - "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "artifactKey": "wiki/global/revenue-gross-to-net.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/revenue-reporting-policy.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/revenue-gross-to-net.md", + "artifactKey": "wiki/global/revenue-gross-to-net.md", "actionType": "wiki_written" }, { "rawPath": "arr_movements", "artifactKind": "wiki", - "artifactKey": "knowledge/global/discount-expiration.md", + "artifactKey": "wiki/global/discount-expiration.md", "actionType": 
"wiki_written" }, { "rawPath": "arr_movements", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/retention-and-nrr-definition-notes.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/bi/account_retention.view.lkml", "artifactKind": "wiki", - "artifactKey": "knowledge/global/nrr-retention.md", + "artifactKey": "wiki/global/nrr-retention.md", "actionType": "wiki_written" }, { "rawPath": "plans", "artifactKind": "wiki", - "artifactKey": "knowledge/global/segment-classification.md", + "artifactKey": "wiki/global/segment-classification.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/sales-ops-segmentation-guide.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/segment-classification.md", + "artifactKey": "wiki/global/segment-classification.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/activation-policy-decision-record.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/activation-policy.md", + "artifactKey": "wiki/global/activation-policy.md", "actionType": "wiki_written" }, { "rawPath": "purchase_requests", "artifactKind": "wiki", - "artifactKey": "knowledge/global/procurement-workflows.md", + "artifactKey": "wiki/global/procurement-workflows.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/customer-health-playbook.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/customer-health-scoring.md", + "artifactKey": "wiki/global/customer-health-scoring.md", "actionType": "wiki_written" }, { "rawPath": "support_tickets", "artifactKind": "wiki", - "artifactKey": "knowledge/global/customer-health-scoring.md", + "artifactKey": "wiki/global/customer-health-scoring.md", "actionType": "wiki_written" 
}, { "rawPath": "raw-sources/notion/support-escalation-runbook.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/support-escalation.md", + "artifactKey": "wiki/global/support-escalation.md", "actionType": "wiki_written" }, { "rawPath": "raw-sources/notion/analyst-onboarding.md", "artifactKind": "wiki", - "artifactKey": "knowledge/global/internal-test-exclusion.md", + "artifactKey": "wiki/global/internal-test-exclusion.md", "actionType": "wiki_written" }, { diff --git a/packages/cli/assets/demo/orbit/knowledge/global/.gitkeep b/packages/cli/assets/demo/orbit/wiki/global/.gitkeep similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/.gitkeep rename to packages/cli/assets/demo/orbit/wiki/global/.gitkeep diff --git a/packages/cli/assets/demo/orbit/knowledge/global/customer-communication-policy.md b/packages/cli/assets/demo/orbit/wiki/global/customer-communication-policy.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/customer-communication-policy.md rename to packages/cli/assets/demo/orbit/wiki/global/customer-communication-policy.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/new-hire-onboarding-policy.md b/packages/cli/assets/demo/orbit/wiki/global/new-hire-onboarding-policy.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/new-hire-onboarding-policy.md rename to packages/cli/assets/demo/orbit/wiki/global/new-hire-onboarding-policy.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-kpi-glossary.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-activation-kpi-glossary.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-kpi-glossary.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-activation-kpi-glossary.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-policy-change-jan-2026.md 
b/packages/cli/assets/demo/orbit/wiki/global/orbit-activation-policy-change-jan-2026.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-activation-policy-change-jan-2026.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-activation-policy-change-jan-2026.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-arr-contract-first-definition.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-arr-contract-first-definition.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-arr-contract-first-definition.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-arr-contract-first-definition.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-company-overview.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-company-overview.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-company-overview.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-company-overview.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-health-risk-definition.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-customer-health-risk-definition.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-health-risk-definition.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-customer-health-risk-definition.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-stakeholder-needs.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-customer-stakeholder-needs.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-customer-stakeholder-needs.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-customer-stakeholder-needs.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-customers-source.md 
similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-customers-source.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-customers-source.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-exposures.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-dbt-exposures.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-exposures.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-dbt-exposures.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-project-overview.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-dbt-project-overview.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-dbt-project-overview.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-dbt-project-overview.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-how-we-work.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-how-we-work.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-how-we-work.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-how-we-work.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-known-product-gaps.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-known-product-gaps.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-known-product-gaps.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-known-product-gaps.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-activity.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-account-activity.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-activity.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-account-activity.md diff --git 
a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-segments.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-account-segments.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-account-segments.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-account-segments.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-arr-daily.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-arr-daily.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-arr-daily.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-arr-daily.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-nrr-quarterly.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-nrr-quarterly.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-nrr-quarterly.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-nrr-quarterly.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-procurement-activity.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-procurement-activity.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-procurement-activity.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-procurement-activity.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-retention-movement-breakout.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-retention-movement-breakout.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-retention-movement-breakout.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-retention-movement-breakout.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-revenue-daily.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-mart-revenue-daily.md 
similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-mart-revenue-daily.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-mart-revenue-daily.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-metabase-sql-library-patterns.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-metabase-sql-library-patterns.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-metabase-sql-library-patterns.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-metabase-sql-library-patterns.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-nrr-discount-expiration-treatment.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-nrr-discount-expiration-treatment.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-nrr-discount-expiration-treatment.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-nrr-discount-expiration-treatment.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-plan-segment-normalization.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-plan-segment-normalization.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-plan-segment-normalization.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-plan-segment-normalization.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-procurement-qualifying-actions.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-procurement-qualifying-actions.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-procurement-qualifying-actions.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-procurement-qualifying-actions.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-design-principles.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-product-design-principles.md similarity index 100% rename from 
packages/cli/assets/demo/orbit/knowledge/global/orbit-product-design-principles.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-product-design-principles.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-product-review-checklist.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-product-review-checklist.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-product-review-checklist.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-product-review-checklist.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/orbit-revenue-gross-to-net-reconciliation.md b/packages/cli/assets/demo/orbit/wiki/global/orbit-revenue-gross-to-net-reconciliation.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/orbit-revenue-gross-to-net-reconciliation.md rename to packages/cli/assets/demo/orbit/wiki/global/orbit-revenue-gross-to-net-reconciliation.md diff --git a/packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md b/packages/cli/assets/demo/orbit/wiki/global/sales-ops-cs-handoff-process.md similarity index 100% rename from packages/cli/assets/demo/orbit/knowledge/global/sales-ops-cs-handoff-process.md rename to packages/cli/assets/demo/orbit/wiki/global/sales-ops-cs-handoff-process.md diff --git a/packages/cli/scripts/build-demo-assets.mjs b/packages/cli/scripts/build-demo-assets.mjs index 2e135b1c..82f611a2 100644 --- a/packages/cli/scripts/build-demo-assets.mjs +++ b/packages/cli/scripts/build-demo-assets.mjs @@ -229,39 +229,39 @@ const knowledgePages = [ ]; const provenanceLinks = [ - ['wiki', 'knowledge/global/arr-contract-first.md', 'warehouse', 'contracts', 'describes', 1], + ['wiki', 'wiki/global/arr-contract-first.md', 'warehouse', 'contracts', 'describes', 1], [ 'wiki', - 'knowledge/global/arr-contract-first.md', + 'wiki/global/arr-contract-first.md', 'notion', 'raw-sources/notion/arr-and-contract-reporting-notes.md', 
'derived_from', 0.95, ], - ['wiki', 'knowledge/global/revenue-gross-to-net.md', 'warehouse', 'invoices', 'describes', 1], + ['wiki', 'wiki/global/revenue-gross-to-net.md', 'warehouse', 'invoices', 'describes', 1], [ 'wiki', - 'knowledge/global/revenue-gross-to-net.md', + 'wiki/global/revenue-gross-to-net.md', 'notion', 'raw-sources/notion/revenue-reporting-policy.md', 'derived_from', 0.95, ], - ['wiki', 'knowledge/global/discount-expiration.md', 'warehouse', 'arr_movements', 'describes', 1], - ['wiki', 'knowledge/global/nrr-retention.md', 'warehouse', 'arr_movements', 'describes', 1], + ['wiki', 'wiki/global/discount-expiration.md', 'warehouse', 'arr_movements', 'describes', 1], + ['wiki', 'wiki/global/nrr-retention.md', 'warehouse', 'arr_movements', 'describes', 1], [ 'wiki', - 'knowledge/global/nrr-retention.md', + 'wiki/global/nrr-retention.md', 'notion', 'raw-sources/notion/retention-and-nrr-definition-notes.md', 'derived_from', 0.95, ], - ['wiki', 'knowledge/global/nrr-retention.md', 'bi', 'raw-sources/bi/account_retention.view.lkml', 'derived_from', 0.85], - ['wiki', 'knowledge/global/segment-classification.md', 'warehouse', 'plans', 'describes', 1], + ['wiki', 'wiki/global/nrr-retention.md', 'bi', 'raw-sources/bi/account_retention.view.lkml', 'derived_from', 0.85], + ['wiki', 'wiki/global/segment-classification.md', 'warehouse', 'plans', 'describes', 1], [ 'wiki', - 'knowledge/global/segment-classification.md', + 'wiki/global/segment-classification.md', 'notion', 'raw-sources/notion/sales-ops-segmentation-guide.md', 'derived_from', @@ -269,25 +269,25 @@ const provenanceLinks = [ ], [ 'wiki', - 'knowledge/global/activation-policy.md', + 'wiki/global/activation-policy.md', 'notion', 'raw-sources/notion/activation-policy-decision-record.md', 'derived_from', 0.95, ], - ['wiki', 'knowledge/global/procurement-workflows.md', 'warehouse', 'purchase_requests', 'describes', 1], + ['wiki', 'wiki/global/procurement-workflows.md', 'warehouse', 'purchase_requests', 
'describes', 1], [ 'wiki', - 'knowledge/global/customer-health-scoring.md', + 'wiki/global/customer-health-scoring.md', 'notion', 'raw-sources/notion/customer-health-playbook.md', 'derived_from', 0.9, ], - ['wiki', 'knowledge/global/customer-health-scoring.md', 'warehouse', 'support_tickets', 'describes', 1], + ['wiki', 'wiki/global/customer-health-scoring.md', 'warehouse', 'support_tickets', 'describes', 1], [ 'wiki', - 'knowledge/global/support-escalation.md', + 'wiki/global/support-escalation.md', 'notion', 'raw-sources/notion/support-escalation-runbook.md', 'derived_from', @@ -295,7 +295,7 @@ const provenanceLinks = [ ], [ 'wiki', - 'knowledge/global/internal-test-exclusion.md', + 'wiki/global/internal-test-exclusion.md', 'notion', 'raw-sources/notion/analyst-onboarding.md', 'derived_from', @@ -490,7 +490,7 @@ function buildActions() { unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/arr-contract-first.md', + key: 'wiki/global/arr-contract-first.md', summary: 'ARR follows contract precedence with cancellation and discount caveats.', rawFiles: ['contracts', 'arr_movements', 'raw-sources/notion/arr-and-contract-reporting-notes.md'], status: 'success', @@ -499,7 +499,7 @@ function buildActions() { unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/revenue-gross-to-net.md', + key: 'wiki/global/revenue-gross-to-net.md', summary: 'Invoice, refund, and revenue dashboard evidence reconcile gross to net revenue.', rawFiles: ['invoices', 'raw-sources/bi/revenue_exec.dashboard.lookml'], status: 'success', @@ -508,7 +508,7 @@ function buildActions() { unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/discount-expiration.md', + key: 'wiki/global/discount-expiration.md', summary: 'Discount expiration is separated from organic contraction for retention reporting.', rawFiles: ['contracts', 'arr_movements'], status: 'success', @@ -544,7 +544,7 @@ 
function buildActions() { unitKey: 'retention-and-segments', target: 'wiki', action: 'created', - key: 'knowledge/global/nrr-retention.md', + key: 'wiki/global/nrr-retention.md', summary: 'NRR uses parent-account rollups and quarterly ARR movement windows.', rawFiles: ['accounts', 'arr_movements', 'raw-sources/notion/retention-and-nrr-definition-notes.md'], status: 'success', @@ -553,7 +553,7 @@ function buildActions() { unitKey: 'retention-and-segments', target: 'wiki', action: 'created', - key: 'knowledge/global/segment-classification.md', + key: 'wiki/global/segment-classification.md', summary: 'Segment labels come from plan mapping and sales-ops policy notes.', rawFiles: ['accounts', 'plans', 'raw-sources/notion/sales-ops-segmentation-guide.md'], status: 'success', @@ -571,7 +571,7 @@ function buildActions() { unitKey: 'procurement-and-activation', target: 'wiki', action: 'created', - key: 'knowledge/global/activation-policy.md', + key: 'wiki/global/activation-policy.md', summary: 'Activation policy changed on January 15, 2026 and is encoded for agents.', rawFiles: ['purchase_requests', 'users', 'raw-sources/notion/activation-policy-decision-record.md'], status: 'success', @@ -580,7 +580,7 @@ function buildActions() { unitKey: 'procurement-and-activation', target: 'wiki', action: 'created', - key: 'knowledge/global/procurement-workflows.md', + key: 'wiki/global/procurement-workflows.md', summary: 'Procurement requester activity and approval events explain product usage.', rawFiles: ['purchase_requests', 'raw-sources/bi/procurement_activity.view.lkml'], status: 'success', @@ -598,7 +598,7 @@ function buildActions() { unitKey: 'support-and-health', target: 'wiki', action: 'created', - key: 'knowledge/global/customer-health-scoring.md', + key: 'wiki/global/customer-health-scoring.md', summary: 'Customer health combines support severity, ARR exposure, and product usage.', rawFiles: ['support_tickets', 'raw-sources/notion/customer-health-playbook.md'], status: 
'success', @@ -607,7 +607,7 @@ function buildActions() { unitKey: 'support-and-health', target: 'wiki', action: 'created', - key: 'knowledge/global/support-escalation.md', + key: 'wiki/global/support-escalation.md', summary: 'Escalation tiers map ticket severity to SLA expectations.', rawFiles: ['support_tickets', 'raw-sources/notion/support-escalation-runbook.md'], status: 'success', @@ -625,7 +625,7 @@ function buildActions() { unitKey: 'governance-and-exclusions', target: 'wiki', action: 'created', - key: 'knowledge/global/internal-test-exclusion.md', + key: 'wiki/global/internal-test-exclusion.md', summary: 'Canonical metrics exclude internal and test accounts across source families.', rawFiles: ['raw-sources/notion/analyst-onboarding.md'], status: 'success', @@ -665,27 +665,27 @@ function buildReplay(provenance, transcripts) { { type: 'raw_snapshot_written', syncId: 'demo-seeded-sync', rawFileCount: 29 }, { type: 'diff_computed', added: 29, modified: 0, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 5, workUnitCount: 5, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['wiki_capture', 'sl_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/arr-contract-first.md', + key: 'wiki/global/arr-contract-first.md', }, { type: 'candidate_action', unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/revenue-gross-to-net.md', + key: 'wiki/global/revenue-gross-to-net.md', }, { type: 'candidate_action', unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/discount-expiration.md', + key: 'wiki/global/discount-expiration.md', }, { type: 'candidate_action', @@ -709,20 +709,20 @@ function buildReplay(provenance, 
transcripts) { key: 'orbit_demo.arr_movements', }, { type: 'work_unit_finished', unitKey: 'revenue-and-contracts', status: 'success' }, - { type: 'work_unit_started', unitKey: 'retention-and-segments', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'retention-and-segments', skills: ['wiki_capture', 'sl_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'retention-and-segments', target: 'wiki', action: 'created', - key: 'knowledge/global/nrr-retention.md', + key: 'wiki/global/nrr-retention.md', }, { type: 'candidate_action', unitKey: 'retention-and-segments', target: 'wiki', action: 'created', - key: 'knowledge/global/segment-classification.md', + key: 'wiki/global/segment-classification.md', }, { type: 'candidate_action', @@ -735,7 +735,7 @@ function buildReplay(provenance, transcripts) { { type: 'work_unit_started', unitKey: 'procurement-and-activation', - skills: ['knowledge_capture', 'sl_capture'], + skills: ['wiki_capture', 'sl_capture'], stepBudget: 40, }, { @@ -743,14 +743,14 @@ function buildReplay(provenance, transcripts) { unitKey: 'procurement-and-activation', target: 'wiki', action: 'created', - key: 'knowledge/global/activation-policy.md', + key: 'wiki/global/activation-policy.md', }, { type: 'candidate_action', unitKey: 'procurement-and-activation', target: 'wiki', action: 'created', - key: 'knowledge/global/procurement-workflows.md', + key: 'wiki/global/procurement-workflows.md', }, { type: 'candidate_action', @@ -760,20 +760,20 @@ function buildReplay(provenance, transcripts) { key: 'orbit_demo.purchase_requests', }, { type: 'work_unit_finished', unitKey: 'procurement-and-activation', status: 'success' }, - { type: 'work_unit_started', unitKey: 'support-and-health', skills: ['knowledge_capture', 'sl_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'support-and-health', skills: ['wiki_capture', 'sl_capture'], stepBudget: 40 }, { type: 'candidate_action', 
unitKey: 'support-and-health', target: 'wiki', action: 'created', - key: 'knowledge/global/customer-health-scoring.md', + key: 'wiki/global/customer-health-scoring.md', }, { type: 'candidate_action', unitKey: 'support-and-health', target: 'wiki', action: 'created', - key: 'knowledge/global/support-escalation.md', + key: 'wiki/global/support-escalation.md', }, { type: 'candidate_action', @@ -783,13 +783,13 @@ function buildReplay(provenance, transcripts) { key: 'orbit_demo.support_tickets', }, { type: 'work_unit_finished', unitKey: 'support-and-health', status: 'success' }, - { type: 'work_unit_started', unitKey: 'governance-and-exclusions', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'governance-and-exclusions', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'governance-and-exclusions', target: 'wiki', action: 'created', - key: 'knowledge/global/internal-test-exclusion.md', + key: 'wiki/global/internal-test-exclusion.md', }, { type: 'work_unit_finished', unitKey: 'governance-and-exclusions', status: 'success' }, { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, @@ -835,7 +835,7 @@ function buildReplay(provenance, transcripts) { async function writeGeneratedContext(rowCounts) { for (const page of knowledgePages) { - await writeText(join('knowledge/global', page.file), renderKnowledgePage(page)); + await writeText(join('wiki/global', page.file), renderKnowledgePage(page)); } for (const table of semanticLayerTables) { @@ -908,7 +908,7 @@ async function writeGeneratedContext(rowCounts) { }, generated: { semanticLayer: { path: 'semantic-layer/orbit_demo', sourceCount: 6 }, - knowledge: { path: 'knowledge/global', pageCount: 10 }, + knowledge: { path: 'wiki/global', pageCount: 10 }, links: { path: 'links', linkCount: provenanceLinks.length }, }, }); @@ -930,7 +930,7 @@ for (const relativeDir of [ 'raw-sources/bi', 'raw-sources/notion', 'semantic-layer/orbit_demo', - 
'knowledge/global', + 'wiki/global', 'links', 'reports', ]) { diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index e1365d86..5caece1f 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -3,6 +3,19 @@ import { z } from 'zod'; const projectDirSchema = z.string().min(1); const stringArraySchema = z.array(z.string()); +export const wikiWriteCommandSchema = z.object({ + command: z.literal('write'), + projectDir: projectDirSchema, + key: z.string().min(1), + scope: z.enum(['GLOBAL', 'USER']), + userId: z.string().min(1), + summary: z.string().min(1), + content: z.string().min(1), + tags: stringArraySchema, + refs: stringArraySchema, + slRefs: stringArraySchema, +}); + const orderBySchema = z.union([ z.string().min(1), z.object({ diff --git a/packages/cli/src/commands/knowledge-commands.ts b/packages/cli/src/commands/knowledge-commands.ts index 382ebf0a..f8d716f7 100644 --- a/packages/cli/src/commands/knowledge-commands.ts +++ b/packages/cli/src/commands/knowledge-commands.ts @@ -1,9 +1,11 @@ -import { type Command } from '@commander-js/extra-typings'; +import { type Command, Option } from '@commander-js/extra-typings'; import { + collectOption, type KtxCliCommandContext, parsePositiveIntegerOption, resolveCommandProjectDir, } from '../cli-program.js'; +import { wikiWriteCommandSchema } from '../command-schemas.js'; import type { KtxKnowledgeArgs } from '../knowledge.js'; import { profileMark } from '../startup-profile.js'; @@ -17,7 +19,7 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void { const wiki = program .command('wiki') - .description('List or search local wiki pages') + .description('List, read, search, or write local wiki pages') .showHelpAfterError() .addHelpText( 'after', @@ -38,6 +40,22 @@ export function registerWikiCommands(program: Command, context: 
KtxCliCommandCon }); }); + wiki + .command('read') + .description('Read one local wiki page') + .argument('', 'Wiki page key') + .option('--json', 'Print JSON output', false) + .option('--user-id ', 'Local user id', 'local') + .action(async (key: string, options: { userId: string; json?: boolean }, command) => { + await runKnowledgeArgs(context, { + command: 'read', + projectDir: resolveCommandProjectDir(command), + key, + userId: options.userId, + json: options.json, + }); + }); + wiki .command('search') .description('Search local wiki pages') @@ -55,4 +73,31 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon ...(options.limit !== undefined ? { limit: options.limit } : {}), }); }); + + wiki + .command('write') + .description('Write one local wiki page') + .argument('', 'Wiki page key') + .option('--user-id ', 'Local user id', 'local') + .addOption(new Option('--scope ', 'global or user').choices(['global', 'user']).default('global')) + .requiredOption('--summary ', 'Wiki summary') + .requiredOption('--content ', 'Wiki content') + .option('--tag ', 'Wiki tag; repeatable', collectOption, []) + .option('--ref ', 'Wiki ref; repeatable', collectOption, []) + .option('--sl-ref ', 'Semantic-layer ref; repeatable', collectOption, []) + .action(async (key: string, options, command) => { + const args = wikiWriteCommandSchema.parse({ + command: 'write', + projectDir: resolveCommandProjectDir(command), + key, + scope: options.scope === 'user' ? 
'USER' : 'GLOBAL', + userId: options.userId, + summary: options.summary, + content: options.content, + tags: options.tag, + refs: options.ref, + slRefs: options.slRef, + }); + await runKnowledgeArgs(context, args); + }); } diff --git a/packages/cli/src/demo-assets.test.ts b/packages/cli/src/demo-assets.test.ts index 575e9bb7..92aad645 100644 --- a/packages/cli/src/demo-assets.test.ts +++ b/packages/cli/src/demo-assets.test.ts @@ -95,7 +95,7 @@ describe('demo assets', () => { await expect(access(packagedDemoAssetPath('semantic-layer/dbt-main/mart_arr_daily.yaml'))).resolves.toBeUndefined(); await expect(access(packagedDemoAssetPath('semantic-layer/postgres-warehouse/mart_account_activity.yaml'))).resolves.toBeUndefined(); - await expect(access(packagedDemoAssetPath('knowledge/global/orbit-company-overview.md'))).resolves.toBeUndefined(); + await expect(access(packagedDemoAssetPath('wiki/global/orbit-company-overview.md'))).resolves.toBeUndefined(); await expect(access(packagedDemoAssetPath('links/provenance.json'))).resolves.toBeUndefined(); await expect(access(packagedDemoAssetPath('reports/seeded-demo-report.json'))).resolves.toBeUndefined(); }); @@ -108,7 +108,7 @@ describe('demo assets', () => { await expect(access(join(projectDir, 'state.sqlite'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'reports'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'semantic-layer'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'knowledge'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'wiki'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'replays', 'replay.memory-flow.v1.json'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'raw-sources'))).resolves.toBeUndefined(); await expect(access(join(projectDir, '_schema'))).rejects.toMatchObject({ code: 'ENOENT' }); @@ -129,7 +129,7 @@ describe('demo assets', () => { await ensureSeededDemoProject({ projectDir, force: 
false }); await expect(access(join(projectDir, 'semantic-layer', 'dbt-main', 'mart_arr_daily.yaml'))).resolves.toBeUndefined(); - await expect(access(join(projectDir, 'knowledge', 'global', 'orbit-company-overview.md'))).resolves.toBeUndefined(); + await expect(access(join(projectDir, 'wiki', 'global', 'orbit-company-overview.md'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'links', 'provenance.json'))).resolves.toBeUndefined(); await expect(access(join(projectDir, 'reports', 'seeded-demo-report.json'))).resolves.toBeUndefined(); }); diff --git a/packages/cli/src/demo-assets.ts b/packages/cli/src/demo-assets.ts index 4bab5ead..1e972ef7 100644 --- a/packages/cli/src/demo-assets.ts +++ b/packages/cli/src/demo-assets.ts @@ -29,7 +29,7 @@ const REQUIRED_SEEDED_ASSET_PATHS = [ DEMO_REPLAY_FILE, join('semantic-layer', 'dbt-main', 'mart_arr_daily.yaml'), join('semantic-layer', 'postgres-warehouse', 'mart_account_activity.yaml'), - join('knowledge', 'global', 'orbit-company-overview.md'), + join('wiki', 'global', 'orbit-company-overview.md'), ] as const; function assetDir(): string { @@ -131,7 +131,7 @@ export async function ensureDemoProject(options: EnsureDemoProjectOptions): Prom } await mkdir(projectDir, { recursive: true }); - for (const relativeDir of ['reports', 'semantic-layer', 'knowledge', 'replays', 'raw-sources', 'links']) { + for (const relativeDir of ['reports', 'semantic-layer', 'wiki', 'replays', 'raw-sources', 'links']) { await mkdir(join(projectDir, relativeDir), { recursive: true }); } @@ -157,7 +157,7 @@ async function copySeededAssetDirectories(projectDir: string): Promise { await Promise.all([ copyDirIfExists(join(src, 'semantic-layer'), join(dest, 'semantic-layer')), - copyDirIfExists(join(src, 'knowledge'), join(dest, 'knowledge')), + copyDirIfExists(join(src, 'wiki'), join(dest, 'wiki')), copyDirIfExists(join(src, 'raw-sources'), join(dest, 'raw-sources')), copyDirIfExists(join(src, 'links'), join(dest, 'links')), 
copyDirIfExists(join(src, 'reports'), join(dest, 'reports')), diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index f41f4b6a..f914a875 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -139,22 +139,78 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(''); }); - it('rejects removed public wiki and sl read/write commands', async () => { - const sl = vi.fn(async () => 0); + it('routes public wiki read and write commands', async () => { const knowledge = vi.fn(async () => 0); + const readIo = makeIo(); + await expect(runKtxCli(['--project-dir', tempDir, 'wiki', 'read', 'revenue', '--json'], readIo.io, { knowledge })) + .resolves.toBe(0); + expect(knowledge).toHaveBeenCalledWith( + { + command: 'read', + projectDir: tempDir, + key: 'revenue', + userId: 'local', + json: true, + }, + readIo.io, + ); + + const writeIo = makeIo(); + await expect( + runKtxCli( + [ + '--project-dir', + tempDir, + 'wiki', + 'write', + 'revenue', + '--scope', + 'user', + '--summary', + 'Revenue', + '--content', + 'Revenue.', + '--tag', + 'finance', + '--ref', + 'https://example.com/revenue', + '--sl-ref', + 'orders', + ], + writeIo.io, + { knowledge }, + ), + ).resolves.toBe(0); + expect(knowledge).toHaveBeenLastCalledWith( + { + command: 'write', + projectDir: tempDir, + key: 'revenue', + scope: 'USER', + userId: 'local', + summary: 'Revenue', + content: 'Revenue.', + tags: ['finance'], + refs: ['https://example.com/revenue'], + slRefs: ['orders'], + }, + writeIo.io, + ); + }); + + it('rejects removed public sl read/write commands', async () => { + const sl = vi.fn(async () => 0); + for (const argv of [ - ['--project-dir', tempDir, 'wiki', 'read', 'revenue'], - ['--project-dir', tempDir, 'wiki', 'write', 'revenue', '--summary', 'Revenue', '--content', 'Revenue.'], ['--project-dir', tempDir, 'sl', 'read', 'orders', '--connection-id', 'warehouse'], ['--project-dir', tempDir, 'sl', 'write', 'orders', '--connection-id', 
'warehouse', '--yaml', 'name: orders'], ]) { const io = makeIo(); - await expect(runKtxCli(argv, io.io, { knowledge, sl })).resolves.toBe(1); + await expect(runKtxCli(argv, io.io, { sl })).resolves.toBe(1); expect(io.stderr()).toMatch(/unknown command|error:/); } - expect(knowledge).not.toHaveBeenCalled(); expect(sl).not.toHaveBeenCalled(); }); diff --git a/packages/cli/src/ingest.test-utils.ts b/packages/cli/src/ingest.test-utils.ts index 73190b0d..7b65e33a 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -159,7 +159,7 @@ export function bundleReportSnapshot(): IngestReportSnapshot { rawFiles: ['cards/1.json', 'cards/2.json'], status: 'success', actions: [ - { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'wiki', type: 'created', key: 'wiki/global/revenue.md', detail: 'Revenue overview' }, { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, ], touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], @@ -178,7 +178,7 @@ export function bundleReportSnapshot(): IngestReportSnapshot { { rawPath: 'cards/1.json', artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', + artifactKey: 'wiki/global/revenue.md', actionType: 'wiki_written', }, { @@ -194,7 +194,7 @@ export function bundleReportSnapshot(): IngestReportSnapshot { path: 'tool-transcripts/cards.jsonl', toolCallCount: 4, errorCount: 0, - toolNames: ['ingest_triage', 'knowledge_capture', 'sl_capture'], + toolNames: ['ingest_triage', 'wiki_capture', 'sl_capture'], }, ], }, diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts index 1982fe1c..c4b3fdd9 100644 --- a/packages/cli/src/knowledge.test.ts +++ b/packages/cli/src/knowledge.test.ts @@ -3,7 +3,6 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { initKtxProject } from '@ktx/context/project'; import type { 
KtxEmbeddingPort } from '@ktx/context'; -import { type LocalKnowledgeScope, writeLocalKnowledgePage } from '@ktx/context/wiki'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { runKtxKnowledge } from './knowledge.js'; @@ -41,29 +40,6 @@ class FakeEmbeddingPort implements KtxEmbeddingPort { } } -async function seedKnowledgePage(input: { - projectDir: string; - key: string; - summary: string; - content: string; - scope?: LocalKnowledgeScope; - tags?: string[]; - refs?: string[]; - slRefs?: string[]; -}): Promise { - const project = await initKtxProject({ projectDir: input.projectDir, projectName: 'warehouse' }); - await writeLocalKnowledgePage(project, { - key: input.key, - scope: input.scope ?? 'GLOBAL', - userId: 'local', - summary: input.summary, - content: input.content, - tags: input.tags ?? [], - refs: input.refs ?? [], - slRefs: input.slRefs ?? [], - }); -} - describe('runKtxKnowledge', () => { let tempDir: string; @@ -75,16 +51,36 @@ describe('runKtxKnowledge', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('lists and searches knowledge pages', async () => { + it('writes, reads, lists, and searches wiki pages', async () => { const projectDir = join(tempDir, 'project'); - await seedKnowledgePage({ - projectDir, - key: 'metrics-revenue', - summary: 'Revenue', - content: 'Revenue is paid order value.', - tags: ['finance'], - slRefs: ['orders'], - }); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = makeIo(); + await expect( + runKtxKnowledge( + { + command: 'write', + projectDir, + key: 'metrics-revenue', + scope: 'GLOBAL', + userId: 'local', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + refs: [], + slRefs: ['orders'], + }, + writeIo.io, + ), + ).resolves.toBe(0); + expect(writeIo.stdout()).toContain('Wrote wiki/global/metrics-revenue.md'); + + const readIo = makeIo(); + await expect( + runKtxKnowledge({ command: 'read', projectDir, 
key: 'metrics-revenue', userId: 'local' }, readIo.io), + ).resolves.toBe(0); + expect(readIo.stdout()).toContain('# metrics-revenue'); + expect(readIo.stdout()).toContain('Revenue is paid order value.'); const listIo = makeIo(); await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local' }, listIo.io)).resolves.toBe(0); @@ -97,16 +93,27 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toContain('metrics-revenue'); }); - it('prints wiki list and search as public JSON envelopes', async () => { + it('prints wiki list, search, and read as public JSON envelopes', async () => { const projectDir = join(tempDir, 'project'); - await seedKnowledgePage({ - projectDir, - key: 'metrics-revenue', - summary: 'Revenue', - content: 'Revenue is paid order value.', - tags: ['finance'], - slRefs: ['orders'], - }); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + await expect( + runKtxKnowledge( + { + command: 'write', + projectDir, + key: 'metrics-revenue', + scope: 'GLOBAL', + userId: 'local', + summary: 'Revenue', + content: 'Revenue is paid order value.', + tags: ['finance'], + refs: [], + slRefs: ['orders'], + }, + makeIo().io, + ), + ).resolves.toBe(0); const listIo = makeIo(); await expect(runKtxKnowledge({ command: 'list', projectDir, userId: 'local', json: true }, listIo.io)).resolves.toBe( @@ -130,6 +137,48 @@ describe('runKtxKnowledge', () => { data: { items: [expect.objectContaining({ key: 'metrics-revenue', summary: 'Revenue' })] }, meta: { command: 'wiki search' }, }); + + const readIo = makeIo(); + await expect( + runKtxKnowledge({ command: 'read', projectDir, key: 'metrics-revenue', userId: 'local', json: true }, readIo.io), + ).resolves.toBe(0); + expect(JSON.parse(readIo.stdout())).toMatchObject({ + kind: 'wiki.page', + data: { + key: 'metrics-revenue', + summary: 'Revenue', + content: 'Revenue is paid order value.', + }, + }); + }); + + it('rejects slash-delimited write keys with a flat-key suggestion', async () => 
{ + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + const writeIo = makeIo(); + await expect( + runKtxKnowledge( + { + command: 'write', + projectDir, + key: 'orbit/company-overview', + scope: 'GLOBAL', + userId: 'local', + summary: 'Orbit', + content: 'Orbit overview.', + tags: [], + refs: [], + slRefs: [], + }, + writeIo.io, + ), + ).resolves.toBe(1); + + expect(writeIo.stderr()).toContain( + 'Invalid wiki key "orbit/company-overview". Wiki keys must be flat; use "orbit-company-overview".', + ); + expect(writeIo.stdout()).toBe(''); }); it('explains empty search results for a project without wiki pages', async () => { @@ -143,19 +192,30 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toBe(''); expect(searchIo.stderr()).toContain('No local wiki pages found'); - expect(searchIo.stderr()).toContain('Run ingest'); - expect(searchIo.stderr()).not.toContain('ktx wiki write'); + expect(searchIo.stderr()).toContain('ktx wiki write'); }); it('uses configured embeddings for semantic wiki search', async () => { const projectDir = join(tempDir, 'semantic-project'); - await seedKnowledgePage({ - projectDir, - key: 'active-contract-arr-open-tickets', - summary: 'Active Contract ARR Ranked by Open Support Ticket Count', - content: 'Accounts ranked by annual recurring contract value and support ticket load.', - tags: ['historic-sql'], - }); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + + await expect( + runKtxKnowledge( + { + command: 'write', + projectDir, + key: 'active-contract-arr-open-tickets', + scope: 'GLOBAL', + userId: 'local', + summary: 'Active Contract ARR Ranked by Open Support Ticket Count', + content: 'Accounts ranked by annual recurring contract value and support ticket load.', + tags: ['historic-sql'], + refs: [], + slRefs: [], + }, + makeIo().io, + ), + ).resolves.toBe(0); const searchIo = makeIo(); await expect( diff --git a/packages/cli/src/knowledge.ts 
b/packages/cli/src/knowledge.ts index 0d1e194b..2e039dea 100644 --- a/packages/cli/src/knowledge.ts +++ b/packages/cli/src/knowledge.ts @@ -4,12 +4,31 @@ import { type KtxEmbeddingPort, } from '@ktx/context'; import { loadKtxProject } from '@ktx/context/project'; -import { listLocalKnowledgePages, searchLocalKnowledgePages } from '@ktx/context/wiki'; +import { + type LocalKnowledgeScope, + listLocalKnowledgePages, + readLocalKnowledgePage, + searchLocalKnowledgePages, + writeLocalKnowledgePage, +} from '@ktx/context/wiki'; import { writeJsonResult } from './io/print-list.js'; export type KtxKnowledgeArgs = | { command: 'list'; projectDir: string; userId: string; json?: boolean } - | { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number }; + | { command: 'read'; projectDir: string; key: string; userId: string; json?: boolean } + | { command: 'search'; projectDir: string; query: string; userId: string; json?: boolean; limit?: number } + | { + command: 'write'; + projectDir: string; + key: string; + scope: LocalKnowledgeScope; + userId: string; + summary: string; + content: string; + tags: string[]; + refs: string[]; + slRefs: string[]; + }; interface KtxKnowledgeIo { stdout: { write(chunk: string): void }; @@ -56,6 +75,25 @@ export async function runKtxKnowledge( } return 0; } + if (args.command === 'read') { + const page = await readLocalKnowledgePage(project, { key: args.key, userId: args.userId }); + if (!page) { + throw new Error(`Wiki page "${args.key}" was not found`); + } + if (args.json) { + writeJsonResult(io, { + kind: 'wiki.page', + data: page, + meta: { command: 'wiki read' }, + }); + return 0; + } + io.stdout.write(`# ${page.key}\n\n`); + io.stdout.write(`Scope: ${page.scope}\n`); + io.stdout.write(`Summary: ${page.summary}\n\n`); + io.stdout.write(`${page.content}\n`); + return 0; + } if (args.command === 'search') { const results = await searchLocalKnowledgePages(project, { query: args.query, @@ -75,7 
+113,7 @@ export async function runKtxKnowledge( const pages = await listLocalKnowledgePages(project, { userId: args.userId }); if (pages.length === 0) { io.stderr.write( - `No local wiki pages found in ${project.projectDir}. Run ingest to capture wiki context, then retry the search.\n`, + `No local wiki pages found in ${project.projectDir}. Create one with \`ktx wiki write --summary --content \` or run ingest.\n`, ); } else { io.stderr.write( @@ -89,8 +127,19 @@ export async function runKtxKnowledge( } return 0; } - const _exhaustive: never = args; - throw new Error(`Unsupported wiki command: ${JSON.stringify(_exhaustive)}`); + + const write = await writeLocalKnowledgePage(project, { + key: args.key, + scope: args.scope, + userId: args.userId, + summary: args.summary, + content: args.content, + tags: args.tags, + refs: args.refs, + slRefs: args.slRefs, + }); + io.stdout.write(`Wrote ${write.path}\n`); + return 0; } catch (error) { io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; diff --git a/packages/cli/src/memory-flow-hud.tsx b/packages/cli/src/memory-flow-hud.tsx index 5d2be9eb..9a9b3d96 100644 --- a/packages/cli/src/memory-flow-hud.tsx +++ b/packages/cli/src/memory-flow-hud.tsx @@ -76,7 +76,7 @@ function tableName(key: string): string { function humanizeInsight(key: string, target: 'sl' | 'wiki', summary: string | undefined): string { if (summary) return summary; const name = target === 'sl' ? tableName(key) : topicName(key); - return target === 'sl' ? `Query definition: ${name}` : `Knowledge page: ${name}`; + return target === 'sl' ? `Query definition: ${name}` : `Wiki page: ${name}`; } const INTERNAL_DEMO_CONNECTION_ID = 'orbit_demo'; @@ -453,7 +453,7 @@ function CompletionSummary(props: { )} {wiki > 0 && ( - {' '}📝 {wiki} knowledge page{wiki === 1 ? '' : 's'} — so agents understand your business context + {' '}📝 {wiki} wiki page{wiki === 1 ? 
'' : 's'} — so agents understand your business context )} diff --git a/packages/cli/src/memory-flow-interactive.test.ts b/packages/cli/src/memory-flow-interactive.test.ts index 456a3110..d7fe8bd8 100644 --- a/packages/cli/src/memory-flow-interactive.test.ts +++ b/packages/cli/src/memory-flow-interactive.test.ts @@ -46,9 +46,9 @@ function replay(): MemoryFlowReplayInput { { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 4 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 4 }, { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, - { type: 'work_unit_started', unitKey: 'customers', skills: ['knowledge_capture'], stepBudget: 4 }, + { type: 'work_unit_started', unitKey: 'customers', skills: ['wiki_capture'], stepBudget: 4 }, { type: 'work_unit_finished', unitKey: 'customers', status: 'failed', reason: 'validation reset' }, { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 1 }, { type: 'saved', commitSha: 'abc12345', wikiCount: 1, slCount: 1 }, diff --git a/packages/cli/src/memory-flow-tui.test.tsx b/packages/cli/src/memory-flow-tui.test.tsx index b555c6c1..e1df900a 100644 --- a/packages/cli/src/memory-flow-tui.test.tsx +++ b/packages/cli/src/memory-flow-tui.test.tsx @@ -23,10 +23,10 @@ function replayInput(): MemoryFlowReplayInput { ], details: { actions: [ - { unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md', summary: 'order lifecycle', rawFiles: ['orders'], status: 'success' }, + { unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md', summary: 'order lifecycle', rawFiles: ['orders'], status: 'success' }, { unitKey: 'customers', target: 'sl', action: 'updated', key: 
'orbit_demo.customers', summary: 'customer metrics', rawFiles: ['customers'], status: 'success' }, ], - provenance: [{ rawPath: 'orders', artifactKind: 'wiki', artifactKey: 'knowledge/orders.md', actionType: 'wiki_written' }], + provenance: [{ rawPath: 'orders', artifactKind: 'wiki', artifactKey: 'wiki/orders.md', actionType: 'wiki_written' }], transcripts: [{ unitKey: 'orders', path: '/tmp/t.jsonl', toolCallCount: 2, errorCount: 0, toolNames: ['read_raw_span', 'wiki_write'] }], }, events: [ @@ -35,8 +35,8 @@ function replayInput(): MemoryFlowReplayInput { { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, - { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' }, { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, { type: 'work_unit_started', unitKey: 'customers', skills: ['sl_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'customers', target: 'sl', action: 'updated', key: 'orbit_demo.customers' }, @@ -220,7 +220,7 @@ describe('MemoryFlowTuiApp', () => { { type: 'source_acquired', adapter: 'live-database', trigger: 'manual_resync', fileCount: 1 }, { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, ], 
plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders'], peerFileCount: 0, dependencyCount: 1 }], }; @@ -240,7 +240,7 @@ describe('MemoryFlowTuiApp', () => { { type: 'source_acquired', adapter: 'dbt-descriptions', trigger: 'manual_resync', fileCount: 3 }, { type: 'diff_computed', added: 11, modified: 0, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, ], plannedWorkUnits: [{ unitKey: 'orders', rawFiles: ['orders'], peerFileCount: 0, dependencyCount: 1 }], }; diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 9115d7a5..1c2ab320 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -257,9 +257,9 @@ describe('setup context build state', () => { it('marks context complete without prompting when initial source ingest already made agent context', async () => { await writeReadyProject(tempDir); await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); - await mkdir(join(tempDir, 'knowledge', 'global'), { recursive: true }); + await mkdir(join(tempDir, 'wiki', 'global'), { recursive: true }); await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); - await writeFile(join(tempDir, 'knowledge', 'global', 'metrics.md'), '# Metrics\n'); + await writeFile(join(tempDir, 'wiki', 'global', 'metrics.md'), '# Metrics\n'); await writeReadyEnrichedScanReport(tempDir); const io = makeIo(); const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); @@ -332,8 +332,8 @@ describe('setup context build state', () => { await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); const io = makeIo(); 
const runContextBuildMock = vi.fn(async () => { - await mkdir(join(tempDir, 'knowledge', 'global'), { recursive: true }); - await writeFile(join(tempDir, 'knowledge', 'global', 'metrics.md'), '# Metrics\n'); + await mkdir(join(tempDir, 'wiki', 'global'), { recursive: true }); + await writeFile(join(tempDir, 'wiki', 'global', 'metrics.md'), '# Metrics\n'); await writeReadyEnrichedScanReport(tempDir); return { exitCode: 0, detached: false }; }); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 94589bdc..ea23a9dc 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -441,7 +441,7 @@ async function defaultVerifyContextReady(projectDir: string): Promise { expect(options).toContainEqual({ value: 'notion', label: 'Notion' }); }); + it('shows already configured context sources in the interactive checklist', async () => { + await addPrimarySource(); + await addConnection('notion-main', { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', + crawl_mode: 'all_accessible', + }); + const io = makeIo(); + const testPrompts = prompts({ multiselect: [['back']] }); + + await expect( + runKtxSetupSourcesStep( + { projectDir, inputMode: 'auto', runInitialSourceIngest: false, skipSources: false }, + io.io, + { prompts: testPrompts }, + ), + ).resolves.toEqual({ status: 'back', projectDir }); + + expect(testPrompts.multiselect).toHaveBeenCalledWith( + expect.objectContaining({ + initialValues: ['notion'], + options: expect.arrayContaining([{ value: 'notion', label: 'Notion', hint: 'configured: notion-main' }]), + }), + ); + }); + it('uses a source-specific editable connection name for new interactive connections', async () => { await addPrimarySource(); const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 6ab71106..0561b0e2 100644 --- 
a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -73,7 +73,8 @@ export type KtxSetupSourcesResult = export interface KtxSetupSourcesPromptAdapter { multiselect(options: { message: string; - options: Array<{ value: string; label: string }>; + options: Array<{ value: string; label: string; hint?: string }>; + initialValues?: string[]; required?: boolean; }): Promise; select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; @@ -1325,6 +1326,22 @@ function existingConnectionIdsBySource( .sort((left, right) => left.localeCompare(right)); } +function sourceChecklistForConnections(connections: Record): { + options: Array<{ value: KtxSetupSourceType; label: string; hint?: string }>; + initialValues: KtxSetupSourceType[]; +} { + const initialValues: KtxSetupSourceType[] = []; + const options = SOURCE_OPTIONS.map((option) => { + const existingIds = existingConnectionIdsBySource(connections, option.value); + if (existingIds.length === 0) { + return option; + } + initialValues.push(option.value); + return { ...option, hint: `configured: ${existingIds.join(', ')}` }; + }); + return { options, initialValues }; +} + function defaultConnectionIdForSource( connections: Record, source: KtxSetupSourceType, @@ -1483,13 +1500,19 @@ export async function runKtxSetupSourcesStep( } while (true) { + const contextSourceChecklist = sourceChecklistForConnections( + (await loadKtxProject({ projectDir: args.projectDir })).config.connections, + ); const selected = args.source ? [args.source] : args.inputMode === 'disabled' ? [] : await prompts.multiselect({ message: withMultiselectNavigation('Which context sources should KTX ingest?'), - options: [...SOURCE_OPTIONS], + options: contextSourceChecklist.options, + ...(contextSourceChecklist.initialValues.length > 0 + ? 
{ initialValues: contextSourceChecklist.initialValues } + : {}), required: false, }); if (selected.includes('back')) { diff --git a/packages/context/prompts/memory_agent_backfill.md b/packages/context/prompts/memory_agent_backfill.md index ee0f7ed4..fdf7211d 100644 --- a/packages/context/prompts/memory_agent_backfill.md +++ b/packages/context/prompts/memory_agent_backfill.md @@ -10,7 +10,7 @@ Capture only when the signal is unambiguous: a metric definition stated plainly, 1. Read the wiki and SL indexes to avoid creating duplicates. -2. If the content has wiki-style signal, load the `knowledge_capture` skill and follow its workflow. +2. If the content has wiki-style signal, load the `wiki_capture` skill and follow its workflow. 3. If the content has SL-style signal, load the `sl` skill and follow its Part 3 workflow. 4. Prefer updating existing entries over creating new ones — backfills often duplicate existing knowledge. 5. When done, exit the loop. diff --git a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md index 30b52537..515fecd3 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_reconcile.md @@ -7,7 +7,7 @@ Parsimonious. Stage 3 WUs already loaded `ingest_triage` and handled conflicts t -1. Load `ingest_triage`, then `sl_capture` + `knowledge_capture`. +1. Load `ingest_triage`, then `sl_capture` + `wiki_capture`. 2. Call `stage_list()` for the full index of this job's writes. If it is empty AND you have no evictions, exit — the runner short-circuits this case but the skill still teaches you to bail fast. 3. If the system prompt includes ``, apply those pins before flagging a same-name or near-duplicate conflict. A pinned `canonicalArtifactKey` keeps the contested name when it is present in the Stage Index; competing variants keep or receive disambiguated names. 4. 
Sweep both exact-key conflicts and near-duplicate writes. Compare WUs that wrote overlapping SL source names, overlapping wiki keys, the same `tables:` or `sl_refs:` action details, or obviously equivalent topic titles under different wiki keys. Call `stage_diff` to see the actual difference, and use `wiki_read`/`sl_read_source` when two different keys appear to describe the same table, metric, or source-of-truth mapping. If they're the same content, leave one canonical artifact and record the duplicate as subsumed. If they differ per `ingest_triage` rules, apply the correct resolution (rename + capture; election of canonical; silent replace for expression-only re-ingest change; or pinned canonical), then call `emit_conflict_resolution` with the artifact key and decision. diff --git a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md index c7c9eb6d..3821537d 100644 --- a/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md +++ b/packages/context/prompts/memory_agent_bundle_ingest_work_unit.md @@ -8,7 +8,7 @@ Assertive. The bundle was explicitly submitted for ingest. Default to capturing 1. Read this WorkUnit's section at the end of the user prompt. It lists your `rawFiles`, any unchanged `dependencyPaths` you may need to resolve references, the `peerFileIndex` (paths only; you CANNOT read them), the source's `skillNames`, and any `priorProvenance` rows telling you what earlier syncs produced from these files. -2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `knowledge_capture`, and `ingest_triage` last. The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping. +2. Load the per-source review skill first (e.g. `lookml_ingest`, `metricflow_ingest`, `dbt_ingest`), then `sl_capture` and `wiki_capture`, and `ingest_triage` last. 
The triage skill tells you how to react when `discover_data` reveals that a prior WU already wrote something overlapping. 3. If the system prompt includes ``, read those pins before choosing artifact keys. A pin's `canonicalArtifactKey` is the preferred artifact for its `contestedKey`: prefer editing the pinned canonical artifact when it already exists or when this raw file clearly updates it. Do not create a duplicate contested artifact when a pin says another artifact is canonical; use a specific disambiguated key only when the raw file describes a genuinely different domain. 4. For each raw file: call `read_raw_file` (or `read_raw_span` for slicing large files) to load content. Before writing a new SL source or wiki page, call `discover_data` for each candidate source, table, metric, or topic name to find prior-WU writes, existing wiki pages, SL sources, and raw warehouse matches; apply `ingest_triage` when you hit one, and apply any matching canonical pin before deciding whether to edit, rename, or skip. 5. For every `wiki_write`, `wiki_remove`, `sl_write_source`, or `sl_edit_source` call, include `rawPaths` with only the raw file paths that directly support that action. If one artifact synthesizes several files, list each contributing raw file. Do not include unrelated files from the same WorkUnit. diff --git a/packages/context/prompts/memory_agent_external_ingest.md b/packages/context/prompts/memory_agent_external_ingest.md index edee6f75..dd84651a 100644 --- a/packages/context/prompts/memory_agent_external_ingest.md +++ b/packages/context/prompts/memory_agent_external_ingest.md @@ -10,7 +10,7 @@ A single artifact typically produces multiple actions: one SL source per table/v 1. Review the wiki and SL indexes in the prompt. Prefer updating existing entries over creating duplicates. -2. Load the `sl` skill for SL-writes and `knowledge_capture` for wiki-writes. Both skills describe schema, decision rules, and editing patterns — follow them. +2. 
Load the `sl` skill for SL-writes and `wiki_capture` for wiki-writes. Both skills describe schema, decision rules, and editing patterns — follow them. 3. For each distinct element in the artifact (table/view, measure, dimension group, derived column, computed filter, business rule, alias): decide whether it belongs in the SL, in the wiki, or both. 4. Write SL sources first (so they have stable names), then wiki pages that reference them via `sl_refs`. 5. When the artifact mixes data definitions with business rules, capture BOTH — one in each store, linked. diff --git a/packages/context/prompts/memory_agent_research.md b/packages/context/prompts/memory_agent_research.md index f8a59a79..6090e5bb 100644 --- a/packages/context/prompts/memory_agent_research.md +++ b/packages/context/prompts/memory_agent_research.md @@ -19,7 +19,7 @@ Skip: 1. Read the wiki index and the SL sources index in the prompt below. 2. Identify durable knowledge OR reusable data patterns in the turn. -3. If the turn has wiki-style signal (preferences, definitions, conventions), load the `knowledge_capture` skill and follow its workflow. +3. If the turn has wiki-style signal (preferences, definitions, conventions), load the `wiki_capture` skill and follow its workflow. 4. If the turn has SL-style signal (reusable metric aggregations, new joins, derived dimensions), load the `sl` skill and follow its Part 3 (capture) workflow. 5. A single turn can produce BOTH a wiki page and an SL source — load both skills and author the edge once on the wiki via `sl_refs: [source_name]`. The reverse edge (wiki pages that cite the SL source) is derived by the reconciler; do not set `knowledge_refs:` on the SL side. 6. When you're done, exit the loop without calling any more tools. Do NOT emit a final text summary. 
diff --git a/packages/context/skills/metricflow_ingest/SKILL.md b/packages/context/skills/metricflow_ingest/SKILL.md index 6ed4b916..67743892 100644 --- a/packages/context/skills/metricflow_ingest/SKILL.md +++ b/packages/context/skills/metricflow_ingest/SKILL.md @@ -140,7 +140,7 @@ metrics: ``` Do NOT emit SL for this. Instead: -- Write a wiki page at `knowledge/global/-intent.md` quoting the full YAML body and a one-line explanation of the intended semantics (base event → conversion event within window). +- Write a wiki page at `wiki/global/-intent.md` quoting the full YAML body and a one-line explanation of the intended semantics (base event → conversion event within window). - Call `emit_unmapped_fallback` with `rawPath` set to the MetricFlow file path, `reason: "conversion_metric_unsupported"`, and `fallback: "flagged"`. When KTX SL gains conversion primitives, re-ingesting will find the prior wiki note (via `priorProvenance`) and replace it with an SL source. @@ -290,7 +290,7 @@ measures: - {name: margin, expr: "sum(revenue_cents) - sum(cost_cents)"} ``` -Also write a wiki page at `knowledge/global/margin-metric.md` explaining the cross-source origin. +Also write a wiki page at `wiki/global/margin-metric.md` explaining the cross-source origin. ## Example 4 — filtered metric creates a new measure diff --git a/packages/context/skills/knowledge_capture/SKILL.md b/packages/context/skills/wiki_capture/SKILL.md similarity index 97% rename from packages/context/skills/knowledge_capture/SKILL.md rename to packages/context/skills/wiki_capture/SKILL.md index 2a111d90..30188be6 100644 --- a/packages/context/skills/knowledge_capture/SKILL.md +++ b/packages/context/skills/wiki_capture/SKILL.md @@ -1,10 +1,10 @@ --- -name: knowledge_capture -description: KTX's knowledge base — wiki pages for durable, reusable business knowledge. 
Covers capture workflow for user preferences, metric definitions, organizational conventions, and cross-references between knowledge pages and semantic-layer sources. Loaded by the post-turn memory-agent only. The research agent reads wiki via `wiki_read`/`wiki_search` but does not write it. +name: wiki_capture +description: KTX's knowledge base — wiki pages for durable, reusable business knowledge. Covers capture workflow for user preferences, metric definitions, organizational conventions, and cross-references between wiki pages and semantic-layer sources. Loaded by the post-turn memory-agent only. The research agent reads wiki via `wiki_read`/`wiki_search` but does not write it. callers: [memory_agent] --- -# Knowledge Capture +# Wiki Capture ## Role diff --git a/packages/context/src/ingest/action-identity.test.ts b/packages/context/src/ingest/action-identity.test.ts index 0c855c41..725a1d99 100644 --- a/packages/context/src/ingest/action-identity.test.ts +++ b/packages/context/src/ingest/action-identity.test.ts @@ -19,13 +19,13 @@ describe('memory action target identity', () => { { target: 'wiki', type: 'created', - key: 'knowledge/global/orders.md', + key: 'wiki/global/orders.md', detail: '', targetConnectionId: 'ignored', }, 'looker-run', ), - ).toBe('wiki:looker-run:knowledge/global/orders.md'); + ).toBe('wiki:looker-run:wiki/global/orders.md'); }); it('resolves action target connection only for SL actions', () => { diff --git a/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts b/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts index ef1c798c..cdf1a434 100644 --- a/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts +++ b/packages/context/src/ingest/adapters/dbt/dbt.adapter.ts @@ -16,7 +16,7 @@ interface DbtSourceAdapterOptions { export class DbtSourceAdapter implements SourceAdapter { readonly source = 'dbt' as const; - /** Runner merges: ingest_triage, sl_capture, knowledge_capture (see ingest-bundle.runner.ts) */ + /** Runner merges: 
ingest_triage, sl_capture, wiki_capture (see ingest-bundle.runner.ts) */ readonly skillNames: string[] = ['dbt_ingest']; constructor(private readonly options: DbtSourceAdapterOptions = {}) {} diff --git a/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts b/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts index c7a334bf..8f583d9c 100644 --- a/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/local-ingest-acceptance.test.ts @@ -277,7 +277,7 @@ describe('historic-SQL local ingest retrieval acceptance', () => { await expect(readFile(join(project.projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8')).resolves .toContain('Analysts repeatedly inspect paid order lifecycle by customer segment.'); - await expect(readFile(join(project.projectDir, 'knowledge/global/historic-sql-paid-order-lifecycle.md'), 'utf-8')) + await expect(readFile(join(project.projectDir, 'wiki/global/historic-sql-paid-order-lifecycle.md'), 'utf-8')) .resolves.toContain('Paid Order Lifecycle'); const reloaded = await loadKtxProject({ projectDir: project.projectDir }); diff --git a/packages/context/src/ingest/adapters/historic-sql/post-processor.ts b/packages/context/src/ingest/adapters/historic-sql/post-processor.ts index 8d89d397..f5e0aaec 100644 --- a/packages/context/src/ingest/adapters/historic-sql/post-processor.ts +++ b/packages/context/src/ingest/adapters/historic-sql/post-processor.ts @@ -10,7 +10,7 @@ async function commitProjectionChanges(workdir: string): Promise { const status = await git.status(); const paths = status.files .map((file) => file.path) - .filter((path) => path.startsWith('semantic-layer/') || path.startsWith('knowledge/global/historic-sql')); + .filter((path) => path.startsWith('semantic-layer/') || path.startsWith('wiki/global/historic-sql')); if (paths.length === 0) { return; } diff --git 
a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts index 95adf13f..0b3c5604 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.test.ts @@ -106,7 +106,7 @@ describe('projectHistoricSqlEvidence', () => { await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' }); await writeText( workdir, - 'knowledge/global/historic-sql-old-order-lifecycle.md', + 'wiki/global/historic-sql-old-order-lifecycle.md', [ '---', YAML.stringify({ @@ -127,7 +127,7 @@ describe('projectHistoricSqlEvidence', () => { ); await writeText( workdir, - 'knowledge/global/historic-sql-retired-pattern.md', + 'wiki/global/historic-sql-retired-pattern.md', [ '---', YAML.stringify({ @@ -164,10 +164,10 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.patternPagesWritten).toBe(1); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-order-lifecycle.md'), 'utf-8')).resolves.toContain( 'Order Lifecycle Analysis', ); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( 'stale_since: "2026-05-11T00:00:00.000Z"', ); }); @@ -192,7 +192,7 @@ describe('projectHistoricSqlEvidence', () => { await writeJson(workdir, 'raw-sources/warehouse/historic-sql/sync-1/tables/public.customers.json', { table: 'public.customers' }); await writeText( workdir, - 'knowledge/global/historic-sql-order-lifecycle-analysis.md', + 
'wiki/global/historic-sql-order-lifecycle-analysis.md', [ '---', YAML.stringify({ @@ -230,7 +230,7 @@ describe('projectHistoricSqlEvidence', () => { const result = await projectHistoricSqlEvidence({ workdir, connectionId: 'warehouse', syncId: 'sync-1', runId: 'run-1' }); expect(result.patternPagesWritten).toBe(1); - const page = await readFile(join(workdir, 'knowledge/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8'); + const page = await readFile(join(workdir, 'wiki/global/historic-sql-order-lifecycle-analysis.md'), 'utf-8'); expect(page).toContain('Analysts compare order status with customer segment again.'); expect(page).not.toContain('Archived body'); expect(page).not.toContain('archived'); @@ -254,7 +254,7 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( workdir, - 'knowledge/global/historic-sql-retired-pattern.md', + 'wiki/global/historic-sql-retired-pattern.md', [ '---', YAML.stringify({ @@ -279,7 +279,7 @@ describe('projectHistoricSqlEvidence', () => { expect(result.archivedPatternPages).toBe(0); expect(result.stalePatternPagesMarked).toBe(0); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'wiki/global/historic-sql-retired-pattern.md'), 'utf-8')).resolves.toContain( 'Archived retired body', ); }); @@ -322,7 +322,7 @@ describe('projectHistoricSqlEvidence', () => { }); await writeText( workdir, - 'knowledge/global/historic-sql-old-template.md', + 'wiki/global/historic-sql-old-template.md', [ '---', YAML.stringify({ @@ -356,7 +356,7 @@ describe('projectHistoricSqlEvidence', () => { commonJoins: [], staleSince: '2026-05-11T00:00:00.000Z', }); - await expect(readFile(join(workdir, 'knowledge/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(workdir, 'wiki/global/historic-sql-old-template.md'), 'utf-8')).resolves.toContain( 'Old body', ); }); diff --git 
a/packages/context/src/ingest/adapters/historic-sql/projection.ts b/packages/context/src/ingest/adapters/historic-sql/projection.ts index 7d4da94f..36a7be19 100644 --- a/packages/context/src/ingest/adapters/historic-sql/projection.ts +++ b/packages/context/src/ingest/adapters/historic-sql/projection.ts @@ -276,7 +276,7 @@ export async function projectHistoricSqlEvidence(input: HistoricSqlProjectionInp } } - const wikiRoot = join(input.workdir, 'knowledge/global'); + const wikiRoot = join(input.workdir, 'wiki/global'); await mkdir(wikiRoot, { recursive: true }); const allPages = await loadPatternPages(wikiRoot); const activePages = allPages.filter((page) => !isArchivedPatternPage(page)); diff --git a/packages/context/src/ingest/ingest-bundle.runner.test.ts b/packages/context/src/ingest/ingest-bundle.runner.test.ts index b337a3f0..b9831c0f 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.test.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.test.ts @@ -599,7 +599,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { currentToolSession.actions.push({ target: 'wiki', type: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', detail: 'captured order context', }); } @@ -638,7 +638,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { expect.objectContaining({ type: 'work_unit_started', unitKey: 'u1', - skills: ['ingest_triage', 'sl_capture', 'knowledge_capture'], + skills: ['ingest_triage', 'sl_capture', 'wiki_capture'], stepBudget: 40, }), expect.objectContaining({ type: 'work_unit_step', unitKey: 'u1', stepIndex: 1, stepBudget: 40 }), @@ -647,7 +647,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { unitKey: 'u1', target: 'wiki', action: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', }), expect.objectContaining({ type: 'work_unit_finished', unitKey: 'u1', status: 'success' }), ]), @@ -860,7 +860,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { { toolCallId: 'ledger-1', messages: [] }, ); 
await params.toolSet.wiki_write.execute( - { key: 'knowledge/a.md', content: 'safe summary' }, + { key: 'wiki/a.md', content: 'safe summary' }, { toolCallId: 'wiki-1', messages: [] }, ); } @@ -1351,7 +1351,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { { target: 'wiki', type: 'created', - key: 'knowledge/global/pipeline.md', + key: 'wiki/global/pipeline.md', detail: 'Pipeline article', }, { @@ -1391,7 +1391,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { }); expect(deps.knowledgeSlRefs.syncFromWiki).toHaveBeenCalledWith({ - wikiPageKey: 'knowledge/global/pipeline.md', + wikiPageKey: 'wiki/global/pipeline.md', wikiScope: 'GLOBAL', wikiScopeId: null, refs: [{ connectionId: 'warehouse-2', sourceName: 'looker__b2b__sales_pipeline' }], @@ -1410,7 +1410,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { connectionId: 'looker-run', targetConnectionId: null, artifactKind: 'wiki', - artifactKey: 'knowledge/global/pipeline.md', + artifactKey: 'wiki/global/pipeline.md', }), ]), ); @@ -1616,7 +1616,7 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { const workUnitCall = deps.agentRunner.runLoop.mock.calls.find( ([params]: any[]) => params.telemetryTags.operationName === 'ingest-bundle-wu', ); - expect(workUnitCall?.[0].userPrompt).toContain('## Knowledge Pages'); + expect(workUnitCall?.[0].userPrompt).toContain('## Wiki Pages'); expect(workUnitCall?.[0].userPrompt).toContain( '- revenue-recognition: Recognize revenue net of refunds after fulfillment.', ); diff --git a/packages/context/src/ingest/ingest-bundle.runner.ts b/packages/context/src/ingest/ingest-bundle.runner.ts index d8f47c2a..582cbbf3 100644 --- a/packages/context/src/ingest/ingest-bundle.runner.ts +++ b/packages/context/src/ingest/ingest-bundle.runner.ts @@ -293,7 +293,7 @@ export class IngestBundleRunner { return '(empty)'; } - return `## Knowledge Pages\n${pages.map((page) => `- ${page.page_key}: ${page.summary}`).join('\n')}`; + return `## Wiki Pages\n${pages.map((page) => 
`- ${page.page_key}: ${page.summary}`).join('\n')}`; } private async buildSlIndex(connectionIds: string[]): Promise { @@ -596,7 +596,7 @@ export class IngestBundleRunner { const baseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_work_unit'); const wuSkillNames = Array.from( - new Set([...adapter.skillNames, 'ingest_triage', 'sl_capture', 'knowledge_capture']), + new Set([...adapter.skillNames, 'ingest_triage', 'sl_capture', 'wiki_capture']), ); const wuSkills = await this.deps.skillsRegistry.listSkills(wuSkillNames, 'memory_agent'); const skillsPrompt = this.deps.skillsRegistry.buildSkillsPrompt(wuSkills, 'memory_agent'); @@ -973,7 +973,7 @@ export class IngestBundleRunner { const reconcileBaseFraming = await this.deps.promptService.loadPrompt('memory_agent_bundle_ingest_reconcile'); const reconcileSkills = await this.deps.skillsRegistry.listSkills( Array.from( - new Set(['ingest_triage', 'sl_capture', 'knowledge_capture', ...(adapter.reconcileSkillNames ?? [])]), + new Set(['ingest_triage', 'sl_capture', 'wiki_capture', ...(adapter.reconcileSkillNames ?? 
[])]), ), 'memory_agent', ); diff --git a/packages/context/src/ingest/ingest-runtime-assets.test.ts b/packages/context/src/ingest/ingest-runtime-assets.test.ts index 4b75fcdf..c77bee11 100644 --- a/packages/context/src/ingest/ingest-runtime-assets.test.ts +++ b/packages/context/src/ingest/ingest-runtime-assets.test.ts @@ -17,13 +17,13 @@ const adapterSkillNames = [ 'historic_sql_table_digest', 'historic_sql_patterns', 'ingest_triage', - 'knowledge_capture', + 'wiki_capture', 'sl_capture', ] as const; const adapterReconcileSkillNames = [ 'ingest_triage', - 'knowledge_capture', + 'wiki_capture', 'sl_capture', ] as const; diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 9eeda894..2a3c9943 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -314,7 +314,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { scope: string, scopeId: string | null, ): Promise> { - const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${scopeId}/`; + const prefix = scope === 'GLOBAL' ? 'wiki/global/' : `wiki/user/${scopeId}/`; const result = new Map(); for (const [path, page] of this.sqlite.getExistingPages()) { if (!path.startsWith(prefix)) { @@ -341,7 +341,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { } async findPageByKey(scope: string, scopeId: string | null, pageKey: string) { - const path = scope === 'GLOBAL' ? `knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`; + const path = scope === 'GLOBAL' ? 
`wiki/global/${pageKey}.md` : `wiki/user/${scopeId}/${pageKey}.md`; try { await this.project.fileStore.readFile(path); return { page_key: pageKey }; @@ -355,12 +355,12 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { ): Promise { const pages: KnowledgeIndexPageListing[] = []; for (const scope of [ - { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' }, - { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` }, + { scope: 'GLOBAL', scopeId: null, dir: 'wiki/global' }, + { scope: 'USER', scopeId: userId, dir: `wiki/user/${userId}` }, ]) { const listed = await this.project.fileStore.listFiles(scope.dir, true); for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { - const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('knowledge/', '')}/${file}`); + const parsedPath = parseKnowledgeIndexPath(file.startsWith('global/') || file.startsWith('user/') ? file : `${scope.dir.replace('wiki/', '')}/${file}`); if (!parsedPath || parsedPath.scope !== scope.scope) { continue; } @@ -404,7 +404,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { } private async syncAllPagesFromDisk(): Promise { - const listed = await this.project.fileStore.listFiles('knowledge', true); + const listed = await this.project.fileStore.listFiles('wiki', true); const existingPages = this.sqlite.getExistingPages(); const pages: SqliteKnowledgeIndexPage[] = []; for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { @@ -412,7 +412,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { if (!parsedPath) { continue; } - const path = `knowledge/${file}`; + const path = `wiki/${file}`; const raw = await this.project.fileStore.readFile(path); const parsed = parseWiki(raw.content); const tags = parseWikiTags(raw.content); diff --git a/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts 
b/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts index f4f01c12..66f1afb8 100644 --- a/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts +++ b/packages/context/src/ingest/memory-flow/acceptance-fixtures.ts @@ -16,12 +16,12 @@ function baseScenario(overrides: Partial = {}): MemoryFlo { type: 'raw_snapshot_written', syncId: 'sync-success', rawFileCount: 4 }, { type: 'diff_computed', added: 2, modified: 1, deleted: 0, unchanged: 1 }, { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, - { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/global/orders.md' }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/global/orders.md' }, { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, - { type: 'work_unit_started', unitKey: 'revenue', skills: ['knowledge_capture'], stepBudget: 40 }, - { type: 'candidate_action', unitKey: 'revenue', target: 'wiki', action: 'updated', key: 'knowledge/global/revenue.md' }, + { type: 'work_unit_started', unitKey: 'revenue', skills: ['wiki_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'revenue', target: 'wiki', action: 'updated', key: 'wiki/global/revenue.md' }, { type: 'work_unit_finished', unitKey: 'revenue', status: 'success' }, { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, { type: 'saved', commitSha: 'abc123456789', wikiCount: 2, slCount: 1 }, // pragma: allowlist secret @@ -38,7 +38,7 @@ function baseScenario(overrides: Partial = {}): MemoryFlo unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/global/orders.md', + 
key: 'wiki/global/orders.md', summary: 'Captured order definitions', rawFiles: ['models/orders.yml'], status: 'success', @@ -56,7 +56,7 @@ function baseScenario(overrides: Partial = {}): MemoryFlo unitKey: 'revenue', target: 'wiki', action: 'updated', - key: 'knowledge/global/revenue.md', + key: 'wiki/global/revenue.md', summary: 'Updated revenue notes', rawFiles: ['docs/revenue.md'], status: 'success', @@ -66,7 +66,7 @@ function baseScenario(overrides: Partial = {}): MemoryFlo { rawPath: 'models/orders.yml', artifactKind: 'wiki', - artifactKey: 'knowledge/global/orders.md', + artifactKey: 'wiki/global/orders.md', actionType: 'created', }, { rawPath: 'models/orders.yml', artifactKind: 'sl', artifactKey: 'warehouse.orders', actionType: 'updated' }, @@ -111,7 +111,7 @@ export function validationRevertScenario(): MemoryFlowReplayInput { { type: 'raw_snapshot_written', syncId: 'sync-validation', rawFileCount: 1 }, { type: 'diff_computed', added: 1, modified: 0, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, { type: 'work_unit_finished', diff --git a/packages/context/src/ingest/memory-flow/events.test.ts b/packages/context/src/ingest/memory-flow/events.test.ts index e65cfc83..be97342b 100644 --- a/packages/context/src/ingest/memory-flow/events.test.ts +++ b/packages/context/src/ingest/memory-flow/events.test.ts @@ -77,7 +77,7 @@ function reportSnapshot(): IngestReportSnapshot { { rawPath: 'views/orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'knowledge/global/orders.md', + artifactKey: 'wiki/global/orders.md', actionType: 'wiki_written', }, { @@ -115,7 +115,7 @@ function reportSnapshot(): 
IngestReportSnapshot { rawFiles: ['views/orders.view.lkml'], status: 'success', actions: [ - { target: 'wiki', type: 'created', key: 'knowledge/global/orders.md', detail: 'order facts' }, + { target: 'wiki', type: 'created', key: 'wiki/global/orders.md', detail: 'order facts' }, { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'order measures' }, ], touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'warehouse.orders' }], @@ -180,7 +180,7 @@ describe('memory-flow event mapping', () => { unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/global/orders.md', + key: 'wiki/global/orders.md', }); expect(replay.events).toContainEqual({ type: 'work_unit_finished', @@ -197,7 +197,7 @@ describe('memory-flow event mapping', () => { unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/global/orders.md', + key: 'wiki/global/orders.md', summary: 'order facts', rawFiles: ['views/orders.view.lkml'], status: 'success', @@ -225,7 +225,7 @@ describe('memory-flow event mapping', () => { { rawPath: 'views/orders.view.lkml', artifactKind: 'wiki', - artifactKey: 'knowledge/global/orders.md', + artifactKey: 'wiki/global/orders.md', actionType: 'wiki_written', }, { diff --git a/packages/context/src/ingest/memory-flow/interaction.test.ts b/packages/context/src/ingest/memory-flow/interaction.test.ts index d997b236..290180df 100644 --- a/packages/context/src/ingest/memory-flow/interaction.test.ts +++ b/packages/context/src/ingest/memory-flow/interaction.test.ts @@ -43,7 +43,7 @@ function view(): MemoryFlowViewModel { unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', summary: 'order facts', rawFiles: ['orders.yml'], status: 'success', @@ -53,7 +53,7 @@ function view(): MemoryFlowViewModel { { rawPath: 'orders.yml', artifactKind: 'wiki', - artifactKey: 'knowledge/orders.md', + artifactKey: 'wiki/orders.md', actionType: 'wiki_written', }, ], @@ -104,8 +104,8 @@ function 
view(): MemoryFlowViewModel { status: 'complete', headline: '2 candidates', counters: ['1 wiki', '1 SL'], - chips: [{ label: 'knowledge/orders.md', status: 'complete' }], - details: ['wiki created: knowledge/orders.md', 'sl updated: warehouse.orders'], + chips: [{ label: 'wiki/orders.md', status: 'complete' }], + details: ['wiki created: wiki/orders.md', 'sl updated: warehouse.orders'], }, { id: 'gates', @@ -173,7 +173,7 @@ describe('memory-flow interaction reducer', () => { shouldQuit: false, }); expect(selectedMemoryFlowColumn(view(), selected).title).toBe('ACTIONS'); - expect(selectedMemoryFlowDetails(view(), selected)).toContain('wiki created: knowledge/orders.md'); + expect(selectedMemoryFlowDetails(view(), selected)).toContain('wiki created: wiki/orders.md'); }); it('selects and clamps a chip directly for mouse-driven renderers', () => { @@ -226,7 +226,7 @@ describe('memory-flow interaction reducer', () => { state = reduceMemoryFlowInteractionState(state, 'tab', view()); expect(state.pane).toBe('provenance'); expect(selectedMemoryFlowDetails(view(), state)).toContain( - 'orders.yml -> wiki:knowledge/orders.md (wiki_written)', + 'orders.yml -> wiki:wiki/orders.md (wiki_written)', ); state = reduceMemoryFlowInteractionState(state, 'tab', view()); @@ -241,7 +241,7 @@ describe('memory-flow interaction reducer', () => { state = reduceMemoryFlowInteractionState(state, 'provenance', view()); expect(state.pane).toBe('provenance'); expect(selectedMemoryFlowDetails(view(), state)).toContain( - 'orders.yml -> wiki:knowledge/orders.md (wiki_written)', + 'orders.yml -> wiki:wiki/orders.md (wiki_written)', ); state = reduceMemoryFlowInteractionState(state, 'transcript', view()); diff --git a/packages/context/src/ingest/memory-flow/interactive-render.test.ts b/packages/context/src/ingest/memory-flow/interactive-render.test.ts index a3ff0d5c..6b703a2a 100644 --- a/packages/context/src/ingest/memory-flow/interactive-render.test.ts +++ 
b/packages/context/src/ingest/memory-flow/interactive-render.test.ts @@ -36,7 +36,7 @@ function view(): MemoryFlowViewModel { unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', summary: 'order facts', rawFiles: ['orders.yml'], status: 'success', @@ -46,7 +46,7 @@ function view(): MemoryFlowViewModel { { rawPath: 'orders.yml', artifactKind: 'wiki', - artifactKey: 'knowledge/orders.md', + artifactKey: 'wiki/orders.md', actionType: 'wiki_written', }, ], @@ -97,8 +97,8 @@ function view(): MemoryFlowViewModel { status: 'complete', headline: '2 candidates', counters: ['1 wiki', '1 SL'], - chips: [{ label: 'knowledge/orders.md', status: 'complete' }], - details: ['wiki created: knowledge/orders.md', 'sl updated: warehouse.orders'], + chips: [{ label: 'wiki/orders.md', status: 'complete' }], + details: ['wiki created: wiki/orders.md', 'sl updated: warehouse.orders'], }, { id: 'gates', diff --git a/packages/context/src/ingest/memory-flow/render.test.ts b/packages/context/src/ingest/memory-flow/render.test.ts index e1bf425a..0053eefd 100644 --- a/packages/context/src/ingest/memory-flow/render.test.ts +++ b/packages/context/src/ingest/memory-flow/render.test.ts @@ -48,8 +48,8 @@ function view(): MemoryFlowViewModel { status: 'complete', headline: '2 candidates', counters: ['1 wiki', '1 SL'], - chips: [{ label: 'knowledge/orders.md', status: 'complete' }], - details: ['wiki created: knowledge/orders.md'], + chips: [{ label: 'wiki/orders.md', status: 'complete' }], + details: ['wiki created: wiki/orders.md'], }, { id: 'gates', diff --git a/packages/context/src/ingest/memory-flow/schema.test.ts b/packages/context/src/ingest/memory-flow/schema.test.ts index c1fbda64..c54752f8 100644 --- a/packages/context/src/ingest/memory-flow/schema.test.ts +++ b/packages/context/src/ingest/memory-flow/schema.test.ts @@ -21,9 +21,9 @@ function snapshot(overrides: Partial = {}): MemoryFlowRep { type: 'raw_snapshot_written', syncId: 
'sync-1', rawFileCount: 2 }, { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'work_unit_step', unitKey: 'orders', stepIndex: 1, stepBudget: 40 }, - { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' }, { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, { type: 'saved', commitSha: 'abc12345', wikiCount: 1, slCount: 0 }, @@ -37,7 +37,7 @@ function snapshot(overrides: Partial = {}): MemoryFlowRep unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', summary: 'Created orders page', rawFiles: ['orders.md'], status: 'success', @@ -47,7 +47,7 @@ function snapshot(overrides: Partial = {}): MemoryFlowRep { rawPath: 'orders.md', artifactKind: 'wiki', - artifactKey: 'knowledge/orders.md', + artifactKey: 'wiki/orders.md', actionType: 'wiki_written', }, ], diff --git a/packages/context/src/ingest/memory-flow/view-model.test.ts b/packages/context/src/ingest/memory-flow/view-model.test.ts index 27322c69..4e6edae3 100644 --- a/packages/context/src/ingest/memory-flow/view-model.test.ts +++ b/packages/context/src/ingest/memory-flow/view-model.test.ts @@ -21,7 +21,7 @@ function replayInput(): MemoryFlowReplayInput { unitKey: 'orders', target: 'wiki', action: 'created', - key: 'knowledge/orders.md', + key: 'wiki/orders.md', summary: 'order facts', rawFiles: ['orders.yml'], status: 'success', @@ -40,7 +40,7 @@ function replayInput(): MemoryFlowReplayInput { { rawPath: 
'orders.yml', artifactKind: 'wiki', - artifactKey: 'knowledge/orders.md', + artifactKey: 'wiki/orders.md', actionType: 'wiki_written', }, ], @@ -60,8 +60,8 @@ function replayInput(): MemoryFlowReplayInput { { type: 'raw_snapshot_written', syncId: 'sync-1', rawFileCount: 2 }, { type: 'diff_computed', added: 1, modified: 1, deleted: 0, unchanged: 3 }, { type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, - { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'knowledge/orders.md' }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, + { type: 'candidate_action', unitKey: 'orders', target: 'wiki', action: 'created', key: 'wiki/orders.md' }, { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, { type: 'work_unit_finished', unitKey: 'orders', status: 'success' }, { type: 'work_unit_finished', unitKey: 'revenue', status: 'failed', reason: 'validation failed' }, @@ -122,7 +122,7 @@ describe('buildMemoryFlowViewModel', () => { { rawPath: 'orders.yml', artifactKind: 'wiki', - artifactKey: 'knowledge/orders.md', + artifactKey: 'wiki/orders.md', actionType: 'wiki_written', }, ]); @@ -136,7 +136,7 @@ describe('buildMemoryFlowViewModel', () => { }, ]); expect(view.columns.find((column) => column.id === 'actions')?.details).toContain( - 'orders wiki created knowledge/orders.md: order facts', + 'orders wiki created wiki/orders.md: order facts', ); expect(view.columns.find((column) => column.id === 'saved')?.details).toContain('Commit: abc12345'); expect(view.completionLine).toBe( @@ -159,13 +159,13 @@ describe('buildMemoryFlowViewModel', () => { { type: 'source_acquired', adapter: 'looker', trigger: 'demo_seeded', fileCount: 7 }, { type: 'source_acquired', adapter: 'notion', trigger: 'demo_seeded', fileCount: 8 }, { type: 
'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'revenue-and-contracts', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'revenue-and-contracts', target: 'wiki', action: 'created', - key: 'knowledge/global/arr-contract-first.md', + key: 'wiki/global/arr-contract-first.md', }, { type: 'work_unit_finished', unitKey: 'revenue-and-contracts', status: 'success' }, { type: 'reconciliation_finished', conflictCount: 0, fallbackCount: 0 }, @@ -376,7 +376,7 @@ describe('buildMemoryFlowViewModel', () => { { type: 'raw_snapshot_written', syncId: 'sync-errors', rawFileCount: 2 }, { type: 'diff_computed', added: 2, modified: 0, deleted: 0, unchanged: 0 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'orders', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'orders', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'candidate_action', unitKey: 'orders', target: 'sl', action: 'updated', key: 'warehouse.orders' }, { type: 'work_unit_finished', @@ -402,7 +402,7 @@ describe('buildMemoryFlowViewModel', () => { events: [ { type: 'source_acquired', adapter: 'metricflow', trigger: 'manual_resync', fileCount: 1 }, { type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }, - { type: 'work_unit_started', unitKey: 'docs', skills: ['knowledge_capture'], stepBudget: 40 }, + { type: 'work_unit_started', unitKey: 'docs', skills: ['wiki_capture'], stepBudget: 40 }, { type: 'work_unit_finished', unitKey: 'docs', status: 'failed', reason: 'agent step budget exhausted' }, ], plannedWorkUnits: [{ unitKey: 'docs', rawFiles: ['docs.md'], peerFileCount: 0, dependencyCount: 0 }], diff --git a/packages/context/src/ingest/report-snapshot.test.ts 
b/packages/context/src/ingest/report-snapshot.test.ts index c949a3cc..bdf5b193 100644 --- a/packages/context/src/ingest/report-snapshot.test.ts +++ b/packages/context/src/ingest/report-snapshot.test.ts @@ -19,7 +19,7 @@ function validReportSnapshot() { rawFiles: ['cards/1.json', 'cards/2.json'], status: 'success', actions: [ - { target: 'wiki', type: 'created', key: 'knowledge/global/revenue.md', detail: 'Revenue overview' }, + { target: 'wiki', type: 'created', key: 'wiki/global/revenue.md', detail: 'Revenue overview' }, { target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'Added order amount measure' }, ], touchedSlSources: [{ connectionId: 'warehouse', sourceName: 'orders' }], @@ -38,7 +38,7 @@ function validReportSnapshot() { { rawPath: 'cards/1.json', artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', + artifactKey: 'wiki/global/revenue.md', actionType: 'wiki_written', }, ], @@ -48,7 +48,7 @@ function validReportSnapshot() { path: 'tool-transcripts/cards.jsonl', toolCallCount: 3, errorCount: 0, - toolNames: ['knowledge_capture'], + toolNames: ['wiki_capture'], }, ], reconciliationActions: [], @@ -90,7 +90,7 @@ describe('parseIngestReportSnapshot', () => { { target: 'wiki', type: 'created', - key: 'knowledge/global/revenue.md', + key: 'wiki/global/revenue.md', detail: 'Revenue overview', targetConnectionId: null, }, diff --git a/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts b/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts index 2798c64a..cd6d2385 100644 --- a/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts +++ b/packages/context/src/ingest/sqlite-bundle-ingest-store.test.ts @@ -159,7 +159,7 @@ describe('SqliteBundleIngestStore', () => { rawPath: 'pages/revenue.md', rawContentHash: 'hash-old', artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', + artifactKey: 'wiki/global/revenue.md', artifactContentHash: null, actionType: 'wiki_written', }, @@ -191,7 +191,7 @@ 
describe('SqliteBundleIngestStore', () => { rawPath: 'pages/revenue.md', rawContentHash: 'hash-new', artifactKind: 'wiki', - artifactKey: 'knowledge/global/revenue.md', + artifactKey: 'wiki/global/revenue.md', artifactContentHash: 'artifact-hash-new', actionType: 'wiki_written', }, @@ -234,7 +234,7 @@ describe('SqliteBundleIngestStore', () => { sync_id: 'sync-new', raw_content_hash: 'hash-new', artifact_kind: 'wiki', - artifact_key: 'knowledge/global/revenue.md', + artifact_key: 'wiki/global/revenue.md', action_type: 'wiki_written', }), expect.objectContaining({ @@ -381,7 +381,7 @@ describe('SqliteBundleIngestStore', () => { rawPath: 'pages/success/page.md', rawContentHash: 'hash-success', artifactKind: 'wiki', - artifactKey: 'knowledge/notion/success.md', + artifactKey: 'wiki/notion/success.md', artifactContentHash: 'artifact-success', actionType: 'wiki_written', }, diff --git a/packages/context/src/ingest/wiki-sl-ref-repair.ts b/packages/context/src/ingest/wiki-sl-ref-repair.ts index 7d3d48f3..e416c52b 100644 --- a/packages/context/src/ingest/wiki-sl-ref-repair.ts +++ b/packages/context/src/ingest/wiki-sl-ref-repair.ts @@ -91,7 +91,7 @@ export async function repairWikiSlRefs(input: { warnings: [...warnings, 'Skipped wiki sl_refs repair: config service cannot list wiki files.'], }; } - const listed = await listFiles('knowledge', true); + const listed = await listFiles('wiki', true); const repairs: WikiSlRefRepair[] = []; for (const file of listed.files.sort()) { diff --git a/packages/context/src/mcp/context-tools.ts b/packages/context/src/mcp/context-tools.ts index 48830d44..9f84b586 100644 --- a/packages/context/src/mcp/context-tools.ts +++ b/packages/context/src/mcp/context-tools.ts @@ -208,10 +208,10 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void const knowledge = ports.knowledge; registerParsedTool( server, - 'knowledge_search', + 'wiki_search', { - title: 'Knowledge Search', - description: 'Search KTX knowledge pages and 
return ranked summaries.', + title: 'Wiki Search', + description: 'Search KTX wiki pages and return ranked summaries.', inputSchema: knowledgeSearchSchema.shape, }, knowledgeSearchSchema, @@ -227,25 +227,25 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void registerParsedTool( server, - 'knowledge_read', + 'wiki_read', { - title: 'Knowledge Read', - description: 'Read a KTX knowledge page by key.', + title: 'Wiki Read', + description: 'Read a KTX wiki page by key.', inputSchema: knowledgeReadSchema.shape, }, knowledgeReadSchema, async (input) => { const page = await knowledge.read({ userId: userContext.userId, key: input.key }); - return page ? jsonToolResult(page) : jsonErrorToolResult(`Knowledge page "${input.key}" was not found.`); + return page ? jsonToolResult(page) : jsonErrorToolResult(`Wiki page "${input.key}" was not found.`); }, ); registerParsedTool( server, - 'knowledge_write', + 'wiki_write', { - title: 'Knowledge Write', - description: 'Create or replace a KTX knowledge page and its SL references.', + title: 'Wiki Write', + description: 'Create or replace a KTX wiki page and its SL references.', inputSchema: knowledgeWriteSchema.shape, }, knowledgeWriteSchema, diff --git a/packages/context/src/mcp/local-project-ports.test.ts b/packages/context/src/mcp/local-project-ports.test.ts index e3812960..b95e4ad1 100644 --- a/packages/context/src/mcp/local-project-ports.test.ts +++ b/packages/context/src/mcp/local-project-ports.test.ts @@ -341,7 +341,7 @@ describe('createLocalProjectMcpContextPorts', () => { }); }); - it('writes, reads, and searches global knowledge pages', async () => { + it('writes, reads, and searches global wiki pages', async () => { const project = await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); const ports = createLocalProjectMcpContextPorts(project); @@ -372,7 +372,7 @@ describe('createLocalProjectMcpContextPorts', () => { results: [ expect.objectContaining({ key: 'revenue', - 
path: 'knowledge/global/revenue.md', + path: 'wiki/global/revenue.md', scope: 'GLOBAL', summary: 'Revenue definition', score: expect.any(Number), diff --git a/packages/context/src/mcp/server.test.ts b/packages/context/src/mcp/server.test.ts index 4430f6f7..193d8f67 100644 --- a/packages/context/src/mcp/server.test.ts +++ b/packages/context/src/mcp/server.test.ts @@ -76,7 +76,7 @@ describe('createKtxMcpServer', () => { captured: { wiki: ['revenue'], sl: [], xrefs: [] }, error: null, commitHash: 'abc123', - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, }), }; @@ -123,7 +123,7 @@ describe('createKtxMcpServer', () => { captured: { wiki: ['revenue'], sl: [], xrefs: [] }, error: null, commitHash: 'abc123', - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, }, null, @@ -139,7 +139,7 @@ describe('createKtxMcpServer', () => { captured: { wiki: ['revenue'], sl: [], xrefs: [] }, error: null, commitHash: 'abc123', - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, }, }); @@ -175,7 +175,7 @@ describe('createKtxMcpServer', () => { }: { toolSet: Record Promise }>; }) => { - await toolSet.load_skill.execute({ name: 'knowledge_capture' }); + await toolSet.load_skill.execute({ name: 'wiki_capture' }); await toolSet.wiki_write.execute( { key: 'arr', @@ -220,7 +220,7 @@ describe('createKtxMcpServer', () => { }); await expect(access(join(project.projectDir, '.ktx/db.sqlite'))).resolves.toBeUndefined(); await expect(access(join(project.projectDir, '.ktx/memory-runs/memory-run-mcp.json'))).rejects.toThrow(); - await expect(readFile(join(project.projectDir, 'knowledge/global/arr.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(project.projectDir, 'wiki/global/arr.md'), 'utf-8')).resolves.toContain( 'ARR means annual recurring revenue.', ); } finally { @@ -257,7 +257,7 @@ describe('createKtxMcpServer', () => { results: [ { key: 'revenue', - 
path: 'knowledge/global/revenue.md', + path: 'wiki/global/revenue.md', scope: 'GLOBAL', summary: 'Paid order value', score: 0.42, @@ -519,9 +519,6 @@ describe('createKtxMcpServer', () => { 'ingest_report', 'ingest_status', 'ingest_trigger', - 'knowledge_read', - 'knowledge_search', - 'knowledge_write', 'memory_capture', 'memory_capture_status', 'scan_list_artifacts', @@ -534,6 +531,9 @@ describe('createKtxMcpServer', () => { 'sl_read_source', 'sl_validate', 'sl_write_source', + 'wiki_read', + 'wiki_search', + 'wiki_write', ]); await expect(getTool(fake.tools, 'connection_list').handler({})).resolves.toEqual({ @@ -595,20 +595,20 @@ describe('createKtxMcpServer', () => { }); expect(contextTools.connections?.test).toHaveBeenCalledWith({ connectionId: 'warehouse' }); - await getTool(fake.tools, 'knowledge_search').handler({ query: 'revenue', limit: 5 }); + await getTool(fake.tools, 'wiki_search').handler({ query: 'revenue', limit: 5 }); expect(contextTools.knowledge?.search).toHaveBeenCalledWith({ userId: 'mcp-user', query: 'revenue', limit: 5, }); - await getTool(fake.tools, 'knowledge_read').handler({ key: 'revenue' }); + await getTool(fake.tools, 'wiki_read').handler({ key: 'revenue' }); expect(contextTools.knowledge?.read).toHaveBeenCalledWith({ userId: 'mcp-user', key: 'revenue', }); - await getTool(fake.tools, 'knowledge_write').handler({ + await getTool(fake.tools, 'wiki_write').handler({ key: 'revenue', summary: 'Paid order value', content: '# Revenue', diff --git a/packages/context/src/memory/capture-signals.ts b/packages/context/src/memory/capture-signals.ts index 856df30b..360f0b7c 100644 --- a/packages/context/src/memory/capture-signals.ts +++ b/packages/context/src/memory/capture-signals.ts @@ -9,7 +9,7 @@ const LOOKML_STRUCTURAL_PATTERN = /^\s*(view|explore|model|include)\s*:\s*[\w"`] const LOOKML_FIELDS_PATTERN = /^\s*(measure|dimension|dimension_group|sql_table_name|derived_table|sql_always_where|drill_fields|join)\s*:/m; -export const 
DEFAULT_SKILL_NAMES = ['sl', 'sl_capture', 'knowledge_capture'] as const; +export const DEFAULT_SKILL_NAMES = ['sl', 'sl_capture', 'wiki_capture'] as const; export function detectCaptureSignals(input: MemoryAgentInput): CaptureSignals { const userMessage = input.userMessage?.trim() ?? ''; @@ -56,7 +56,7 @@ export function buildRequiredSkillsBlock(signals: CaptureSignals): string { const reason = signals.reasons.find((r) => r.includes('definition keyword') || r.includes('definition table')) ?? 'wiki signal detected'; - required.push({ name: 'knowledge_capture', reason }); + required.push({ name: 'wiki_capture', reason }); } if (signals.sl) { const reason = diff --git a/packages/context/src/memory/local-memory.test.ts b/packages/context/src/memory/local-memory.test.ts index 1284f76d..e44a5bf1 100644 --- a/packages/context/src/memory/local-memory.test.ts +++ b/packages/context/src/memory/local-memory.test.ts @@ -40,7 +40,7 @@ describe('LocalMemoryRunStore', () => { await store.markDone('memory-run-1', { signalDetected: true, actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }], - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], commitHash: 'abc123', }); @@ -69,7 +69,7 @@ describe('LocalMemoryRunStore', () => { chatId: 'chat-1', outputSummary: { actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'Revenue definition' }], - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, commitHash: 'abc123', }, @@ -96,7 +96,7 @@ describe('createLocalProjectMemoryCapture', () => { }: { toolSet: Record Promise }>; }) => { - await toolSet.load_skill.execute({ name: 'knowledge_capture' }); + await toolSet.load_skill.execute({ name: 'wiki_capture' }); await toolSet.wiki_write.execute( { key: 'revenue', @@ -134,11 +134,11 @@ describe('createLocalProjectMemoryCapture', () => { status: 'done', done: true, captured: { wiki: ['revenue'], sl: [], xrefs: [] }, - skillsLoaded: 
['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, }); - await expect(readFile(join(project.projectDir, 'knowledge/global/revenue.md'), 'utf-8')).resolves.toContain( + await expect(readFile(join(project.projectDir, 'wiki/global/revenue.md'), 'utf-8')).resolves.toContain( 'Revenue means paid order value net of refunds.', ); }); diff --git a/packages/context/src/memory/local-memory.ts b/packages/context/src/memory/local-memory.ts index af65b54e..3cc9d324 100644 --- a/packages/context/src/memory/local-memory.ts +++ b/packages/context/src/memory/local-memory.ts @@ -222,8 +222,8 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { async listPagesForUser(userId: string) { const pages: KnowledgeIndexPageListing[] = []; for (const scope of [ - { scope: 'GLOBAL', scopeId: null, dir: 'knowledge/global' }, - { scope: 'USER', scopeId: userId, dir: `knowledge/user/${userId}` }, + { scope: 'GLOBAL', scopeId: null, dir: 'wiki/global' }, + { scope: 'USER', scopeId: userId, dir: `wiki/user/${userId}` }, ]) { const listed = await this.project.fileStore.listFiles(scope.dir, true); for (const file of listed.files.filter((entry) => entry.endsWith('.md'))) { @@ -262,7 +262,7 @@ class LocalKnowledgeIndex implements KnowledgeIndexPort { } private pagePath(scope: string, scopeId: string | null, pageKey: string): string { - return scope === 'GLOBAL' ? `knowledge/global/${pageKey}.md` : `knowledge/user/${scopeId}/${pageKey}.md`; + return scope === 'GLOBAL' ? 
`wiki/global/${pageKey}.md` : `wiki/user/${scopeId}/${pageKey}.md`; } } diff --git a/packages/context/src/memory/memory-agent.service.ts b/packages/context/src/memory/memory-agent.service.ts index 6f239053..437111e4 100644 --- a/packages/context/src/memory/memory-agent.service.ts +++ b/packages/context/src/memory/memory-agent.service.ts @@ -318,7 +318,7 @@ export class MemoryAgentService { } const signalsActedOn: string[] = []; - if (signals.knowledge && skillsLoaded.includes('knowledge_capture')) { + if (signals.knowledge && skillsLoaded.includes('wiki_capture')) { signalsActedOn.push('knowledge'); } if (signals.sl && skillsLoaded.includes('sl')) { @@ -580,12 +580,12 @@ export class MemoryAgentService { private async buildWikiIndex(userId: string, userScopedEnabled: boolean): Promise { const pages = await this.deps.knowledgeIndex.listPagesForUser(userId); if (pages.length === 0) { - return '(empty — no knowledge pages exist yet)'; + return '(empty — no wiki pages exist yet)'; } const formatEntry = (p: { page_key: string; summary: string }) => `- ${p.page_key}: ${p.summary}`; if (!userScopedEnabled) { - return `## Knowledge Pages\n${pages.map(formatEntry).join('\n')}`; + return `## Wiki Pages\n${pages.map(formatEntry).join('\n')}`; } const globalEntries: string[] = []; diff --git a/packages/context/src/memory/memory-runs.test.ts b/packages/context/src/memory/memory-runs.test.ts index 75c25a38..049936ad 100644 --- a/packages/context/src/memory/memory-runs.test.ts +++ b/packages/context/src/memory/memory-runs.test.ts @@ -96,7 +96,7 @@ describe('MemoryCaptureService', () => { const result: MemoryAgentResult = { signalDetected: true, actions: [{ target: 'wiki', type: 'created', key: 'revenue', detail: 'captured revenue definition' }], - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], commitHash: 'abc123', }; const { capture, store, ingest, run } = buildService(); @@ -136,7 +136,7 @@ describe('MemoryCaptureService', () => { }, error: null, 
commitHash: 'abc123', - skillsLoaded: ['knowledge_capture'], + skillsLoaded: ['wiki_capture'], signalDetected: true, }); expect(store.rows.get('run-1')?.inputHash).toHaveLength(64); diff --git a/packages/context/src/memory/memory-runtime-assets.test.ts b/packages/context/src/memory/memory-runtime-assets.test.ts index bd18e524..973d7271 100644 --- a/packages/context/src/memory/memory-runtime-assets.test.ts +++ b/packages/context/src/memory/memory-runtime-assets.test.ts @@ -10,7 +10,7 @@ const promptsDir = fileURLToPath(new URL('../../prompts', import.meta.url)); const skillsDir = fileURLToPath(new URL('../../skills', import.meta.url)); const memorySourceTypes: MemoryAgentSourceType[] = ['research', 'external_ingest', 'backfill']; const expectedSkillHeadings: Record = { - knowledge_capture: '# Knowledge Capture', + wiki_capture: '# Wiki Capture', sl: '# Semantic Layer', sl_capture: '# Semantic Layer', }; @@ -33,7 +33,7 @@ const verificationWriterSkills = [ 'live_database_ingest', 'historic_sql_table_digest', 'historic_sql_patterns', - 'knowledge_capture', + 'wiki_capture', 'sl_capture', ] as const; @@ -77,7 +77,7 @@ describe('memory runtime assets', () => { const registry = new SkillsRegistryService({ skillsDir }); const skills = await registry.listSkills([...DEFAULT_SKILL_NAMES], 'memory_agent'); - expect(skills.map((skill) => skill.name).sort()).toEqual(['knowledge_capture', 'sl', 'sl_capture']); + expect(skills.map((skill) => skill.name).sort()).toEqual(['sl', 'sl_capture', 'wiki_capture']); for (const skill of skills) { const body = await readFile(join(skill.path, 'SKILL.md'), 'utf-8'); diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index 5f13729e..ee6b8ee9 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -37,7 +37,7 @@ describe('KTX project config', () => { run_research: { enabled: false, max_iterations: 20, - default_toolset: ['sl_query', 
'knowledge_search', 'sl_read_source'], + default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'], }, }, memory: { diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index b00d0148..ad5ecb8a 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -408,7 +408,7 @@ export function buildDefaultKtxProjectConfig(projectName = 'ktx-project'): KtxPr run_research: { enabled: false, max_iterations: 20, - default_toolset: ['sl_query', 'knowledge_search', 'sl_read_source'], + default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'], }, }, memory: { diff --git a/packages/context/src/project/local-git-file-store.test.ts b/packages/context/src/project/local-git-file-store.test.ts index 94085488..62b7fc8c 100644 --- a/packages/context/src/project/local-git-file-store.test.ts +++ b/packages/context/src/project/local-git-file-store.test.ts @@ -32,7 +32,7 @@ describe('LocalGitFileStore', () => { it('writes, commits, and reads a project file', async () => { const write = await store.writeFile( - 'knowledge/global/revenue.md', + 'wiki/global/revenue.md', '# Revenue\n', 'Agent', 'agent@example.com', @@ -40,20 +40,20 @@ describe('LocalGitFileStore', () => { ); expect(write.commitHash).toMatch(/^[0-9a-f]{40}$/); - await expect(readFile(join(tempDir, 'knowledge/global/revenue.md'), 'utf-8')).resolves.toBe('# Revenue\n'); - await expect(store.readFile('knowledge/global/revenue.md')).resolves.toMatchObject({ + await expect(readFile(join(tempDir, 'wiki/global/revenue.md'), 'utf-8')).resolves.toBe('# Revenue\n'); + await expect(store.readFile('wiki/global/revenue.md')).resolves.toMatchObject({ content: '# Revenue\n', }); }); it('lists files recursively and can strip the requested prefix', async () => { - await store.writeFile('knowledge/global/a.md', 'a', 'Agent', 'agent@example.com', 'Add a'); - await store.writeFile('knowledge/global/nested/b.md', 'b', 'Agent', 'agent@example.com', 'Add b'); + 
await store.writeFile('wiki/global/a.md', 'a', 'Agent', 'agent@example.com', 'Add a'); + await store.writeFile('wiki/global/nested/b.md', 'b', 'Agent', 'agent@example.com', 'Add b'); - await expect(store.listFiles('knowledge')).resolves.toEqual({ - files: ['knowledge/global/a.md', 'knowledge/global/nested/b.md'], + await expect(store.listFiles('wiki')).resolves.toEqual({ + files: ['wiki/global/a.md', 'wiki/global/nested/b.md'], }); - await expect(store.listFiles('knowledge/global', true)).resolves.toEqual({ + await expect(store.listFiles('wiki/global', true)).resolves.toEqual({ files: ['a.md', 'nested/b.md'], }); }); @@ -77,10 +77,10 @@ describe('LocalGitFileStore', () => { }); it('exposes Git history for a file', async () => { - await store.writeFile('knowledge/global/history.md', 'v1', 'Agent', 'agent@example.com', 'Add history'); - await store.writeFile('knowledge/global/history.md', 'v2', 'Agent', 'agent@example.com', 'Update history'); + await store.writeFile('wiki/global/history.md', 'v1', 'Agent', 'agent@example.com', 'Add history'); + await store.writeFile('wiki/global/history.md', 'v2', 'Agent', 'agent@example.com', 'Update history'); - const history = await store.getFileHistory('knowledge/global/history.md'); + const history = await store.getFileHistory('wiki/global/history.md'); expect(Array.isArray(history)).toBe(true); expect(history[0]).toMatchObject({ message: 'Update history' }); diff --git a/packages/context/src/project/project.test.ts b/packages/context/src/project/project.test.ts index b6e88604..caf36220 100644 --- a/packages/context/src/project/project.test.ts +++ b/packages/context/src/project/project.test.ts @@ -37,7 +37,7 @@ describe('KTX local project runtime', () => { expect(gitignore).toContain('secrets/'); expect(gitignore).toContain('setup/'); expect(gitignore).toContain('agents/'); - await expect(stat(join(projectDir, 'knowledge/global/.gitkeep'))).resolves.toBeDefined(); + await expect(stat(join(projectDir, 
'wiki/global/.gitkeep'))).resolves.toBeDefined(); await expect(stat(join(projectDir, 'semantic-layer/.gitkeep'))).resolves.toBeDefined(); await expect(stat(join(projectDir, '_schema/.gitkeep'))).rejects.toMatchObject({ code: 'ENOENT' }); await expect(stat(join(projectDir, 'raw-sources/.gitkeep'))).resolves.toBeDefined(); @@ -50,7 +50,7 @@ describe('KTX local project runtime', () => { const loaded = await loadKtxProject({ projectDir }); await loaded.fileStore.writeFile( - 'knowledge/global/revenue.md', + 'wiki/global/revenue.md', '# Revenue\n', 'Agent', 'agent@example.com', @@ -58,7 +58,7 @@ describe('KTX local project runtime', () => { ); expect(loaded.config.project).toBe('warehouse'); - await expect(loaded.fileStore.readFile('knowledge/global/revenue.md')).resolves.toMatchObject({ + await expect(loaded.fileStore.readFile('wiki/global/revenue.md')).resolves.toMatchObject({ content: '# Revenue\n', }); }); diff --git a/packages/context/src/project/project.ts b/packages/context/src/project/project.ts index 59e594a2..50f89262 100644 --- a/packages/context/src/project/project.ts +++ b/packages/context/src/project/project.ts @@ -41,7 +41,7 @@ const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [ }, { path: '.ktx/prompts/.gitkeep', content: '' }, { path: '.ktx/skills/.gitkeep', content: '' }, - { path: 'knowledge/global/.gitkeep', content: '' }, + { path: 'wiki/global/.gitkeep', content: '' }, { path: 'semantic-layer/.gitkeep', content: '' }, { path: 'raw-sources/.gitkeep', content: '' }, ]; diff --git a/packages/context/src/skills/skills-registry.service.test.ts b/packages/context/src/skills/skills-registry.service.test.ts index 82c7c8ab..9bb716dd 100644 --- a/packages/context/src/skills/skills-registry.service.test.ts +++ b/packages/context/src/skills/skills-registry.service.test.ts @@ -64,14 +64,14 @@ describe('SkillsRegistryService', () => { it('discovers valid skills and skips invalid ones', async () => { await writeSkill('sl', '---\nname: 
sl\ndescription: Semantic layer.\n---\n\n# SL'); - await writeSkill('knowledge_capture', '---\nname: knowledge_capture\ndescription: Wiki capture.\n---\n\n# KC'); + await writeSkill('wiki_capture', '---\nname: wiki_capture\ndescription: Wiki capture.\n---\n\n# KC'); await writeSkill('broken', '# no frontmatter at all'); await mkdir(join(tempDir, 'not_a_skill'), { recursive: true }); const catalog = await service.discoverSkills(tempDir); expect(catalog.size).toBe(2); expect(catalog.get('sl')?.name).toBe('sl'); - expect(catalog.get('knowledge_capture')?.description).toContain('Wiki capture'); + expect(catalog.get('wiki_capture')?.description).toContain('Wiki capture'); expect(catalog.has('broken')).toBe(false); }); }); @@ -80,10 +80,10 @@ describe('SkillsRegistryService', () => { it('formats bullet list with name and description', () => { const output = service.buildSkillsPrompt([ { name: 'sl', description: 'Semantic layer.', path: '/tmp/sl' }, - { name: 'knowledge_capture', description: 'Wiki capture.', path: '/tmp/kc' }, + { name: 'wiki_capture', description: 'Wiki capture.', path: '/tmp/kc' }, ]); expect(output).toContain('- sl: Semantic layer.'); - expect(output).toContain('- knowledge_capture: Wiki capture.'); + expect(output).toContain('- wiki_capture: Wiki capture.'); expect(output).toContain('Use the `load_skill` tool'); }); @@ -144,8 +144,8 @@ describe('SkillsRegistryService', () => { '---\nname: sl_capture\ndescription: Memory-only capture skill.\ncallers: [memory_agent]\n---\n\n# Capture', ); await writeSkill( - 'knowledge_capture', - '---\nname: knowledge_capture\ndescription: Wiki capture.\ncallers: [memory_agent]\n---\n\n# KC', + 'wiki_capture', + '---\nname: wiki_capture\ndescription: Wiki capture.\ncallers: [memory_agent]\n---\n\n# KC', ); service = new SkillsRegistryService({ skillsDir: tempDir }); }); @@ -157,7 +157,7 @@ describe('SkillsRegistryService', () => { it('memory_agent caller sees memory-only and open skills', async () => { const skills = 
await service.listSkills('memory_agent'); - expect(skills.map((skill) => skill.name).sort()).toEqual(['knowledge_capture', 'sl', 'sl_capture']); + expect(skills.map((skill) => skill.name).sort()).toEqual(['sl', 'sl_capture', 'wiki_capture']); }); it('listSkills with names and caller intersects both filters', async () => { @@ -185,26 +185,26 @@ describe('SkillsRegistryService', () => { it('discovers skills from additional directories when the primary directory misses', async () => { const extraDir = await mkdtemp(join(tmpdir(), 'skills-registry-extra-')); try { - await mkdir(join(extraDir, 'knowledge_capture'), { recursive: true }); + await mkdir(join(extraDir, 'wiki_capture'), { recursive: true }); await writeFile( - join(extraDir, 'knowledge_capture', 'SKILL.md'), + join(extraDir, 'wiki_capture', 'SKILL.md'), [ '---', - 'name: knowledge_capture', + 'name: wiki_capture', 'description: Packaged knowledge capture skill.', 'callers: [memory_agent]', '---', '', - '# Knowledge Capture', + '# Wiki Capture', ].join('\n'), 'utf-8', ); service = new SkillsRegistryService({ skillsDir: tempDir, additionalSkillDirs: [extraDir] }); - const skills = await service.listSkills(['knowledge_capture'], 'memory_agent'); + const skills = await service.listSkills(['wiki_capture'], 'memory_agent'); - expect(skills.map((skill) => skill.name)).toEqual(['knowledge_capture']); - expect(skills[0]?.path).toBe(join(extraDir, 'knowledge_capture')); + expect(skills.map((skill) => skill.name)).toEqual(['wiki_capture']); + expect(skills[0]?.path).toBe(join(extraDir, 'wiki_capture')); } finally { await rm(extraDir, { recursive: true, force: true }); } diff --git a/packages/context/src/skills/skills-registry.service.ts b/packages/context/src/skills/skills-registry.service.ts index 2f0e8de2..cd33e6d8 100644 --- a/packages/context/src/skills/skills-registry.service.ts +++ b/packages/context/src/skills/skills-registry.service.ts @@ -223,7 +223,7 @@ export class SkillsRegistryService { const list = 
skills.map((skill) => `- ${skill.name}: ${skill.description}`).join('\n'); const captureNote = caller === 'research' - ? '\n\nKnowledge pages and semantic-layer sources are captured automatically by a post-turn memory agent. Focus on answering, not on saving. Use `knowledge_read`/`knowledge_search` and `sl_read_source` to consult what already exists; the memory agent will write any new conventions or measures the turn surfaces.' + ? '\n\nWiki pages and semantic-layer sources are captured automatically by a post-turn memory agent. Focus on answering, not on saving. Use `wiki_read`/`wiki_search` and `sl_read_source` to consult what already exists; the memory agent will write any new conventions or measures the turn surfaces.' : ''; return `\n## Skills\n\nUse the \`load_skill\` tool to load a skill when the task benefits from specialized instructions.${captureNote}\n\nAvailable skills:\n${list}\n`; } diff --git a/packages/context/src/wiki/knowledge-wiki.service.test.ts b/packages/context/src/wiki/knowledge-wiki.service.test.ts index 40056edc..f7bb86e4 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.test.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.test.ts @@ -84,9 +84,9 @@ describe('KnowledgeWikiService.syncFromCommit', () => { const { service, pagesRepository, gitService } = makeService(); gitService.diffNameStatus.mockResolvedValue([ - { status: 'A', path: 'knowledge/global/new-page.md' }, - { status: 'M', path: 'knowledge/global/changed-page.md' }, - { status: 'D', path: 'knowledge/global/gone-page.md' }, + { status: 'A', path: 'wiki/global/new-page.md' }, + { status: 'M', path: 'wiki/global/changed-page.md' }, + { status: 'D', path: 'wiki/global/gone-page.md' }, ]); gitService.getFileAtCommit.mockImplementation((path: string) => { if (path.endsWith('new-page.md')) { @@ -117,10 +117,10 @@ describe('KnowledgeWikiService.syncFromCommit', () => { const { service, pagesRepository, gitService, logger } = makeService(); 
gitService.diffNameStatus.mockResolvedValue([ - { status: 'A', path: 'knowledge/global/revenue-policy.md' }, - { status: 'A', path: 'knowledge/global/historic-sql-order-lifecycle.md' }, - { status: 'A', path: 'knowledge/global/historic-sql/order-lifecycle.md' }, - { status: 'A', path: 'knowledge/global/orbit/company-overview.md' }, + { status: 'A', path: 'wiki/global/revenue-policy.md' }, + { status: 'A', path: 'wiki/global/historic-sql-order-lifecycle.md' }, + { status: 'A', path: 'wiki/global/historic-sql/order-lifecycle.md' }, + { status: 'A', path: 'wiki/global/orbit/company-overview.md' }, ]); gitService.getFileAtCommit.mockImplementation((path: string) => { if (path.endsWith('revenue-policy.md')) { @@ -137,13 +137,13 @@ describe('KnowledgeWikiService.syncFromCommit', () => { await service.syncFromCommit('sha-before', 'sha-after', 'run-uuid'); - expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/orbit/company-overview.md', 'sha-after'); - expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('knowledge/global/historic-sql/order-lifecycle.md', 'sha-after'); + expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('wiki/global/orbit/company-overview.md', 'sha-after'); + expect(gitService.getFileAtCommit).not.toHaveBeenCalledWith('wiki/global/historic-sql/order-lifecycle.md', 'sha-after'); expect(logger.warn).toHaveBeenCalledWith( - '[knowledge.sync] skipping unparseable path: knowledge/global/orbit/company-overview.md', + '[wiki.sync] skipping unparseable path: wiki/global/orbit/company-overview.md', ); expect(logger.warn).toHaveBeenCalledWith( - '[knowledge.sync] skipping unparseable path: knowledge/global/historic-sql/order-lifecycle.md', + '[wiki.sync] skipping unparseable path: wiki/global/historic-sql/order-lifecycle.md', ); const call = pagesRepository.applyDiffTransactional.mock.calls[0][0]; expect(call.upserts).toEqual( diff --git a/packages/context/src/wiki/knowledge-wiki.service.ts 
b/packages/context/src/wiki/knowledge-wiki.service.ts index fb152e83..c8e276ab 100644 --- a/packages/context/src/wiki/knowledge-wiki.service.ts +++ b/packages/context/src/wiki/knowledge-wiki.service.ts @@ -7,7 +7,7 @@ import { buildKnowledgeSearchText } from './knowledge-search-text.js'; import type { KnowledgeGitDiffPort, KnowledgeIndexPort, UpsertPageParams } from './ports.js'; import type { WikiFrontmatter, WikiPage, WikiPageWithScope } from './types.js'; -const WIKI_PREFIX = 'knowledge'; +const WIKI_PREFIX = 'wiki'; export type { WikiFrontmatter }; @@ -89,7 +89,7 @@ export class KnowledgeWikiService { ) { const path = this.pagePath(scope, scopeId, pageKey); const serialized = this.serializePage(frontmatter, content); - const message = commitMessage ?? `Update knowledge page: ${pageKey}`; + const message = commitMessage ?? `Update wiki page: ${pageKey}`; return this.configService.writeFile(path, serialized, author, authorEmail, message, { skipLock: options?.skipLock, }); @@ -115,7 +115,7 @@ export class KnowledgeWikiService { ) { const path = this.pagePath(scope, scopeId, pageKey); try { - return await this.configService.deleteFile(path, author, authorEmail, `Remove knowledge page: ${pageKey}`); + return await this.configService.deleteFile(path, author, authorEmail, `Remove wiki page: ${pageKey}`); } catch (error) { // Check if the file actually exists — if not, deletion is a no-op try { @@ -196,7 +196,7 @@ export class KnowledgeWikiService { rawContent, author, authorEmail, - commitMessage ?? `Update knowledge page (raw): ${pageKey}`, + commitMessage ?? `Update wiki page (raw): ${pageKey}`, ); await this.syncSinglePage(scope, scopeId, pageKey, parsed.frontmatter, parsed.content); return parsed; @@ -352,9 +352,9 @@ export class KnowledgeWikiService { /** * Apply the diff between two commits on the config repo to the shared - * `knowledge` index in a single transaction. Called by the ingest runner + * wiki index in a single transaction. 
Called by the ingest runner * after Stage 6 squashes the session branch into main: the pre-squash main - * SHA and the post-squash SHA bracket exactly the set of knowledge-file + * SHA and the post-squash SHA bracket exactly the set of wiki-file * changes this run produced. * * Any added/modified file becomes an upsert (tagged with `source_run_id`), @@ -362,7 +362,7 @@ export class KnowledgeWikiService { * transaction so the shared table stays consistent. */ async syncFromCommit(fromSha: string, toSha: string, runId: string): Promise { - const diff = await this.gitService.diffNameStatus(fromSha, toSha, 'knowledge/'); + const diff = await this.gitService.diffNameStatus(fromSha, toSha, 'wiki/'); if (diff.length === 0) { return; } @@ -372,7 +372,7 @@ export class KnowledgeWikiService { for (const entry of diff) { const parsedPath = parseKnowledgePath(entry.path); if (!parsedPath) { - this.logger.warn(`[knowledge.sync] skipping unparseable path: ${entry.path}`); + this.logger.warn(`[wiki.sync] skipping unparseable path: ${entry.path}`); continue; } if (entry.status === 'D') { @@ -392,7 +392,7 @@ export class KnowledgeWikiService { embedding = await this.embeddingService.computeEmbedding(searchText); } catch (err) { this.logger.warn( - `[knowledge.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? err.message : String(err)}`, + `[wiki.sync] embedding failed for ${parsedPath.pageKey}: ${err instanceof Error ? 
err.message : String(err)}`, ); } const contentHash = createHash('sha256').update(content).digest('hex'); @@ -410,21 +410,21 @@ export class KnowledgeWikiService { } await this.pagesRepository.applyDiffTransactional({ runId, upserts, deletes }); - this.logger.log(`[knowledge.sync] run=${runId} applied ${upserts.length} upsert(s), ${deletes.length} delete(s)`); + this.logger.log(`[wiki.sync] run=${runId} applied ${upserts.length} upsert(s), ${deletes.length} delete(s)`); } } /** - * Parse a `knowledge//...` file path into its scope and page key. - * `knowledge/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' } - * `knowledge/user//bar.md` → { scope: 'USER', scopeId: '', pageKey: 'bar' } + * Parse a `wiki//...` file path into its scope and page key. + * `wiki/global/foo.md` → { scope: 'GLOBAL', scopeId: null, pageKey: 'foo' } + * `wiki/user//bar.md` → { scope: 'USER', scopeId: '', pageKey: 'bar' } */ function parseKnowledgePath(path: string): { scope: string; scopeId: string | null; pageKey: string } | null { if (!path.endsWith('.md')) { return null; } const segments = path.split('/'); - if (segments[0] !== 'knowledge') { + if (segments[0] !== 'wiki') { return null; } const rest = segments.slice(1); diff --git a/packages/context/src/wiki/local-knowledge.test.ts b/packages/context/src/wiki/local-knowledge.test.ts index 54bd3771..09d61a3c 100644 --- a/packages/context/src/wiki/local-knowledge.test.ts +++ b/packages/context/src/wiki/local-knowledge.test.ts @@ -35,7 +35,7 @@ describe('local knowledge helpers', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('writes, reads, lists, and searches global knowledge pages', async () => { + it('writes, reads, lists, and searches global wiki pages', async () => { const write = await writeLocalKnowledgePage(project, { key: 'metrics-revenue', scope: 'GLOBAL', @@ -46,7 +46,7 @@ describe('local knowledge helpers', () => { slRefs: ['orders'], }); - 
expect(write.path).toBe('knowledge/global/metrics-revenue.md'); + expect(write.path).toBe('wiki/global/metrics-revenue.md'); expect(write.operation).toBe('write'); await expect(readLocalKnowledgePage(project, { key: 'metrics-revenue', userId: 'local' })).resolves.toMatchObject({ @@ -62,7 +62,7 @@ describe('local knowledge helpers', () => { await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ { key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', + path: 'wiki/global/metrics-revenue.md', scope: 'GLOBAL', summary: 'Revenue metric definition', }, @@ -72,7 +72,7 @@ describe('local knowledge helpers', () => { expect(search).toEqual([ expect.objectContaining({ key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', + path: 'wiki/global/metrics-revenue.md', scope: 'GLOBAL', score: expect.any(Number), matchReasons: expect.arrayContaining(['lexical']), @@ -195,7 +195,7 @@ describe('local knowledge helpers', () => { fingerprints: ['fp_paid_orders'], }); - const raw = await project.fileStore.readFile('knowledge/global/monthly-paid-orders.md'); + const raw = await project.fileStore.readFile('wiki/global/monthly-paid-orders.md'); expect(raw.content).toContain('source: historic-sql'); expect(raw.content).toContain('intent: Monthly paid order count'); expect(raw.content).toContain(['tables:', ' - analytics.orders'].join('\n')); @@ -245,4 +245,29 @@ describe('local knowledge helpers', () => { ).rejects.toThrow('Invalid wiki key "orbit/company-overview". 
Wiki keys must be flat; use "orbit-company-overview".'); }); + it('ignores nested historic-SQL legacy paths when listing local wiki pages', async () => { + await writeLocalKnowledgePage(project, { + key: 'historic-sql-paid-orders', + scope: 'GLOBAL', + summary: 'Flat historic SQL page', + content: 'Flat page body.', + tags: ['historic-sql'], + }); + await project.fileStore.writeFile( + 'wiki/global/historic-sql/paid-orders.md', + '---\nsummary: Nested historic SQL page\nusage_mode: auto\n---\n\nNested body\n', + 'Test', + 'test@example.com', + 'Write nested legacy page', + ); + + await expect(listLocalKnowledgePages(project, { userId: 'local' })).resolves.toEqual([ + { + key: 'historic-sql-paid-orders', + path: 'wiki/global/historic-sql-paid-orders.md', + scope: 'GLOBAL', + summary: 'Flat historic SQL page', + }, + ]); + }); }); diff --git a/packages/context/src/wiki/local-knowledge.ts b/packages/context/src/wiki/local-knowledge.ts index 5d1314a8..f9b25fb1 100644 --- a/packages/context/src/wiki/local-knowledge.ts +++ b/packages/context/src/wiki/local-knowledge.ts @@ -75,13 +75,13 @@ function stringArray(value: unknown): string[] { function knowledgePath(scope: LocalKnowledgeScope, userId: string | undefined, key: string): string { const safeKey = assertFlatWikiKey(key); if (scope === 'GLOBAL') { - return `knowledge/global/${safeKey}.md`; + return `wiki/global/${safeKey}.md`; } - return `knowledge/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`; + return `wiki/user/${assertSafePathToken('user id', userId ?? 'local')}/${safeKey}.md`; } function keyFromKnowledgePath(path: string, scope: LocalKnowledgeScope, userId: string): string | null { - const prefix = scope === 'GLOBAL' ? 'knowledge/global/' : `knowledge/user/${assertSafePathToken('user id', userId)}/`; + const prefix = scope === 'GLOBAL' ? 
'wiki/global/' : `wiki/user/${assertSafePathToken('user id', userId)}/`; const key = path.slice(prefix.length).replace(/\.md$/, ''); if (isFlatWikiKey(key)) { return key; @@ -158,7 +158,7 @@ export async function writeLocalKnowledgePage( serializeKnowledgePage(input), LOCAL_AUTHOR, LOCAL_AUTHOR_EMAIL, - `Write knowledge page: ${input.key}`, + `Write wiki page: ${input.key}`, ); } @@ -181,7 +181,7 @@ export async function listLocalKnowledgePages( const userId = input.userId ?? 'local'; const pages: LocalKnowledgeSummary[] = []; for (const scope of ['GLOBAL', 'USER'] as const) { - const root = scope === 'GLOBAL' ? 'knowledge/global' : `knowledge/user/${assertSafePathToken('user id', userId)}`; + const root = scope === 'GLOBAL' ? 'wiki/global' : `wiki/user/${assertSafePathToken('user id', userId)}`; const listed = await project.fileStore.listFiles(root); for (const path of listed.files.filter((file) => file.endsWith('.md')).sort()) { const key = keyFromKnowledgePath(path, scope, userId); diff --git a/packages/context/src/wiki/sqlite-knowledge-index.test.ts b/packages/context/src/wiki/sqlite-knowledge-index.test.ts index 620702a1..2a45573d 100644 --- a/packages/context/src/wiki/sqlite-knowledge-index.test.ts +++ b/packages/context/src/wiki/sqlite-knowledge-index.test.ts @@ -19,7 +19,7 @@ describe('SqliteKnowledgeIndex', () => { function page(overrides: Partial = {}): SqliteKnowledgeIndexPage { return { - path: 'knowledge/global/revenue.md', + path: 'wiki/global/revenue.md', key: 'revenue', scope: 'GLOBAL', summary: 'Revenue definition', @@ -36,7 +36,7 @@ describe('SqliteKnowledgeIndex', () => { index.sync([ page(), page({ - path: 'knowledge/global/support.md', + path: 'wiki/global/support.md', key: 'support', summary: 'Support queue', content: 'Tickets are grouped by priority.', @@ -47,8 +47,8 @@ describe('SqliteKnowledgeIndex', () => { await expect(access(dbPath)).resolves.toBeUndefined(); expect(index.searchLexicalCandidates({ queryText: 'paid order', limit: 10 
})).toEqual([ expect.objectContaining({ - id: 'knowledge/global/revenue.md', - path: 'knowledge/global/revenue.md', + id: 'wiki/global/revenue.md', + path: 'wiki/global/revenue.md', rank: 1, rawScore: expect.any(Number), }), @@ -57,7 +57,7 @@ describe('SqliteKnowledgeIndex', () => { it('removes stale rows when the Markdown source list changes', () => { const index = new SqliteKnowledgeIndex({ dbPath }); - index.rebuild([page(), page({ path: 'knowledge/global/churn.md', key: 'churn', content: 'Churn risk.' })]); + index.rebuild([page(), page({ path: 'wiki/global/churn.md', key: 'churn', content: 'Churn risk.' })]); expect(index.search('churn', 10)).toHaveLength(1); index.rebuild([page()]); @@ -67,12 +67,12 @@ describe('SqliteKnowledgeIndex', () => { it('exposes existing search text and embedding state for incremental refresh', () => { const index = new SqliteKnowledgeIndex({ dbPath }); - index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [1, 0] })]); + index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] })]); expect(index.getExistingPages()).toEqual( new Map([ [ - 'knowledge/global/revenue.md', + 'wiki/global/revenue.md', expect.objectContaining({ searchText: expect.stringContaining('Revenue definition'), embedding: [1, 0], @@ -84,29 +84,29 @@ describe('SqliteKnowledgeIndex', () => { it('does not treat empty embeddings as indexed semantic vectors', () => { const index = new SqliteKnowledgeIndex({ dbPath }); - index.sync([page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [] })]); + index.sync([page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [] })]); - expect(index.getExistingPages().get('knowledge/global/revenue.md')?.embedding).toBeNull(); + expect(index.getExistingPages().get('wiki/global/revenue.md')?.embedding).toBeNull(); expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([]); }); it('returns semantic lane candidates from 
stored page embeddings', () => { const index = new SqliteKnowledgeIndex({ dbPath }); index.sync([ - page({ path: 'knowledge/global/revenue.md', key: 'revenue', embedding: [1, 0] }), - page({ path: 'knowledge/global/support.md', key: 'support', summary: 'Support queue', embedding: [0, 1] }), + page({ path: 'wiki/global/revenue.md', key: 'revenue', embedding: [1, 0] }), + page({ path: 'wiki/global/support.md', key: 'support', summary: 'Support queue', embedding: [0, 1] }), ]); expect(index.searchSemanticCandidates({ queryEmbedding: [1, 0], limit: 10 })).toEqual([ expect.objectContaining({ - id: 'knowledge/global/revenue.md', - path: 'knowledge/global/revenue.md', + id: 'wiki/global/revenue.md', + path: 'wiki/global/revenue.md', rank: 1, rawScore: 1, }), expect.objectContaining({ - id: 'knowledge/global/support.md', - path: 'knowledge/global/support.md', + id: 'wiki/global/support.md', + path: 'wiki/global/support.md', rank: 2, rawScore: 0, }), diff --git a/packages/context/src/wiki/tools/wiki-remove.tool.ts b/packages/context/src/wiki/tools/wiki-remove.tool.ts index 7cb56e7d..4d4c1333 100644 --- a/packages/context/src/wiki/tools/wiki-remove.tool.ts +++ b/packages/context/src/wiki/tools/wiki-remove.tool.ts @@ -36,7 +36,7 @@ export class WikiRemoveTool extends BaseTool { } get description(): string { - return `Remove a knowledge page that is no longer relevant.`; + return `Remove a wiki page that is no longer relevant.`; } get inputSchema() { diff --git a/packages/context/src/wiki/tools/wiki-search.tool.test.ts b/packages/context/src/wiki/tools/wiki-search.tool.test.ts index 33bd752b..24840a4f 100644 --- a/packages/context/src/wiki/tools/wiki-search.tool.test.ts +++ b/packages/context/src/wiki/tools/wiki-search.tool.test.ts @@ -7,7 +7,7 @@ describe('WikiSearchTool', () => { results: [ { key: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', + path: 'wiki/global/metrics-revenue.md', scope: 'GLOBAL' as const, summary: 'Revenue metric definition', score: 
0.02459016393442623, @@ -28,7 +28,7 @@ describe('WikiSearchTool', () => { results: [ { blockKey: 'metrics-revenue', - path: 'knowledge/global/metrics-revenue.md', + path: 'wiki/global/metrics-revenue.md', summary: 'Revenue metric definition', score: 0.02459016393442623, matchReasons: ['lexical', 'token'], diff --git a/packages/context/src/wiki/tools/wiki-write.tool.ts b/packages/context/src/wiki/tools/wiki-write.tool.ts index 70668950..9cd457a8 100644 --- a/packages/context/src/wiki/tools/wiki-write.tool.ts +++ b/packages/context/src/wiki/tools/wiki-write.tool.ts @@ -147,7 +147,7 @@ export class WikiWriteTool extends BaseTool { get description(): string { return ` -Create or update a knowledge page. Provide content for create/rewrite, or replacements for targeted edits. +Create or update a wiki page. Provide content for create/rewrite, or replacements for targeted edits. For existing pages, you may provide only frontmatter fields such as summary, tags, refs, or sl_refs to update metadata while preserving content. tags/refs/sl_refs use REPLACE semantics: omit to keep existing on update, [] to clear, [values] to set. Keys must be flat file names, not directory paths. Use tags/source frontmatter for grouping. 
diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 7e184dde..b74b4277 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -663,9 +663,9 @@ try { ); await writeSqliteWarehouse(projectDir); - await mkdir(join(projectDir, 'knowledge', 'global'), { recursive: true }); + await mkdir(join(projectDir, 'wiki', 'global'), { recursive: true }); await writeFile( - join(projectDir, 'knowledge', 'global', 'revenue.md'), + join(projectDir, 'wiki', 'global', 'revenue.md'), [ '---', 'summary: Paid order value', @@ -698,12 +698,12 @@ try { assert.equal(wikiSearchJson.kind, 'list'); assert.equal(wikiSearchJson.data.items.length, 1); assert.equal(wikiSearchJson.data.items[0].key, 'revenue'); - assert.equal(wikiSearchJson.data.items[0].path, 'knowledge/global/revenue.md'); + assert.equal(wikiSearchJson.data.items[0].path, 'wiki/global/revenue.md'); assert.equal(typeof wikiSearchJson.data.items[0].score, 'number'); requireIncludes(wikiSearchJson.data.items[0].matchReasons, 'lexical', 'wiki search match reasons'); process.stdout.write('ktx wiki search hybrid metadata verified\\n'); await access(join(projectDir, '.ktx', 'db.sqlite')); - process.stdout.write('SQLite knowledge index: ' + join(projectDir, '.ktx', 'db.sqlite') + '\\n'); + process.stdout.write('SQLite wiki index: ' + join(projectDir, '.ktx', 'db.sqlite') + '\\n'); const slYaml = [ 'name: orders', diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 7694ddc3..06671d7c 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -456,7 +456,7 @@ describe('verification snippets', () => { assert.doesNotMatch(source, /@modelcontextprotocol/); assert.doesNotMatch(source, /startSemanticDaemon/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'setup'/); - assert.match(source, /knowledge', 'global', 'revenue\.md'/); + assert.match(source, /wiki', 'global', 'revenue\.md'/); assert.match(source, 
/run\('pnpm', \[\s*'exec',\s*'ktx',\s*'wiki',\s*'search'/); assert.match(source, /semantic-layer', 'warehouse', 'orders\.yaml'/); assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'sl',\s*'search',\s*'orders'/); @@ -497,7 +497,7 @@ describe('verification snippets', () => { assert.match(source, /mode: deterministic/); assert.match(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); assert.match(source, /access\(join\(projectDir, '\.ktx', 'db\.sqlite'\)\)/); - assert.match(source, /SQLite knowledge index/); + assert.match(source, /SQLite wiki index/); assert.match(source, /ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway/); assert.match(source, /ktx ingest provider guard verified/); }); From 754e4a9039db27587be375fda841cec5c921576c Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 17:01:48 +0200 Subject: [PATCH 14/15] feat(cli): improve setup progress UX (#69) --- packages/cli/src/clack.ts | 1 + packages/cli/src/context-build-view.test.ts | 97 ++++++++- packages/cli/src/context-build-view.ts | 136 +++++++++++-- packages/cli/src/ingest.test.ts | 141 ++++++++++++++ packages/cli/src/ingest.ts | 111 ++++++++--- packages/cli/src/public-ingest.ts | 59 +++--- packages/cli/src/scan.test.ts | 53 +++++ packages/cli/src/scan.ts | 12 +- packages/cli/src/setup-agents.ts | 49 ++--- packages/cli/src/setup-context.test.ts | 93 +++++++++ packages/cli/src/setup-context.ts | 37 ++-- packages/cli/src/setup-databases.ts | 61 ++---- packages/cli/src/setup-demo-tour.ts | 1 + packages/cli/src/setup-embeddings.test.ts | 4 +- packages/cli/src/setup-embeddings.ts | 32 +-- packages/cli/src/setup-models.test.ts | 4 +- packages/cli/src/setup-models.ts | 42 ++-- packages/cli/src/setup-project.ts | 33 +--- packages/cli/src/setup-prompts.test.ts | 205 ++++++++++++++++++++ packages/cli/src/setup-prompts.ts | 172 ++++++++++++++++ packages/cli/src/setup-ready-menu.ts | 23 +-- packages/cli/src/setup-sources.ts | 64 ++---- 
packages/cli/src/setup.ts | 41 ++-- 23 files changed, 1125 insertions(+), 346 deletions(-) create mode 100644 packages/cli/src/setup-prompts.test.ts create mode 100644 packages/cli/src/setup-prompts.ts diff --git a/packages/cli/src/clack.ts b/packages/cli/src/clack.ts index fc24f1e7..ad0dfd44 100644 --- a/packages/cli/src/clack.ts +++ b/packages/cli/src/clack.ts @@ -2,6 +2,7 @@ import { cancel, confirm, isCancel, log, spinner } from '@clack/prompts'; export interface KtxCliSpinner { start(message: string): void; + message(message: string): void; stop(message: string): void; error(message: string): void; } diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index 3df1f6d7..c8dc5130 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -231,6 +231,38 @@ describe('renderContextBuildView', () => { expect(output).toContain('(15s)'); }); + it('shows how long a running target has gone without a progress update', () => { + const state = initViewState([ + { connectionId: 'notion-main', driver: 'notion', operation: 'source-ingest', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + state.contextSources[0].status = 'running'; + state.contextSources[0].startedAt = 1_000; + state.contextSources[0].elapsedMs = 113_000; + state.contextSources[0].progressUpdatedAtMs = 46_000; + state.contextSources[0].detailLine = '[45%] No work units to process; finalizing ingest'; + + const output = renderContextBuildView(state, { styled: false }); + + expect(output).toContain('No work units to process; finalizing ingest'); + expect(output).toContain('last update 1m08s ago'); + expect(output).toContain('(1m53s)'); + }); + + it('does not show progress age while updates are recent', () => { + const state = initViewState([ + { connectionId: 'notion-main', driver: 'notion', operation: 'source-ingest', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, + ]); + 
state.contextSources[0].status = 'running'; + state.contextSources[0].startedAt = 1_000; + state.contextSources[0].elapsedMs = 40_000; + state.contextSources[0].progressUpdatedAtMs = 25_000; + state.contextSources[0].detailLine = '[45%] Planning work units'; + + const output = renderContextBuildView(state, { styled: false }); + + expect(output).not.toContain('last update'); + }); + it('renders completion summary when all targets are done', () => { const state = initViewState([ { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, @@ -480,7 +512,10 @@ describe('runContextBuild', () => { expect.objectContaining({ connectionId: 'warehouse', operation: 'scan' }), expect.objectContaining({ scanMode: 'enriched', detectRelationships: true }), expect.anything(), - {}, + expect.objectContaining({ + scanProgress: expect.anything(), + ingestProgress: expect.any(Function), + }), ); }); @@ -563,6 +598,43 @@ describe('runContextBuild', () => { ]); }); + it('publishes structured target progress without expanding the compact source rows', async () => { + const io = makeIo({ isTTY: true }); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + }); + const progressUpdates: Array> = []; + const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => { + await deps.scanProgress?.update(0.37, 'Generating descriptions 3/8 tables', { transient: true }); + return successResult(target.connectionId, target.driver, target.operation); + }); + + await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { + executeTarget, + now: () => 1000, + onSourceProgress: (sources) => { + progressUpdates.push( + sources.map((s) => ({ + connectionId: s.connectionId, + ...(s.percent !== undefined ? { percent: s.percent } : {}), + ...(s.message !== undefined ? 
{ message: s.message } : {}), + })), + ); + }, + sourceProgressThrottleMs: 0, + }, + ); + + expect(progressUpdates).toContainEqual([ + { connectionId: 'warehouse', percent: 37, message: 'Generating descriptions 3/8 tables' }, + ]); + expect(io.stdout()).toContain('Generating descriptions 3/8 tables'); + }); + it('returns report IDs and artifact paths parsed from target output', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -679,4 +751,27 @@ describe('viewStateFromSourceProgress', () => { expect(output).toContain('dbt-main'); expect(output).toContain('ingesting...'); }); + + it('renders persisted percent and message as compact source-row progress', () => { + const state = viewStateFromSourceProgress( + [ + { + connectionId: 'warehouse', + operation: 'scan', + status: 'running', + startedAtMs: 900, + percent: 63, + message: 'Building embeddings 2/4 batches', + updatedAtMs: 950, + }, + ], + 1000, + ); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('warehouse'); + expect(output).toContain('63%'); + expect(output).toContain('Building embeddings 2/4 batches'); + expect(output.match(/warehouse/g)).toHaveLength(1); + }); }); diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index e021b144..38f3d674 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -1,9 +1,12 @@ import { spawn } from 'node:child_process'; import { mkdirSync, openSync } from 'node:fs'; import { join, resolve } from 'node:path'; +import type { KtxProgressPort, KtxProgressUpdateOptions } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; +import type { KtxIngestProgressUpdate } from './ingest.js'; import type { KtxPublicIngestArgs, + KtxPublicIngestDeps, KtxPublicIngestPlanTarget, KtxPublicIngestProject, KtxPublicIngestTargetResult, @@ -25,6 +28,7 @@ export interface ContextBuildTargetState { failureText: string | null; 
startedAt: number | null; elapsedMs: number; + progressUpdatedAtMs: number | null; } export interface ContextBuildViewState { @@ -55,6 +59,9 @@ export interface ContextBuildSourceProgressUpdate { status: 'queued' | 'running' | 'done' | 'failed'; startedAtMs?: number; elapsedMs?: number; + percent?: number; + message?: string; + updatedAtMs?: number; summaryText?: string; } @@ -64,6 +71,7 @@ export interface ContextBuildDeps { setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; onDetach?: () => void; onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void; + sourceProgressThrottleMs?: number; } // --- Rendering --- @@ -118,6 +126,7 @@ function extractPercent(detailLine: string | null): number | null { const BAR_WIDTH = 12; const BAR_FILLED = '█'; const BAR_EMPTY = '░'; +const STALE_PROGRESS_UPDATE_MS = 30_000; function renderProgressBar(percent: number, styled: boolean): string { const filled = Math.round((percent / 100) * BAR_WIDTH); @@ -126,6 +135,19 @@ function renderProgressBar(percent: number, styled: boolean): string { return styled ? cyan(bar) : bar; } +function staleProgressText(target: ContextBuildTargetState, styled: boolean): string | null { + if (target.startedAt === null || target.progressUpdatedAtMs === null || target.elapsedMs <= 0) { + return null; + } + const currentTimeMs = target.startedAt + target.elapsedMs; + const staleMs = currentTimeMs - target.progressUpdatedAtMs; + if (staleMs < STALE_PROGRESS_UPDATE_MS) { + return null; + } + const text = `last update ${formatDuration(staleMs)} ago`; + return styled ? 
dim(text) : text; +} + function targetDetail(target: ContextBuildTargetState, styled: boolean): string { if (target.status === 'done') { const parts: string[] = []; @@ -147,6 +169,8 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean): string parts.push(`${renderProgressBar(percent, styled)} ${percent}%`); } parts.push(progressText); + const stale = staleProgressText(target, styled); + if (stale) parts.push(stale); if (elapsed) parts.push(styled ? dim(elapsed) : elapsed); return parts.join(' '); } @@ -309,15 +333,42 @@ function createCaptureIo(onProgress: (message: string) => void, isTTY: boolean): // --- Source progress helpers --- +function progressFieldsFromDetailLine( + detailLine: string | null, + updatedAtMs: number | null, +): Pick { + if (!detailLine) return {}; + const percent = extractPercent(detailLine); + const message = detailLine.replace(/^\[\d+%\]\s*/, ''); + return { + ...(percent !== null ? { percent } : {}), + ...(message ? { message } : {}), + ...(updatedAtMs !== null ? { updatedAtMs } : {}), + }; +} + +function detailLineFromProgressSource(source: ContextBuildSourceProgressUpdate): string | null { + if (!source.message) return null; + if (typeof source.percent === 'number' && Number.isFinite(source.percent)) { + const percent = Math.max(0, Math.min(100, Math.round(source.percent))); + return `[${percent}%] ${source.message}`; + } + return source.message; +} + function collectSourceProgress(targets: ContextBuildTargetState[]): ContextBuildSourceProgressUpdate[] { - return targets.map((t) => ({ - connectionId: t.target.connectionId, - operation: t.target.operation, - status: t.status, - ...(t.startedAt !== null ? { startedAtMs: t.startedAt } : {}), - ...(t.elapsedMs > 0 ? { elapsedMs: t.elapsedMs } : {}), - ...(t.summaryText ? 
{ summaryText: t.summaryText } : {}), - })); + return targets.map((t) => { + const progressFields = progressFieldsFromDetailLine(t.detailLine, t.progressUpdatedAtMs); + return { + connectionId: t.target.connectionId, + operation: t.target.operation, + status: t.status, + ...(t.startedAt !== null ? { startedAtMs: t.startedAt } : {}), + ...(t.elapsedMs > 0 ? { elapsedMs: t.elapsedMs } : {}), + ...progressFields, + ...(t.summaryText ? { summaryText: t.summaryText } : {}), + }; + }); } export function viewStateFromSourceProgress( @@ -328,11 +379,12 @@ export function viewStateFromSourceProgress( const makeTarget = (s: ContextBuildSourceProgressUpdate): ContextBuildTargetState => ({ target: { connectionId: s.connectionId, driver: '', operation: s.operation, debugCommand: '', steps: [] }, status: s.status, - detailLine: null, + detailLine: detailLineFromProgressSource(s), summaryText: s.summaryText ?? null, failureText: null, startedAt: s.startedAtMs ?? null, elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0), + progressUpdatedAtMs: s.updatedAtMs ?? 
null, }); return { @@ -453,6 +505,7 @@ function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetS failureText: null, startedAt: null, elapsedMs: 0, + progressUpdatedAtMs: null, }; } @@ -534,6 +587,34 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil }; } +function formatProgressDetail(update: Pick): string { + const percent = Math.max(0, Math.min(100, Math.round(update.percent))); + return `[${percent}%] ${update.message}`; +} + +function createContextBuildProgressPort( + onProgress: (update: KtxIngestProgressUpdate) => void, + state: { progress: number } = { progress: 0 }, + start = 0, + weight = 1, +): KtxProgressPort { + return { + async update(value: number, message?: string, options?: KtxProgressUpdateOptions): Promise { + const absoluteValue = start + Math.max(0, Math.min(1, value)) * weight; + state.progress = Math.max(state.progress, Math.min(1, absoluteValue)); + if (!message) return; + onProgress({ + percent: Math.max(0, Math.min(100, Math.round(state.progress * 100))), + message, + ...(options?.transient !== undefined ? { transient: options.transient } : {}), + }); + }, + startPhase(phaseWeight: number): KtxProgressPort { + return createContextBuildProgressPort(onProgress, state, state.progress, weight * phaseWeight); + }, + }; +} + export async function runContextBuild( project: KtxPublicIngestProject, args: ContextBuildArgs, @@ -572,6 +653,19 @@ export async function runContextBuild( const execTarget = deps.executeTarget ?? executePublicIngestTarget; const reportIds = new Set(); const artifactPaths = new Set(); + const sourceProgressThrottleMs = deps.sourceProgressThrottleMs ?? 
750; + let lastSourceProgressPublishedAt = Number.NEGATIVE_INFINITY; + + const publishSourceProgress = (force = false): boolean => { + if (!deps.onSourceProgress) return false; + const now = nowFn(); + if (!force && now - lastSourceProgressPublishedAt < sourceProgressThrottleMs) { + return false; + } + lastSourceProgressPublishedAt = now; + deps.onSourceProgress(collectSourceProgress(orderedTargets)); + return true; + }; let detached = false; let exiting = false; @@ -623,20 +717,34 @@ export async function runContextBuild( targetState.status = 'running'; targetState.startedAt = nowFn(); paint(true); - deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); + publishSourceProgress(true); + let hasPendingProgressPublish = false; + + const updateTargetProgress = (update: KtxIngestProgressUpdate) => { + targetState.detailLine = formatProgressDetail(update); + targetState.progressUpdatedAtMs = nowFn(); + paint(true); + hasPendingProgressPublish = !publishSourceProgress(false); + }; const capture = createCaptureIo( (message) => { targetState.detailLine = message; + targetState.progressUpdatedAtMs = nowFn(); paint(true); + hasPendingProgressPublish = !publishSourceProgress(false); }, false, ); + const progressDeps: KtxPublicIngestDeps = { + scanProgress: createContextBuildProgressPort(updateTargetProgress), + ingestProgress: updateTargetProgress, + }; let result: KtxPublicIngestTargetResult | null = null; let thrownError: unknown = null; try { - result = await execTarget(targetState.target, runArgs, capture.io, {}); + result = await execTarget(targetState.target, runArgs, capture.io, progressDeps); } catch (error) { if (exiting) { throw error; @@ -644,6 +752,10 @@ export async function runContextBuild( thrownError = error; } + if (hasPendingProgressPublish) { + publishSourceProgress(true); + } + targetState.elapsedMs = nowFn() - (targetState.startedAt ?? 
nowFn()); const failed = thrownError !== null || result?.steps.some((s) => s.status === 'failed') === true; targetState.status = failed ? 'failed' : 'done'; @@ -669,7 +781,7 @@ export async function runContextBuild( if (failed) hasFailure = true; paint(true); - deps.onSourceProgress?.(collectSourceProgress(orderedTargets)); + publishSourceProgress(true); } } finally { if (spinnerInterval) clearInterval(spinnerInterval); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index c847d53a..cf7015b9 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -103,6 +103,88 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); + it('emits structured progress for non-TTY local ingest runs', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const progressEvents: Array<{ percent: number; message: string; transient?: boolean }> = []; + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 2 }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }); + input.memoryFlow?.emit({ type: 'work_unit_started', unitKey: 'orders', skills: [], stepBudget: 4 }); + input.memoryFlow?.emit({ type: 'work_unit_step', unitKey: 'orders', stepIndex: 2, stepBudget: 4 }); + return completedLocalBundleRun(input, 'cli-local-progress-1'); + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'plain', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'cli-local-progress-1', + progress: (event) => progressEvents.push(event), + }, + ), + ).resolves.toBe(0); + + expect(progressEvents).toEqual( + expect.arrayContaining([ + { percent: 5, message: 'Fetching source files for 
warehouse/fake' }, + { percent: 15, message: 'Fetched 2 source files from fake' }, + { percent: 45, message: 'Planned 2 work units' }, + expect.objectContaining({ + message: 'Processing work units: 0/2 complete, 1 active; latest orders step 2/4', + transient: true, + }), + ]), + ); + expect(io.stderr()).not.toContain('[15%] Fetched 2 source files from fake'); + }); + + it('describes zero-work-unit ingest progress as finalizing instead of appearing half-planned', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const progressEvents: Array<{ percent: number; message: string; transient?: boolean }> = []; + const runLocal = vi.fn(async (input: RunLocalIngestOptions): Promise => { + input.memoryFlow?.emit({ type: 'source_acquired', adapter: 'fake', trigger: 'manual_resync', fileCount: 2 }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 0, workUnitCount: 0, evictionCount: 0 }); + return completedLocalBundleRun(input, 'cli-local-zero-progress-1'); + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + outputMode: 'plain', + }, + io.io, + { + runLocalIngest: runLocal, + jobIdFactory: () => 'cli-local-zero-progress-1', + progress: (event) => progressEvents.push(event), + }, + ), + ).resolves.toBe(0); + + expect(progressEvents).toEqual( + expect.arrayContaining([ + { percent: 80, message: 'No work units to process; finalizing ingest' }, + ]), + ); + expect(progressEvents).not.toContainEqual({ percent: 45, message: 'Planned 0 work units' }); + }); + it('prints provider setup guidance when a skip-llm setup project runs ingest', async () => { const projectDir = join(tempDir, 'project'); const setupIo = makeIo(); @@ -421,6 +503,65 @@ describe('runKtxIngest', () => { expect(io.stdout()).not.toContain('status=running job=metabase-child-1'); }); + it('emits structured progress for Metabase fan-out without writing 
progress to JSON output', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const progressEvents: Array<{ percent: number; message: string }> = []; + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'json', + }, + io.io, + { + progress: (event) => progressEvents.push(event), + runLocalMetabaseIngest: async (input) => { + input.progress?.onMetabaseFanoutPlanned?.({ + metabaseConnectionId: 'prod-metabase', + children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }], + }); + input.progress?.onMetabaseChildStarted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + }); + input.progress?.onMetabaseChildCompleted?.({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + status: 'done', + }); + return { + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 0, failedWorkUnits: 0 }, + children: [], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(progressEvents).toEqual( + expect.arrayContaining([ + { percent: 5, message: 'Checking Metabase mappings for prod-metabase' }, + { percent: 10, message: 'Metabase prod-metabase: 1 mapped database' }, + { percent: 25, message: 'Metabase database 1 -> warehouse_a running' }, + { percent: 90, message: 'Metabase database 1 -> warehouse_a done' }, + ]), + ); + expect(io.stdout()).toContain('"status": "all_succeeded"'); + expect(io.stderr()).not.toContain('Metabase ingest: prod-metabase'); + }); + it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { const projectDir = join(tempDir, 'metabase-cli-project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts 
b/packages/cli/src/ingest.ts index 6e0648b5..c1096b2b 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -67,7 +67,13 @@ interface KtxIngestIo { stderr: { write(chunk: string): void }; } -interface KtxIngestDeps { +export interface KtxIngestProgressUpdate { + percent: number; + message: string; + transient?: boolean; +} + +export interface KtxIngestDeps { jobIdFactory?: () => string; now?: () => Date; createAdapters?: typeof createKtxCliLocalIngestAdapters; @@ -88,6 +94,7 @@ interface KtxIngestDeps { | 'logger' | 'pullConfigOptions' >; + progress?: (update: KtxIngestProgressUpdate) => void; } function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { @@ -145,12 +152,18 @@ function pluralize(count: number, singular: string, plural = `${singular}s`): st function createMetabaseFanoutProgress( connectionId: string, io: KtxIngestIo, + onProgress?: (update: KtxIngestProgressUpdate) => void, ): LocalMetabaseFanoutProgress { io.stderr.write(`Metabase ingest: ${connectionId}\n`); io.stderr.write('Checking mappings and scheduled-pull targets...\n'); + onProgress?.({ percent: 5, message: `Checking Metabase mappings for ${connectionId}` }); return { onMetabaseFanoutPlanned(event) { io.stderr.write(`Targets: ${pluralize(event.children.length, 'mapped database')}\n`); + onProgress?.({ + percent: 10, + message: `Metabase ${event.metabaseConnectionId}: ${pluralize(event.children.length, 'mapped database')}`, + }); for (const child of event.children) { io.stderr.write(`- database=${child.metabaseDatabaseId} target=${child.targetConnectionId} status=queued\n`); } @@ -159,11 +172,19 @@ function createMetabaseFanoutProgress( io.stderr.write( `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=running job=${event.jobId}\n`, ); + onProgress?.({ + percent: 25, + message: `Metabase database ${event.metabaseDatabaseId} -> ${event.targetConnectionId} running`, + }); }, onMetabaseChildCompleted(event) { io.stderr.write( `- 
database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=${event.status} job=${event.jobId}\n`, ); + onProgress?.({ + percent: 90, + message: `Metabase database ${event.metabaseDatabaseId} -> ${event.targetConnectionId} ${event.status}`, + }); }, }; } @@ -231,6 +252,12 @@ function plainIngestEventProgress( case 'diff_computed': return { percent: 35, message: `Computed source diff ${formatDiffProgress(event)}` }; case 'chunks_planned': + if (event.workUnitCount === 0) { + return { + percent: 80, + message: 'No work units to process; finalizing ingest', + }; + } return { percent: 45, message: `Planned ${pluralize(event.workUnitCount, 'work unit')}`, @@ -296,34 +323,22 @@ function shouldWritePlainIngestProgress( return outputMode === 'plain' && io.stdout.isTTY === true && env.CI !== 'true'; } -function createPlainIngestProgressRenderer( +function createPlainIngestProgressObserver( args: Extract, - io: KtxIngestIo, -): { start(): void; update(snapshot: MemoryFlowReplayInput): void; flush(): void } { + onProgress: (update: KtxIngestProgressUpdate) => void, +): { start(): void; update(snapshot: MemoryFlowReplayInput): void } { let printedEvents = 0; let lastPercent = 0; let printedCompletion = false; - let hasPendingTransient = false; - - const flush = () => { - if (!hasPendingTransient) { - return; - } - io.stderr.write('\n'); - hasPendingTransient = false; - }; const write = (percent: number, message: string, options?: { transient?: boolean }) => { const nextPercent = Math.max(lastPercent, Math.max(0, Math.min(100, percent))); lastPercent = nextPercent; - const line = `[${nextPercent}%] ${message}`; - if (options?.transient === true) { - io.stderr.write(`\r${line}\u001b[K`); - hasPendingTransient = true; - return; - } - flush(); - io.stderr.write(`${line}\n`); + onProgress({ + percent: nextPercent, + message, + ...(options?.transient !== undefined ? 
{ transient: options.transient } : {}), + }); }; return { @@ -347,6 +362,41 @@ function createPlainIngestProgressRenderer( write(100, snapshot.status === 'done' ? 'Ingest completed' : 'Ingest failed'); } }, + }; +} + +function createPlainIngestProgressRenderer( + args: Extract, + io: KtxIngestIo, +): { start(): void; update(snapshot: MemoryFlowReplayInput): void; flush(): void } { + let hasPendingTransient = false; + + const flush = () => { + if (!hasPendingTransient) { + return; + } + io.stderr.write('\n'); + hasPendingTransient = false; + }; + + const observer = createPlainIngestProgressObserver(args, (update) => { + const line = `[${update.percent}%] ${update.message}`; + if (update.transient === true) { + io.stderr.write(`\r${line}\u001b[K`); + hasPendingTransient = true; + return; + } + flush(); + io.stderr.write(`${line}\n`); + }); + + return { + start() { + observer.start(); + }, + update(snapshot) { + observer.update(snapshot); + }, flush, }; } @@ -544,7 +594,15 @@ export async function runKtxIngest( if (args.adapter === 'metabase') { const executeMetabaseFanout = deps.runLocalMetabaseIngest ?? runLocalMetabaseIngest; const progress = - args.outputMode === 'json' ? undefined : createMetabaseFanoutProgress(args.connectionId, io); + args.outputMode === 'json' && !deps.progress + ? undefined + : createMetabaseFanoutProgress( + args.connectionId, + args.outputMode === 'json' + ? { ...io, stderr: { write: () => undefined } } + : io, + deps.progress, + ); const result = await executeMetabaseFanout({ project, adapters: createAdapters(project, adapterOptions), @@ -573,8 +631,13 @@ export async function runKtxIngest( const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env) ? createPlainIngestProgressRenderer(args, io) : null; + const structuredProgress = deps.progress + ? createPlainIngestProgressObserver(args, deps.progress) + : null; const initialMemoryFlow = - shouldUseLiveViz || plainProgress ? initialRunMemoryFlowInput(args, jobId ?? 
'pending') : undefined; + shouldUseLiveViz || plainProgress || structuredProgress + ? initialRunMemoryFlowInput(args, jobId ?? 'pending') + : undefined; let latestMemoryFlowSnapshot: MemoryFlowReplayInput | null = initialMemoryFlow ?? null; if (shouldUseLiveViz && initialMemoryFlow && isTuiCapableIo(io)) { @@ -595,11 +658,13 @@ export async function runKtxIngest( return; } plainProgress?.update(snapshot); + structuredProgress?.update(snapshot); }, }) : undefined; plainProgress?.start(); + structuredProgress?.start(); try { const result = await executeLocalIngest({ diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 71d93e4a..b126e702 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -1,7 +1,8 @@ import { type KtxLocalProject, type KtxProjectConnectionConfig, loadKtxProject } from '@ktx/context/project'; +import type { KtxProgressPort } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; -import type { KtxIngestArgs } from './ingest.js'; -import type { KtxScanArgs } from './scan.js'; +import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js'; +import type { KtxScanArgs, KtxScanDeps } from './scan.js'; import { profileMark } from './startup-profile.js'; profileMark('module:public-ingest'); @@ -59,8 +60,10 @@ export type KtxPublicIngestProject = Pick[0]) => Promise; - runScan?: (args: KtxScanArgs, io: KtxCliIo) => Promise; - runIngest?: (args: KtxIngestArgs, io: KtxCliIo) => Promise; + runScan?: (args: KtxScanArgs, io: KtxCliIo, deps?: KtxScanDeps) => Promise; + runIngest?: (args: KtxIngestArgs, io: KtxCliIo, deps?: KtxIngestDeps) => Promise; + scanProgress?: KtxProgressPort; + ingestProgress?: (update: KtxIngestProgressUpdate) => void; } const sourceAdapterByDriver = new Map([ @@ -247,33 +250,35 @@ export async function executePublicIngestTarget( ): Promise { if (target.operation === 'scan') { const { runKtxScan } = await import('./scan.js'); 
- const exitCode = await (deps.runScan ?? runKtxScan)( - { - command: 'run', - projectDir: args.projectDir, - connectionId: target.connectionId, - mode: args.scanMode ?? 'structural', - detectRelationships: args.detectRelationships ?? false, - dryRun: false, - }, - io, - ); + const scanArgs: KtxScanArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + mode: args.scanMode ?? 'structural', + detectRelationships: args.detectRelationships ?? false, + dryRun: false, + }; + const runScan = deps.runScan ?? runKtxScan; + const exitCode = deps.scanProgress + ? await runScan(scanArgs, io, { progress: deps.scanProgress }) + : await runScan(scanArgs, io); return markTargetResult(target, exitCode === 0 ? 'done' : 'failed'); } const { runKtxIngest } = await import('./ingest.js'); - const exitCode = await (deps.runIngest ?? runKtxIngest)( - { - command: 'run', - projectDir: args.projectDir, - connectionId: target.connectionId, - adapter: target.adapter ?? target.driver, - ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), - outputMode: sourceIngestOutputMode(args, io), - inputMode: args.inputMode, - }, - io, - ); + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: target.adapter ?? target.driver, + ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + }; + const runIngest = deps.runIngest ?? runKtxIngest; + const exitCode = deps.ingestProgress + ? await runIngest(ingestArgs, io, { progress: deps.ingestProgress }) + : await runIngest(ingestArgs, io); return markTargetResult(target, exitCode === 0 ? 
'done' : 'failed'); } diff --git a/packages/cli/src/scan.test.ts b/packages/cli/src/scan.test.ts index 74d52f35..28c60ea0 100644 --- a/packages/cli/src/scan.test.ts +++ b/packages/cli/src/scan.test.ts @@ -570,6 +570,59 @@ describe('runKtxScan', () => { expect(io.stdout()).toContain('[55%] Semantic layer comparison found 5 changes across 18 tables'); }); + it('uses injected structured progress without requiring TTY progress output', async () => { + await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); + const progressEvents: Array<{ progress: number; message?: string; transient?: boolean }> = []; + const structuredProgress = { + async update(progress: number, message?: string, options?: { transient?: boolean }) { + progressEvents.push({ + progress, + ...(message !== undefined ? { message } : {}), + ...(options?.transient !== undefined ? { transient: options.transient } : {}), + }); + }, + startPhase() { + return structuredProgress; + }, + }; + const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise => { + await input.progress?.update(0.42, 'Generating descriptions 4/10 tables', { transient: true }); + return { + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }; + }); + const io = makeIo(); + + await expect( + runKtxScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + }, + io.io, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters, progress: structuredProgress }, + ), + ).resolves.toBe(0); + + expect(progressEvents).toContainEqual({ + progress: 0.42, + message: 'Generating descriptions 4/10 tables', + transient: true, + }); + expect(io.stdout()).not.toContain('[42%] Generating descriptions 4/10 tables'); + }); + it('updates transient TTY progress messages in place', async () => { const io = makeIo({ isTTY: true }); 
const previousCi = process.env.CI; diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index bca6057d..ef5679cc 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -26,9 +26,10 @@ export interface KtxScanArgs { runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; } -interface KtxScanDeps { +export interface KtxScanDeps { runLocalScan?: typeof runLocalScan; createLocalIngestAdapters?: typeof createKtxCliLocalIngestAdapters; + progress?: KtxProgressPort; } function shouldUseStyledOutput(io: KtxCliIo): boolean { @@ -257,7 +258,8 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps args.mode !== 'structural' || args.detectRelationships ? await createKtxCliScanConnector(project, args.connectionId) : undefined; - const progress = createCliScanProgress(io); + const cliProgress = deps.progress ? null : createCliScanProgress(io); + const progress = deps.progress ?? cliProgress; try { const result = await (deps.runLocalScan ?? runLocalScan)({ project, @@ -272,12 +274,12 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(managedDaemon ? { managedDaemon } : {}), }), - progress, + ...(progress ? 
{ progress } : {}), }); - progress.flush(); + cliProgress?.flush(); writeRunSummary(result.report, args.projectDir, io); } finally { - progress.flush(); + cliProgress?.flush(); } return 0; } catch (error) { diff --git a/packages/cli/src/setup-agents.ts b/packages/cli/src/setup-agents.ts index da9486f5..9505307d 100644 --- a/packages/cli/src/setup-agents.ts +++ b/packages/cli/src/setup-agents.ts @@ -1,15 +1,17 @@ import { mkdir, readFile, rm, writeFile } from 'node:fs/promises'; import { dirname, join, relative, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { cancel, confirm, isCancel, multiselect, select } from '@clack/prompts'; import { loadKtxProject, markKtxSetupStateStepComplete, serializeKtxProjectConfig, } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; -import { withMenuOptionsSpacing, withMultiselectNavigation } from './prompt-navigation.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { withMultiselectNavigation } from './prompt-navigation.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export type KtxAgentTarget = 'claude-code' | 'codex' | 'cursor' | 'opencode' | 'universal'; export type KtxAgentScope = 'project' | 'global'; @@ -238,10 +240,10 @@ export async function removeKtxAgentInstall(projectDir: string, io: KtxCliIo): P } export interface KtxSetupAgentsPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; multiselect(options: { message: string; - options: Array<{ value: string; label: string }>; + options: KtxSetupPromptOption[]; required?: boolean; }): Promise; cancel(message: string): void; @@ -252,38 +254,11 @@ export interface KtxSetupAgentsDeps { } function createPromptAdapter(): KtxSetupAgentsPromptAdapter { - return { - async select(options) { - 
const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return String(value); - }, - async multiselect(options) { - while (true) { - const value = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return ['back']; - } - const selected = [...value] as string[]; - if (selected.length === 0 && !options.required) { - const skipConfirmed = await confirm({ message: 'Nothing selected. Skip this step?', initialValue: false }); - if (isCancel(skipConfirmed)) { - cancel('Setup cancelled.'); - return ['back']; - } - if (!skipConfirmed) continue; - } - return selected; - } - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); } const targetDisplayNames: Record = { diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 1c2ab320..12fd332a 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -142,6 +142,16 @@ describe('setup context build state', () => { artifactPaths: [], retryableFailedTargets: [], commands: contextBuildCommands(tempDir, 'setup-context-local-abc123'), + sourceProgress: [ + { + connectionId: 'warehouse', + operation: 'scan', + status: 'running', + percent: 42, + message: 'Generating descriptions 4/10 tables', + updatedAtMs: 1000, + }, + ], }); const state = await readKtxSetupContextState(tempDir); @@ -155,6 +165,16 @@ describe('setup context build state', () => { status: `ktx status --project-dir ${tempDir}`, resume: `ktx setup --project-dir ${tempDir}`, }, + sourceProgress: [ + { + connectionId: 'warehouse', + operation: 'scan', + status: 'running', + percent: 42, + message: 'Generating descriptions 4/10 
tables', + updatedAtMs: 1000, + }, + ], }); expect(JSON.stringify(state)).not.toContain('DATABASE_URL'); expect(JSON.stringify(state)).not.toContain('NOTION_TOKEN'); @@ -547,6 +567,79 @@ describe('setup context build state', () => { expect(output).not.toContain('KTX context built: detached'); }); + it('re-renders the compact progress view when watched source messages change', async () => { + await writeReadyProject(tempDir); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress-message', + status: 'detached', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: '2026-05-09T10:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress-message'), + sourceProgress: [ + { + connectionId: 'warehouse', + operation: 'scan' as const, + status: 'running' as const, + startedAtMs: Date.now() - 5000, + percent: 35, + message: 'Inspecting database schema', + updatedAtMs: 1000, + }, + ], + }); + const io = makeIo(); + let polls = 0; + const updateRun = async () => { + polls++; + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-progress-message', + status: polls === 1 ? 'detached' : 'completed', + startedAt: '2026-05-09T10:00:00.000Z', + updatedAt: polls === 1 ? '2026-05-09T10:00:01.000Z' : '2026-05-09T10:00:02.000Z', + ...(polls === 1 ? {} : { completedAt: '2026-05-09T10:00:02.000Z' }), + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-progress-message'), + sourceProgress: [ + { + connectionId: 'warehouse', + operation: 'scan' as const, + status: polls === 1 ? ('running' as const) : ('done' as const), + startedAtMs: Date.now() - 5000, + elapsedMs: polls === 1 ? 
undefined : 6000, + percent: polls === 1 ? 76 : undefined, + message: polls === 1 ? 'Building embeddings 3/4 batches' : undefined, + updatedAtMs: polls === 1 ? 2000 : undefined, + summaryText: polls === 1 ? undefined : '42 tables', + }, + ], + }); + }; + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + io.io, + { + sleep: updateRun, + watchIntervalMs: 1, + }, + ), + ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress-message' }); + + expect(io.stdout()).toContain('Inspecting database schema'); + expect(io.stdout()).toContain('Building embeddings 3/4 batches'); + expect(io.stdout()).toContain('warehouse'); + }); + it('supports d to detach from the progress watch view', async () => { await writeReadyProject(tempDir); await writeKtxSetupContextState(tempDir, { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index ea23a9dc..44185f18 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -1,7 +1,6 @@ import { mkdirSync, writeFileSync } from 'node:fs'; import { access, mkdir, readdir, readFile, writeFile } from 'node:fs/promises'; import { join, resolve } from 'node:path'; -import { cancel, isCancel, select } from '@clack/prompts'; import { type KtxLocalProject, loadKtxProject, @@ -19,8 +18,10 @@ import { runContextBuild, viewStateFromSourceProgress, } from './context-build-view.js'; -import { withMenuOptionsSpacing } from './prompt-navigation.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export type KtxSetupContextBuildStatus = | 'not_started' @@ -99,7 +100,7 @@ interface KtxSetupContextWatchArgs { } export interface KtxSetupContextPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: 
string; options: KtxSetupPromptOption[] }): Promise; cancel(message: string): void; } @@ -125,19 +126,7 @@ const SCAN_REPORT_FILE = 'scan-report.json'; const DEFAULT_WATCH_INTERVAL_MS = 2_000; function createPromptAdapter(): KtxSetupContextPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return String(value); - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); } function statePath(projectDir: string): string { @@ -228,6 +217,9 @@ function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpda status: rec.status as 'queued' | 'running' | 'done' | 'failed', ...(typeof rec.startedAtMs === 'number' ? { startedAtMs: rec.startedAtMs } : {}), ...(typeof rec.elapsedMs === 'number' ? { elapsedMs: rec.elapsedMs } : {}), + ...(typeof rec.percent === 'number' ? { percent: rec.percent } : {}), + ...(typeof rec.message === 'string' ? { message: rec.message } : {}), + ...(typeof rec.updatedAtMs === 'number' ? { updatedAtMs: rec.updatedAtMs } : {}), ...(typeof rec.summaryText === 'string' ? 
{ summaryText: rec.summaryText } : {}), }); } @@ -920,7 +912,16 @@ async function watchContextStatusWithProgressView( try { while (true) { if (!repainter) { - const currentKey = JSON.stringify(state.sourceProgress?.map((s) => s.status)); + const currentKey = JSON.stringify( + state.sourceProgress?.map((s) => ({ + id: s.connectionId, + status: s.status, + percent: s.percent, + message: s.message, + summaryText: s.summaryText, + updatedAtMs: s.updatedAtMs, + })), + ); if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { io.stdout.write(renderContextBuildView(viewState, viewOpts)); lastProgressKey = currentKey; diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index f697dd75..85be2620 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -3,7 +3,6 @@ import { readFile, writeFile } from 'node:fs/promises'; import { delimiter, dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { promisify } from 'node:util'; -import { cancel, confirm, isCancel, multiselect, password, select, text } from '@clack/prompts'; import type { HistoricSqlDialect } from '@ktx/context/ingest'; import { type KtxProjectConnectionConfig, @@ -15,10 +14,13 @@ import { import type { KtxTableListEntry } from '@ktx/context/scan'; import type { KtxCliIo } from './cli-runtime.js'; import { runKtxConnection } from './connection.js'; -import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; +import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxScan } from './scan.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; const HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY = 6; const 
execFileAsync = promisify(execFileCallback); @@ -59,11 +61,11 @@ export type KtxSetupDatabasesResult = export interface KtxSetupDatabasesPromptAdapter { multiselect(options: { message: string; - options: Array<{ value: string; label: string }>; + options: KtxSetupPromptOption[]; required?: boolean; initialValues?: string[]; }): Promise; - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -207,50 +209,11 @@ function missingConnectionDetailsPrompt( } function createPromptAdapter(): KtxSetupDatabasesPromptAdapter { - return { - async multiselect(options) { - while (true) { - const value = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return ['back']; - } - const selected = [...value] as string[]; - if (selected.length === 0 && !options.required) { - const skipConfirmed = await confirm({ message: 'Nothing selected. Skip this step?', initialValue: false }); - if (isCancel(skipConfirmed)) { - cancel('Setup cancelled.'); - return ['back']; - } - if (!skipConfirmed) continue; - } - return selected; - } - }, - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return String(value); - }, - async text(options) { - const value = await withSetupInterruptConfirmation(() => - text({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? 
undefined : String(value); - }, - async password(options) { - const value = await withSetupInterruptConfirmation(() => - password({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? undefined : String(value); - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); } function normalizeDriver(driver: string | undefined): KtxSetupDatabaseDriver | null { diff --git a/packages/cli/src/setup-demo-tour.ts b/packages/cli/src/setup-demo-tour.ts index 40bc4a24..0f0b6c8b 100644 --- a/packages/cli/src/setup-demo-tour.ts +++ b/packages/cli/src/setup-demo-tour.ts @@ -55,6 +55,7 @@ function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTarge failureText: null, startedAt: null, elapsedMs: 0, + progressUpdatedAtMs: null, }; } diff --git a/packages/cli/src/setup-embeddings.test.ts b/packages/cli/src/setup-embeddings.test.ts index e66aa05a..c2d5cad2 100644 --- a/packages/cli/src/setup-embeddings.test.ts +++ b/packages/cli/src/setup-embeddings.test.ts @@ -90,7 +90,7 @@ describe('setup embeddings step', () => { message: EMBEDDING_OPTION_PROMPT_MESSAGE, options: [ { value: 'sentence-transformers', label: 'Local sentence-transformers embeddings' }, - { value: 'openai', label: 'OpenAI embeddings (recommended)' }, + { value: 'openai', label: 'OpenAI embeddings', hint: 'recommended' }, { value: 'back', label: 'Back' }, ], }); @@ -136,6 +136,7 @@ describe('setup embeddings step', () => { const spinnerEvents: string[] = []; const spinner = vi.fn(() => ({ start: (msg: string) => spinnerEvents.push(`start:${msg}`), + message: (msg: string) => spinnerEvents.push(`message:${msg}`), stop: (msg: string) => spinnerEvents.push(`stop:${msg}`), error: (msg: string) => spinnerEvents.push(`error:${msg}`), })); @@ -193,6 +194,7 @@ describe('setup embeddings step', () => { const spinnerEvents: 
string[] = []; const spinner = vi.fn(() => ({ start: (msg: string) => spinnerEvents.push(`start:${msg}`), + message: (msg: string) => spinnerEvents.push(`message:${msg}`), stop: (msg: string) => spinnerEvents.push(`stop:${msg}`), error: (msg: string) => spinnerEvents.push(`error:${msg}`), })); diff --git a/packages/cli/src/setup-embeddings.ts b/packages/cli/src/setup-embeddings.ts index ba3333f1..d9b43a75 100644 --- a/packages/cli/src/setup-embeddings.ts +++ b/packages/cli/src/setup-embeddings.ts @@ -1,5 +1,4 @@ import { writeFile } from 'node:fs/promises'; -import { cancel, isCancel, password, select } from '@clack/prompts'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { type KtxProjectConfig, @@ -19,9 +18,12 @@ import { type ManagedLocalEmbeddingsDaemon, } from './managed-local-embeddings.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; -import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { withTextInputNavigation } from './prompt-navigation.js'; import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export type KtxSetupEmbeddingBackend = 'openai' | 'sentence-transformers'; @@ -46,7 +48,7 @@ export type KtxSetupEmbeddingsResult = | { status: 'failed'; projectDir: string }; export interface KtxSetupEmbeddingsPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; } @@ -85,25 +87,7 @@ const EMBEDDING_OPTION_PROMPT_CONTEXT = const LOCAL_EMBEDDING_HEALTH_TIMEOUT_MS = 120_000; function createPromptAdapter(): 
KtxSetupEmbeddingsPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return value; - }, - async password(options) { - const value = await withSetupInterruptConfirmation(() => - password({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? undefined : value; - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); } async function hasCompletedEmbeddings(projectDir: string, config: KtxProjectConfig): Promise { @@ -293,7 +277,7 @@ async function chooseEmbeddingBackend( message: `Which embedding option should KTX use?\n\n${EMBEDDING_OPTION_PROMPT_CONTEXT}`, options: [ { value: 'sentence-transformers', label: 'Local sentence-transformers embeddings' }, - { value: 'openai', label: 'OpenAI embeddings (recommended)' }, + { value: 'openai', label: 'OpenAI embeddings', hint: 'recommended' }, { value: 'back', label: 'Back' }, ], }); diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index 2e83ade2..e310ea90 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -140,7 +140,7 @@ describe('setup Anthropic model step', () => { expect.objectContaining({ message: expect.stringContaining('Which Anthropic model should KTX use?'), options: [ - { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6 (recommended)' }, + { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', hint: 'recommended' }, { value: 'claude-opus-4-6', label: 'Claude Opus 4.6' }, { value: 'claude-haiku-4-5', label: 'Claude Haiku 4.5' }, { value: 'manual', label: 'Enter a model ID manually' }, @@ -763,7 +763,7 @@ describe('setup Anthropic model step', () => { expect.objectContaining({ message: expect.stringContaining('Which Anthropic model 
should KTX use?'), options: expect.arrayContaining([ - { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6 (recommended)' }, + { value: 'claude-sonnet-4-6', label: 'Claude Sonnet 4.6', hint: 'recommended' }, ]), }), ); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 37ebdeec..bd05bd44 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -1,7 +1,6 @@ import { execFile, spawn } from 'node:child_process'; import { writeFile } from 'node:fs/promises'; import { promisify } from 'node:util'; -import { cancel, isCancel, password, select, text } from '@clack/prompts'; import { resolveLocalKtxLlmConfig } from '@ktx/context'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { @@ -13,9 +12,12 @@ import { } from '@ktx/context/project'; import { type KtxLlmConfig, type KtxLlmHealthCheckResult, runKtxLlmHealthCheck } from '@ktx/llm'; import type { KtxCliIo } from './cli-runtime.js'; -import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { withTextInputNavigation } from './prompt-navigation.js'; import { envCredentialReference, writeProjectLocalSecretReference } from './setup-secrets.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export interface KtxSetupModelArgs { projectDir: string; @@ -47,7 +49,7 @@ export interface AnthropicModelChoice { export type KtxSetupLlmBackend = 'anthropic' | 'vertex'; export interface KtxSetupModelPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; text(options: { message: string; placeholder?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -145,31 +147,7 @@ interface 
GcloudProjectChoice { type GcloudCommandRunner = (args: string[], io: KtxCliIo) => Promise; function createPromptAdapter(): KtxSetupModelPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return value; - }, - async text(options) { - const value = await withSetupInterruptConfirmation(() => - text({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? undefined : value; - }, - async password(options) { - const value = await withSetupInterruptConfirmation(() => - password({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? undefined : value; - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); } function createIndentedCommandIo(io: KtxCliIo): KtxCliIo { @@ -786,7 +764,8 @@ async function chooseModel( const modelOptions = [ ...selectableModels.map((model) => ({ value: model.id, - label: `${model.label || model.id}${model.recommended ? ' (recommended)' : ''}`, + label: model.label || model.id, + ...(model.recommended ? { hint: 'recommended' } : {}), })), { value: 'manual', label: 'Enter a model ID manually' }, { value: 'back', label: 'Back' }, @@ -827,7 +806,8 @@ async function chooseVertexModel(args: KtxSetupModelArgs, io: KtxCliIo, deps: Kt options: [ ...selectableModels.map((model) => ({ value: model.id, - label: `${model.label || model.id}${model.recommended ? ' (recommended)' : ''}`, + label: model.label || model.id, + ...(model.recommended ? 
{ hint: 'recommended' } : {}), })), { value: 'manual', label: 'Enter a model ID manually' }, { value: 'back', label: 'Back' }, diff --git a/packages/cli/src/setup-project.ts b/packages/cli/src/setup-project.ts index a6b4ca71..fa2dd3ed 100644 --- a/packages/cli/src/setup-project.ts +++ b/packages/cli/src/setup-project.ts @@ -2,7 +2,6 @@ import { existsSync } from 'node:fs'; import { mkdir, readdir, readFile, stat, writeFile } from 'node:fs/promises'; import { homedir } from 'node:os'; import { basename, join, resolve } from 'node:path'; -import { cancel, isCancel, select, text } from '@clack/prompts'; import { initKtxProject, type KtxLocalProject, @@ -13,8 +12,11 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { gray } from './io/symbols.js'; -import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { withTextInputNavigation } from './prompt-navigation.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export type KtxSetupProjectMode = 'auto' | 'new' | 'existing' | 'prompt-new'; export type KtxSetupInputMode = 'auto' | 'disabled'; @@ -34,7 +36,7 @@ export type KtxSetupProjectResult = | { status: 'missing-input'; projectDir: string }; export interface KtxSetupProjectPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; text(options: { message: string; placeholder?: string }): Promise; cancel(message: string): void; } @@ -55,28 +57,7 @@ type PromptProjectDirResult = const DEFAULT_NEW_PROJECT_FOLDER_NAME = 'ktx-project'; function createClackSetupProjectPromptAdapter(): KtxSetupProjectPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => 
select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'exit'; - } - return value; - }, - async text(options) { - const value = await withSetupInterruptConfirmation(() => - text({ ...options, message: withTextInputNavigation(options.message) }), - ); - if (isCancel(value)) { - return undefined; - } - return value; - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ selectCancelValue: 'exit' }); } function hasProjectConfig(projectDir: string): boolean { diff --git a/packages/cli/src/setup-prompts.test.ts b/packages/cli/src/setup-prompts.test.ts new file mode 100644 index 00000000..23ffd669 --- /dev/null +++ b/packages/cli/src/setup-prompts.test.ts @@ -0,0 +1,205 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; + +const mocks = vi.hoisted(() => { + const cancelSymbol = Symbol('cancel'); + return { + cancelSymbol, + cancel: vi.fn(), + confirm: vi.fn(), + intro: vi.fn(), + isCancel: vi.fn((value: unknown): value is symbol => value === cancelSymbol), + log: { info: vi.fn() }, + multiselect: vi.fn(), + note: vi.fn(), + password: vi.fn(), + select: vi.fn(), + text: vi.fn(), + withSetupInterruptConfirmation: vi.fn((prompt: () => Promise) => prompt()), + }; +}); + +vi.mock('@clack/prompts', () => ({ + cancel: mocks.cancel, + confirm: mocks.confirm, + intro: mocks.intro, + isCancel: mocks.isCancel, + log: mocks.log, + multiselect: mocks.multiselect, + note: mocks.note, + password: mocks.password, + select: mocks.select, + text: mocks.text, +})); + +vi.mock('./setup-interrupt.js', () => ({ + withSetupInterruptConfirmation: mocks.withSetupInterruptConfirmation, +})); + +describe('setup prompt adapter', () => { + beforeEach(() => { + mocks.cancel.mockReset(); + mocks.confirm.mockReset(); + mocks.intro.mockReset(); + mocks.isCancel.mockClear(); + 
mocks.log.info.mockReset(); + mocks.multiselect.mockReset(); + mocks.note.mockReset(); + mocks.password.mockReset(); + mocks.select.mockReset(); + mocks.text.mockReset(); + mocks.withSetupInterruptConfirmation.mockClear(); + }); + + it('passes select hint and disabled options through Clack and delegates cancellation handling', async () => { + mocks.select.mockResolvedValueOnce('openai'); + const adapter = createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); + const options: KtxSetupPromptOption[] = [ + { value: 'local', label: 'Local embeddings', disabled: true }, + { value: 'openai', label: 'OpenAI embeddings', hint: 'recommended' }, + ]; + + await expect( + adapter.select({ + message: 'Which embedding option should KTX use?\n\nKTX uses embeddings for search.', + options, + }), + ).resolves.toBe('openai'); + + expect(mocks.withSetupInterruptConfirmation).toHaveBeenCalledTimes(1); + expect(mocks.select).toHaveBeenCalledWith({ + message: 'Which embedding option should KTX use?\n\nKTX uses embeddings for search.\n', + options, + }); + }); + + it('maps select cancellation to the configured sentinel', async () => { + mocks.select.mockResolvedValueOnce(mocks.cancelSymbol); + const adapter = createKtxSetupPromptAdapter({ + selectCancelValue: 'exit', + cancelOnSelectCancel: false, + }); + + await expect(adapter.select({ message: 'What do you want to do?', options: [] })).resolves.toBe('exit'); + + expect(mocks.cancel).not.toHaveBeenCalled(); + }); + + it('decorates text and password prompts with setup navigation copy', async () => { + mocks.text.mockResolvedValueOnce('analytics-ktx'); + mocks.password.mockResolvedValueOnce('secret'); + const adapter = createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); + + await expect(adapter.text({ message: 'Project folder path', placeholder: './analytics-ktx' })).resolves.toBe( + 'analytics-ktx', + ); + await expect(adapter.password({ message: 'Anthropic API key' })).resolves.toBe('secret'); + + 
expect(mocks.text).toHaveBeenCalledWith({ + message: 'Project folder path\n│ Press Escape to go back.\n│', + placeholder: './analytics-ktx', + }); + expect(mocks.password).toHaveBeenCalledWith({ + message: 'Anthropic API key\n│ Press Escape to go back.\n│', + }); + }); + + it('passes multiselect hint and disabled options through Clack', async () => { + mocks.multiselect.mockResolvedValueOnce(['postgres']); + const adapter = createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); + const options: KtxSetupPromptOption[] = [ + { value: 'postgres', label: 'PostgreSQL', hint: 'recommended' }, + { value: 'snowflake', label: 'Snowflake', disabled: true }, + ]; + + await expect(adapter.multiselect({ message: 'Which primary sources?', options, required: true })).resolves.toEqual([ + 'postgres', + ]); + + expect(mocks.multiselect).toHaveBeenCalledWith({ + message: 'Which primary sources?', + options, + required: true, + }); + }); + + it('confirms an empty optional multiselect and retries when skip is declined', async () => { + mocks.multiselect.mockResolvedValueOnce([]).mockResolvedValueOnce(['postgres']); + mocks.confirm.mockResolvedValueOnce(false); + const adapter = createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); + + await expect(adapter.multiselect({ message: 'Which primary sources?', options: [], required: false })).resolves.toEqual([ + 'postgres', + ]); + + expect(mocks.confirm).toHaveBeenCalledWith({ message: 'Nothing selected. 
Skip this step?', initialValue: false }); + expect(mocks.multiselect).toHaveBeenCalledTimes(2); + }); + + it('maps multiselect cancellation to the configured back value', async () => { + mocks.multiselect.mockResolvedValueOnce(mocks.cancelSymbol); + const adapter = createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); + + await expect(adapter.multiselect({ message: 'Which primary sources?', options: [] })).resolves.toEqual(['back']); + + expect(mocks.cancel).toHaveBeenCalledWith('Setup cancelled.'); + }); + + it('keeps setup intro and note plain for non-stream output', async () => { + const { createKtxSetupUiAdapter } = await import('./setup-prompts.js'); + const chunks: string[] = []; + const io = { + stdout: { + isTTY: true, + write(chunk: string) { + chunks.push(chunk); + }, + }, + stderr: { write: vi.fn() }, + }; + + const ui = createKtxSetupUiAdapter(); + ui.intro('KTX setup', io); + ui.note(' $ ktx status', 'What you can do next', io); + + expect(chunks.join('')).toBe('KTX setup\n\nWhat you can do next:\n $ ktx status\n'); + expect(mocks.intro).not.toHaveBeenCalled(); + expect(mocks.note).not.toHaveBeenCalled(); + }); + + it('uses Clack intro and note for writable TTY output', async () => { + const { createKtxSetupUiAdapter } = await import('./setup-prompts.js'); + const output = { + columns: 80, + isTTY: true, + on: vi.fn(), + write: vi.fn(), + }; + const io = { + stdout: output, + stderr: { write: vi.fn() }, + }; + + const ui = createKtxSetupUiAdapter(); + ui.intro('KTX setup', io); + ui.note(' $ ktx status', 'What you can do next', io); + + expect(mocks.intro).toHaveBeenCalledWith('KTX setup', { output }); + expect(mocks.note).toHaveBeenCalledWith(' $ ktx status', 'What you can do next', { output }); + }); +}); diff --git a/packages/cli/src/setup-prompts.ts b/packages/cli/src/setup-prompts.ts new file mode 100644 index 00000000..ad97ec48 --- /dev/null +++ 
b/packages/cli/src/setup-prompts.ts @@ -0,0 +1,172 @@ +import type { Writable } from 'node:stream'; +import { + cancel, + confirm, + intro, + isCancel, + log, + multiselect, + note, + password, + select, + text, +} from '@clack/prompts'; +import type { KtxCliIo } from './cli-runtime.js'; +import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; +import { withSetupInterruptConfirmation } from './setup-interrupt.js'; + +export interface KtxSetupPromptOption { + value: Value; + label: string; + hint?: string; + disabled?: boolean; +} + +interface KtxSetupSelectOptions { + message: string; + options: Array>; + initialValue?: Value; + maxItems?: number; +} + +interface KtxSetupMultiselectOptions { + message: string; + options: Array>; + required?: boolean; + initialValues?: Value[]; + maxItems?: number; + cursorAt?: Value; +} + +interface KtxSetupTextOptions { + message: string; + placeholder?: string; + initialValue?: string; + defaultValue?: string; +} + +interface KtxSetupPasswordOptions { + message: string; + mask?: string; +} + +export interface KtxSetupPromptAdapter { + select(options: KtxSetupSelectOptions): Promise; + multiselect(options: KtxSetupMultiselectOptions): Promise; + text(options: KtxSetupTextOptions): Promise; + password(options: KtxSetupPasswordOptions): Promise; + cancel(message: string): void; + log(message: string): void; +} + +export interface KtxSetupPromptAdapterOptions { + selectCancelValue: 'back' | 'exit'; + multiselectCancelValue?: 'back'; + confirmEmptyOptionalMultiselect?: boolean; + cancelOnSelectCancel?: boolean; + cancelOnMultiselectCancel?: boolean; + cancelMessage?: string; +} + +const DEFAULT_SETUP_CANCEL_MESSAGE = 'Setup cancelled.'; + +export function createKtxSetupPromptAdapter(options: KtxSetupPromptAdapterOptions): KtxSetupPromptAdapter { + const cancelMessage = options.cancelMessage ?? DEFAULT_SETUP_CANCEL_MESSAGE; + const cancelOnSelectCancel = options.cancelOnSelectCancel ?? 
true; + const cancelOnMultiselectCancel = options.cancelOnMultiselectCancel ?? true; + const multiselectCancelValue = options.multiselectCancelValue ?? 'back'; + + return { + async select(promptOptions) { + const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(promptOptions))); + if (isCancel(value)) { + if (cancelOnSelectCancel) { + cancel(cancelMessage); + } + return options.selectCancelValue; + } + return String(value); + }, + async multiselect(promptOptions) { + while (true) { + const value = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(promptOptions))); + if (isCancel(value)) { + if (cancelOnMultiselectCancel) { + cancel(cancelMessage); + } + return [multiselectCancelValue]; + } + const selected = [...value].map(String); + if ( + selected.length === 0 && + !promptOptions.required && + options.confirmEmptyOptionalMultiselect === true + ) { + const skipConfirmed = await confirm({ + message: 'Nothing selected. Skip this step?', + initialValue: false, + }); + if (isCancel(skipConfirmed)) { + cancel(cancelMessage); + return [multiselectCancelValue]; + } + if (!skipConfirmed) { + continue; + } + } + return selected; + } + }, + async text(promptOptions) { + const value = await withSetupInterruptConfirmation(() => + text({ ...promptOptions, message: withTextInputNavigation(promptOptions.message) }), + ); + return isCancel(value) ? undefined : String(value); + }, + async password(promptOptions) { + const value = await withSetupInterruptConfirmation(() => + password({ ...promptOptions, message: withTextInputNavigation(promptOptions.message) }), + ); + return isCancel(value) ? 
undefined : String(value); + }, + cancel(message) { + cancel(message); + }, + log(message) { + log.info(message); + }, + }; +} + +export interface KtxSetupUiAdapter { + intro(title: string, io: KtxCliIo): void; + note(message: string, title: string, io: KtxCliIo): void; +} + +function isWritableTtyOutput(output: KtxCliIo['stdout']): output is KtxCliIo['stdout'] & Writable { + return ( + output.isTTY === true && + typeof (output as { on?: unknown }).on === 'function' && + typeof (output as { columns?: unknown }).columns !== 'undefined' + ); +} + +export function createKtxSetupUiAdapter(): KtxSetupUiAdapter { + return { + intro(title, io) { + if (isWritableTtyOutput(io.stdout)) { + intro(title, { output: io.stdout }); + return; + } + io.stdout.write(`${title}\n`); + }, + note(message, title, io) { + if (isWritableTtyOutput(io.stdout)) { + note(message, title, { output: io.stdout }); + return; + } + io.stdout.write(`\n${title}:\n`); + io.stdout.write(`${message}\n`); + }, + }; +} diff --git a/packages/cli/src/setup-ready-menu.ts b/packages/cli/src/setup-ready-menu.ts index a101e45a..c975d991 100644 --- a/packages/cli/src/setup-ready-menu.ts +++ b/packages/cli/src/setup-ready-menu.ts @@ -1,12 +1,13 @@ -import { cancel, isCancel, select } from '@clack/prompts'; -import { withMenuOptionsSpacing } from './prompt-navigation.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; import type { KtxSetupStatus } from './setup.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; export type KtxSetupReadyAction = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents' | 'exit'; export interface KtxSetupReadyMenuPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; cancel(message: string): void; } @@ -30,19 +31,7 @@ export function 
isKtxSetupReady(status: KtxSetupStatus): boolean { } function createPromptAdapter(): KtxSetupReadyMenuPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'exit'; - } - return String(value); - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ selectCancelValue: 'exit' }); } export async function runKtxSetupReadyChangeMenu( diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 0561b0e2..d18004d9 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -2,7 +2,6 @@ import { mkdtemp, readdir, readFile, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join, relative, resolve } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; -import { cancel, confirm, isCancel, log, multiselect, password, select, text } from '@clack/prompts'; import { localConnectionTypeForConfig, resolveNotionAuthToken } from '@ktx/context/connections'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { @@ -29,10 +28,13 @@ import { import type { KtxCliIo } from './cli-runtime.js'; import { pickNotionRootPages } from './notion-page-picker.js'; import { runKtxSourceMapping } from './source-mapping.js'; -import { withMenuOptionsSpacing, withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; +import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxPublicIngest } from './public-ingest.js'; -import { withSetupInterruptConfirmation } from './setup-interrupt.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +import { + createKtxSetupPromptAdapter, + type KtxSetupPromptOption, +} from './setup-prompts.js'; export type KtxSetupSourceType = 'dbt' | 'metricflow' 
| 'metabase' | 'looker' | 'lookml' | 'notion'; @@ -73,11 +75,11 @@ export type KtxSetupSourcesResult = export interface KtxSetupSourcesPromptAdapter { multiselect(options: { message: string; - options: Array<{ value: string; label: string; hint?: string }>; + options: KtxSetupPromptOption[]; initialValues?: string[]; required?: boolean; }): Promise; - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; text(options: { message: string; placeholder?: string; initialValue?: string }): Promise; password(options: { message: string }): Promise; cancel(message: string): void; @@ -135,53 +137,11 @@ const PRIMARY_SOURCE_DRIVERS = new Set([ ]); function createPromptAdapter(): KtxSetupSourcesPromptAdapter { - return { - async multiselect(options) { - while (true) { - const value = await withSetupInterruptConfirmation(() => multiselect(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return ['back']; - } - const selected = [...value] as string[]; - if (selected.length === 0 && !options.required) { - const skipConfirmed = await confirm({ message: 'Nothing selected. Skip this step?', initialValue: false }); - if (isCancel(skipConfirmed)) { - cancel('Setup cancelled.'); - return ['back']; - } - if (!skipConfirmed) continue; - } - return selected; - } - }, - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - cancel('Setup cancelled.'); - return 'back'; - } - return String(value); - }, - async text(options) { - const value = await withSetupInterruptConfirmation(() => - text({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? 
undefined : String(value); - }, - async password(options) { - const value = await withSetupInterruptConfirmation(() => - password({ ...options, message: withTextInputNavigation(options.message) }), - ); - return isCancel(value) ? undefined : String(value); - }, - cancel(message) { - cancel(message); - }, - log(message) { - log.info(message); - }, - }; + return createKtxSetupPromptAdapter({ + selectCancelValue: 'back', + multiselectCancelValue: 'back', + confirmEmptyOptionalMultiselect: true, + }); } function isRecord(value: unknown): value is Record { diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 0dc0d7cd..ee7d227e 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,6 +1,5 @@ import { existsSync } from 'node:fs'; import { join, resolve } from 'node:path'; -import { cancel, isCancel, select } from '@clack/prompts'; import { getLatestLocalIngestStatus, savedMemoryCountsForReport } from '@ktx/context/ingest'; import { ktxLocalStateDbPath, @@ -10,7 +9,7 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { formatSetupNextStepLines } from './next-steps.js'; -import { isKtxSetupExitError, withSetupInterruptConfirmation } from './setup-interrupt.js'; +import { isKtxSetupExitError } from './setup-interrupt.js'; import { type KtxAgentScope, type KtxAgentTarget, @@ -38,7 +37,12 @@ import { runKtxSetupReadyChangeMenu, } from './setup-ready-menu.js'; import { type KtxSetupSourcesDeps, type KtxSetupSourceType, runKtxSetupSourcesStep } from './setup-sources.js'; -import { withMenuOptionsSpacing } from './prompt-navigation.js'; +import { + createKtxSetupPromptAdapter, + createKtxSetupUiAdapter, + type KtxSetupPromptOption, + type KtxSetupUiAdapter, +} from './setup-prompts.js'; import { readKtxSetupContextState, type KtxSetupContextDeps, @@ -147,6 +151,7 @@ export interface KtxSetupDeps { contextDeps?: KtxSetupContextDeps; readyMenuDeps?: KtxSetupReadyMenuDeps; entryMenuDeps?: 
KtxSetupEntryMenuDeps; + setupUi?: KtxSetupUiAdapter; } const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']); @@ -164,7 +169,7 @@ type KtxSetupFlowStatus = | 'interrupted'; export interface KtxSetupEntryMenuPromptAdapter { - select(options: { message: string; options: Array<{ value: string; label: string }> }): Promise; + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; cancel(message: string): void; } @@ -173,18 +178,10 @@ export interface KtxSetupEntryMenuDeps { } function createEntryMenuPromptAdapter(): KtxSetupEntryMenuPromptAdapter { - return { - async select(options) { - const value = await withSetupInterruptConfirmation(() => select(withMenuOptionsSpacing(options))); - if (isCancel(value)) { - return 'exit'; - } - return String(value); - }, - cancel(message) { - cancel(message); - }, - }; + return createKtxSetupPromptAdapter({ + selectCancelValue: 'exit', + cancelOnSelectCancel: false, + }); } async function runKtxSetupEntryMenu( @@ -448,7 +445,8 @@ export async function runKtxSetup(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSet } async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetupDeps = {}): Promise { - io.stdout.write('KTX setup\n'); + const setupUi = deps.setupUi ?? 
createKtxSetupUiAdapter(); + setupUi.intro('KTX setup', io); let entryAction: KtxSetupEntryAction | undefined; let projectResult: Awaited>; const canShowEntryMenu = @@ -745,14 +743,15 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup const status = await readKtxSetupStatus(projectResult.projectDir); io.stdout.write(formatKtxSetupStatus(status)); - io.stdout.write('\nWhat you can do next:\n'); - io.stdout.write( - `${formatSetupNextStepLines({ + setupUi.note( + formatSetupNextStepLines({ setupReady: setupStatusReady(status), hasContextTargets: setupHasContextTargets(status), contextReady: setupContextReady(status), agentIntegrationReady: status.agents.some((agent) => agent.ready), - }).join('\n')}\n`, + }).join('\n'), + 'What you can do next', + io, ); return 0; } From e50fef851f4c717a228836ebf0a61e85ddfb400a Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Wed, 13 May 2026 09:16:35 -0700 Subject: [PATCH 15/15] fix(cli): hide setup project banner --- packages/cli/src/cli-program.ts | 4 ++++ packages/cli/src/index.test.ts | 16 ++++++++++++++++ packages/cli/src/project-dir.test.ts | 2 +- packages/cli/src/standalone-smoke.test.ts | 10 +++++++--- scripts/package-artifacts.mjs | 4 ++-- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 69437aec..efe2e5bb 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -162,6 +162,10 @@ function shouldSuppressProjectDirLine(path: string[], options: Record { expect(testIo.stderr()).toBe(`Project: ${tempDir}\n`); }); + it('does not print the command-level project directory line for setup', async () => { + const setup = vi.fn(async () => 0); + const testIo = makeIo(); + + await expect(runKtxCli(['--project-dir', tempDir, 'setup', '--no-input'], testIo.io, { setup })).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + projectDir: 
tempDir, + }), + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); + it('skips the project directory line for JSON and TUI output modes', async () => { const ingest = vi.fn(async () => 0); const jsonIo = makeIo(); diff --git a/packages/cli/src/project-dir.test.ts b/packages/cli/src/project-dir.test.ts index c0022d4d..02502b35 100644 --- a/packages/cli/src/project-dir.test.ts +++ b/packages/cli/src/project-dir.test.ts @@ -65,7 +65,7 @@ describe('project directory defaults', () => { argv: ['setup', '--no-input'], spy: setup, expected: { command: 'run', projectDir: '/tmp/ktx-env-project' }, - expectedStderr: 'Project: /tmp/ktx-env-project\n', + expectedStderr: '', }, { argv: ['scan', 'warehouse'], diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index c6fefd96..6f878617 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -130,6 +130,10 @@ function expectProjectStderr(result: CliResult, projectDir: string): void { expect(result).toMatchObject({ code: 0, stderr: `Project: ${projectDir}\n` }); } +function expectSetupStderr(result: CliResult): void { + expect(result).toMatchObject({ code: 0, stderr: '' }); +} + async function runSetupNewProject(projectDir: string): Promise { return await runBuiltCli([ 'setup', @@ -162,7 +166,7 @@ describe('standalone built ktx CLI smoke', () => { const sourceDir = join(tempDir, 'source'); const init = await runSetupNewProject(projectDir); - expectProjectStderr(init, projectDir); + expectSetupStderr(init); expect(init.stdout).toContain(`Project: ${projectDir}`); await writeWarehouseConfig(projectDir); @@ -207,7 +211,7 @@ describe('standalone built ktx CLI smoke', () => { it('runs structural and enriched scans through the built binary with manifest artifacts', async () => { const projectDir = join(tempDir, 'scan-project'); const init = await runSetupNewProject(projectDir); - expectProjectStderr(init, projectDir); + 
expectSetupStderr(init); const dbPath = join(projectDir, 'warehouse.db'); createSqliteWarehouse(dbPath); @@ -310,7 +314,7 @@ describe('standalone built ktx CLI smoke', () => { it('rejects the removed connection add command through the built binary', async () => { const projectDir = join(tempDir, 'notion-project'); const init = await runSetupNewProject(projectDir); - expectProjectStderr(init, projectDir); + expectSetupStderr(init); const add = await runBuiltCli([ 'connection', diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index b74b4277..e7998c58 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -618,7 +618,7 @@ try { '--skip-sources', '--skip-agents', ]); - requireProjectStderr('ktx setup', init, projectDir); + requireSuccess('ktx setup', init); requireOutput('ktx setup', init, /Project: /); const emptyProjectDir = join(root, 'empty-project'); @@ -637,7 +637,7 @@ try { '--skip-sources', '--skip-agents', ]); - requireProjectStderr('ktx setup empty project', emptyInit, emptyProjectDir); + requireSuccess('ktx setup empty project', emptyInit); await writeFile( join(projectDir, 'ktx.yaml'), [