diff --git a/AGENTS.md b/AGENTS.md index 4a235864..86dde8e9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -187,6 +187,11 @@ normal development workflow is agent-based. - Use `@clack/prompts` for interactive flows. Always handle cancellation with `isCancel` plus `cancel`, stop active spinners before exiting, and keep prompts grouped or factored so multi-step setup flows share cancellation behavior. +- When CLI behavior is shared by the `ktx setup` wizard and other `ktx` + commands, reuse or extract components in `packages/cli/src` instead of + duplicating setup-only logic. Prefer neutral helpers such as `clack.ts`, + `prompt-navigation.ts`, and command-independent prompt adapters over imports + from setup command internals. - Keep command behavior scriptable: prefer flags and config over prompts when values are supplied, and reserve prompts for interactive missing input or explicit setup flows. diff --git a/README.md b/README.md index b9923b38..44d7c395 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ KTX project: /home/user/analytics Project ready: yes LLM ready: yes (claude-sonnet-4-6) Embeddings ready: yes (text-embedding-3-small) -Primary sources configured: yes (postgres-warehouse) +Databases configured: yes (postgres-warehouse) Context sources configured: yes (dbt-main) KTX context built: yes Agent integration ready: yes (claude-code:project) @@ -114,7 +114,7 @@ my-project/ │ └── local/ ├── raw-sources/ │ └── warehouse/ -│ └── live-database/ # Scan artifacts and reports +│ └── / # Database ingest artifacts and reports └── .ktx/ └── db.sqlite # Local state (git-ignored) ``` @@ -123,14 +123,13 @@ Semantic sources and wiki pages are committed to git. The `.ktx/` directory holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the next run. -### Scan the demo warehouse +### Build demo warehouse context -Scan artifacts are written under -`raw-sources/warehouse/live-database//` in the project directory. 
+Database ingest artifacts are written under `raw-sources/warehouse//` +in the project directory. ```bash -SCAN_OUTPUT="$(ktx scan warehouse --project-dir "$PROJECT_DIR")" -printf '%s\n' "$SCAN_OUTPUT" +ktx ingest warehouse --project-dir "$PROJECT_DIR" --fast ktx status --project-dir "$PROJECT_DIR" ``` @@ -219,9 +218,7 @@ KTX provider. Enable it with an environment flag when running an LLM-backed command: ```bash -KTX_AI_DEVTOOLS_ENABLED=true ktx ingest run \ - --connection-id warehouse \ - --adapter metabase +KTX_AI_DEVTOOLS_ENABLED=true ktx ingest warehouse --project-dir "$PROJECT_DIR" --deep ``` Traces are written to `.devtools/generations.json` under the current working diff --git a/biome.json b/biome.json index 35c6d596..2d5ea8e1 100644 --- a/biome.json +++ b/biome.json @@ -17,6 +17,7 @@ "!**/dist/**", "!**/coverage/**", "!**/.next/**", + "!**/.source/**", "!**/node_modules/**", "!**/*.gen.ts", "!**/*.generated.ts" diff --git a/docs-site/app/docs/[[...slug]]/page.tsx b/docs-site/app/docs/[[...slug]]/page.tsx index 1e0c18ad..d1ae21d4 100644 --- a/docs-site/app/docs/[[...slug]]/page.tsx +++ b/docs-site/app/docs/[[...slug]]/page.tsx @@ -9,6 +9,7 @@ import { notFound, redirect } from "next/navigation"; import defaultMdxComponents from "fumadocs-ui/mdx"; import { CodeBlock } from "@/components/code-block"; import { DocsPageActions } from "@/components/docs-page-actions"; +import { readDocsPageMarkdown } from "@/lib/docs-markdown"; const docsIndexPath = "/docs/getting-started/introduction"; const docsIndexSlug = ["getting-started", "introduction"] as const; @@ -33,6 +34,7 @@ export default async function Page(props: { if (!page) notFound(); const MDX = page.data.body; + const mdxSource = await readDocsPageMarkdown(page.slugs); const hero = isHeroPage(params.slug); @@ -44,7 +46,7 @@ export default async function Page(props: { {page.data.title} {page.data.description} diff --git a/docs-site/app/layout.tsx b/docs-site/app/layout.tsx index dff95ef4..35a4b1fa 100644 --- 
a/docs-site/app/layout.tsx +++ b/docs-site/app/layout.tsx @@ -1,5 +1,5 @@ import "./global.css"; -import { RootProvider } from "fumadocs-ui/provider"; +import { RootProvider } from "fumadocs-ui/provider/next"; import { Outfit, Inter, Geist_Mono } from "next/font/google"; import type { ReactNode } from "react"; import type { Metadata } from "next"; diff --git a/docs-site/components/code-block.tsx b/docs-site/components/code-block.tsx index 15ae5ce7..8362b304 100644 --- a/docs-site/components/code-block.tsx +++ b/docs-site/components/code-block.tsx @@ -1,18 +1,16 @@ "use client"; import { + type ComponentPropsWithoutRef, type ReactNode, type ReactElement, isValidElement, } from "react"; import { CopyButton } from "./copy-button"; -type Props = { - children?: ReactNode; - className?: string; +type Props = ComponentPropsWithoutRef<"pre"> & { title?: string; - // rehype-pretty-code adds data attributes such as data-language; capture them via index signature - [key: string]: unknown; + "data-language"?: string; }; const TERMINAL_LANGS = new Set(["bash", "sh", "shell", "zsh"]); diff --git a/docs-site/content/docs/cli-reference/ktx-dev.mdx b/docs-site/content/docs/cli-reference/ktx-dev.mdx index e00a4585..16a36393 100644 --- a/docs-site/content/docs/cli-reference/ktx-dev.mdx +++ b/docs-site/content/docs/cli-reference/ktx-dev.mdx @@ -3,7 +3,7 @@ title: "ktx dev" description: "Low-level project initialization and runtime management." --- -`ktx dev` contains development-only project initialization and managed runtime commands. Scan and ingest commands live at the root as [`ktx scan`](/docs/cli-reference/ktx-scan) and [`ktx ingest`](/docs/cli-reference/ktx-ingest). +`ktx dev` contains development-only project initialization and managed runtime commands. Context building lives at the root as [`ktx ingest`](/docs/cli-reference/ktx-ingest). 
## Command signature diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index e1c0e339..e7b8bbe5 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -1,73 +1,59 @@ --- title: "ktx ingest" -description: "Run and inspect local ingest memory-flow output." +description: "Build or refresh KTX context from configured connections." --- -`ktx ingest` runs adapter-level local ingest and renders stored ingest reports. +`ktx ingest` builds or refreshes KTX context from configured connections. +Database connections build schema context. Context-source connections ingest +metadata from tools such as dbt, Looker, Metabase, MetricFlow, LookML, and +Notion. ## Command signature ```bash -ktx ingest [options] +ktx ingest [options] [connectionId] ``` -## Subcommands +Use a connection id to build one configured connection. Use `--all` to build +every configured connection. Database connections run before context-source +connections when you use `--all`. 
-| Subcommand | Description | -|-----------|-------------| -| `run` | Run local ingest for one configured connection and source adapter | -| `status [runId]` | Print status for the latest or selected stored local ingest run or report file | -| `watch [runId]` | Open the latest or selected stored ingest visual report | -| `replay ` | Replay a stored ingest run or bundle report through memory-flow output | - -## `ingest run` +## Build options | Flag | Description | Default | |------|-------------|---------| -| `--connection-id ` | KTX connection id | Required | -| `--adapter ` | Ingest source adapter name | Required | -| `--source-dir ` | Directory containing source files | — | -| `--database-introspection-url ` | Daemon URL for live-database introspection | — | -| `--debug-llm-request-file ` | Write sanitized LLM request structure to a JSONL file | — | +| `--all` | Build every configured connection | `false` | +| `--fast` | Use deterministic database schema ingest | Stored connection default, or `fast` | +| `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` | +| `--query-history` | Include database query-history usage patterns | Stored connection default | +| `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | +| `--query-history-window-days ` | Query-history lookback window for this run | Stored connection default | | `--plain` | Print plain text output | `true` | | `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output | `false` | -| `--yes` | Install the managed Python runtime without prompting when required | `false` | -| `--no-input` | Disable interactive terminal input for visualization and runtime installation | — | +| `--no-input` | Disable interactive terminal input | `false` | -## `ingest status`, `watch`, and `replay` - -| Flag | Description | Default | -|------|-------------|---------| -| `--report-file ` | Bundle ingest report JSON file 
to render | — | -| `--plain` | Print plain text output | `true` for `status` and `replay` | -| `--json` | Print JSON output | `false` | -| `--viz` | Render memory-flow TUI output | `true` for `watch` | -| `--no-input` | Disable interactive terminal input for visualization | — | +`--fast` and `--deep` are mutually exclusive. Depth flags apply only to +database connections. Query-history flags apply only to database connections +that support query history. ## Examples ```bash -ktx ingest run --connection-id my-dbt-source --adapter dbt -ktx ingest run --connection-id prod-metabase --adapter metabase --yes - -ktx ingest status -ktx ingest status run-abc123 -ktx ingest status --json - -ktx ingest watch -ktx ingest watch run-abc123 - -ktx ingest replay run-abc123 -ktx ingest replay run-abc123 --viz -ktx ingest replay run-abc123 --report-file /tmp/ingest-report.json +ktx ingest warehouse +ktx ingest warehouse --fast +ktx ingest warehouse --deep +ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history-window-days 30 +ktx ingest notion +ktx ingest --all +ktx ingest --all --deep ``` ## Common errors | Error | Cause | Recovery | |-------|-------|----------| -| Ingest needs credentials | The source adapter requires API or git access | Configure the referenced environment variable or secret file | -| Ingest run cannot find adapter | `--adapter` does not match a supported source adapter | Use a configured adapter such as `dbt`, `metabase`, `looker`, `lookml`, `notion`, or `live-database` | -| Latest run not found | No ingest run has been started in this project | Run `ktx ingest run --connection-id --adapter ` first | -| Report watch fails in a non-interactive shell | Visual report needs a terminal | Use `ktx ingest status --json` for agent and CI workflows | +| Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` | +| Deep readiness is missing | `--deep` or query history 
needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` | +| Query history is unsupported | The selected database driver does not support query history | Run schema ingest without query-history flags | +| No ingest target was selected | No connection id was provided and `--all` was omitted | Run `ktx ingest <connectionId>` or `ktx ingest --all` | diff --git a/docs-site/content/docs/cli-reference/ktx-scan.mdx b/docs-site/content/docs/cli-reference/ktx-scan.mdx deleted file mode 100644 index 2f73ed99..00000000 --- a/docs-site/content/docs/cli-reference/ktx-scan.mdx +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: "ktx scan" -description: "Run standalone database scans." ---- - -Discover a configured database connection's schema, including tables, columns, types, constraints, and optional relationship signals. - -## Command signature - -```bash -ktx scan [options] -``` - -## Options - -| Flag | Description | Default | -|------|-------------|---------| -| `--mode ` | Scan mode: `structural`, `enriched`, or `relationships` | `structural` | -| `--dry-run` | Run without writing scan results | `false` | -| `--database-introspection-url ` | Daemon URL for live-database introspection | — | -| `--yes` | Install the managed Python runtime without prompting when required | `false` | -| `--no-input` | Disable interactive managed runtime installation | — | - -## Examples - -```bash -ktx scan my-warehouse -ktx scan my-warehouse --mode enriched -ktx scan my-warehouse --mode relationships -ktx scan my-warehouse --dry-run -ktx scan my-warehouse --database-introspection-url http://127.0.0.1:8765 -``` - -## Output - -`ktx scan` prints a human summary and writes scan artifacts under the KTX project directory unless `--dry-run` is set. Use `ktx status` after a scan to inspect project readiness and next setup work. 
- -## Common errors - -| Error | Cause | Recovery | -|-------|-------|----------| -| Scan cannot connect | Connection credentials or network access are invalid | Run `ktx connection test ` and update the connection before scanning | -| Enriched scan cannot describe columns | LLM credentials are missing or invalid | Complete LLM setup with `ktx setup` before enriched scans | -| Relationship scan has limited evidence | The connector cannot provide optional validation or statistics | Re-run with a connector that supports the missing capability, or treat relationship output as lower-confidence context | diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 59fbe666..d2348231 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -30,7 +30,7 @@ ktx setup [options] | `--global` | Install agent integration into the global target scope (Claude Code and Codex only) | `false` | The setup wizard is the public configuration interface. It prompts for LLM -credentials, embeddings, database connections, context sources, Historic SQL, +credentials, embeddings, database connections, context sources, query history, and agent integration when those values are needed. 
## Examples @@ -62,7 +62,7 @@ KTX project: /home/user/analytics Project ready: yes LLM ready: yes (claude-sonnet-4-6) Embeddings ready: yes (text-embedding-3-small) -Primary sources configured: yes (postgres-warehouse) +Databases configured: yes (postgres-warehouse) Context sources configured: yes (dbt-main) KTX context built: yes Agent integration ready: yes (codex:project) diff --git a/docs-site/content/docs/cli-reference/ktx-wiki.mdx b/docs-site/content/docs/cli-reference/ktx-wiki.mdx index 1d57a93f..a6a0ca01 100644 --- a/docs-site/content/docs/cli-reference/ktx-wiki.mdx +++ b/docs-site/content/docs/cli-reference/ktx-wiki.mdx @@ -1,6 +1,6 @@ --- title: "ktx wiki" -description: "List, read, search, or write wiki pages." +description: "List or search wiki pages." --- Manage wiki pages in your KTX project. Wiki pages are Markdown documents that capture business definitions, rules, and gotchas. Agents search them for context when answering questions about your data. @@ -16,9 +16,7 @@ ktx wiki [options] | Subcommand | Description | |-----------|-------------| | `list` | List local wiki pages | -| `read ` | Read one local wiki page | | `search ` | Search local wiki pages | -| `write ` | Write one local wiki page | ## Options @@ -29,13 +27,6 @@ ktx wiki [options] | `--json` | Print JSON output | `false` | | `--user-id ` | Local user id | `local` | -### `wiki read` - -| Flag | Description | Default | -|------|-------------|---------| -| `--json` | Print JSON output | `false` | -| `--user-id ` | Local user id | `local` | - ### `wiki search` | Flag | Description | Default | @@ -44,18 +35,6 @@ ktx wiki [options] | `--user-id ` | Local user id | `local` | | `--limit ` | Maximum search results | — | -### `wiki write` - -| Flag | Description | Default | -|------|-------------|---------| -| `--user-id ` | Local user id | `local` | -| `--scope ` | Scope: `global` or `user` | `global` | -| `--summary ` | Wiki page summary (required) | — | -| `--content ` | Wiki page content 
(required) | — | -| `--tag ` | Wiki tag; repeatable | — | -| `--ref ` | Wiki ref; repeatable | — | -| `--sl-ref ` | Semantic-layer ref; repeatable | — | - ## Examples ```bash @@ -65,48 +44,17 @@ ktx wiki list # List all wiki pages as JSON ktx wiki list --json -# Read a specific wiki page -ktx wiki read revenue-definitions - -# Read a specific wiki page as JSON -ktx wiki read revenue-definitions --json - # Search wiki pages ktx wiki search "monthly recurring revenue" # Search wiki pages as JSON ktx wiki search "monthly recurring revenue" --json --limit 10 - -# Write a global wiki page -ktx wiki write revenue-definitions \ - --summary "Canonical revenue metric definitions" \ - --content "## MRR\nMonthly Recurring Revenue is calculated as..." - -# Write a user-scoped wiki page -ktx wiki write my-notes \ - --scope user \ - --summary "Personal analysis notes" \ - --content "Things to check when revenue numbers look off..." - -# Write a page with tags and references -ktx wiki write churn-rules \ - --summary "Churn calculation business rules" \ - --content "A customer is considered churned when..." \ - --tag finance \ - --tag retention \ - --sl-ref customers \ - --sl-ref subscriptions - -# Write a page with external references -ktx wiki write data-freshness \ - --summary "Data pipeline SLAs and freshness guarantees" \ - --content "The orders table refreshes every 15 minutes..." \ - --ref "https://wiki.example.com/data-pipelines" ``` ## Output -Wiki commands print local wiki pages and search results. Agents should search first, then read the most relevant page by key. +Wiki commands print local wiki page listings and search results. Open the +matching Markdown files directly when you need the full page contents. ```json { @@ -128,6 +76,4 @@ Wiki commands print local wiki pages and search results. 
Agents should search fi | Error | Cause | Recovery | |-------|-------|----------| | Search returns no results | The query terms do not match summaries, tags, or content | Retry with business synonyms, then create a page if the knowledge is missing | -| Read fails for a key | The page key is wrong or scoped to a different user | Run `ktx wiki list` or search again to get the exact key | -| Write fails due to missing fields | `--summary` or `--content` was omitted | Pass both fields, and keep the summary short enough for search results | -| Agent writes duplicate pages | It did not search existing pages first | Always run `ktx wiki search` before `ktx wiki write` | +| A page is missing | No Markdown file exists for that business context | Add a file under `wiki/` or run `ktx ingest <connectionId>` | diff --git a/docs-site/content/docs/cli-reference/meta.json b/docs-site/content/docs/cli-reference/meta.json index bed3f98c..093f75ae 100644 --- a/docs-site/content/docs/cli-reference/meta.json +++ b/docs-site/content/docs/cli-reference/meta.json @@ -4,7 +4,6 @@ "pages": [ "ktx-setup", "ktx-connection", - "ktx-scan", "ktx-ingest", "ktx-sl", "ktx-wiki", diff --git a/docs-site/content/docs/concepts/context-as-code.mdx b/docs-site/content/docs/concepts/context-as-code.mdx index 51141b85..56dca056 100644 --- a/docs-site/content/docs/concepts/context-as-code.mdx +++ b/docs-site/content/docs/concepts/context-as-code.mdx @@ -59,7 +59,10 @@ dbt / Looker / Metabase / Notion A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 wiki page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. -Teams usually run this on demand while setting up a source, then schedule it once the source is stable. 
A cron job or CI schedule can run `ktx ingest run --connection-id --adapter --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. +Teams usually run this on demand while setting up a source, then schedule it +once the source is stable. A cron job or CI schedule can run `ktx ingest --all --no-input` +overnight on an ingest branch so the latest schema context, dbt manifests, BI +metadata, and documentation updates are ready for review each morning. Once merged, agents querying through the KTX CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request. diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index d9021a8e..a76cbe7d 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -134,13 +134,13 @@ my-project/ │ └── data-quality-notes.md ├── raw-sources/ │ └── warehouse/ -│ └── live-database/ # Scan artifacts and reports +│ └── database-ingest/ # Schema ingest artifacts and reports └── .ktx/ ├── db.sqlite # Local state (git-ignored) └── cache/ # Runtime cache (git-ignored) ``` -Semantic sources and wiki pages are committed to git. The SQLite database holds ephemeral state — scan results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run. +Semantic sources and wiki pages are committed to git. The SQLite database holds ephemeral state — schema ingest results, embedding indexes, session logs — and is git-ignored. If you delete it, KTX rebuilds it on the next run. This means your analytics context travels with your code. You can fork it, branch it, review it in a PR, and merge it with the same tools you use for dbt models. 
There's no sync problem between a remote server and your local state. There's no migration to run. The files are the source of truth. diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 635c666b..28728886 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -81,7 +81,8 @@ ktx dev runtime start --feature local-embeddings ## Step 3: Connect a database -Select one or more databases for KTX to scan. The wizard supports SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake. +Select one or more databases for KTX to connect to. The wizard supports +SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake. For PostgreSQL, you can enter connection details field by field or paste a connection URL: @@ -93,22 +94,27 @@ For PostgreSQL, you can enter connection details field by field or paste a conne If your URL contains credentials, KTX saves it to `.ktx/secrets/` and writes a `file:` reference in `ktx.yaml`. You can also use `env:DATABASE_URL` to reference an environment variable. 
-After connecting, KTX automatically runs a connection test and a structural scan: +After connecting, KTX automatically runs a connection test and builds fast +schema context: ``` -◇ Testing postgres-warehouse -│ ✓ Connection test passed -│ Driver: PostgreSQL · Tables: 42 -│ -◇ Scanning postgres-warehouse -│ ✓ Structural scan completed -│ Changes: 42 new tables -│ -◇ Primary source ready -│ postgres-warehouse · PostgreSQL · structural scan complete +Testing postgres-warehouse + Connection test passed + Driver: PostgreSQL - Tables: 42 + +Building schema context for postgres-warehouse + Running fast database ingest + +Schema context complete for postgres-warehouse + Changes: 42 new tables + +Database ready + postgres-warehouse - PostgreSQL - schema context complete ``` -For Snowflake and BigQuery, the wizard offers **Historic SQL** configuration for query history views. For PostgreSQL, enable Historic SQL with `--enable-historic-sql` when `pg_stat_statements` is configured. +For PostgreSQL, Snowflake, and BigQuery, the wizard can enable query-history +ingest when the warehouse history feature is available. The query-history setting is stored +under `connections.<connectionId>.context.queryHistory` in `ktx.yaml`. ## Step 4: Add context sources @@ -138,7 +144,8 @@ Context sources are saved to `ktx.yaml` and built during the next step. ## Step 5: Build context -This is where KTX does the heavy lifting. It runs an enriched scan of your database (generating AI-powered column and table descriptions) and ingests metadata from any configured context sources. +This is where KTX builds agent-ready context. It uses the database context +depth saved by setup and ingests metadata from any configured context sources. ``` ◆ Build KTX context for agents? @@ -146,27 +153,22 @@ This is where KTX does the heavy lifting. 
It runs an enriched scan of your datab │ ○ Leave context unbuilt and exit setup ``` -The build scans each primary source with LLM enrichment, detects table relationships, and runs ingestion agents that reconcile metadata from your context sources into semantic-layer YAML files and wiki pages. +Fast database context builds deterministic schema grounding. Deep database +context also generates AI descriptions, embeddings, and relationship evidence +when those capabilities are configured. -For a small database (under 50 tables), this takes a few minutes. Larger warehouses can take longer. You can press d to detach and let it run in the background: - -``` -KTX context build -Run: setup-context-local-abc123 -Project: /home/user/analytics - -Detach: press d to leave this running. -Resume: ktx setup --project-dir /home/user/analytics -Status: ktx status --project-dir /home/user/analytics -``` +For a small database (under 50 tables), this can take a few minutes. Larger +warehouses can take longer. Context builds run in the foreground; press +Ctrl+C to stop the current run and rerun `ktx setup` or `ktx ingest` +when you are ready to try again. When the build completes, KTX verifies that agent-ready context was produced: ``` KTX context is ready for agents. 
-Primary sources: - postgres-warehouse: enriched scan complete +Databases: + postgres-warehouse: deep context complete Context sources: dbt-main: memory update complete @@ -209,8 +211,8 @@ KTX writes project state as plain files so agents can inspect and edit changes i | `ktx.yaml` | `ktx setup` | Main project configuration: connections, LLM settings, embeddings, and context sources | | `.ktx/secrets/*` | `ktx setup` when file-backed secrets are selected | Local secret files referenced from `ktx.yaml`; do not commit these | | `semantic-layer//*.yaml` | context build, ingestion, or direct file edits | Semantic source definitions agents use for SQL generation | -| `wiki/global/*.md` | ingestion, memory capture, `ktx wiki write --scope global`, or direct file edits | Shared business context and metric definitions | -| `wiki/user//*.md` | memory capture, `ktx wiki write --scope user`, or direct file edits | User-scoped notes for one agent/user context | +| `wiki/global/*.md` | ingestion, memory capture, or direct file edits | Shared business context and metric definitions | +| `wiki/user//*.md` | memory capture or direct file edits | User-scoped notes for one agent/user context | | `.claude/skills/ktx/SKILL.md`, `.agents/skills/ktx/SKILL.md` | CLI-mode agent integration setup | Agent instructions for calling public `ktx` commands | ## Verify it worked @@ -226,7 +228,7 @@ KTX project: /home/user/analytics Project ready: yes LLM ready: yes (claude-sonnet-4-6) Embeddings ready: yes (text-embedding-3-small) -Primary sources configured: yes (postgres-warehouse) +Databases configured: yes (postgres-warehouse) Context sources configured: yes (dbt-main) KTX context built: yes Agent integration ready: yes (claude-code:project) @@ -246,7 +248,7 @@ Agent integration ready: yes (claude-code:project) ## Next steps -- **Build more context** — learn about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide. 
+- **Build more context** — learn about [database ingest](/docs/guides/building-context), relationship detection, and source ingestion workflows in the Building Context guide. - **Refine your semantic layer** — the [Writing Context](/docs/guides/writing-context) guide covers source YAML, measures, joins, and wiki pages. - **Understand the architecture** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why a context layer is more than a semantic layer. - **Connect more agents** — see the [Agent Clients](/docs/integrations/agent-clients) integration page for per-tool setup details. diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index c3821a52..5dcf2422 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -1,39 +1,48 @@ --- title: Building Context -description: Scan your database schema and ingest context from dbt, Looker, Metabase, and more. +description: Build database and source context from configured KTX connections. --- -Building context is a two-step process. First, you **scan** your database to discover its structure — tables, columns, types, constraints, and relationships. Then you **ingest** from your existing tools to enrich that structure with semantic meaning — metric definitions, business descriptions, join logic, and knowledge that agents need to generate correct analytics. +Building context reads your configured connections and writes local context that +agents can use. Database connections produce schema context, and source +connections such as dbt, Looker, Metabase, and Notion produce semantic sources +and wiki pages. -## Scanning +## Database ingest -Scanning connects to your database and extracts structural metadata. KTX stores the results locally so agents can understand your schema without querying the database directly. 
+Database ingest connects to your warehouse and extracts structural metadata. +KTX stores the results locally so agents can understand your schema without +querying the database directly. -### Running a scan +### Running database ingest ```bash -ktx scan +ktx ingest <connectionId> ``` -This runs a structural scan by default. You can control what the scan does with the `--mode` flag: +This runs the connection's stored ingest depth, or a fast schema ingest when +none is stored. Choose the depth for a single run with these flags: -| Mode | What it does | +| Flag | What it does | |------|-------------| -| `structural` | Tables, columns, types, constraints, row counts (default) | -| `enriched` | Structural scan plus LLM-generated column descriptions | -| `relationships` | Structural scan plus foreign key relationship detection | +| `--fast` | Tables, columns, types, constraints, and row counts | +| `--deep` | Fast ingest plus AI-enriched database context | ```bash -# Scan with relationship detection -ktx scan my-postgres --mode relationships +# Build one connection quickly +ktx ingest my-postgres --fast -# Preview without writing results -ktx scan my-postgres --dry-run +# Build AI-enriched database context +ktx ingest my-postgres --deep + +# Build all configured connections +ktx ingest --all ``` -### Checking scan results +### Checking results -Every scan prints a summary and writes local artifacts. Use `ktx status` after a scan to review project readiness and follow-up setup work: +Every ingest prints a summary and writes local artifacts. Use `ktx status` +after ingest to review project readiness and follow-up setup work: ```bash ktx status @@ -49,7 +58,9 @@ Many databases lack declared foreign keys. KTX infers relationships by scoring c | 0.55 – 0.84 | `review` | Plausible — needs human review | | < 0.55 | `rejected` | Low confidence — not applied | -Relationship scans run with `ktx scan --mode relationships`. This command only executes the scan; relationship review and calibration subcommands are not part of the current CLI surface. 
+Deep database ingest can include relationship evidence where the connector can +provide it. Relationship review and calibration subcommands are not part of the +current public CLI surface. ## Ingestion @@ -66,50 +77,34 @@ Each ingest run follows this flow: ### Running an ingest ```bash -ktx ingest run --connection-id my-dbt-source --adapter dbt +ktx ingest my-dbt-source ``` -Useful low-level flags: +Useful output flags: | Flag | Description | |------|-------------| -| `--source-dir ` | Directory containing source files (e.g., your dbt project) | -| `--viz` | Render the memory-flow TUI for real-time progress | | `--json` | Output as JSON | | `--plain` | Plain text output | -### Watching progress +Context builds run in the foreground and cannot be detached into a background session. If a +run is interrupted, rerun `ktx ingest <connectionId>` or `ktx ingest --all`. +### Supported context sources -```bash -# Check status of the latest ingest -ktx ingest status - -# Check a specific run -ktx ingest status - -# Open the visual ingest report (TUI) -ktx ingest watch - -# Replay a past ingest run -ktx ingest replay -``` - -The `watch` command opens an interactive TUI that shows the memory-flow output — every tool call, LLM decision, and artifact written during the ingest. 
- -### Available adapters - -| Adapter | Source | What gets ingested | -|---------|--------|--------------------| +| Driver | Source | What gets ingested | +|--------|--------|--------------------| | `dbt` | dbt project | Model definitions, column descriptions, tests, tags | | `metricflow` | MetricFlow semantic models | Metrics, dimensions, entities, semantic joins | | `lookml` | LookML files | Views, explores, dimensions, measures, joins | | `looker` | Looker API | Explores, looks, dashboard metadata | | `metabase` | Metabase API | Questions, dashboards, table metadata | | `notion` | Notion API | Database pages, knowledge articles | -| `historic-sql` | Query history | Frequent queries, usage patterns, runtime stats | -| `live-database` | Direct DB connection | Live schema introspection | -See [Context Sources](/docs/integrations/context-sources) for adapter-specific setup and auth configuration. +Query history is a database connection facet. Enable it with +`connections..context.queryHistory` or pass `--query-history` for a current +run. See [Context Sources](/docs/integrations/context-sources) for +driver-specific setup and auth configuration. ### What gets generated @@ -169,12 +164,8 @@ sl_refs: [orders] Orders in "pending" status for more than 48 hours are flagged for review. ``` -### Deterministic replay +### Ingest transcripts -Every ingest session records a full transcript — tool calls, LLM responses, and write decisions. You can replay any session to debug why a source was written a certain way: - -```bash -ktx ingest replay --viz -``` - -This opens the same TUI view as the original run, letting you step through the agent's reasoning. +Every ingest session records a full transcript: tool calls, LLM responses, and +write decisions. Inspect the stored transcript files when you need to debug why +a source was written a certain way. 
diff --git a/docs-site/content/docs/guides/writing-context.mdx b/docs-site/content/docs/guides/writing-context.mdx index b6ca3597..b5a6db5c 100644 --- a/docs-site/content/docs/guides/writing-context.mdx +++ b/docs-site/content/docs/guides/writing-context.mdx @@ -248,8 +248,7 @@ wiki/ ### Editing pages Create and edit wiki pages directly as Markdown files in the `wiki/` -directory, or with `ktx wiki write`. Ingest and memory capture also create -these pages automatically. +directory. Ingest and memory capture also create these pages automatically. Wiki page fields: diff --git a/docs-site/content/docs/integrations/agent-clients.mdx b/docs-site/content/docs/integrations/agent-clients.mdx index 95786f52..6f7e7660 100644 --- a/docs-site/content/docs/integrations/agent-clients.mdx +++ b/docs-site/content/docs/integrations/agent-clients.mdx @@ -125,8 +125,6 @@ All supported agent clients call the same KTX CLI commands: |---------|-------------| | `ktx status --json` | Return project setup and context readiness | | `ktx wiki search --json` | Search wiki pages | -| `ktx wiki read --json` | Read a wiki page | -| `ktx wiki write ` | Write or update a wiki page | | `ktx sl list --json` | List semantic-layer sources | | `ktx sl search --json` | Search semantic-layer sources | | `ktx sl validate --connection-id ` | Validate semantic source definitions | diff --git a/docs-site/content/docs/integrations/context-sources.mdx b/docs-site/content/docs/integrations/context-sources.mdx index d741b17e..c2afd8ab 100644 --- a/docs-site/content/docs/integrations/context-sources.mdx +++ b/docs-site/content/docs/integrations/context-sources.mdx @@ -9,12 +9,13 @@ All context sources are configured in `ktx.yaml` under `connections` with their ## Ingestion workflow -Agents should configure and ingest context sources in this order: +Agents must configure and ingest context sources in this order: 1. Add the context source connection in `ktx.yaml` or with `ktx setup`. 2. 
Store tokens as `env:NAME` or `file:/path/to/secret`. -3. Run `ktx ingest run --connection-id --adapter ` for one source or `ktx ingest run --connection-id --adapter `. -4. Check progress with `ktx ingest status --json`. +3. Run `ktx ingest <connection-id>` for one source or `ktx ingest --all` for + every configured source. +4. Review the foreground ingest output. 5. Review generated `semantic-layer/` YAML and `wiki/` Markdown files in git. 6. Validate changed semantic sources with `ktx sl validate`. diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index 46fa3716..b15d93ab 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -3,13 +3,17 @@ title: Primary Sources description: Connect KTX to PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite. --- -KTX connects to your data warehouse or database to scan schemas, discover relationships, and execute semantic layer queries. Each connection is defined in `ktx.yaml` under the `connections` key. +KTX connects to your data warehouse or database to build schema context, +discover relationships, and execute semantic layer queries. Each connection is +defined in `ktx.yaml` under the `connections` key.
All connectors share these conventions: -- Sensitive values support `env:VAR_NAME` (read from environment) and `file:/path/to/secret` (read from file) references -- Connections are read-only — KTX never writes to your database -- Schema scanning discovers tables, columns, types, and constraints automatically +- Sensitive values support `env:VAR_NAME` (read from environment) and + `file:/path/to/secret` (read from file) references +- Connections are read-only; KTX never writes to your database +- Database ingest discovers tables, columns, types, and constraints + automatically ## Connection field reference @@ -21,7 +25,7 @@ Agents should prefer environment or file references over literal secrets. | `url` | One of the connection methods | URL-style connectors | Database URL, `env:NAME`, or `file:/path/to/secret` | | `host`, `port`, `database`, `username`, `password` | One of the connection methods | PostgreSQL, MySQL, ClickHouse, SQL Server | Field-by-field connection values | | `schema` or `schemas` | No | schema-aware warehouses | Single schema or list of schemas to scan | -| `historicSql` | No | supported warehouses | Enables query-history ingestion when the warehouse supports it | +| `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it | | `path` | Yes for path-style SQLite | SQLite | Local SQLite database path or `env:NAME` reference | | `max_bytes_billed` | No | BigQuery | Maximum bytes billed per query job | | `job_timeout_ms` | No | BigQuery | BigQuery query job timeout in milliseconds | @@ -29,7 +33,7 @@ Agents should prefer environment or file references over literal secrets. ## PostgreSQL -The most full-featured connector. Supports schema introspection, foreign key detection, column statistics, and historic SQL via `pg_stat_statements`. +The most full-featured connector. 
Supports schema introspection, foreign key detection, column statistics, and query history via `pg_stat_statements`. ### Connection config @@ -75,12 +79,13 @@ connections: | Foreign keys | Yes | Full constraint detection | | Row count estimates | Yes | Via `pg_class.reltuples` | | Column statistics | Yes | Requires `pg_read_all_stats` role | -| Historic SQL | Yes | Via `pg_stat_statements` extension | +| Query history | Yes | Via `pg_stat_statements` extension | | Table sampling | Yes | `TABLESAMPLE SYSTEM` | -### Historic SQL +### Query history -PostgreSQL Historic SQL mines real query patterns from `pg_stat_statements`. This is the most mature local Historic SQL path and helps KTX understand how your team actually queries the data. +PostgreSQL query history mines real query patterns from `pg_stat_statements`. +This helps KTX understand how your team actually queries the data. **Requirements:** - `pg_stat_statements` extension enabled @@ -89,12 +94,12 @@ PostgreSQL Historic SQL mines real query patterns from `pg_stat_statements`. Thi **Config options:** ```yaml -historicSql: - enabled: true - dialect: postgres - minExecutions: 5 - filters: - dropTrivialProbes: true + context: + queryHistory: + enabled: true + minExecutions: 5 + filters: + dropTrivialProbes: true ``` ### Dialect notes @@ -108,7 +113,7 @@ historicSql: ## Snowflake -Connects via the Snowflake SDK. Supports multi-schema scanning, RSA key authentication, and Historic SQL configuration for Snowflake query history. +Connects via the Snowflake SDK. Supports multi-schema scanning, RSA key authentication, and query-history configuration for Snowflake query history. 
### Connection config @@ -150,27 +155,27 @@ For multiple schemas: | Foreign keys | No | Not available in Snowflake | | Row count estimates | Yes | From `INFORMATION_SCHEMA.TABLES.ROW_COUNT` | | Column statistics | No | — | -| Historic SQL | Yes | Via `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` when enabled | +| Query history | Yes | Via `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` when enabled | | Table sampling | Yes | — | -### Historic SQL +### Query history -Snowflake Historic SQL reads aggregated query-history templates from +Snowflake query history reads aggregated query-history templates from `SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY` and feeds the same unified staged artifact shape as Postgres and BigQuery. ```yaml -historicSql: - enabled: true - dialect: snowflake - windowDays: 90 - minExecutions: 5 - filters: - dropTrivialProbes: true - serviceAccounts: - patterns: ['^svc_'] - mode: exclude - redactionPatterns: [] + context: + queryHistory: + enabled: true + windowDays: 90 + minExecutions: 5 + filters: + dropTrivialProbes: true + serviceAccounts: + patterns: ['^svc_'] + mode: exclude + redactionPatterns: [] ``` ### Dialect notes @@ -184,7 +189,7 @@ historicSql: ## BigQuery -Authenticates via GCP service account credentials. Supports multi-dataset scanning and Historic SQL configuration for `INFORMATION_SCHEMA.JOBS_BY_PROJECT`. +Authenticates via GCP service account credentials. Supports multi-dataset scanning and query-history configuration for `INFORMATION_SCHEMA.JOBS_BY_PROJECT`. ### Connection config @@ -227,27 +232,27 @@ mapping metadata. 
The BigQuery connector still authenticates with the | Foreign keys | No | Not available in BigQuery | | Row count estimates | Yes | From table metadata | | Column statistics | No | — | -| Historic SQL | Yes | Via region-scoped `INFORMATION_SCHEMA.JOBS_BY_PROJECT` when enabled | +| Query history | Yes | Via region-scoped `INFORMATION_SCHEMA.JOBS_BY_PROJECT` when enabled | | Table sampling | Yes | — | -### Historic SQL +### Query history -BigQuery Historic SQL reads aggregated query-history templates from +BigQuery query history reads aggregated query-history templates from region-scoped `INFORMATION_SCHEMA.JOBS_BY_PROJECT` and feeds the same unified staged artifact shape as Postgres and Snowflake. ```yaml -historicSql: - enabled: true - dialect: bigquery - windowDays: 90 - minExecutions: 5 - filters: - dropTrivialProbes: true - serviceAccounts: - patterns: ['@bot\\.'] - mode: exclude - redactionPatterns: [] + context: + queryHistory: + enabled: true + windowDays: 90 + minExecutions: 5 + filters: + dropTrivialProbes: true + serviceAccounts: + patterns: ['@bot\\.'] + mode: exclude + redactionPatterns: [] ``` ### Dialect notes @@ -303,7 +308,7 @@ connections: | Foreign keys | No | Not a ClickHouse concept | | Row count estimates | Yes | Via `system.parts` aggregation | | Column statistics | No | — | -| Historic SQL | No | — | +| Query history | No | — | | Table sampling | Yes | — | ### Dialect notes @@ -360,7 +365,7 @@ connections: | Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` | | Row count estimates | Yes | From `TABLE_ROWS` (InnoDB estimate) | | Column statistics | No | — | -| Historic SQL | No | — | +| Query history | No | — | | Table sampling | Yes | Uses `RAND()` filter | ### Dialect notes @@ -426,7 +431,7 @@ For multiple schemas: | Foreign keys | Yes | Via `REFERENTIAL_CONSTRAINTS` | | Row count estimates | Yes | Via `sys.dm_db_partition_stats` | | Column statistics | No | — | -| Historic SQL | No | — | +| Query history | No | — | | Table sampling | Yes | 
— | | Nested analysis | No | — | @@ -484,7 +489,7 @@ No authentication required — SQLite is file-based. The file must be readable b | Foreign keys | Yes | Via `PRAGMA foreign_key_list()` (requires `PRAGMA foreign_keys = ON`) | | Row count estimates | Yes | Exact count via `SELECT COUNT(*)` | | Column statistics | No | — | -| Historic SQL | No | — | +| Query history | No | — | | Table sampling | Yes | — | | Nested analysis | No | — | @@ -502,7 +507,7 @@ No authentication required — SQLite is file-based. The file must be readable b | Error or symptom | Likely cause | Recovery | |------------------|--------------|----------| | Connection URL appears in git diff | A literal credential URL was written to `ktx.yaml` | Replace it with `env:NAME` or `file:/path/to/secret` and rotate exposed credentials | -| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | -| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup | -| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output | +| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | +| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest --query-history` or `ktx setup` | +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context | | Semantic query execution fails | Connection is missing, unreachable, or 
query execution is disabled | Run `ktx connection test ` and check the `ktx sl query` flags | diff --git a/docs-site/lib/docs-markdown.ts b/docs-site/lib/docs-markdown.ts new file mode 100644 index 00000000..60312627 --- /dev/null +++ b/docs-site/lib/docs-markdown.ts @@ -0,0 +1,33 @@ +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; + +export async function readDocsPageMarkdown(slugs: string[]) { + if ( + slugs.length === 0 || + slugs.some((segment) => segment.includes("/") || segment.includes("..")) + ) { + throw new Error(`Invalid docs page slug: ${slugs.join("/")}`); + } + + const docsRoot = join(process.cwd(), "content/docs"); + const directPath = join(docsRoot, `${slugs.join("/")}.mdx`); + + try { + return await readFile(directPath, "utf8"); + } catch (error) { + if (!isNotFoundError(error)) { + throw error; + } + } + + return readFile(join(docsRoot, slugs.join("/"), "index.mdx"), "utf8"); +} + +function isNotFoundError(error: unknown) { + return ( + typeof error === "object" && + error !== null && + "code" in error && + error.code === "ENOENT" + ); +} diff --git a/docs-site/lib/llm-docs.ts b/docs-site/lib/llm-docs.ts index cbf9ba9e..561f73e0 100644 --- a/docs-site/lib/llm-docs.ts +++ b/docs-site/lib/llm-docs.ts @@ -1,4 +1,5 @@ import { source } from "@/lib/source"; +import { readDocsPageMarkdown } from "@/lib/docs-markdown"; const siteOrigin = "https://docs.kaelio.com/ktx"; @@ -102,7 +103,7 @@ function toLlmDocsPage(page: ReturnType[number]) { url: page.url, markdownUrl: `${page.url}.md`, slug: page.slugs, - getMarkdown: async () => normalizeMarkdown(page.data.content), + getMarkdown: async () => normalizeMarkdown(await readDocsPageMarkdown(page.slugs)), } satisfies LlmDocsPage; } diff --git a/docs-site/lib/source.ts b/docs-site/lib/source.ts index 2fbc90cc..edbe1cbf 100644 --- a/docs-site/lib/source.ts +++ b/docs-site/lib/source.ts @@ -1,4 +1,4 @@ -import { docs } from "@/.source"; +import { docs } from "@/.source/server"; 
import { loader } from "fumadocs-core/source"; export const source = loader({ diff --git a/docs-site/next-env.d.ts b/docs-site/next-env.d.ts index 830fb594..9edff1c7 100644 --- a/docs-site/next-env.d.ts +++ b/docs-site/next-env.d.ts @@ -1,6 +1,6 @@ /// /// -/// +import "./.next/types/routes.d.ts"; // NOTE: This file should not be edited // see https://nextjs.org/docs/app/api-reference/config/typescript for more information. diff --git a/docs-site/tests/docs-index-route.test.mjs b/docs-site/tests/docs-index-route.test.mjs index 859ae54e..ddcd3181 100644 --- a/docs-site/tests/docs-index-route.test.mjs +++ b/docs-site/tests/docs-index-route.test.mjs @@ -1,7 +1,103 @@ import assert from "node:assert/strict"; -import test from "node:test"; +import { spawn } from "node:child_process"; +import { once } from "node:events"; +import { readFile, writeFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; +import { createServer } from "node:net"; +import { after, before, test } from "node:test"; +import { setTimeout as delay } from "node:timers/promises"; +import { fileURLToPath } from "node:url"; -const docsSiteUrl = process.env.DOCS_SITE_URL ?? 
"http://localhost:3000"; +const configuredDocsSiteUrl = process.env.DOCS_SITE_URL; +let docsSiteUrl = configuredDocsSiteUrl; +let docsServer; +let docsServerOutput = ""; +let nextEnvPath; +let nextEnvContents; + +async function getAvailablePort() { + const server = createServer(); + server.listen(0, "127.0.0.1"); + await once(server, "listening"); + + const address = server.address(); + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) reject(error); + else resolve(); + }); + }); + + assert.equal(typeof address, "object"); + assert.notEqual(address, null); + return address.port; +} + +function appendDocsServerOutput(chunk) { + docsServerOutput = `${docsServerOutput}${chunk.toString()}`.slice(-4000); +} + +async function waitForDocsServer() { + for (let attempt = 0; attempt < 150; attempt += 1) { + if (docsServer?.exitCode !== null) { + throw new Error( + `Docs server exited before it was ready.\n${docsServerOutput}`, + ); + } + + try { + await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" }); + return; + } catch { + await delay(200); + } + } + + throw new Error(`Timed out waiting for docs server.\n${docsServerOutput}`); +} + +before(async () => { + if (configuredDocsSiteUrl) { + return; + } + + const docsSiteDir = join( + dirname(fileURLToPath(import.meta.url)), + "..", + ); + nextEnvPath = join(docsSiteDir, "next-env.d.ts"); + nextEnvContents = await readFile(nextEnvPath, "utf8"); + + const port = await getAvailablePort(); + docsSiteUrl = `http://127.0.0.1:${port}`; + docsServer = spawn( + "pnpm", + ["exec", "next", "dev", "--hostname", "127.0.0.1", "--port", `${port}`], + { + cwd: docsSiteDir, + env: { ...process.env, NEXT_TELEMETRY_DISABLED: "1" }, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + docsServer.stdout.on("data", appendDocsServerOutput); + docsServer.stderr.on("data", appendDocsServerOutput); + + await waitForDocsServer(); +}); + +after(async () => { + if (docsServer && docsServer.exitCode === null) { + 
docsServer.kill("SIGTERM"); + await Promise.race([ + once(docsServer, "exit"), + delay(5000).then(() => docsServer?.kill("SIGKILL")), + ]); + } + + if (nextEnvPath && nextEnvContents !== undefined) { + await writeFile(nextEnvPath, nextEnvContents); + } +}); test("/docs redirects to the docs introduction", async () => { const response = await fetch(`${docsSiteUrl}/docs`, { redirect: "manual" }); diff --git a/docs-site/tsconfig.json b/docs-site/tsconfig.json index e2a2cc10..bccc54a5 100644 --- a/docs-site/tsconfig.json +++ b/docs-site/tsconfig.json @@ -15,7 +15,7 @@ "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, - "jsx": "preserve", + "jsx": "react-jsx", "incremental": true, "plugins": [ { @@ -33,7 +33,8 @@ "**/*.tsx", ".source/**/*.ts", "next-env.d.ts", - ".next/types/**/*.ts" + ".next/types/**/*.ts", + ".next/dev/types/**/*.ts" ], "exclude": [ "node_modules" diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-public-cli-surface.md b/docs/superpowers/plans/2026-05-13-unified-ingest-public-cli-surface.md new file mode 100644 index 00000000..279d02a8 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-public-cli-surface.md @@ -0,0 +1,1584 @@ +# Unified Ingest Public CLI Surface Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make `ktx ingest` the public foreground context-build command for one connection or all configured connections. + +**Architecture:** Reuse the existing `public-ingest.ts` orchestration as the public command engine, then extend it to resolve database depth, query-history intent, warnings, and adapter bypasses for connection-centric ingest. 
Keep low-level `scan` and adapter-backed `ingest run` available as hidden debug surfaces while normal help, output, generated config, and setup recovery text point to `ktx ingest `. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages, existing scan and local ingest adapters. + +--- + +## Current audit + +The unified ingest spec is not v1-complete. Relevant implemented pieces exist, +but they are not wired as the public product surface: + +- `packages/cli/src/public-ingest.ts` can plan database connections before + source connections and can call scan or source ingest internals. +- `packages/cli/src/context-build-view.ts` renders a foreground progress view + and captures target progress. +- Historic SQL internals exist in `packages/context/src/ingest/adapters/historic-sql` + and CLI adapter wiring exists in `packages/cli/src/local-adapters.ts`. +- The public CLI still rejects `ktx ingest warehouse`; see + `packages/cli/src/index.test.ts`, test name + `rejects removed public ingest shorthand`. +- Root help still exposes `scan`, `ktx ingest --help` still exposes `run` and + `watch`, and generated default config still includes `live-database`. + +This plan addresses the first v1-blocking slice: the public command surface, +connection-centric execution, public depth flags, query-history run overrides, +hidden legacy debug commands, and stale public wording. Setup depth prompting +and foreground-only state cleanup remain separate v1-blocking work after this +slice. + +## File structure + +- Modify `packages/cli/src/cli-runtime.ts`: add an injectable + `publicIngest` dependency for Commander tests and for command routing. +- Modify `packages/cli/src/commands/ingest-commands.ts`: make the parent + `ktx ingest` command accept `[connectionId]`, `--all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days`; hide legacy `run` and `watch`. 
+- Modify `packages/cli/src/commands/scan-commands.ts`: hide `ktx scan` from + root help while keeping direct debug invocation. +- Modify `packages/cli/src/cli-program.ts`: remove `scan` from normal + project-aware root command help expectations only where user-facing. +- Modify `packages/cli/src/public-ingest.ts`: resolve target type, depth, + query-history settings, warnings, readiness failures, and adapter bypasses. +- Modify `packages/cli/src/context-build-view.ts`: rename public labels and + public operation text away from scan terminology. +- Modify `packages/cli/src/ingest.ts`: allow connection-centric ingest to run + an inferred adapter without requiring `ingest.adapters`. +- Modify `packages/cli/src/local-adapters.ts`: accept current-run + query-history overrides for `context.queryHistory` without rewriting config. +- Modify `packages/context/src/project/config.ts`: stop generating + `live-database` and source adapters in default `ktx.yaml`. +- Modify `packages/cli/src/setup-sources.ts`: replace stale recovery command + suggestions with `ktx ingest <connection-id>`. +- Modify `README.md` and script assertions that document normal public command + output.
+ +## Tasks + +### Task 1: Route the public `ktx ingest` command + +**Files:** +- Modify: `packages/cli/src/cli-runtime.ts` +- Modify: `packages/cli/src/commands/ingest-commands.ts` +- Modify: `packages/cli/src/index.test.ts` +- Modify: `packages/cli/src/dev.test.ts` + +- [ ] **Step 1: Write failing Commander routing tests** + +In `packages/cli/src/index.test.ts`, replace the test named +`rejects removed public ingest shorthand` with: + +```ts + it('routes public connection-centric ingest shorthand', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn().mockResolvedValue(0); + + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--no-input'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + + expect(publicIngest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + depth: 'fast', + queryHistory: 'default', + }, + testIo.io, + ); + expect(testIo.stderr()).toBe('Project: /tmp/project\n'); + }); + + it('routes public ingest --all --deep with JSON output', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn().mockResolvedValue(0); + + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', '--all', '--deep', '--json'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + + expect(publicIngest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + all: true, + json: true, + inputMode: 'auto', + depth: 'deep', + queryHistory: 'default', + }, + testIo.io, + ); + expect(testIo.stderr()).toBe(''); + }); + + it('rejects mutually exclusive public ingest depth flags before dispatch', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn().mockResolvedValue(0); + + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--deep'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(1); 
+ + expect(publicIngest).not.toHaveBeenCalled(); + expect(testIo.stderr()).toContain("option '--deep' cannot be used with option '--fast'"); + }); +``` + +In the existing ingest help test, change the expected help assertions to: + +```ts + expect(testIo.stdout()).toContain('Usage: ktx ingest [options] [connectionId]'); + expect(testIo.stdout()).toContain('Build or inspect KTX context'); + expect(testIo.stdout()).toContain('--all'); + expect(testIo.stdout()).toContain('--fast'); + expect(testIo.stdout()).toContain('--deep'); + expect(testIo.stdout()).toContain('--query-history'); + expect(testIo.stdout()).toContain('--no-query-history'); + expect(testIo.stdout()).toContain('--query-history-window-days '); + expect(testIo.stdout()).toContain('status'); + expect(testIo.stdout()).toContain('replay'); + expect(testIo.stdout()).not.toContain('run'); + expect(testIo.stdout()).not.toContain('watch'); +``` + +In `packages/cli/src/dev.test.ts`, update the generated nested help case for +`['ingest', 'run', '--help']` so it no longer treats legacy run help as a +normal generated public help case. 
Add this direct hidden-command regression +test instead: + +```ts + it('keeps legacy adapter-backed ingest run callable but hidden from ingest help', async () => { + const helpIo = makeIo(); + const runIo = makeIo(); + const ingest = vi.fn(async () => 0); + + await expect(runKtxCli(['ingest', '--help'], helpIo.io, { ingest })).resolves.toBe(0); + await expect( + runKtxCli( + ['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase', '--project-dir', '/tmp/project'], + runIo.io, + { ingest }, + ), + ).resolves.toBe(0); + + expect(helpIo.stdout()).not.toContain('run'); + expect(ingest).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', connectionId: 'warehouse', adapter: 'metabase' }), + runIo.io, + ); + }); +``` + +- [ ] **Step 2: Run the failing Commander tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/dev.test.ts -t "public connection-centric ingest|public ingest --all|mutually exclusive public ingest|legacy adapter-backed ingest run|prints ingest help" +``` + +Expected: FAIL because `KtxCliDeps` has no `publicIngest`, `ktx ingest +warehouse` is unknown, and `run`/`watch` are still visible in help. 
+ +- [ ] **Step 3: Add the injectable public ingest dependency** + +In `packages/cli/src/cli-runtime.ts`, add this import near the existing CLI +argument type imports: + +```ts +import type { KtxPublicIngestArgs } from './public-ingest.js'; +``` + +In `KtxCliDeps`, add: + +```ts + publicIngest?: (args: KtxPublicIngestArgs, io: KtxCliIo) => Promise<number>; +``` + +- [ ] **Step 4: Register parent `ktx ingest` options and hidden legacy commands** + +In `packages/cli/src/commands/ingest-commands.ts`, add: + +```ts +import type { KtxPublicIngestArgs } from '../public-ingest.js'; +import { parsePositiveIntegerOption } from '../cli-program.js'; +``` + +Replace the current `const ingest = program.command('ingest')...` block with: + +```ts + const ingest = program + .command('ingest') + .description('Build or inspect KTX context') + .argument('[connectionId]', 'Configured connection id to ingest') + .option('--all', 'Ingest all configured connections', false) + .addOption(new Option('--fast', 'Use deterministic database schema ingest').conflicts('deep')) + .addOption(new Option('--deep', 'Use AI-enriched database ingest').conflicts('fast')) + .addOption(new Option('--query-history', 'Include database query-history usage patterns').conflicts('noQueryHistory')) + .addOption(new Option('--no-query-history', 'Skip database query-history usage patterns')) + .option('--query-history-window-days <days>', 'Query-history lookback window for this run', parsePositiveIntegerOption) + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain'])) + .option('--no-input', 'Disable interactive terminal input') + .showHelpAfterError(); + + ingest.action(async (connectionId: string | undefined, options, command) => { + const { runKtxPublicIngest } = await import('../public-ingest.js'); + const queryHistory = + options.queryHistory === true ? 'enabled' : options.queryHistory === false ?
'disabled' : 'default'; + const args: KtxPublicIngestArgs = { + command: 'run', + projectDir: resolveCommandProjectDir(command), + ...(connectionId ? { targetConnectionId: connectionId } : {}), + all: options.all === true, + json: options.json === true, + inputMode: options.input === false ? 'disabled' : 'auto', + ...(options.fast === true ? { depth: 'fast' as const } : {}), + ...(options.deep === true ? { depth: 'deep' as const } : {}), + queryHistory, + ...(options.queryHistoryWindowDays !== undefined + ? { queryHistoryWindowDays: options.queryHistoryWindowDays } + : {}), + }; + context.setExitCode(await (context.deps.publicIngest ?? runKtxPublicIngest)(args, context.io)); + }); +``` + +Then hide the legacy `run` and `watch` subcommands by changing: + +```ts + .command('run') +``` + +to: + +```ts + .command('run', { hidden: true }) +``` + +and changing: + +```ts + .command('watch') +``` + +to: + +```ts + .command('watch', { hidden: true }) +``` + +- [ ] **Step 5: Run Commander tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/dev.test.ts +``` + +Expected: PASS after updating any remaining help text expectations that still +assume public `run` or `watch`. 
+ +- [ ] **Step 6: Commit public route wiring** + +Run: + +```bash +git add packages/cli/src/cli-runtime.ts packages/cli/src/commands/ingest-commands.ts packages/cli/src/index.test.ts packages/cli/src/dev.test.ts +git commit -m "feat(cli): route public connection ingest command" +``` + +### Task 2: Hide top-level `scan` from normal help + +**Files:** +- Modify: `packages/cli/src/commands/scan-commands.ts` +- Modify: `packages/cli/src/index.test.ts` +- Modify: `packages/cli/src/dev.test.ts` +- Modify: `packages/cli/src/cli-program.ts` + +- [ ] **Step 1: Update public help tests** + +In `packages/cli/src/index.test.ts`, in the test `prints the public command +surface in root help`, change the visible command list: + +```ts + for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']) { + expect(testIo.stdout()).toContain(`${command}`); + } + expect(testIo.stdout()).not.toMatch(/^ scan\s/m); +``` + +In `packages/cli/src/dev.test.ts`, keep the direct `['scan', '--help']` case +so the hidden debug command is still callable. + +- [ ] **Step 2: Run the failing scan help tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/dev.test.ts -t "public command surface|generated nested help" +``` + +Expected: FAIL because root help still prints `scan`. + +- [ ] **Step 3: Hide the scan command** + +In `packages/cli/src/commands/scan-commands.ts`, change: + +```ts + program + .command('scan') +``` + +to: + +```ts + program + .command('scan', { hidden: true }) +``` + +In `packages/cli/src/cli-program.ts`, leave `scan` in +`PROJECT_AWARE_ROOT_COMMANDS` so hidden direct invocations still receive +project-dir behavior: + +```ts +const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']); +``` + +- [ ] **Step 4: Run scan help tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/dev.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit scan help hiding** + +Run: + +```bash +git add packages/cli/src/commands/scan-commands.ts packages/cli/src/index.test.ts packages/cli/src/dev.test.ts packages/cli/src/cli-program.ts +git commit -m "feat(cli): hide standalone scan from public help" +``` + +### Task 3: Resolve public ingest depth, warnings, and query-history intent + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing public ingest planner tests** + +In `packages/cli/src/public-ingest.test.ts`, add these tests inside +`describe('buildPublicIngestPlan', ...)`: + +```ts + it('resolves database depth from flags, stored context, and defaults', () => { + const project = projectWithConnections({ + fast_default: { driver: 'postgres' }, + deep_default: { driver: 'postgres', context: { depth: 'deep' } }, + docs: { driver: 'notion' }, + }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'fast_default', + all: false, + queryHistory: 'default', + }).targets[0], + ).toMatchObject({ connectionId: 'fast_default', databaseDepth: 'fast', queryHistory: { enabled: false } }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'deep_default', + all: false, + queryHistory: 'default', + }).targets[0], + ).toMatchObject({ connectionId: 'deep_default', databaseDepth: 'deep' }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'docs', + all: false, + depth: 'deep', + queryHistory: 'default', + }).warnings, + ).toEqual(['--deep affects database ingest only; ignoring it for docs.']); + }); + + it('upgrades effective depth when query history is explicitly enabled', () => { + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false } } }, + }); + + const plan = buildPublicIngestPlan(project, { + 
projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + depth: 'fast', + queryHistory: 'enabled', + queryHistoryWindowDays: 30, + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + queryHistory: { enabled: true, windowDays: 30, dialect: 'postgres' }, + }); + expect(plan.warnings).toEqual(['--query-history requires deep ingest; running warehouse with --deep.']); + }); + + it('warns and skips query history for unsupported database drivers', () => { + const project = projectWithConnections({ local: { driver: 'sqlite' } }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'local', + all: false, + queryHistory: 'enabled', + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'local', + databaseDepth: 'fast', + queryHistory: { enabled: false, unsupported: true }, + }); + expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); + }); +``` + +- [ ] **Step 2: Run the failing public ingest planner tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "resolves database depth|upgrades effective depth|unsupported database drivers" +``` + +Expected: FAIL because `depth`, `queryHistory`, `databaseDepth`, and plan +warnings do not exist. 
+ +- [ ] **Step 3: Extend public ingest types** + +In `packages/cli/src/public-ingest.ts`, replace the public step and args types +near the top with: + +```ts +type KtxPublicIngestStepName = 'database-schema' | 'query-history' | 'source-ingest' | 'memory-update'; +type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; +type KtxPublicIngestInputMode = 'auto' | 'disabled'; +type KtxPublicIngestDepth = 'fast' | 'deep'; +type KtxPublicIngestQueryHistoryFlag = 'default' | 'enabled' | 'disabled'; +type HistoricSqlDialect = 'postgres' | 'bigquery' | 'snowflake'; +``` + +In the `command: 'run'` variant of `KtxPublicIngestArgs`, add: + +```ts + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; +``` + +Replace `KtxPublicIngestPlanTarget` with: + +```ts +export interface KtxPublicIngestPlanTarget { + connectionId: string; + driver: string; + operation: 'database-ingest' | 'source-ingest'; + adapter?: string; + sourceDir?: string; + debugCommand: string; + steps: KtxPublicIngestStepName[]; + databaseDepth?: KtxPublicIngestDepth; + queryHistory?: { + enabled: boolean; + dialect?: HistoricSqlDialect; + windowDays?: number; + unsupported?: boolean; + skippedStoredByFast?: boolean; + }; +} +``` + +Add warnings to `KtxPublicIngestPlan`: + +```ts +export interface KtxPublicIngestPlan { + projectDir: string; + targets: KtxPublicIngestPlanTarget[]; + warnings: string[]; +} +``` + +- [ ] **Step 4: Add depth and query-history resolver helpers** + +Add these helpers after `warehouseDrivers`: + +```ts +const queryHistoryDialectByDriver = new Map<string, HistoricSqlDialect>([ + ['postgres', 'postgres'], + ['postgresql', 'postgres'], + ['bigquery', 'bigquery'], + ['snowflake', 'snowflake'], +]); + +function connectionContext(connection: KtxProjectConnectionConfig): Record<string, unknown> { + const value = connection.context; + return typeof value === 'object' && value !== null && !Array.isArray(value) ? 
(value as Record<string, unknown>) : {}; +} + +function storedDepth(connection: KtxProjectConnectionConfig): KtxPublicIngestDepth | undefined { + const value = connectionContext(connection).depth; + return value === 'fast' || value === 'deep' ? value : undefined; +} + +function storedQueryHistory(connection: KtxProjectConnectionConfig): Record<string, unknown> { + const value = connectionContext(connection).queryHistory; + return typeof value === 'object' && value !== null && !Array.isArray(value) ? (value as Record<string, unknown>) : {}; +} + +function positiveInteger(value: unknown): number | undefined { + return typeof value === 'number' && Number.isInteger(value) && value > 0 ? value : undefined; +} +``` + +Add: + +```ts +function resolveDatabaseTargetOptions(input: { + connectionId: string; + driver: string; + connection: KtxProjectConnectionConfig; + args: { + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + }; + warnings: string[]; +}): Pick<KtxPublicIngestPlanTarget, 'databaseDepth' | 'queryHistory' | 'steps'> { + const storedQh = storedQueryHistory(input.connection); + const dialect = queryHistoryDialectByDriver.get(input.driver); + const explicitQueryHistory = input.args.queryHistory ?? 'default'; + const storedEnabled = storedQh.enabled === true; + const requestedQh = explicitQueryHistory === 'enabled' || (explicitQueryHistory === 'default' && storedEnabled); + let depth = input.args.depth ?? storedDepth(input.connection) ?? 'fast'; + const queryHistory = { + enabled: false, + ...(input.args.queryHistoryWindowDays !== undefined + ? { windowDays: input.args.queryHistoryWindowDays } + : positiveInteger(storedQh.windowDays) !== undefined + ? { windowDays: positiveInteger(storedQh.windowDays) } + : {}), + }; + + if (requestedQh && !dialect) { + input.warnings.push( + explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined + ? 
`--query-history is not supported for ${input.driver}; running schema ingest for ${input.connectionId}.` + : `${input.connectionId} has query history enabled in ktx.yaml, but ${input.driver} does not support it; running schema ingest.`, + ); + return { + databaseDepth: depth, + queryHistory: { ...queryHistory, unsupported: true }, + steps: ['database-schema'], + }; + } + + if (requestedQh && dialect) { + if (depth === 'fast') { + input.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); + } + depth = 'deep'; + return { + databaseDepth: depth, + queryHistory: { ...queryHistory, enabled: true, dialect }, + steps: ['database-schema', 'query-history'], + }; + } + + if (input.args.depth === 'fast' && explicitQueryHistory !== 'enabled' && storedEnabled) { + input.warnings.push( + `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, + ); + return { + databaseDepth: 'fast', + queryHistory: { ...queryHistory, skippedStoredByFast: true }, + steps: ['database-schema'], + }; + } + + return { + databaseDepth: depth, + queryHistory, + steps: ['database-schema'], + }; +} +``` + +- [ ] **Step 5: Use the resolver in plan construction** + +Change `targetForConnection` to accept args and warnings: + +```ts +function targetForConnection( + connectionId: string, + connection: KtxProjectConnectionConfig, + args: { + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + }, + warnings: string[], +): KtxPublicIngestPlanTarget { +``` + +In the source-adapter branch, before returning, add: + +```ts + if (args.depth) { + warnings.push(`--${args.depth} affects database ingest only; ignoring it for ${connectionId}.`); + } + if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { + warnings.push(`--query-history affects database ingest only; ignoring it for ${connectionId}.`); + } +``` + +Change the source 
debug command to: + +```ts + debugCommand: `ktx ingest ${connectionId} --debug`, +``` + +In the warehouse branch, replace the return object with: + +```ts + const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); + return { + connectionId, + driver, + operation: 'database-ingest', + debugCommand: `ktx ingest ${connectionId} --debug`, + ...options, + }; +``` + +In `buildPublicIngestPlan`, add warnings and return them: + +```ts + const warnings: string[] = []; + const targets = selected.map(([connectionId, connection]) => targetForConnection(connectionId, connection, args, warnings)); + return { + projectDir: args.projectDir, + targets: [ + ...targets.filter((t) => t.operation === 'database-ingest'), + ...targets.filter((t) => t.operation === 'source-ingest'), + ], + warnings, + }; +``` + +- [ ] **Step 6: Run planner tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "buildPublicIngestPlan" +``` + +Expected: PASS after updating older expected target snapshots from +`operation: 'scan'` to `operation: 'database-ingest'`, from `steps: ['scan']` +to `steps: ['database-schema']`, and adding `warnings: []`. 
+ +- [ ] **Step 7: Commit public ingest planning** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "feat(cli): plan public ingest depth and query history" +``` + +### Task 4: Execute database depth and query-history facets + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/public-ingest.test.ts` +- Modify: `packages/cli/src/ingest.ts` + +- [ ] **Step 1: Write failing execution tests** + +In `packages/cli/src/public-ingest.test.ts`, add: + +```ts + it('maps fast and deep database targets to scan internals', async () => { + const io = makeIo(); + const project = projectWithConnections({ + fast: { driver: 'postgres' }, + deep: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled', queryHistory: 'default' }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(0); + + expect(runScan).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ connectionId: 'deep', mode: 'enriched', detectRelationships: true }), + expect.anything(), + ); + expect(runScan).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ connectionId: 'fast', mode: 'structural', detectRelationships: false }), + expect.anything(), + ); + }); + + it('runs query history after schema ingest with current-run window override', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true, windowDays: 90 } } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 
'enabled', + queryHistoryWindowDays: 30, + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(runScan).toHaveBeenCalledWith( + expect.objectContaining({ connectionId: 'warehouse', mode: 'enriched' }), + expect.anything(), + ); + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + connectionId: 'warehouse', + adapter: 'historic-sql', + allowImplicitAdapter: true, + historicSqlPullConfigOverride: expect.objectContaining({ dialect: 'postgres', windowDays: 30 }), + }), + expect.anything(), + ); + }); +``` + +- [ ] **Step 2: Run the failing execution tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "maps fast and deep|runs query history" +``` + +Expected: FAIL because execution still uses `scanMode`, no query-history step +exists, and `KtxIngestArgs` has no implicit-adapter fields. + +- [ ] **Step 3: Add implicit adapter and query-history override fields** + +In `packages/cli/src/ingest.ts`, extend the `command: 'run'` args type: + +```ts + allowImplicitAdapter?: boolean; + historicSqlPullConfigOverride?: Record<string, unknown>; +``` + +In the `adapterOptions` object inside `runKtxIngest`, add: + +```ts + ...(args.historicSqlPullConfigOverride + ? { historicSqlPullConfigOverride: args.historicSqlPullConfigOverride } + : {}), +``` + +Before calling `executeLocalIngest`, create the project used for local ingest: + +```ts + const ingestProject = + args.allowImplicitAdapter && !project.config.ingest.adapters.includes(args.adapter) + ? { + ...project, + config: { + ...project.config, + ingest: { + ...project.config.ingest, + adapters: [...project.config.ingest.adapters, args.adapter], + }, + }, + } + : project; +``` + +Then pass `ingestProject` instead of `project` to `runLocalMetabaseIngest`, +`createAdapters`, `createQueryExecutor`, and `executeLocalIngest` in the +`command: 'run'` branch. 
+ +Keep `packages/context/src/ingest/local-ingest.ts` unchanged. The public path +satisfies its strict `assertConfigured()` contract by passing an in-memory +project config whose adapter list includes the inferred adapter for this run. + +- [ ] **Step 4: Execute database targets from effective depth** + +In `packages/cli/src/public-ingest.ts`, update the database branch of +`executePublicIngestTarget`: + +```ts + if (target.operation === 'database-ingest') { + const { runKtxScan } = await import('./scan.js'); + const scanArgs: KtxScanArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + mode: target.databaseDepth === 'deep' ? 'enriched' : 'structural', + detectRelationships: target.databaseDepth === 'deep' ? true : false, + dryRun: false, + }; + const runScan = deps.runScan ?? runKtxScan; + const scanExitCode = deps.scanProgress + ? await runScan(scanArgs, io, { progress: deps.scanProgress }) + : await runScan(scanArgs, io); + if (scanExitCode !== 0) { + return markTargetResult(target, 'failed', 'database-schema'); + } + + if (target.queryHistory?.enabled === true) { + const { runKtxIngest } = await import('./ingest.js'); + const runIngest = deps.runIngest ?? runKtxIngest; + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: 'historic-sql', + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + allowImplicitAdapter: true, + historicSqlPullConfigOverride: { + dialect: target.queryHistory.dialect, + ...(target.queryHistory.windowDays !== undefined ? 
{ windowDays: target.queryHistory.windowDays } : {}), + }, + }; + const qhExitCode = await runIngest(ingestArgs, io); + if (qhExitCode !== 0) { + return markTargetResult(target, 'failed', 'query-history'); + } + } + + return markTargetResult(target, 'done'); + } +``` + +Update `markTargetResult` to accept the failed operation: + +```ts +function markTargetResult( + target: KtxPublicIngestPlanTarget, + status: 'done' | 'failed', + failedOperation?: KtxPublicIngestStepName, +): KtxPublicIngestTargetResult { +``` + +Inside the function, replace the failed-operation selection with: + +```ts + const selectedFailedOperation = + failedOperation ?? (target.operation === 'database-ingest' ? 'database-schema' : 'source-ingest'); +``` + +Then use `selectedFailedOperation` in the failed-step comparison and detail. + +- [ ] **Step 5: Print plan warnings before results** + +In `runKtxPublicIngest`, after building `plan` and before executing targets, +add: + +```ts + if (!args.json && plan.warnings.length > 0) { + for (const warning of plan.warnings) { + io.stderr.write(`Warning: ${warning}\n`); + } + } +``` + +For JSON output, the existing `{ plan, results }` payload now includes +`plan.warnings`. + +- [ ] **Step 6: Run execution tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/ingest.test.ts +``` + +Expected: PASS after updating public result table labels from `Scan` to +`Database` or `Schema` in existing assertions. 
+ +- [ ] **Step 7: Commit public execution behavior** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/ingest.ts +git commit -m "feat(cli): execute public database ingest facets" +``` + +### Task 5: Accept `context.queryHistory` in historic-SQL adapter plumbing + +**Files:** +- Modify: `packages/cli/src/local-adapters.ts` +- Modify: `packages/cli/src/local-adapters.test.ts` +- Modify: `packages/context/src/ingest/local-adapters.ts` +- Modify: `packages/context/src/ingest/local-adapters.test.ts` + +- [ ] **Step 1: Write failing query-history config tests** + +In `packages/context/src/ingest/local-adapters.test.ts`, add: + +```ts + it('maps connection context.queryHistory to historic-sql pull config', async () => { + const project = projectWithConnections({ + warehouse: { + driver: 'postgres', + context: { + queryHistory: { + enabled: true, + windowDays: 45, + minExecutions: 7, + filters: { dropTrivialProbes: true }, + }, + }, + }, + }); + const adapter = { source: 'historic-sql' } as never; + + await expect(localPullConfigForAdapter(project, adapter, 'warehouse')).resolves.toMatchObject({ + dialect: 'postgres', + windowDays: 45, + minExecutions: 7, + filters: { dropTrivialProbes: true }, + }); + }); + + it('prefers context.queryHistory over legacy historicSql', async () => { + const project = projectWithConnections({ + warehouse: { + driver: 'postgres', + historicSql: { enabled: true, dialect: 'postgres', windowDays: 90 }, + context: { queryHistory: { enabled: true, windowDays: 30 } }, + }, + }); + const adapter = { source: 'historic-sql' } as never; + + await expect(localPullConfigForAdapter(project, adapter, 'warehouse')).resolves.toMatchObject({ + dialect: 'postgres', + windowDays: 30, + }); + }); +``` + +In `packages/cli/src/local-adapters.test.ts`, add a test that creates a +Postgres connection with `context.queryHistory.enabled: true`, calls +`createKtxCliLocalIngestAdapters(project, { 
historicSqlConnectionId: +'warehouse' })`, and expects one adapter with `source === 'historic-sql'`. + +- [ ] **Step 2: Run the failing adapter tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-adapters.test.ts +pnpm --filter @ktx/cli exec vitest run src/local-adapters.test.ts +``` + +Expected: FAIL because both layers only look at `connection.historicSql`. + +- [ ] **Step 3: Add context-query-history mapping in context local adapters** + +In `packages/context/src/ingest/local-adapters.ts`, add: + +```ts +const historicSqlDialectByDriver = new Map([ + ['postgres', 'postgres'], + ['postgresql', 'postgres'], + ['bigquery', 'bigquery'], + ['snowflake', 'snowflake'], +]); + +function queryHistoryRecord(connection: unknown): Record<string, unknown> | null { + if (!isRecord(connection)) return null; + const context = isRecord(connection.context) ? connection.context : null; + const queryHistory = isRecord(context?.queryHistory) ? context.queryHistory : null; + return queryHistory; +} + +function queryHistoryPullConfig(connection: unknown): Record<string, unknown> | null { + const queryHistory = queryHistoryRecord(connection); + if (queryHistory?.enabled !== true || !isRecord(connection)) return null; + const dialect = historicSqlDialectByDriver.get(String(connection.driver ?? '').toLowerCase()); + if (!dialect) return null; + return { ...queryHistory, dialect }; +} +``` + +In `localPullConfigForAdapter`, replace the historic-SQL block with: + +```ts + if (adapter.source === HISTORIC_SQL_SOURCE_KEY) { + const queryHistory = queryHistoryPullConfig(connection); + if (queryHistory) { + return historicSqlUnifiedPullConfigSchema.parse(queryHistory); + } + const historicSql = isRecord(connection?.historicSql) ? 
connection.historicSql : null; + if (historicSql?.enabled !== true) { + throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`); + } + return historicSqlUnifiedPullConfigSchema.parse({ + ...historicSql, + }); + } +``` + +- [ ] **Step 4: Add context-query-history detection in CLI local adapters** + +In `packages/cli/src/local-adapters.ts`, replace `enabledHistoricSqlDialect` +with: + +```ts +function enabledHistoricSqlDialect(connection: unknown): 'postgres' | 'bigquery' | 'snowflake' | null { + const direct = historicSqlRecord(connection); + const context = + connection && typeof connection === 'object' && !Array.isArray(connection) + ? (connection as { context?: unknown }).context + : null; + const queryHistory = + context && typeof context === 'object' && !Array.isArray(context) + ? (context as { queryHistory?: unknown }).queryHistory + : null; + const enabled = + queryHistory && typeof queryHistory === 'object' && !Array.isArray(queryHistory) + ? (queryHistory as { enabled?: unknown }).enabled === true + : direct?.enabled === true; + if (!enabled) { + return null; + } + const driver = String((connection as { driver?: unknown })?.driver ?? '').toLowerCase(); + if (driver === 'postgres' || driver === 'postgresql') return 'postgres'; + if (driver === 'bigquery') return 'bigquery'; + if (driver === 'snowflake') return 'snowflake'; + const legacyDialect = String(direct?.dialect ?? '').toLowerCase(); + return legacyDialect === 'postgres' || legacyDialect === 'bigquery' || legacyDialect === 'snowflake' + ? legacyDialect + : null; +} +``` + +- [ ] **Step 5: Run adapter tests again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/ingest/local-adapters.test.ts +pnpm --filter @ktx/cli exec vitest run src/local-adapters.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit query-history adapter config** + +Run: + +```bash +git add packages/context/src/ingest/local-adapters.ts packages/context/src/ingest/local-adapters.test.ts packages/cli/src/local-adapters.ts packages/cli/src/local-adapters.test.ts +git commit -m "feat(ingest): read connection query history config" +``` + +### Task 6: Remove normal `live-database`, adapter, and scan wording from public output + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/context-build-view.ts` +- Modify: `packages/cli/src/context-build-view.test.ts` +- Modify: `packages/cli/src/setup-sources.ts` +- Modify: `packages/cli/src/setup-sources.test.ts` + +- [ ] **Step 1: Write failing wording tests** + +In `packages/cli/src/context-build-view.test.ts`, change the group label +assertions from `Primary sources:` to `Databases:` and update the running +database detail test to expect `reading schema` instead of `scanning...`. + +Add this setup recovery assertion in the test covering failed initial source +ingest in `packages/cli/src/setup-sources.test.ts`: + +```ts + expect(io.stdout()).toContain(`Run later: ktx ingest ${connectionId}`); + expect(io.stdout()).not.toContain('ktx ingest run --connection-id'); + expect(io.stdout()).not.toContain('--adapter'); +``` + +- [ ] **Step 2: Run failing wording tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/setup-sources.test.ts -t "Databases|reading schema|Run later" +``` + +Expected: FAIL because labels still say `Primary sources`, running database +detail says `scanning...`, and setup recovery still suggests adapter-backed +ingest. 
+ +- [ ] **Step 3: Update public render labels** + +In `packages/cli/src/context-build-view.ts`, change: + +```ts + ...renderTargetGroup('Primary sources', state.primarySources, state.frame, styled, width), +``` + +to: + +```ts + ...renderTargetGroup('Databases', state.primarySources, state.frame, styled, width), +``` + +In `targetDetail`, change: + +```ts + ?? (target.target.operation === 'scan' ? 'scanning...' : 'ingesting...'); +``` + +to: + +```ts + ?? (target.target.operation === 'database-ingest' ? 'reading schema' : 'ingesting...'); +``` + +Update type comparisons in this file from `'scan'` to `'database-ingest'` for +public target operation checks. + +- [ ] **Step 4: Update setup source recovery text** + +In `packages/cli/src/setup-sources.ts`, replace: + +```ts + input.io.stdout.write(`│ Run later: ktx ingest run --connection-id ${input.connectionId} --adapter \n`); +``` + +with: + +```ts + input.io.stdout.write(`│ Run later: ktx ingest ${input.connectionId}\n`); +``` + +- [ ] **Step 5: Run wording tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/setup-sources.test.ts +``` + +Expected: PASS after updating existing snapshots for the new public operation +name. 
+ +- [ ] **Step 6: Commit public wording cleanup** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts +git commit -m "fix(cli): use public ingest wording" +``` + +### Task 7: Stop generating adapter allow-list entries in normal config + +**Files:** +- Modify: `packages/context/src/project/config.ts` +- Modify: `packages/context/src/project/config.test.ts` +- Modify: `packages/cli/src/setup-sources.ts` +- Modify: `packages/cli/src/setup-sources.test.ts` +- Modify: `packages/cli/src/setup-databases.ts` +- Modify: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Write failing config tests** + +In `packages/context/src/project/config.test.ts`, update default assertions: + +```ts + ingest: { + adapters: [], +``` + +and: + +```ts + expect(serialized).not.toContain('live-database'); + expect(parsed.ingest.adapters).toEqual([]); +``` + +In setup database and source tests, add assertions after generated config is +read: + +```ts + expect(configText).not.toContain('live-database'); + expect(configText).not.toContain('historic-sql'); + expect(configText).not.toMatch(/^\s+adapters:/m); +``` + +- [ ] **Step 2: Run failing config tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts src/setup-sources.test.ts +``` + +Expected: FAIL because defaults and setup still write adapter entries. 
+ +- [ ] **Step 3: Change default config** + +In `packages/context/src/project/config.ts`, change: + +```ts + adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'], +``` + +to: + +```ts + adapters: [], +``` + +- [ ] **Step 4: Stop setup from appending normal source adapters** + +In `packages/cli/src/setup-sources.ts`, change `writeSourceConnection` so the +new config only writes `connections`: + +```ts + const nextConfig = { + ...project.config, + connections: { + ...project.config.connections, + [connectionId]: connection, + }, + }; +``` + +Remove the `adapters` mutation in that helper and remove adapter rollback code +that only exists to undo automatic adapter appends. + +- [ ] **Step 5: Stop Historic SQL setup from appending adapters** + +In `packages/cli/src/setup-databases.ts`, change `ensureHistoricSqlIngestDefaults` +so it only raises `ingest.workUnits.maxConcurrency`: + +```ts +async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise<void> { + const project = await loadKtxProject({ projectDir }); + const maxConcurrency = Math.max( + project.config.ingest.workUnits.maxConcurrency, + HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY, + ); + if (maxConcurrency === project.config.ingest.workUnits.maxConcurrency) { + return; + } + await writeFile( + project.configPath, + serializeKtxProjectConfig({ + ...project.config, + ingest: { + ...project.config.ingest, + workUnits: { + ...project.config.ingest.workUnits, + maxConcurrency, + }, + }, + }), + 'utf-8', + ); +} +``` + +- [ ] **Step 6: Run config tests again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts src/setup-sources.test.ts src/public-ingest.test.ts +``` + +Expected: PASS. Public source ingest still works because Task 4 synthesizes the +inferred adapter for public connection-centric runs. 
+ +- [ ] **Step 7: Commit config cleanup** + +Run: + +```bash +git add packages/context/src/project/config.ts packages/context/src/project/config.test.ts packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts +git commit -m "fix(config): stop generating ingest adapter allow lists" +``` + +### Task 8: Update public docs and script assertions + +**Files:** +- Modify: `README.md` +- Modify: `scripts/examples-docs.test.mjs` +- Modify: `scripts/package-artifacts.mjs` +- Modify: `scripts/package-artifacts.test.mjs` +- Modify: `scripts/installed-live-database-smoke.mjs` +- Modify: `scripts/installed-live-database-smoke.test.mjs` + +- [ ] **Step 1: Write failing docs assertion changes** + +In `scripts/examples-docs.test.mjs`, replace assertions that require +`ktx scan `, `ktx scan [options]`, and +`live-database/` in normal README output with assertions for: + +```js +assert.match(buildingContext, /ktx ingest /); +assert.match(buildingContext, /ktx ingest --all/); +assert.doesNotMatch(rootReadme, /live-database\//); +assert.doesNotMatch(rootReadme, /ktx scan/); +``` + +In package artifact smoke tests, change normal public smoke labels from +`ktx scan structural` and `ktx scan enriched` to `ktx ingest fast` and +`ktx ingest deep`. + +- [ ] **Step 2: Run failing docs/script tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs scripts/package-artifacts.test.mjs scripts/installed-live-database-smoke.test.mjs +``` + +Expected: FAIL because docs and smoke scripts still mention `scan` and +`live-database`. 
+ +- [ ] **Step 3: Update README public examples** + +In `README.md`, replace normal context-build examples: + +```md +ktx scan warehouse --project-dir "$PROJECT_DIR" +``` + +with: + +```md +ktx ingest warehouse --project-dir "$PROJECT_DIR" --fast +``` + +Replace enriched examples with: + +```md +ktx ingest warehouse --project-dir "$PROJECT_DIR" --deep +``` + +Replace adapter-backed ingest examples for normal users with: + +```md +ktx ingest notion --project-dir "$PROJECT_DIR" +``` + +Keep internal artifact paths only in sections explicitly labeled as debug or +implementation details. + +- [ ] **Step 4: Update smoke scripts to use public ingest** + +In `scripts/package-artifacts.mjs`, replace public scan smoke invocations with: + +```js +const structuralScan = await run('pnpm', [ + 'exec', + 'ktx', + 'ingest', + 'warehouse', + '--project-dir', + projectDir, + '--fast', + '--no-input', +]); +``` + +and: + +```js +const enrichedScan = await run('pnpm', [ + 'exec', + 'ktx', + 'ingest', + 'warehouse', + '--project-dir', + projectDir, + '--deep', + '--no-input', +]); +``` + +Update expected output matches from `Mode: structural` and `Mode: enriched` to +the public result summary that `runKtxPublicIngest` prints, for example +`Database schema` or `database-schema done` depending on the final Task 4 +rendering. + +In `scripts/installed-live-database-smoke.mjs`, keep the file name if renaming +would churn scripts, but change the public CLI invocation from adapter-backed +`ktx ingest run --adapter live-database` to: + +```js +return ['exec', 'ktx', 'ingest', connectionId, '--project-dir', projectDir, '--fast', '--no-input']; +``` + +- [ ] **Step 5: Run docs/script tests again** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs scripts/package-artifacts.test.mjs scripts/installed-live-database-smoke.test.mjs +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit docs and smoke cleanup** + +Run: + +```bash +git add README.md scripts/examples-docs.test.mjs scripts/package-artifacts.mjs scripts/package-artifacts.test.mjs scripts/installed-live-database-smoke.mjs scripts/installed-live-database-smoke.test.mjs +git commit -m "docs: document public ingest command" +``` + +### Task 9: Run final verification + +**Files:** +- Verify only. + +- [ ] **Step 1: Run focused CLI and context tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/dev.test.ts src/public-ingest.test.ts src/context-build-view.test.ts src/ingest.test.ts src/local-adapters.test.ts src/setup-sources.test.ts src/setup-databases.test.ts +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts src/ingest/local-adapters.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run workspace type checks for touched packages** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run docs and script tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs scripts/package-artifacts.test.mjs scripts/installed-live-database-smoke.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code check** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only pre-existing findings unrelated to the files changed +in this plan. + +- [ ] **Step 5: Commit any verification-only expectation fixes** + +If verification required expectation-only changes, run: + +```bash +git add packages/cli/src packages/context/src scripts README.md +git commit -m "test: align ingest surface expectations" +``` + +If there were no changes, do not create an empty commit. + +## Self-review notes + +Spec coverage in this plan: + +- Covers `ktx ingest <connectionId>` and `ktx ingest --all`. +- Covers public `--fast` and `--deep` mapping to structural and enriched scan + internals. 
+- Covers hidden legacy `scan`, `ingest run`, and `ingest watch` help behavior. +- Covers adapter allow-list bypass for public connection-centric ingest. +- Covers current-run query-history enablement and window override. +- Covers normal generated config removing adapter allow lists. +- Covers normal help, docs, setup recovery text, and progress wording. + +Known v1-blocking work not included in this plan: + +- Setup must ask for and store `connections.<connectionId>.context.depth`. +- Setup readiness must treat fast and deep contexts differently. +- Setup context state must remove detach, watch, resume, stop, paused, and + background subprocess behavior. +- Config rewrite must migrate legacy `connection.historicSql` into + `connection.context.queryHistory`. +- Config/setup validation must reject connection ids that collide with + surviving ingest subcommands. + +Placeholder scan: no task uses deferred code markers or unnamed edge handling. +Each implementation task names exact files, tests, commands, and the concrete +code shape to add. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-closure.md new file mode 100644 index 00000000..f7e293b4 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-closure.md @@ -0,0 +1,1865 @@ +# Unified Ingest V1 Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking gaps in the unified `ktx ingest` +redesign after the public CLI surface slice. 
+ +**Architecture:** Keep the implemented connection-centric `ktx ingest` command +as the public entry point, and add the missing policy layer around it: depth +readiness, setup depth defaults, foreground-only context builds, legacy +query-history config migration, and reserved connection-id validation. Put +shared depth policy in a small CLI helper so public ingest and setup use the +same rules. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages, +YAML config serialization. + +--- + +## Current audit + +The implemented `2026-05-13-unified-ingest-public-cli-surface.md` slice covers +the first public-surface layer: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` are routed in + `packages/cli/src/commands/ingest-commands.ts`. +- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal + help while still callable as debug or stored-report surfaces. +- `packages/cli/src/public-ingest.ts` plans database targets before source + targets, maps fast/deep to scan internals, runs query history after schema + ingest, and bypasses adapter allow lists for inferred public adapters. +- `packages/context/src/project/config.ts` no longer generates normal + `ingest.adapters` entries. +- README and smoke scripts now document public `ktx ingest` examples. + +Remaining v1-blocking gaps against the original spec: + +- Explicit or stored `deep` currently does not fail before work starts when + model, scan-enrichment, or scan-embedding config is missing. +- Deep database ingest always passes `detectRelationships: true` instead of + honoring `scan.relationships.enabled`. +- `ktx setup` does not ask for or store + `connections.<connectionId>.context.depth`, and still forces enriched context builds. 
+- Setup readiness still requires enriched AI artifacts for every database + context and blocks all context builds when AI inputs are missing, even when + the effective depth is `fast`. +- Setup still writes legacy `connections.<connectionId>.historicSql` instead of + canonical `connections.<connectionId>.context.queryHistory`. +- Legacy `historicSql` migration is not performed by setup. +- Context build still supports detach, watch, resume, stop, paused/detached + state, and background subprocesses. +- Setup/config validation does not reject connection ids that collide with + surviving ingest subcommands: `status`, `replay`, `run`, and `watch`. + +Non-blocking gaps after this plan: + +- Internal package names, adapter names, raw-source artifact paths, and debug + output can still use `scan`, `live-database`, and `historic-sql`. +- The hidden debug `ktx scan` and hidden adapter-backed `ktx ingest run` + commands can remain until an explicit internal cleanup plan removes them. +- MCP scan tool names and low-level scan tests can continue to use scan + terminology because the original spec only requires normal CLI/help/output + cleanup for v1. + +## File structure + +- Create `packages/cli/src/ingest-depth.ts`: shared database driver detection, + depth defaults, deep-readiness checks, and context-depth config helpers. +- Modify `packages/cli/src/public-ingest.ts`: use shared depth policy, add + preflight failures, and pass relationship detection only when enabled. +- Modify `packages/cli/src/public-ingest.test.ts`: cover deep preflight, + per-target `--all` isolation, and relationship flag mapping. +- Modify `packages/cli/src/setup-databases.ts`: write + `context.queryHistory`, migrate legacy `historicSql`, and read the canonical + shape for query-history probe behavior. +- Modify `packages/cli/src/setup-databases.test.ts`: replace legacy + `historicSql` expectations with canonical `context.queryHistory` + expectations and migration coverage. 
+- Modify `packages/cli/src/setup-context.ts`: prompt/store context depth, + remove foreground detach/background logic, normalize legacy state, and make + readiness depth-aware. +- Modify `packages/cli/src/setup-context.test.ts`: cover fast readiness, deep + readiness, stored depth, foreground-only state, and removed watch/detach + affordances. +- Modify `packages/cli/src/context-build-view.ts`: remove detach hint and + background subprocess support. +- Modify `packages/cli/src/context-build-view.test.ts`: assert foreground-only + progress copy. +- Modify `packages/context/src/project/config.ts`: reject reserved connection + ids during config parse. +- Modify `packages/context/src/project/index.ts`: export reserved-id helpers + for setup flows. +- Modify `packages/context/src/project/config.test.ts`: cover reserved + connection ids. +- Modify `packages/cli/src/setup-sources.ts`, + `packages/cli/src/setup-sources.test.ts`, + `packages/cli/src/commands/setup-commands.ts`, and + `packages/cli/src/index.test.ts`: reject reserved ids during setup prompts + and setup flags. 
+ +## Tasks + +### Task 1: Add depth policy and public deep preflight + +**Files:** +- Create: `packages/cli/src/ingest-depth.ts` +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing public ingest preflight tests** + +In `packages/cli/src/public-ingest.test.ts`, add this helper after +`projectWithConnections`: + +```ts +function deepReadyProject(connections: KtxProjectConfig['connections'], relationshipsEnabled = true): KtxPublicIngestProject { + const config = buildDefaultKtxProjectConfig('warehouse'); + return { + projectDir: '/tmp/project', + config: { + ...config, + connections, + llm: { + ...config.llm, + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + ...config.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + relationships: { + ...config.scan.relationships, + enabled: relationshipsEnabled, + }, + }, + }, + }; +} +``` + +Add these tests inside the `buildPublicIngestPlan` describe block: + +```ts + it('records a preflight failure for deep database ingest when readiness config is missing', () => { + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + preflightFailure: + 'warehouse requires deep ingest readiness: model configuration, scan enrichment mode, scan embeddings. 
Run ktx setup or rerun with --fast.', + }); + }); + + it('honors scan.relationships.enabled when planning deep database ingest', () => { + const plan = buildPublicIngestPlan( + deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }, false), + { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + }, + ); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + detectRelationships: false, + }); + }); +``` + +Add this test inside the `runKtxPublicIngest` describe block: + +```ts + it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + docs: { driver: 'notion' }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(1); + + expect(runScan).not.toHaveBeenCalled(); + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', connectionId: 'docs', adapter: 'notion' }), + expect.anything(), + ); + expect(io.stdout()).toContain('warehouse requires deep ingest readiness'); + }); +``` + +- [ ] **Step 2: Run the failing public ingest tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "preflight failure|relationships.enabled|deep-readiness" +``` + +Expected: FAIL because `preflightFailure`, shared depth policy, and +relationship-aware deep planning do not exist. 
+ +- [ ] **Step 3: Create shared depth policy** + +Create `packages/cli/src/ingest-depth.ts`: + +```ts +import type { KtxProjectConfig, KtxProjectConnectionConfig } from '@ktx/context/project'; + +export type KtxDatabaseContextDepth = 'fast' | 'deep'; + +export const KTX_DATABASE_DRIVER_IDS = new Set([ + 'sqlite', + 'postgres', + 'postgresql', + 'mysql', + 'clickhouse', + 'sqlserver', + 'bigquery', + 'snowflake', +]); + +export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { + return String(connection.driver ?? '').trim().toLowerCase(); +} + +export function isDatabaseDriver(driver: string): boolean { + return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); +} + +export function connectionContextRecord(connection: KtxProjectConnectionConfig): Record<string, unknown> { + const context = connection.context; + return typeof context === 'object' && context !== null && !Array.isArray(context) + ? (context as Record<string, unknown>) + : {}; +} + +export function databaseContextDepth(connection: KtxProjectConnectionConfig): KtxDatabaseContextDepth | undefined { + const depth = connectionContextRecord(connection).depth; + return depth === 'fast' || depth === 'deep' ? 
depth : undefined; +} + +export function withDatabaseContextDepth( + connection: KtxProjectConnectionConfig, + depth: KtxDatabaseContextDepth, +): KtxProjectConnectionConfig { + return { + ...connection, + context: { + ...connectionContextRecord(connection), + depth, + }, + }; +} + +export function deepReadinessGaps(config: KtxProjectConfig): string[] { + const gaps: string[] = []; + if (config.llm.provider.backend === 'none' || !config.llm.models.default) { + gaps.push('model configuration'); + } + + if (config.scan.enrichment.mode !== 'llm') { + gaps.push('scan enrichment mode'); + } + + const embeddings = config.scan.enrichment.embeddings; + if ( + !embeddings || + embeddings.backend === 'none' || + embeddings.backend === 'deterministic' || + !embeddings.model || + embeddings.dimensions <= 0 + ) { + gaps.push('scan embeddings'); + } + + return gaps; +} + +export function recommendedDatabaseContextDepth(config: KtxProjectConfig): KtxDatabaseContextDepth { + return deepReadinessGaps(config).length === 0 ? 'deep' : 'fast'; +} +``` + +- [ ] **Step 4: Apply preflight and relationship policy in public ingest** + +In `packages/cli/src/public-ingest.ts`, replace the local depth and warehouse +driver definitions with imports: + +```ts +import { + type KtxDatabaseContextDepth, + databaseContextDepth, + deepReadinessGaps, + isDatabaseDriver, + normalizeConnectionDriver, +} from './ingest-depth.js'; +``` + +Change `type KtxPublicIngestDepth = 'fast' | 'deep';` to: + +```ts +type KtxPublicIngestDepth = KtxDatabaseContextDepth; +``` + +Remove the local `warehouseDrivers`, `normalizedDriver`, +`connectionContext`, and `storedDepth` helpers. + +Add these fields to `KtxPublicIngestPlanTarget`: + +```ts + detectRelationships?: boolean; + preflightFailure?: string; +``` + +In `resolveDatabaseTargetOptions`, replace: + +```ts + let depth = input.args.depth ?? depthFromLegacyScanMode(input.args.scanMode) ?? storedDepth(input.connection) ?? 
'fast'; +``` + +with: + +```ts + let depth = + input.args.depth ?? depthFromLegacyScanMode(input.args.scanMode) ?? databaseContextDepth(input.connection) ?? 'fast'; +``` + +Change `targetForConnection` to accept the project config: + +```ts +function targetForConnection( + connectionId: string, + connection: KtxProjectConnectionConfig, + projectConfig: KtxPublicIngestProject['config'], + args: { + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + }, + warnings: string[], +): KtxPublicIngestPlanTarget { +``` + +Use shared driver detection: + +```ts + const driver = normalizeConnectionDriver(connection); +``` + +Replace the warehouse branch with: + +```ts + if (isDatabaseDriver(driver)) { + const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); + const gaps = options.databaseDepth === 'deep' ? deepReadinessGaps(projectConfig) : []; + return { + connectionId, + driver, + operation: 'database-ingest', + debugCommand: `ktx ingest ${connectionId} --debug`, + detectRelationships: options.databaseDepth === 'deep' && projectConfig.scan.relationships.enabled, + ...(gaps.length > 0 + ? { + preflightFailure: `${connectionId} requires deep ingest readiness: ${gaps.join( + ', ', + )}. Run ktx setup or rerun with --fast.`, + } + : {}), + ...options, + }; + } +``` + +In `buildPublicIngestPlan`, pass `project.config`: + +```ts + const targets = selected.map(([connectionId, connection]) => + targetForConnection(connectionId, connection, project.config, args, warnings), + ); +``` + +At the start of `executePublicIngestTarget`, add: + +```ts + if (target.preflightFailure) { + return { + connectionId: target.connectionId, + driver: target.driver, + steps: defaultSteps(target).map((step) => + step.operation === 'database-schema' + ? 
{ + ...step, + status: 'failed', + detail: target.preflightFailure, + } + : step, + ), + }; + } +``` + +Change database scan args from: + +```ts + detectRelationships: target.databaseDepth === 'deep' ? true : false, +``` + +to: + +```ts + detectRelationships: target.detectRelationships === true, +``` + +- [ ] **Step 5: Run public ingest tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts +``` + +Expected: PASS. + +- [ ] **Step 6: Commit depth preflight** + +Run: + +```bash +git add packages/cli/src/ingest-depth.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "feat(cli): preflight deep public ingest readiness" +``` + +### Task 2: Store query history under `context.queryHistory` + +**Files:** +- Modify: `packages/cli/src/setup-databases.ts` +- Test: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Write failing setup query-history shape tests** + +In `packages/cli/src/setup-databases.test.ts`, update the existing tests that +expect `config.connections.<connectionId>.historicSql` so they expect +`config.connections.<connectionId>.context.queryHistory` instead. 
+ +Add this test near the existing Historic SQL setup tests: + +```ts + it('migrates legacy historicSql to context.queryHistory during database setup', async () => { + await writeProjectConfig(tempDir, { + connections: { + warehouse: { + driver: 'postgres', + readonly: true, + historicSql: { + enabled: true, + dialect: 'postgres', + windowDays: 45, + minExecutions: 9, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { mode: 'exclude', patterns: ['^svc_'] }, + orchestrators: { mode: 'exclude', patterns: ['airflow'] }, + dropFailedBelow: 2, + }, + redactionPatterns: ['(?i)secret'], + }, + }, + }, + }); + + const io = makeIo(); + + await expect( + runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseConnectionIds: ['warehouse'], + skipConnectionTest: true, + skipInitialScan: true, + }, + io.io, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + expect(config.connections.warehouse.context).toMatchObject({ + queryHistory: { + enabled: true, + windowDays: 45, + minExecutions: 9, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { mode: 'exclude', patterns: ['^svc_'] }, + orchestrators: { mode: 'exclude', patterns: ['airflow'] }, + dropFailedBelow: 2, + }, + redactionPatterns: ['(?i)secret'], + }, + }); + }); +``` + +- [ ] **Step 2: Run failing setup database tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "queryHistory|historicSql|migrates legacy" +``` + +Expected: FAIL because setup still writes and reads `historicSql`. 
+ +- [ ] **Step 3: Add query-history config helpers** + +In `packages/cli/src/setup-databases.ts`, add these helpers after +`historicSqlConfigRecord`: + +```ts +function contextRecord(connection: KtxProjectConnectionConfig | undefined): Record<string, unknown> { + const context = connection?.context; + return context && typeof context === 'object' && !Array.isArray(context) ? (context as Record<string, unknown>) : {}; +} + +function queryHistoryConfigRecord(connection: KtxProjectConnectionConfig | undefined): Record<string, unknown> | null { + const queryHistory = contextRecord(connection).queryHistory; + return queryHistory && typeof queryHistory === 'object' && !Array.isArray(queryHistory) + ? (queryHistory as Record<string, unknown>) + : null; +} + +function stripLegacyHistoricSql(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const { historicSql: _historicSql, ...rest } = connection as KtxProjectConnectionConfig & { + historicSql?: unknown; + }; + return rest; +} + +function withQueryHistoryConfig( + connection: KtxProjectConnectionConfig, + queryHistory: Record<string, unknown>, +): KtxProjectConnectionConfig { + return { + ...stripLegacyHistoricSql(connection), + context: { + ...contextRecord(connection), + queryHistory, + }, + }; +} + +function migrateLegacyHistoricSqlConnection(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const existingQueryHistory = queryHistoryConfigRecord(connection); + const legacy = historicSqlConfigRecord(connection); + if (existingQueryHistory || !legacy) { + return existingQueryHistory ? 
stripLegacyHistoricSql(connection) : connection; + } + const { dialect: _dialect, ...queryHistory } = legacy; + return withQueryHistoryConfig(connection, queryHistory); +} +``` + +- [ ] **Step 4: Write canonical query-history config from setup** + +In `applyHistoricSqlConfigToConnection`, replace each returned `historicSql` +object with a call to `withQueryHistoryConfig(input.connection, queryHistory)`. 
+ +For disabled query history, return: + +```ts + return withQueryHistoryConfig(input.connection, { ...existing, enabled: false }); +``` + +For Postgres enabled query history, return: + +```ts + return withQueryHistoryConfig(input.connection, { + ...common, + minExecutions: input.args.historicSqlMinExecutions ?? 5, + }); +``` + +For BigQuery and Snowflake enabled query history, return: + +```ts + return withQueryHistoryConfig(input.connection, { + ...common, + windowDays: input.args.historicSqlWindowDays ?? 90, + redactionPatterns: input.args.historicSqlRedactionPatterns ?? [], + }); +``` + +Change `common` so it does not include `dialect`: + +```ts + const common: Record<string, unknown> = { + ...existing, + enabled: true, + filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns), + }; +``` + +Where `existing` is built, prefer canonical config: + +```ts + const existing = queryHistoryConfigRecord(input.connection) ?? historicSqlConfigRecord(input.connection) ?? {}; +``` + +- [ ] **Step 5: Migrate legacy blocks during setup writes** + +In `writeConnectionConfig`, normalize all project connections before writing: + +```ts + const migratedConnections = Object.fromEntries( + Object.entries(project.config.connections).map(([connectionId, connection]) => [ + connectionId, + migrateLegacyHistoricSqlConnection(connection), + ]), + ); + const nextConnection = migrateLegacyHistoricSqlConnection(input.connection); + const config = { + ...project.config, + connections: { + ...migratedConnections, + [input.connectionId]: nextConnection, + }, + }; +``` + +Change the post-write Historic SQL defaults check to read canonical config: + +```ts + const queryHistory = queryHistoryConfigRecord(nextConnection); + if (queryHistory?.enabled === true) { + await ensureHistoricSqlIngestDefaults(input.projectDir); + } +``` + +Update `historicSqlConfigRecord` callers used for probe decisions to prefer +`queryHistoryConfigRecord(connection)` and only fall back to 
legacy 
+`historicSqlConfigRecord(connection)`. + +- [ ] **Step 6: Run setup database tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts +``` + +Expected: PASS after updating assertions from `historicSql` to +`context.queryHistory`. + +- [ ] **Step 7: Commit setup query-history config** + +Run: + +```bash +git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts +git commit -m "feat(setup): store query history in connection context" +``` + +### Task 3: Store setup database context depth + +**Files:** +- Modify: `packages/cli/src/setup-context.ts` +- Test: `packages/cli/src/setup-context.test.ts` + +- [ ] **Step 1: Write failing setup depth tests** + +In `packages/cli/src/setup-context.test.ts`, replace the test named +`does not treat schema-only scan shards as completed setup context` with: + +```ts + it('treats fast database context as ready from schema manifest shards without AI artifacts', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } }, + }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); + await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); + await writeScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + mode: 'structural', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + 
expect(runContextBuildMock).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Existing context artifacts were found from setup ingest.'); + }); +``` + +Add these tests near the existing setup context build tests: + +```ts + it('stores fast context depth non-interactively when deep readiness is missing', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock, verifyContextReady }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'fast' }); + expect(runContextBuildMock).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled' }), + expect.anything(), + expect.anything(), + ); + expect(runContextBuildMock.mock.calls[0]?.[1]).not.toMatchObject({ + scanMode: 'enriched', + detectRelationships: true, + }); + }); + + it('prompts for database context depth after final readiness is known', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + enrichment: { + mode: 'llm', + embeddings: { backend: 'openai', model: 'text-embedding-3-small', dimensions: 1536 }, + }, + }, 
+ }); + const io = makeIo(); + const select = vi.fn(async () => 'deep'); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + runContextBuild: runContextBuildMock, + verifyContextReady, + }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('How much database context should KTX build?'), + }), + ); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' }); + }); +``` + +- [ ] **Step 2: Run failing setup depth tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-context.test.ts -t "fast database context|stores fast context depth|prompts for database context depth" +``` + +Expected: FAIL because setup has no depth prompt/storage and still gates all +context builds on AI readiness. 
+ +- [ ] **Step 3: Add setup depth helpers** + +In `packages/cli/src/setup-context.ts`, add imports: + +```ts +import { + type KtxDatabaseContextDepth, + databaseContextDepth, + deepReadinessGaps, + isDatabaseDriver, + normalizeConnectionDriver, + recommendedDatabaseContextDepth, + withDatabaseContextDepth, +} from './ingest-depth.js'; +``` + +Add these helpers after `listContextTargets`: + +```ts +function databaseConnectionsNeedingDepth(project: KtxLocalProject): string[] { + return Object.entries(project.config.connections) + .filter(([, connection]) => isDatabaseDriver(normalizeConnectionDriver(connection))) + .filter(([, connection]) => databaseContextDepth(connection) === undefined) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +async function writeDatabaseContextDepths( + project: KtxLocalProject, + connectionIds: string[], + depth: KtxDatabaseContextDepth, +): Promise<KtxLocalProject> { + if (connectionIds.length === 0) { + return project; + } + const nextConnections = { ...project.config.connections }; + for (const connectionId of connectionIds) { + const connection = nextConnections[connectionId]; + if (connection) { + nextConnections[connectionId] = withDatabaseContextDepth(connection, depth); + } + } + const nextConfig = { ...project.config, connections: nextConnections }; + await writeFile(project.configPath, serializeKtxProjectConfig(nextConfig), 'utf-8'); + return await loadKtxProject({ projectDir: project.projectDir }); +} + +async function ensureSetupDatabaseContextDepths(input: { + project: KtxLocalProject; + args: KtxSetupContextStepArgs; + prompts: KtxSetupContextPromptAdapter; +}): Promise<KtxLocalProject | 'back'> { + const missingDepthConnectionIds = databaseConnectionsNeedingDepth(input.project); + if (missingDepthConnectionIds.length === 0) { + return input.project; + } + + const recommended = recommendedDatabaseContextDepth(input.project.config); + if (input.args.inputMode === 'disabled') { 
+ return await 
writeDatabaseContextDepths(input.project, missingDepthConnectionIds, recommended); + } + + const deepReady = deepReadinessGaps(input.project.config).length === 0; + const options = + recommended === 'deep' + ? [ + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'back', label: 'Back' }, + ] + : [ + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'back', label: 'Back' }, + ]; + + const choice = await input.prompts.select({ + message: + 'How much database context should KTX build?\n\n' + + (deepReady + ? 'Deep is available because model, embedding, and scan enrichment are configured.' + : 'Fast is recommended because model, embedding, or scan enrichment is not configured.'), + options, + }); + if (choice === 'back') { + return 'back'; + } + return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, choice as KtxDatabaseContextDepth); +} +``` + +- [ ] **Step 4: Use stored depth in setup context builds** + +In `runKtxSetupContextStep`, after loading `project` and before reading the +existing setup context state, change `const project` to `let project`, then +add: + +```ts + const depthProject = await ensureSetupDatabaseContextDepths({ + project, + args, + prompts: deps.prompts ?? 
createPromptAdapter(), + }); + if (depthProject === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + project = depthProject; +``` + +Remove the unconditional missing-capability gate: + +```ts + const missing = missingCapabilities(project); + if (missing.length > 0) { + if (args.allowEmpty === true) { + return { status: 'skipped', projectDir: args.projectDir }; + } + writeMissingCapabilities(missing, io); + return { status: 'missing-input', projectDir: args.projectDir }; + } +``` + +Replace it with a deep-only target preflight gate: + +```ts + const preflightPlan = buildPublicIngestPlan(project, { projectDir: project.projectDir, all: true }); + const preflightFailures = preflightPlan.targets.flatMap((target) => + target.preflightFailure ? [`${target.connectionId}: ${target.preflightFailure}`] : [], + ); + if (preflightFailures.length > 0) { + if (args.allowEmpty === true) { + return { status: 'skipped', projectDir: args.projectDir }; + } + writeMissingCapabilities(preflightFailures, io); + return { status: 'missing-input', projectDir: args.projectDir }; + } +``` + +In `runBuild`, change the `runContextBuild` call from: + +```ts + { + projectDir: args.projectDir, + inputMode: args.inputMode, + scanMode: 'enriched', + detectRelationships: true, + }, +``` + +to: + +```ts + { + projectDir: args.projectDir, + inputMode: args.inputMode, + }, +``` + +- [ ] **Step 5: Run setup context depth tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-context.test.ts -t "fast database context|stores fast context depth|prompts for database context depth" +``` + +Expected: PASS after updating helper fixtures to accept the override shape in +the new tests. 
+ +- [ ] **Step 6: Commit setup context depth** + +Run: + +```bash +git add packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts +git commit -m "feat(setup): store database context depth" +``` + +### Task 4: Make setup readiness depth-aware + +**Files:** +- Modify: `packages/cli/src/setup-context.ts` +- Test: `packages/cli/src/setup-context.test.ts` + +- [ ] **Step 1: Write failing depth-aware readiness tests** + +In `packages/cli/src/setup-context.test.ts`, add: + +```ts + it('requires completed relationships for deep context when relationship discovery is enabled', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + }, + scan: { relationships: { enabled: true } }, + }); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + completedStages: ['descriptions', 'embeddings'], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => { + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:01:00.000Z', { + completedStages: ['descriptions', 'embeddings', 'relationships'], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + }); + return { exitCode: 0 }; + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(runContextBuildMock).toHaveBeenCalledOnce(); + }); + + it('does not require relationships for deep context when relationship discovery is disabled', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', 
readonly: true, context: { depth: 'deep' } }, + }, + scan: { relationships: { enabled: false } }, + }); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + completedStages: ['descriptions', 'embeddings'], + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(runContextBuildMock).not.toHaveBeenCalled(); + }); +``` + +- [ ] **Step 2: Run failing depth-aware readiness tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-context.test.ts -t "requires completed relationships|does not require relationships" +``` + +Expected: FAIL because readiness only checks enriched descriptions and +embeddings. + +- [ ] **Step 3: Replace scan readiness helpers** + +In `packages/cli/src/setup-context.ts`, replace +`scanReportHasCompletedDescriptionEnrichment` with: + +```ts +function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean { + if (!isRecord(report)) { + return false; + } + if (report.connectionId !== connectionId || report.dryRun === true) { + return false; + } + return stringArrayValue(isRecord(report.artifactPaths) ? 
report.artifactPaths.manifestShards : undefined).length > 0; +} + +function scanReportHasCompletedDeepEnrichment( + report: unknown, + connectionId: string, + relationshipsRequired: boolean, +): boolean { + if (!isRecord(report)) { + return false; + } + if (report.connectionId !== connectionId || report.mode !== 'enriched' || report.dryRun === true) { + return false; + } + if (!isRecord(report.enrichment) || !isRecord(report.enrichmentState) || !isRecord(report.artifactPaths)) { + return false; + } + const completedStages = stringArrayValue(report.enrichmentState.completedStages); + return ( + report.enrichment.tableDescriptions === 'completed' && + report.enrichment.columnDescriptions === 'completed' && + report.enrichment.embeddings === 'completed' && + completedStages.includes('descriptions') && + completedStages.includes('embeddings') && + (!relationshipsRequired || completedStages.includes('relationships')) && + stringArrayValue(report.artifactPaths.manifestShards).length > 0 + ); +} + +function scanReportSatisfiesDepth(input: { + report: unknown; + connectionId: string; + depth: KtxDatabaseContextDepth; + relationshipsRequired: boolean; +}): boolean { + if (input.depth === 'fast') { + return scanReportHasSchemaManifest(input.report, input.connectionId); + } + return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired); +} +``` + +Replace `verifyPrimarySourceScans` with: + +```ts +async function verifyPrimarySourceScans( + project: KtxLocalProject, + connectionIds: string[], +): Promise<{ ready: boolean; details: string[] }> { + const details: string[] = []; + const relationshipsRequired = project.config.scan.relationships.enabled; + for (const connectionId of connectionIds) { + const connection = project.config.connections[connectionId]; + const depth = connection ? (databaseContextDepth(connection) ?? 
'fast') : 'fast'; + const report = await readLatestScanReport(project.projectDir, connectionId); + if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) { + details.push( + depth === 'fast' + ? `${connectionId}: schema context has not completed.` + : `${connectionId}: deep database context has not completed.`, + ); + } + } + return { ready: details.length === 0, details }; +} +``` + +In `defaultVerifyContextReady`, change: + +```ts + const primarySourceScans = await verifyPrimarySourceScans(projectDir, targets.primarySourceConnectionIds); +``` + +to: + +```ts + const primarySourceScans = await verifyPrimarySourceScans(project, targets.primarySourceConnectionIds); +``` + +- [ ] **Step 4: Update success wording away from scan** + +In `writeSuccess`, replace: + +```ts + io.stdout.write(` ${connectionId}: enriched scan complete\n`); +``` + +with: + +```ts + const connection = project.config.connections[connectionId]; + const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; + io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 'deep context complete' : 'schema context complete'}\n`); +``` + +Change the function signature to accept `project`: + +```ts +function writeSuccess( + project: KtxLocalProject, + readiness: KtxSetupContextReadiness, + targets: KtxSetupContextTargets, + io: KtxCliIo, +): void { +``` + +Change the caller from: + +```ts + writeSuccess(readiness, targets, io); +``` + +to: + +```ts + writeSuccess(project, readiness, targets, io); +``` + +- [ ] **Step 5: Run setup context readiness tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-context.test.ts +``` + +Expected: PASS after updating old test names and assertions that referred to +`enriched scan complete`. 
+ +- [ ] **Step 6: Commit depth-aware readiness** + +Run: + +```bash +git add packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts +git commit -m "feat(setup): verify context readiness by database depth" +``` + +### Task 5: Remove background context-build control + +**Files:** +- Modify: `packages/cli/src/context-build-view.ts` +- Modify: `packages/cli/src/setup-context.ts` +- Test: `packages/cli/src/context-build-view.test.ts` +- Test: `packages/cli/src/setup-context.test.ts` +- Test: `packages/cli/src/setup.test.ts` + +- [ ] **Step 1: Write failing foreground-only tests** + +In `packages/cli/src/context-build-view.test.ts`, add: + +```ts + it('renders foreground-only progress hints without detach or resume commands', () => { + const state = initViewState([ + { + connectionId: 'warehouse', + driver: 'postgres', + operation: 'database-ingest', + debugCommand: 'ktx ingest warehouse --debug', + steps: ['database-schema'], + }, + ]); + state.primarySources[0]!.status = 'running'; + + const rendered = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' }); + + expect(rendered).toContain('Ctrl+C to stop'); + expect(rendered).not.toContain('d to detach'); + expect(rendered).not.toContain('resume'); + }); +``` + +In `packages/cli/src/setup-context.test.ts`, replace tests that expect +detached/watch behavior with: + +```ts + it('normalizes legacy detached and paused setup context states to stale', async () => { + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-old', + status: 'detached' as never, + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-old'), + }); + + await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({ + status: 'stale', + 
failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.', + }); + }); + + it('starts a fresh foreground build when a stale running state is found', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } } }, + }); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-running', + status: 'running', + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-running'), + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock, verifyContextReady }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(runContextBuildMock).toHaveBeenCalledOnce(); + }); +``` + +- [ ] **Step 2: Run failing foreground-only tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/setup-context.test.ts src/setup.test.ts -t "foreground-only|legacy detached|stale running|detached|watch" +``` + +Expected: FAIL because detach, watch, paused, and background logic still +exist. 
+ +- [ ] **Step 3: Remove detach and background spawning from the progress view** + +In `packages/cli/src/context-build-view.ts`, remove these imports: + +```ts +import { spawn } from 'node:child_process'; +import { mkdirSync, openSync } from 'node:fs'; +``` + +Delete these functions: + +```ts +function resolveKtxEntryScript(): string | null +function spawnBackgroundBuild(projectDir: string): { logPath: string } | null +export function defaultSetupKeystroke( + onDetach: () => void, + onCtrlC: () => void, +): (() => void) | null +``` + +Change the default hint in `renderContextBuildView`: + +```ts + const hintContent = options.hintText ?? 'Ctrl+C to stop'; +``` + +Remove these fields from `ContextBuildDeps`: + +```ts + setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; + onDetach?: () => void; +``` + +Change `ContextBuildResult` to: + +```ts +export interface ContextBuildResult { + exitCode: number; + reportIds?: string[]; + artifactPaths?: string[]; +} +``` + +In `runContextBuild`, delete the `detached`, `exiting`, `cleanupKeystroke`, +and `setupKeystroke` block. Keep the `try/finally` cleanup for +`spinnerInterval`. + +Delete this branch: + +```ts + if (detached) { + return { exitCode: 0, detached: true }; + } +``` + +Return: + +```ts + return { + exitCode: hasFailure ? 1 : 0, + ...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}), + ...(artifactPaths.size > 0 ? { artifactPaths: [...artifactPaths] } : {}), + }; +``` + +- [ ] **Step 4: Normalize setup context state to foreground-only statuses** + +In `packages/cli/src/setup-context.ts`, remove `detached` and `paused` from +`KtxSetupContextBuildStatus` and `KtxSetupContextResult`. 
+ +Change `KtxSetupContextCommands` to: + +```ts +export interface KtxSetupContextCommands { + build: string; + status: string; +} +``` + +Change `contextBuildCommands` to return: + +```ts + return { + build: `ktx setup --project-dir ${resolvedProjectDir}`, + status: `ktx status --project-dir ${resolvedProjectDir}`, + }; +``` + +In `normalizeState`, normalize legacy states: + +```ts + const rawStatus = record.status ?? 'not_started'; + const legacyActive = rawStatus === 'detached' || rawStatus === 'paused' || rawStatus === 'running'; + const status: KtxSetupContextBuildStatus = legacyActive ? 'stale' : rawStatus; +``` + +Add a default failure reason for legacy active states: + +```ts + ...(typeof record.failureReason === 'string' + ? { failureReason: record.failureReason } + : legacyActive + ? { failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.' } + : {}), +``` + +In `setupContextStatusFromState`, remove `watchCommand`: + +```ts + ...(state.runId ? { statusCommand: state.commands.status } : {}), +``` + +In `runBuild`, remove `onDetach` handling and remove the +`buildResult.detached` branch. + +Delete `isActiveStatus`, `watchExitCode`, `defaultSleep`, `writeContextStatus`, +`watchContextStatus`, `watchContextStatusText`, +`watchContextStatusWithProgressView`, and `setupResultFromWatchedState`. + +In `runKtxSetupContextStep`, remove the branch that prompts: + +```ts + 'A context build is running in the background.\n\n' + + 'You can watch it until it finishes, check its status once, or start a fresh build.' 
+``` + +Replace it with: + +```ts + if (existingState.status === 'stale') { + io.stdout.write('Previous context build state is stale; starting a fresh foreground build.\n'); + } +``` + +- [ ] **Step 5: Update setup tests that referenced detached/watch** + +In `packages/cli/src/setup.test.ts`, replace expectations for returned +`status: 'detached'` from the context step with `status: 'failed'` only when +the mocked context step returns failed. Remove tests named: + +- `does not install agents when full setup context build is detached` +- `skips entry menu and auto-watches when context build is active and showEntryMenu is true` + +Replace them with one test: + +```ts + it('does not offer background watch choices from setup status', async () => { + const tempDir = await makeTempProject(); + await writeKtxSetupContextState(tempDir, { + runId: 'setup-context-local-stale', + status: 'running', + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', + primarySourceConnectionIds: ['warehouse'], + contextSourceConnectionIds: [], + reportIds: [], + artifactPaths: [], + retryableFailedTargets: [], + commands: contextBuildCommands(tempDir, 'setup-context-local-stale'), + }); + + const result = await runKtxSetupStatus({ projectDir: tempDir }, makeIo().io); + + expect(result).toBe(0); + const state = await readKtxSetupContextState(tempDir); + expect(state.status).toBe('stale'); + }); +``` + +- [ ] **Step 6: Run foreground-only tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/setup-context.test.ts src/setup.test.ts +``` + +Expected: PASS after removing stale detached/watch assertions. 
+ +- [ ] **Step 7: Commit foreground-only cleanup** + +Run: + +```bash +git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup.test.ts +git commit -m "fix(setup): keep context build foreground only" +``` + +### Task 6: Reject ingest subcommand connection ids + +**Files:** +- Modify: `packages/context/src/project/config.ts` +- Modify: `packages/context/src/project/index.ts` +- Modify: `packages/context/src/project/config.test.ts` +- Modify: `packages/cli/src/setup-sources.ts` +- Modify: `packages/cli/src/setup-sources.test.ts` +- Modify: `packages/cli/src/commands/setup-commands.ts` +- Modify: `packages/cli/src/index.test.ts` +- Modify: `packages/cli/src/setup-databases.ts` +- Modify: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Write failing reserved-id tests** + +In `packages/context/src/project/config.test.ts`, add: + +```ts + it.each(['status', 'replay', 'run', 'watch'])( + 'rejects reserved ingest connection id "%s"', + (connectionId) => { + expect(() => + parseKtxProjectConfig(` +project: reserved-test +connections: + ${connectionId}: + driver: postgres +`), + ).toThrow(`"${connectionId}" is reserved for ktx ingest ${connectionId}`); + }, + ); +``` + +In `packages/cli/src/index.test.ts`, add a Commander setup flag test: + +```ts + it('rejects reserved setup database connection ids before dispatch', async () => { + const testIo = makeIo(); + const setup = vi.fn(async () => 0); + + await expect( + runKtxCli(['setup', '--new-database-connection-id', 'status', '--no-input'], testIo.io, { setup }), + ).resolves.toBe(1); + + expect(setup).not.toHaveBeenCalled(); + expect(testIo.stderr()).toContain('"status" is reserved for ktx ingest status; choose a different connection id.'); + }); +``` + +In `packages/cli/src/setup-sources.test.ts`, add a prompt test that enters +`status` for a Notion connection id and expects the 
step to fail with the same +message. + +In `packages/cli/src/setup-databases.test.ts`, add a non-interactive test that +passes `databaseConnectionId: 'replay'` and expects `status: 'failed'` with the +same reserved-id message. + +- [ ] **Step 2: Run failing reserved-id tests** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts -t "reserved ingest connection" +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/setup-sources.test.ts src/setup-databases.test.ts -t "reserved" +``` + +Expected: FAIL because only the unsafe-character regex exists. + +- [ ] **Step 3: Add reserved-id validation to project config** + +In `packages/context/src/project/config.ts`, add after `isRecord`: + +```ts +const RESERVED_INGEST_CONNECTION_IDS = new Map([ + ['status', 'ktx ingest status'], + ['replay', 'ktx ingest replay'], + ['run', 'ktx ingest run'], + ['watch', 'ktx ingest watch'], +]); + +export function reservedKtxIngestConnectionIdMessage(connectionId: string): string | null { + const command = RESERVED_INGEST_CONNECTION_IDS.get(connectionId); + return command ? `"${connectionId}" is reserved for ${command}; choose a different connection id.` : null; +} + +export function assertKtxConnectionIdIsNotReserved(connectionId: string): void { + const message = reservedKtxIngestConnectionIdMessage(connectionId); + if (message) { + throw new Error(message); + } +} +``` + +In `parseKtxProjectConfig`, before returning the parsed object, validate +connection ids: + +```ts + const parsedConnections = isRecord(parsed.connections) + ? 
(parsed.connections as typeof defaults.connections) + : defaults.connections; + for (const connectionId of Object.keys(parsedConnections)) { + assertKtxConnectionIdIsNotReserved(connectionId); + } +``` + +Then change the returned `connections` field to: + +```ts + connections: parsedConnections, +``` + +In `packages/context/src/project/index.ts`, export the helpers: + +```ts +export { + assertKtxConnectionIdIsNotReserved, + buildDefaultKtxProjectConfig, + parseKtxProjectConfig, + reservedKtxIngestConnectionIdMessage, + serializeKtxProjectConfig, +} from './config.js'; +``` + +- [ ] **Step 4: Use reserved-id validation in setup** + +In `packages/cli/src/setup-sources.ts`, import: + +```ts +import { assertKtxConnectionIdIsNotReserved } from '@ktx/context/project'; +``` + +Change `assertSafeConnectionId`: + +```ts +function assertSafeConnectionId(connectionId: string): void { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { + throw new Error(`Unsafe connection id: ${connectionId}`); + } + assertKtxConnectionIdIsNotReserved(connectionId); +} +``` + +In `packages/cli/src/setup-databases.ts`, import +`assertKtxConnectionIdIsNotReserved` and add: + +```ts +function assertSafeDatabaseConnectionId(connectionId: string): void { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { + throw new Error(`Unsafe connection id: ${connectionId}`); + } + assertKtxConnectionIdIsNotReserved(connectionId); +} +``` + +In `chooseConnectionIdForDriver`, validate every new id before returning: + +```ts + assertSafeDatabaseConnectionId(input.args.databaseConnectionId); + return { kind: 'new', connectionId: input.args.databaseConnectionId }; +``` + +and: + +```ts + if (connectionId) { + assertSafeDatabaseConnectionId(connectionId); + } + return connectionId ? 
{ kind: 'new', connectionId } : 'missing-input'; +``` + +In `packages/cli/src/commands/setup-commands.ts`, update +`--new-database-connection-id` parsing: + +```ts + .option('--new-database-connection-id <id>', 'Connection id for one new database connection', (value) => { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(value)) { + throw new InvalidArgumentError(`Unsafe connection id: ${value}`); + } + const reservedMessage = reservedKtxIngestConnectionIdMessage(value); + if (reservedMessage) { + throw new InvalidArgumentError(reservedMessage); + } + return value; + }) +``` + +Add the import: + +```ts +import { reservedKtxIngestConnectionIdMessage } from '@ktx/context/project'; +``` + +- [ ] **Step 5: Run reserved-id tests again** + +Run: + +```bash +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts -t "reserved ingest connection" +pnpm --filter @ktx/cli exec vitest run src/index.test.ts src/setup-sources.test.ts src/setup-databases.test.ts -t "reserved" +``` + +Expected: PASS. + +- [ ] **Step 6: Commit reserved-id validation** + +Run: + +```bash +git add packages/context/src/project/config.ts packages/context/src/project/index.ts packages/context/src/project/config.test.ts packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts +git commit -m "fix(config): reject reserved ingest connection ids" +``` + +### Task 7: Final verification + +**Files:** +- Verify only. + +- [ ] **Step 1: Run focused TypeScript tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/setup-context.test.ts src/context-build-view.test.ts src/setup-databases.test.ts src/setup-sources.test.ts src/setup.test.ts src/index.test.ts +pnpm --filter @ktx/context exec vitest run src/project/config.test.ts src/ingest/local-adapters.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 2: Run package type checks** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +pnpm --filter @ktx/context run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run docs and script tests touched by unified ingest** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs scripts/package-artifacts.test.mjs scripts/installed-live-database-smoke.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code check** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. If the check reports unrelated pre-existing findings, record +the exact findings in the implementation notes and do not silence them with a +broad ignore. + +- [ ] **Step 5: Run pre-commit for changed files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/cli/src/ingest-depth.ts \ + packages/cli/src/public-ingest.ts \ + packages/cli/src/public-ingest.test.ts \ + packages/cli/src/setup-context.ts \ + packages/cli/src/setup-context.test.ts \ + packages/cli/src/context-build-view.ts \ + packages/cli/src/context-build-view.test.ts \ + packages/cli/src/setup-databases.ts \ + packages/cli/src/setup-databases.test.ts \ + packages/cli/src/setup-sources.ts \ + packages/cli/src/setup-sources.test.ts \ + packages/cli/src/commands/setup-commands.ts \ + packages/cli/src/index.test.ts \ + packages/context/src/project/config.ts \ + packages/context/src/project/config.test.ts \ + packages/context/src/project/index.ts +``` + +Expected: PASS. If local `uv` cannot satisfy the pinned project version, state +the version mismatch and run the TypeScript checks above as the closest +available verification. + +- [ ] **Step 6: Commit verification-only fixes** + +If verification required expectation or formatting changes, run: + +```bash +git add packages/cli/src packages/context/src scripts README.md +git commit -m "test: close unified ingest v1 expectations" +``` + +If no files changed during verification, do not create an empty commit. 
+ +## Self-review notes + +Spec coverage in this plan: + +- Covers deep readiness failures before work starts for explicit or stored + `deep` and for query-history depth upgrades. +- Covers `scan.relationships.enabled` in deep database ingest. +- Covers setup depth prompting and storage under + `connections.<connectionId>.context.depth`. +- Covers fast readiness without AI descriptions or embeddings. +- Covers deep readiness with relationship-stage gating only when relationship + discovery is enabled. +- Covers generated setup query-history config under + `connections.<connectionId>.context.queryHistory`. +- Covers setup migration from legacy `connection.historicSql`. +- Covers foreground-only context build by removing detach, watch, resume, stop, + paused/detached state, and background subprocess behavior. +- Covers reserved ingest subcommand ids in setup and config validation. + +Placeholder scan: no deferred markers, unnamed edge handling, or undefined +types remain in the plan. The plan uses concrete file paths, commands, and +code shapes for each implementation task. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-docs-site-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-docs-site-closure.md new file mode 100644 index 00000000..c1edc24e --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-docs-site-closure.md @@ -0,0 +1,829 @@ +# Unified Ingest V1 Docs Site Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the remaining public documentation surfaces that still present +`ktx scan`, adapter-backed `ktx ingest run`, `ktx ingest watch`, +`live-database`, or `Historic SQL` as normal v1 user workflows. + +**Architecture:** Keep the implemented CLI behavior unchanged. 
Update the +Fumadocs content, example READMEs, and documentation regression tests so public +guidance uses connection-centric `ktx ingest <connectionId>`, `ktx ingest +--all`, `--fast`, `--deep`, `--query-history`, `ktx ingest status`, and +`ktx ingest replay`. + +**Tech Stack:** Markdown, MDX frontmatter, Fumadocs page metadata, Node test +runner, pnpm workspace scripts. + +--- + +## Current audit + +The four implemented unified-ingest plans cover the CLI and setup v1 surface: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` route through `public-ingest.ts`. +- Database targets run before source targets, public source ingest bypasses + adapter allow-lists, and public database ingest captures internal scan output. +- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal + help. +- Setup stores `connections.<connectionId>.context.depth`, writes + `connections.<connectionId>.context.queryHistory`, rejects reserved ingest ids, and + uses foreground-only context-build state. + +### V1-blocking gaps + +- `docs-site/content/docs/cli-reference/ktx-ingest.mdx` still documents + adapter-level `ktx ingest run`, `--adapter`, `ktx ingest watch`, and + `live-database`. +- `docs-site/content/docs/cli-reference/ktx-scan.mdx` still presents + `ktx scan` as a public command, and + `docs-site/content/docs/cli-reference/meta.json` still publishes it in the + CLI reference. +- `docs-site/content/docs/cli-reference/ktx-dev.mdx` still links to root + `ktx scan` as a normal command. +- `docs-site/content/docs/guides/building-context.mdx` still has an adapter + table that lists `historic-sql` and `live-database`, and it still documents + `ktx ingest watch` as the visual progress path. +- `docs-site/content/docs/integrations/context-sources.mdx` still instructs + users to run + `ktx ingest run --connection-id <connectionId> --adapter <adapter>`. 
+- `docs-site/content/docs/concepts/context-as-code.mdx` still recommends + scheduled + `ktx ingest run --connection-id --adapter --no-input`. +- `docs-site/content/docs/getting-started/quickstart.mdx` still says setup + runs structural/enriched scans, exposes Historic SQL flags, and describes + detach/background context-build behavior. +- `docs-site/content/docs/integrations/primary-sources.mdx` still uses the + legacy `historicSql` config shape and Historic SQL wording for supported + query-history drivers. +- `examples/README.md` and `examples/local-warehouse/README.md` still present + `ktx ingest run --adapter fake` as the example command. + +### Non-blocking gaps + +- Hidden debug commands can continue to call `ktx scan`, + `ktx ingest run`, and `ktx ingest watch`. +- Internal source keys, raw artifact paths, tests, scripts, and developer-only + package taxonomy can continue to use `scan`, `live-database`, and + `historic-sql`. +- Contributor docs can continue to mention scan internals when describing + package ownership or connector implementation details. +- The `examples/local-warehouse/ktx.yaml` fake adapter fixture can remain for + CLI smoke tests if the public example docs stop recommending it as a normal + user workflow. + +## File structure + +- Modify `scripts/examples-docs.test.mjs`: add regression assertions for + docs-site and example README unified-ingest wording. +- Modify `docs-site/content/docs/cli-reference/ktx-ingest.mdx`: rewrite the + page around the connection-centric public command. +- Delete `docs-site/content/docs/cli-reference/ktx-scan.mdx`: remove the + public scan reference page. +- Modify `docs-site/content/docs/cli-reference/meta.json`: remove + `ktx-scan` from published CLI reference pages. +- Modify `docs-site/content/docs/cli-reference/ktx-dev.mdx`: remove the + root-scan link and clarify that database context is built by `ktx ingest`. 
+- Modify `docs-site/content/docs/guides/building-context.mdx`: remove + adapter tables and live watch guidance; describe status/replay only. +- Modify `docs-site/content/docs/integrations/context-sources.mdx`: replace + adapter-backed ingest commands with `ktx ingest `. +- Modify `docs-site/content/docs/concepts/context-as-code.mdx`: replace + scheduled adapter-backed ingest guidance with `ktx ingest --all`. +- Modify `docs-site/content/docs/getting-started/quickstart.mdx`: update setup + language for schema context, depth, query history, and foreground-only + progress. +- Modify `docs-site/content/docs/integrations/primary-sources.mdx`: replace + `historicSql` with `context.queryHistory` and query-history wording. +- Modify `examples/README.md`: stop advertising the fake adapter command as a + public example workflow. +- Modify `examples/local-warehouse/README.md`: mark the fake adapter fixture as + contributor-only and point users to public ingest docs. + +## Tasks + +### Task 1: Add stale public-doc regression tests + +**Files:** +- Modify: `scripts/examples-docs.test.mjs` + +- [ ] **Step 1: Add failing docs-site unified-ingest assertions** + +In `scripts/examples-docs.test.mjs`, replace the existing test named +`documents public context build workflows in the docs site` with: + +```js + it('documents unified public ingest workflows in the docs site', async () => { + const rootReadme = await readText('README.md'); + const cliMeta = await readText('docs-site/content/docs/cli-reference/meta.json'); + const ingestReference = await readText('docs-site/content/docs/cli-reference/ktx-ingest.mdx'); + const devReference = await readText('docs-site/content/docs/cli-reference/ktx-dev.mdx'); + const buildingContext = await readText('docs-site/content/docs/guides/building-context.mdx'); + const contextSources = await readText('docs-site/content/docs/integrations/context-sources.mdx'); + const contextAsCode = await 
readText('docs-site/content/docs/concepts/context-as-code.mdx'); + const quickstart = await readText('docs-site/content/docs/getting-started/quickstart.mdx'); + const primarySources = await readText('docs-site/content/docs/integrations/primary-sources.mdx'); + const examplesIndex = await readText('examples/README.md'); + const localWarehouseReadme = await readText('examples/local-warehouse/README.md'); + + assert.match(ingestReference, /ktx ingest /); + assert.match(ingestReference, /ktx ingest --all --deep/); + assert.match(ingestReference, /--query-history-window-days /); + assert.match(buildingContext, /ktx ingest /); + assert.match(buildingContext, /ktx ingest --all/); + assert.match(buildingContext, /ktx ingest replay /); + assert.match(contextSources, /ktx ingest /); + assert.match(contextAsCode, /ktx ingest --all --no-input/); + assert.match(quickstart, /schema context/); + assert.match(primarySources, /context:\\n queryHistory:/); + + assert.doesNotMatch(cliMeta, /ktx-scan/); + assert.doesNotMatch(ingestReference, /ktx ingest run/); + assert.doesNotMatch(ingestReference, /--adapter/); + assert.doesNotMatch(ingestReference, /ktx ingest watch/); + assert.doesNotMatch(ingestReference, /live-database/); + assert.doesNotMatch(devReference, /ktx scan/); + assert.doesNotMatch(buildingContext, /ktx ingest watch/); + assert.doesNotMatch(buildingContext, /historic-sql/); + assert.doesNotMatch(buildingContext, /live-database/); + assert.doesNotMatch(contextSources, /ktx ingest run --connection-id/); + assert.doesNotMatch(contextSources, /--adapter /); + assert.doesNotMatch(contextAsCode, /ktx ingest run --connection-id/); + assert.doesNotMatch(quickstart, /Historic SQL/); + assert.doesNotMatch(quickstart, /--enable-historic-sql/); + assert.doesNotMatch(quickstart, /press d<\\/kbd> to detach/); + assert.doesNotMatch(primarySources, /historicSql/); + assert.doesNotMatch(primarySources, /Historic SQL/); + assert.doesNotMatch(examplesIndex, /ktx ingest run 
--project-dir/); + assert.doesNotMatch(localWarehouseReadme, /ktx ingest run --project-dir/); + + assert.match(rootReadme, /raw-sources\//); + assert.doesNotMatch(rootReadme, new RegExp(`${['live', 'database'].join('-')}/`)); + assert.doesNotMatch(rootReadme, /ktx scan/); + assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/); + assert.doesNotMatch(rootReadme, /ktx ingest run --project-dir/); + assert.doesNotMatch(rootReadme, /ktx ingest status --project-dir/); + }); +``` + +- [ ] **Step 2: Run the failing docs regression test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: FAIL with assertions matching the stale docs-site and example README +content. + +- [ ] **Step 3: Commit the failing test** + +```bash +git add scripts/examples-docs.test.mjs +git commit -m "test(docs): cover unified ingest public docs" +``` + +### Task 2: Rewrite the CLI reference surface + +**Files:** +- Modify: `docs-site/content/docs/cli-reference/ktx-ingest.mdx` +- Delete: `docs-site/content/docs/cli-reference/ktx-scan.mdx` +- Modify: `docs-site/content/docs/cli-reference/meta.json` +- Modify: `docs-site/content/docs/cli-reference/ktx-dev.mdx` + +- [ ] **Step 1: Rewrite `ktx-ingest.mdx`** + +Replace `docs-site/content/docs/cli-reference/ktx-ingest.mdx` with: + +````mdx +--- +title: "ktx ingest" +description: "Build, inspect, and replay KTX context ingest runs." +--- + +`ktx ingest` builds or refreshes KTX context from configured connections. +Database connections build schema context. Context-source connections ingest +metadata from tools such as dbt, Looker, Metabase, MetricFlow, LookML, and +Notion. + +## Command signature + +```bash +ktx ingest [options] [connectionId] +``` + +Use a connection id to build one configured connection. Use `--all` to build +every configured connection. Database connections run before context-source +connections when you use `--all`. 
+ +## Build options + +| Flag | Description | Default | +|------|-------------|---------| +| `--all` | Build every configured connection | `false` | +| `--fast` | Use deterministic database schema ingest | Stored connection default, or `fast` | +| `--deep` | Use AI-enriched database ingest | Stored connection default, or `fast` | +| `--query-history` | Include database query-history usage patterns | Stored connection default | +| `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | +| `--query-history-window-days ` | Query-history lookback window for this run | Stored connection default | +| `--plain` | Print plain text output | `true` | +| `--json` | Print JSON output | `false` | +| `--no-input` | Disable interactive terminal input | `false` | + +`--fast` and `--deep` are mutually exclusive. Depth flags apply only to +database connections. Query-history flags apply only to database connections +that support query history. + +## Status and replay + +| Subcommand | Description | +|------------|-------------| +| `status [runId]` | Print status for the latest or selected stored ingest run or report file | +| `replay ` | Replay a stored ingest run or bundle report through memory-flow output | + +Both subcommands accept `--report-file `, `--plain`, `--json`, `--viz`, +and `--no-input`. 
+ +## Examples + +```bash +ktx ingest warehouse +ktx ingest warehouse --fast +ktx ingest warehouse --deep +ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history-window-days 30 +ktx ingest notion +ktx ingest --all +ktx ingest --all --deep + +ktx ingest status +ktx ingest status run-abc123 +ktx ingest status --json + +ktx ingest replay run-abc123 +ktx ingest replay run-abc123 --viz +ktx ingest replay run-abc123 --report-file /tmp/ingest-report.json +``` + +## Common errors + +| Error | Cause | Recovery | +|-------|-------|----------| +| Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` | +| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` | +| Query history is unsupported | The selected database driver does not support query history | Run schema ingest without query-history flags | +| Latest run not found | No stored ingest report exists in this project | Run `ktx ingest ` first | +| Visual replay fails in a non-interactive shell | Visual report replay needs a terminal | Use `ktx ingest status --json` for agent and CI workflows | +```` + +- [ ] **Step 2: Remove the public scan page** + +Delete `docs-site/content/docs/cli-reference/ktx-scan.mdx`. + +- [ ] **Step 3: Remove `ktx-scan` from CLI metadata** + +In `docs-site/content/docs/cli-reference/meta.json`, replace the full file +with: + +```json +{ + "title": "CLI Reference", + "defaultOpen": true, + "pages": [ + "ktx-setup", + "ktx-connection", + "ktx-ingest", + "ktx-sl", + "ktx-wiki", + "ktx-status", + "ktx-dev" + ] +} +``` + +- [ ] **Step 4: Update the dev command reference** + +In `docs-site/content/docs/cli-reference/ktx-dev.mdx`, replace this paragraph: + +```mdx +`ktx dev` contains development-only project initialization and managed runtime commands. 
 Scan and ingest commands live at the root as [`ktx scan`](/docs/cli-reference/ktx-scan) and [`ktx ingest`](/docs/cli-reference/ktx-ingest). +``` + +with: + +```mdx +`ktx dev` contains development-only project initialization and managed runtime commands. Context building lives at the root as [`ktx ingest`](/docs/cli-reference/ktx-ingest). +``` + +- [ ] **Step 5: Run the docs regression test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: FAIL only on the remaining guide, integration, quickstart, primary +source, and example README stale wording. + +- [ ] **Step 6: Commit CLI reference cleanup** + +```bash +git add docs-site/content/docs/cli-reference/ktx-ingest.mdx docs-site/content/docs/cli-reference/meta.json docs-site/content/docs/cli-reference/ktx-dev.mdx +git rm docs-site/content/docs/cli-reference/ktx-scan.mdx +git commit -m "docs: align ingest CLI reference with unified UX" +``` + +### Task 3: Update context-build guides + +**Files:** +- Modify: `docs-site/content/docs/guides/building-context.mdx` +- Modify: `docs-site/content/docs/integrations/context-sources.mdx` +- Modify: `docs-site/content/docs/concepts/context-as-code.mdx` + +- [ ] **Step 1: Update stored report guidance in `building-context.mdx`** + +In `docs-site/content/docs/guides/building-context.mdx`, replace the +`### Watching progress` section through the paragraph after it with: + +````mdx +### Inspecting stored reports + +```bash +# Check status of the latest ingest +ktx ingest status + +# Check a specific run +ktx ingest status <runId> + +# Replay a past ingest run +ktx ingest replay <runId> +``` + +`ktx ingest replay` opens the stored memory-flow output for a completed run. +Foreground context builds do not detach into background control sessions; if a +run is interrupted, rerun `ktx ingest <connectionId>` or `ktx ingest --all`. 
+```` + +- [ ] **Step 2: Replace the adapter table in `building-context.mdx`** + +In the same file, replace the `### Available adapters` heading, table, and +following sentence with: + +```mdx +### Supported context sources + +| Driver | Source | What gets ingested | +|--------|--------|--------------------| +| `dbt` | dbt project | Model definitions, column descriptions, tests, tags | +| `metricflow` | MetricFlow semantic models | Metrics, dimensions, entities, semantic joins | +| `lookml` | LookML files | Views, explores, dimensions, measures, joins | +| `looker` | Looker API | Explores, looks, dashboard metadata | +| `metabase` | Metabase API | Questions, dashboards, table metadata | +| `notion` | Notion API | Database pages, knowledge articles | + +Query history is a database connection facet. Enable it with +`connections..context.queryHistory` or pass `--query-history` for a current +run. See [Context Sources](/docs/integrations/context-sources) for +driver-specific setup and auth configuration. +``` + +- [ ] **Step 3: Update context-source workflow commands** + +In `docs-site/content/docs/integrations/context-sources.mdx`, replace the +numbered workflow with: + +```mdx +Agents must configure and ingest context sources in this order: + +1. Add the context source connection in `ktx.yaml` or with `ktx setup`. +2. Store tokens as `env:NAME` or `file:/path/to/secret`. +3. Run `ktx ingest ` for one source or `ktx ingest --all` for + every configured source. +4. Check progress with `ktx ingest status --json`. +5. Review generated `semantic-layer/` YAML and `wiki/` Markdown files in git. +6. Validate changed semantic sources with `ktx sl validate`. +``` + +- [ ] **Step 4: Update scheduled ingest wording** + +In `docs-site/content/docs/concepts/context-as-code.mdx`, replace this +paragraph: + +```mdx +Teams usually run this on demand while setting up a source, then schedule it once the source is stable. 
A cron job or CI schedule can run `ktx ingest run --connection-id --adapter --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. +``` + +with: + +```mdx +Teams usually run this on demand while setting up a source, then schedule it +once the source is stable. A cron job or CI schedule can run `ktx ingest --all +--no-input` overnight on an ingest branch so the latest schema context, dbt +manifests, BI metadata, and documentation updates are ready for review each +morning. +``` + +- [ ] **Step 5: Run the docs regression test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: FAIL only on quickstart, primary source, and example README stale +wording. + +- [ ] **Step 6: Commit guide cleanup** + +```bash +git add docs-site/content/docs/guides/building-context.mdx docs-site/content/docs/integrations/context-sources.mdx docs-site/content/docs/concepts/context-as-code.mdx +git commit -m "docs: update context build guides for unified ingest" +``` + +### Task 4: Update setup and primary-source docs + +**Files:** +- Modify: `docs-site/content/docs/getting-started/quickstart.mdx` +- Modify: `docs-site/content/docs/integrations/primary-sources.mdx` + +- [ ] **Step 1: Update database setup copy in quickstart** + +In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the first +paragraph under `## Step 3: Connect a database` with: + +```mdx +Select one or more databases for KTX to connect to. The wizard supports +SQLite, PostgreSQL, MySQL, ClickHouse, SQL Server, BigQuery, and Snowflake. 
+``` + +Replace this sentence: + +```mdx +After connecting, KTX automatically runs a connection test and a structural scan: +``` + +with: + +```mdx +After connecting, KTX automatically runs a connection test and builds fast +schema context: +``` + +Replace the example output block in Step 3 with: + +````mdx +``` +Testing postgres-warehouse + Connection test passed + Driver: PostgreSQL - Tables: 42 + +Building schema context for postgres-warehouse + Running fast database ingest + +Schema context complete for postgres-warehouse + Changes: 42 new tables + +Primary source ready + postgres-warehouse - PostgreSQL - schema context complete +``` +```` + +Replace this paragraph: + +```mdx +For Snowflake and BigQuery, the wizard offers **Historic SQL** configuration for query history views. For PostgreSQL, enable Historic SQL with `--enable-historic-sql` when `pg_stat_statements` is configured. +``` + +with: + +```mdx +For PostgreSQL, Snowflake, and BigQuery, the wizard can enable query-history +ingest when the warehouse history feature is available. Query history is stored +under `connections..context.queryHistory` in `ktx.yaml`. +``` + +- [ ] **Step 2: Update context-build copy in quickstart** + +In the same file, replace the first two paragraphs under +`## Step 5: Build context` with: + +```mdx +This is where KTX builds agent-ready context. It uses the database context +depth saved by setup and ingests metadata from any configured context sources. + +Fast database context builds deterministic schema grounding. Deep database +context also generates AI descriptions, embeddings, and relationship evidence +when those capabilities are configured. +``` + +Replace the paragraph and background example that starts with `For a small +database` and ends with the fenced context-build block with: + +````mdx +For a small database (under 50 tables), this can take a few minutes. Larger +warehouses can take longer. 
Context builds run in the foreground; press +Ctrl+C to stop the current run and rerun `ktx setup` or `ktx ingest` +when you are ready to try again. +```` + +Replace this output line in the completion example: + +```text + postgres-warehouse: enriched scan complete +``` + +with: + +```text + postgres-warehouse: deep context complete +``` + +Replace the next-steps bullet: + +```mdx +- **Build more context** - learn about [scanning](/docs/guides/building-context), relationship detection, and ingestion workflows in the Building Context guide. +``` + +with: + +```mdx +- **Build more context** - learn about [database ingest](/docs/guides/building-context), relationship detection, and source ingestion workflows in the Building Context guide. +``` + +- [ ] **Step 3: Update primary-source query-history config** + +In `docs-site/content/docs/integrations/primary-sources.mdx`, replace the +introductory paragraph and shared conventions with: + +```mdx +KTX connects to your data warehouse or database to build schema context, +discover relationships, and execute semantic layer queries. Each connection is +defined in `ktx.yaml` under the `connections` key. + +All connectors share these conventions: + +- Sensitive values support `env:VAR_NAME` (read from environment) and + `file:/path/to/secret` (read from file) references +- Connections are read-only; KTX never writes to your database +- Database ingest discovers tables, columns, types, and constraints + automatically +``` + +In the connection field reference table, replace the `historicSql` row with: + +```mdx +| `context.queryHistory` | No | PostgreSQL, Snowflake, BigQuery | Enables query-history ingestion when the warehouse supports it | +``` + +Replace every feature row label `Historic SQL` with `Query history`. + +Replace each `### Historic SQL` heading with `### Query history`. 
+ +Replace the PostgreSQL query-history config block with: + +```yaml +context: + queryHistory: + enabled: true + minExecutions: 5 + filters: + dropTrivialProbes: true +``` + +Replace the Snowflake query-history config block with: + +```yaml +context: + queryHistory: + enabled: true + windowDays: 90 + minExecutions: 5 + filters: + dropTrivialProbes: true + serviceAccounts: + patterns: ['^svc_'] + mode: exclude + redactionPatterns: [] +``` + +Replace the BigQuery query-history config block with: + +```yaml +context: + queryHistory: + enabled: true + windowDays: 90 + minExecutions: 5 + filters: + dropTrivialProbes: true + serviceAccounts: + patterns: ['@bot\\.'] + mode: exclude + redactionPatterns: [] +``` + +Replace the common-errors row: + +```mdx +| Historic SQL is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun scan or setup | +``` + +with: + +```mdx +| Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest --query-history` or `ktx setup` | +``` + +Replace the common-errors row: + +```mdx +| Scan returns no tables | Schema/database/project filter is wrong or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | +``` + +with: + +```mdx +| Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | +``` + +Replace the common-errors row: + +```mdx +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on structural scan output | +``` + +with: + +```mdx +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where 
supported; otherwise rely on fast schema context | +``` + +- [ ] **Step 4: Run targeted stale-term search** + +Run: + +```bash +rg -n "Historic SQL|historicSql|--enable-historic-sql|--historic-sql|ktx scan|ktx ingest watch|ktx ingest run --connection-id|--adapter |live-database" docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/integrations/primary-sources.mdx docs-site/content/docs/cli-reference docs-site/content/docs/guides/building-context.mdx docs-site/content/docs/integrations/context-sources.mdx docs-site/content/docs/concepts/context-as-code.mdx +``` + +Expected: no output. + +- [ ] **Step 5: Run the docs regression test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: FAIL only on example README stale adapter-command wording. + +- [ ] **Step 6: Commit setup and primary-source docs cleanup** + +```bash +git add docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/integrations/primary-sources.mdx +git commit -m "docs: update setup and primary source ingest wording" +``` + +### Task 5: Remove public fake-adapter example commands + +**Files:** +- Modify: `examples/README.md` +- Modify: `examples/local-warehouse/README.md` + +- [ ] **Step 1: Rewrite the local-warehouse section in `examples/README.md`** + +In `examples/README.md`, replace the `## local-warehouse` section with: + +````md +## local-warehouse + +`local-warehouse/` is a contributor fixture for local CLI smoke tests. It uses +the internal fake ingest adapter so tests can exercise memory-flow behavior +without a live database or external service. + +For normal context building, use the public connection-centric commands: + +```bash +ktx ingest +ktx ingest --all +``` + +The copied project initializes its own Git repository on first use. 
+```` + +- [ ] **Step 2: Rewrite `examples/local-warehouse/README.md`** + +Replace `examples/local-warehouse/README.md` with: + +````md +# local-warehouse fixture + +This directory is a contributor fixture for KTX CLI smoke tests. It uses the +internal fake ingest adapter so tests can run without a live database or +external service. + +Normal users should build context with connection-centric ingest: + +```bash +ktx ingest +ktx ingest --all +``` + +The public ingest workflow is documented in +`docs-site/content/docs/cli-reference/ktx-ingest.mdx` and +`docs-site/content/docs/guides/building-context.mdx`. +```` + +- [ ] **Step 3: Run the docs regression test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 4: Commit example docs cleanup** + +```bash +git add examples/README.md examples/local-warehouse/README.md +git commit -m "docs: stop advertising adapter-backed example ingest" +``` + +### Task 6: Final verification + +**Files:** +- Verify: `scripts/examples-docs.test.mjs` +- Verify: `docs-site/content/docs/**/*.mdx` +- Verify: `examples/**/*.md` + +- [ ] **Step 1: Run docs regression tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 2: Run docs-site build** + +Run: + +```bash +pnpm --filter ktx-docs run build +``` + +Expected: PASS. If the build fails because this workspace lacks external build +prerequisites, capture the error and run `pnpm --filter ktx-docs run test` as +the closest available docs-site check. + +- [ ] **Step 3: Run final stale public-surface search** + +Run: + +```bash +rg -n "ktx scan|ktx ingest run --connection-id|--adapter |ktx ingest watch|live-database|Historic SQL|historicSql|--enable-historic-sql|--historic-sql" docs-site/content/docs examples/README.md examples/local-warehouse/README.md +``` + +Expected: no output. 
+ +- [ ] **Step 4: Inspect git status** + +Run: + +```bash +git status --short +``` + +Expected: only the files intentionally changed by this plan appear. + +- [ ] **Step 5: Commit verification updates if needed** + +If verification required small documentation or test fixes, commit them: + +```bash +git add scripts/examples-docs.test.mjs docs-site/content/docs examples/README.md examples/local-warehouse/README.md +git commit -m "docs: close unified ingest public docs gaps" +``` + +## Self-review + +- Spec coverage: This plan covers the remaining public documentation surfaces + that still contradicted the unified ingest UX spec. It intentionally does not + rename internal scan packages, internal adapter keys, raw artifact paths, or + developer-only test fixtures. +- Placeholder scan: No task contains open-ended placeholders. Each edit names + exact files and exact replacement text or commands. +- Type consistency: This is a documentation-only plan. Command names and config + keys match the implemented CLI and config code: `ktx ingest `, + `ktx ingest --all`, `ktx ingest status`, `ktx ingest replay`, and + `connections..context.queryHistory`. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-public-surface-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-public-surface-closure.md new file mode 100644 index 00000000..f6a33580 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-public-surface-closure.md @@ -0,0 +1,494 @@ +# Unified Ingest V1 Final Public Surface Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking public-surface gaps in unified +`ktx ingest`. 
+ +**Architecture:** Keep the current connection-centric ingest planner and hidden +legacy debug commands. Fix the public query-history execution path so it passes +the full canonical `connections..context.queryHistory` pull config to the +historic-SQL adapter, and filter hidden Commander commands from the +documentation command-tree script so docs/discovery output matches normal CLI +help. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages, +pnpm workspace scripts. + +--- + +## Current audit + +The implemented unified-ingest plan chain covers most of the original +`docs/superpowers/specs/2026-05-13-unified-ingest-ux-design.md` spec: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` route through `public-ingest.ts`. +- Database targets run before source targets. Public source ingest uses + `allowImplicitAdapter: true`, so `ingest.adapters` is no longer required for + inferred public adapters. +- Public database ingest maps `fast` to structural scan internals and `deep` to + enriched scan internals, honors `scan.relationships.enabled`, and isolates + deep-readiness failures per target under `--all`. +- Normal `ktx --help` hides `scan`; normal `ktx ingest --help` hides `run` and + `watch`; setup help exposes query-history flags instead of Historic SQL flags. +- Setup stores `connections..context.depth` and + `connections..context.queryHistory`, migrates legacy `historicSql`, and + uses foreground-only context-build state. +- Public docs-site CLI pages no longer document `ktx scan`, + `ktx ingest run --adapter`, or live `ktx ingest watch` as normal workflows. + +### V1-blocking gaps + +- Public query-history ingest drops configured pull fields. 
 The lower-level + adapter path maps canonical `context.queryHistory` to the existing + `historicSqlUnifiedPullConfigSchema`, but `executePublicIngestTarget()` always + passes `historicSqlPullConfigOverride` with only `dialect` and sometimes + `windowDays`. Normal `ktx ingest warehouse --query-history` can therefore + ignore configured `minExecutions`, `filters`, `redactionPatterns`, + `concurrency`, and `staleArchiveAfterDays`. +- The documentation command-tree script still prints hidden commands. Running + `pnpm --filter @ktx/cli run docs:commands` currently prints top-level + `scan <connectionId>` and `ktx ingest run` / `ktx ingest watch`, even though + the spec requires `ktx scan` and live `ingest watch` not to be presented as + normal public command surfaces. + +### Non-blocking gaps + +- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`. The spec allows hidden/debug placement for old + implementation surfaces in v1. +- Internal adapter keys, package names, WorkUnit keys, raw artifact paths, and + JSON/debug output can continue to use `scan`, `live-database`, and + `historic-sql`. +- Developer-only scripts and tests can keep scan/live-database terminology when + they exercise internal connector or artifact behavior. +- Public docs still use "scan" as a generic noun in a few conceptual database + sections. They do not document `ktx scan` as the public command, so this is + wording cleanup, not v1-blocking behavior. + +## File structure + +- Modify `packages/cli/src/public-ingest.ts`: preserve the full canonical + query-history pull config in public ingest plans and pass that config to the + lower-level historic-SQL adapter run. +- Modify `packages/cli/src/public-ingest.test.ts`: add regression coverage for + configured query-history fields and current-run `windowDays` overrides. +- Modify `packages/cli/src/command-tree.ts`: filter Commander commands marked + hidden via Commander's private `_hidden` flag, matching Commander help behavior. 
+- Modify `packages/cli/src/command-tree.test.ts`: cover hidden top-level and + nested command filtering in the pure walker. +- Modify `packages/cli/src/print-command-tree.test.ts`: lock the rendered KTX + docs command tree against hidden unified-ingest commands. + +## Tasks + +### Task 1: Preserve canonical query-history pull config in public ingest + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write the failing public-ingest query-history config test** + +In `packages/cli/src/public-ingest.test.ts`, add this test inside the +`runKtxPublicIngest` describe block, near the existing query-history execution +tests: + +```ts + it('preserves configured query-history pull fields while overriding the current-run window', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { + driver: 'postgres', + context: { + queryHistory: { + enabled: true, + windowDays: 90, + minExecutions: 7, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + orchestrators: { mode: 'mark-only' }, + dropFailedBelow: { errorRate: 0.5, executions: 3 }, + }, + redactionPatterns: ['(?i)secret'], + }, + }, + }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + queryHistoryWindowDays: 30, + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + const ingestArgs = runIngest.mock.calls[0]?.[0]; + expect(ingestArgs).toMatchObject({ + command: 'run', + connectionId: 'warehouse', + adapter: 'historic-sql', + allowImplicitAdapter: true, + historicSqlPullConfigOverride: { + dialect: 'postgres', + 
 windowDays: 30, + minExecutions: 7, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + orchestrators: { mode: 'mark-only' }, + dropFailedBelow: { errorRate: 0.5, executions: 3 }, + }, + redactionPatterns: ['(?i)secret'], + }, + }); + expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled'); + }); +``` + +- [ ] **Step 2: Run the failing public-ingest test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testTimeout 30000 +``` + +Expected: FAIL. The new assertion sees `historicSqlPullConfigOverride` with +`dialect: 'postgres'` and `windowDays: 30`, but without `minExecutions`, +`filters`, `redactionPatterns`, `concurrency`, or +`staleArchiveAfterDays`. + +- [ ] **Step 3: Add the full query-history pull config to public plans** + +In `packages/cli/src/public-ingest.ts`, update the `queryHistory` field on +`KtxPublicIngestPlanTarget` to include a pull config for enabled query-history +runs: + +```ts + queryHistory?: { + enabled: boolean; + dialect?: HistoricSqlDialect; + windowDays?: number; + pullConfig?: Record<string, unknown>; + unsupported?: boolean; + skippedStoredByFast?: boolean; + }; +``` + +Still in `packages/cli/src/public-ingest.ts`, add this helper below +`positiveInteger()`: + +```ts +function queryHistoryPullConfig(input: { + stored: Record<string, unknown>; + dialect: HistoricSqlDialect; + windowDays?: number; +}): Record<string, unknown> { + const { enabled: _enabled, dialect: _dialect, ...storedConfig } = input.stored; + return { + ...storedConfig, + dialect: input.dialect, + ...(input.windowDays !== undefined ? 
{ windowDays: input.windowDays } : {}), + }; +} +``` + +Then replace the enabled-query-history return inside +`resolveDatabaseTargetOptions()` with this version: + +```ts + if (requestedQh && dialect) { + if (depth === 'fast') { + input.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); + } + depth = 'deep'; + return { + databaseDepth: depth, + queryHistory: { + ...queryHistory, + enabled: true, + dialect, + pullConfig: queryHistoryPullConfig({ + stored: storedQh, + dialect, + windowDays: queryHistory.windowDays, + }), + }, + steps: ['database-schema', 'query-history'], + }; + } +``` + +- [ ] **Step 4: Pass the preserved pull config into the historic-SQL adapter** + +In `packages/cli/src/public-ingest.ts`, replace the +`historicSqlPullConfigOverride` construction in `executePublicIngestTarget()` +with: + +```ts + historicSqlPullConfigOverride: + target.queryHistory.pullConfig ?? { + dialect: target.queryHistory.dialect, + ...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}), + }, +``` + +The surrounding `ingestArgs` object must still include: + +```ts + adapter: 'historic-sql', + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + allowImplicitAdapter: true, +``` + +- [ ] **Step 5: Run the public-ingest tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testTimeout 30000 +``` + +Expected: PASS. The new regression test proves public ingest preserves stored +query-history fields while `--query-history-window-days 30` overrides only +`windowDays` for the current run. 
+ +- [ ] **Step 6: Commit** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(cli): preserve query-history pull config in public ingest" +``` + +### Task 2: Hide debug commands from the docs command tree + +**Files:** +- Modify: `packages/cli/src/command-tree.ts` +- Test: `packages/cli/src/command-tree.test.ts` +- Test: `packages/cli/src/print-command-tree.test.ts` + +- [ ] **Step 1: Write the failing hidden-command walker test** + +In `packages/cli/src/command-tree.test.ts`, add this test inside the +`walkCommandTree` describe block: + +```ts + it('omits Commander hidden commands from the public tree', () => { + const root = new Command('ktx'); + root.command('scan', { hidden: true }).description('Run a standalone connection scan'); + const ingest = root.command('ingest').description('Build or inspect KTX context'); + ingest.command('run', { hidden: true }).description('Run local ingest by adapter'); + ingest.command('watch', { hidden: true }).description('Open a stored visual report'); + ingest.command('status').description('Print status'); + root.command('status').description('Check readiness'); + + const tree = walkCommandTree(root); + + expect(tree.children.map((child) => child.name)).toEqual(['ingest', 'status']); + expect(tree.children[0]).toMatchObject({ + name: 'ingest', + children: [{ name: 'status', description: 'Print status', aliases: [], arguments: [], children: [] }], + }); + }); +``` + +- [ ] **Step 2: Write the failing rendered KTX tree assertions** + +In `packages/cli/src/print-command-tree.test.ts`, add these assertions to the +first `renders an indented tree rooted at "ktx" with known top-level commands` +test after the existing `not.toContain()` assertions: + +```ts + expect(output).not.toContain('scan '); + expect(output).not.toContain('│ ├── run'); + expect(output).not.toContain('│ ├── watch'); + expect(output).not.toContain('│ └── watch'); +``` + +- [ ] **Step 3: Run the 
failing command-tree tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts src/print-command-tree.test.ts +``` + +Expected: FAIL. The walker includes hidden commands because it currently maps +over `command.commands` without filtering Commander `_hidden` entries. + +- [ ] **Step 4: Filter hidden Commander commands in the walker** + +In `packages/cli/src/command-tree.ts`, add this helper above +`walkCommandTree()`: + +```ts +function isHiddenCommand(command: CommandUnknownOpts): boolean { + return (command as CommandUnknownOpts & { _hidden?: boolean })._hidden === true; +} +``` + +Then replace the `children` field inside `walkCommandTree()` with: + +```ts + children: command.commands.filter((child) => !isHiddenCommand(child)).map((child) => walkCommandTree(child)), +``` + +The complete function should read: + +```ts +export function walkCommandTree(command: CommandUnknownOpts): CommandTreeNode { + return { + name: command.name(), + description: command.description(), + aliases: command.aliases(), + arguments: command.registeredArguments.map(formatArgumentDeclaration), + children: command.commands.filter((child) => !isHiddenCommand(child)).map((child) => walkCommandTree(child)), + }; +} +``` + +- [ ] **Step 5: Run the command-tree tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/command-tree.test.ts src/print-command-tree.test.ts +``` + +Expected: PASS. The pure walker omits hidden commands and the rendered KTX tree +no longer contains `scan `, `ingest run`, or `ingest watch`. + +- [ ] **Step 6: Verify the docs command output directly** + +Run: + +```bash +pnpm --filter @ktx/cli run docs:commands > /tmp/ktx-command-tree.txt +rg -n "scan |^[[:space:][:graph:]]*run[[:space:]]+Run local ingest|^[[:space:][:graph:]]*watch \\[runId\\]" /tmp/ktx-command-tree.txt +``` + +Expected: the first command succeeds and writes the command tree. The `rg` +command exits with status `1` and prints no matches. 
+ +- [ ] **Step 7: Commit** + +Run: + +```bash +git add packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts +git commit -m "fix(cli): omit hidden commands from docs command tree" +``` + +### Task 3: Final verification + +**Files:** +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/command-tree.ts` +- Verify: `packages/cli/src/public-ingest.test.ts` +- Verify: `packages/cli/src/command-tree.test.ts` +- Verify: `packages/cli/src/print-command-tree.test.ts` + +- [ ] **Step 1: Run focused CLI regression tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/local-adapters.test.ts src/index.test.ts src/command-tree.test.ts src/print-command-tree.test.ts --testTimeout 30000 +``` + +Expected: PASS. This covers public ingest execution, adapter config mapping, +normal help routing, and docs command-tree rendering. + +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS with no TypeScript errors. + +- [ ] **Step 3: Run docs command-tree output check** + +Run: + +```bash +pnpm --filter @ktx/cli run docs:commands > /tmp/ktx-command-tree.txt +rg -n "scan |^[[:space:][:graph:]]*run[[:space:]]+Run local ingest|^[[:space:][:graph:]]*watch \\[runId\\]" /tmp/ktx-command-tree.txt +``` + +Expected: the `docs:commands` command succeeds. The `rg` command exits `1` +with no matches. + +- [ ] **Step 4: Run TypeScript dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. If Knip reports unrelated existing findings, inspect them and +record the exact findings in the implementation notes before deciding whether +they are related to this plan. 
+ +- [ ] **Step 5: Inspect the final diff** + +Run: + +```bash +git status --short +git diff -- packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts +``` + +Expected: only the intended files are modified. The diff contains no generated +`dist/` output and no unrelated documentation changes. + +- [ ] **Step 6: Commit verification-only fixes if needed** + +If verification required expectation or type-only fixes, run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/command-tree.ts packages/cli/src/command-tree.test.ts packages/cli/src/print-command-tree.test.ts +git commit -m "test(cli): close unified ingest final public surface checks" +``` + +If no files changed during verification, do not create an empty commit. + +## Self-review + +- Spec coverage: This plan covers the remaining v1-blocking public query-history + config mapping and public command discovery output. It intentionally leaves + hidden debug command callability and internal scan/live-database/historic-sql + names as non-blocking because the original spec allows internal/debug names + in v1. +- Placeholder scan: No task uses deferred placeholders or unnamed edge-handling + steps. Each code step names the exact file, insertion point, and code shape. +- Type consistency: New `pullConfig` data stays under + `KtxPublicIngestPlanTarget.queryHistory` and flows unchanged into the + existing `KtxIngestArgs.historicSqlPullConfigOverride` field. Command-tree + filtering uses Commander `_hidden`, the same field Commander help uses. 
diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-ux-labels.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-ux-labels.md new file mode 100644 index 00000000..b2db0aa9 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-final-ux-labels.md @@ -0,0 +1,802 @@ +# Unified Ingest V1 Final UX Labels Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking public UX gaps in unified ingest warning aggregation and setup/status terminology. + +**Architecture:** Keep the implemented connection-centric ingest planner, hidden debug commands, and internal scan/live-database/historic-sql boundaries. Add one warning accumulator lane for unsupported database query-history targets, then update normal setup/status/docs copy so public database groups are called `Databases` rather than `Primary sources`. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, Node test runner, KTX CLI/context packages. + +--- + +## Current Audit + +Implemented unified-ingest plans already cover the original spec's main v1 behavior: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, `--query-history`, `--no-query-history`, and `--query-history-window-days` route through `packages/cli/src/public-ingest.ts`. +- Database targets are ordered before source targets, public source ingest bypasses `ingest.adapters`, and database depth maps to structural/enriched scan internals. +- Deep readiness is evaluated before target work starts, and `--all` isolates per-target failures. +- Setup stores `connections.<connectionId>.context.depth` and `connections.<connectionId>.context.queryHistory`, migrates legacy `historicSql`, and uses foreground-only context-build state. 
+- Normal help hides `ktx scan`, `ktx ingest run`, and live `ktx ingest watch`; docs no longer present those as normal public workflows. +- Foreground progress uses `Databases` and `Context sources`, and normal progress/failure output sanitizes scan/live-database/historic-sql internals. + +### V1-Blocking Gaps + +- `ktx ingest --all --query-history` does not aggregate unsupported database query-history warnings. Source depth/query-history warnings are aggregated, but unsupported database drivers currently add one warning per target from `resolveDatabaseTargetOptions()`, contrary to the original spec's `--all` warning aggregation rule for non-applicable query-history flags. +- Normal setup/status surfaces still use the old `Primary sources` public label for databases: + - `packages/cli/src/setup.ts` prints `Primary sources configured`. + - `packages/cli/src/setup-context.ts` prints a `Primary sources:` success group. + - `packages/cli/src/setup-ready-menu.ts` labels the database section `Primary sources`. + - `packages/cli/src/setup-databases.ts` uses `primary source` in normal interactive prompts, skip/failure messages, and success headings. + - `README.md`, `docs-site/content/docs/getting-started/quickstart.mdx`, and `docs-site/content/docs/cli-reference/ktx-setup.mdx` still mirror the old label. + +### Non-Blocking Gaps + +- Hidden debug commands can remain callable: `ktx scan`, `ktx ingest run`, and `ktx ingest watch`. +- Internal adapter keys, raw artifact paths, WorkUnit keys, package names, tests, and developer-only scripts can continue to use `scan`, `live-database`, and `historic-sql`. +- Public conceptual docs may still use `scan` as a generic noun where they are describing internal database metadata artifacts rather than documenting `ktx scan` as the public command. +- Internal readiness config names such as `scan.enrichment.mode` can remain because they are current `ktx.yaml` field names. 
+ +## File Structure + +- Modify `packages/cli/src/public-ingest.ts`: aggregate unsupported database query-history warnings for `--all`. +- Modify `packages/cli/src/public-ingest.test.ts`: add regression tests for explicit and stored unsupported query-history aggregation. +- Modify `packages/cli/src/setup-ready-menu.ts`: change the ready-project database menu label to `Databases`. +- Modify `packages/cli/src/setup-ready-menu.test.ts`: update the ready-menu expected label. +- Modify `packages/cli/src/setup.ts`: change setup status output from `Primary sources configured` to `Databases configured`. +- Modify `packages/cli/src/setup.test.ts`: update status and empty-selection expectations. +- Modify `packages/cli/src/setup-context.ts`: change setup context success grouping from `Primary sources` to `Databases`. +- Modify `packages/cli/src/setup-context.test.ts`: assert the success output uses `Databases`. +- Modify `packages/cli/src/setup-databases.ts`: change normal database setup copy from `primary source(s)` / `knowledge sources` to `database(s)` / `context sources`. +- Modify `packages/cli/src/setup-databases.test.ts`: update expected prompt/output strings. +- Modify `README.md`: update the setup status example label. +- Modify `docs-site/content/docs/getting-started/quickstart.mdx`: update setup success/status examples. +- Modify `docs-site/content/docs/cli-reference/ktx-setup.mdx`: update setup status example. +- Modify `scripts/examples-docs.test.mjs`: add docs regression assertions for the old `Primary sources` label. 
+ +## Tasks + +### Task 1: Aggregate Unsupported Query-History Warnings + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Add failing unsupported warning aggregation tests** + +In `packages/cli/src/public-ingest.test.ts`, add these tests after the existing test named `warns and skips query history for unsupported database drivers`: + +```ts + it('aggregates unsupported query-history warnings for all database targets', () => { + const plan = buildPublicIngestPlan( + deepReadyProject({ + local: { driver: 'sqlite' }, + mysql_warehouse: { driver: 'mysql' }, + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }), + { + projectDir: '/tmp/project', + all: true, + depth: 'deep', + queryHistory: 'enabled', + }, + ); + + expect(plan.targets).toEqual([ + expect.objectContaining({ + connectionId: 'local', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'mysql_warehouse', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'warehouse', + queryHistory: expect.objectContaining({ enabled: true, dialect: 'postgres' }), + steps: ['database-schema', 'query-history'], + }), + ]); + expect(plan.warnings).toEqual([ + '--query-history is not supported for 2 database connections (mysql, sqlite); running schema ingest for those connections.', + ]); + }); + + it('aggregates stored unsupported query-history config warnings for all database targets', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + local: { driver: 'sqlite', context: { queryHistory: { enabled: true } } }, + mysql_warehouse: { driver: 'mysql', context: { queryHistory: { enabled: true } } }, + }), + { + projectDir: '/tmp/project', + all: true, + queryHistory: 'default', + }, + ); + + expect(plan.targets).toEqual([ + expect.objectContaining({ 
+ connectionId: 'local', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'mysql_warehouse', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + ]); + expect(plan.warnings).toEqual([ + '2 database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.', + ]); + }); +``` + +- [ ] **Step 2: Run the failing public ingest tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "unsupported query-history" +``` + +Expected: FAIL because the new `--all` cases currently receive one warning per unsupported database target. + +- [ ] **Step 3: Add unsupported query-history warning accumulator state** + +In `packages/cli/src/public-ingest.ts`, replace the current warning accumulator interface and factory with: + +```ts +interface KtxUnsupportedQueryHistoryWarning { + connectionId: string; + driver: string; + reason: 'explicit' | 'stored'; +} + +interface KtxPublicIngestWarningAccumulator { + warnings: string[]; + ignoredDepthForSources: string[]; + ignoredQueryHistoryForSources: string[]; + unsupportedQueryHistoryForDatabases: KtxUnsupportedQueryHistoryWarning[]; +} + +function createWarningAccumulator(): KtxPublicIngestWarningAccumulator { + return { + warnings: [], + ignoredDepthForSources: [], + ignoredQueryHistoryForSources: [], + unsupportedQueryHistoryForDatabases: [], + }; +} +``` + +- [ ] **Step 4: Add unsupported database warning formatting** + +In `packages/cli/src/public-ingest.ts`, add these helpers after `sourceIgnoredWarning()`: + +```ts +function unsupportedDriverList(entries: KtxUnsupportedQueryHistoryWarning[]): string { + return [...new Set(entries.map((entry) => entry.driver))].sort((left, right) => left.localeCompare(right)).join(', '); +} + +function unsupportedQueryHistoryWarnings( + entries: 
KtxUnsupportedQueryHistoryWarning[], + all: boolean, +): string[] { + if (entries.length === 0) { + return []; + } + + const warnings: string[] = []; + const explicitEntries = entries.filter((entry) => entry.reason === 'explicit'); + const storedEntries = entries.filter((entry) => entry.reason === 'stored'); + + if (explicitEntries.length === 1 || (!all && explicitEntries.length > 0)) { + warnings.push( + ...explicitEntries.map( + (entry) => + `--query-history is not supported for ${entry.driver}; running schema ingest for ${entry.connectionId}.`, + ), + ); + } else if (explicitEntries.length > 1) { + warnings.push( + `--query-history is not supported for ${explicitEntries.length} database connections (${unsupportedDriverList( + explicitEntries, + )}); running schema ingest for those connections.`, + ); + } + + if (storedEntries.length === 1 || (!all && storedEntries.length > 0)) { + warnings.push( + ...storedEntries.map( + (entry) => + `${entry.connectionId} has query history enabled in ktx.yaml, but ${entry.driver} does not support it; running schema ingest.`, + ), + ); + } else if (storedEntries.length > 1) { + warnings.push( + `${storedEntries.length} database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.`, + ); + } + + return warnings; +} +``` + +- [ ] **Step 5: Use the accumulator in `finalizeWarnings()`** + +In `packages/cli/src/public-ingest.ts`, replace the start of `finalizeWarnings()` with: + +```ts + const warnings = [ + ...accumulator.warnings, + ...unsupportedQueryHistoryWarnings(accumulator.unsupportedQueryHistoryForDatabases, args.all), + ]; +``` + +Keep the existing source depth/query-history aggregation logic below that new `warnings` initialization. 
+ +- [ ] **Step 6: Record unsupported database targets instead of pushing immediate warnings** + +In `packages/cli/src/public-ingest.ts`, change the `resolveDatabaseTargetOptions()` input type so `warnings` is the full accumulator: + +```ts + warnings: KtxPublicIngestWarningAccumulator; +``` + +Inside the unsupported query-history branch, replace the current `input.warnings.push(...)` block with: + +```ts + input.warnings.unsupportedQueryHistoryForDatabases.push({ + connectionId: input.connectionId, + driver: input.driver, + reason: explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined ? 'explicit' : 'stored', + }); +``` + +In the supported query-history branch, replace: + +```ts + input.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); +``` + +with: + +```ts + input.warnings.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); +``` + +In the stored query-history skipped-by-fast branch, replace: + +```ts + input.warnings.push( + `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, + ); +``` + +with: + +```ts + input.warnings.warnings.push( + `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, + ); +``` + +In `targetForConnection()`, replace the database resolver call with: + +```ts + const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); +``` + +- [ ] **Step 7: Verify unsupported warning aggregation passes** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "unsupported query-history" +``` + +Expected: PASS. The single-target warning tests keep the old exact messages, while `--all` unsupported database targets receive one aggregate warning per reason. 
+ +- [ ] **Step 8: Commit unsupported warning aggregation** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(cli): aggregate unsupported query-history warnings" +``` + +### Task 2: Rename Public Setup Database Labels + +**Files:** +- Modify: `packages/cli/src/setup-ready-menu.ts` +- Modify: `packages/cli/src/setup.ts` +- Modify: `packages/cli/src/setup-context.ts` +- Modify: `packages/cli/src/setup-databases.ts` +- Test: `packages/cli/src/setup-ready-menu.test.ts` +- Test: `packages/cli/src/setup.test.ts` +- Test: `packages/cli/src/setup-context.test.ts` +- Test: `packages/cli/src/setup-databases.test.ts` +- Modify: `README.md` +- Modify: `docs-site/content/docs/getting-started/quickstart.mdx` +- Modify: `docs-site/content/docs/cli-reference/ktx-setup.mdx` +- Test: `scripts/examples-docs.test.mjs` + +- [ ] **Step 1: Write failing CLI copy expectations** + +In `packages/cli/src/setup-ready-menu.test.ts`, change the expected database option to: + +```ts + { value: 'databases', label: 'Databases' }, +``` + +In `packages/cli/src/setup-context.test.ts`, add these assertions after each `expect(io.stdout()).toContain('KTX context is ready for agents.');` assertion in the successful build and existing-context tests: + +```ts + expect(io.stdout()).toContain('Databases:'); + expect(io.stdout()).not.toContain('Primary sources:'); +``` + +In `packages/cli/src/setup.test.ts`, change the empty database selection expectation to: + +```ts + expect(testIo.stdout()).toContain( + 'KTX cannot work without at least one database. 
Select a database or press Escape to go back.', + ); + expect(testIo.stderr()).not.toContain('No databases selected.'); +``` + +In `packages/cli/src/setup.test.ts`, in the existing-project status test, add: + +```ts + expect(rendered).toContain('Databases configured: no'); + expect(rendered).not.toContain('Primary sources configured'); +``` + +- [ ] **Step 2: Write failing setup database prompt expectations** + +In `packages/cli/src/setup-databases.test.ts`, update the old public copy expectations to the new database labels: + +```ts +expect(prompts.multiselect).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('Which databases should KTX connect to?'), + }), +); +``` + +For configured database menu expectations, use: + +```ts +expect(prompts.select).toHaveBeenCalledWith({ + message: 'Databases already configured: warehouse\nWhat would you like to do?', + options: [ + { value: 'continue', label: 'Continue to context sources' }, + { value: 'add', label: 'Add another database' }, + ], +}); +``` + +For the `postgres-warehouse` configured menu expectations, use: + +```ts +expect(prompts.select).toHaveBeenCalledWith({ + message: 'Databases already configured: postgres-warehouse\nWhat would you like to do?', + options: [ + { value: 'continue', label: 'Continue to context sources' }, + { value: 'add', label: 'Add another database' }, + ], +}); +``` + +For empty-selection output expectations, use: + +```ts +expect(io.stdout()).not.toContain('KTX cannot work without at least one database'); +``` + +For successful initial scan/setup output, use: + +```ts +expect(io.stdout()).toContain('◇ Database ready'); +expect(io.stdout()).not.toContain('Primary source ready'); +``` + +Rename test descriptions that contain `primary source` or `primary sources` so they use `database` or `databases`. 
For example: + +```ts + it('shows every supported database in the interactive checklist', async () => { +``` + +```ts + it('shows a configured database menu instead of the type checklist when a database exists', async () => { +``` + +```ts + it('lets users add another database after completing the first one', async () => { +``` + +- [ ] **Step 3: Run failing setup label tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts -t "ready menu|readiness checklist|context is ready|database|primary source|configured" +``` + +Expected: FAIL because production copy still uses `Primary sources` and `primary source`. + +- [ ] **Step 4: Update the ready menu and status labels** + +In `packages/cli/src/setup-ready-menu.ts`, change: + +```ts + { value: 'databases', label: 'Primary sources' }, +``` + +to: + +```ts + { value: 'databases', label: 'Databases' }, +``` + +In `packages/cli/src/setup.ts`, change: + +```ts + `Primary sources configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`, +``` + +to: + +```ts + `Databases configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`, +``` + +In `packages/cli/src/setup-context.ts`, change: + +```ts + io.stdout.write('Primary sources:\n'); +``` + +to: + +```ts + io.stdout.write('Databases:\n'); +``` + +- [ ] **Step 5: Update setup database prompt and output copy** + +In `packages/cli/src/setup-databases.ts`, change: + +```ts + const backDestination = canReturnToDriverSelection ? 'primary source selection' : 'the previous setup step'; +``` + +to: + +```ts + const backDestination = canReturnToDriverSelection ? 
'database selection' : 'the previous setup step'; +``` + +Replace the entire `configuredPrimarySourcesPrompt()` return value with: + +```ts + return { + message: `Databases already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`, + options: [ + { value: 'continue', label: 'Continue to context sources' }, + { value: 'add', label: 'Add another database' }, + ], + }; +``` + +Change the successful database setup heading from: + +```ts + writeSetupSection(input.io, 'Primary source ready', [ +``` + +to: + +```ts + writeSetupSection(input.io, 'Database ready', [ +``` + +Change the non-interactive no-database error from: + +```ts + 'KTX cannot work without a primary source. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n', +``` + +to: + +```ts + 'KTX cannot work without a database. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n', +``` + +Change the driver multiselect message from: + +```ts + message: withMultiselectNavigation('Which primary sources should KTX connect to?'), +``` + +to: + +```ts + message: withMultiselectNavigation('Which databases should KTX connect to?'), +``` + +Change the empty-selection warning from: + +```ts + io.stdout.write('│ KTX cannot work without at least one primary source. Select a source or press Escape to go back.\n'); +``` + +to: + +```ts + io.stdout.write('│ KTX cannot work without at least one database. Select a database or press Escape to go back.\n'); +``` + +Change the skip output from: + +```ts + io.stdout.write('│ Primary source setup skipped. KTX cannot work until you add a primary source.\n'); +``` + +to: + +```ts + io.stdout.write('│ Database setup skipped. 
KTX cannot work until you add a database.\n'); +``` + +Change the no-completed-database output from: + +```ts + io.stdout.write('│ KTX cannot work without a primary source.\n'); +``` + +to: + +```ts + io.stdout.write('│ KTX cannot work without a database.\n'); +``` + +Change the retry prompt message and skip label from: + +```ts + message: `Primary source setup failed for ${connectionChoice.connectionId}`, +``` + +```ts + { value: 'skip', label: 'Skip this primary source' }, +``` + +to: + +```ts + message: `Database setup failed for ${connectionChoice.connectionId}`, +``` + +```ts + { value: 'skip', label: 'Skip this database' }, +``` + +Change the final failure line from: + +```ts + io.stderr.write('No primary source connections completed setup.\n'); +``` + +to: + +```ts + io.stderr.write('No database connections completed setup.\n'); +``` + +- [ ] **Step 6: Update public docs examples** + +In `README.md`, replace: + +```text +Primary sources configured: yes (postgres-warehouse) +``` + +with: + +```text +Databases configured: yes (postgres-warehouse) +``` + +In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the database-ready heading line: + +```text +Primary source ready + postgres-warehouse - PostgreSQL - schema context complete +``` + +with: + +```text +Database ready + postgres-warehouse - PostgreSQL - schema context complete +``` + +In `docs-site/content/docs/getting-started/quickstart.mdx`, replace the setup success group: + +```text +Primary sources: + postgres-warehouse: deep context complete +``` + +with: + +```text +Databases: + postgres-warehouse: deep context complete +``` + +In `docs-site/content/docs/getting-started/quickstart.mdx`, replace: + +```text +Primary sources configured: yes (postgres-warehouse) +``` + +with: + +```text +Databases configured: yes (postgres-warehouse) +``` + +In `docs-site/content/docs/cli-reference/ktx-setup.mdx`, replace: + +```text +Primary sources configured: yes (postgres-warehouse) +``` + +with: + 
+```text +Databases configured: yes (postgres-warehouse) +``` + +- [ ] **Step 7: Add public docs regression assertions** + +In `scripts/examples-docs.test.mjs`, inside the test named `documents unified public ingest workflows in the docs site`, add: + +```js + const setupReference = await readText('docs-site/content/docs/cli-reference/ktx-setup.mdx'); +``` + +Then add these assertions near the existing `quickstart` and `rootReadme` assertions: + +```js + assert.match(rootReadme, /Databases configured: yes \(postgres-warehouse\)/); + assert.match(quickstart, /Databases:\n postgres-warehouse: deep context complete/); + assert.match(quickstart, /Databases configured: yes \(postgres-warehouse\)/); + assert.match(setupReference, /Databases configured: yes \(postgres-warehouse\)/); + assert.doesNotMatch(rootReadme, /Primary sources configured/); + assert.doesNotMatch(quickstart, /Primary sources/); + assert.doesNotMatch(setupReference, /Primary sources configured/); +``` + +- [ ] **Step 8: Verify setup label tests pass** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts +``` + +Expected: PASS. + +- [ ] **Step 9: Verify docs examples pass** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 10: Scan for stale public labels** + +Run: + +```bash +rg -n "Primary sources?:|Primary sources? configured|Primary source ready|knowledge sources" packages/cli/src README.md docs-site/content/docs scripts/examples-docs.test.mjs +``` + +Expected: no matches in public CLI source, README/docs examples, or the docs regression test. 
+ +- [ ] **Step 11: Commit public setup labels** + +Run: + +```bash +git add packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs +git commit -m "fix(cli): align setup database labels" +``` + +### Task 3: Final V1 Verification + +**Files:** +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/setup-ready-menu.ts` +- Verify: `packages/cli/src/setup.ts` +- Verify: `packages/cli/src/setup-context.ts` +- Verify: `packages/cli/src/setup-databases.ts` +- Verify: `README.md` +- Verify: `docs-site/content/docs/getting-started/quickstart.mdx` +- Verify: `docs-site/content/docs/cli-reference/ktx-setup.mdx` + +- [ ] **Step 1: Run focused CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts src/index.test.ts src/command-tree.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run docs regression tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 3: Run public unified-ingest stale-copy scans** + +Run: + +```bash +rg -n "Primary sources?:|Primary sources? configured|Primary source ready|knowledge sources" packages/cli/src README.md docs-site/content/docs scripts/examples-docs.test.mjs +``` + +Expected: no matches. 
+ +Run: + +```bash +rg -n "ktx scan|ktx ingest run --connection-id|--adapter |ktx ingest watch|live-database|Historic SQL|historicSql" README.md docs-site/content/docs examples/README.md examples/local-warehouse/README.md +``` + +Expected: no matches. Matches in developer scripts, internal package names, tests, or artifact paths outside this public-docs command are non-blocking under the original spec. + +- [ ] **Step 4: Run package pre-commit on changed files** + +Run: + +```bash +uv run pre-commit run --files packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs +``` + +Expected: PASS. If pre-commit is unavailable because the local `uv` version or hook environment is missing, record the exact failure and run the focused Vitest and Node tests from Steps 1 and 2. + +- [ ] **Step 5: Commit final verification if needed** + +If Step 4 made formatting changes, run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-ready-menu.ts packages/cli/src/setup-ready-menu.test.ts packages/cli/src/setup.ts packages/cli/src/setup.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts README.md docs-site/content/docs/getting-started/quickstart.mdx docs-site/content/docs/cli-reference/ktx-setup.mdx scripts/examples-docs.test.mjs +git commit -m "test: verify unified ingest final ux labels" +``` + +If Step 4 made no changes, do not create an empty commit. 
+ +## Self-Review + +- Spec coverage: This plan covers the remaining v1-blocking public gaps found in the audit: unsupported database query-history warning aggregation for `--all`, and old public `Primary sources` terminology in setup/status/docs where the spec's user-facing grouping is `Databases`. Core routing, depth, query-history execution, setup config, foreground-only state, hidden debug commands, public docs command shape, and output sanitization are already implemented by the prior plan chain. +- Placeholder scan: The plan contains exact files, exact tests, exact code snippets, exact commands, and expected outcomes. +- Type consistency: The new accumulator type is `KtxUnsupportedQueryHistoryWarning`; `resolveDatabaseTargetOptions()` receives `KtxPublicIngestWarningAccumulator`; warning strings used in tests match the implementation snippets exactly. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-foreground-and-retry-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-foreground-and-retry-closure.md new file mode 100644 index 00000000..f665d6a1 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-foreground-and-retry-closure.md @@ -0,0 +1,932 @@ +# Unified Ingest V1 Foreground and Retry Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking public UX gaps in the unified +`ktx ingest` redesign. + +**Architecture:** Keep the implemented connection-centric ingest planner and +shared foreground context-build view. Add a small public messaging layer for +notices, warnings, and retry guidance so TTY, non-TTY, and setup next-step +surfaces all match the original spec without changing internal adapter names. 
+ +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages, +Markdown plan documentation. + +--- + +## Current audit + +The implemented unified-ingest plans cover the main v1 behavior: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` route through the public ingest planner. +- Database targets run before source targets. Public source ingest bypasses + `ingest.adapters`. Fast and deep map to structural and enriched database + ingest, and deep readiness failures are isolated per target under `--all`. +- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal + help. Setup stores `connections.<connectionId>.context.depth` and + `connections.<connectionId>.context.queryHistory`. +- Setup context builds are foreground-only, legacy context-build states are + normalized to stale, and public docs no longer advertise `ktx scan` or + adapter-backed `ktx ingest run` as normal workflows. + +### V1-blocking gaps + +- Interactive foreground `ktx ingest` and setup context builds compute public + warnings but never render them. A TTY user can pass `--deep` for source + connections, `--query-history` for unsupported targets, or `--fast` with + stored query history and receive no warning in the foreground view. +- Explicit query-history runs do not state that database schema ingest runs + before query-history processing. The spec requires that message when a user + explicitly passes `--query-history`. +- Plain non-TTY failures report generic step failures such as + `warehouse failed at database-schema.` and a debug command, but they do not + include the retry guidance required by the error-handling section. +- Setup next-step output still describes the context-build action as + `Build or resume agent-ready context` through `ktx setup`, and it says the + build covers `primary-source scans and context-source ingests`.
The public + model is `setup` configures, `ingest` builds or refreshes context, and status + explains readiness. +- The guided demo foreground replay still shows `scanning tables...` and + `tables scanned`, even though the normal foreground view must use + `reading schema` or `building schema context`. + +### Non-blocking gaps + +- Hidden debug commands can continue to call `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`. +- Internal adapter keys, raw artifact paths, WorkUnit keys, package names, and + JSON or debug output can continue to use `scan`, `live-database`, and + `historic-sql`. +- Developer docs can continue to mention scan internals when they describe + connector implementation details. +- Existing `autoWatch`, `detached`, and `paused` type remnants in setup code + are not user-facing because setup context state is normalized before display. + +## File structure + +- Modify `packages/cli/src/public-ingest.ts`: add public plan notices, print + schema-before-query-history notices, and add retry guidance to plain + non-TTY failure details. +- Modify `packages/cli/src/public-ingest.test.ts`: cover explicit + query-history notices and retry guidance in plain output. +- Modify `packages/cli/src/context-build-view.ts`: render foreground notices + and warnings from `buildPublicIngestPlan`. +- Modify `packages/cli/src/context-build-view.test.ts`: cover warning and + notice rendering in the foreground view. +- Modify `packages/cli/src/next-steps.ts`: make the public build command + `ktx ingest --all` and remove resume/scan wording from setup next steps. +- Modify `packages/cli/src/next-steps.test.ts`: update public next-step + expectations. +- Modify `packages/cli/src/setup-demo-tour.ts`: replace demo replay scan copy + with schema-context copy. +- Modify `packages/cli/src/setup-demo-tour.test.ts`: lock the demo replay + wording against `scan` terms. 
+ +## Tasks + +### Task 1: Render foreground notices and warnings + +**Files:** +- Modify: `packages/cli/src/context-build-view.ts` +- Test: `packages/cli/src/context-build-view.test.ts` + +- [ ] **Step 1: Write failing foreground-message tests** + +In `packages/cli/src/context-build-view.test.ts`, add these tests inside the +`renderContextBuildView` describe block, near the existing rendering tests: + +```ts + it('renders public warnings in the foreground view', () => { + const state = initViewState([ + { + connectionId: 'docs', + driver: 'notion', + operation: 'source-ingest', + adapter: 'notion', + debugCommand: 'ktx ingest docs --debug', + steps: ['source-ingest', 'memory-update'], + }, + ]); + + const rendered = renderContextBuildView(state, { + styled: false, + warnings: ['--deep affects database ingest only; ignoring it for docs.'], + }); + + expect(rendered).toContain('Warnings:'); + expect(rendered).toContain('--deep affects database ingest only; ignoring it for docs.'); + }); + + it('renders public notices in the foreground view before warnings', () => { + const state = initViewState([ + { + connectionId: 'warehouse', + driver: 'postgres', + operation: 'database-ingest', + debugCommand: 'ktx ingest warehouse --debug', + steps: ['database-schema', 'query-history'], + databaseDepth: 'deep', + detectRelationships: true, + queryHistory: { enabled: true, dialect: 'postgres' }, + }, + ]); + + const rendered = renderContextBuildView(state, { + styled: false, + notices: ['Schema ingest runs before query history for warehouse.'], + warnings: ['--query-history requires deep ingest; running warehouse with --deep.'], + }); + + expect(rendered.indexOf('Notices:')).toBeLessThan(rendered.indexOf('Warnings:')); + expect(rendered).toContain('Schema ingest runs before query history for warehouse.'); + expect(rendered).toContain('--query-history requires deep ingest; running warehouse with --deep.'); + }); +``` + +- [ ] **Step 2: Run the failing foreground-message tests** + 
+Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts -t "renders public warnings|renders public notices" +``` + +Expected: FAIL because `renderContextBuildView` does not accept or render +`warnings` or `notices`. + +- [ ] **Step 3: Add render options for foreground messages** + +In `packages/cli/src/context-build-view.ts`, add this helper after +`renderTargetGroup`: + +```ts +function renderMessageGroup(label: string, messages: string[], styled: boolean): string[] { + if (messages.length === 0) return []; + const renderedMessages = messages.map((message) => ` - ${message}`); + return ['', ` ${label}:`, ...renderedMessages.map((line) => (styled ? dim(line) : line))]; +} +``` + +Then change the `renderContextBuildView` signature from: + +```ts +export function renderContextBuildView( + state: ContextBuildViewState, + options: { styled?: boolean; showHint?: boolean; hintText?: string; projectDir?: string } = {}, +): string { +``` + +to: + +```ts +export function renderContextBuildView( + state: ContextBuildViewState, + options: { + styled?: boolean; + showHint?: boolean; + hintText?: string; + projectDir?: string; + notices?: string[]; + warnings?: string[]; + } = {}, +): string { +``` + +In the `lines` array inside `renderContextBuildView`, insert the notice and +warning groups after the `Context sources` group: + +```ts + ...renderTargetGroup('Databases', state.primarySources, state.frame, styled, width), + ...renderTargetGroup('Context sources', state.contextSources, state.frame, styled, width), + ...renderMessageGroup('Notices', options.notices ?? [], styled), + ...renderMessageGroup('Warnings', options.warnings ?? 
[], styled), + '', +``` + +- [ ] **Step 4: Pass plan messages into foreground rendering** + +In `packages/cli/src/context-build-view.ts`, inside `runContextBuild`, change: + +```ts + const viewOpts = { styled: true, projectDir: args.projectDir }; +``` + +to: + +```ts + const viewOpts = { + styled: true, + projectDir: args.projectDir, + notices: plan.notices ?? [], + warnings: plan.warnings, + }; +``` + +This makes every call to `paint()` and the final non-TTY foreground fallback +render the same public messages. The `notices` property is not declared on +`KtxPublicIngestPlan` until Task 2 Step 3 adds it, so run the CLI type-check +only after that step. + +- [ ] **Step 5: Run the foreground-message tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts -t "renders public warnings|renders public notices" +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts +git commit -m "fix: render unified ingest foreground warnings" +``` + +### Task 2: State schema-before-query-history for explicit runs + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/context-build-view.ts` +- Test: `packages/cli/src/public-ingest.test.ts` +- Test: `packages/cli/src/context-build-view.test.ts` + +- [ ] **Step 1: Write failing explicit query-history notice tests** + +In `packages/cli/src/public-ingest.test.ts`, add this test inside +`describe('buildPublicIngestPlan', ...)` after the existing query-history +planning tests: + +```ts + it('adds a schema-first notice when query history is explicitly enabled', () => { + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'enabled', + }).notices, + ).toEqual(['Schema ingest runs before query history for warehouse.']); + }); +``` + +In `packages/cli/src/public-ingest.test.ts`, add this test inside 
+`describe('runKtxPublicIngest', ...)` after +`runs query history after schema ingest with current-run window override`: + +```ts + it('prints the schema-first notice for explicit query-history runs', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + }); +``` + +In `packages/cli/src/context-build-view.test.ts`, add this test near the +existing `runContextBuild` tests: + +```ts + it('passes schema-first notices from the plan into foreground output', async () => { + const io = makeIo(); + const project = { + ...projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }), + config: { + ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config, + connections: { + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }, + llm: { + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + }, + }, + }; + const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation)); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + 
targetConnectionId: 'warehouse', + all: false, + queryHistory: 'enabled', + }, + io.io, + { executeTarget, now: () => 1000 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + }); +``` + +- [ ] **Step 2: Run the failing query-history notice tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts -t "schema-first notice|passes schema-first" +``` + +Expected: FAIL because plans do not include `notices`, and plain output does +not print schema-first text. + +- [ ] **Step 3: Add notices to the public ingest plan** + +In `packages/cli/src/public-ingest.ts`, update `KtxPublicIngestPlan`: + +```ts +export interface KtxPublicIngestPlan { + projectDir: string; + targets: KtxPublicIngestPlanTarget[]; + warnings: string[]; + notices?: string[]; +} +``` + +Add this helper after `finalizeWarnings`: + +```ts +function schemaFirstQueryHistoryNotice( + targets: KtxPublicIngestPlanTarget[], + args: { queryHistory?: KtxPublicIngestQueryHistoryFlag }, +): string | null { + if (args.queryHistory !== 'enabled') { + return null; + } + const queryHistoryTargets = targets.filter((target) => target.queryHistory?.enabled === true); + if (queryHistoryTargets.length === 0) { + return null; + } + if (queryHistoryTargets.length === 1) { + return `Schema ingest runs before query history for ${queryHistoryTargets[0].connectionId}.`; + } + return `Schema ingest runs before query history for ${queryHistoryTargets.length} database connections.`; +} +``` + +In `buildPublicIngestPlan`, replace the direct return with: + +```ts + const orderedTargets = [ + ...targets.filter((t) => t.operation === 'database-ingest'), + ...targets.filter((t) => t.operation === 'source-ingest'), + ]; + const notice = schemaFirstQueryHistoryNotice(orderedTargets, args); + return { + projectDir: args.projectDir, + targets: orderedTargets, + warnings: 
finalizeWarnings(warnings, args), + ...(notice ? { notices: [notice] } : {}), + }; +``` + +- [ ] **Step 4: Print notices in plain public ingest** + +In `packages/cli/src/public-ingest.ts`, inside `runKtxPublicIngest`, change: + +```ts + if (!args.json && plan.warnings.length > 0) { + for (const warning of plan.warnings) { + io.stderr.write(`Warning: ${warning}\n`); + } + } +``` + +to: + +```ts + if (!args.json) { + for (const notice of plan.notices ?? []) { + io.stdout.write(`${notice}\n`); + } + for (const warning of plan.warnings) { + io.stderr.write(`Warning: ${warning}\n`); + } + } +``` + +Task 1 already passes `plan.notices` into `runContextBuild`, so explicit +query-history foreground runs render the same notice in the view. + +- [ ] **Step 5: Run the query-history notice tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts -t "schema-first notice|passes schema-first" +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts +git commit -m "fix: explain query history schema order" +``` + +### Task 3: Add retry guidance to plain public failures + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing plain retry tests** + +In `packages/cli/src/public-ingest.test.ts`, replace these assertions in +`runs all independent targets and reports partial failures`: + +```ts + expect(io.stdout()).toContain('warehouse failed at database-schema.'); + expect(io.stdout()).toContain('Debug: ktx ingest warehouse --debug'); +``` + +with: + +```ts + expect(io.stdout()).toContain('warehouse failed at database-schema.'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast'); + 
expect(io.stdout()).not.toContain('Debug: ktx ingest warehouse --debug'); +``` + +Then add this test after `runs all independent targets and reports partial +failures`: + +```ts + it('prints query-history retry guidance for query-history facet failures', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 1); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain('warehouse failed at query-history.'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).not.toContain('historic-sql'); + }); +``` + +- [ ] **Step 2: Run the failing retry tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "partial failures|query-history retry" +``` + +Expected: FAIL because plain failures still print `Debug:` and lack retry +commands. + +- [ ] **Step 3: Add retry command formatting to public ingest** + +In `packages/cli/src/public-ingest.ts`, add these helpers before +`markTargetResult`: + +```ts +function retryCommandForTarget( + target: KtxPublicIngestPlanTarget, + args: Extract<KtxPublicIngestArgs, { command: 'run' }>, +): string { + const projectPart = ` --project-dir ${args.projectDir}`; + const depthPart = target.databaseDepth ? ` --${target.databaseDepth}` : ''; + const queryHistoryPart = target.queryHistory?.enabled === true ? ' --query-history' : ''; + const windowPart = + target.queryHistory?.enabled === true && target.queryHistory.windowDays !== undefined + ? 
` --query-history-window-days ${target.queryHistory.windowDays}` + : ''; + return `ktx ingest ${target.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; +} + +function trimTrailingPeriod(value: string): string { + return value.endsWith('.') ? value.slice(0, -1) : value; +} + +function failureDetailWithRetry(input: { + target: KtxPublicIngestPlanTarget; + args: Extract<KtxPublicIngestArgs, { command: 'run' }>; + failedOperation: KtxPublicIngestStepName; + failureDetail?: string; +}): string { + const detail = input.failureDetail?.trim(); + const base = + detail && detail.startsWith(`${input.target.connectionId} `) + ? detail + : detail + ? `${input.target.connectionId} failed: ${detail}` + : `${input.target.connectionId} failed at ${input.failedOperation}.`; + return `${trimTrailingPeriod(base)}. Retry: ${retryCommandForTarget(input.target, input.args)}`; +} +``` + +- [ ] **Step 4: Thread run args into failure detail construction** + +Change the `markTargetResult` signature in `packages/cli/src/public-ingest.ts` +from: + +```ts +function markTargetResult( + target: KtxPublicIngestPlanTarget, + status: 'done' | 'failed', + failedOperation?: KtxPublicIngestStepName, + failureDetail?: string, +): KtxPublicIngestTargetResult { +``` + +to: + +```ts +function markTargetResult( + target: KtxPublicIngestPlanTarget, + args: Extract<KtxPublicIngestArgs, { command: 'run' }>, + status: 'done' | 'failed', + failedOperation?: KtxPublicIngestStepName, + failureDetail?: string, +): KtxPublicIngestTargetResult { +``` + +Inside the failed-step branch, replace: + +```ts + detail: failureDetail ?? `${target.connectionId} failed at ${selectedFailedOperation}.`, +``` + +with: + +```ts + detail: failureDetailWithRetry({ + target, + args, + failedOperation: selectedFailedOperation, + failureDetail, + }), +``` + +Update every `markTargetResult` call in `executePublicIngestTarget`: + +```ts + return markTargetResult( + target, + args, + 'failed', + 'database-schema', + capturedScanIo ? 
firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined, + ); +``` + +```ts + return markTargetResult(target, args, 'failed', 'query-history'); +``` + +```ts + return markTargetResult(target, args, 'done'); +``` + +```ts + return markTargetResult(target, args, exitCode === 0 ? 'done' : 'failed'); +``` + +- [ ] **Step 5: Stop printing debug commands in plain failure summaries** + +In `renderPlainResults`, remove this block: + +```ts + if (failedStep.debugCommand) { + io.stdout.write(` Debug: ${failedStep.debugCommand}\n`); + } +``` + +Debug commands remain available through JSON and debug surfaces, but normal +plain output now focuses on the connection and retry action. + +- [ ] **Step 6: Run the retry tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "partial failures|query-history retry" +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix: add public ingest retry guidance" +``` + +### Task 4: Replace setup next-step scan/resume wording + +**Files:** +- Modify: `packages/cli/src/next-steps.ts` +- Test: `packages/cli/src/next-steps.test.ts` + +- [ ] **Step 1: Write failing next-step copy tests** + +In `packages/cli/src/next-steps.test.ts`, replace the expected +`KTX_CONTEXT_BUILD_COMMANDS` value with: + +```ts + expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([ + { + command: 'ktx ingest --all', + description: 'Build or refresh agent-ready context from configured connections', + }, + { + command: 'ktx status', + description: 'Check setup and context readiness', + }, + ]); +``` + +In the test named `keeps setup next steps focused on building context when the +build is not ready`, replace: + +```ts + expect(rendered).toContain('primary-source scans and context-source ingests'); + expect(rendered).toContain('ktx setup'); +``` + +with: + +```ts + expect(rendered).toContain('Run ingest to build database 
schema context before context-source ingest.'); + expect(rendered).toContain('ktx ingest --all'); + expect(rendered).not.toContain('resume'); + expect(rendered).not.toContain('scan'); +``` + +- [ ] **Step 2: Run the failing next-step copy tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/next-steps.test.ts +``` + +Expected: FAIL because the current copy still recommends `ktx setup` for the +context-build action and uses resume/scan wording. + +- [ ] **Step 3: Update the next-step command constants** + +In `packages/cli/src/next-steps.ts`, change `KTX_CONTEXT_BUILD_COMMANDS` to: + +```ts +export const KTX_CONTEXT_BUILD_COMMANDS = [ + { + command: 'ktx ingest --all', + description: 'Build or refresh agent-ready context from configured connections', + }, + { + command: 'ktx status', + description: 'Check setup and context readiness', + }, +] as const; +``` + +In `formatSetupNextStepLines`, replace: + +```ts + `${indent}Preferred route: run the CLI build; it covers primary-source scans and context-source ingests.`, +``` + +with: + +```ts + `${indent}Run ingest to build database schema context before context-source ingest.`, +``` + +- [ ] **Step 4: Run the next-step copy tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/next-steps.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts +git commit -m "fix: align setup next steps with unified ingest" +``` + +### Task 5: Clean guided demo foreground scan wording + +**Files:** +- Modify: `packages/cli/src/setup-demo-tour.ts` +- Test: `packages/cli/src/setup-demo-tour.test.ts` + +- [ ] **Step 1: Write failing demo wording tests** + +In `packages/cli/src/setup-demo-tour.test.ts`, add this test inside +`describe('buildDemoReplayTimeline', ...)`: + +```ts + it('uses schema-context wording for database progress', () => { + const renderedTimeline = timeline + .map((event) => [event.detailLine, event.summaryText].filter(Boolean).join(' ')) + .join('\n'); + + expect(renderedTimeline).toContain('reading schema'); + expect(renderedTimeline).toContain('56 tables'); + expect(renderedTimeline).not.toContain('scanning'); + expect(renderedTimeline).not.toContain('scanned'); + }); +``` + +- [ ] **Step 2: Run the failing demo wording test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts -t "schema-context wording" +``` + +Expected: FAIL because the demo timeline still uses `scanning tables...` and +`tables scanned`. 
+ +- [ ] **Step 3: Replace demo timeline database copy** + +In `packages/cli/src/setup-demo-tour.ts`, inside `buildDemoReplayTimeline`, +replace the first three events: + +```ts + // postgres-warehouse: scan + { delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] scanning tables...', summaryText: null }, + { delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables scanned' }, +``` + +with: + +```ts + // postgres-warehouse: database schema context + { delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null }, + { delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] reading schema...', summaryText: null }, + { delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables' }, +``` + +- [ ] **Step 4: Run the demo wording test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-demo-tour.test.ts -t "schema-context wording" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "fix: remove scan wording from demo progress" +``` + +### Task 6: Final verification + +**Files:** +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/context-build-view.ts` +- Verify: `packages/cli/src/next-steps.ts` +- Verify: `packages/cli/src/setup-demo-tour.ts` +- Verify: relevant tests + +- [ ] **Step 1: Run focused Vitest coverage** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/next-steps.test.ts src/setup-demo-tour.test.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli run test +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code check after TypeScript changes** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. + +- [ ] **Step 5: Search for stale public wording in touched surfaces** + +Run: + +```bash +rg -n "Build or resume agent-ready|primary-source scans|scanning tables|tables scanned|Debug: ktx ingest" packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +``` + +Expected: no matches. + +- [ ] **Step 6: Commit verification fixes if any were needed** + +If verification required edits, run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/next-steps.ts packages/cli/src/next-steps.test.ts packages/cli/src/setup-demo-tour.ts packages/cli/src/setup-demo-tour.test.ts +git commit -m "test: verify unified ingest ux closure" +``` + +If no edits were needed, do not create an empty commit. + +## Self-review + +- Spec coverage: The plan covers the remaining v1-blocking warning, + schema-first query-history, retry-guidance, setup next-step, and foreground + demo wording gaps. Core command routing, depth policy, query-history config, + setup depth, docs-site command references, foreground-only state, and reserved + ids are already covered by earlier implemented plans. +- Placeholder scan: The plan contains exact file paths, concrete test code, + implementation snippets, commands, and expected results. No red-flag + placeholders are present. 
+- Type consistency: `notices` is added as an optional + `KtxPublicIngestPlan` property and threaded through `renderContextBuildView` + options. Retry helpers use existing `KtxPublicIngestPlanTarget`, + `KtxPublicIngestArgs`, and `KtxPublicIngestStepName` types. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-progress-copy-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-progress-copy-closure.md new file mode 100644 index 00000000..edfb80cc --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-progress-copy-closure.md @@ -0,0 +1,559 @@ +# Unified Ingest V1 Progress Copy Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the remaining v1-blocking scan wording from normal public +unified-ingest progress, failure, and setup scope-selection output. + +**Architecture:** Keep the implemented connection-centric ingest planner, +hidden legacy commands, and foreground context-build view. Add a small shared +public-copy helper for lower-level database ingest and query-history messages, +then use it from foreground progress and direct public failure summarization. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages. + +--- + +## Current audit + +The implemented unified-ingest plan chain covers the original spec's main v1 +behavior: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` route through `public-ingest.ts`. +- Database targets run before source targets, inferred public adapters bypass + `ingest.adapters`, and `fast` or `deep` maps to structural or enriched + database ingest internals. 
+- Deep readiness is evaluated before target work starts, and `--all` isolates + per-target deep-readiness failures. +- Setup stores `connections..context.depth` and + `connections..context.queryHistory`, migrates legacy `historicSql`, and + uses foreground-only setup context state. +- Normal help hides `ktx scan`, `ktx ingest run`, and `ktx ingest watch`; docs + and command-tree output no longer present those as normal public workflows. + +### V1-blocking gaps + +- Foreground `ktx ingest` and setup context-build progress still pass database + ingest progress messages through from scan internals. A normal user can see + messages such as `Preparing scan`, even though the spec says the foreground + view must use `reading schema` or `building schema context` and must not show + `scan` in normal mode. +- Direct public database ingest failure summaries sanitize `live-database` and + `historic-sql`, but not scan-specific failure lines such as + `KTX scan enrichment failed after structural scan completed: ...`. +- Interactive database setup still asks for `PostgreSQL schemas to scan`, which + keeps scan wording in normal setup output after the public model changed to + database schema context. + +### Non-blocking gaps + +- Hidden debug commands can remain callable: `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`. +- Internal adapter keys, raw artifact paths, WorkUnit keys, package names, + tests, and developer-only scripts can continue to use `scan`, + `live-database`, and `historic-sql`. +- README package taxonomy such as `Postgres scan connector` can remain because + it describes internal package ownership, not normal command usage. +- Internal readiness configuration names such as `scan.enrichment.mode` can + remain because they refer to existing `ktx.yaml` configuration fields. + +## File structure + +- Create `packages/cli/src/public-ingest-copy.ts`: shared copy sanitizer for + database ingest and query-history messages used by public output paths. 
+- Create `packages/cli/src/public-ingest-copy.test.ts`: unit coverage for the + sanitizer. +- Modify `packages/cli/src/context-build-view.ts`: sanitize foreground + database progress messages and reuse the shared query-history sanitizer. +- Modify `packages/cli/src/context-build-view.test.ts`: cover foreground + progress output with lower-level scan messages. +- Modify `packages/cli/src/public-ingest.ts`: use the shared public output-line + sanitizer for captured failure details. +- Modify `packages/cli/src/public-ingest.test.ts`: cover direct public failure + output for scan-enrichment failures. +- Modify `packages/cli/src/setup-databases.ts`: change the schema scope prompt + from `schemas to scan` to `schemas to include`. +- Modify `packages/cli/src/setup-databases.test.ts`: update the schema prompt + expectation and assert scan wording is absent. + +## Tasks + +### Task 1: Add shared public ingest copy sanitizers + +**Files:** +- Create: `packages/cli/src/public-ingest-copy.ts` +- Create: `packages/cli/src/public-ingest-copy.test.ts` + +- [ ] **Step 1: Write the public-copy tests** + +Create `packages/cli/src/public-ingest-copy.test.ts`: + +```ts +import { describe, expect, it } from 'vitest'; +import { + publicDatabaseIngestMessage, + publicIngestOutputLine, + publicQueryHistoryMessage, +} from './public-ingest-copy.js'; + +describe('public ingest copy sanitizers', () => { + it('maps database scan progress into schema-context wording', () => { + expect(publicDatabaseIngestMessage('Preparing scan')).toBe('Preparing database ingest'); + expect(publicDatabaseIngestMessage('Inspecting database schema')).toBe('Reading database schema'); + expect(publicDatabaseIngestMessage('Writing schema artifacts')).toBe('Writing schema context'); + expect(publicDatabaseIngestMessage('Enriching schema metadata')).toBe('Building enriched schema context'); + }); + + it('maps database scan failure text into public database ingest wording', () => { + expect( + 
publicDatabaseIngestMessage( + 'KTX scan enrichment failed after structural scan completed: embedding service timed out', + ), + ).toBe('Database enrichment failed after schema context completed: embedding service timed out'); + expect(publicDatabaseIngestMessage('structural scan wrote partial artifacts')).toBe( + 'schema context wrote partial artifacts', + ); + expect(publicDatabaseIngestMessage('scan results may be less complete')).toBe( + 'database context may be less complete', + ); + }); + + it('maps query-history adapter progress into public wording', () => { + expect(publicQueryHistoryMessage('Fetching source files for warehouse/historic-sql', 'warehouse')).toBe( + 'Fetching query history for warehouse', + ); + expect(publicQueryHistoryMessage('Curating warehouse/historic-sql work units', 'warehouse')).toBe( + 'Curating warehouse query history work units', + ); + expect(publicQueryHistoryMessage('historic SQL local ingest failed', 'warehouse')).toBe( + 'query history local ingest failed', + ); + }); + + it('sanitizes captured public output lines across database and query-history internals', () => { + expect( + publicIngestOutputLine( + 'KTX scan enrichment failed after structural scan completed in raw-sources/warehouse/live-database/sync-1', + ), + ).toBe('Database enrichment failed after schema context completed in raw-sources/warehouse/database schema/sync-1'); + expect(publicIngestOutputLine('Historic SQL local ingest requires a configured reader')).toBe( + 'query history local ingest requires a configured reader', + ); + }); +}); +``` + +- [ ] **Step 2: Run the failing public-copy tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts +``` + +Expected: FAIL because `packages/cli/src/public-ingest-copy.ts` does not exist. 
+ +- [ ] **Step 3: Implement the shared sanitizers** + +Create `packages/cli/src/public-ingest-copy.ts`: + +```ts +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +const DATABASE_INGEST_REPLACEMENTS: Array<[RegExp, string]> = [ + [/\bPreparing scan\b/gi, 'Preparing database ingest'], + [/\bInspecting database schema\b/gi, 'Reading database schema'], + [/\bWriting schema artifacts\b/gi, 'Writing schema context'], + [/\bEnriching schema metadata\b/gi, 'Building enriched schema context'], + [ + /\bKTX scan enrichment failed after structural scan completed\b/gi, + 'Database enrichment failed after schema context completed', + ], + [/\bstructural scan\b/gi, 'schema context'], + [/\benriched scan\b/gi, 'deep database ingest'], + [/\bscan results\b/gi, 'database context'], +]; + +export function publicDatabaseIngestMessage(message: string): string { + return DATABASE_INGEST_REPLACEMENTS.reduce( + (current, [pattern, replacement]) => current.replace(pattern, replacement), + message, + ); +} + +export function publicQueryHistoryMessage(message: string, connectionId?: string): string { + let current = message; + if (connectionId && connectionId.length > 0) { + const escapedConnectionId = escapeRegExp(connectionId); + current = current + .replace( + new RegExp(`Fetching source files for ${escapedConnectionId}/historic-sql`, 'i'), + `Fetching query history for ${connectionId}`, + ) + .replace(`${connectionId}/historic-sql`, `${connectionId} query history`); + } + return current.replace(/\bhistoric-sql\b/g, 'query history').replace(/\bhistoric SQL\b/gi, 'query history'); +} + +export function publicIngestOutputLine(line: string): string { + return publicQueryHistoryMessage(publicDatabaseIngestMessage(line)).replace(/\blive-database\b/g, 'database schema'); +} +``` + +- [ ] **Step 4: Run the public-copy tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts +``` + 
+Expected: PASS. + +- [ ] **Step 5: Commit the shared sanitizer** + +Run: + +```bash +git add packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts +git commit -m "fix(cli): add public ingest copy sanitizers" +``` + +### Task 2: Sanitize foreground progress and captured public failures + +**Files:** +- Modify: `packages/cli/src/context-build-view.ts` +- Modify: `packages/cli/src/context-build-view.test.ts` +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/public-ingest.test.ts` +- Test: `packages/cli/src/public-ingest-copy.test.ts` + +- [ ] **Step 1: Write the failing foreground progress test** + +In `packages/cli/src/context-build-view.test.ts`, add this test inside the +`runContextBuild` describe block near the existing query-history progress test: + +```ts + it('renders database ingest progress without scan wording', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => { + await deps.scanProgress?.update(0.05, 'Preparing scan'); + await deps.scanProgress?.update(0.15, 'Inspecting database schema'); + await deps.scanProgress?.update(0.7, 'Writing schema artifacts'); + return successResult(target.connectionId, target.driver, target.operation); + }); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + }, + io.io, + { executeTarget, now: () => 1000, sourceProgressThrottleMs: 0 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + expect(io.stdout()).toContain('Preparing database ingest'); + expect(io.stdout()).toContain('Reading database schema'); + expect(io.stdout()).toContain('Writing schema context'); + expect(io.stdout()).not.toContain('Preparing scan'); + expect(io.stdout()).not.toMatch(/\bscan\b/i); + }); +``` + +- [ ] **Step 2: Write the failing direct public 
failure test** + +In `packages/cli/src/public-ingest.test.ts`, add this test inside the +`runKtxPublicIngest` describe block near +`suppresses internal scan output for public database ingest summaries`: + +```ts + it('sanitizes captured database scan failure details in direct public output', async () => { + const io = makeIo(); + const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }); + const runScan = vi.fn(async (_args, scanIo) => { + scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n'); + return 1; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + depth: 'deep', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain( + 'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.', + ); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep'); + expect(io.stdout()).not.toContain('KTX scan enrichment failed'); + expect(io.stdout()).not.toContain('structural scan'); + }); +``` + +- [ ] **Step 3: Run the failing integration tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/public-ingest.test.ts -t "database ingest progress|captured database scan failure" --testTimeout 30000 +``` + +Expected: FAIL because foreground progress still prints `Preparing scan`, and +captured direct failures still print the lower-level scan failure text. 
+ +- [ ] **Step 4: Use the shared sanitizer in foreground progress** + +In `packages/cli/src/context-build-view.ts`, add this import: + +```ts +import { publicDatabaseIngestMessage, publicQueryHistoryMessage } from './public-ingest-copy.js'; +``` + +Replace the existing `publicProgressMessage()` implementation: + +```ts +function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string { + if (!target.steps.includes('query-history')) { + return message; + } + return message + .replace( + new RegExp(`Fetching source files for ${target.connectionId}/historic-sql`, 'i'), + `Fetching query history for ${target.connectionId}`, + ) + .replace(`${target.connectionId}/historic-sql`, `${target.connectionId} query history`) + .replace(/\bhistoric-sql\b/g, 'query history') + .replace(/\bhistoric SQL\b/gi, 'query history'); +} +``` + +with: + +```ts +function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string { + let current = message; + if (target.operation === 'database-ingest') { + current = publicDatabaseIngestMessage(current); + } + if (target.steps.includes('query-history')) { + current = publicQueryHistoryMessage(current, target.connectionId); + } + return current; +} +``` + +Apply both sanitizers in sequence instead of returning early: a database +target that includes the `query-history` step also has +`operation === 'database-ingest'`, so an early return would skip the +query-history wording that the previous implementation provided. + +- [ ] **Step 5: Use the shared sanitizer in public ingest failure capture** + +In `packages/cli/src/public-ingest.ts`, add this import: + +```ts +import { publicIngestOutputLine } from './public-ingest-copy.js'; +``` + +Delete the local `publicIngestOutputLine()` function: + +```ts +function publicIngestOutputLine(line: string): string { + return line + .replace(/\blive-database\b/g, 'database schema') + .replace(/\bhistoric-sql\b/g, 'query history') + .replace(/\bhistoric SQL\b/gi, 'query history'); +} +``` + +Leave `firstCapturedFailureLine()` calling `publicIngestOutputLine` unchanged; +the imported function now provides the broader public wording. 
+ +- [ ] **Step 6: Run the integration tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts src/context-build-view.test.ts src/public-ingest.test.ts --testTimeout 30000 +``` + +Expected: PASS. + +- [ ] **Step 7: Commit foreground and failure sanitization** + +Run: + +```bash +git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts +git commit -m "fix(cli): sanitize public ingest progress copy" +``` + +### Task 3: Rename setup schema scope prompt + +**Files:** +- Modify: `packages/cli/src/setup-databases.ts` +- Modify: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Update the setup prompt expectation** + +In `packages/cli/src/setup-databases.test.ts`, in the test named +`prompts for discovered Postgres schemas before the first scan`, replace: + +```ts + message: expect.stringContaining('PostgreSQL schemas to scan'), +``` + +with: + +```ts + message: expect.stringContaining('PostgreSQL schemas to include'), +``` + +Add this assertion after the `toHaveBeenCalledWith` block: + +```ts + expect(String(prompts.multiselect.mock.calls[0]?.[0].message)).not.toContain('to scan'); +``` + +- [ ] **Step 2: Run the failing setup prompt test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "prompts for discovered Postgres schemas before the first scan" --testTimeout 30000 +``` + +Expected: FAIL because the prompt still says `PostgreSQL schemas to scan`. + +- [ ] **Step 3: Rename the setup scope prompt** + +In `packages/cli/src/setup-databases.ts`, replace: + +```ts + `${spec.promptLabel} to scan\n` + + `KTX found multiple ${spec.nounPlural}. 
Select every ${spec.noun} agents should use.`, +``` + +with: + +```ts + `${spec.promptLabel} to include\n` + + `KTX found multiple ${spec.nounPlural}. Select every ${spec.noun} agents should use.`, +``` + +- [ ] **Step 4: Run the setup prompt test again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "prompts for discovered Postgres schemas before the first scan" --testTimeout 30000 +``` + +Expected: PASS. + +- [ ] **Step 5: Commit setup prompt wording** + +Run: + +```bash +git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts +git commit -m "fix(cli): rename setup schema scope prompt" +``` + +### Task 4: Final verification + +**Files:** +- Verify: `packages/cli/src/public-ingest-copy.ts` +- Verify: `packages/cli/src/context-build-view.ts` +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/setup-databases.ts` + +- [ ] **Step 1: Run targeted unified-ingest tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest-copy.test.ts src/context-build-view.test.ts src/public-ingest.test.ts src/setup-databases.test.ts --testTimeout 30000 +``` + +Expected: PASS. + +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Scan normal public files for the closed wording gaps** + +Run: + +```bash +rg -n "Preparing scan|KTX scan enrichment failed|structural scan completed|schemas to scan" packages/cli/src/context-build-view.ts packages/cli/src/public-ingest.ts packages/cli/src/setup-databases.ts packages/cli/src/*.test.ts +``` + +Expected: no matches in the three non-test source files. Matches in test +files are acceptable only where the string is a sanitizer input or a negative +assertion (for example in `public-ingest-copy.test.ts`, +`context-build-view.test.ts`, and `public-ingest.test.ts`) or a historical +expectation in low-level scan-specific tests such as `scan.test.ts`. + +- [ ] **Step 4: Run workspace dead-code check** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit final verification marker if needed** + +If the verification steps required only the commits above, no additional +commit is needed. If a verification fix changed files, run: + +```bash +git add packages/cli/src/public-ingest-copy.ts packages/cli/src/public-ingest-copy.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts +git commit -m "test(cli): verify unified ingest public progress copy" +``` + +## Self-review + +Spec coverage: this plan covers the remaining normal public output paths where +scan wording still leaks into unified ingest: + +- Foreground progress now maps database scan progress into schema-context copy. +- Captured direct public failure summaries now map scan-enrichment failures into + database ingest copy. +- Interactive setup schema scope selection now says `schemas to include`, not + `schemas to scan`. + +The plan intentionally leaves hidden debug commands, internal artifact paths, +developer scripts, low-level scan tests, and configuration field names alone. +Those are non-blocking under the original spec's implementation-detail +allowances. + +Placeholder scan: no task uses deferred code markers, unnamed edge handling, or +undefined helper names. Every changed helper, test, and command is named with +the file that owns it. + +Type consistency: the new helper exports +`publicDatabaseIngestMessage()`, `publicQueryHistoryMessage()`, and +`publicIngestOutputLine()`. Later tasks import those exact names from +`./public-ingest-copy.js`. 
diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-output-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-output-closure.md new file mode 100644 index 00000000..47b88817 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-output-closure.md @@ -0,0 +1,1224 @@ +# Unified Ingest V1 Public Output Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking gaps where public `ktx ingest` and +setup still expose internal scan, adapter, or Historic SQL behavior. + +**Architecture:** Keep the current connection-centric `ktx ingest` planner and +depth policy. Tighten the public execution layer so inferred source adapters +bypass `ingest.adapters`, database ingest captures internal scan output, TTY +public ingest uses the shared foreground context-build view, and setup output +uses schema-context and query-history language. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages. + +--- + +## Audit + +The implemented unified-ingest plans cover the core command routing, depth +policy, setup depth defaults, foreground-only setup context build, canonical +`context.queryHistory` setup storage, reserved ingest connection ids, and +default config cleanup. + +### V1-blocking gaps + +- `ktx ingest ` still calls `runKtxIngest` without + `allowImplicitAdapter: true`, so public source ingest still requires + `ingest.adapters` entries. +- Direct public database ingest still streams internal `runKtxScan` output, + including `KTX scan completed`, `Mode: structural`, artifact paths, and + `live-database` path segments. +- Direct interactive public ingest does not use the shared foreground + context-build view; only setup uses that view. 
+- `--query-history-window-days` does not itself request query-history ingest, + so `ktx ingest warehouse --query-history-window-days 30` silently runs only + schema ingest when stored query history is disabled. +- `ktx ingest --all --deep` emits one ignored-depth warning per non-database + source instead of aggregating the warning. +- Setup database output still says `Scanning`, `structural scan`, prints + `live-database` report paths, and suggests `ktx scan` as a retry/debug + command. +- Setup help and prompts still expose `Historic SQL` flags and wording instead + of query-history wording. + +### Non-blocking gaps + +- Hidden debug surfaces can still call internal commands: `ktx scan`, + `ktx ingest run`, and `ktx ingest watch`. +- Internal package names, adapter keys, raw artifact paths, scan tests, and + scripts can continue to use `scan`, `live-database`, and `historic-sql`. +- README package descriptions such as `Postgres scan connector` are internal + package taxonomy, not normal CLI command guidance. +- `README.md` says rerunning setup resumes the wizard; that is setup-flow + language, not a context-build background resume path. + +## File structure + +- Modify `packages/cli/src/public-ingest.ts`: set implicit adapters for public + source ingest, treat query-history window overrides as query-history + requests, aggregate `--all` source warnings, capture database scan output for + plain public ingest, and delegate interactive TTY runs to the shared context + build view. +- Modify `packages/cli/src/public-ingest.test.ts`: cover adapter bypass, + quiet database ingest output, query-history window semantics, aggregated + warnings, and TTY foreground delegation. +- Modify `packages/cli/src/context-build-view.ts`: allow the foreground view + to run a single requested connection and pass through public ingest flags. +- Modify `packages/cli/src/context-build-view.test.ts`: cover single-target + foreground execution and flag passthrough. 
+- Modify `packages/cli/src/setup-databases.ts`: rename public setup wording to + schema context and query history, stop printing internal report paths in + normal setup output, and replace `ktx scan` retry/debug suggestions with + `ktx ingest --fast`. +- Modify `packages/cli/src/setup-databases.test.ts`: update setup output, + failure, and query-history expectations. +- Modify `packages/cli/src/commands/setup-commands.ts`: replace public + Historic SQL setup flags with query-history setup flags. +- Modify `packages/cli/src/index.test.ts`: update setup help and conflicting + query-history flag tests. + +## Tasks + +### Task 1: Bypass adapter allow-lists for public source ingest + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write the failing adapter-bypass test** + +Add this test inside the `runKtxPublicIngest` describe block in +`packages/cli/src/public-ingest.test.ts`: + +```ts + it('bypasses adapter allow-lists for connection-centric source ingest', async () => { + const runIngest = vi.fn(async () => 0); + const io = makeIo(); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/ktx', + targetConnectionId: 'docs', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { + loadProject: async () => + projectWithConnections({ + docs: { driver: 'notion' }, + }), + runIngest, + }, + ), + ).resolves.toBe(0); + + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + connectionId: 'docs', + adapter: 'notion', + allowImplicitAdapter: true, + }), + io.io, + ); + }); +``` + +- [ ] **Step 2: Run the failing adapter-bypass test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "adapter allow-lists" +``` + +Expected: FAIL because public source ingest does not pass +`allowImplicitAdapter: true`. 
+ +- [ ] **Step 3: Add `allowImplicitAdapter` for inferred source adapters** + +In `packages/cli/src/public-ingest.ts`, update the source-ingest +`KtxIngestArgs` object in `executePublicIngestTarget`: + +```ts + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: target.adapter ?? target.driver, + ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + allowImplicitAdapter: true, + }; +``` + +- [ ] **Step 4: Run the adapter-bypass test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "adapter allow-lists" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(ingest): bypass adapter allow-list for public source ingest" +``` + +### Task 2: Fix query-history window semantics and aggregate source warnings + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing query-history and warning tests** + +Add these tests inside the `buildPublicIngestPlan` describe block in +`packages/cli/src/public-ingest.test.ts`: + +```ts + it('treats query-history window override as current-run query-history enablement', () => { + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false, windowDays: 90 } } }, + }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + queryHistoryWindowDays: 30, + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + queryHistory: { enabled: true, dialect: 'postgres', windowDays: 30 }, + steps: ['database-schema', 'query-history'], + }); + }); + + 
it('warns and skips query-history window override for unsupported database drivers', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + local: { driver: 'sqlite' }, + }), + { + projectDir: '/tmp/project', + targetConnectionId: 'local', + all: false, + queryHistory: 'default', + queryHistoryWindowDays: 30, + }, + ); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'local', + databaseDepth: 'fast', + queryHistory: { enabled: false, windowDays: 30, unsupported: true }, + steps: ['database-schema'], + }); + expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); + }); + + it('aggregates ignored database-depth warnings for all source targets', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + warehouse: { driver: 'postgres' }, + docs: { driver: 'notion' }, + dbt: { driver: 'dbt' }, + }), + { + projectDir: '/tmp/project', + all: true, + depth: 'deep', + queryHistory: 'default', + }, + ); + + expect(plan.warnings).toEqual(['--deep ignored for 2 non-database sources.']); + }); +``` + +- [ ] **Step 2: Run the failing public ingest planning tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "query-history window override|unsupported database drivers|aggregates ignored" +``` + +Expected: FAIL because window-days alone does not request query history and +source warnings are emitted per source. 
+ +- [ ] **Step 3: Add a warning accumulator** + +In `packages/cli/src/public-ingest.ts`, add these types and helpers near +`queryHistoryDialectByDriver`: + +```ts +interface KtxPublicIngestWarningAccumulator { + warnings: string[]; + ignoredDepthForSources: string[]; + ignoredQueryHistoryForSources: string[]; +} + +function createWarningAccumulator(): KtxPublicIngestWarningAccumulator { + return { + warnings: [], + ignoredDepthForSources: [], + ignoredQueryHistoryForSources: [], + }; +} + +function sourceIgnoredWarning(option: string, connectionIds: string[], all: boolean): string | null { + if (connectionIds.length === 0) { + return null; + } + if (all) { + const sourceLabel = connectionIds.length === 1 ? '1 non-database source' : `${connectionIds.length} non-database sources`; + return `${option} ignored for ${sourceLabel}.`; + } + return `${option} affects database ingest only; ignoring it for ${connectionIds[0]}.`; +} + +function finalizeWarnings( + accumulator: KtxPublicIngestWarningAccumulator, + args: { + all: boolean; + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + }, +): string[] { + const warnings = [...accumulator.warnings]; + const depthOption = args.depth ? 
`--${args.depth}` : null; + if (depthOption) { + const warning = sourceIgnoredWarning(depthOption, accumulator.ignoredDepthForSources, args.all); + if (warning) warnings.push(warning); + } + if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { + const warning = sourceIgnoredWarning('--query-history', accumulator.ignoredQueryHistoryForSources, args.all); + if (warning) warnings.push(warning); + } + return warnings; +} +``` + +- [ ] **Step 4: Use window-days as query-history intent** + +In `resolveDatabaseTargetOptions`, replace the current `requestedQh` line with: + +```ts + const windowOverrideRequested = input.args.queryHistoryWindowDays !== undefined; + const requestedQh = + explicitQueryHistory === 'enabled' || + (explicitQueryHistory !== 'disabled' && (windowOverrideRequested || storedEnabled)); +``` + +Leave the existing `--query-history requires deep ingest` warning in place so +`--fast --query-history-window-days 30` upgrades the run to deep with the same +warning as `--fast --query-history`. + +- [ ] **Step 5: Route source warnings through the accumulator** + +Change the `warnings` parameter in `targetForConnection` from `string[]` to +`KtxPublicIngestWarningAccumulator`. 
In the source-adapter branch, replace the +current warning pushes with: + +```ts + if (args.depth) { + warnings.ignoredDepthForSources.push(connectionId); + } + if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { + warnings.ignoredQueryHistoryForSources.push(connectionId); + } +``` + +In the database branch, pass `warnings.warnings` into +`resolveDatabaseTargetOptions`: + +```ts + const options = resolveDatabaseTargetOptions({ + connectionId, + driver, + connection, + args, + warnings: warnings.warnings, + }); +``` + +In `buildPublicIngestPlan`, replace the `warnings` array construction with: + +```ts + const warnings = createWarningAccumulator(); + const targets = selected.map(([connectionId, connection]) => + targetForConnection(connectionId, connection, project.config, args, warnings), + ); + return { + projectDir: args.projectDir, + targets: [ + ...targets.filter((t) => t.operation === 'database-ingest'), + ...targets.filter((t) => t.operation === 'source-ingest'), + ], + warnings: finalizeWarnings(warnings, args), + }; +``` + +- [ ] **Step 6: Run the public ingest planning tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "query-history window override|unsupported database drivers|aggregates ignored" +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(ingest): honor query history window intent" +``` + +### Task 3: Suppress internal scan output in public database ingest + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write the failing quiet-output test** + +Add this test inside the `runKtxPublicIngest` describe block in +`packages/cli/src/public-ingest.test.ts`: + +```ts + it('suppresses internal scan output for public database ingest summaries', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const runScan = vi.fn(async (_args, scanIo) => { + scanIo.stdout.write('KTX scan completed\n'); + scanIo.stdout.write('Mode: structural\n'); + scanIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n'); + scanIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n'); + return 0; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Ingest finished\n'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).not.toContain('KTX scan completed'); + expect(io.stdout()).not.toContain('Mode: structural'); + expect(io.stdout()).not.toContain('Report: raw-sources'); + expect(io.stdout()).not.toContain('live-database'); + }); +``` + +- [ ] **Step 2: Run the failing quiet-output test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "suppresses internal scan output" +``` + +Expected: FAIL because `executePublicIngestTarget` passes the public IO +directly to `runScan`. 
+ +- [ ] **Step 3: Add captured public scan IO** + +In `packages/cli/src/public-ingest.ts`, add these helpers near +`sourceIngestOutputMode`: + +```ts +interface CapturedPublicIngestIo extends KtxCliIo { + capturedOutput(): string; +} + +function createCapturedPublicIngestIo(): CapturedPublicIngestIo { + let output = ''; + return { + stdout: { + isTTY: false, + write(chunk: string) { + output += chunk; + }, + }, + stderr: { + write(chunk: string) { + output += chunk; + }, + }, + capturedOutput() { + return output; + }, + }; +} + +function firstCapturedFailureLine(output: string): string | undefined { + return output + .split(/\r?\n/) + .map((line) => line.trim()) + .find((line) => line.length > 0 && !line.startsWith('KTX scan completed')); +} +``` + +Change `markTargetResult` to accept a detail override: + +```ts +function markTargetResult( + target: KtxPublicIngestPlanTarget, + status: 'done' | 'failed', + failedOperation?: KtxPublicIngestStepName, + failureDetail?: string, +): KtxPublicIngestTargetResult { + const selectedFailedOperation = + failedOperation ?? (target.operation === 'database-ingest' ? 'database-schema' : 'source-ingest'); + return { + connectionId: target.connectionId, + driver: target.driver, + steps: defaultSteps(target).map((step) => { + if (!target.steps.includes(step.operation)) { + return step; + } + if (status === 'done') { + return { ...step, status: 'done' }; + } + if (step.operation === selectedFailedOperation) { + return { + ...step, + status: 'failed', + detail: failureDetail ?? `${target.connectionId} failed at ${selectedFailedOperation}.`, + }; + } + return { ...step, status: 'not-run' }; + }), + }; +} +``` + +In the database-ingest branch of `executePublicIngestTarget`, replace the direct +`runScan` call block with: + +```ts + const runScan = deps.runScan ?? runKtxScan; + const capturedScanIo = deps.scanProgress ? null : createCapturedPublicIngestIo(); + const scanIo = capturedScanIo ?? 
io; + const scanExitCode = deps.scanProgress + ? await runScan(scanArgs, scanIo, { progress: deps.scanProgress }) + : await runScan(scanArgs, scanIo); + if (scanExitCode !== 0) { + return markTargetResult( + target, + 'failed', + 'database-schema', + capturedScanIo ? firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined, + ); + } +``` + +- [ ] **Step 4: Run the quiet-output test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts -t "suppresses internal scan output" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(ingest): hide scan internals from public database ingest" +``` + +### Task 4: Use the shared foreground view for interactive public ingest + +**Files:** +- Modify: `packages/cli/src/context-build-view.ts` +- Modify: `packages/cli/src/public-ingest.ts` +- Test: `packages/cli/src/context-build-view.test.ts` +- Test: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing foreground-view tests** + +In `packages/cli/src/context-build-view.test.ts`, add this test inside the +`runContextBuild` describe block: + +```ts + it('runs only the requested connection when foreground build receives a target', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + docs: { driver: 'notion' }, + }); + const executeTarget = vi.fn(async (target) => + successResult(target.connectionId, target.driver, target.operation), + ); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + depth: 'fast', + queryHistory: 'default', + }, + io.io, + { executeTarget, now: () => 1000 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + expect(executeTarget).toHaveBeenCalledTimes(1); + expect(executeTarget.mock.calls[0]?.[0]).toMatchObject({ + 
connectionId: 'warehouse', + operation: 'database-ingest', + databaseDepth: 'fast', + }); + expect(io.stdout()).toContain('Databases:'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).not.toContain('docs'); + }); +``` + +In `packages/cli/src/public-ingest.test.ts`, update `makeIo` to accept +interactive stdin: + +```ts +function makeIo(options: { isTTY?: boolean; interactive?: boolean } = {}) { + let stdout = ''; + let stderr = ''; + return { + io: { + ...(options.interactive + ? { + stdin: { + isTTY: true, + setRawMode: vi.fn(), + }, + } + : {}), + stdout: { + isTTY: options.isTTY, + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} +``` + +Then add this test inside the `runKtxPublicIngest` describe block: + +```ts + it('delegates interactive TTY public ingest to the foreground context-build view', async () => { + const io = makeIo({ isTTY: true, interactive: true }); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const runContextBuild = vi.fn(async () => ({ exitCode: 0 })); + const runScan = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + depth: 'fast', + queryHistory: 'default', + }, + io.io, + { loadProject: vi.fn(async () => project), runContextBuild, runScan }, + ), + ).resolves.toBe(0); + + expect(runContextBuild).toHaveBeenCalledWith( + project, + expect.objectContaining({ + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + depth: 'fast', + queryHistory: 'default', + }), + io.io, + ); + expect(runScan).not.toHaveBeenCalled(); + }); +``` + +- [ ] **Step 2: Run the failing foreground-view tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts 
src/public-ingest.test.ts -t "requested connection|foreground context-build view" +``` + +Expected: FAIL because `runContextBuild` always plans `--all`, and +`runKtxPublicIngest` does not delegate interactive TTY runs. + +- [ ] **Step 3: Extend foreground context-build args** + +In `packages/cli/src/context-build-view.ts`, replace `ContextBuildArgs` with: + +```ts +export interface ContextBuildArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + targetConnectionId?: string; + all?: boolean; + depth?: Extract<KtxPublicIngestArgs, { command: 'run' }>['depth']; + queryHistory?: Extract<KtxPublicIngestArgs, { command: 'run' }>['queryHistory']; + queryHistoryWindowDays?: number; + scanMode?: 'structural' | 'enriched'; + detectRelationships?: boolean; +} +``` + +In `runContextBuild`, replace the hard-coded plan call with: + +```ts + const plan = buildPublicIngestPlan(project, { + projectDir: args.projectDir, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all ?? true, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + }); +``` + +Replace the `runArgs` construction with: + +```ts + const runArgs: Extract<KtxPublicIngestArgs, { command: 'run' }> = { + command: 'run', + projectDir: args.projectDir, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all ?? true, + json: false, + inputMode: args.inputMode, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? 
{ detectRelationships: args.detectRelationships } : {}), + }; +``` + +- [ ] **Step 4: Add a foreground-build dependency to public ingest** + +In `packages/cli/src/public-ingest.ts`, add this interface near +`KtxPublicIngestDeps`: + +```ts +interface KtxPublicContextBuildArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + targetConnectionId?: string; + all?: boolean; + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + detectRelationships?: boolean; +} +``` + +Add this optional dependency to `KtxPublicIngestDeps`: + +```ts + runContextBuild?: ( + project: KtxPublicIngestProject, + args: KtxPublicContextBuildArgs, + io: KtxCliIo, + ) => Promise<{ exitCode: number }>; +``` + +Add this helper near `sourceIngestOutputMode`: + +```ts +function shouldUseForegroundContextBuildView( + args: Extract, + io: KtxCliIo, +): boolean { + return args.inputMode === 'auto' && args.json !== true && io.stdout.isTTY === true && hasInteractiveInput(io); +} +``` + +In `runKtxPublicIngest`, after loading `project` and before rendering warnings +or executing targets, add: + +```ts + if (shouldUseForegroundContextBuildView(args, io)) { + const { runContextBuild } = await import('./context-build-view.js'); + const contextBuild = deps.runContextBuild ?? runContextBuild; + const result = await contextBuild( + project, + { + projectDir: args.projectDir, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all, + inputMode: args.inputMode, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? 
{ detectRelationships: args.detectRelationships } : {}), + }, + io, + ); + return result.exitCode; + } +``` + +- [ ] **Step 5: Run the foreground-view tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/public-ingest.test.ts -t "requested connection|foreground context-build view" +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "feat(ingest): use foreground view for interactive public ingest" +``` + +### Task 5: Clean setup database output and query-history setup wording + +**Files:** +- Modify: `packages/cli/src/setup-databases.ts` +- Modify: `packages/cli/src/setup-databases.test.ts` +- Modify: `packages/cli/src/commands/setup-commands.ts` +- Modify: `packages/cli/src/index.test.ts` + +- [ ] **Step 1: Write failing setup output expectations** + +In `packages/cli/src/setup-databases.test.ts`, update the test named +`summarizes connection test and structural scan output during setup` so the +final output expectation is: + +```ts + expect(io.stdout()).toContain('◇ Building schema context for postgres-warehouse'); + expect(io.stdout()).toContain('│ Running fast database ingest…'); + expect(io.stdout()).toContain('◇ Schema context complete for postgres-warehouse'); + expect(io.stdout()).toContain('│ Changes: 3 changed tables'); + expect(io.stdout()).toContain('◇ Primary source ready'); + expect(io.stdout()).toContain('│ postgres-warehouse · PostgreSQL · schema context complete'); + expect(io.stdout()).not.toContain('Scanning postgres-warehouse'); + expect(io.stdout()).not.toContain('Scan complete for postgres-warehouse'); + expect(io.stdout()).not.toContain('structural scan complete'); + expect(io.stdout()).not.toContain('Report: raw-sources'); + expect(io.stdout()).not.toContain('live-database'); +``` + +In the setup 
scan-failure test that currently expects `ktx scan`, replace the +expectation with: + +```ts + expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir} --fast`); + expect(io.stderr()).not.toContain('ktx scan'); +``` + +In the test named `writes Postgres Historic SQL config with minExecutions and +ignores window/redaction output`, replace the output expectation with: + +```ts + expect(io.stdout()).toContain('Query history probe...'); + expect(io.stdout()).not.toContain('Historic SQL probe...'); +``` + +In the test named `prints a non-blocking Postgres Historic SQL probe failure +after connection test succeeds`, replace the output expectation with: + +```ts + expect(io.stdout()).toContain('Query history probe...'); + expect(io.stdout()).not.toContain('Historic SQL probe...'); +``` + +- [ ] **Step 2: Write failing setup help expectations** + +In `packages/cli/src/index.test.ts`, update the setup help assertion that +currently checks Historic SQL flags to: + +```ts + for (const expected of [ + '--enable-query-history', + '--disable-query-history', + '--query-history-window-days', + '--query-history-min-executions', + '--query-history-service-account-pattern', + '--query-history-redaction-pattern', + ]) { + expect(testIo.stdout()).toContain(expected); + } + expect(testIo.stdout()).not.toContain('--enable-historic-sql'); + expect(testIo.stdout()).not.toContain('--historic-sql-window-days'); +``` + +Replace the conflicting Historic SQL setup flags test with: + +```ts + it('rejects conflicting query-history setup flags', async () => { + const tempDir = await makeTempProject(); + const setupIo = makeIo(); + + await expect( + runKtxCli(['--project-dir', tempDir, 'setup', '--enable-query-history', '--disable-query-history'], setupIo.io, { + setup: vi.fn(async () => 0), + }), + ).resolves.toBe(1); + + expect(setupIo.stderr()).toContain( + 'Choose only one query-history action: --enable-query-history or --disable-query-history.', + ); + }); +``` + +- 
[ ] **Step 3: Run the failing setup tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts src/index.test.ts -t "structural scan output|query-history setup flags|conflicting query-history|Postgres Historic SQL|non-blocking Postgres" +``` + +Expected: FAIL because setup still uses scan and Historic SQL wording. + +- [ ] **Step 4: Rename setup database args to query history** + +In `packages/cli/src/setup-databases.ts`, replace the Historic SQL argument +fields in `KtxSetupDatabasesArgs` with query-history fields: + +```ts + enableQueryHistory?: boolean; + disableQueryHistory?: boolean; + queryHistoryWindowDays?: number; + queryHistoryMinExecutions?: number; + queryHistoryServiceAccountPatterns?: string[]; + queryHistoryRedactionPatterns?: string[]; +``` + +Update references in `maybeApplyHistoricSqlConfig`: + +```ts + if (!dialect) { + if (input.args.enableQueryHistory === true) { + throw new Error( + `Query history setup is only supported for Snowflake, BigQuery, and Postgres, not ${driverLabel(input.driver)}.`, + ); + } + return input.connection; + } + + let enabled = input.args.enableQueryHistory === true; + if (input.args.disableQueryHistory === true) { + enabled = false; + } else if (input.args.inputMode !== 'disabled' && input.args.enableQueryHistory !== true && dialect !== 'postgres') { + const choice = await input.prompts.select({ + message: `Enable query-history ingest for this ${driverLabel(input.driver)} connection?`, + options: [ + { value: 'yes', label: 'Enable query history' }, + { value: 'no', label: 'Do not enable query history' }, + { value: 'back', label: 'Back' }, + ], + }); + if (choice === 'back') return 'back'; + enabled = choice === 'yes'; + } + + if (dialect === 'postgres' && input.args.enableQueryHistory !== true && input.args.disableQueryHistory !== true) { + return input.connection; + } +``` + +Update the query-history config construction: + +```ts + const common: Record = { + ...existing, + enabled: 
true, + filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns), + }; + + if (dialect === 'postgres') { + return withQueryHistoryConfig(input.connection, { + ...common, + minExecutions: input.args.queryHistoryMinExecutions ?? 5, + }); + } + + return withQueryHistoryConfig(input.connection, { + ...common, + windowDays: input.args.queryHistoryWindowDays ?? 90, + redactionPatterns: input.args.queryHistoryRedactionPatterns ?? [], + }); +``` + +Update both calls to `maybeApplyHistoricSqlConfig` and +`applyHistoricSqlConfigToExistingConnection` by using the renamed args fields; +the function name can remain internal for this task because the source adapter +key is still `historic-sql`. + +- [ ] **Step 5: Replace setup scan wording and command suggestions** + +In `packages/cli/src/setup-databases.ts`, delete `shortenScanReportPath`. +Then replace the scan output block in `validateAndScanConnection` with: + +```ts + writeSetupSection(input.io, `Building schema context for ${input.connectionId}`, [ + 'Running fast database ingest…', + ]); +``` + +Replace the Native SQLite retry failure lines with: + +```ts + [ + rebuildCode === 0 + ? 
`Fast database ingest still failed for ${input.connectionId} after rebuilding Native SQLite.` + : `Native SQLite rebuild failed for ${input.connectionId}.`, + 'Fix: pnpm run native:rebuild', + `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast`, + ].join('\n'), +``` + +Replace the non-ABI failure lines with: + +```ts + [ + `Fast database ingest failed for ${input.connectionId}.`, + `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast --debug`, + ].join('\n'), +``` + +Replace the success section with: + +```ts + const scanOutput = scanIo.stdoutText(); + writeSetupSection( + input.io, + `Schema context complete for ${input.connectionId}`, + [`Changes: ${summarizeScanChanges(scanOutput)}`], + ); + writeSetupSection(input.io, 'Primary source ready', [ + `${input.connectionId} · ${driverDisplay} · schema context complete`, + ]); +``` + +Replace the probe label in `maybeRunHistoricSqlSetupProbe`: + +```ts + input.io.stdout.write('│ Query history probe...\n'); +``` + +- [ ] **Step 6: Replace public setup flags** + +In `packages/cli/src/commands/setup-commands.ts`, replace the Historic SQL +options with: + +```ts + .option('--enable-query-history', 'Enable query history when the selected database supports it', false) + .option('--disable-query-history', 'Disable query history for the selected database', false) + .option('--query-history-window-days ', 'Query-history lookback window', positiveInteger) + .option('--query-history-min-executions ', 'Minimum executions for a query-history template', positiveInteger) + .option( + '--query-history-service-account-pattern ', + 'Query-history service-account regex; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) + .option( + '--query-history-redaction-pattern ', + 'Query-history SQL-literal redaction regex; repeatable', + (value, previous: string[]) => [...previous, value], + [], + ) +``` + +Replace the conflict check with: + +```ts + 
if (options.enableQueryHistory && options.disableQueryHistory) { + context.io.stderr.write( + 'Choose only one query-history action: --enable-query-history or --disable-query-history.\n', + ); + context.setExitCode(1); + return; + } +``` + +Replace the setup arg mapping with: + +```ts + ...(options.enableQueryHistory ? { enableQueryHistory: true } : {}), + ...(options.disableQueryHistory ? { disableQueryHistory: true } : {}), + ...(options.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: options.queryHistoryWindowDays } : {}), + ...(options.queryHistoryMinExecutions !== undefined + ? { queryHistoryMinExecutions: options.queryHistoryMinExecutions } + : {}), + ...(options.queryHistoryServiceAccountPattern.length > 0 + ? { queryHistoryServiceAccountPatterns: options.queryHistoryServiceAccountPattern } + : {}), + ...(options.queryHistoryRedactionPattern.length > 0 + ? { queryHistoryRedactionPatterns: options.queryHistoryRedactionPattern } + : {}), +``` + +- [ ] **Step 7: Update setup database tests to renamed args** + +In `packages/cli/src/setup-databases.test.ts`, replace test input property +names as follows: + +```ts +enableHistoricSql -> enableQueryHistory +disableHistoricSql -> disableQueryHistory +historicSqlWindowDays -> queryHistoryWindowDays +historicSqlMinExecutions -> queryHistoryMinExecutions +historicSqlServiceAccountPatterns -> queryHistoryServiceAccountPatterns +historicSqlRedactionPatterns -> queryHistoryRedactionPatterns +``` + +Also rename test names that include `Historic SQL` to use `query history`. +Keep assertions that `configText` does not contain `historic-sql`. + +- [ ] **Step 8: Run the setup tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts src/index.test.ts -t "schema context|query history|query-history setup flags|conflicting query-history" +``` + +Expected: PASS. 
+ +- [ ] **Step 9: Commit** + +```bash +git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts +git commit -m "fix(setup): use schema context and query history wording" +``` + +### Task 6: Final verification + +**Files:** +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/context-build-view.ts` +- Verify: `packages/cli/src/setup-databases.ts` +- Verify: `packages/cli/src/commands/setup-commands.ts` + +- [ ] **Step 1: Run targeted CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/setup-databases.test.ts src/index.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run the CLI test suite** + +Run: + +```bash +pnpm --filter @ktx/cli run test 2>&1 | tee /tmp/ktx-cli-unified-ingest-public-output.log +``` + +Expected: PASS. If it fails, inspect +`/tmp/ktx-cli-unified-ingest-public-output.log`, fix the failing assertion or +implementation, and rerun this command. + +- [ ] **Step 4: Run dead-code checks** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. If Knip reports public exports or dynamic CLI entrypoints, +verify each report before deleting code. + +- [ ] **Step 5: Commit verification fixes** + +If Step 1 through Step 4 required any changes, commit them: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts +git commit -m "test(cli): verify unified ingest public output" +``` + +If no files changed during verification, do not create an empty commit. 
+ +## Self-review + +- Spec coverage: This plan covers the remaining public v1 gaps: adapter + allow-list bypass, quiet public database ingest output, TTY foreground view, + query-history window overrides, aggregated `--all` source warnings, setup + schema-context wording, setup query-history wording, and `ktx scan` retry + removal from normal setup output. +- Placeholder scan: The plan contains no placeholder markers, deferred tasks, + or "write tests later" steps. +- Type consistency: The plan keeps public ingest fields aligned with + `KtxPublicIngestArgs`, uses `allowImplicitAdapter` consistently with + `runKtxIngest`, and renames setup query-history args consistently from the + Commander layer through `runKtxSetupDatabasesStep`. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-plain-output-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-plain-output-closure.md new file mode 100644 index 00000000..f9b9c956 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-public-plain-output-closure.md @@ -0,0 +1,598 @@ +# Unified Ingest V1 Public Plain Output Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the last v1-blocking adapter-centric and internal source-key leaks from normal public `ktx ingest` plain output. + +**Architecture:** Keep the current connection-centric public ingest planner and hidden debug commands. Sanitize low-level ingest report labels in `ingest.ts`, and capture low-level source/query-history output in `public-ingest.ts` so public plain `ktx ingest <connectionId>` renders only the unified result table, warnings, notices, and retry guidance. JSON output and hidden debug commands may continue to expose raw `sourceKey` values for troubleshooting. 
+ +**Tech Stack:** TypeScript, Commander, Vitest, pnpm workspace scripts. + +--- + +## Current audit + +The unified ingest plan chain has implemented the main v1 behavior: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` route through `public-ingest.ts`. +- Database targets run before source targets, deep readiness is target-local + for `--all`, and inferred public adapters bypass `ingest.adapters`. +- Normal command help hides `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`; docs-site command references no longer publish those + as normal workflows. +- Setup stores `connections.<id>.context.depth` and + `connections.<id>.context.queryHistory`, migrates legacy `historicSql`, and + uses foreground-only context-build state. + +### V1-blocking gaps + +- Direct public non-TTY or `--no-input` source ingest still delegates to + `runKtxIngest()` with the real CLI IO. The lower-level reporter prints + `Adapter: <sourceKey>` and routine report details before the public result + table. For query history this can print `Adapter: historic-sql`, violating + the spec requirement that normal output use query-history wording and keep + internal adapter names out of routine output. +- `ktx ingest status` and `ktx ingest replay` plain output call the same + lower-level report formatter. Stored database reports can therefore print + `Adapter: live-database`, and stored query-history reports can print + `Adapter: historic-sql`, even though `status` and `replay` are public + report-viewing surfaces. + +### Non-blocking gaps + +- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`. +- JSON output, debug output, tests, internal artifact paths, WorkUnit keys, + adapter package names, and developer scripts can continue to use + `scan`, `live-database`, and `historic-sql`. 
+- Public docs still use "scan" as a generic implementation noun in a few + contributor or concept pages. They do not present `ktx scan` as the normal + public command, so that is later wording cleanup. + +## File structure + +- Modify `packages/cli/src/ingest.ts`: replace the plain report `Adapter:` + label with public source labels, while leaving JSON report payloads intact. +- Modify `packages/cli/src/public-ingest.ts`: capture lower-level source and + query-history plain output for direct public ingest, sanitize failure detail + lines, and render only the public summary table. +- Modify `packages/cli/src/ingest.test.ts`: update existing report label + expectations and add regressions for `live-database` and `historic-sql` + stored-report labels. +- Modify `packages/cli/src/public-ingest.test.ts`: add regressions proving + direct public source and query-history runs do not leak lower-level adapter + report output. + +## Tasks + +### Task 1: Use public source labels in stored report output + +**Files:** +- Modify: `packages/cli/src/ingest.ts` +- Modify: `packages/cli/src/ingest.test.ts` + +- [ ] **Step 1: Add failing stored-report label tests** + +Add these tests inside the existing `describe('runKtxIngest', () => { ... 
})` +block in `packages/cli/src/ingest.test.ts`, near the existing +`runs local ingest and reads status` test: + +```typescript + it('labels internal database reports without adapter names in plain status output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const report = localFakeBundleReport('scan-job-1', { + id: 'report-scan-1', + runId: 'run-scan-1', + connectionId: 'warehouse', + sourceKey: 'live-database', + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + reportFile: '/tmp/scan-report.json', + outputMode: 'plain', + }, + io.io, + { + readReportFile: vi.fn(async () => report), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Source: Database schema\n'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('live-database'); + expect(io.stderr()).toBe(''); + }); + + it('labels internal query-history reports without adapter names in plain status output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const report = localFakeBundleReport('query-history-job-1', { + id: 'report-query-history-1', + runId: 'run-query-history-1', + connectionId: 'warehouse', + sourceKey: 'historic-sql', + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + reportFile: '/tmp/query-history-report.json', + outputMode: 'plain', + }, + io.io, + { + readReportFile: vi.fn(async () => report), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Source: Query history\n'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('historic-sql'); + expect(io.stderr()).toBe(''); + }); +``` + +- [ ] **Step 2: Run the failing stored-report tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts --testNamePattern "labels internal" +``` + +Expected: FAIL. 
The output still contains `Adapter: live-database` or +`Adapter: historic-sql`, and it does not contain the new public `Source:` +labels. + +- [ ] **Step 3: Add public report source labels** + +In `packages/cli/src/ingest.ts`, add these helpers above +`function writeReportStatus(...)`: + +```typescript +const REPORT_SOURCE_LABELS = new Map([ + ['live-database', 'Database schema'], + ['historic-sql', 'Query history'], + ['dbt', 'dbt'], + ['metricflow', 'MetricFlow'], + ['lookml', 'LookML'], + ['looker', 'Looker'], + ['metabase', 'Metabase'], + ['notion', 'Notion'], +]); + +function reportSourceLabel(sourceKey: string): string { + const label = REPORT_SOURCE_LABELS.get(sourceKey); + if (label) { + return label; + } + return sourceKey + .split(/[-_]+/) + .filter((part) => part.length > 0) + .map((part) => `${part[0]?.toUpperCase() ?? ''}${part.slice(1)}`) + .join(' '); +} +``` + +Then replace the `Adapter:` line in `writeReportStatus()`: + +```typescript + io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); +``` + +The full function should keep the remaining fields unchanged: + +```typescript +function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void { + const counts = savedMemoryCountsForReport(report); + io.stdout.write(`Report: ${report.id}\n`); + io.stdout.write(`Run: ${report.runId}\n`); + io.stdout.write(`Job: ${report.jobId}\n`); + io.stdout.write(`Status: ${reportStatus(report)}\n`); + io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); + io.stdout.write(`Connection: ${report.connectionId}\n`); + io.stdout.write(`Sync: ${report.body.syncId}\n`); + io.stdout.write( + `Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`, + ); + io.stdout.write(`Work units: ${report.body.workUnits.length}\n`); + io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); + io.stdout.write(`Provenance 
rows: ${report.body.provenanceRows.length}\n`); +} +``` + +- [ ] **Step 4: Update existing report label expectations** + +In `packages/cli/src/ingest.test.ts`, update the existing assertions that +still expect the old `Adapter:` label: + +```typescript +expect(statusIo.stdout()).toContain('Source: Metabase'); +``` + +```typescript +expect(io.stdout()).toContain('Source: Query history\n'); +``` + +```typescript +expect(io.stdout()).toContain('Source: Looker'); +``` + +```typescript +expect(statusIo.stdout()).toContain('Source: Looker'); +``` + +Remove the corresponding `Adapter: metabase`, `Adapter: historic-sql`, and +`Adapter: looker` expectations. + +- [ ] **Step 5: Run the stored-report tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest.test.ts --testNamePattern "labels internal|runs public Metabase|historic-sql projection|Looker" +``` + +Expected: PASS. Plain report output uses `Source:` labels and does not print +`Adapter:` for the covered status and run summaries. 
+ +- [ ] **Step 6: Commit stored-report label cleanup** + +Run: + +```bash +git add packages/cli/src/ingest.ts packages/cli/src/ingest.test.ts +git commit -m "fix(cli): use public source labels in ingest reports" +``` + +### Task 2: Capture low-level output during public source ingest + +**Files:** +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Add failing public source-output tests** + +Add these tests to `packages/cli/src/public-ingest.test.ts` near the existing +public output tests for captured scan output and query-history retry guidance: + +```typescript + it('suppresses lower-level source report output during direct public source ingest', async () => { + const io = makeIo(); + const project = projectWithConnections({ + docs: { driver: 'notion' }, + }); + const runIngest = vi.fn(async (_args, ingestIo) => { + ingestIo.stdout.write('Report: report-docs-1\n'); + ingestIo.stdout.write('Adapter: notion\n'); + ingestIo.stdout.write('Saved memory: 2 wiki, 0 SL\n'); + return 0; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'docs', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Ingest finished'); + expect(io.stdout()).toContain('docs'); + expect(io.stdout()).toContain('source-ingest'); + expect(io.stdout()).not.toContain('Report: report-docs-1'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('notion\n'); + expect(io.stderr()).toBe(''); + }); + + it('suppresses historic-sql report output during direct public query-history ingest', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async 
(_args, ingestIo) => { + ingestIo.stdout.write('Report: report-query-history-1\n'); + ingestIo.stdout.write('Adapter: historic-sql\n'); + ingestIo.stdout.write('Saved memory: 1 wiki, 1 SL\n'); + return 0; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + expect(io.stdout()).toContain('Ingest finished'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).toContain('done'); + expect(io.stdout()).not.toContain('Report: report-query-history-1'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('historic-sql'); + expect(io.stderr()).toBe(''); + }); +``` + +- [ ] **Step 2: Run the failing public source-output tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testNamePattern "suppresses" +``` + +Expected: FAIL. The direct public run writes lower-level `Report:` and +`Adapter:` lines into normal public stdout. 
+ +- [ ] **Step 3: Add captured ingest output helpers** + +In `packages/cli/src/public-ingest.ts`, keep the existing +`createCapturedPublicIngestIo()` helper and replace +`firstCapturedFailureLine()` with these helpers: + +```typescript +const INTERNAL_STATUS_LINE_RE = + /^(Report|Run|Job|Status|Adapter|Connection|Sync|Diff|Work units|Saved memory|Provenance rows):\s*/; + +function publicIngestOutputLine(line: string): string { + return line + .replace(/\blive-database\b/g, 'database schema') + .replace(/\bhistoric-sql\b/g, 'query history') + .replace(/\bhistoric SQL\b/gi, 'query history'); +} + +function firstCapturedFailureLine(output: string): string | undefined { + return output + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .filter((line) => !line.startsWith('KTX scan completed')) + .filter((line) => !INTERNAL_STATUS_LINE_RE.test(line)) + .map(publicIngestOutputLine) + .find((line) => line.length > 0); +} +``` + +- [ ] **Step 4: Capture query-history ingest output** + +In `executePublicIngestTarget()`, replace the query-history branch with this +captured-output flow: + +```typescript + if (target.queryHistory?.enabled === true) { + const { runKtxIngest } = await import('./ingest.js'); + const runIngest = deps.runIngest ?? runKtxIngest; + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: 'historic-sql', + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + allowImplicitAdapter: true, + historicSqlPullConfigOverride: + target.queryHistory.pullConfig ?? { + dialect: target.queryHistory.dialect, + ...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}), + }, + }; + const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo(); + const ingestIo = capturedIngestIo ?? io; + const qhExitCode = deps.ingestProgress + ? 
await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress }) + : await runIngest(ingestArgs, ingestIo); + if (qhExitCode !== 0) { + return markTargetResult( + target, + args, + 'failed', + 'query-history', + capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined, + ); + } + } +``` + +This keeps foreground progress working because `runContextBuild()` supplies +`deps.ingestProgress` and already passes a captured IO object into +`executePublicIngestTarget()`. + +- [ ] **Step 5: Capture source ingest output** + +In the source-ingest branch of `executePublicIngestTarget()`, replace the +direct `runIngest(..., io, ...)` call with this captured-output flow: + +```typescript + const runIngest = deps.runIngest ?? runKtxIngest; + const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo(); + const ingestIo = capturedIngestIo ?? io; + const exitCode = deps.ingestProgress + ? await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress }) + : await runIngest(ingestArgs, ingestIo); + return markTargetResult( + target, + args, + exitCode === 0 ? 'done' : 'failed', + 'source-ingest', + capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined, + ); +``` + +Keep the existing `ingestArgs` object unchanged: + +```typescript + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: target.adapter ?? target.driver, + ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + allowImplicitAdapter: true, + }; +``` + +- [ ] **Step 6: Run the public source-output tests again** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts --testNamePattern "suppresses|retry guidance|foreground" +``` + +Expected: PASS. 
Direct public source and query-history runs no longer print +low-level `Report:`, `Adapter:`, `live-database`, or `historic-sql` lines in +plain stdout, while existing foreground and retry guidance tests still pass. + +- [ ] **Step 7: Commit public source-output capture** + +Run: + +```bash +git add packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(cli): suppress low-level public ingest output" +``` + +### Task 3: Final verification + +**Files:** +- Verify: `packages/cli/src/ingest.ts` +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `packages/cli/src/ingest.test.ts` +- Verify: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Run focused CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run \ + src/public-ingest.test.ts \ + src/context-build-view.test.ts \ + src/ingest.test.ts \ + src/ingest-viz.test.ts \ + src/command-tree.test.ts \ + src/print-command-tree.test.ts +``` + +Expected: PASS. These tests cover direct public ingest, foreground context +builds, stored report rendering, visual report rendering, and hidden command +tree filtering. + +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS with no TypeScript errors. + +- [ ] **Step 3: Verify generated command tree still hides debug commands** + +Run: + +```bash +pnpm --filter @ktx/cli run docs:commands >/tmp/ktx-command-tree.txt +rg "scan |ingest run|ingest watch" /tmp/ktx-command-tree.txt +``` + +Expected: the `docs:commands` command succeeds. The `rg` command exits `1` +with no matches. 
+ +- [ ] **Step 4: Search public docs and normal CLI surfaces for old public command guidance** + +Run: + +```bash +rg -n "ktx scan|ktx ingest run|ktx ingest watch|--enable-historic-sql|--historic-sql|historicSql|Historic SQL|live-database" \ + README.md docs-site/content examples/README.md examples/local-warehouse/README.md examples/postgres-historic/README.md +``` + +Expected: no v1-blocking matches. Matches that refer only to internal raw +artifact paths such as `raw-sources/warehouse/historic-sql` are allowed only in +the Postgres query-history smoke README. + +- [ ] **Step 5: Run dead-code checks after TypeScript changes** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS. If Knip reports unrelated existing findings, inspect them and +record the unrelated findings before finishing. + +- [ ] **Step 6: Inspect final diff** + +Run: + +```bash +git status --short +git diff -- packages/cli/src/ingest.ts packages/cli/src/public-ingest.ts packages/cli/src/ingest.test.ts packages/cli/src/public-ingest.test.ts +``` + +Expected: only the intended TypeScript source and test files are modified. +The diff contains no generated `dist/` files and no docs changes beyond this +plan. + +- [ ] **Step 7: Commit verification-only fixes if needed** + +Run only if verification required small expectation or formatting fixes: + +```bash +git add packages/cli/src/ingest.ts packages/cli/src/public-ingest.ts packages/cli/src/ingest.test.ts packages/cli/src/public-ingest.test.ts +git commit -m "test(cli): verify unified ingest public plain output" +``` + +Expected: no commit is needed when all checks pass after Tasks 1 and 2. + +## Self-review + +- Spec coverage: This plan closes the remaining v1-blocking normal-output + leaks for direct public source ingest, public query-history ingest, and + public stored-report status/replay output. It intentionally leaves hidden + debug commands, JSON payloads, internal artifact paths, and developer tests + untouched. 
+- Placeholder scan: The plan contains concrete file paths, exact test code, + exact implementation snippets, commands, and expected results. +- Type consistency: The snippets use existing local types and helpers: + `KtxIngestArgs`, `createCapturedPublicIngestIo()`, + `firstCapturedFailureLine()`, `sourceIngestOutputMode()`, + `markTargetResult()`, `localFakeBundleReport()`, and `makeIo()`. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-query-history-status-cleanup.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-query-history-status-cleanup.md new file mode 100644 index 00000000..e9d03ec8 --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-query-history-status-cleanup.md @@ -0,0 +1,1339 @@ +# Unified Ingest V1 Query History Status Cleanup Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Close the remaining v1-blocking public UX gaps in the unified +`ktx ingest` redesign. + +**Architecture:** Keep the implemented connection-centric ingest planner and +foreground context-build view. Patch the public setup, status, doctor, retry, +and example surfaces so canonical `context.queryHistory` and +`ktx ingest <connectionId>` are the only normal user-facing paths. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, KTX CLI/context packages, +Markdown examples, shell smoke scripts. + +--- + +## Current audit + +The three implemented unified-ingest plans cover most of the original spec: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, + `--query-history`, `--no-query-history`, and + `--query-history-window-days` are routed through `public-ingest.ts`. 
+- Database targets run before source targets, inferred public adapters bypass + `ingest.adapters`, fast/deep map to structural/enriched scan internals, and + deep readiness failures are per-target failures under `--all`. +- `ktx scan`, `ktx ingest run`, and `ktx ingest watch` are hidden from normal + help. +- Setup stores `connections.<connectionId>.context.depth`, config parsing rejects + reserved ingest subcommand ids, generated default config omits normal + adapter allow-list entries, and setup context builds are foreground-only. +- Public database ingest suppresses normal internal scan output, source ingest + passes `allowImplicitAdapter: true`, query-history window overrides enable + query history for the current run, and TTY public ingest delegates to the + shared foreground view. + +### V1-blocking gaps + +- `packages/cli/src/setup.ts` still exposes and forwards + `enableHistoricSql`, `disableHistoricSql`, and `historicSql*` args into the + database setup step. Public Commander flags now produce `enableQueryHistory` + and `queryHistory*`, so full `ktx setup --enable-query-history ...` does not + reach `runKtxSetupDatabasesStep`. +- Interactive Postgres setup does not ask whether to enable query history when + no query-history flag is provided, even though Postgres is a supported v1 + query-history driver. +- `ktx status`/project doctor still reads legacy + `connections.<connectionId>.historicSql`, ignores canonical + `connections.<connectionId>.context.queryHistory`, and prints public + `Postgres Historic SQL` labels. +- `ktx ingest status` with no stored reports still suggests + `ktx ingest run --connection-id <connectionId> --adapter <adapter>`, which the spec + explicitly removes from normal guidance. +- Public query-history failures can surface `Historic SQL local ingest...` + messages from `local-adapters.ts`. +- The shared foreground view always formats retry guidance as `ktx setup`, + even when it is running direct public `ktx ingest <connectionId>`. 
+- Query-history foreground progress can show raw `historic-sql` adapter text + from lower-level ingest progress messages. +- Public examples still document old query-history and adapter surfaces: + `examples/postgres-historic/README.md`, + `examples/postgres-historic/scripts/smoke.sh`, and + `examples/README.md` still use `Historic SQL`, `--enable-historic-sql`, + `--historic-sql-*`, and `ktx ingest run --adapter historic-sql`. +- Checked-in example project configs still contain normal + `ingest.adapters: [live-database]`, contrary to the v1 config model. + +### Non-blocking gaps + +- Hidden debug commands can continue to call `ktx scan`, `ktx ingest run`, and + `ktx ingest watch`. +- Internal adapter keys, package names, raw artifact paths, WorkUnit keys, + skill names, and JSON/debug output can continue to use `scan`, + `live-database`, and `historic-sql`. +- Internal scripts such as relationship verification and artifact packaging can + keep standalone scan/live-database terminology when they are explicitly + developer-only. +- `setup.ts` still has dead `detached`/`paused`/`autoWatch` type remnants. + They are not currently user-facing because setup context state is normalized + and background watch flows have been removed. +- README package taxonomy such as `Postgres scan connector` can remain because + it describes internal package ownership, not public command usage. + +## File structure + +- Modify `packages/cli/src/setup.ts`: rename setup args and database-step + forwarding from historic-SQL names to query-history names. +- Modify `packages/cli/src/setup.test.ts`: cover full setup forwarding of + query-history flags into the database setup runner. +- Modify `packages/cli/src/setup-databases.ts`: ask the query-history prompt + for Postgres when interactive and no explicit query-history flag is supplied. 
+- Modify `packages/cli/src/setup-databases.test.ts`: cover interactive + Postgres query-history enablement through the canonical + `context.queryHistory` shape. +- Modify `packages/cli/src/historic-sql-doctor.ts`: read canonical + query-history config, keep legacy fallback for pre-migration configs, and + rename public doctor labels/messages to query history. +- Modify `packages/cli/src/historic-sql-doctor.test.ts`: update doctor unit + expectations for canonical config and public wording. +- Modify `packages/cli/src/doctor.test.ts`: update project doctor integration + expectations. +- Modify `packages/cli/src/ingest.ts`: replace stale no-report status guidance + with `ktx ingest <connectionId>` wording. +- Modify `packages/cli/src/ingest-viz.test.ts`: cover the no-report status + guidance. +- Modify `packages/cli/src/local-adapters.ts`: change public-facing + query-history capability errors away from `Historic SQL`. +- Modify `packages/cli/src/local-adapters.test.ts`: cover at least one + query-history capability error message. +- Modify `packages/cli/src/context-build-view.ts`: accept an entrypoint for + retry text and sanitize public query-history progress messages. +- Modify `packages/cli/src/context-build-view.test.ts`: cover direct ingest + retry guidance and sanitized query-history progress. +- Modify `packages/cli/src/public-ingest.ts`: pass `entrypoint: 'ingest'` to + the foreground context-build view. +- Modify `packages/cli/src/public-ingest.test.ts`: cover public foreground + delegation with the entrypoint. +- Modify `examples/postgres-historic/README.md`: rename public query-history + wording and commands. +- Modify `examples/postgres-historic/scripts/smoke.sh`: use new setup flags. +- Modify `examples/README.md`: remove old Historic SQL public wording. +- Modify `examples/local-warehouse/ktx.yaml` and + `examples/orbit-relationship-verification/ktx.yaml`: remove + `live-database` from normal checked-in `ingest.adapters`. 
+- Modify `scripts/examples-docs.test.mjs`: assert the public examples no + longer advertise old flags or adapter commands. + +## Tasks + +### Task 1: Fix full setup query-history argument plumbing + +**Files:** +- Modify: `packages/cli/src/setup.ts` +- Test: `packages/cli/src/setup.test.ts` + +- [ ] **Step 1: Write the failing setup forwarding test** + +In `packages/cli/src/setup.test.ts`, add query-history fields to the existing +test named `runs database setup after embeddings succeed`: + +```ts + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], +``` + +The full args object in that test should include: + +```ts + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: ['public'], + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], + skipDatabases: false, +``` + +Extend the `expect(databases).toHaveBeenCalledWith(...)` assertion in the same +test: + +```ts + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], +``` + +- [ ] **Step 2: Run the failing setup test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup.test.ts -t "runs database setup after embeddings succeed" +``` + +Expected: FAIL because `runKtxSetup` still forwards the old +`enableHistoricSql` and `historicSql*` fields. 
+ +- [ ] **Step 3: Rename setup args and forwarding** + +In `packages/cli/src/setup.ts`, replace the query-history section of +`KtxSetupArgs`: + +```ts + enableHistoricSql?: boolean; + disableHistoricSql?: boolean; + historicSqlWindowDays?: number; + historicSqlMinExecutions?: number; + historicSqlServiceAccountPatterns?: string[]; + historicSqlRedactionPatterns?: string[]; +``` + +with: + +```ts + enableQueryHistory?: boolean; + disableQueryHistory?: boolean; + queryHistoryWindowDays?: number; + queryHistoryMinExecutions?: number; + queryHistoryServiceAccountPatterns?: string[]; + queryHistoryRedactionPatterns?: string[]; +``` + +In the database-step call in `runKtxSetupInner`, replace: + +```ts + ...(args.enableHistoricSql !== undefined ? { enableHistoricSql: args.enableHistoricSql } : {}), + ...(args.disableHistoricSql !== undefined ? { disableHistoricSql: args.disableHistoricSql } : {}), + ...(args.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: args.historicSqlWindowDays } : {}), + ...(args.historicSqlMinExecutions !== undefined + ? { historicSqlMinExecutions: args.historicSqlMinExecutions } + : {}), + ...(args.historicSqlServiceAccountPatterns + ? { historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns } + : {}), + ...(args.historicSqlRedactionPatterns + ? { historicSqlRedactionPatterns: args.historicSqlRedactionPatterns } + : {}), +``` + +with: + +```ts + ...(args.enableQueryHistory !== undefined ? { enableQueryHistory: args.enableQueryHistory } : {}), + ...(args.disableQueryHistory !== undefined ? { disableQueryHistory: args.disableQueryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.queryHistoryMinExecutions !== undefined + ? { queryHistoryMinExecutions: args.queryHistoryMinExecutions } + : {}), + ...(args.queryHistoryServiceAccountPatterns + ? 
{ queryHistoryServiceAccountPatterns: args.queryHistoryServiceAccountPatterns } + : {}), + ...(args.queryHistoryRedactionPatterns + ? { queryHistoryRedactionPatterns: args.queryHistoryRedactionPatterns } + : {}), +``` + +- [ ] **Step 4: Run the setup forwarding test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup.test.ts -t "runs database setup after embeddings succeed" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup.ts packages/cli/src/setup.test.ts +git commit -m "fix(setup): forward query history flags" +``` + +### Task 2: Ask Postgres query-history setup interactively + +**Files:** +- Modify: `packages/cli/src/setup-databases.ts` +- Test: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Write the failing Postgres prompt test** + +In `packages/cli/src/setup-databases.test.ts`, add this test after +`writes Postgres query history config with minExecutions and ignores window/redaction output`: + +```ts + it('asks interactive Postgres setup whether to enable query history', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['yes'] }); + const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseConnectionIds: ['warehouse'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + prompts, + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlProbe, + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.select).toHaveBeenCalledWith({ + message: 'Enable query-history ingest for this PostgreSQL connection?', + options: [ + { value: 'yes', label: 'Enable 
query history' }, + { value: 'no', label: 'Do not enable query history' }, + { value: 'back', label: 'Back' }, + ], + }); + expect(historicSqlProbe).toHaveBeenCalledWith({ + projectDir: tempDir, + connectionId: 'warehouse', + dialect: 'postgres', + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + context: { + queryHistory: { + enabled: true, + minExecutions: 5, + filters: { dropTrivialProbes: true }, + }, + }, + }); + }); +``` + +- [ ] **Step 2: Run the failing Postgres prompt test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "asks interactive Postgres setup" +``` + +Expected: FAIL because Postgres currently returns without asking when no +explicit query-history flag is supplied. + +- [ ] **Step 3: Prompt for all supported query-history drivers** + +In `packages/cli/src/setup-databases.ts`, replace this branch in +`maybeApplyHistoricSqlConfig`: + +```ts + } else if (input.args.inputMode !== 'disabled' && input.args.enableQueryHistory !== true && dialect !== 'postgres') { +``` + +with: + +```ts + } else if (input.args.inputMode !== 'disabled' && input.args.enableQueryHistory !== true) { +``` + +Then delete this early return: + +```ts + if (dialect === 'postgres' && input.args.enableQueryHistory !== true && input.args.disableQueryHistory !== true) { + return input.connection; + } +``` + +- [ ] **Step 4: Run the Postgres prompt test** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-databases.test.ts -t "asks interactive Postgres setup|writes Postgres query history config" +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/setup-databases.ts packages/cli/src/setup-databases.test.ts +git commit -m "fix(setup): prompt for postgres query history" +``` + +### Task 3: Rename status/doctor query-history readiness output + +**Files:** +- Modify: `packages/cli/src/historic-sql-doctor.ts` +- Modify: `packages/cli/src/historic-sql-doctor.test.ts` +- Modify: `packages/cli/src/doctor.test.ts` + +- [ ] **Step 1: Write failing canonical doctor expectations** + +In `packages/cli/src/historic-sql-doctor.test.ts`, update the first test name +and expected object: + +```ts + it('passes when no Postgres query-history connections are enabled', async () => { +``` + +```ts + expect(checks).toEqual([ + { + id: 'query-history-postgres', + label: 'Postgres query history', + status: 'pass', + detail: 'No enabled Postgres query-history connections', + }, + ]); +``` + +In the success test, replace the configured connection with canonical +query-history config: + +```ts + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + context: { queryHistory: { enabled: true } }, + }, +``` + +Update the probe assertion to match the same connection shape, and update the +expected check: + +```ts + { + id: 'query-history-postgres-warehouse', + label: 'Postgres query history (warehouse)', + status: 'pass', + detail: 'pg_stat_statements ready (PostgreSQL 16.4)', + }, +``` + +Update the warning and capability-error tests to expect +`query-history-postgres-warehouse` and +`Postgres query history (warehouse)`. 
+ +Add this legacy fallback test before the non-Postgres-driver failure test: + +```ts + it('still checks legacy historicSql blocks before setup migration', async () => { + const probe = vi.fn(async () => ({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + })); + + const checks = await runPostgresHistoricSqlDoctorChecks( + projectWithConnections({ + warehouse: { + driver: 'postgres', + url: 'env:WAREHOUSE_DATABASE_URL', + readonly: true, + historicSql: { enabled: true, dialect: 'postgres' }, + }, + }), + { postgresHistoricSqlProbe: probe }, + ); + + expect(checks).toEqual([ + { + id: 'query-history-postgres-warehouse', + label: 'Postgres query history (warehouse)', + status: 'pass', + detail: 'pg_stat_statements ready (PostgreSQL 16.4)', + }, + ]); + }); +``` + +Update the non-Postgres-driver failure expected object: + +```ts + { + id: 'query-history-postgres-warehouse', + label: 'Postgres query history (warehouse)', + status: 'fail', + detail: 'connections.warehouse.context.queryHistory is enabled but driver is mysql', + fix: 'Set connections.warehouse.driver to postgres or disable query history for this connection', + }, +``` + +In `packages/cli/src/doctor.test.ts`, rename the test to +`includes Postgres query-history readiness in project doctor output`, write +canonical config, and update the injected check: + +```ts + ' context:', + ' queryHistory:', + ' enabled: true', +``` + +```ts + id: 'query-history-postgres-warehouse', + label: 'Postgres query history (warehouse)', +``` + +Update the output assertion: + +```ts + expect(testIo.stdout()).toContain('PASS Postgres query history (warehouse): pg_stat_statements ready'); +``` + +- [ ] **Step 2: Run the failing doctor tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/historic-sql-doctor.test.ts src/doctor.test.ts -t "query-history|historicSql blocks" +``` + +Expected: FAIL because the doctor still reads `historicSql` only and prints +`Postgres Historic SQL`. 
+ +- [ ] **Step 3: Read canonical query-history config in the doctor** + +In `packages/cli/src/historic-sql-doctor.ts`, replace `historicSqlRecord` and +`isEnabledPostgresHistoricSql` with: + +```ts +function recordValue(value: unknown): Record<string, unknown> | null { + return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record<string, unknown>) : null; +} + +function queryHistoryRecord(connection: KtxProjectConnectionConfig): Record<string, unknown> | null { + const context = recordValue(connection.context); + return recordValue(context?.queryHistory); +} + +function legacyHistoricSqlRecord(connection: KtxProjectConnectionConfig): Record<string, unknown> | null { + return recordValue(connection.historicSql); +} + +function isEnabledPostgresQueryHistory(connection: KtxProjectConnectionConfig): boolean { + const queryHistory = queryHistoryRecord(connection); + if (queryHistory) { + return queryHistory.enabled === true; + } + const legacy = legacyHistoricSqlRecord(connection); + return legacy?.enabled === true && legacy.dialect === 'postgres'; +} +``` + +Rename `checkId`: + +```ts +function checkId(connectionId: string): string { + return `query-history-postgres-${connectionId.replace(/[^a-z0-9_-]+/gi, '-')}`; +} +``` + +Update `capabilityFailureFix`: + +```ts + if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { + return 'Use PostgreSQL 14 or newer, or disable query history for this connection'; + } + return `Fix connections.${connectionId} Postgres settings, then rerun \`ktx status --project-dir ${projectDir}\``; +``` + +Update `runPostgresHistoricSqlDoctorChecks` target selection and no-target +result: + +```ts + const targets = Object.entries(project.config.connections) + .filter(([, connection]) => isEnabledPostgresQueryHistory(connection)) + .sort(([left], [right]) => left.localeCompare(right)); + + if (targets.length === 0) { + return [ + check('pass', 'query-history-postgres', 'Postgres query history', 'No enabled Postgres query-history connections'), + ]; + } +```
+ +Update the per-target label and non-Postgres failure: + +```ts + const label = `Postgres query history (${connectionId})`; + if (!isPostgresDriver(connection)) { + checks.push( + check( + 'fail', + checkId(connectionId), + label, + `connections.${connectionId}.context.queryHistory is enabled but driver is ${String(connection.driver)}`, + `Set connections.${connectionId}.driver to postgres or disable query history for this connection`, + ), + ); + continue; + } +``` + +- [ ] **Step 4: Run the doctor tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/historic-sql-doctor.test.ts src/doctor.test.ts -t "query-history|historicSql blocks" +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add packages/cli/src/historic-sql-doctor.ts packages/cli/src/historic-sql-doctor.test.ts packages/cli/src/doctor.test.ts +git commit -m "fix(status): report query history readiness" +``` + +### Task 4: Remove stale adapter-command suggestions and public Historic SQL errors + +**Files:** +- Modify: `packages/cli/src/ingest.ts` +- Modify: `packages/cli/src/ingest-viz.test.ts` +- Modify: `packages/cli/src/local-adapters.ts` +- Modify: `packages/cli/src/local-adapters.test.ts` + +- [ ] **Step 1: Write failing no-report status guidance test** + +In `packages/cli/src/ingest-viz.test.ts`, add this test after +`returns an error code for missing status`: + +```ts + it('suggests public ingest when status has no stored reports', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo(); + + await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(1); + + expect(io.stderr()).toContain('No local ingest reports were found. 
Run `ktx ingest ` first.'); + expect(io.stderr()).not.toContain('ktx ingest run --connection-id'); + expect(io.stderr()).not.toContain('--adapter'); + }); +``` + +- [ ] **Step 2: Write failing query-history error wording test** + +In `packages/cli/src/local-adapters.test.ts`, add this test before the closing +`describe` block: + +```ts + it('uses query-history wording for public BigQuery capability errors', async () => { + await writeProject( + tempDir, + [ + 'project: warehouse', + 'connections:', + ' bq:', + ' driver: bigquery', + ' readonly: true', + ' dataset_id: analytics', + ' credentials_json: "{}"', + ' context:', + ' queryHistory:', + ' enabled: true', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + ); + const project = await loadKtxProject({ projectDir: tempDir }); + + expect(() => + createKtxCliLocalIngestAdapters(project, { + historicSqlConnectionId: 'bq', + sqlAnalysis: sqlAnalysisStub(), + }), + ).toThrow('Query history BigQuery connection requires credentials_json.project_id'); + }); +``` + +- [ ] **Step 3: Run the failing output tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest-viz.test.ts src/local-adapters.test.ts -t "public ingest when status|query-history wording" +``` + +Expected: FAIL because current output still mentions `ktx ingest run` and +`Historic SQL`. + +- [ ] **Step 4: Replace stale status guidance** + +In `packages/cli/src/ingest.ts`, replace: + +```ts + : 'No local ingest reports were found. Run `ktx ingest run --connection-id --adapter ` first.', +``` + +with: + +```ts + : 'No local ingest reports were found. Run `ktx ingest ` first.', +``` + +- [ ] **Step 5: Rename public query-history capability errors** + +In `packages/cli/src/local-adapters.ts`, replace user-facing error strings: + +```ts +`Historic SQL local ingest requires a Postgres connection, got ${String(connection?.driver ?? 
'unknown')}` +`Historic SQL local ingest requires a BigQuery connection, got ${String(connection?.driver ?? 'unknown')}` +`Historic SQL local ingest requires a Snowflake connection, got ${String(connection?.driver ?? 'unknown')}` +'Historic SQL BigQuery connection requires credentials_json.project_id' +``` + +with: + +```ts +`Query history ingest requires a Postgres connection, got ${String(connection?.driver ?? 'unknown')}` +`Query history ingest requires a BigQuery connection, got ${String(connection?.driver ?? 'unknown')}` +`Query history ingest requires a Snowflake connection, got ${String(connection?.driver ?? 'unknown')}` +'Query history BigQuery connection requires credentials_json.project_id' +``` + +- [ ] **Step 6: Run the output tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/ingest-viz.test.ts src/local-adapters.test.ts -t "public ingest when status|query-history wording" +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/ingest.ts packages/cli/src/ingest-viz.test.ts packages/cli/src/local-adapters.ts packages/cli/src/local-adapters.test.ts +git commit -m "fix(ingest): remove legacy public guidance" +``` + +### Task 5: Fix foreground retry guidance and query-history progress copy + +**Files:** +- Modify: `packages/cli/src/context-build-view.ts` +- Modify: `packages/cli/src/context-build-view.test.ts` +- Modify: `packages/cli/src/public-ingest.ts` +- Modify: `packages/cli/src/public-ingest.test.ts` + +- [ ] **Step 1: Write failing foreground view tests** + +In `packages/cli/src/context-build-view.test.ts`, add this test in the +`runContextBuild` describe block: + +```ts + it('uses direct ingest retry guidance for public ingest failures', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + }); + const executeTarget = vi.fn(async (target) => failedResult(target.connectionId, target.driver, target.operation)); + + await 
runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled', targetConnectionId: 'warehouse', all: false, entrypoint: 'ingest' }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project'); + expect(io.stdout()).not.toContain('Retry: ktx setup'); + }); +``` + +Add this progress-copy test in the same describe block: + +```ts + it('renders query-history progress without the historic-sql adapter key', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, + }); + const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => { + deps.ingestProgress?.({ percent: 5, message: 'Fetching source files for warehouse/historic-sql' }); + return successResult(target.connectionId, target.driver, target.operation); + }); + + await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled', targetConnectionId: 'warehouse', all: false, entrypoint: 'ingest' }, + io.io, + { executeTarget, now: () => 1000, sourceProgressThrottleMs: 0 }, + ); + + expect(io.stdout()).toContain('Fetching source files for warehouse query history'); + expect(io.stdout()).not.toContain('historic-sql'); + }); +``` + +In `packages/cli/src/public-ingest.test.ts`, update the test named +`delegates interactive TTY public ingest to the foreground context-build view` +so the `runContextBuild` assertion includes: + +```ts + entrypoint: 'ingest', +``` + +- [ ] **Step 2: Run the failing foreground tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/public-ingest.test.ts -t "direct ingest retry|query-history progress|foreground context-build view" +``` + +Expected: FAIL because `ContextBuildArgs` has no entrypoint and progress text +is not sanitized.
+ +- [ ] **Step 3: Add entrypoint-aware retry commands** + +In `packages/cli/src/context-build-view.ts`, extend `ContextBuildArgs`: + +```ts + entrypoint?: 'setup' | 'ingest'; +``` + +Replace `resumeCommand` with: + +```ts +function retryCommand(input: { + projectDir?: string; + entrypoint?: 'setup' | 'ingest'; + connectionId?: string; + depth?: 'fast' | 'deep'; +}): string { + const projectPart = input.projectDir ? ` --project-dir ${input.projectDir}` : ''; + if (input.entrypoint === 'ingest' && input.connectionId) { + const depthPart = input.depth ? ` --${input.depth}` : ''; + return `ktx ingest ${input.connectionId}${projectPart}${depthPart}`; + } + return input.projectDir ? `ktx setup --project-dir ${input.projectDir}` : 'ktx setup'; +} +``` + +Update `failureTextForTarget` to accept `entrypoint` and pass the target depth: + +```ts + entrypoint?: 'setup' | 'ingest'; +``` + +Replace the network retry line: + +```ts + `Retry: ${resumeCommand(input.projectDir)}`, +``` + +with: + +```ts + `Retry: ${retryCommand({ + projectDir: input.projectDir, + entrypoint: input.entrypoint, + connectionId: input.target.connectionId, + depth: input.target.databaseDepth, + })}`, +``` + +For non-network failures, append retry text when `entrypoint === 'ingest'`: + +```ts + const fallback = input.fallback ?? `${input.target.connectionId} failed.`; + if (input.entrypoint === 'ingest') { + return `${fallback} Retry: ${retryCommand({ + projectDir: input.projectDir, + entrypoint: input.entrypoint, + connectionId: input.target.connectionId, + depth: input.target.databaseDepth, + })}`; + } + return fallback; +``` + +Pass `entrypoint: args.entrypoint` where `failureTextForTarget` is called. 
+ +- [ ] **Step 4: Sanitize public query-history progress text** + +In `packages/cli/src/context-build-view.ts`, add: + +```ts +function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string { + if (target.steps.includes('query-history')) { + return message + .replace(`${target.connectionId}/historic-sql`, `${target.connectionId} query history`) + .replace(/\bhistoric-sql\b/g, 'query history') + .replace(/\bhistoric SQL\b/gi, 'query history'); + } + return message; +} +``` + +Change `formatProgressDetail` to accept the target: + +```ts +function formatProgressDetail( + update: Pick, + target: KtxPublicIngestPlanTarget, +): string { + const percent = Math.max(0, Math.min(100, Math.round(update.percent))); + return `[${percent}%] ${publicProgressMessage(update.message, target)}`; +} +``` + +Update the `updateTargetProgress` call site: + +```ts + targetState.detailLine = formatProgressDetail(update, targetState.target); +``` + +Update the capture progress callback: + +```ts + targetState.detailLine = publicProgressMessage(message, targetState.target); +``` + +- [ ] **Step 5: Pass foreground entrypoint from public ingest** + +In `packages/cli/src/public-ingest.ts`, add this field to the +`contextBuild(...)` args object: + +```ts + entrypoint: 'ingest', +``` + +- [ ] **Step 6: Run the foreground tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/context-build-view.test.ts src/public-ingest.test.ts -t "direct ingest retry|query-history progress|foreground context-build view" +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add packages/cli/src/context-build-view.ts packages/cli/src/context-build-view.test.ts packages/cli/src/public-ingest.ts packages/cli/src/public-ingest.test.ts +git commit -m "fix(ingest): polish foreground retry copy" +``` + +### Task 6: Update public examples and checked-in example configs + +**Files:** +- Modify: `examples/postgres-historic/README.md` +- Modify: `examples/postgres-historic/scripts/smoke.sh` +- Modify: `examples/README.md` +- Modify: `examples/local-warehouse/ktx.yaml` +- Modify: `examples/orbit-relationship-verification/ktx.yaml` +- Modify: `scripts/examples-docs.test.mjs` + +- [ ] **Step 1: Write failing examples-docs assertions** + +In `scripts/examples-docs.test.mjs`, replace the historic-SQL assertions with: + +```js + assert.doesNotMatch(examples, /Historic SQL/); + assert.doesNotMatch(examples, /historic-SQL/); + assert.match(examples, /query-history ingest via `pg_stat_statements`/); + assert.doesNotMatch(readme, /--enable-historic-sql/); + assert.doesNotMatch(readme, /--historic-sql-min-executions/); + assert.doesNotMatch(readme, /ktx ingest run --project-dir/); + assert.doesNotMatch(readme, /--adapter historic-sql/); + assert.match(readme, /--enable-query-history/); + assert.match(readme, /--query-history-min-executions 2/); + assert.match(readme, /Postgres query history/); +``` + +Add assertions for checked-in example configs: + +```js + test('checked-in example configs do not include public live-database adapters', async () => { + const localWarehouseConfig = await readFile('examples/local-warehouse/ktx.yaml', 'utf8'); + const orbitConfig = await readFile('examples/orbit-relationship-verification/ktx.yaml', 'utf8'); + + assert.doesNotMatch(localWarehouseConfig, /live-database/); + assert.doesNotMatch(orbitConfig, /live-database/); + }); +``` + +- [ ] **Step 2: Run the failing examples-docs test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: FAIL because 
examples still document old flags and configs still +contain `live-database`. + +- [ ] **Step 3: Update Postgres query-history example docs** + +In `examples/postgres-historic/README.md`, replace the title: + +```md +# Postgres Historic SQL Example +``` + +with: + +```md +# Postgres Query History Example +``` + +Replace the opening paragraph: + +```md +This example is a manual smoke for the redesigned Postgres historic-SQL ingest +path through `pg_stat_statements`. It starts Postgres 14 with the extension +preloaded, generates query workload under separate users, runs `ktx setup` with +`--enable-historic-sql`, and verifies the unified staged artifacts: +``` + +with: + +```md +This example is a manual smoke for Postgres query-history ingest through +`pg_stat_statements`. It starts Postgres 14 with the extension preloaded, +generates query workload under separate users, runs `ktx setup` with +`--enable-query-history`, and verifies the staged query-history artifacts: +``` + +Replace setup flags: + +```bash + --enable-historic-sql \ + --historic-sql-min-executions 2 \ +``` + +with: + +```bash + --enable-query-history \ + --query-history-min-executions 2 \ +``` + +Replace the manual ingest command: + +```bash +pnpm run ktx -- ingest run --project-dir /tmp/ktx-postgres-historic \ + --connection-id warehouse \ + --adapter historic-sql \ + --plain \ + --yes \ + --no-input +``` + +with: + +```bash +pnpm run ktx -- ingest warehouse --project-dir /tmp/ktx-postgres-historic \ + --query-history \ + --no-input +``` + +Apply these exact prose replacements in `examples/postgres-historic/README.md`: + +```md +Postgres Historic SQL Example +``` + +becomes: + +```md +Postgres Query History Example +``` + +```md +The smoke validates the historic-SQL raw snapshot path without requiring LLM +credentials. 
It uses KTX's local stage-only ingest API after `ktx setup`, so the +deterministic reader, batch SQL parser, stable artifact writer, and diff-based +WorkUnit planning are checked independently from curation. +``` + +becomes: + +```md +The smoke validates the query-history raw snapshot path without requiring LLM +credentials. It uses KTX's local stage-only ingest API after `ktx setup`, so the +deterministic reader, batch SQL parser, stable artifact writer, and diff-based +WorkUnit planning are checked independently from curation. +``` + +```md +Create a project and enable historic SQL: +``` + +becomes: + +```md +Create a project and enable query history: +``` + +```md +Expected output includes `PASS Postgres Historic SQL (warehouse)` when +`pg_stat_statements` is installed, `pg_read_all_stats` is granted, and tracking +is enabled. +``` + +becomes: + +```md +Expected output includes `PASS Postgres query history (warehouse)` when +`pg_stat_statements` is installed, `pg_read_all_stats` is granted, and tracking +is enabled. +``` + +```md +Run local historic-SQL ingest: +``` + +becomes: + +```md +Run query-history ingest: +``` + +```md +The full `ingest run` path also runs curation WorkUnits, so it requires a +configured LLM provider. +``` + +becomes: + +```md +The public query-history ingest path also runs curation WorkUnits, so it +requires a configured LLM provider. +``` + +Keep literal `source: "historic-sql"`, raw +`raw-sources/.../historic-sql` paths, and WorkUnit key examples only in the +artifact inspection section where they describe internal artifacts. + +Replace the troubleshooting bullet: + +```md +- Empty snapshot: rerun `scripts/generate-workload.sh base` and keep + `--historic-sql-min-executions 2` for the smoke. +``` + +with: + +```md +- Empty snapshot: rerun `scripts/generate-workload.sh base` and keep + `--query-history-min-executions 2` for the smoke. 
+``` + +- [ ] **Step 4: Update the smoke setup flags** + +In `examples/postgres-historic/scripts/smoke.sh`, replace: + +```bash + --enable-historic-sql \ + --historic-sql-min-executions 2 \ +``` + +with: + +```bash + --enable-query-history \ + --query-history-min-executions 2 \ +``` + +- [ ] **Step 5: Update example index wording** + +In `examples/README.md`, replace: + +```md +`postgres-historic/` is a manual Docker-backed smoke for Postgres historic-SQL +ingest via `pg_stat_statements`. It verifies setup, unified Historic SQL artifacts, +managed daemon batch SQL analysis, bounded pattern WorkUnit shards, and +no-WorkUnit idempotency for unchanged bucketed table inputs and pattern shards. +``` + +with: + +```md +`postgres-historic/` is a manual Docker-backed smoke for Postgres query-history +ingest via `pg_stat_statements`. It verifies setup, staged query-history +artifacts, managed daemon batch SQL analysis, bounded pattern WorkUnit shards, +and no-WorkUnit idempotency for unchanged bucketed table inputs and pattern +shards. +``` + +- [ ] **Step 6: Remove live-database from example configs** + +In `examples/local-warehouse/ktx.yaml`, replace: + +```yaml +ingest: + adapters: + - fake + - live-database +``` + +with: + +```yaml +ingest: + adapters: + - fake +``` + +In `examples/orbit-relationship-verification/ktx.yaml`, replace: + +```yaml +ingest: + adapters: + - live-database +``` + +with: + +```yaml +ingest: + adapters: [] +``` + +- [ ] **Step 7: Run examples-docs tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. 
+ +- [ ] **Step 8: Commit** + +```bash +git add examples/postgres-historic/README.md examples/postgres-historic/scripts/smoke.sh examples/README.md examples/local-warehouse/ktx.yaml examples/orbit-relationship-verification/ktx.yaml scripts/examples-docs.test.mjs +git commit -m "docs(examples): use unified query history wording" +``` + +### Task 7: Final verification + +**Files:** +- Verify: `packages/cli/src/setup.ts` +- Verify: `packages/cli/src/setup-databases.ts` +- Verify: `packages/cli/src/historic-sql-doctor.ts` +- Verify: `packages/cli/src/ingest.ts` +- Verify: `packages/cli/src/local-adapters.ts` +- Verify: `packages/cli/src/context-build-view.ts` +- Verify: `packages/cli/src/public-ingest.ts` +- Verify: `examples/` +- Verify: `scripts/examples-docs.test.mjs` + +- [ ] **Step 1: Run focused CLI tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run \ + src/setup.test.ts \ + src/setup-databases.test.ts \ + src/historic-sql-doctor.test.ts \ + src/doctor.test.ts \ + src/ingest-viz.test.ts \ + src/local-adapters.test.ts \ + src/context-build-view.test.ts \ + src/public-ingest.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 3: Run examples docs test** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 4: Run dead-code check for TypeScript changes** + +Run: + +```bash +pnpm run dead-code +``` + +Expected: PASS, or only known unrelated Knip findings. Investigate and fix +new findings introduced by this plan. 
+ +- [ ] **Step 5: Check remaining public old-surface references** + +Run: + +```bash +rg -n "ktx ingest run --connection-id|--enable-historic-sql|--historic-sql|Postgres Historic SQL|Historic SQL local ingest|live-database" README.md examples packages/cli/src scripts/examples-docs.test.mjs +``` + +Expected: no matches in public docs, setup/status/ingest public output, or +example configs. Matches in hidden-command tests, internal adapter tests, +debug-only scripts, and low-level scan tests are acceptable only when the file +is explicitly exercising internal behavior. + +- [ ] **Step 6: Commit verification-only fixes if needed** + +If Step 4 or Step 5 required edits, commit them: + +```bash +git add +git commit -m "chore(ingest): finish public query history cleanup" +``` + +Expected: no commit is needed when all checks pass without further edits. + +## Self-review + +- Spec coverage: this plan covers the remaining public setup query-history + path, canonical status readiness, stale command guidance, public foreground + retry/progress copy, public examples, and generated/example config cleanup. +- Placeholder scan: no task uses placeholder implementation language. +- Type consistency: all new public fields use `queryHistory*`; internal file + names and adapter keys can remain `historic-sql` where they are not normal + public UX. diff --git a/docs/superpowers/plans/2026-05-13-unified-ingest-v1-verification-copy-closure.md b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-verification-copy-closure.md new file mode 100644 index 00000000..ce3e7eba --- /dev/null +++ b/docs/superpowers/plans/2026-05-13-unified-ingest-v1-verification-copy-closure.md @@ -0,0 +1,326 @@ +# Unified Ingest V1 Verification Copy Closure Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Close the remaining v1-blocking verification and setup-copy gaps in the unified `ktx ingest` UX. + +**Architecture:** Keep the implemented connection-centric ingest planner unchanged. Fix the test-only TypeScript error that currently blocks `@ktx/cli` type-check, then replace the remaining normal setup help/output references to old "primary source" terminology with database-oriented copy. + +**Tech Stack:** TypeScript ESM, Commander, Vitest, pnpm workspace scripts, uv pre-commit. + +--- + +## Current Audit + +Implemented unified-ingest plans already cover the original spec's main v1 behavior: + +- `ktx ingest [connectionId]`, `ktx ingest --all`, `--fast`, `--deep`, `--query-history`, `--no-query-history`, and `--query-history-window-days` route through `packages/cli/src/public-ingest.ts`. +- Database targets are ordered before source targets, public source ingest bypasses `ingest.adapters`, and database depth maps to structural/enriched scan internals. +- Deep readiness is evaluated per target before target work starts, and `--all` isolates eligible targets from independent failures. +- Setup stores `connections.<connectionId>.context.depth` and `connections.<connectionId>.context.queryHistory`, migrates legacy `historicSql`, and uses foreground-only context-build state. +- Normal `ktx` and `ktx ingest` help hide `ktx scan`, `ktx ingest run`, and live `ktx ingest watch`. +- Foreground progress and normal public output sanitize scan/live-database/historic-sql internals. + +### V1-Blocking Gaps + +- `pnpm --filter @ktx/cli run type-check` fails: + +```text +src/setup-databases.test.ts(1078,39): error TS2339: Property 'mock' does not exist on type '(options: { message: string; options: KtxSetupPromptOption[]; required?: boolean | undefined; initialValues?: string[] | undefined; }) => Promise<string[]>'.
+``` + +- Normal setup help/output still exposes the old database category as "primary source": + - `packages/cli/src/commands/setup-commands.ts` documents `--skip-databases` as `KTX cannot work until a primary source is added`. + - `packages/cli/src/setup-sources.ts` prints `Connect a primary source before adding context sources.` + - `packages/cli/src/setup-context.ts` prints `No primary or context sources are configured for a KTX context build.` + +### Non-Blocking Gaps + +- Hidden debug commands remain callable: `ktx scan`, `ktx ingest run`, and `ktx ingest watch`. +- Internal adapter keys, artifact paths, WorkUnit keys, package names, tests, and developer-only scripts can continue to use `scan`, `live-database`, `historic-sql`, and internal `primarySource*` identifiers. +- Public docs still have a `Primary Sources` integration page and a quickstart sentence about BI metadata mapping to primary source connections. That is broader documentation information architecture cleanup, not a v1 blocker for the normal command/help/output behavior in this spec. + +## File Structure + +- Modify `packages/cli/src/setup-databases.test.ts`: use Vitest's typed mock helper for the existing `prompts.multiselect` assertion. +- Modify `packages/cli/src/setup-sources.ts`: change the normal missing-database message before context source setup. +- Modify `packages/cli/src/setup-sources.test.ts`: update the missing-database regression. +- Modify `packages/cli/src/setup-context.ts`: change the normal no-target context-build error. +- Modify `packages/cli/src/setup-context.test.ts`: update the no-target context-build regression. +- Modify `packages/cli/src/commands/setup-commands.ts`: change the public `--skip-databases` help copy. +- Modify `packages/cli/src/index.test.ts`: assert setup help no longer contains public "primary source" wording. 
+ +## Tasks + +### Task 1: Repair Setup Database Test Type-Check + +**Files:** +- Modify: `packages/cli/src/setup-databases.test.ts` + +- [ ] **Step 1: Replace the untyped mock access** + +In `packages/cli/src/setup-databases.test.ts`, in the test named `prompts for discovered Postgres schemas before the first scan`, replace: + +```ts + expect(String(prompts.multiselect.mock.calls[0]?.[0].message)).not.toContain('to scan'); +``` + +with: + +```ts + expect(String(vi.mocked(prompts.multiselect).mock.calls[0]?.[0].message)).not.toContain('to scan'); +``` + +- [ ] **Step 2: Run the setup database type-check regression** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected before the fix: FAIL with `TS2339: Property 'mock' does not exist`. + +Expected after the fix: PASS. + +- [ ] **Step 3: Commit the type-check repair** + +Run: + +```bash +git add packages/cli/src/setup-databases.test.ts +git commit -m "test(cli): fix setup database test type-check" +``` + +### Task 2: Replace Remaining Normal Setup Primary-Source Copy + +**Files:** +- Modify: `packages/cli/src/setup-sources.ts` +- Modify: `packages/cli/src/setup-sources.test.ts` +- Modify: `packages/cli/src/setup-context.ts` +- Modify: `packages/cli/src/setup-context.test.ts` +- Modify: `packages/cli/src/commands/setup-commands.ts` +- Modify: `packages/cli/src/index.test.ts` + +- [ ] **Step 1: Update setup source missing-database expectations** + +In `packages/cli/src/setup-sources.test.ts`, replace the test name and output expectation: + +```ts + it('does not offer context sources until a primary source exists', async () => { +``` + +with: + +```ts + it('does not offer context sources until a database exists', async () => { +``` + +and replace: + +```ts + expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); +``` + +with: + +```ts + expect(io.stdout()).toContain('Connect a database before adding context sources.'); +``` + +- [ ] **Step 2: Update setup context 
no-target expectations** + +In `packages/cli/src/setup-context.test.ts`, replace: + +```ts + expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.'); +``` + +with: + +```ts + expect(io.stderr()).toContain('No databases or context sources are configured for a KTX context build.'); +``` + +- [ ] **Step 3: Add setup help regression coverage** + +In `packages/cli/src/index.test.ts`, in the test named `documents setup as a bare command without subcommands`, add these assertions after the existing query-history flag assertions and before the historic-SQL assertions: + +```ts + expect(testIo.stdout()).toContain('KTX cannot work until a database is added'); + expect(testIo.stdout()).not.toContain('primary source'); + expect(testIo.stdout()).not.toContain('primary sources'); +``` + +- [ ] **Step 4: Run the failing setup-copy tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-sources.test.ts src/setup-context.test.ts src/index.test.ts -t "context sources until a database exists|No databases or context sources|documents setup as a bare command" +``` + +Expected: FAIL because implementation still prints `primary source` in setup source/context output and setup help. 
+ +- [ ] **Step 5: Update setup source output** + +In `packages/cli/src/setup-sources.ts`, replace: + +```ts + const message = 'Connect a primary source before adding context sources.'; +``` + +with: + +```ts + const message = 'Connect a database before adding context sources.'; +``` + +- [ ] **Step 6: Update setup context output** + +In `packages/cli/src/setup-context.ts`, replace: + +```ts + io.stderr.write('No primary or context sources are configured for a KTX context build.\n'); +``` + +with: + +```ts + io.stderr.write('No databases or context sources are configured for a KTX context build.\n'); +``` + +- [ ] **Step 7: Update public setup help output** + +In `packages/cli/src/commands/setup-commands.ts`, replace: + +```ts + .option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a primary source is added', false) +``` + +with: + +```ts + .option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a database is added', false) +``` + +- [ ] **Step 8: Run the setup-copy tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/setup-sources.test.ts src/setup-context.test.ts src/index.test.ts -t "context sources until a database exists|No databases or context sources|documents setup as a bare command" +``` + +Expected: PASS. 
+ +- [ ] **Step 9: Commit the setup-copy repair** + +Run: + +```bash +git add packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts +git commit -m "fix(cli): remove primary-source wording from setup output" +``` + +### Task 3: Final V1 Verification + +**Files:** +- Verify: `packages/cli/src/setup-databases.test.ts` +- Verify: `packages/cli/src/setup-sources.ts` +- Verify: `packages/cli/src/setup-sources.test.ts` +- Verify: `packages/cli/src/setup-context.ts` +- Verify: `packages/cli/src/setup-context.test.ts` +- Verify: `packages/cli/src/commands/setup-commands.ts` +- Verify: `packages/cli/src/index.test.ts` + +- [ ] **Step 1: Run focused unified ingest tests** + +Run: + +```bash +pnpm --filter @ktx/cli exec vitest run src/public-ingest.test.ts src/context-build-view.test.ts src/setup-ready-menu.test.ts src/setup.test.ts src/setup-context.test.ts src/setup-databases.test.ts src/setup-sources.test.ts src/index.test.ts src/command-tree.test.ts +``` + +Expected: PASS. + +- [ ] **Step 2: Run docs regression tests** + +Run: + +```bash +node --test scripts/examples-docs.test.mjs +``` + +Expected: PASS. + +- [ ] **Step 3: Run CLI type-check** + +Run: + +```bash +pnpm --filter @ktx/cli run type-check +``` + +Expected: PASS. + +- [ ] **Step 4: Check the normal setup public-copy surface** + +Run: + +```bash +rg -n "primary source|primary sources|Primary Sources|primary-source" \ + packages/cli/src/commands/setup-commands.ts \ + packages/cli/src/setup-sources.ts \ + packages/cli/src/setup-context.ts \ + packages/cli/src/index.test.ts \ + packages/cli/src/setup-sources.test.ts \ + packages/cli/src/setup-context.test.ts +``` + +Expected: no matches. 
+ +- [ ] **Step 5: Check the unified ingest public command surface** + +Run: + +```bash +node packages/cli/dist/bin.js ingest --help +node packages/cli/dist/bin.js --help +``` + +Expected: normal help lists `ktx ingest [connectionId]`, `--all`, `--fast`, `--deep`, `--query-history`, `status`, and `replay`; it does not list `ktx scan`, `ktx ingest run`, or `ktx ingest watch`. + +- [ ] **Step 6: Run pre-commit on changed files** + +Run: + +```bash +uv run pre-commit run --files \ + packages/cli/src/setup-databases.test.ts \ + packages/cli/src/setup-sources.ts \ + packages/cli/src/setup-sources.test.ts \ + packages/cli/src/setup-context.ts \ + packages/cli/src/setup-context.test.ts \ + packages/cli/src/commands/setup-commands.ts \ + packages/cli/src/index.test.ts +``` + +Expected: PASS. If pre-commit cannot run because the local hook environment or pinned tool version is unavailable, record the exact failure and keep the focused Vitest, docs, and type-check results from Steps 1-3. + +- [ ] **Step 7: Commit verification formatting if needed** + +If Step 6 changes files, run: + +```bash +git add packages/cli/src/setup-databases.test.ts packages/cli/src/setup-sources.ts packages/cli/src/setup-sources.test.ts packages/cli/src/setup-context.ts packages/cli/src/setup-context.test.ts packages/cli/src/commands/setup-commands.ts packages/cli/src/index.test.ts +git commit -m "test(cli): verify unified ingest setup closure" +``` + +If Step 6 makes no changes, do not create an empty commit. + +## Self-Review + +- Spec coverage: This plan covers the remaining v1-blocking issues found in the audit: package type-check is currently red, and normal setup help/output still exposes the old public database category as `primary source` instead of database-oriented copy. Core ingest routing, depth behavior, query-history behavior, foreground-only state, warning aggregation, public command help, and scan/live-database/historic-sql output sanitization are already implemented by prior plans. 
+- Placeholder scan: The plan contains concrete file paths, exact replacement snippets, exact commands, and expected outcomes. +- Type consistency: The only test typing change uses the existing Vitest pattern already used elsewhere in `packages/cli/src/setup-databases.test.ts`: `vi.mocked(prompts.multiselect).mock.calls`. diff --git a/docs/superpowers/specs/2026-05-13-unified-ingest-ux-design.md b/docs/superpowers/specs/2026-05-13-unified-ingest-ux-design.md new file mode 100644 index 00000000..3697f47b --- /dev/null +++ b/docs/superpowers/specs/2026-05-13-unified-ingest-ux-design.md @@ -0,0 +1,593 @@ +# Unified Ingest UX Design + +**Date:** 2026-05-13 +**Author:** Andrey Avtomonov +**Status:** Design — pending implementation plan + +## Background + +KTX currently exposes multiple user-facing ideas for one product action: +building context from configured connections. Database connections use +`ktx scan <connectionId>`, source connections use +`ktx ingest run --connection-id <connectionId> --adapter <adapter>`, and setup uses a +context-build wrapper that plans database scans before source ingestion. + +The implementation already points toward one concept. `ktx scan` runs a +stage-only ingest with the `live-database` adapter, then writes scan-specific +reports, schema manifests, and enrichment artifacts. `ktx setup` already +builds context from all configured connections by routing database connections +to scan internals and source connections to source-ingest internals. + +The user-facing model must become simpler: + +- Setup configures KTX. +- Ingest builds or refreshes context. +- Status explains readiness. + +`scan`, `live-database`, and adapter selection are implementation details. + +## Goals + +The redesign makes `ktx ingest` the single public context-building command and +keeps the foreground experience rich, clear, and robust. + +- Remove `ktx scan` as a normal external verb. +- Remove `live-database` from user-facing CLI help, output, docs, and + `ktx.yaml`.
+- Treat database schema ingest as mandatory baseline behavior for database + connections. +- Keep slow AI-heavy database behavior explicit with `--deep`; keep fast, + deterministic behavior explicit with `--fast`. +- Fold query-history ingestion into database connection ingest as an optional + facet. +- Keep `ktx setup` guided. It stores defaults in `ktx.yaml` and uses the same + foreground context-build engine as `ktx ingest`. +- Remove detach, attach, watch, resume, stop, and background context-build + flows. +- Preserve a polished foreground progress view for TTY users and scriptable + output for non-TTY and JSON users. + +## Non-goals + +This spec does not redesign the semantic-layer YAML format, the ingest bundle +agent loop, or warehouse verification tools. + +- Do not remove the internal scan implementation if it remains the cleanest + module boundary. +- Do not remove internal adapter/source keys in one large rename. User-facing + terminology changes first; internal cleanup can follow where it reduces + complexity. +- Do not make query-history ingestion mandatory. +- Do not make AI enrichment mandatory for database connections. +- Do not add `--fast` or `--deep` to top-level `ktx setup`. +- Do not preserve compatibility shims for old public `scan` or + `ingest run --adapter live-database` usage unless an implementation plan + explicitly chooses a short deprecation window. + +## Public command model + +`ktx ingest` becomes the direct command for building context from one +connection or all configured connections. + +```bash +ktx ingest warehouse +ktx ingest warehouse --fast +ktx ingest warehouse --deep +ktx ingest warehouse --deep --query-history +ktx ingest warehouse --no-query-history +ktx ingest notion +ktx ingest --all +ktx ingest --all --deep +``` + +The command dispatches by connection driver: + +- Database drivers run database ingest. +- Source drivers run source ingest. +- `--all` runs database ingest targets first, then source ingest targets. 
+ +The old `ktx ingest run --connection-id <connectionId> --adapter <adapter>` command is +removed from the public interface. Normal users configure and ingest +connections, not adapters. + +`ktx scan` is no longer a documented public command. Database schema scanning +continues as an internal phase of database ingest. + +Stored report inspection is separate from live context-build control. The +public `ktx ingest` namespace has no subcommands, so `run`, `status`, `watch`, +and `replay` are ordinary connection IDs: + +```bash +ktx ingest run +ktx ingest status +ktx ingest watch +ktx ingest replay +``` + +No setup or config validation rejects those names. Old adapter-backed command +shapes such as `ktx ingest run --connection-id warehouse --adapter +live-database` fail through normal option parsing because `--connection-id` and +`--adapter` are not public `ktx ingest` options. + +## Database ingest depth + +Database ingest always includes a schema baseline. The depth controls how much +extra work KTX may perform. + +Depth is the public abstraction over the current scan engine: + +- `fast` maps to `KtxScanMode: structural` with `detectRelationships: false`. +- `deep` maps to `KtxScanMode: enriched` and requests relationship detection. +- The internal `relationships` scan mode remains an advanced implementation + detail. It is not a separate public depth in this v1. + +Deep mode includes relationship discovery when the project's +`scan.relationships.enabled` setting is true. Relationship validation thresholds +and budgets remain governed by the existing internal `scan.relationships` +configuration; users do not get a separate public relationship flag in this +surface. If `scan.relationships.enabled` is false, `--deep` still runs enriched +database ingest but relationship discovery remains disabled. + +### Fast + +`--fast` means KTX builds deterministic schema context quickly. + +- No LLM calls. +- No embeddings. +- No AI-generated descriptions.
+- No expensive relationship discovery that depends on sampling, read-only SQL, + or model calls. +- Introspect tables, columns, native types, comments, declared primary keys, + and declared foreign keys when the connector can read them. +- Write or update database schema context that agents can use as grounding. +- Do not run query-history synthesis, because the current query-history path + uses ingest work units and model-backed synthesis. + +This is the safe default for new database connections, CI, smoke tests, and +large unknown warehouses. + +### Deep + +`--deep` means KTX builds richer database context through the enriched scan path +and uses slower capabilities. + +- Requires LLM, embedding, and scan-enrichment readiness before work starts. +- Generates table and column descriptions. +- Generates embeddings. +- May sample or query data through read-only connector capabilities. +- Discovers and validates relationships when relationship discovery is enabled. +- May process query history into usage patterns when query history is enabled. + +Deep mode is the best agent-readiness mode, but it can take longer and can +require model, embedding, and database permissions. + +KTX must not silently downgrade an explicit or stored `deep` request to `fast`. +For a single database target, if the project is missing the model, embedding, or +scan-enrichment configuration required for deep ingest, KTX errors before +starting the run and tells the user to run `ktx setup` or rerun with `--fast`. +For `--all`, deep-readiness failures follow the per-target rule in +**Error handling and warnings**. + +### Flag rules + +`--fast` and `--deep` are mutually exclusive. Passing both is an error. + +When neither flag is passed, `ktx ingest` uses the stored connection default. +If no default exists, database connections use `fast`. 
+ +If a depth flag is passed for a non-database source, KTX prints a warning and +continues: + +```text +--deep affects database ingest only; ignoring it for notion. +``` + +For `--all`, KTX aggregates warnings instead of repeating noisy lines: + +```text +--deep ignored for 2 non-database sources. +``` + +## Query history + +Historic SQL becomes the database connection's query-history facet. The term +`historic-sql` remains an internal source key unless a later cleanup renames +it. + +Query history is optional because it can require extra grants and can expose +sensitive SQL text. Setup asks about it only for database drivers that support +it. + +```bash +ktx ingest warehouse --query-history +ktx ingest warehouse --no-query-history +ktx ingest warehouse --query-history-window-days 30 +``` + +Query-history flags apply only to database connections that support the feature. +In v1, supported query-history drivers are `postgres` or `postgresql`, +`bigquery`, and `snowflake`. They map to the existing historic-SQL dialects +`postgres`, `bigquery`, and `snowflake`. `sqlite`, `mysql`, `clickhouse`, and +`sqlserver` are database ingest targets but do not support query history in v1. + +Non-applicable query-history flags produce warnings and continue when the target +can otherwise be ingested. For a single unsupported database target, +`--query-history` or `--query-history-window-days` runs schema ingest, skips the +query-history facet, and prints a warning. For `--all`, KTX aggregates those +warnings and continues other eligible targets. Stored +`connections.<connectionId>.context.queryHistory.enabled: true` on an unsupported driver +is a config warning and is skipped for that driver; it must not abort schema +ingest for that target. + +Query history uses schema context as grounding. KTX must run the database +schema facet before query-history processing in the same ingest run. If a user +explicitly enables query history for a run, the output states that schema +ingest runs first.
+ +Because query-history synthesis is model-backed in the current architecture, +`--query-history` upgrades the effective database depth to deep for that run. +KTX prints a warning when a user combines `--fast` with `--query-history`: + +```text +--query-history requires deep ingest; running warehouse with --deep. +``` + +Stored `connections.<connectionId>.context.queryHistory.enabled: true` has the same +depth requirement. When no explicit depth flag is passed, stored query-history +enablement upgrades the effective database depth to `deep` for that run. When a +user explicitly passes `--fast` and does not pass `--query-history`, KTX honors +the explicit fast request, skips stored query-history processing for that run, +does not modify `ktx.yaml`, and prints a warning: + +```text +warehouse has query history enabled in ktx.yaml, but --fast skips query-history processing. +``` + +`--query-history-window-days <days>` overrides +`connections.<connectionId>.context.queryHistory.windowDays` only for the current run. It +must not rewrite `ktx.yaml`. The effective value flows into the same +`historicSqlUnifiedPullConfigSchema.windowDays` field used by the current +historic-SQL pull path. + +## Configuration model + +User-authored `ktx.yaml` becomes connection-centric. Database schema ingest is +implied by the database connection and no longer appears as an ingest adapter. + +```yaml +connections: + warehouse: + driver: postgres + readonly: true + context: + depth: fast + queryHistory: + enabled: false + + notion: + driver: notion + context: + enabled: true +``` + +Deep database defaults and query history use the same connection-local shape: + +```yaml +connections: + warehouse: + driver: postgres + readonly: true + context: + depth: deep + queryHistory: + enabled: true + windowDays: 90 + minExecutions: 5 + filters: + dropTrivialProbes: true + serviceAccounts: + mode: exclude + patterns: + - "^svc_" + redactionPatterns: [] +``` + +`context.queryHistory` is the canonical user-facing shape.
Runtime code maps it +to the existing historic-SQL pull config as follows: + +- `dialect` is derived from the database driver (`postgres` or `postgresql`, + `bigquery`, or `snowflake`) and is not normally user-authored. +- `windowDays`, `minExecutions`, and `redactionPatterns` copy through directly. +- `filters.dropTrivialProbes` defaults to `true`. +- `filters.serviceAccounts.patterns` and `filters.serviceAccounts.mode` map to + the existing service-account filter fields. The default mode is `exclude`. +- `concurrency`, `staleArchiveAfterDays`, + `filters.orchestrators.mode`, and `filters.dropFailedBelow` are advanced + query-history fields. When present, they map directly to the same fields in + `historicSqlUnifiedPullConfigSchema`. When absent, KTX uses the existing + historic-SQL schema defaults and omitted-field behavior. + +Existing `connection.historicSql` blocks are legacy cutover input. Setup or the +explicit config rewrite path must migrate them into +`connection.context.queryHistory` while preserving all mapped query-history +fields, including the advanced fields listed above. `ktx ingest` must not +rewrite `ktx.yaml`; it may read legacy `historicSql` blocks for the current run +and emit a cleanup warning. If both `context.queryHistory` and `historicSql` are +present, `context.queryHistory` wins and KTX emits a config-cleanup warning +instead of running both. + +Config migration must be idempotent. A setup or explicit rewrite pass that +migrates a connection removes the legacy `connection.historicSql` block after +copying preserved fields, does not regenerate normal `ingest.adapters` entries, +and produces the same `ktx.yaml` on repeated runs. If `ktx ingest` sees a legacy +block before cleanup, the warning may repeat because ingest is config-read-only. + +`ingest.adapters` is no longer normal user config. 
Existing `ingest.adapters` +entries load as advanced/internal overrides during the transition, but +public `ktx ingest <connectionId>` must not fail solely because the +driver-to-adapter mapping chooses an adapter missing from that list. The rule +applies to database internals (`live-database` and `historic-sql`) and to all +source adapters selected from configured drivers, including `notion`, `dbt`, +`metabase`, `looker`, `metricflow`, and `lookml`. + +The implementation can satisfy this by bypassing the adapter allow-list for +connection-centric public ingest, or by synthesizing the adapters required by +configured connections before dispatch. The old adapter-backed advanced command +may continue to honor `ingest.adapters` while it exists. Normal generated +`ktx.yaml` must not include `live-database`, `historic-sql`, or source adapter +entries just to make public `ktx ingest <connectionId>` work. + +## Setup flow + +`ktx setup` remains a guided configuration flow. It does not expose +`ktx setup --fast` or `ktx setup --deep`. + +During interactive setup, KTX asks for database context depth when a database +connection is configured or when setup reaches the context-build step: + +```text +How much database context should KTX build? + +Fast: schema only, no AI, quickest +Deep: AI descriptions, embeddings, relationships, slower +``` + +The recommended selection depends on readiness: + +- Recommend Fast when model, embedding, or scan-enrichment configuration is + missing. +- Recommend Deep when model, embedding, and scan-enrichment configuration are + ready. + +The recommendation is based on the final configuration produced by the current +setup run, not on an earlier intermediate state. Setup must either ask the depth +question after the model, embedding, and scan-enrichment setup paths complete, +or defer or repeat the depth prompt before the foreground context build starts +when those capabilities are configured later in the same setup run.
+ +Setup stores the chosen default in `connections.<connectionId>.context.depth`. The +foreground context build uses that stored default. Setup can still expose a +non-prominent automation flag later, such as `--context-depth fast`, if +headless setup needs it, but the main product surface is guided. + +Setup readiness is depth-aware: + +- For `fast`, a database context is ready when the latest non-dry-run + structural scan for the connection completed and wrote schema manifest shards. + Model, embedding, description-enrichment, and scan-enrichment checks are + skipped for fast contexts. +- For `deep`, a database context is ready only when the enriched scan completed + table descriptions, column descriptions, embeddings, and schema manifest + shards. When relationship discovery is enabled, readiness requires the + relationship stage to have completed for the latest enriched scan. A + completed relationship stage with zero accepted, review, rejected, or skipped + relationships still counts as ready; readiness must not require non-empty + relationship artifacts or accepted relationships. If relationship discovery is + disabled, the relationship stage is not part of the readiness gate. + +The missing-input gate uses the same rule. Missing model, embedding, or +scan-enrichment configuration must not block a user who selected `fast`. The +same missing inputs must block `deep` before the foreground build starts, with a +message that offers `fast` as the no-AI path. + +## Foreground progress UX + +KTX keeps a rich foreground progress view. It removes detach and background +execution.
+ +The shared build view groups work by user-facing source type: + +```text +Building KTX context (2/4 · 1m 12s) +─────────────────────────────────── + +Databases + ✓ warehouse 42 tables · 6 changed · relationships found + ⠹ billing reading schema · 18/64 tables + +Context sources + ✓ dbt 18 models · 42 metrics + ○ notion queued + +Warnings + --deep ignored for notion; it only applies to database connections. +``` + +The view must not show `scan` or `live-database` in normal mode. It uses: + +- `Databases` instead of `Primary sources`. +- `Context sources` for docs, BI, metrics, and modeling sources. +- `reading schema` or `building schema context` instead of `scanning`. +- `query history` or `usage patterns` instead of `historic-sql`. + +Non-TTY output remains append-only and scriptable. `--json` returns structured +results. Routine artifact paths and internal adapter names appear only in +`--debug` or JSON output. + +## Removing detach and watch + +The context build is foreground only. + +- `Ctrl+C` stops the current run. +- KTX records interrupted or failed state where useful for status reporting. +- Rerunning `ktx setup` or `ktx ingest` starts a fresh foreground build or + reuses existing completed artifacts when safe. + +Remove these user-facing concepts from context build: + +- detach +- attach +- watch +- resume +- stop +- background context-build subprocesses +- prompts that offer "Watch progress" +- hints such as `d to detach` + +Existing `running` or `detached` state from older versions must be treated as +stale or interrupted with a clear rerun instruction. + +`.ktx/setup/context-build.json` remains only as a foreground status cache, not a +background control plane. New writes may use `not_started`, `running`, +`completed`, `failed`, `interrupted`, or `stale`. 
`running` means the current +foreground process is active; a later setup process that finds a leftover +`running` record from an older process must mark it `stale` or `interrupted` +before offering a fresh run. `detached` and `paused` are legacy-only statuses +and must be normalized to `stale` or `interrupted` on read or on the next setup +write. + +The state file must not keep user-facing `watch`, `resume`, or `stop` command +affordances after this redesign. It may retain run ids, report ids, artifact +paths, source progress, failure details, and a retry/build command when those +help status reporting. + +## Internal naming and migration + +User-facing surfaces must stop saying `live-database`. + +This includes: + +- CLI help. +- Normal command output. +- Setup prompts. +- Generated `ktx.yaml`. +- README quickstart and examples. +- Friendly errors and warnings. + +Internal paths and source keys can keep `live-database` during the first +implementation if renaming them would add risk. Debug output and JSON may +include internal names when they are necessary for troubleshooting. + +The implementation plan must also update stale command suggestions. For +example, setup source recovery must no longer tell users to run +`ktx ingest run --connection-id ... --adapter <adapter>`. It must suggest the +new connection-centric command: + +```bash +ktx ingest <connectionId> +``` + +## Error handling and warnings + +Warnings are non-fatal when KTX can still perform the requested ingest. + +- Ignored depth flag on a non-database source: warn and continue. +- Ignored query-history flag on an unsupported database: warn and continue if + schema ingest can run. +- Both `--fast` and `--deep`: error before any work starts. +- Explicit or stored `deep` without required model, embedding, or + scan-enrichment readiness: error before any work starts for that target.
+- `--query-history` without required model, embedding, or scan-enrichment + readiness: error before any work starts for that target because query history + upgrades the run to `deep`. +- Query-history requested without required grants: fail that query-history + facet and keep schema results when schema ingest succeeded. +- Database schema ingest failure: fail that database target. + +`--all` isolates target failures. It runs all database targets first, then all +source targets, even when one or more database targets fail. Source targets may +therefore run against previously completed database context if the current +database refresh failed. The final exit code is non-zero when any target or +required facet fails, and the summary identifies partial failures by +connection. + +For `--all`, readiness is evaluated per target after resolving each target's +effective depth and query-history settings. A database target whose effective +run requires deep readiness but lacks model, embedding, or scan-enrichment +configuration fails before work starts for that target; eligible database and +source targets still run. Command-level errors that make target planning +impossible, such as mutually exclusive flags, an unreadable project config, or +no eligible targets, still abort before any target work starts. + +Failure messages focus on the connection and user action: + +```text +warehouse failed: connection refused. +Retry: ktx ingest warehouse --deep +``` + +They do not mention internal adapter names unless debug output is enabled. + +## Acceptance criteria + +The implementation is complete when these conditions hold: + +- `ktx ingest <connectionId>` works for database and source connections. +- `ktx ingest --all` runs database targets before source targets. +- `ktx ingest <connectionId>` does not require `ingest.adapters` entries for + any adapter chosen from the configured connection driver.
+- Connection ids that collide with surviving `ktx ingest` subcommands are + rejected during setup or config validation. (Review note: this conflicts with **Public command model**, which states that `ktx ingest` has no subcommands and that no setup or config validation rejects `run`, `status`, `watch`, or `replay`; reconcile the two before implementation.) +- `--fast` and `--deep` control database depth and are mutually exclusive. +- `--fast` maps to structural database ingest without relationship detection. +- `--deep` maps to enriched database ingest with relationship detection when + `scan.relationships.enabled` is true. +- `--deep` and `--query-history` fail before work starts when required model, + embedding, or scan-enrichment configuration is missing. +- `ktx ingest --all` continues independent targets after partial failures and + exits non-zero when any target or required facet fails. +- `ktx ingest --all` treats deep-readiness failures as per-target failures + after target planning, rather than aborting eligible independent targets. +- `ktx setup` stores a database context depth without exposing top-level + `--fast` or `--deep`. +- `ktx setup` bases the recommended/default database context depth on the final + model, embedding, and scan-enrichment readiness reached by the setup run. +- `ktx setup` treats fast database context as ready after completed structural + schema ingest and does not require AI descriptions or embeddings for fast. +- Generated `ktx.yaml` does not include `live-database` for normal projects. +- Generated `ktx.yaml` uses `connections.<connectionId>.context.queryHistory`, not + `connections.<connectionId>.historicSql`, for query-history configuration. +- Normal CLI help and output do not mention `live-database`. +- Normal CLI help and output do not present `scan` as a public verb. +- Normal CLI help and output do not present `ktx ingest watch` as live context + build control. +- Query history is optional, connection-local, and overridable per ingest run. +- Query history is supported only for `postgres` or `postgresql`, `bigquery`, + and `snowflake` in v1; unsupported database drivers warn and skip the + query-history facet without blocking schema ingest.
+- Stored query-history enablement upgrades default database ingest to deep, but + explicit `--fast` skips stored query history for that run with a warning. +- `--query-history-window-days` overrides the effective historic-SQL + `windowDays` pull config for the current run only and does not rewrite + `ktx.yaml`. +- Legacy `connection.historicSql` migration is idempotent, preserves all mapped + query-history fields, and is performed by setup or an explicit config rewrite, + not by `ktx ingest`. +- Context build has no detach, attach, watch, resume, stop, or background + execution path. +- `.ktx/setup/context-build.json` is retained only as foreground status cache + state; legacy `detached` or `paused` records do not trigger background + recovery branches. +- Existing setup context progress UX is consolidated with `ktx ingest` rather + than duplicated. +- Non-TTY and JSON output remain suitable for scripts. + +## Open implementation questions + +The implementation plan must decide these lower-level details: + +- Whether old `ktx scan` exits with an error, is hidden, or remains as a + temporary undocumented debug command. +- Whether internal artifact paths keep `raw-sources/<connectionId>/live-database` + for the first implementation. +- Whether setup needs a headless `--context-depth fast|deep` flag for CI. diff --git a/examples/README.md b/examples/README.md index 806cb476..e84d9330 100644 --- a/examples/README.md +++ b/examples/README.md @@ -2,19 +2,15 @@ ## local-warehouse -`local-warehouse/` is a runnable standalone KTX project for local CLI smoke -testing. It uses the fake ingest adapter and does not require a database or -external app server. +`local-warehouse/` is a contributor fixture for local CLI smoke tests. It uses +the internal fake ingest adapter so tests can exercise memory-flow behavior +without a live database or external service.
-Copy it before running commands: +For normal context building, use the public connection-centric commands: ```bash -pnpm --filter @ktx/cli run build -EXAMPLE_DIR="$(mktemp -d)/local-warehouse" -cp -R examples/local-warehouse "$EXAMPLE_DIR" -node packages/cli/dist/bin.js knowledge list --project-dir "$EXAMPLE_DIR" -node packages/cli/dist/bin.js sl list --project-dir "$EXAMPLE_DIR" --connection-id warehouse -node packages/cli/dist/bin.js ingest run --project-dir "$EXAMPLE_DIR" --connection-id warehouse --adapter fake --source-dir "$EXAMPLE_DIR/source" +ktx ingest +ktx ingest --all ``` The copied project initializes its own Git repository on first use. @@ -29,10 +25,11 @@ warehouse credential. ## postgres-historic -`postgres-historic/` is a manual Docker-backed smoke for Postgres historic-SQL -ingest via `pg_stat_statements`. It verifies setup, unified Historic SQL artifacts, -managed daemon batch SQL analysis, bounded pattern WorkUnit shards, and -no-WorkUnit idempotency for unchanged bucketed table inputs and pattern shards. +`postgres-historic/` is a manual Docker-backed smoke for Postgres +query-history ingest via `pg_stat_statements`. It verifies setup, staged +query-history artifacts, managed daemon batch SQL analysis, bounded pattern +WorkUnit shards, and no-WorkUnit idempotency for unchanged bucketed table +inputs and pattern shards. ## package-artifacts diff --git a/examples/local-warehouse/README.md b/examples/local-warehouse/README.md index 6d6457ab..2b9dbcf9 100644 --- a/examples/local-warehouse/README.md +++ b/examples/local-warehouse/README.md @@ -1,20 +1,16 @@ -# Local Warehouse Example +# local-warehouse fixture -This example is a standalone KTX project that can be copied to a temp directory -and used with the local CLI. It uses the `fake` ingest adapter so it does not -require a database or external app server. +This directory is a contributor fixture for KTX CLI smoke tests. 
It uses the +internal fake ingest adapter so tests can run without a live database or +external service. -Run the example from the repository root after building the CLI: +Normal users should build context with connection-centric ingest: ```bash -pnpm --filter @ktx/cli run build -EXAMPLE_DIR="$(mktemp -d)/local-warehouse" -cp -R examples/local-warehouse "$EXAMPLE_DIR" -node packages/cli/dist/bin.js knowledge list --project-dir "$EXAMPLE_DIR" -node packages/cli/dist/bin.js sl list --project-dir "$EXAMPLE_DIR" --connection-id warehouse -node packages/cli/dist/bin.js ingest run --project-dir "$EXAMPLE_DIR" --connection-id warehouse --adapter fake --source-dir "$EXAMPLE_DIR/source" +ktx ingest +ktx ingest --all ``` -The copied project creates its own Git repository on first use. Keep commands -pointed at a copy when experimenting so the checked-in example fixture stays -unchanged. +The public ingest workflow is documented in +`docs-site/content/docs/cli-reference/ktx-ingest.mdx` and +`docs-site/content/docs/guides/building-context.mdx`. 
diff --git a/examples/local-warehouse/ktx.yaml b/examples/local-warehouse/ktx.yaml index 7e814188..a967e31c 100644 --- a/examples/local-warehouse/ktx.yaml +++ b/examples/local-warehouse/ktx.yaml @@ -11,7 +11,6 @@ storage: ingest: adapters: - fake - - live-database agent: run_research: enabled: false diff --git a/examples/orbit-relationship-verification/ktx.yaml b/examples/orbit-relationship-verification/ktx.yaml index 082e0835..bcfad298 100644 --- a/examples/orbit-relationship-verification/ktx.yaml +++ b/examples/orbit-relationship-verification/ktx.yaml @@ -10,8 +10,7 @@ storage: auto_commit: true author: "ktx " ingest: - adapters: - - live-database + adapters: [] scan: enrichment: backend: none diff --git a/examples/postgres-historic/README.md b/examples/postgres-historic/README.md index 40ae1674..30dcfde5 100644 --- a/examples/postgres-historic/README.md +++ b/examples/postgres-historic/README.md @@ -1,9 +1,9 @@ -# Postgres Historic SQL Example +# Postgres Query History Example -This example is a manual smoke for the redesigned Postgres historic-SQL ingest -path through `pg_stat_statements`. It starts Postgres 14 with the extension -preloaded, generates query workload under separate users, runs `ktx setup` with -`--enable-historic-sql`, and verifies the unified staged artifacts: +This example is a manual smoke for Postgres query-history ingest through +`pg_stat_statements`. It starts Postgres 14 with the extension preloaded, +generates query workload under separate users, runs `ktx setup` with +`--enable-query-history`, and verifies the staged query-history artifacts: - `manifest.json` - `tables/*.json` @@ -40,7 +40,7 @@ postgresql://ktx_reader:ktx_reader@127.0.0.1:55432/analytics # pragma: allowlist Set `KTX_POSTGRES_HISTORIC_KEEP_DOCKER=1` to leave the container running after the script exits. -The smoke validates the historic-SQL raw snapshot path without requiring LLM +The smoke validates the query-history raw snapshot path without requiring LLM credentials. 
It uses KTX's local stage-only ingest API after `ktx setup`, so the deterministic reader, batch SQL parser, stable artifact writer, and diff-based WorkUnit planning are checked independently from curation. @@ -54,7 +54,7 @@ docker compose -f examples/postgres-historic/docker-compose.yml up -d --wait examples/postgres-historic/scripts/generate-workload.sh base ``` -Create a project and enable historic SQL: +Create a project and enable query history: ```bash export WAREHOUSE_DATABASE_URL=postgresql://ktx_reader:ktx_reader@127.0.0.1:55432/analytics # pragma: allowlist secret @@ -69,8 +69,8 @@ node packages/cli/dist/bin.js --project-dir /tmp/ktx-postgres-historic setup \ --new-database-connection-id warehouse \ --database-url env:WAREHOUSE_DATABASE_URL \ --database-schema public \ - --enable-historic-sql \ - --historic-sql-min-executions 2 \ + --enable-query-history \ + --query-history-min-executions 2 \ --yes \ --no-input ``` @@ -87,24 +87,21 @@ The installed CLI form is: ktx status --project-dir /tmp/ktx-postgres-historic --no-input ``` -Expected output includes `PASS Postgres Historic SQL (warehouse)` when +Expected output includes `PASS Postgres query history (warehouse)` when `pg_stat_statements` is installed, `pg_read_all_stats` is granted, and tracking is enabled. A low `pg_stat_statements.max` value is reported as an informational note, not a warning. -Run local historic-SQL ingest: +Run query-history ingest: ```bash -pnpm run ktx -- ingest run --project-dir /tmp/ktx-postgres-historic \ - --connection-id warehouse \ - --adapter historic-sql \ - --plain \ - --yes \ +pnpm run ktx -- ingest warehouse --project-dir /tmp/ktx-postgres-historic \ + --query-history \ --no-input ``` -The full `ingest run` path also runs curation WorkUnits, so it requires a -configured LLM provider. +The public query-history ingest path also runs curation WorkUnits, so it +requires a configured LLM provider. Inspect the latest manifest: @@ -126,7 +123,7 @@ table. database. 
- Missing grants: confirm `GRANT pg_read_all_stats TO ktx_reader;`. - Empty snapshot: rerun `scripts/generate-workload.sh base` and keep - `--historic-sql-min-executions 2` for the smoke. + `--query-history-min-executions 2` for the smoke. - SQL-analysis failures: run `pnpm run ktx -- dev runtime status` from the KTX repository root and confirm `uv`, the bundled Python wheel, and the managed runtime all pass. diff --git a/examples/postgres-historic/scripts/smoke.sh b/examples/postgres-historic/scripts/smoke.sh index c67a0856..1fe10b70 100755 --- a/examples/postgres-historic/scripts/smoke.sh +++ b/examples/postgres-historic/scripts/smoke.sh @@ -215,8 +215,8 @@ node "$KTX_BIN" --project-dir "$PROJECT_DIR" setup \ --new-database-connection-id warehouse \ --database-url env:WAREHOUSE_DATABASE_URL \ --database-schema public \ - --enable-historic-sql \ - --historic-sql-min-executions 2 \ + --enable-query-history \ + --query-history-min-executions 2 \ --yes \ --no-input diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index c1935495..93f31ae9 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -3,7 +3,6 @@ import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; import { registerConnectionCommands } from './commands/connection-commands.js'; import { registerIngestCommands } from './commands/ingest-commands.js'; import { registerWikiCommands } from './commands/knowledge-commands.js'; -import { registerScanCommands } from './commands/scan-commands.js'; import { registerSetupCommands } from './commands/setup-commands.js'; import { registerSlCommands } from './commands/sl-commands.js'; import { registerStatusCommands } from './commands/status-commands.js'; @@ -53,7 +52,24 @@ type CommandPathNode = CommandWithGlobalOptions & { parent?: CommandPathNode | null; }; -const PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']); +const 
PROJECT_AWARE_ROOT_COMMANDS = new Set(['setup', 'connection', 'ingest', 'wiki', 'sl', 'status']); +const REMOVED_COMMAND_PATHS = new Set([ + 'scan', + 'wiki read', + 'wiki write', +]); +const GLOBAL_OPTIONS_WITH_VALUE = new Set(['--project-dir']); +const OPTIONS_WITH_VALUE = new Set([ + '--project-dir', + '--query-history-window-days', + '--user-id', + '--limit', + '--format', + '--connection-id', + '--source-name', + '--query-file', + '--max-rows', +]); export interface CommandWithGlobalOptions { opts: () => object; @@ -179,9 +195,6 @@ function shouldSuppressProjectDirLine(path: string[], options: Record= 0) { const demoCommand = path[demoIndex + 1]; @@ -226,10 +239,6 @@ function createBaseProgram(info: KtxCliPackageInfo, io: KtxCliIo): Command { .version(`${info.name} ${info.version}`, '-v, --version', 'Show CLI version') .helpOption('-h, --help', 'Show this help text') .configureHelp({ showGlobalOptions: true }) - .addHelpText( - 'after', - '\nAdvanced:\n ktx dev Low-level project initialization and runtime management.\n', - ) .showHelpAfterError() .exitOverride() .configureOutput({ @@ -259,6 +268,45 @@ function formatCliError(error: unknown): string { return error instanceof Error ? error.message : String(error); } +function commandPathFromArgv(argv: string[]): string[] { + const path: string[] = []; + for (let index = 0; index < argv.length && path.length < 2; index += 1) { + const arg = argv[index]; + if (arg === undefined) { + continue; + } + if (arg === '--') { + break; + } + if ((path.length === 0 ? GLOBAL_OPTIONS_WITH_VALUE : OPTIONS_WITH_VALUE).has(arg)) { + index += 1; + continue; + } + const optionsWithValue = path.length === 0 ? 
GLOBAL_OPTIONS_WITH_VALUE : OPTIONS_WITH_VALUE; + if ([...optionsWithValue].some((option) => arg.startsWith(`${option}=`))) { + continue; + } + if (path.length === 0 && arg === '--debug') { + continue; + } + if (arg.startsWith('-')) { + continue; + } + path.push(arg); + } + return path; +} + +function removedCommandName(argv: string[]): string | null { + const path = commandPathFromArgv(argv); + if (path.length === 0) { + return null; + } + + const pathKey = path.join(' '); + return REMOVED_COMMAND_PATHS.has(pathKey) ? path.at(-1) ?? null : null; +} + async function runBareInteractiveCommand( program: Command, io: KtxCliIo, @@ -314,14 +362,11 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command { registerSetupCommands(program, context); registerConnectionCommands(program, context); registerIngestCommands(program, context, { - runIngestWithProgress: async (ingestArgs, ingestIo, ingestDeps, defaultRunIngest) => - await (ingestDeps.ingest ?? defaultRunIngest)(ingestArgs, ingestIo), runTextIngest: async (textIngestArgs, ingestIo, ingestDeps) => { const { runKtxTextIngest } = await import('./text-ingest.js'); return await (ingestDeps.textIngest ?? 
runKtxTextIngest)(textIngestArgs, ingestIo); }, }); - registerScanCommands(program, context); registerWikiCommands(program, context); registerSlCommands(program, context); registerStatusCommands(program, context); @@ -375,6 +420,12 @@ export async function runCommanderKtxCli( return 0; } + const removedCommand = removedCommandName(argv); + if (removedCommand) { + io.stderr.write(`error: unknown command '${removedCommand}'\n`); + return 1; + } + try { await profileSpan('commander:parseAsync', () => program.parseAsync(argv, { from: 'user' })); } catch (error) { diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 2712558f..a2147904 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -2,10 +2,9 @@ import { createRequire } from 'node:module'; import type { KtxConnectionArgs } from './connection.js'; import type { KtxDoctorArgs } from './doctor.js'; -import type { KtxIngestArgs } from './ingest.js'; import type { KtxKnowledgeArgs } from './knowledge.js'; +import type { KtxPublicIngestArgs } from './public-ingest.js'; import type { KtxRuntimeArgs } from './runtime.js'; -import type { KtxScanArgs } from './scan.js'; import type { KtxSetupArgs } from './setup.js'; import type { KtxSlArgs } from './sl.js'; import { profileMark, profileSpan } from './startup-profile.js'; @@ -30,10 +29,9 @@ export interface KtxCliDeps { setup?: (args: KtxSetupArgs, io: KtxCliIo) => Promise; connection?: (args: KtxConnectionArgs, io: KtxCliIo) => Promise; doctor?: (args: KtxDoctorArgs, io: KtxCliIo) => Promise; - ingest?: (args: KtxIngestArgs, io: KtxCliIo) => Promise; + publicIngest?: (args: KtxPublicIngestArgs, io: KtxCliIo) => Promise; textIngest?: (args: KtxTextIngestArgs, io: KtxCliIo) => Promise; runtime?: (args: KtxRuntimeArgs, io: KtxCliIo) => Promise; - scan?: (args: KtxScanArgs, io: KtxCliIo) => Promise; knowledge?: (args: KtxKnowledgeArgs, io: KtxCliIo) => Promise; sl?: (args: KtxSlArgs, io: KtxCliIo) => Promise; } 
diff --git a/packages/cli/src/command-schemas.ts b/packages/cli/src/command-schemas.ts index 5caece1f..e1365d86 100644 --- a/packages/cli/src/command-schemas.ts +++ b/packages/cli/src/command-schemas.ts @@ -3,19 +3,6 @@ import { z } from 'zod'; const projectDirSchema = z.string().min(1); const stringArraySchema = z.array(z.string()); -export const wikiWriteCommandSchema = z.object({ - command: z.literal('write'), - projectDir: projectDirSchema, - key: z.string().min(1), - scope: z.enum(['GLOBAL', 'USER']), - userId: z.string().min(1), - summary: z.string().min(1), - content: z.string().min(1), - tags: stringArraySchema, - refs: stringArraySchema, - slRefs: stringArraySchema, -}); - const orderBySchema = z.union([ z.string().min(1), z.object({ diff --git a/packages/cli/src/command-tree.test.ts b/packages/cli/src/command-tree.test.ts index 85fa0e84..181fac77 100644 --- a/packages/cli/src/command-tree.test.ts +++ b/packages/cli/src/command-tree.test.ts @@ -52,6 +52,33 @@ describe('walkCommandTree', () => { expect(walkCommandTree(command).arguments).toEqual(['', '[schemas...]']); }); + + it('walks registered commands without applying hidden-command policy', () => { + const root = new Command('ktx'); + root.command('scan', { hidden: true }).description('Run a standalone connection scan'); + const ingest = root.command('ingest').description('Build or inspect KTX context'); + ingest.command('run', { hidden: true }).description('Run local ingest by adapter'); + ingest.command('watch', { hidden: true }).description('Open a stored visual report'); + ingest.command('status').description('Print status'); + root.command('status').description('Check readiness'); + + const tree = walkCommandTree(root); + + expect(tree.children.map((child) => child.name)).toEqual(['scan', 'ingest', 'status']); + expect(tree.children[0]).toMatchObject({ + name: 'scan', + description: 'Run a standalone connection scan', + children: [], + }); + expect(tree.children[1]).toMatchObject({ + name: 
'ingest', + children: [ + { name: 'run', description: 'Run local ingest by adapter', aliases: [], arguments: [], children: [] }, + { name: 'watch', description: 'Open a stored visual report', aliases: [], arguments: [], children: [] }, + { name: 'status', description: 'Print status', aliases: [], arguments: [], children: [] }, + ], + }); + }); }); describe('formatCommandTree', () => { diff --git a/packages/cli/src/commands/ingest-commands.ts b/packages/cli/src/commands/ingest-commands.ts index 952b6aa0..01e262c7 100644 --- a/packages/cli/src/commands/ingest-commands.ts +++ b/packages/cli/src/commands/ingest-commands.ts @@ -1,57 +1,22 @@ -import { resolve } from 'node:path'; import { type Command, Option } from '@commander-js/extra-typings'; -import { collectOption, type KtxCliCommandContext, type OutputModeOptions, resolveCommandProjectDir } from '../cli-program.js'; +import { + collectOption, + type KtxCliCommandContext, + parsePositiveIntegerOption, + resolveCommandProjectDir, +} from '../cli-program.js'; import type { KtxCliDeps, KtxCliIo } from '../index.js'; -import type { KtxIngestArgs, KtxIngestOutputMode } from '../ingest.js'; import { runtimeInstallPolicyFromFlags } from '../managed-python-command.js'; +import type { KtxPublicIngestArgs } from '../public-ingest.js'; import { profileMark } from '../startup-profile.js'; import type { KtxTextIngestArgs } from '../text-ingest.js'; profileMark('module:commands/ingest-commands'); interface IngestCommandOptions { - runIngestWithProgress: ( - args: KtxIngestArgs, - io: KtxCliIo, - deps: KtxCliDeps, - defaultRunIngest: (args: KtxIngestArgs, io: KtxCliIo) => Promise, - ) => Promise; runTextIngest: (args: KtxTextIngestArgs, io: KtxCliIo, deps: KtxCliDeps) => Promise; } -function outputMode(options: OutputModeOptions): KtxIngestOutputMode { - if (options.json === true) { - return 'json'; - } - if (options.viz === true) { - return 'viz'; - } - return 'plain'; -} - -function watchOutputMode(options: OutputModeOptions): 
KtxIngestOutputMode { - if (options.json === true) { - return 'json'; - } - if (options.plain === true) { - return 'plain'; - } - return 'viz'; -} - -function inputMode(options: OutputModeOptions): Pick { - return options.input === false ? { inputMode: 'disabled' } : {}; -} - -async function runIngestArgs( - context: KtxCliCommandContext, - args: KtxIngestArgs, - options: IngestCommandOptions, -): Promise { - const { runKtxIngest } = await import('../ingest.js'); - context.setExitCode(await options.runIngestWithProgress(args, context.io, context.deps, runKtxIngest)); -} - export function registerIngestCommands( program: Command, context: KtxCliCommandContext, @@ -59,50 +24,45 @@ export function registerIngestCommands( ): void { const ingest = program .command('ingest') - .description('Run or inspect local ingest memory-flow output') + .description('Build or inspect KTX context') + .usage('[options] [connectionId]') + .argument('[connectionId]', 'Configured connection id to ingest') + .option('--all', 'Ingest all configured connections', false) + .addOption(new Option('--fast', 'Use deterministic database schema ingest').conflicts('deep')) + .addOption(new Option('--deep', 'Use AI-enriched database ingest').conflicts('fast')) + .addOption(new Option('--query-history', 'Include database query-history usage patterns').conflicts('noQueryHistory')) + .addOption(new Option('--no-query-history', 'Skip database query-history usage patterns')) + .option('--query-history-window-days ', 'Query-history lookback window for this run', parsePositiveIntegerOption) + .addOption(new Option('--plain', 'Print plain text output').conflicts(['json'])) + .addOption(new Option('--json', 'Print JSON output').conflicts(['plain'])) + .option('--no-input', 'Disable interactive terminal input') .showHelpAfterError(); + ingest.action(async (connectionId: string | undefined, options, command) => { + const { runKtxPublicIngest } = await import('../public-ingest.js'); + const queryHistory = + 
options.queryHistory === true ? 'enabled' : options.queryHistory === false ? 'disabled' : 'default'; + const args: KtxPublicIngestArgs = { + command: 'run', + projectDir: resolveCommandProjectDir(command), + ...(connectionId ? { targetConnectionId: connectionId } : {}), + all: options.all === true, + json: options.json === true, + inputMode: options.input === false ? 'disabled' : 'auto', + ...(options.fast === true ? { depth: 'fast' as const } : {}), + ...(options.deep === true ? { depth: 'deep' as const } : {}), + queryHistory, + ...(options.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: options.queryHistoryWindowDays } : {}), + cliVersion: context.packageInfo.version, + runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options), + }; + context.setExitCode(await (context.deps.publicIngest ?? runKtxPublicIngest)(args, context.io)); + }); + ingest.hook('preAction', (_thisCommand, actionCommand) => { context.writeDebug?.('ingest', actionCommand); }); - ingest - .command('run') - .description('Run local ingest for one configured connection and source adapter') - .requiredOption('--connection-id ', 'KTX connection id') - .requiredOption('--adapter ', 'Ingest source adapter name') - .option('--source-dir ', 'Directory containing source files') - .option('--database-introspection-url ', 'Daemon URL for live-database introspection') - .option('--debug-llm-request-file ', 'Write sanitized LLM request structure to a JSONL file') - .option('--report-file ', 'Unsupported for ingest run; use ingest status/watch instead') - .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) - .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) - .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) - .option('--yes', 'Install the managed Python runtime without prompting when required', false) - .option('--no-input', 'Disable interactive terminal input for 
visualization') - .action(async (options, command) => { - if (options.reportFile) { - throw new Error('--report-file is only supported for ingest status/watch'); - } - await runIngestArgs( - context, - { - command: 'run', - projectDir: resolveCommandProjectDir(command), - connectionId: options.connectionId, - adapter: options.adapter, - sourceDir: options.sourceDir ? resolve(options.sourceDir) : undefined, - databaseIntrospectionUrl: options.databaseIntrospectionUrl || undefined, - cliVersion: context.packageInfo.version, - runtimeInstallPolicy: runtimeInstallPolicyFromFlags({ yes: options.yes }), - ...(options.debugLlmRequestFile ? { debugLlmRequestFile: resolve(options.debugLlmRequestFile) } : {}), - outputMode: outputMode(options), - ...inputMode(options), - }, - commandOptions, - ); - }); - ingest .command('text') .description('Ingest free-form text artifacts into KTX memory') @@ -113,6 +73,7 @@ export function registerIngestCommands( .option('--json', 'Print JSON output') .option('--fail-fast', 'Stop after the first failed text item', false) .action(async (files: string[], options, command) => { + const parentOptions = command.parent?.opts() as { json?: boolean } | undefined; context.setExitCode( await commandOptions.runTextIngest( { @@ -121,7 +82,7 @@ export function registerIngestCommands( files, ...(options.connectionId ? 
{ connectionId: options.connectionId } : {}), userId: options.userId, - json: options.json === true, + json: options.json === true || parentOptions?.json === true, failFast: options.failFast === true, }, context.io, @@ -129,76 +90,4 @@ export function registerIngestCommands( ), ); }); - - ingest - .command('status') - .description('Print status for the latest or selected stored local ingest run or report file') - .argument('[runId]', 'Local ingest run id, report id, run id, or job id') - .option('--report-file ', 'Bundle ingest report JSON file to render') - .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) - .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) - .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) - .option('--no-input', 'Disable interactive terminal input for visualization') - .action(async (runId: string | undefined, options, command) => { - await runIngestArgs( - context, - { - command: 'status', - projectDir: resolveCommandProjectDir(command), - ...(runId ? { runId } : {}), - ...(options.reportFile ? 
{ reportFile: resolve(options.reportFile) } : {}), - outputMode: outputMode(options), - ...inputMode(options), - }, - commandOptions, - ); - }); - - ingest - .command('watch') - .description('Open the latest or selected stored ingest visual report') - .argument('[runId]', 'Local ingest run id, report id, run id, or job id') - .option('--report-file ', 'Bundle ingest report JSON file to render') - .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) - .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) - .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) - .option('--no-input', 'Disable interactive terminal input for visualization') - .action(async (runId: string | undefined, options, command) => { - await runIngestArgs( - context, - { - command: 'watch', - projectDir: resolveCommandProjectDir(command), - ...(runId ? { runId } : {}), - ...(options.reportFile ? { reportFile: resolve(options.reportFile) } : {}), - outputMode: watchOutputMode(options), - ...inputMode(options), - }, - commandOptions, - ); - }); - - ingest - .command('replay') - .description('Replay a stored ingest run or bundle report through memory-flow output') - .argument('', 'Local ingest run id, report id, run id, or job id') - .option('--report-file ', 'Bundle ingest report JSON file to render') - .addOption(new Option('--plain', 'Print plain text output').conflicts(['json', 'viz'])) - .addOption(new Option('--json', 'Print JSON output').conflicts(['plain', 'viz'])) - .addOption(new Option('--viz', 'Render memory-flow TUI output').conflicts(['plain', 'json'])) - .option('--no-input', 'Disable interactive terminal input for visualization') - .action(async (runId: string, options, command) => { - await runIngestArgs( - context, - { - command: 'replay', - projectDir: resolveCommandProjectDir(command), - runId, - ...(options.reportFile ? 
{ reportFile: resolve(options.reportFile) } : {}), - outputMode: outputMode(options), - ...inputMode(options), - }, - commandOptions, - ); - }); } diff --git a/packages/cli/src/commands/knowledge-commands.ts b/packages/cli/src/commands/knowledge-commands.ts index f8d716f7..d0c04a32 100644 --- a/packages/cli/src/commands/knowledge-commands.ts +++ b/packages/cli/src/commands/knowledge-commands.ts @@ -1,11 +1,9 @@ -import { type Command, Option } from '@commander-js/extra-typings'; +import type { Command } from '@commander-js/extra-typings'; import { - collectOption, type KtxCliCommandContext, parsePositiveIntegerOption, resolveCommandProjectDir, } from '../cli-program.js'; -import { wikiWriteCommandSchema } from '../command-schemas.js'; import type { KtxKnowledgeArgs } from '../knowledge.js'; import { profileMark } from '../startup-profile.js'; @@ -19,7 +17,7 @@ async function runKnowledgeArgs(context: KtxCliCommandContext, args: KtxKnowledg export function registerWikiCommands(program: Command, context: KtxCliCommandContext): void { const wiki = program .command('wiki') - .description('List, read, search, or write local wiki pages') + .description('List or search local wiki pages') .showHelpAfterError() .addHelpText( 'after', @@ -40,22 +38,6 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon }); }); - wiki - .command('read') - .description('Read one local wiki page') - .argument('', 'Wiki page key') - .option('--json', 'Print JSON output', false) - .option('--user-id ', 'Local user id', 'local') - .action(async (key: string, options: { userId: string; json?: boolean }, command) => { - await runKnowledgeArgs(context, { - command: 'read', - projectDir: resolveCommandProjectDir(command), - key, - userId: options.userId, - json: options.json, - }); - }); - wiki .command('search') .description('Search local wiki pages') @@ -73,31 +55,4 @@ export function registerWikiCommands(program: Command, context: KtxCliCommandCon ...(options.limit 
!== undefined ? { limit: options.limit } : {}), }); }); - - wiki - .command('write') - .description('Write one local wiki page') - .argument('', 'Wiki page key') - .option('--user-id ', 'Local user id', 'local') - .addOption(new Option('--scope ', 'global or user').choices(['global', 'user']).default('global')) - .requiredOption('--summary ', 'Wiki summary') - .requiredOption('--content ', 'Wiki content') - .option('--tag ', 'Wiki tag; repeatable', collectOption, []) - .option('--ref ', 'Wiki ref; repeatable', collectOption, []) - .option('--sl-ref ', 'Semantic-layer ref; repeatable', collectOption, []) - .action(async (key: string, options, command) => { - const args = wikiWriteCommandSchema.parse({ - command: 'write', - projectDir: resolveCommandProjectDir(command), - key, - scope: options.scope === 'user' ? 'USER' : 'GLOBAL', - userId: options.userId, - summary: options.summary, - content: options.content, - tags: options.tag, - refs: options.ref, - slRefs: options.slRef, - }); - await runKnowledgeArgs(context, args); - }); } diff --git a/packages/cli/src/commands/scan-commands.ts b/packages/cli/src/commands/scan-commands.ts deleted file mode 100644 index 2c19bcdf..00000000 --- a/packages/cli/src/commands/scan-commands.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { type Command, InvalidArgumentError } from '@commander-js/extra-typings'; -import { type KtxCliCommandContext, resolveCommandProjectDir } from '../cli-program.js'; -import { runtimeInstallPolicyFromFlags } from '../managed-python-command.js'; -import type { KtxScanArgs } from '../scan.js'; -import { profileMark } from '../startup-profile.js'; - -profileMark('module:commands/scan-commands'); - -async function runScanArgs(context: KtxCliCommandContext, args: KtxScanArgs): Promise { - const runner = context.deps.scan ?? 
(await import('../scan.js')).runKtxScan; - context.setExitCode(await runner(args, context.io)); -} - -type KtxScanModeOption = Extract['mode']; - -const REMOVED_SCAN_SUBCOMMAND_NAMES = new Set([ - 'status', - 'report', - 'relationships', - 'relationship-apply', - 'relationship-feedback', - 'relationship-calibration', - 'relationship-thresholds', -]); - -function parseScanModeOption(value: string): KtxScanModeOption { - if (value === 'structural' || value === 'enriched' || value === 'relationships') { - return value; - } - throw new InvalidArgumentError('Allowed choices are structural, enriched, relationships'); -} - -function parseConnectionId(value: string): string { - if (REMOVED_SCAN_SUBCOMMAND_NAMES.has(value)) { - throw new InvalidArgumentError(`"${value}" is not a scan connection id`); - } - return value; -} - -export function registerScanCommands(program: Command, context: KtxCliCommandContext): void { - program - .command('scan') - .description('Run a standalone connection scan') - .argument('', 'KTX connection id to scan', parseConnectionId) - .option( - '--mode ', - 'Scan mode: structural, enriched, relationships (default: structural)', - parseScanModeOption, - ) - .option('--dry-run', 'Run without writing scan results', false) - .option('--database-introspection-url ', 'Daemon URL for live-database introspection') - .option('--yes', 'Install the managed Python runtime without prompting when required', false) - .option('--no-input', 'Disable interactive managed runtime installation') - .showHelpAfterError() - .addHelpText( - 'after', - '\nProject directory defaults to KTX_PROJECT_DIR when set, otherwise the current working directory.\n', - ) - .hook('preAction', (_thisCommand, actionCommand) => { - context.writeDebug?.('scan', actionCommand); - }) - .action(async (connectionId: string, options, command) => { - const mode = options.mode ?? 
'structural'; - await runScanArgs(context, { - command: 'run', - projectDir: resolveCommandProjectDir(command), - connectionId, - mode, - detectRelationships: mode === 'relationships', - dryRun: options.dryRun === true, - databaseIntrospectionUrl: options.databaseIntrospectionUrl, - cliVersion: context.packageInfo.version, - runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options), - }); - }); -} diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 3da8d094..4f6f0c32 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -109,12 +109,12 @@ function shouldShowSetupEntryMenu( newDatabaseConnectionId?: string; databaseUrl?: string; databaseSchema?: string[]; - enableHistoricSql?: boolean; - disableHistoricSql?: boolean; - historicSqlWindowDays?: number; - historicSqlMinExecutions?: number; - historicSqlServiceAccountPattern?: string[]; - historicSqlRedactionPattern?: string[]; + enableQueryHistory?: boolean; + disableQueryHistory?: boolean; + queryHistoryWindowDays?: number; + queryHistoryMinExecutions?: number; + queryHistoryServiceAccountPattern?: string[]; + queryHistoryRedactionPattern?: string[]; skipDatabases?: boolean; source?: KtxSetupSourceType; sourceConnectionId?: string; @@ -147,10 +147,10 @@ function shouldShowSetupEntryMenu( if (options.databaseSchema && options.databaseSchema.length > 0) { return false; } - if (options.historicSqlServiceAccountPattern && options.historicSqlServiceAccountPattern.length > 0) { + if (options.queryHistoryServiceAccountPattern && options.queryHistoryServiceAccountPattern.length > 0) { return false; } - if (options.historicSqlRedactionPattern && options.historicSqlRedactionPattern.length > 0) { + if (options.queryHistoryRedactionPattern && options.queryHistoryRedactionPattern.length > 0) { return false; } if (options.notionRootPageId && options.notionRootPageId.length > 0) { @@ -179,10 +179,10 @@ function 
shouldShowSetupEntryMenu( 'skipEmbeddings', 'newDatabaseConnectionId', 'databaseUrl', - 'enableHistoricSql', - 'disableHistoricSql', - 'historicSqlWindowDays', - 'historicSqlMinExecutions', + 'enableQueryHistory', + 'disableQueryHistory', + 'queryHistoryWindowDays', + 'queryHistoryMinExecutions', 'skipDatabases', 'source', 'sourceConnectionId', @@ -282,33 +282,37 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo .hideHelp(), ) .addOption( - new Option('--enable-historic-sql', 'Enable Historic SQL when the selected database supports it') + new Option('--enable-query-history', 'Enable query history when the selected database supports it') .hideHelp() .default(false), ) .addOption( - new Option('--disable-historic-sql', 'Disable Historic SQL for the selected database').hideHelp().default(false), + new Option('--disable-query-history', 'Disable query history for the selected database').hideHelp().default(false), ) - .addOption(new Option('--historic-sql-window-days ', 'Historic SQL query-history window').argParser(positiveInteger).hideHelp()) .addOption( - new Option('--historic-sql-min-executions ', 'Minimum Historic SQL executions for a template') + new Option('--query-history-window-days ', 'Query-history lookback window') .argParser(positiveInteger) .hideHelp(), ) .addOption( - new Option('--historic-sql-service-account-pattern ', 'Historic SQL service-account regex; repeatable') + new Option('--query-history-min-executions ', 'Minimum executions for a query-history template') + .argParser(positiveInteger) + .hideHelp(), + ) + .addOption( + new Option('--query-history-service-account-pattern ', 'Query-history service-account regex; repeatable') .argParser((value, previous: string[]) => [...previous, value]) .default([] as string[]) .hideHelp(), ) .addOption( - new Option('--historic-sql-redaction-pattern ', 'Historic SQL SQL-literal redaction regex; repeatable') + new Option('--query-history-redaction-pattern ', 'Query-history 
SQL-literal redaction regex; repeatable') .argParser((value, previous: string[]) => [...previous, value]) .default([] as string[]) .hideHelp(), ) .addOption( - new Option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a primary source is added') + new Option('--skip-databases', 'Leave database setup incomplete; KTX cannot work until a database is added') .hideHelp() .default(false), ) @@ -371,9 +375,9 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo context.setExitCode(1); return; } - if (options.enableHistoricSql && options.disableHistoricSql) { + if (options.enableQueryHistory && options.disableQueryHistory) { context.io.stderr.write( - 'Choose only one Historic SQL action: --enable-historic-sql or --disable-historic-sql.\n', + 'Choose only one query-history action: --enable-query-history or --disable-query-history.\n', ); context.setExitCode(1); return; @@ -418,17 +422,17 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo ...(options.newDatabaseConnectionId ? { databaseConnectionId: options.newDatabaseConnectionId } : {}), ...(options.databaseUrl ? { databaseUrl: options.databaseUrl } : {}), databaseSchemas: options.databaseSchema, - ...(options.enableHistoricSql ? { enableHistoricSql: true } : {}), - ...(options.disableHistoricSql ? { disableHistoricSql: true } : {}), - ...(options.historicSqlWindowDays !== undefined ? { historicSqlWindowDays: options.historicSqlWindowDays } : {}), - ...(options.historicSqlMinExecutions !== undefined - ? { historicSqlMinExecutions: options.historicSqlMinExecutions } + ...(options.enableQueryHistory ? { enableQueryHistory: true } : {}), + ...(options.disableQueryHistory ? { disableQueryHistory: true } : {}), + ...(options.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: options.queryHistoryWindowDays } : {}), + ...(options.queryHistoryMinExecutions !== undefined + ? 
{ queryHistoryMinExecutions: options.queryHistoryMinExecutions } : {}), - ...(options.historicSqlServiceAccountPattern.length > 0 - ? { historicSqlServiceAccountPatterns: options.historicSqlServiceAccountPattern } + ...(options.queryHistoryServiceAccountPattern.length > 0 + ? { queryHistoryServiceAccountPatterns: options.queryHistoryServiceAccountPattern } : {}), - ...(options.historicSqlRedactionPattern.length > 0 - ? { historicSqlRedactionPatterns: options.historicSqlRedactionPattern } + ...(options.queryHistoryRedactionPattern.length > 0 + ? { queryHistoryRedactionPatterns: options.queryHistoryRedactionPattern } : {}), skipDatabases: options.skipDatabases === true, ...(options.source ? { source: options.source } : {}), diff --git a/packages/cli/src/context-build-view.test.ts b/packages/cli/src/context-build-view.test.ts index db172484..487e5468 100644 --- a/packages/cli/src/context-build-view.test.ts +++ b/packages/cli/src/context-build-view.test.ts @@ -2,6 +2,7 @@ import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/contex import { describe, expect, it, vi } from 'vitest'; import type { KtxPublicIngestProject, KtxPublicIngestTargetResult } from './public-ingest.js'; import { + type ContextBuildTargetState, extractProgressMessage, createRepainter, initViewState, @@ -45,27 +46,39 @@ function projectWithConnections(connections: KtxProjectConfig['connections']): K }; } -function successResult(connectionId: string, driver: string, operation: 'scan' | 'source-ingest'): KtxPublicIngestTargetResult { +function successResult( + connectionId: string, + driver: string, + operation: 'database-ingest' | 'source-ingest', +): KtxPublicIngestTargetResult { return { connectionId, driver, steps: [ - { operation: 'scan', status: operation === 'scan' ? 'done' : 'skipped' }, + { operation: 'database-schema', status: operation === 'database-ingest' ? 
'done' : 'skipped' }, + { operation: 'query-history', status: 'skipped' }, { operation: 'source-ingest', status: operation === 'source-ingest' ? 'done' : 'skipped' }, - { operation: 'enrich', status: 'skipped' }, { operation: 'memory-update', status: operation === 'source-ingest' ? 'done' : 'skipped' }, ], }; } -function failedResult(connectionId: string, driver: string, operation: 'scan' | 'source-ingest'): KtxPublicIngestTargetResult { +function failedResult( + connectionId: string, + driver: string, + operation: 'database-ingest' | 'source-ingest', +): KtxPublicIngestTargetResult { return { connectionId, driver, steps: [ - { operation: 'scan', status: operation === 'scan' ? 'failed' : 'skipped', detail: `${connectionId} failed at scan.` }, + { + operation: 'database-schema', + status: operation === 'database-ingest' ? 'failed' : 'skipped', + detail: `${connectionId} failed at database-schema.`, + }, + { operation: 'query-history', status: 'skipped' }, { operation: 'source-ingest', status: operation === 'source-ingest' ? 
'failed' : 'skipped' }, - { operation: 'enrich', status: 'skipped' }, { operation: 'memory-update', status: 'not-run' }, ], }; @@ -100,15 +113,19 @@ describe('parseScanSummary', () => { }); describe('parseIngestSummary', () => { - it('extracts work units and saved memory', () => { - expect(parseIngestSummary('Work units: 5\nSaved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL'); + it('extracts task count and saved memory', () => { + expect(parseIngestSummary('Tasks: 5\nSaved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL'); }); - it('extracts work units alone when no saved memory', () => { - expect(parseIngestSummary('Work units: 5\nStatus: done')).toBe('5 work units'); + it('extracts task count alone when no saved memory', () => { + expect(parseIngestSummary('Tasks: 5\nStatus: done')).toBe('5 tasks'); }); - it('extracts saved memory alone when no work units', () => { + it('still parses the legacy "Work units:" wording for backward compat', () => { + expect(parseIngestSummary('Work units: 7\nStatus: done')).toBe('7 tasks'); + }); + + it('extracts saved memory alone when no task count', () => { expect(parseIngestSummary('Saved memory: 3 wiki, 2 SL')).toBe('3 wiki, 2 SL'); }); @@ -120,7 +137,7 @@ describe('parseIngestSummary', () => { describe('initViewState', () => { it('partitions targets into primary and context sources', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, ]); @@ -133,7 +150,7 @@ describe('initViewState', () => { it('initializes global timing fields', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] 
}, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); expect(state.startedAt).toBeNull(); expect(state.totalElapsedMs).toBe(0); @@ -143,7 +160,7 @@ describe('initViewState', () => { describe('renderContextBuildView', () => { it('renders all-queued state with ○ icon and progress counter', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, ]); @@ -151,7 +168,7 @@ describe('renderContextBuildView', () => { expect(output).toContain('Building KTX context'); expect(output).toContain('(0/2)'); expect(output).toContain('○'); - expect(output).toContain('Primary sources:'); + expect(output).toContain('Databases:'); expect(output).toContain('warehouse'); expect(output).toContain('queued'); expect(output).toContain('Context sources:'); @@ -184,7 +201,7 @@ describe('renderContextBuildView', () => { it('renders header with total elapsed time when set', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.totalElapsedMs = 65000; @@ -194,16 +211,62 @@ describe('renderContextBuildView', () => { it('renders project directory when provided', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, 
]); const output = renderContextBuildView(state, { styled: false, projectDir: '/tmp/project' }); expect(output).toContain('Project: /tmp/project'); }); + it('renders public warnings in the foreground view', () => { + const state = initViewState([ + { + connectionId: 'docs', + driver: 'notion', + operation: 'source-ingest', + adapter: 'notion', + debugCommand: 'ktx ingest docs --debug', + steps: ['source-ingest', 'memory-update'], + }, + ]); + + const rendered = renderContextBuildView(state, { + styled: false, + warnings: ['--deep affects database ingest only; ignoring it for docs.'], + }); + + expect(rendered).toContain('Warnings:'); + expect(rendered).toContain('--deep affects database ingest only; ignoring it for docs.'); + }); + + it('renders public notices in the foreground view before warnings', () => { + const state = initViewState([ + { + connectionId: 'warehouse', + driver: 'postgres', + operation: 'database-ingest', + debugCommand: 'ktx ingest warehouse --debug', + steps: ['database-schema', 'query-history'], + databaseDepth: 'deep', + detectRelationships: true, + queryHistory: { enabled: true, dialect: 'postgres' }, + }, + ]); + + const rendered = renderContextBuildView(state, { + styled: false, + notices: ['Schema ingest runs before query history for warehouse.'], + warnings: ['--query-history requires deep ingest; running warehouse with --deep.'], + }); + + expect(rendered.indexOf('Notices:')).toBeLessThan(rendered.indexOf('Warnings:')); + expect(rendered).toContain('Schema ingest runs before query history for warehouse.'); + expect(rendered).toContain('--query-history requires deep ingest; running warehouse with --deep.'); + }); + it('renders dynamic separator matching header width', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, 
]); state.totalElapsedMs = 120000; @@ -216,7 +279,7 @@ describe('renderContextBuildView', () => { it('renders completed state with summary', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'done'; state.primarySources[0].elapsedMs = 72000; @@ -230,19 +293,19 @@ describe('renderContextBuildView', () => { it('renders running target with elapsed time', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'running'; state.primarySources[0].elapsedMs = 30000; const output = renderContextBuildView(state, { styled: false }); - expect(output).toContain('scanning...'); + expect(output).toContain('reading schema'); expect(output).toContain('(30s)'); }); it('renders running target with progress bar when percentage is available', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'running'; state.primarySources[0].detailLine = '[50%] Scanning tables...'; @@ -263,11 +326,11 @@ describe('renderContextBuildView', () => { state.contextSources[0].startedAt = 1_000; state.contextSources[0].elapsedMs = 113_000; state.contextSources[0].progressUpdatedAtMs = 46_000; - state.contextSources[0].detailLine = '[45%] No work units to process; finalizing ingest'; + state.contextSources[0].detailLine = '[45%] No tasks to 
process; finalizing ingest'; const output = renderContextBuildView(state, { styled: false }); - expect(output).toContain('No work units to process; finalizing ingest'); + expect(output).toContain('No tasks to process; finalizing ingest'); expect(output).toContain('last update 1m08s ago'); expect(output).toContain('(1m53s)'); }); @@ -280,7 +343,7 @@ describe('renderContextBuildView', () => { state.contextSources[0].startedAt = 1_000; state.contextSources[0].elapsedMs = 40_000; state.contextSources[0].progressUpdatedAtMs = 25_000; - state.contextSources[0].detailLine = '[45%] Planning work units'; + state.contextSources[0].detailLine = '[45%] Planning tasks'; const output = renderContextBuildView(state, { styled: false }); @@ -289,7 +352,7 @@ describe('renderContextBuildView', () => { it('renders completion summary when all targets are done', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, ]); state.primarySources[0].status = 'done'; @@ -304,7 +367,7 @@ describe('renderContextBuildView', () => { it('renders singular source label in completion summary', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'done'; state.primarySources[0].elapsedMs = 5000; @@ -316,7 +379,7 @@ describe('renderContextBuildView', () => { it('does not render completion summary while targets are still active', () => { const state = initViewState([ - { connectionId: 
'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, { connectionId: 'dbt-main', driver: 'dbt', operation: 'source-ingest', adapter: 'dbt', debugCommand: '', steps: ['source-ingest', 'memory-update'] }, ]); state.primarySources[0].status = 'done'; @@ -329,14 +392,14 @@ describe('renderContextBuildView', () => { it('renders failed state', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'failed'; - state.primarySources[0].failureText = 'KTX lost its connection to PostgreSQL while scanning warehouse.'; + state.primarySources[0].failureText = 'KTX lost its connection to PostgreSQL while reading schema for warehouse.'; const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('✗'); - expect(output).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.'); + expect(output).toContain('KTX lost its connection to PostgreSQL while reading schema for warehouse.'); }); it('omits empty groups', () => { @@ -345,31 +408,174 @@ describe('renderContextBuildView', () => { ]); const output = renderContextBuildView(state, { styled: false }); - expect(output).not.toContain('Primary sources:'); + expect(output).not.toContain('Databases:'); expect(output).toContain('Context sources:'); }); - it('preserves detach hint while targets are active', () => { + it('renders foreground-only progress hints without detach or resume commands', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { + connectionId: 'warehouse', + driver: 
'postgres', + operation: 'database-ingest', + debugCommand: 'ktx ingest warehouse --debug', + steps: ['database-schema'], + }, ]); state.primarySources[0].status = 'running'; - const output = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' }); - expect(output).toContain('d to detach'); - expect(output).toContain('ktx setup --project-dir /tmp/project'); - expect(output).toContain('to resume'); + const rendered = renderContextBuildView(state, { styled: false, showHint: true, projectDir: '/tmp/project' }); + + expect(rendered).toContain('Ctrl+C to stop'); + expect(rendered).not.toContain('d to detach'); + expect(rendered).not.toContain('resume'); }); it('omits detach hint when all targets are done', () => { const state = initViewState([ - { connectionId: 'warehouse', driver: 'postgres', operation: 'scan', debugCommand: '', steps: ['scan'] }, + { connectionId: 'warehouse', driver: 'postgres', operation: 'database-ingest', debugCommand: '', steps: ['database-schema'] }, ]); state.primarySources[0].status = 'done'; state.totalElapsedMs = 5000; const output = renderContextBuildView(state, { styled: false, showHint: true }); - expect(output).not.toContain('d to detach'); + expect(output).not.toContain('Ctrl+C to stop'); + }); +}); + +describe('renderContextBuildView phase rows', () => { + function dbTarget(connectionId: string, queryHistoryEnabled = false) { + return { + connectionId, + driver: 'postgres', + operation: 'database-ingest' as const, + debugCommand: '', + steps: queryHistoryEnabled + ? (['database-schema', 'query-history'] as ('database-schema' | 'query-history')[]) + : (['database-schema'] as ('database-schema' | 'query-history')[]), + ...(queryHistoryEnabled ? 
{ queryHistory: { enabled: true, dialect: 'postgres' as const } } : {}), + }; + } + + function sourceTarget(connectionId: string) { + return { + connectionId, + driver: 'dbt', + operation: 'source-ingest' as const, + adapter: 'dbt', + debugCommand: '', + steps: ['source-ingest', 'memory-update'] as ('source-ingest' | 'memory-update')[], + }; + } + + function setPhase( + state: ReturnType, + connectionId: string, + phaseKey: 'database-schema' | 'query-history' | 'source-ingest', + patch: Partial, + ): void { + const target = [...state.primarySources, ...state.contextSources].find((t) => t.target.connectionId === connectionId); + const phase = target?.phases.find((p) => p.key === phaseKey); + if (!phase) throw new Error(`No phase ${phaseKey} on ${connectionId}`); + Object.assign(phase, patch); + } + + it('renders two phase rows for a database-ingest target with query history', () => { + const state = initViewState([dbTarget('warehouse', true)]); + state.primarySources[0].status = 'running'; + setPhase(state, 'warehouse', 'database-schema', { + status: 'done', + percent: 100, + summary: '172 tables', + elapsedMs: 52_000, + }); + setPhase(state, 'warehouse', 'query-history', { + status: 'running', + percent: 7, + detail: '12/172 · arr-movements', + elapsedMs: 36_000, + }); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Schema'); + expect(output).toContain('100%'); + expect(output).toContain('172 tables'); + expect(output).toContain('(52s)'); + expect(output).toContain('Query history'); + expect(output).toContain('7%'); + expect(output).toContain('12/172 · arr-movements'); + expect(output).toContain('(36s)'); + }); + + it('renders a single Schema phase row when query history is disabled', () => { + const state = initViewState([dbTarget('warehouse', false)]); + state.primarySources[0].status = 'running'; + setPhase(state, 'warehouse', 'database-schema', { + status: 'running', + percent: 42, + detail: 'Profiling 73/172 
tables', + }); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Schema'); + expect(output).toContain('42%'); + expect(output).toContain('Profiling 73/172 tables'); + expect(output).not.toContain('Query history'); + }); + + it('renders Source ingest phase row for a source-ingest target', () => { + const state = initViewState([sourceTarget('dbt-main')]); + state.contextSources[0].status = 'running'; + setPhase(state, 'dbt-main', 'source-ingest', { + status: 'running', + percent: 25, + detail: 'Reading models', + }); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Source ingest'); + expect(output).toContain('25%'); + expect(output).toContain('Reading models'); + expect(output).not.toContain('Schema '); + }); + + it('renders skipped Query history when schema phase fails', () => { + const state = initViewState([dbTarget('warehouse', true)]); + state.primarySources[0].status = 'running'; + setPhase(state, 'warehouse', 'database-schema', { status: 'failed', percent: 30 }); + setPhase(state, 'warehouse', 'query-history', { status: 'skipped' }); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Schema'); + expect(output).toContain('failed'); + expect(output).toContain('Query history'); + expect(output).toContain('skipped'); + }); + + it('renders queued Query history with an em-dash and empty bar', () => { + const state = initViewState([dbTarget('warehouse', true)]); + state.primarySources[0].status = 'running'; + setPhase(state, 'warehouse', 'database-schema', { + status: 'running', + percent: 12, + detail: 'Introspecting', + }); + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Query history'); + expect(output).toContain('queued'); + expect(output).toContain('—'); + }); + + it('falls back to single-line legacy detail when no phase has started yet', () => { + const state = 
initViewState([dbTarget('warehouse', false)]); + state.primarySources[0].status = 'running'; + state.primarySources[0].detailLine = '[5%] Preparing database ingest'; + + const output = renderContextBuildView(state, { styled: false }); + expect(output).toContain('Preparing database ingest'); + expect(output).toContain('5%'); + expect(output).not.toContain('○ Schema'); }); }); @@ -429,10 +635,47 @@ describe('runContextBuild', () => { { executeTarget, now: () => 1000 }, ); - expect(result).toEqual({ exitCode: 0, detached: false }); + expect(result).toEqual({ exitCode: 0 }); expect(callOrder).toEqual(['warehouse', 'dbt_main']); }); + it('runs only the requested connection when foreground build receives a target', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + docs: { driver: 'notion' }, + }); + const executeTarget = vi.fn(async (target) => + successResult(target.connectionId, target.driver, target.operation), + ); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + depth: 'fast', + queryHistory: 'default', + }, + io.io, + { executeTarget, now: () => 1000 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + expect(executeTarget).toHaveBeenCalledTimes(1); + expect(executeTarget.mock.calls[0]?.[0]).toMatchObject({ + connectionId: 'warehouse', + operation: 'database-ingest', + databaseDepth: 'fast', + }); + expect(io.stdout()).toContain('Databases:'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).not.toContain('docs'); + }); + it('returns exit code 1 when any target fails', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -447,7 +690,7 @@ describe('runContextBuild', () => { { executeTarget, now: () => 1000 }, ); - expect(result).toEqual({ exitCode: 1, detached: false }); + expect(result).toEqual({ exitCode: 1 }); }); it('renders a friendly 
network failure when target output contains a network error code', async () => { @@ -467,13 +710,91 @@ describe('runContextBuild', () => { { executeTarget, now: () => 1000 }, ); - expect(result).toEqual({ exitCode: 1, detached: false }); - expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.'); + expect(result).toEqual({ exitCode: 1 }); + expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while reading schema for warehouse.'); expect(io.stdout()).toContain('network address unavailable (EADDRNOTAVAIL)'); expect(io.stdout()).toContain('Retry: ktx setup --project-dir /tmp/project'); expect(io.stdout()).not.toContain('BoundPool'); }); + it('renders localhost SQL analysis refusal as a runtime failure during query history', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + }); + const executeTarget = vi.fn(async (target, _args, targetIo) => { + targetIo.stderr.write('connect ECONNREFUSED 127.0.0.1:8765\n'); + return { + connectionId: target.connectionId, + driver: target.driver, + steps: [ + { operation: 'database-schema', status: 'done' }, + { operation: 'query-history', status: 'failed', detail: 'warehouse failed at query-history.' 
}, + { operation: 'source-ingest', status: 'skipped' }, + { operation: 'memory-update', status: 'skipped' }, + ], + } satisfies KtxPublicIngestTargetResult; + }); + + const result = await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled' }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(result).toEqual({ exitCode: 1 }); + expect(io.stdout()).toContain( + 'KTX could not reach the local SQL analysis runtime while processing query history for warehouse.', + ); + expect(io.stdout()).toContain('connection refused (ECONNREFUSED)'); + expect(io.stdout()).toContain('Retry: ktx setup --project-dir /tmp/project'); + expect(io.stdout()).not.toContain('KTX lost its connection to PostgreSQL'); + }); + + it('uses captured query-history stderr instead of generic failed-at detail', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + }); + const executeTarget = vi.fn(async (target, _args, targetIo) => { + targetIo.stdout.write('KTX scan completed\n'); + targetIo.stdout.write('Mode: enriched\n'); + targetIo.stderr.write('Missing bundled Python runtime manifest: /tmp/assets/python/manifest.json\n'); + targetIo.stderr.write('In a source checkout, build the local runtime assets with: pnpm run artifacts:build\n'); + targetIo.stderr.write('Then retry the runtime-backed KTX command.\n'); + return { + connectionId: target.connectionId, + driver: target.driver, + steps: [ + { operation: 'database-schema', status: 'done' }, + { + operation: 'query-history', + status: 'failed', + detail: + 'warehouse failed at query-history. 
Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history', + }, + { operation: 'source-ingest', status: 'skipped' }, + { operation: 'memory-update', status: 'skipped' }, + ], + } satisfies KtxPublicIngestTargetResult; + }); + + const result = await runContextBuild( + project, + { projectDir: '/tmp/project', inputMode: 'disabled', entrypoint: 'ingest' }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(result).toEqual({ exitCode: 1 }); + expect(io.stdout()).toContain('Missing bundled Python runtime manifest: /tmp/assets/python/manifest.json.'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command'); + expect(io.stdout()).not.toContain('warehouse failed at query-history'); + expect(io.stdout().match(/Retry: /g)).toHaveLength(1); + }); + it('renders a friendly network failure when target execution throws', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -491,11 +812,141 @@ describe('runContextBuild', () => { { executeTarget, now: () => 1000 }, ); - expect(result).toEqual({ exitCode: 1, detached: false }); - expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while scanning warehouse.'); + expect(result).toEqual({ exitCode: 1 }); + expect(io.stdout()).toContain('KTX lost its connection to PostgreSQL while reading schema for warehouse.'); expect(io.stdout()).toContain('connection reset (ECONNRESET)'); }); + it('uses direct ingest retry guidance for public ingest failures', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + }); + const executeTarget = vi.fn(async (target) => failedResult(target.connectionId, target.driver, target.operation)); + + await runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, 
+ entrypoint: 'ingest', + }, + io.io, + { executeTarget, now: () => 1000 }, + ); + + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project'); + expect(io.stdout()).not.toContain('Retry: ktx setup'); + }); + + it('renders query-history progress without the historic-sql adapter key', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, + }); + const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => { + deps.ingestProgress?.({ percent: 5, message: 'Fetching source files for warehouse/historic-sql' }); + return successResult(target.connectionId, target.driver, target.operation); + }); + + await runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + entrypoint: 'ingest', + }, + io.io, + { executeTarget, now: () => 1000, sourceProgressThrottleMs: 0 }, + ); + + expect(io.stdout()).toContain('Fetching query history for warehouse'); + expect(io.stdout()).not.toContain('historic-sql'); + }); + + it('renders database ingest progress without scan wording', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const executeTarget = vi.fn(async (target, _args, _targetIo, deps) => { + await deps.scanProgress?.update(0.05, 'Preparing scan'); + await deps.scanProgress?.update(0.15, 'Inspecting database schema'); + await deps.scanProgress?.update(0.7, 'Writing schema artifacts'); + return successResult(target.connectionId, target.driver, target.operation); + }); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + }, + io.io, + { executeTarget, now: () => 1000, sourceProgressThrottleMs: 0 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + 
expect(io.stdout()).toContain('Preparing database ingest'); + expect(io.stdout()).toContain('Reading database schema'); + expect(io.stdout()).toContain('Writing schema context'); + expect(io.stdout()).not.toContain('Preparing scan'); + expect(io.stdout()).not.toMatch(/\bscan\b/i); + }); + + it('passes schema-first notices from the plan into foreground output', async () => { + const io = makeIo(); + const project: KtxPublicIngestProject = { + ...projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }), + config: { + ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config, + connections: { + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }, + llm: { + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + }, + }, + }; + const executeTarget = vi.fn(async (target) => successResult(target.connectionId, target.driver, target.operation)); + + await expect( + runContextBuild( + project, + { + projectDir: '/tmp/project', + inputMode: 'disabled', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'enabled', + }, + io.io, + { executeTarget, now: () => 1000 }, + ), + ).resolves.toMatchObject({ exitCode: 0 }); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + }); + it('renders final view for non-TTY output', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -514,7 +965,7 @@ describe('runContextBuild', () => { const output = io.stdout(); expect(output).toContain('Building KTX context'); expect(output).toContain('Project: /tmp/project'); - expect(output).toContain('Primary sources:'); + 
expect(output).toContain('Databases:'); expect(output).toContain('warehouse'); expect(output).toContain('Context sources:'); expect(output).toContain('dbt_main'); @@ -533,7 +984,7 @@ describe('runContextBuild', () => { ); expect(executeTarget).toHaveBeenCalledWith( - expect.objectContaining({ connectionId: 'warehouse', operation: 'scan' }), + expect.objectContaining({ connectionId: 'warehouse', operation: 'database-ingest' }), expect.objectContaining({ scanMode: 'enriched', detectRelationships: true }), expect.anything(), expect.objectContaining({ @@ -543,44 +994,6 @@ describe('runContextBuild', () => { ); }); - it('exits immediately with paused message when d is pressed', async () => { - const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => { - throw new Error('process.exit'); - }); - const io = makeIo(); - const project = projectWithConnections({ - warehouse: { driver: 'postgres' }, - dbt_main: { driver: 'dbt' }, - }); - let triggerDetach: (() => void) | null = null; - const executeTarget = vi.fn(async (target) => { - if (target.connectionId === 'warehouse') triggerDetach?.(); - return successResult(target.connectionId, target.driver, target.operation); - }); - - await expect( - runContextBuild( - project, - { projectDir: '/tmp/project', inputMode: 'disabled' }, - io.io, - { - executeTarget, - now: () => 1000, - setupKeystroke: (onDetach) => { - triggerDetach = onDetach; - return () => {}; - }, - }, - ), - ).rejects.toThrow('process.exit'); - - expect(mockExit).toHaveBeenCalledWith(0); - expect(io.stdout()).toContain('Context build continuing in the background.'); - expect(io.stdout()).toContain('Resume: ktx setup --project-dir /tmp/project'); - expect(io.stdout()).toContain('Status: ktx status --project-dir /tmp/project'); - mockExit.mockRestore(); - }); - it('calls onSourceProgress when sources start and finish', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -666,7 +1079,7 @@ describe('runContextBuild', () => { 
dbt_main: { driver: 'dbt' }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { - if (target.operation === 'scan') { + if (target.operation === 'database-ingest') { targetIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n'); targetIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n'); } else { @@ -685,7 +1098,6 @@ describe('runContextBuild', () => { expect(result).toMatchObject({ exitCode: 0, - detached: false, reportIds: ['report-dbt-1'], artifactPaths: [ 'raw-sources/warehouse/live-database/sync-1/scan-report.json', @@ -701,12 +1113,12 @@ describe('runContextBuild', () => { dbt_main: { driver: 'dbt' }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { - if (target.operation === 'scan') { + if (target.operation === 'database-ingest') { return successResult(target.connectionId, target.driver, target.operation); } targetIo.stdout.write('Report: report-dbt-failed\n'); - targetIo.stdout.write('Work units: 3\n'); + targetIo.stdout.write('Tasks: 3\n'); return failedResult(target.connectionId, target.driver, target.operation); }); @@ -719,7 +1131,6 @@ describe('runContextBuild', () => { expect(result).toMatchObject({ exitCode: 1, - detached: false, reportIds: ['report-dbt-failed'], }); }); @@ -729,7 +1140,7 @@ describe('viewStateFromSourceProgress', () => { it('partitions sources into primary and context groups', () => { const state = viewStateFromSourceProgress( [ - { connectionId: 'warehouse', operation: 'scan', status: 'running', startedAtMs: 900 }, + { connectionId: 'warehouse', operation: 'database-ingest', status: 'running', startedAtMs: 900 }, { connectionId: 'dbt-main', operation: 'source-ingest', status: 'queued' }, ], 1000, @@ -748,7 +1159,7 @@ describe('viewStateFromSourceProgress', () => { it('uses stored elapsedMs for completed sources', () => { const state = viewStateFromSourceProgress( - [{ connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 
72000, summaryText: '42 tables' }], + [{ connectionId: 'warehouse', operation: 'database-ingest', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }], 99999, ); @@ -759,7 +1170,7 @@ describe('viewStateFromSourceProgress', () => { it('renders the same view format as the foreground build', () => { const state = viewStateFromSourceProgress( [ - { connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }, + { connectionId: 'warehouse', operation: 'database-ingest', status: 'done', elapsedMs: 72000, summaryText: '42 tables' }, { connectionId: 'dbt-main', operation: 'source-ingest', status: 'running', startedAtMs: 900 }, ], 1000, @@ -768,7 +1179,7 @@ describe('viewStateFromSourceProgress', () => { const output = renderContextBuildView(state, { styled: false }); expect(output).toContain('Building KTX context'); - expect(output).toContain('Primary sources:'); + expect(output).toContain('Databases:'); expect(output).toContain('warehouse'); expect(output).toContain('42 tables'); expect(output).toContain('Context sources:'); @@ -781,7 +1192,7 @@ describe('viewStateFromSourceProgress', () => { [ { connectionId: 'warehouse', - operation: 'scan', + operation: 'database-ingest', status: 'running', startedAtMs: 900, percent: 63, diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index e1f43ead..6df8ad2a 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -1,9 +1,8 @@ -import { spawn } from 'node:child_process'; -import { mkdirSync, openSync } from 'node:fs'; -import { join, resolve } from 'node:path'; import type { KtxProgressPort, KtxProgressUpdateOptions } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; import type { KtxIngestProgressUpdate } from './ingest.js'; +import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; +import { publicDatabaseIngestMessage, publicQueryHistoryMessage } 
from './public-ingest-copy.js'; import type { KtxPublicIngestArgs, KtxPublicIngestDeps, @@ -20,6 +19,21 @@ profileMark('module:context-build-view'); const SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] as const; const ESC = String.fromCharCode(0x1b); +type PhaseKey = 'database-schema' | 'query-history' | 'source-ingest'; +type PhaseStatus = 'queued' | 'running' | 'done' | 'failed' | 'skipped'; + +interface PhaseState { + key: PhaseKey; + name: string; + status: PhaseStatus; + percent: number; + detail: string | null; + summary: string | null; + startedAt: number | null; + elapsedMs: number; + progressUpdatedAtMs: number | null; +} + export interface ContextBuildTargetState { target: KtxPublicIngestPlanTarget; status: 'queued' | 'running' | 'done' | 'failed'; @@ -29,6 +43,35 @@ export interface ContextBuildTargetState { startedAt: number | null; elapsedMs: number; progressUpdatedAtMs: number | null; + phases: PhaseState[]; +} + +const PHASE_LABELS: Record = { + 'database-schema': 'Schema', + 'query-history': 'Query history', + 'source-ingest': 'Source ingest', +}; + +function makePhasesForTarget(target: KtxPublicIngestPlanTarget): PhaseState[] { + const make = (key: PhaseKey): PhaseState => ({ + key, + name: PHASE_LABELS[key], + status: 'queued', + percent: 0, + detail: null, + summary: null, + startedAt: null, + elapsedMs: 0, + progressUpdatedAtMs: null, + }); + if (target.operation === 'database-ingest') { + const phases: PhaseState[] = [make('database-schema')]; + if (target.queryHistory?.enabled === true) { + phases.push(make('query-history')); + } + return phases; + } + return [make('source-ingest')]; } export interface ContextBuildViewState { @@ -42,20 +85,27 @@ export interface ContextBuildViewState { export interface ContextBuildArgs { projectDir: string; inputMode: 'auto' | 'disabled'; - scanMode?: 'structural' | 'enriched'; + targetConnectionId?: string; + all?: boolean; + entrypoint?: 'setup' | 'ingest'; + depth?: Extract['depth']; + 
queryHistory?: Extract['queryHistory']; + queryHistoryWindowDays?: number; + scanMode?: Extract['scanMode']; detectRelationships?: boolean; + cliVersion?: string; + runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; } export interface ContextBuildResult { exitCode: number; - detached: boolean; reportIds?: string[]; artifactPaths?: string[]; } export interface ContextBuildSourceProgressUpdate { connectionId: string; - operation: 'scan' | 'source-ingest'; + operation: 'database-ingest' | 'source-ingest'; status: 'queued' | 'running' | 'done' | 'failed'; startedAtMs?: number; elapsedMs?: number; @@ -81,13 +131,13 @@ interface ContextBuildRenderOptions { scanRunningText?: string; sourceIngestRunningText?: string; completedItemName?: CompletedItemName; + notices?: string[]; + warnings?: string[]; } export interface ContextBuildDeps { executeTarget?: typeof executePublicIngestTarget; now?: () => number; - setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; - onDetach?: () => void; onSourceProgress?: (sources: ContextBuildSourceProgressUpdate[]) => void; sourceProgressThrottleMs?: number; } @@ -135,6 +185,34 @@ function statusIcon(status: ContextBuildTargetState['status'], frame: number, st } } +function phaseStatusIcon(status: PhaseStatus, frame: number, styled: boolean): string { + const raw = (() => { + switch (status) { + case 'done': + return '✓'; + case 'failed': + return '✗'; + case 'running': + return SPINNER_FRAMES[frame % SPINNER_FRAMES.length] ?? 
'⠋'; + case 'skipped': + return '·'; + default: + return '○'; + } + })(); + if (!styled) return raw; + switch (status) { + case 'done': + return green(raw); + case 'failed': + return red(raw); + case 'running': + return cyan(raw); + default: + return dim(raw); + } +} + function extractPercent(detailLine: string | null): number | null { if (!detailLine) return null; const match = detailLine.match(/^\[(\d+)%\]/); @@ -179,9 +257,10 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean, options: } if (target.status === 'running') { const percent = extractPercent(target.detailLine); - const progressText = target.detailLine?.replace(/^\[\d+%\]\s*/, '') - ?? (target.target.operation === 'scan' - ? (options.scanRunningText ?? 'scanning...') + const progressText = + target.detailLine?.replace(/^\[\d+%\]\s*/, '') ?? + (target.target.operation === 'database-ingest' + ? (options.scanRunningText ?? 'reading schema') : (options.sourceIngestRunningText ?? 'ingesting...')); const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : null; const parts: string[] = []; @@ -197,19 +276,76 @@ function targetDetail(target: ContextBuildTargetState, styled: boolean, options: return styled ? dim('queued') : 'queued'; } +const PHASE_NAME_WIDTH = 14; + +function renderRunningTargetHeaderDetail(target: ContextBuildTargetState, styled: boolean): string { + const elapsed = target.elapsedMs > 0 ? `(${formatDuration(target.elapsedMs)})` : ''; + if (!elapsed) return ''; + return styled ? dim(elapsed) : elapsed; +} + +function renderPhaseRow(phase: PhaseState, frame: number, styled: boolean): string { + const icon = phaseStatusIcon(phase.status, frame, styled); + const name = phase.name.padEnd(PHASE_NAME_WIDTH); + const segments: string[] = []; + if (phase.status === 'queued' || phase.status === 'skipped') { + const emptyBar = BAR_EMPTY.repeat(BAR_WIDTH); + segments.push(styled ? dim(emptyBar) : emptyBar); + segments.push(styled ? 
dim(' —') : ' —'); + } else { + const pct = Math.max(0, Math.min(100, Math.round(phase.percent))); + segments.push(renderProgressBar(pct, styled)); + segments.push(`${String(pct).padStart(3)}%`); + } + let trailing = ''; + if (phase.status === 'done') { + const parts: string[] = []; + if (phase.summary) parts.push(phase.summary); + if (phase.elapsedMs > 0) { + const elapsed = `(${formatDuration(phase.elapsedMs)})`; + parts.push(styled ? dim(elapsed) : elapsed); + } + trailing = parts.join(' '); + } else if (phase.status === 'running') { + const parts: string[] = []; + if (phase.detail) parts.push(phase.detail); + if (phase.elapsedMs > 0) { + const elapsed = `(${formatDuration(phase.elapsedMs)})`; + parts.push(styled ? dim(elapsed) : elapsed); + } + trailing = parts.join(' '); + } else if (phase.status === 'queued') { + trailing = styled ? dim('queued') : 'queued'; + } else if (phase.status === 'skipped') { + trailing = styled ? dim('skipped') : 'skipped'; + } else if (phase.status === 'failed') { + trailing = styled ? 
red('failed') : 'failed'; + } + const bar = `${segments.join(' ')} ${trailing}`.trimEnd(); + return ` ${icon} ${name} ${bar}`; +} + function columnWidth(state: ContextBuildViewState): number { const all = [...state.primarySources, ...state.contextSources]; return Math.max(12, ...all.map((t) => t.target.connectionId.length)) + 2; } -function renderTargetLine( +function renderTargetRows( target: ContextBuildTargetState, frame: number, styled: boolean, width: number, options: ContextBuildRenderOptions, -): string { - return ` ${statusIcon(target.status, frame, styled)} ${target.target.connectionId.padEnd(width)} ${targetDetail(target, styled, options)}`; +): string[] { + const icon = statusIcon(target.status, frame, styled); + const name = target.target.connectionId.padEnd(width); + const anyPhaseStarted = target.phases.some((p) => p.status !== 'queued'); + if (target.status === 'running' && target.phases.length > 0 && anyPhaseStarted) { + const headerDetail = renderRunningTargetHeaderDetail(target, styled); + const headerLine = ` ${icon} ${name} ${headerDetail}`.trimEnd(); + return [headerLine, ...target.phases.map((phase) => renderPhaseRow(phase, frame, styled))]; + } + return [` ${icon} ${name} ${targetDetail(target, styled, options)}`]; } function renderTargetGroup( @@ -221,11 +357,34 @@ function renderTargetGroup( options: ContextBuildRenderOptions, ): string[] { if (targets.length === 0) return []; - return ['', ` ${label}:`, ...targets.map((t) => renderTargetLine(t, frame, styled, width, options))]; + return ['', ` ${label}:`, ...targets.flatMap((t) => renderTargetRows(t, frame, styled, width, options))]; } -function resumeCommand(projectDir?: string): string { - return projectDir ? 
`ktx setup --project-dir ${projectDir}` : 'ktx setup'; +function renderMessageGroup(label: string, messages: string[], styled: boolean): string[] { + if (messages.length === 0) return []; + const renderedMessages = messages.map((message) => ` - ${message}`); + return ['', ` ${label}:`, ...renderedMessages.map((line) => (styled ? dim(line) : line))]; +} + +function retryCommand(input: { + projectDir?: string; + entrypoint?: 'setup' | 'ingest'; + connectionId?: string; + depth?: 'fast' | 'deep'; + queryHistory?: boolean; + queryHistoryWindowDays?: number; +}): string { + const projectPart = input.projectDir ? ` --project-dir ${input.projectDir}` : ''; + if (input.entrypoint === 'ingest' && input.connectionId) { + const depthPart = input.depth ? ` --${input.depth}` : ''; + const queryHistoryPart = input.queryHistory ? ' --query-history' : ''; + const windowPart = + input.queryHistory && input.queryHistoryWindowDays !== undefined + ? ` --query-history-window-days ${input.queryHistoryWindowDays}` + : ''; + return `ktx ingest ${input.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; + } + return input.projectDir ? `ktx setup --project-dir ${input.projectDir}` : 'ktx setup'; } export function renderContextBuildView( @@ -256,8 +415,10 @@ export function renderContextBuildView( header, separator, ...(options.projectDir ? [` Project: ${options.projectDir}`] : []), - ...renderTargetGroup(options.primaryGroupLabel ?? 'Primary sources', state.primarySources, state.frame, styled, width, options), + ...renderTargetGroup(options.primaryGroupLabel ?? 'Databases', state.primarySources, state.frame, styled, width, options), ...renderTargetGroup(options.contextGroupLabel ?? 'Context sources', state.contextSources, state.frame, styled, width, options), + ...renderMessageGroup('Notices', options.notices ?? [], styled), + ...renderMessageGroup('Warnings', options.warnings ?? 
[], styled), '', ]; @@ -270,7 +431,7 @@ export function renderContextBuildView( } if (options.showHint && hasActive) { - const hintContent = options.hintText ?? `d to detach · ${resumeCommand(options.projectDir)} to resume`; + const hintContent = options.hintText ?? 'Ctrl+C to stop'; const hint = ` ${hintContent}`; lines.push(styled ? dim(hint) : hint); lines.push(''); @@ -297,8 +458,8 @@ export function parseScanSummary(output: string): string | null { export function parseIngestSummary(output: string): string | null { const savedMemory = output.match(/Saved memory: (.+)/); if (savedMemory) return savedMemory[1]; - const workUnits = output.match(/Work units: (\d+)/); - if (workUnits) return `${workUnits[1]} work units`; + const tasks = output.match(/(?:Tasks|Work units): (\d+)/); + if (tasks) return `${tasks[1]} tasks`; return null; } @@ -314,7 +475,7 @@ function collectOutputMetadata( if (reportLine) { const value = reportLine[1].trim(); if (value && value !== 'none') { - if (operation === 'scan') artifactPaths.add(value); + if (operation === 'database-ingest') artifactPaths.add(value); else reportIds.add(value); } } @@ -413,10 +574,11 @@ export function viewStateFromSourceProgress( startedAt: s.startedAtMs ?? null, elapsedMs: s.status === 'running' && s.startedAtMs ? now - s.startedAtMs : (s.elapsedMs ?? 0), progressUpdatedAtMs: s.updatedAtMs ?? null, + phases: [], }); return { - primarySources: sources.filter((s) => s.operation === 'scan').map(makeTarget), + primarySources: sources.filter((s) => s.operation === 'database-ingest').map(makeTarget), contextSources: sources.filter((s) => s.operation === 'source-ingest').map(makeTarget), frame: 0, startedAt: startedAtMs ?? 
null, @@ -471,57 +633,6 @@ export function createRepainter(io: KtxCliIo) { }; } -// --- Background build --- - -function resolveKtxEntryScript(): string | null { - const argv1 = process.argv[1]; - if (argv1 && (argv1.endsWith('.js') || argv1.endsWith('.ts') || argv1.endsWith('.mjs'))) { - return argv1; - } - return null; -} - -function spawnBackgroundBuild(projectDir: string): { logPath: string } | null { - const entryScript = resolveKtxEntryScript(); - if (!entryScript) return null; - - const resolvedDir = resolve(projectDir); - const logDir = join(resolvedDir, '.ktx', 'setup'); - mkdirSync(logDir, { recursive: true }); - const logPath = join(logDir, 'context-build.log'); - const logFd = openSync(logPath, 'w'); - - const child = spawn( - process.execPath, - [entryScript, 'setup', '--project-dir', resolvedDir, '--no-input'], - { detached: true, stdio: ['ignore', logFd, logFd] }, - ); - child.unref(); - return { logPath }; -} - -// --- Keystroke handling --- - -export function defaultSetupKeystroke(onDetach: () => void, onCtrlC: () => void): (() => void) | null { - const stdin = process.stdin; - if (!stdin.isTTY || typeof stdin.setRawMode !== 'function') { - return null; - } - stdin.setRawMode(true); - stdin.resume(); - const onData = (data: Buffer) => { - const char = data.toString(); - if (char === 'd' || char === 'D') onDetach(); - else if (char === '\x03') onCtrlC(); - }; - stdin.on('data', onData); - return () => { - stdin.off('data', onData); - if (typeof stdin.setRawMode === 'function') stdin.setRawMode(false); - stdin.pause(); - }; -} - // --- Orchestration --- function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetState { @@ -534,6 +645,7 @@ function makeTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTargetS startedAt: null, elapsedMs: 0, progressUpdatedAtMs: null, + phases: makePhasesForTarget(target), }; } @@ -570,6 +682,11 @@ function networkErrorCode(error: unknown, capturedOutput = ''): string | null { return 
networkErrorCodeFromText(`${unknownErrorMessage(error)}\n${capturedOutput}`); } +function isLocalSqlAnalysisConnectionRefused(input: { capturedOutput?: string; fallback?: string | null }): boolean { + const text = `${input.capturedOutput ?? ''}\n${input.fallback ?? ''}`; + return /\bECONNREFUSED\b/.test(text) && /\b(?:127\.0\.0\.1|localhost):8765\b/.test(text); +} + function friendlyDriverName(driver: string): string { const normalized = driver.toLowerCase(); if (normalized === 'postgres' || normalized === 'postgresql') return 'PostgreSQL'; @@ -586,28 +703,102 @@ function failedStepDetail(result: KtxPublicIngestTargetResult): string | null { return result.steps.find((step) => step.status === 'failed')?.detail ?? null; } +const INTERNAL_FAILURE_LINE_RE = + /^(Report|Run|Job|Status|Adapter|Connection|Sync|Mode|Dry run|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/; +const ACTIONABLE_FAILURE_LINE_RE = + /^(Missing bundled Python runtime manifest|KTX Python runtime is required|KTX managed daemon|Error:|Failed\b|Could not\b|Cannot\b)/; + +function firstCapturedFailureLine(output: string | undefined): string | null { + const lines = (output ?? '') + .split(/\r?\n/) + .map((candidate) => candidate.trim()) + .filter((candidate) => candidate.length > 0) + .filter((candidate) => !candidate.startsWith('KTX scan completed')) + .filter((candidate) => !INTERNAL_FAILURE_LINE_RE.test(candidate)); + return lines.find((candidate) => ACTIONABLE_FAILURE_LINE_RE.test(candidate)) ?? lines.at(-1) ?? null; +} + +function isGenericFailedAtDetail(target: KtxPublicIngestPlanTarget, detail: string | null | undefined): boolean { + return new RegExp(`^${target.connectionId} failed at [a-z-]+\\.?(?: Retry: .*)?$`).test(detail ?? 
''); +} + +function appendRetryIfNeeded(input: { + message: string; + target: KtxPublicIngestPlanTarget; + projectDir: string; + entrypoint?: 'setup' | 'ingest'; +}): string { + const base = input.message.trim().replace(/\.+$/, ''); + if (/\bRetry:\s/.test(base)) { + return base; + } + return `${base}. Retry: ${retryCommand({ + projectDir: input.projectDir, + entrypoint: input.entrypoint, + connectionId: input.target.connectionId, + depth: input.target.databaseDepth, + queryHistory: input.target.queryHistory?.enabled === true, + queryHistoryWindowDays: input.target.queryHistory?.windowDays, + })}`; +} + function failureTextForTarget(input: { target: KtxPublicIngestPlanTarget; projectDir: string; + entrypoint?: 'setup' | 'ingest'; capturedOutput?: string; error?: unknown; fallback?: string | null; }): string { const code = networkErrorCode(input.error, input.capturedOutput); + if (code && isLocalSqlAnalysisConnectionRefused({ capturedOutput: input.capturedOutput, fallback: input.fallback })) { + return [ + `KTX could not reach the local SQL analysis runtime while processing query history for ${input.target.connectionId}.`, + `Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`, + `Retry: ${retryCommand({ + projectDir: input.projectDir, + entrypoint: input.entrypoint, + connectionId: input.target.connectionId, + depth: input.target.databaseDepth, + queryHistory: input.target.queryHistory?.enabled === true, + queryHistoryWindowDays: input.target.queryHistory?.windowDays, + })}`, + ].join(' '); + } if (code) { - const operation = input.target.operation === 'scan' ? 'scanning' : 'ingesting'; + const operation = input.target.operation === 'database-ingest' ? 
'reading schema for' : 'ingesting'; return [ `KTX lost its connection to ${friendlyDriverName(input.target.driver)} while ${operation} ${input.target.connectionId}.`, `Reason: ${NETWORK_ERROR_REASONS[code]} (${code}).`, - `Retry: ${resumeCommand(input.projectDir)}`, + `Retry: ${retryCommand({ + projectDir: input.projectDir, + entrypoint: input.entrypoint, + connectionId: input.target.connectionId, + depth: input.target.databaseDepth, + queryHistory: input.target.queryHistory?.enabled === true, + queryHistoryWindowDays: input.target.queryHistory?.windowDays, + })}`, ].join(' '); } - return input.fallback ?? `${input.target.connectionId} failed.`; + const capturedFailure = firstCapturedFailureLine(input.capturedOutput); + const fallback = + capturedFailure && isGenericFailedAtDetail(input.target, input.fallback) + ? capturedFailure + : (input.fallback ?? capturedFailure ?? `${input.target.connectionId} failed.`); + if (input.entrypoint === 'ingest') { + return appendRetryIfNeeded({ + message: fallback, + target: input.target, + projectDir: input.projectDir, + entrypoint: input.entrypoint, + }); + } + return fallback; } export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuildViewState { return { - primarySources: targets.filter((t) => t.operation === 'scan').map(makeTargetState), + primarySources: targets.filter((t) => t.operation === 'database-ingest').map(makeTargetState), contextSources: targets.filter((t) => t.operation === 'source-ingest').map(makeTargetState), frame: 0, startedAt: null, @@ -615,9 +806,23 @@ export function initViewState(targets: KtxPublicIngestPlanTarget[]): ContextBuil }; } -function formatProgressDetail(update: Pick): string { +function publicProgressMessage(message: string, target: KtxPublicIngestPlanTarget): string { + let current = message; + if (target.operation === 'database-ingest') { + current = publicDatabaseIngestMessage(current); + } + if (target.steps.includes('query-history')) { + current = 
publicQueryHistoryMessage(current, target.connectionId); + } + return current; +} + +function formatProgressDetail( + update: Pick, + target: KtxPublicIngestPlanTarget, +): string { const percent = Math.max(0, Math.min(100, Math.round(update.percent))); - return `[${percent}%] ${update.message}`; + return `[${percent}%] ${publicProgressMessage(update.message, target)}`; } function createContextBuildProgressPort( @@ -649,7 +854,15 @@ export async function runContextBuild( io: KtxCliIo, deps: ContextBuildDeps = {}, ): Promise { - const plan = buildPublicIngestPlan(project, { projectDir: args.projectDir, all: true }); + const plan = buildPublicIngestPlan(project, { + projectDir: args.projectDir, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all ?? true, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + }); const state = initViewState(plan.targets); const isTTY = io.stdout.isTTY === true; const nowFn = deps.now ?? (() => Date.now()); @@ -657,7 +870,12 @@ export async function runContextBuild( state.startedAt = nowFn(); const repainter = isTTY ? createRepainter(io) : null; - const viewOpts = { styled: true, projectDir: args.projectDir }; + const viewOpts = { + styled: true, + projectDir: args.projectDir, + notices: plan.notices ?? 
[], + warnings: plan.warnings, + }; const paint = (hint: boolean) => repainter?.paint(renderContextBuildView(state, { ...viewOpts, showHint: hint })); paint(true); @@ -672,6 +890,11 @@ export async function runContextBuild( if (t.status === 'running' && t.startedAt !== null) { t.elapsedMs = nowFn() - t.startedAt; } + for (const phase of t.phases) { + if (phase.status === 'running' && phase.startedAt !== null) { + phase.elapsedMs = nowFn() - phase.startedAt; + } + } } paint(true); }, 140); @@ -695,78 +918,112 @@ export async function runContextBuild( return true; }; - let detached = false; - let exiting = false; - let cleanupKeystroke: (() => void) | null = null; - - if (isTTY || deps.setupKeystroke) { - const cleanup = () => { - if (spinnerInterval) clearInterval(spinnerInterval); - cleanupKeystroke?.(); - }; - cleanupKeystroke = (deps.setupKeystroke ?? defaultSetupKeystroke)( - () => { - detached = true; - cleanup(); - deps.onDetach?.(); - const bg = spawnBackgroundBuild(args.projectDir); - io.stdout.write('\n\nContext build continuing in the background.\n'); - if (bg) io.stdout.write(`Log: ${bg.logPath}\n`); - io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); - io.stdout.write(`Status: ktx status --project-dir ${resolve(args.projectDir)}\n`); - exiting = true; - process.exit(0); - }, - () => { - cleanup(); - io.stdout.write('\n\nContext build stopped. Nothing is running in the background.\n'); - io.stdout.write(`Resume: ${resumeCommand(args.projectDir)}\n`); - exiting = true; - process.exit(130); - }, - ); - } const runArgs: Extract = { command: 'run', projectDir: args.projectDir, - all: true, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all ?? true, json: false, inputMode: args.inputMode, - scanMode: args.scanMode, - detectRelationships: args.detectRelationships, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? 
{ queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}), + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), }; let hasFailure = false; try { for (const targetState of orderedTargets) { - if (detached) break; - targetState.status = 'running'; targetState.startedAt = nowFn(); paint(true); publishSourceProgress(true); let hasPendingProgressPublish = false; + const ingestPhaseKeyForTarget: PhaseKey = + targetState.target.operation === 'database-ingest' ? 'query-history' : 'source-ingest'; - const updateTargetProgress = (update: KtxIngestProgressUpdate) => { - targetState.detailLine = formatProgressDetail(update); + const updateNamedPhase = (key: PhaseKey, update: KtxIngestProgressUpdate): void => { + const phase = targetState.phases.find((p) => p.key === key); + if (phase) { + if (phase.status === 'queued') { + phase.status = 'running'; + phase.startedAt = nowFn(); + } + const sanitizedMessage = update.message.replace(/^\[\d+%\]\s*/, ''); + phase.detail = publicProgressMessage(sanitizedMessage, targetState.target); + phase.percent = Math.max(phase.percent, Math.max(0, Math.min(100, Math.round(update.percent)))); + phase.progressUpdatedAtMs = nowFn(); + } + targetState.detailLine = formatProgressDetail(update, targetState.target); targetState.progressUpdatedAtMs = nowFn(); + if (!repainter) { + io.stdout.write(`${targetState.detailLine}\n`); + } paint(true); hasPendingProgressPublish = !publishSourceProgress(false); }; + const updateSchemaPhase = (update: KtxIngestProgressUpdate): void => updateNamedPhase('database-schema', update); + const updateIngestPhase = (update: KtxIngestProgressUpdate): void => 
updateNamedPhase(ingestPhaseKeyForTarget, update); + const capture = createCaptureIo( (message) => { - targetState.detailLine = message; + targetState.detailLine = publicProgressMessage(message, targetState.target); targetState.progressUpdatedAtMs = nowFn(); + if (!repainter) { + io.stdout.write(`${targetState.detailLine}\n`); + } paint(true); hasPendingProgressPublish = !publishSourceProgress(false); }, false, ); + + const onPhaseStart = (key: PhaseKey): void => { + const phase = targetState.phases.find((p) => p.key === key); + if (!phase) return; + phase.status = 'running'; + if (phase.startedAt === null) phase.startedAt = nowFn(); + phase.progressUpdatedAtMs = nowFn(); + paint(true); + hasPendingProgressPublish = !publishSourceProgress(false); + }; + + const onPhaseEnd = (key: PhaseKey, status: 'done' | 'failed' | 'skipped', summary?: string): void => { + const phase = targetState.phases.find((p) => p.key === key); + if (!phase) return; + phase.status = status; + if (phase.startedAt !== null) { + phase.elapsedMs = nowFn() - phase.startedAt; + } + if (status === 'done') { + phase.percent = 100; + } + let resolvedSummary = summary; + if (status === 'done' && !resolvedSummary) { + const captured = capture.captured(); + if (key === 'database-schema') { + resolvedSummary = parseScanSummary(captured) ?? undefined; + } else if (key === 'query-history' || key === 'source-ingest') { + resolvedSummary = parseIngestSummary(captured) ?? 
undefined; + } + } + if (resolvedSummary) { + phase.summary = resolvedSummary; + } + paint(true); + hasPendingProgressPublish = !publishSourceProgress(false); + }; + const progressDeps: KtxPublicIngestDeps = { - scanProgress: createContextBuildProgressPort(updateTargetProgress), - ingestProgress: updateTargetProgress, + scanProgress: createContextBuildProgressPort(updateSchemaPhase), + ingestProgress: updateIngestPhase, + onPhaseStart, + onPhaseEnd, }; let result: KtxPublicIngestTargetResult | null = null; @@ -774,9 +1031,6 @@ export async function runContextBuild( try { result = await execTarget(targetState.target, runArgs, capture.io, progressDeps); } catch (error) { - if (exiting) { - throw error; - } thrownError = error; } @@ -794,13 +1048,14 @@ export async function runContextBuild( for (const artifactPath of metadata.artifactPaths) artifactPaths.add(artifactPath); if (!failed) { targetState.summaryText = - targetState.target.operation === 'scan' + targetState.target.operation === 'database-ingest' ? parseScanSummary(capturedOutput) : parseIngestSummary(capturedOutput); } else { targetState.failureText = failureTextForTarget({ target: targetState.target, projectDir: args.projectDir, + entrypoint: args.entrypoint, capturedOutput, error: thrownError, fallback: result ? failedStepDetail(result) : null, @@ -813,17 +1068,12 @@ export async function runContextBuild( } } finally { if (spinnerInterval) clearInterval(spinnerInterval); - cleanupKeystroke?.(); } if (state.startedAt !== null) { state.totalElapsedMs = nowFn() - state.startedAt; } - if (detached) { - return { exitCode: 0, detached: true }; - } - if (!repainter) { io.stdout.write(renderContextBuildView(state, { ...viewOpts, styled: false })); } else { @@ -832,7 +1082,6 @@ export async function runContextBuild( return { exitCode: hasFailure ? 1 : 0, - detached: false, ...(reportIds.size > 0 ? { reportIds: [...reportIds] } : {}), ...(artifactPaths.size > 0 ? 
{ artifactPaths: [...artifactPaths] } : {}), }; diff --git a/packages/cli/src/dev.test.ts b/packages/cli/src/dev.test.ts index fe75d1af..e2c72012 100644 --- a/packages/cli/src/dev.test.ts +++ b/packages/cli/src/dev.test.ts @@ -52,14 +52,14 @@ describe('dev Commander tree', () => { expect(testIo.stderr()).toBe(''); }); - it('keeps dev callable while hiding it from root command rows', async () => { + it('lists dev in root command rows', async () => { const testIo = makeIo(); await expect(runKtxCli(['--help'], testIo.io)).resolves.toBe(0); - expect(testIo.stdout()).toContain('Advanced:'); - expect(testIo.stdout()).toContain('ktx dev'); - expect(testIo.stdout()).not.toContain('dev Low-level diagnostics'); + expect(testIo.stdout()).not.toContain('Advanced:'); + expect(testIo.stdout()).toContain('dev'); + expect(testIo.stdout()).toMatch(/Low-level project initialization and runtime\s+management/); expect(testIo.stderr()).toBe(''); }); @@ -129,21 +129,11 @@ describe('dev Commander tree', () => { argv: ['dev', 'runtime', '--help'], expected: ['Usage: ktx dev runtime', 'install', 'start', 'stop', 'status'], }, - { - argv: ['scan', '--help'], - expected: ['Usage: ktx scan [options] ', '--mode ', 'structural', 'relationships', '--dry-run'], - }, - { - argv: ['ingest', 'run', '--help'], - expected: ['Usage: ktx ingest run [options]', '--connection-id ', '--adapter '], - }, ])('prints generated nested help for $argv', async ({ argv, expected }) => { const io = makeIo(); const doctor = vi.fn(async () => 0); - const ingest = vi.fn(async () => 0); - const scan = vi.fn(async () => 0); - await expect(runKtxCli(argv, io.io, { doctor, ingest, scan })).resolves.toBe(0); + await expect(runKtxCli(argv, io.io, { doctor })).resolves.toBe(0); for (const text of expected) { expect(io.stdout()).toContain(text); @@ -154,109 +144,46 @@ describe('dev Commander tree', () => { } expect(io.stderr()).toBe(''); expect(doctor).not.toHaveBeenCalled(); - expect(ingest).not.toHaveBeenCalled(); - 
expect(scan).not.toHaveBeenCalled(); }); - it('dispatches top-level scan through Commander with injected dependencies', async () => { - const scanIo = makeIo(); - const scan = vi.fn(async () => 0); + it('rejects old adapter-backed ingest flags through public option parsing and keeps run out of ingest help', async () => { + const helpIo = makeIo(); + const runIo = makeIo(); + const publicIngest = vi.fn(async () => 0); + await expect(runKtxCli(['ingest', '--help'], helpIo.io, { publicIngest })).resolves.toBe(0); await expect( - runKtxCli(['scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'], scanIo.io, { scan }), - ).resolves.toBe(0); + runKtxCli( + ['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase', '--project-dir', '/tmp/project'], + runIo.io, + { publicIngest }, + ), + ).resolves.toBe(1); - expect(scan).toHaveBeenCalledWith( - { - command: 'run', - projectDir: '/tmp/project', - connectionId: 'warehouse', - mode: 'structural', - detectRelationships: false, - dryRun: true, - databaseIntrospectionUrl: undefined, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - }, - scanIo.io, - ); - expect(scanIo.stderr()).toBe('Project: /tmp/project\n'); - }); - - it('dispatches top-level scan --mode relationships through Commander', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect( - runKtxCli(['scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'], io.io, { - scan, - }), - ).resolves.toBe(0); - - expect(scan).toHaveBeenCalledWith( - { - command: 'run', - projectDir: '/tmp/project', - connectionId: 'warehouse', - mode: 'relationships', - detectRelationships: true, - dryRun: false, - databaseIntrospectionUrl: undefined, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - }, - io.io, - ); - expect(io.stderr()).toBe('Project: /tmp/project\n'); - }); - - it.each(['--enrich', '--detect-relationships'])('rejects removed scan shorthand option %s', async 
(option) => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect(runKtxCli(['scan', 'warehouse', option], io.io, { scan })).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toContain(`unknown option '${option}'`); - }); - - it('rejects scan without a connection id', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect(runKtxCli(['scan', '--dry-run'], io.io, { scan })).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toMatch(/missing required argument/i); - }); - - it('rejects invalid scan modes before dispatch', async () => { - const io = makeIo(); - const scan = vi.fn(async () => 0); - - await expect(runKtxCli(['scan', 'warehouse', '--mode', 'deep'], io.io, { scan })).resolves.toBe(1); - - expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toContain("argument 'deep' is invalid"); - expect(io.stderr()).toContain('Allowed choices are structural, enriched, relationships'); + expect(helpIo.stdout()).not.toMatch(/^ run\s/m); + expect(runIo.stderr()).toMatch(/unknown option '--connection-id'|error:/); + expect(publicIngest).not.toHaveBeenCalled(); }); it.each([ - ['scan', 'report', 'scan-run-1'], - ['scan', 'relationships', 'scan-run-1'], - ])('rejects removed scan subcommand %s %s', async (command, subcommand, runId) => { + { argv: ['scan'] }, + { argv: ['scan', '--help'] }, + { argv: ['scan', 'warehouse'] }, + { argv: ['scan', 'warehouse', '--project-dir', '/tmp/project', '--dry-run'] }, + { argv: ['scan', 'warehouse', '--project-dir', '/tmp/project', '--mode', 'relationships'] }, + ])('rejects removed top-level scan command $argv', async ({ argv }) => { const io = makeIo(); - const scan = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); - await expect(runKtxCli([command, subcommand, runId], io.io, { scan })).resolves.toBe(1); + await expect(runKtxCli(argv, io.io, { publicIngest })).resolves.toBe(1); - 
expect(scan).not.toHaveBeenCalled(); - expect(io.stderr()).toMatch(/too many arguments|unknown command|error:/); + expect(publicIngest).not.toHaveBeenCalled(); + expect(io.stderr()).toMatch(/unknown command|error:/); }); - it('dispatches top-level ingest run through the low-level ingest Commander registration', async () => { + it('rejects old adapter-backed top-level ingest flags without low-level ingest registration', async () => { const io = makeIo(); - const ingest = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); await expect( runKtxCli( @@ -272,24 +199,11 @@ describe('dev Commander tree', () => { '--json', ], io.io, - { ingest }, + { publicIngest }, ), - ).resolves.toBe(0); + ).resolves.toBe(1); - expect(ingest).toHaveBeenCalledWith( - { - command: 'run', - projectDir: '/tmp/project', - connectionId: 'warehouse', - adapter: 'metabase', - sourceDir: undefined, - databaseIntrospectionUrl: undefined, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - outputMode: 'json', - }, - io.io, - ); - expect(io.stderr()).toBe(''); + expect(publicIngest).not.toHaveBeenCalled(); + expect(io.stderr()).toMatch(/unknown option '--connection-id'|error:/); }); }); diff --git a/packages/cli/src/dev.ts b/packages/cli/src/dev.ts index 9391cc43..37865c57 100644 --- a/packages/cli/src/dev.ts +++ b/packages/cli/src/dev.ts @@ -8,7 +8,7 @@ profileMark('module:dev'); export function registerDevCommands(program: Command, context: KtxCliCommandContext): void { const dev = program - .command('dev', { hidden: true }) + .command('dev') .description('Low-level project initialization and runtime management') .showHelpAfterError(); diff --git a/packages/cli/src/doctor.test.ts b/packages/cli/src/doctor.test.ts index f4cfb38b..b89e52d3 100644 --- a/packages/cli/src/doctor.test.ts +++ b/packages/cli/src/doctor.test.ts @@ -329,6 +329,68 @@ describe('runKtxDoctor', () => { delete process.env.OPENAI_API_KEY; }); + it('includes Postgres query-history readiness in 
project doctor output', async () => { + process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret + process.env.OPENAI_API_KEY = 'test-key'; // pragma: allowlist secret + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' context:', + ' queryHistory:', + ' enabled: true', + 'llm:', + ' provider:', + ' backend: anthropic', + 'ingest:', + ' adapters:', + ' - live-database', + ' - historic-sql', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + let probeCalls = 0; + + await expect( + runKtxDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + postgresQueryHistoryProbe: async () => { + probeCalls += 1; + return { + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: [ + 'pg_stat_statements.max is 1000; set it to at least 5000 to reduce query-template eviction churn', + ], + }; + }, + }, + ), + ).resolves.toBe(0); + + const out = testIo.stdout(); + expect(probeCalls).toBe(1); + expect(out).toContain('Query history'); + expect(out).toContain('warehouse'); + expect(out).toContain('pg_stat_statements ready (PostgreSQL 16.4)'); + expect(out).toContain('info: pg_stat_statements.max is 1000'); + expect(out).not.toContain('Update the Postgres parameter group or config'); + delete process.env.ANTHROPIC_API_KEY; + delete process.env.OPENAI_API_KEY; + }); + it('returns blocked verdict when LLM is not configured', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -398,7 +460,13 @@ describe('runKtxDoctor', () => { runKtxDoctor( { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, testIo.io, - {}, + { + postgresQueryHistoryProbe: async () => ({ + pgServerVersion: 'PostgreSQL 16.4', + warnings: [], + info: [], + }), + 
}, ), ).resolves.toBe(0); diff --git a/packages/cli/src/doctor.ts b/packages/cli/src/doctor.ts index 9342f24e..c4928e5c 100644 --- a/packages/cli/src/doctor.ts +++ b/packages/cli/src/doctor.ts @@ -4,6 +4,7 @@ import { access } from 'node:fs/promises'; import { join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { promisify } from 'node:util'; +import type { BuildProjectStatusOptions } from './status-project.js'; const execFileAsync = promisify(execFile); @@ -54,7 +55,7 @@ interface SetupDoctorDeps { importBetterSqlite3?: () => Promise; } -interface KtxDoctorDeps { +interface KtxDoctorDeps extends BuildProjectStatusOptions { runSetupChecks?: () => Promise; } @@ -462,7 +463,7 @@ export async function runKtxDoctor( const { loadKtxProject } = await import('@ktx/context/project'); const { buildProjectStatus, renderProjectStatus } = await import('./status-project.js'); const project = await loadKtxProject({ projectDir: args.projectDir }); - const projectStatus = buildProjectStatus(project); + const projectStatus = await buildProjectStatus(project, deps); const verbose = args.verbose ?? false; const toolchainChecks = verbose ? 
await runSetupChecks() : undefined; if (args.outputMode === 'json') { diff --git a/packages/cli/src/example-smoke.test.ts b/packages/cli/src/example-smoke.test.ts index f1670544..e59d7d7e 100644 --- a/packages/cli/src/example-smoke.test.ts +++ b/packages/cli/src/example-smoke.test.ts @@ -71,7 +71,6 @@ describe('standalone local warehouse example', () => { it('runs local CLI commands against the copied example project', async () => { const projectDir = await copyExampleProject(tempDir); - const sourceDir = join(projectDir, 'source'); const knowledgeList = await runBuiltCli(['wiki', 'search', 'revenue', '--json', '--project-dir', projectDir]); expect(knowledgeList).toMatchObject({ code: 0, stderr: '' }); @@ -105,19 +104,13 @@ describe('standalone local warehouse example', () => { const ingest = await runBuiltCli([ 'ingest', 'run', - '--project-dir', - projectDir, '--connection-id', 'warehouse', '--adapter', 'fake', - '--source-dir', - sourceDir, ]); expect(ingest).toMatchObject({ code: 1, stdout: '' }); - expect(ingest.stderr).toContain( - 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', - ); + expect(ingest.stderr).toContain("unknown option '--connection-id'"); }, 30_000); }); diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index d1c2587e..35c425c0 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -124,9 +124,10 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toContain('Usage: ktx [options] [command]'); expect(testIo.stdout()).toContain('KTX data agent context layer CLI'); - for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'scan']) { + for (const command of ['setup', 'connection', 'ingest', 'wiki', 'sl', 'status', 'dev']) { expect(testIo.stdout()).toContain(`${command}`); } + expect(testIo.stdout()).not.toMatch(/^ scan\s/m); for (const removed of ['demo', 'init', 'connect', 'ask', 'knowledge', 'agent', 
'completion', 'serve']) { expect(testIo.stdout()).not.toMatch(new RegExp(`^\\s+${removed}(?:\\s|\\[|$)`, 'm')); } @@ -134,71 +135,60 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toContain('KTX_PROJECT_DIR'); expect(testIo.stdout()).toContain('--debug'); expect(testIo.stdout()).not.toContain('--' + 'verbose'); - expect(testIo.stdout()).toContain('Advanced:'); - expect(testIo.stdout()).toContain('ktx dev'); + expect(testIo.stdout()).not.toContain('Advanced:'); expect(testIo.stderr()).toBe(''); }); - it('routes public wiki read and write commands', async () => { + it('routes supported public wiki commands', async () => { const knowledge = vi.fn(async () => 0); - const readIo = makeIo(); - await expect(runKtxCli(['--project-dir', tempDir, 'wiki', 'read', 'revenue', '--json'], readIo.io, { knowledge })) + const listIo = makeIo(); + await expect(runKtxCli(['--project-dir', tempDir, 'wiki', 'list', '--json'], listIo.io, { knowledge })) .resolves.toBe(0); expect(knowledge).toHaveBeenCalledWith( { - command: 'read', + command: 'list', projectDir: tempDir, - key: 'revenue', userId: 'local', json: true, }, - readIo.io, + listIo.io, ); - const writeIo = makeIo(); + const searchIo = makeIo(); await expect( - runKtxCli( - [ - '--project-dir', - tempDir, - 'wiki', - 'write', - 'revenue', - '--scope', - 'user', - '--summary', - 'Revenue', - '--content', - 'Revenue.', - '--tag', - 'finance', - '--ref', - 'https://example.com/revenue', - '--sl-ref', - 'orders', - ], - writeIo.io, - { knowledge }, - ), + runKtxCli(['--project-dir', tempDir, 'wiki', 'search', 'revenue', '--limit', '5'], searchIo.io, { knowledge }), ).resolves.toBe(0); expect(knowledge).toHaveBeenLastCalledWith( { - command: 'write', + command: 'search', projectDir: tempDir, - key: 'revenue', - scope: 'USER', + query: 'revenue', userId: 'local', - summary: 'Revenue', - content: 'Revenue.', - tags: ['finance'], - refs: ['https://example.com/revenue'], - slRefs: ['orders'], + json: false, + limit: 5, }, - 
writeIo.io, + searchIo.io, ); }); + it('rejects removed public wiki read and write commands', async () => { + const knowledge = vi.fn(async () => 0); + + for (const argv of [ + ['--project-dir', tempDir, 'wiki', 'read', 'revenue', '--json'], + ['--project-dir', tempDir, 'wiki', 'write', 'revenue', '--summary', 'Revenue', '--content', 'Revenue.'], + ]) { + const io = makeIo(); + + await expect(runKtxCli(argv, io.io, { knowledge })).resolves.toBe(1); + + expect(io.stderr()).toMatch(/unknown command|error:/); + } + + expect(knowledge).not.toHaveBeenCalled(); + }); + it('rejects removed public sl read/write commands', async () => { const sl = vi.fn(async () => 0); @@ -349,23 +339,15 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(''); }); - it('skips the project directory line for JSON and TUI output modes', async () => { - const ingest = vi.fn(async () => 0); + it('skips the project directory line for JSON output mode', async () => { + const publicIngest = vi.fn(async () => 0); const jsonIo = makeIo(); - const vizIo = makeIo({ stdoutIsTty: true }); - await expect(runKtxCli(['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--json'], jsonIo.io, { ingest })) - .resolves.toBe(0); await expect( - runKtxCli( - ['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--viz'], - vizIo.io, - { ingest }, - ), + runKtxCli(['--project-dir', tempDir, 'ingest', 'warehouse', '--json'], jsonIo.io, { publicIngest }), ).resolves.toBe(0); expect(jsonIo.stderr()).toBe(''); - expect(vizIo.stderr()).toBe(''); }); it('documents runtime stop all in command help', async () => { @@ -476,6 +458,12 @@ describe('runKtxCli', () => { '--new-database-connection-id', '--enable-historic-sql', '--historic-sql-min-executions', + '--enable-query-history', + '--disable-query-history', + '--query-history-window-days', + '--query-history-min-executions', + '--query-history-service-account-pattern', + '--query-history-redaction-pattern', '--skip-databases', '--source ', 
'--source-connection-id', @@ -492,6 +480,8 @@ describe('runKtxCli', () => { expect(stdout).not.toContain(hiddenFlag); } expect(stdout).not.toMatch(/^ --project\s/m); + expect(stdout).not.toContain('primary ' + 'source'); + expect(stdout).not.toContain('primary ' + 'sources'); expect(testIo.stderr()).toBe(''); }); @@ -661,73 +651,104 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toMatch(/unknown command|error:/); }); - it('rejects removed public ingest shorthand', async () => { + it('routes public connection-centric ingest shorthand', async () => { const testIo = makeIo(); - const ingest = vi.fn().mockResolvedValue(0); + const publicIngest = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse'], testIo.io, { ingest })) - .resolves.toBe(1); + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--no-input'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(0); - expect(ingest).not.toHaveBeenCalled(); - expect(testIo.stderr()).toMatch(/unknown command|error:/); + expect(publicIngest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + depth: 'fast', + queryHistory: 'default', + cliVersion: '0.0.0-private', + runtimeInstallPolicy: 'never', + }, + testIo.io, + ); + expect(testIo.stderr()).toBe('Project: /tmp/project\n'); }); - it('prints ingest watch help from Commander', async () => { + it('routes public ingest --all --deep with JSON output', async () => { const testIo = makeIo(); - const ingest = vi.fn(async () => 0); + const publicIngest = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['ingest', 'watch', '--help'], testIo.io, { ingest })).resolves.toBe(0); + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', '--all', '--deep', '--json'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(0); - 
expect(testIo.stdout()).toContain('Usage: ktx ingest watch [options] [runId]'); - expect(testIo.stdout()).toContain('[runId]'); - expect(testIo.stdout()).toContain('--project-dir '); - expect(testIo.stdout()).toContain('--json'); - expect(testIo.stdout()).toContain('--no-input'); + expect(publicIngest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + all: true, + json: true, + inputMode: 'auto', + depth: 'deep', + queryHistory: 'default', + cliVersion: '0.0.0-private', + runtimeInstallPolicy: 'prompt', + }, + testIo.io, + ); expect(testIo.stderr()).toBe(''); - expect(ingest).not.toHaveBeenCalled(); }); - it('dispatches ingest status and watch through Commander', async () => { - const statusIo = makeIo(); - const watchIo = makeIo(); - const ingest = vi.fn(async () => 0); + it('rejects mutually exclusive public ingest depth flags before dispatch', async () => { + const testIo = makeIo(); + const publicIngest = vi.fn().mockResolvedValue(0); await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', 'status', 'run-1', '--json', '--no-input'], statusIo.io, { - ingest, + runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--deep'], testIo.io, { + publicIngest, }), - ).resolves.toBe(0); - await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', 'watch', '--no-input'], watchIo.io, { - ingest, - }), - ).resolves.toBe(0); + ).resolves.toBe(1); - expect(ingest).toHaveBeenNthCalledWith( - 1, - { - command: 'status', - projectDir: tempDir, - runId: 'run-1', - outputMode: 'json', - inputMode: 'disabled', - }, - statusIo.io, - ); - expect(ingest).toHaveBeenNthCalledWith( - 2, - { - command: 'watch', - projectDir: tempDir, - outputMode: 'viz', - inputMode: 'disabled', - }, - watchIo.io, - ); - expect(statusIo.stderr()).toBe(''); - expect(watchIo.stderr()).toBe(''); + expect(publicIngest).not.toHaveBeenCalled(); + expect(testIo.stderr()).toMatch(/option '--(deep|fast)' cannot be used with option '--(fast|deep)'/); }); + 
it.each(['run', 'status', 'watch', 'replay'])( + 'routes former ingest subcommand name "%s" as a connection id', + async (connectionId) => { + const testIo = makeIo(); + const publicIngest = vi.fn(async () => 0); + + await expect( + runKtxCli(['--project-dir', '/tmp/project', 'ingest', connectionId, '--no-input'], testIo.io, { + publicIngest, + }), + ).resolves.toBe(0); + + expect(publicIngest).toHaveBeenCalledWith( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: connectionId, + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'default', + cliVersion: '0.0.0-private', + runtimeInstallPolicy: 'never', + }, + testIo.io, + ); + }, + ); + it('rejects standalone demo commands', async () => { const testIo = makeIo(); @@ -778,21 +799,26 @@ describe('runKtxCli', () => { it('prints ingest help without invoking ingest execution', async () => { const testIo = makeIo(); - const ingest = vi.fn(); + const publicIngest = vi.fn(); - await expect(runKtxCli(['ingest', '--help'], testIo.io, { ingest })).resolves.toBe(0); + await expect(runKtxCli(['ingest', '--help'], testIo.io, { publicIngest })).resolves.toBe(0); - expect(testIo.stdout()).toContain('Usage: ktx ingest [options] [command]'); - expect(testIo.stdout()).toContain('Run or inspect local ingest memory-flow output'); - expect(testIo.stdout()).toContain('run'); + expect(testIo.stdout()).toContain('Usage: ktx ingest'); + expect(testIo.stdout()).toContain('Build or inspect KTX context'); + expect(testIo.stdout()).toContain('--all'); + expect(testIo.stdout()).toContain('--fast'); + expect(testIo.stdout()).toContain('--deep'); + expect(testIo.stdout()).toContain('--query-history'); + expect(testIo.stdout()).toContain('--no-query-history'); + expect(testIo.stdout()).toContain('--query-history-window-days '); expect(testIo.stdout()).toContain('text'); - expect(testIo.stdout()).toContain('status'); - expect(testIo.stdout()).toContain('watch'); - 
expect(testIo.stdout()).toContain('replay'); + expect(testIo.stdout()).not.toMatch(/^ status\s/m); + expect(testIo.stdout()).not.toMatch(/^ replay\s/m); + expect(testIo.stdout()).not.toMatch(/^ run\s/m); + expect(testIo.stdout()).not.toMatch(/^ watch\s/m); expect(testIo.stdout()).not.toContain('--manifest'); - expect(testIo.stdout()).not.toContain('--all'); expect(testIo.stderr()).toBe(''); - expect(ingest).not.toHaveBeenCalled(); + expect(publicIngest).not.toHaveBeenCalled(); }); it('routes text memory ingest through Commander without exposing chat ids', async () => { @@ -852,32 +878,30 @@ describe('runKtxCli', () => { expect(textIngest).not.toHaveBeenCalled(); }); - it('routes ingest run at the top level and rejects removed dev ingest', async () => { - const runIo = makeIo(); + it('rejects old adapter-backed ingest flags at the top level and under dev', async () => { + const rootRunIo = makeIo(); const devRunIo = makeIo(); - const ingest = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); await expect( - runKtxCli(['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], runIo.io, { ingest }), - ).resolves.toBe(0); - await expect( - runKtxCli(['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], devRunIo.io, { - ingest, + runKtxCli(['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], rootRunIo.io, { + publicIngest, }), ).resolves.toBe(1); - expect(ingest).toHaveBeenCalledWith( - expect.objectContaining({ command: 'run', connectionId: 'warehouse', adapter: 'metabase' }), - expect.anything(), - ); + await expect( + runKtxCli(['dev', 'ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'metabase'], devRunIo.io, { + publicIngest, + }), + ).resolves.toBe(1); + expect(publicIngest).not.toHaveBeenCalled(); + expect(rootRunIo.stderr()).toMatch(/unknown option '--connection-id'|error:/); expect(devRunIo.stderr()).toMatch(/unknown command|error:/); }); - it('rejects removed dev 
doctor while keeping ingest parser cases at the root', async () => { + it('rejects removed dev doctor and removed ingest parser cases', async () => { const doctor = vi.fn(async () => 0); - const ingest = vi.fn(async () => 0); const doctorIo = makeIo(); const ingestRunIo = makeIo(); - const ingestReplayHelpIo = makeIo(); await expect(runKtxCli(['dev', 'doctor', 'setup', '--json', '--no-input'], doctorIo.io, { doctor })).resolves.toBe(1); await expect( @@ -899,94 +923,13 @@ describe('runKtxCli', () => { '--no-input', ], ingestRunIo.io, - { ingest }, + {}, ), - ).resolves.toBe(0); - await expect(runKtxCli(['ingest', 'replay', '--help'], ingestReplayHelpIo.io, { ingest })).resolves.toBe(0); + ).resolves.toBe(1); expect(doctor).not.toHaveBeenCalled(); - expect(ingest).toHaveBeenCalledWith( - { - command: 'run', - projectDir: tempDir, - connectionId: 'warehouse', - adapter: 'fake', - sourceDir: tempDir, - databaseIntrospectionUrl: undefined, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - debugLlmRequestFile: `${tempDir}/debug.jsonl`, - outputMode: 'json', - inputMode: 'disabled', - }, - ingestRunIo.io, - ); - expect(ingestReplayHelpIo.stdout()).toContain('Usage: ktx ingest replay [options] '); - expect(ingestReplayHelpIo.stdout()).toContain(''); expect(doctorIo.stderr()).toMatch(/unknown command|error:/); - expect(ingestRunIo.stderr()).toBe(''); - expect(ingestReplayHelpIo.stderr()).toBe(''); - }); - - it('routes ingest managed runtime install policy separately from visualization input mode', async () => { - const autoIo = makeIo(); - const nonInteractiveIo = makeIo(); - const ingest = vi.fn(async () => 0); - - await expect( - runKtxCli( - [ - 'ingest', - 'run', - '--project-dir', - tempDir, - '--connection-id', - 'warehouse', - '--adapter', - 'looker', - '--yes', - ], - autoIo.io, - { ingest }, - ), - ).resolves.toBe(0); - await expect( - runKtxCli( - [ - 'ingest', - 'run', - '--project-dir', - tempDir, - '--connection-id', - 'warehouse', - 
'--adapter', - 'looker', - '--yes', - '--no-input', - ], - nonInteractiveIo.io, - { ingest }, - ), - ).resolves.toBe(0); - - expect(ingest).toHaveBeenCalledWith( - expect.objectContaining({ - command: 'run', - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'auto', - }), - autoIo.io, - ); - expect(ingest).toHaveBeenCalledWith( - expect.objectContaining({ - command: 'run', - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'auto', - inputMode: 'disabled', - }), - nonInteractiveIo.io, - ); - expect(nonInteractiveIo.stderr()).toBe(`Project: ${tempDir}\n`); + expect(ingestRunIo.stderr()).toMatch(/unknown option '--connection-id'|error:/); }); it('dispatches public connection through the existing connection implementation', async () => { @@ -1208,10 +1151,10 @@ describe('runKtxCli', () => { 'env:DATABASE_URL', '--database-schema', 'public', - '--enable-historic-sql', - '--historic-sql-window-days', + '--enable-query-history', + '--query-history-window-days', '30', - '--historic-sql-min-executions', + '--query-history-min-executions', '12', ], setupIo.io, @@ -1232,15 +1175,32 @@ describe('runKtxCli', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: ['public'], - enableHistoricSql: true, - historicSqlWindowDays: 30, - historicSqlMinExecutions: 12, + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, skipDatabases: false, }), setupIo.io, ); }); + it('dispatches setup database connection ids that match former ingest subcommand names', async () => { + const testIo = makeIo(); + const setup = vi.fn(async () => 0); + + await expect( + runKtxCli(['setup', '--new-database-connection-id', 'status', '--no-input'], testIo.io, { setup }), + ).resolves.toBe(0); + + expect(setup).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + databaseConnectionId: 'status', + }), + testIo.io, + ); + }); + it('dispatches setup source flags', async () => { const setup = vi.fn(async () => 0); 
const testIo = makeIo(); @@ -1399,18 +1359,20 @@ describe('runKtxCli', () => { expect(setupIo.stderr()).toContain('Choose only one embedding credential source'); }); - it('rejects conflicting Historic SQL setup flags', async () => { + it('rejects conflicting query-history setup flags', async () => { const setup = vi.fn(async () => 0); const setupIo = makeIo(); await expect( - runKtxCli(['--project-dir', tempDir, 'setup', '--enable-historic-sql', '--disable-historic-sql'], setupIo.io, { + runKtxCli(['--project-dir', tempDir, 'setup', '--enable-query-history', '--disable-query-history'], setupIo.io, { setup, }), ).resolves.toBe(1); expect(setup).not.toHaveBeenCalled(); - expect(setupIo.stderr()).toContain('Choose only one Historic SQL action'); + expect(setupIo.stderr()).toContain( + 'Choose only one query-history action: --enable-query-history or --disable-query-history.', + ); }); it('rejects the removed hidden agent command', async () => { @@ -1601,63 +1563,20 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toContain('[debug] dispatch=connection'); }); - it('routes scan through the top-level command with top-level project-dir', async () => { + it.each([ + { argv: ['scan'] }, + { argv: ['scan', '--help'] }, + { argv: ['scan', 'warehouse'] }, + { argv: ['scan', 'warehouse', '--project-dir', '/tmp/project'] }, + { argv: ['scan', 'warehouse', '--mode', 'relationships'] }, + ])('rejects removed top-level scan command $argv', async ({ argv }) => { const testIo = makeIo(); - const scan = vi.fn().mockResolvedValue(0); + const publicIngest = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse'], testIo.io, { scan })).resolves.toBe( - 0, - ); + await expect(runKtxCli(argv, testIo.io, { publicIngest })).resolves.toBe(1); - expect(scan).toHaveBeenCalledWith( - { - command: 'run', - projectDir: tempDir, - connectionId: 'warehouse', - mode: 'structural', - detectRelationships: false, - dryRun: false, - 
databaseIntrospectionUrl: undefined, - cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'prompt', - }, - testIo.io, - ); - }); - - it('routes scan managed runtime install policies', async () => { - const autoIo = makeIo(); - const neverIo = makeIo(); - const conflictIo = makeIo(); - const scan = vi.fn().mockResolvedValue(0); - - await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--yes'], autoIo.io, { scan })) - .resolves.toBe(0); - await expect(runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--no-input'], neverIo.io, { scan })) - .resolves.toBe(0); - await expect( - runKtxCli(['--project-dir', tempDir, 'scan', 'warehouse', '--yes', '--no-input'], conflictIo.io, { - scan, - }), - ).resolves.toBe(1); - - expect(scan).toHaveBeenNthCalledWith( - 1, - expect.objectContaining({ - command: 'run', - runtimeInstallPolicy: 'auto', - }), - autoIo.io, - ); - expect(scan).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - command: 'run', - runtimeInstallPolicy: 'never', - }), - neverIo.io, - ); - expect(conflictIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input'); + expect(testIo.stderr()).toMatch(/unknown command|error:/); + expect(publicIngest).not.toHaveBeenCalled(); }); it('rejects removed public serve command options before dispatch', async () => { @@ -1705,27 +1624,17 @@ describe('runKtxCli', () => { it('rejects removed dev command groups without invoking execution', async () => { for (const command of ['scan', 'ingest', 'mapping']) { const testIo = makeIo(); - const scan = vi.fn().mockResolvedValue(0); + const publicIngest = vi.fn().mockResolvedValue(0); const sl = vi.fn().mockResolvedValue(0); - await expect(runKtxCli(['dev', command], testIo.io, { scan, sl })).resolves.toBe(1); + await expect(runKtxCli(['dev', command], testIo.io, { publicIngest, sl })).resolves.toBe(1); expect(testIo.stderr()).toMatch(/unknown command|error:/); - expect(scan).not.toHaveBeenCalled(); + 
expect(publicIngest).not.toHaveBeenCalled(); expect(sl).not.toHaveBeenCalled(); } }); - it('rejects removed scan subcommands without invoking scan execution', async () => { - const testIo = makeIo(); - const scan = vi.fn().mockResolvedValue(0); - - await expect(runKtxCli(['scan', 'report'], testIo.io, { scan })).resolves.toBe(1); - - expect(testIo.stderr()).toMatch(/too many arguments|unknown command|error:/); - expect(scan).not.toHaveBeenCalled(); - }); - it('rejects removed reserved dev subcommands', async () => { const testIo = makeIo(); @@ -1734,19 +1643,16 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toMatch(/unknown command|error:/); }); - it('rejects mutually exclusive output modes before invoking runners', async () => { - const ingest = vi.fn(async () => 0); + it('rejects mutually exclusive public ingest output modes before invoking runners', async () => { + const publicIngest = vi.fn(async () => 0); - for (const argv of [ - ['ingest', 'run', '--connection-id', 'warehouse', '--adapter', 'fake', '--json', '--plain'], - ['ingest', 'status', 'run-1', '--json', '--viz'], - ]) { - const testIo = makeIo(); - await expect(runKtxCli(argv, testIo.io, { ingest })).resolves.toBe(1); - expect(testIo.stderr()).toMatch(/conflict|cannot be used/i); - } + const testIo = makeIo(); + await expect(runKtxCli(['ingest', 'warehouse', '--json', '--plain'], testIo.io, { publicIngest })).resolves.toBe( + 1, + ); - expect(ingest).not.toHaveBeenCalled(); + expect(testIo.stderr()).toMatch(/conflict|cannot be used/i); + expect(publicIngest).not.toHaveBeenCalled(); }); it('does not expose root init after setup owns project creation', async () => { diff --git a/packages/cli/src/ingest-depth.ts b/packages/cli/src/ingest-depth.ts new file mode 100644 index 00000000..f5706d8d --- /dev/null +++ b/packages/cli/src/ingest-depth.ts @@ -0,0 +1,77 @@ +import type { KtxProjectConfig, KtxProjectConnectionConfig } from '@ktx/context/project'; + +export type KtxDatabaseContextDepth = 
'fast' | 'deep'; + +const KTX_DATABASE_DRIVER_IDS = new Set([ + 'sqlite', + 'postgres', + 'postgresql', + 'mysql', + 'clickhouse', + 'sqlserver', + 'bigquery', + 'snowflake', +]); + +export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { + return String(connection.driver ?? '') + .trim() + .toLowerCase(); +} + +export function isDatabaseDriver(driver: string): boolean { + return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); +} + +function connectionContextRecord(connection: KtxProjectConnectionConfig): Record { + const context = connection.context; + return typeof context === 'object' && context !== null && !Array.isArray(context) + ? (context as Record) + : {}; +} + +export function databaseContextDepth(connection: KtxProjectConnectionConfig): KtxDatabaseContextDepth | undefined { + const depth = connectionContextRecord(connection).depth; + return depth === 'fast' || depth === 'deep' ? depth : undefined; +} + +export function withDatabaseContextDepth( + connection: KtxProjectConnectionConfig, + depth: KtxDatabaseContextDepth, +): KtxProjectConnectionConfig { + return { + ...connection, + context: { + ...connectionContextRecord(connection), + depth, + }, + }; +} + +export function deepReadinessGaps(config: KtxProjectConfig): string[] { + const gaps: string[] = []; + if (config.llm.provider.backend === 'none' || !config.llm.models.default) { + gaps.push('model configuration'); + } + + if (config.scan.enrichment.mode !== 'llm') { + gaps.push('scan enrichment mode'); + } + + const embeddings = config.scan.enrichment.embeddings; + if ( + !embeddings || + embeddings.backend === 'none' || + embeddings.backend === 'deterministic' || + !embeddings.model || + embeddings.dimensions <= 0 + ) { + gaps.push('scan embeddings'); + } + + return gaps; +} + +export function recommendedDatabaseContextDepth(config: KtxProjectConfig): KtxDatabaseContextDepth { + return deepReadinessGaps(config).length === 0 ? 
'deep' : 'fast'; +} diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts index a37c1ed8..7d790ab7 100644 --- a/packages/cli/src/ingest-viz.test.ts +++ b/packages/cli/src/ingest-viz.test.ts @@ -514,6 +514,18 @@ describe('runKtxIngest viz and replay', () => { expect(io.stderr()).toContain('Local ingest run or report "missing-run" was not found'); }); + it('suggests public ingest when status has no stored reports', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const io = makeIo(); + + await expect(runKtxIngest({ command: 'status', projectDir, outputMode: 'plain' }, io.io)).resolves.toBe(1); + + expect(io.stderr()).toContain('No local ingest reports were found. Run `ktx ingest ` first.'); + expect(io.stderr()).not.toContain('ktx ingest run --connection-id'); + expect(io.stderr()).not.toContain('--adapter'); + }); + it('uses the latest local ingest report when status has no run id', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index cf7015b9..5384ef78 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -103,6 +103,70 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); + it('labels internal database reports without adapter names in plain status output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const report = localFakeBundleReport('scan-job-1', { + id: 'report-scan-1', + runId: 'run-scan-1', + connectionId: 'warehouse', + sourceKey: 'live-database', + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + reportFile: '/tmp/scan-report.json', + outputMode: 'plain', + }, + io.io, + { + readReportFile: vi.fn(async () => report), + }, + ), + ).resolves.toBe(0); + + 
expect(io.stdout()).toContain('Source: Database schema\n'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('live-database'); + expect(io.stderr()).toBe(''); + }); + + it('labels internal query-history reports without adapter names in plain status output', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const report = localFakeBundleReport('query-history-job-1', { + id: 'report-query-history-1', + runId: 'run-query-history-1', + connectionId: 'warehouse', + sourceKey: 'historic-sql', + }); + const io = makeIo(); + + await expect( + runKtxIngest( + { + command: 'status', + projectDir, + reportFile: '/tmp/query-history-report.json', + outputMode: 'plain', + }, + io.io, + { + readReportFile: vi.fn(async () => report), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Source: Query history\n'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('historic-sql'); + expect(io.stderr()).toBe(''); + }); + it('emits structured progress for non-TTY local ingest runs', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); @@ -138,9 +202,9 @@ describe('runKtxIngest', () => { expect.arrayContaining([ { percent: 5, message: 'Fetching source files for warehouse/fake' }, { percent: 15, message: 'Fetched 2 source files from fake' }, - { percent: 45, message: 'Planned 2 work units' }, + { percent: 45, message: 'Planned 2 tasks' }, expect.objectContaining({ - message: 'Processing work units: 0/2 complete, 1 active; latest orders step 2/4', + message: 'Processing tasks: 0/2 complete, 1 active; latest orders step 2/4', transient: true, }), ]), @@ -179,10 +243,10 @@ describe('runKtxIngest', () => { expect(progressEvents).toEqual( expect.arrayContaining([ - { percent: 80, message: 'No work units to process; finalizing ingest' }, + { percent: 80, message: 'No tasks to process; finalizing ingest' }, ]), 
); - expect(progressEvents).not.toContainEqual({ percent: 45, message: 'Planned 0 work units' }); + expect(progressEvents).not.toContainEqual({ percent: 45, message: 'Planned 0 tasks' }); }); it('prints provider setup guidance when a skip-llm setup project runs ingest', async () => { @@ -206,7 +270,7 @@ describe('runKtxIngest', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:WAREHOUSE_URL', databaseSchemas: [], - enableHistoricSql: true, + enableQueryHistory: true, skipDatabases: false, skipSources: true, }, @@ -238,6 +302,7 @@ describe('runKtxIngest', () => { connectionId: 'warehouse', adapter: 'historic-sql', sourceDir, + allowImplicitAdapter: true, outputMode: 'plain', }, runIo.io, @@ -246,7 +311,7 @@ describe('runKtxIngest', () => { expect(runIo.stdout()).toBe(''); expect(runIo.stderr()).toContain( - 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', ); expect(runIo.stderr()).toContain( `ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, @@ -375,7 +440,7 @@ describe('runKtxIngest', () => { ).resolves.toBe(1); expect(io.stdout()).toContain('Metabase fan-out: partial_failure'); - expect(io.stdout()).toContain('Failed work units: 1'); + expect(io.stdout()).toContain('Failed tasks: 1'); expect(io.stdout()).toContain('status=error'); expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); @@ -653,7 +718,7 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); expect(statusIo.stdout()).toContain('Job: metabase-child-1'); - expect(statusIo.stdout()).toContain('Adapter: metabase'); + expect(statusIo.stdout()).toContain('Source: Metabase'); expect(statusIo.stdout()).toContain('Connection: warehouse_a'); expect(statusIo.stderr()).toBe(''); }); @@ -789,7 +854,7 @@ describe('runKtxIngest', () => { 
).resolves.toBe(1); expect(io.stderr()).toContain('source-dir uploads are not supported for the Metabase fan-out adapter'); - expect(io.stderr()).not.toContain('ktx ingest run requires llm.provider.backend'); + expect(io.stderr()).not.toContain('ktx ingest requires llm.provider.backend'); expect(io.stdout()).toBe(''); }); @@ -878,7 +943,7 @@ describe('runKtxIngest', () => { ).resolves.toBe(0); expect(io.stderr()).toBe(''); - expect(io.stdout()).toContain('Adapter: historic-sql\n'); + expect(io.stdout()).toContain('Source: Query history\n'); expect(io.stdout()).toContain('Saved memory: 35 wiki, 57 SL\n'); }); @@ -1242,8 +1307,8 @@ describe('runKtxIngest', () => { const stderr = io.stderr(); expect(stderr).toContain('[5%] Fetching source files for warehouse/historic-sql'); expect(stderr).toContain('[15%] Fetched 3 source files from historic-sql'); - expect(stderr).toContain('[45%] Planned 1 work unit'); - expect(stderr).toContain('[80%] Processed 1/1 work units'); + expect(stderr).toContain('[45%] Planned 1 task'); + expect(stderr).toContain('[80%] Processed 1/1 tasks'); expect(stderr).toContain('[100%] Ingest completed'); expect(stdout).toContain('Report: report-live-1'); expect(stdout).not.toContain('[5%]'); @@ -1366,12 +1431,12 @@ describe('runKtxIngest', () => { ).resolves.toBe(0); const stderr = io.stderr(); - expect(stderr).toContain('[45%] Planned 2 work units'); - expect(stderr).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); + expect(stderr).toContain('[45%] Planned 2 tasks'); + expect(stderr).toContain('[55%] Processing 1/2 tasks: historic-sql-table-public-orders'); expect(stderr).toContain( - '\r[58%] Processing work units: 0/2 complete, 1 active; latest historic-sql-table-public-orders step 7/40\u001b[K', + '\r[58%] Processing tasks: 0/2 complete, 1 active; latest historic-sql-table-public-orders step 7/40\u001b[K', ); - expect(stderr).toContain('[68%] Processed 1/2 work units'); + expect(stderr).toContain('[68%] Processed 
1/2 tasks'); }); it('renders concurrent WorkUnit step progress as transient aggregate status', async () => { @@ -1459,10 +1524,10 @@ describe('runKtxIngest', () => { const stderr = io.stderr(); expect(stderr).toContain( - '\r[56%] Processing work units: 0/6 complete, 6 active; latest historic-sql-table-public-suppliers step 1/40\u001b[K', + '\r[56%] Processing tasks: 0/6 complete, 6 active; latest historic-sql-table-public-suppliers step 1/40\u001b[K', ); expect(stderr).not.toContain( - '\n[56%] Processing 6/6 work units: historic-sql-table-public-suppliers step 1/40\n', + '\n[56%] Processing 6/6 tasks: historic-sql-table-public-suppliers step 1/40\n', ); expect(stderr).toContain('\n[100%] Ingest completed\n'); }); @@ -1593,7 +1658,7 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); expect(io.stdout()).toContain('Job: cli-looker-job'); - expect(io.stdout()).toContain('Adapter: looker'); + expect(io.stdout()).toContain('Source: Looker'); expect(io.stdout()).toContain('Connection: prod-looker'); expect(io.stdout()).toContain('Status: done'); expect(io.stdout()).toContain('Saved memory: 0 wiki, 1 SL'); @@ -1616,7 +1681,7 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); expect(statusIo.stdout()).toContain('Job: cli-looker-job'); - expect(statusIo.stdout()).toContain('Adapter: looker'); + expect(statusIo.stdout()).toContain('Source: Looker'); expect(statusIo.stderr()).toBe(''); }); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index c1096b2b..c508c5cf 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -35,7 +35,7 @@ import { profileMark } from './startup-profile.js'; profileMark('module:ingest'); -export type KtxIngestOutputMode = 'plain' | 'json' | 'viz'; +type KtxIngestOutputMode = 'plain' | 'json' | 'viz'; type KtxIngestInputMode = 'auto' | 'disabled'; export type KtxIngestArgs = @@ -49,6 +49,8 @@ export type KtxIngestArgs = cliVersion?: string; runtimeInstallPolicy?: 
KtxManagedPythonInstallPolicy; debugLlmRequestFile?: string; + allowImplicitAdapter?: boolean; + historicSqlPullConfigOverride?: Record; outputMode: KtxIngestOutputMode; inputMode?: KtxIngestInputMode; } @@ -101,19 +103,42 @@ function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; } +const REPORT_SOURCE_LABELS = new Map([ + ['live-database', 'Database schema'], + ['historic-sql', 'Query history'], + ['dbt', 'dbt'], + ['metricflow', 'MetricFlow'], + ['lookml', 'LookML'], + ['looker', 'Looker'], + ['metabase', 'Metabase'], + ['notion', 'Notion'], +]); + +function reportSourceLabel(sourceKey: string): string { + const label = REPORT_SOURCE_LABELS.get(sourceKey); + if (label) { + return label; + } + return sourceKey + .split(/[-_]+/) + .filter((part) => part.length > 0) + .map((part) => `${part[0]?.toUpperCase() ?? ''}${part.slice(1)}`) + .join(' '); +} + function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void { const counts = savedMemoryCountsForReport(report); io.stdout.write(`Report: ${report.id}\n`); io.stdout.write(`Run: ${report.runId}\n`); io.stdout.write(`Job: ${report.jobId}\n`); io.stdout.write(`Status: ${reportStatus(report)}\n`); - io.stdout.write(`Adapter: ${report.sourceKey}\n`); + io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); io.stdout.write(`Sync: ${report.body.syncId}\n`); io.stdout.write( `Diff: +${report.body.diffSummary.added}/~${report.body.diffSummary.modified}/-${report.body.diffSummary.deleted}/=${report.body.diffSummary.unchanged}\n`, ); - io.stdout.write(`Work units: ${report.body.workUnits.length}\n`); + io.stdout.write(`Tasks: ${report.body.workUnits.length}\n`); io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); io.stdout.write(`Provenance rows: ${report.body.provenanceRows.length}\n`); } @@ -133,8 +158,8 @@ function 
writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng io.stdout.write(`Source: ${result.metabaseConnectionId}\n`); io.stdout.write(`Children: ${result.children.length}\n`); if (result.totals) { - io.stdout.write(`Work units: ${result.totals.workUnits}\n`); - io.stdout.write(`Failed work units: ${result.totals.failedWorkUnits}\n`); + io.stdout.write(`Tasks: ${result.totals.workUnits}\n`); + io.stdout.write(`Failed tasks: ${result.totals.failedWorkUnits}\n`); } io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { @@ -255,19 +280,19 @@ function plainIngestEventProgress( if (event.workUnitCount === 0) { return { percent: 80, - message: 'No work units to process; finalizing ingest', + message: 'No tasks to process; finalizing ingest', }; } return { percent: 45, - message: `Planned ${pluralize(event.workUnitCount, 'work unit')}`, + message: `Planned ${pluralize(event.workUnitCount, 'task')}`, }; case 'stage_skipped': return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` }; case 'work_unit_started': { const total = plannedWorkUnitCountThrough(snapshot, eventIndex); const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); - const progress = total > 0 ? `${ordinal}/${total} work units: ` : ''; + const progress = total > 0 ? `${ordinal}/${total} tasks: ` : ''; return { percent: 55, message: `Processing ${progress}${event.unitKey}` }; } case 'work_unit_step': { @@ -279,7 +304,7 @@ function plainIngestEventProgress( const latest = `${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`; return { percent, - message: `Processing work units: ${completed}/${total} complete, ${active} active; latest ${latest}`, + message: `Processing tasks: ${completed}/${total} complete, ${active} active; latest ${latest}`, transient: true, }; } @@ -289,7 +314,7 @@ function plainIngestEventProgress( const percent = total > 0 ? 
55 + Math.round((completed / total) * 25) : 80; return { percent, - message: `Processed ${completed}/${total} work units`, + message: `Processed ${completed}/${total} tasks`, }; } case 'reconciliation_finished': @@ -571,6 +596,19 @@ export async function runKtxIngest( const project = await loadKtxProject({ projectDir: args.projectDir }); const env = deps.env ?? process.env; if (args.command === 'run') { + const ingestProject = + args.allowImplicitAdapter && !project.config.ingest.adapters.includes(args.adapter) + ? { + ...project, + config: { + ...project.config, + ingest: { + ...project.config.ingest, + adapters: [...project.config.ingest.adapters, args.adapter], + }, + }, + } + : project; const createAdapters = deps.createAdapters ?? (deps.runLocalIngest || deps.runLocalMetabaseIngest ? () => [] : createKtxCliLocalIngestAdapters); @@ -583,11 +621,14 @@ export async function runKtxIngest( ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(managedDaemon ? { managedDaemon } : {}), ...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}), + ...(args.historicSqlPullConfigOverride + ? { historicSqlPullConfigOverride: args.historicSqlPullConfigOverride } + : {}), logger: operationalLogger, }; const queryExecutor = localIngestOptions.queryExecutor ?? - (deps.createQueryExecutor ?? createKtxCliIngestQueryExecutor)(project); + (deps.createQueryExecutor ?? 
createKtxCliIngestQueryExecutor)(ingestProject); if (args.adapter === 'metabase' && args.sourceDir) { throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter'); } @@ -604,8 +645,8 @@ export async function runKtxIngest( deps.progress, ); const result = await executeMetabaseFanout({ - project, - adapters: createAdapters(project, adapterOptions), + project: ingestProject, + adapters: createAdapters(ingestProject, adapterOptions), metabaseConnectionId: args.connectionId, ...localIngestOptions, queryExecutor, @@ -668,8 +709,8 @@ export async function runKtxIngest( try { const result = await executeLocalIngest({ - project, - adapters: createAdapters(project, adapterOptions), + project: ingestProject, + adapters: createAdapters(ingestProject, adapterOptions), adapter: args.adapter, connectionId: args.connectionId, sourceDir: args.sourceDir, @@ -720,7 +761,7 @@ export async function runKtxIngest( throw new Error( args.runId ? `Local ingest run or report "${args.runId}" was not found` - : 'No local ingest reports were found. Run `ktx ingest run --connection-id --adapter ` first.', + : 'No local ingest reports were found. 
Run `ktx ingest ` first.', ); } await writeReportRecord(report, args.outputMode, io, { diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts index c4b3fdd9..2486d621 100644 --- a/packages/cli/src/knowledge.test.ts +++ b/packages/cli/src/knowledge.test.ts @@ -192,7 +192,7 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toBe(''); expect(searchIo.stderr()).toContain('No local wiki pages found'); - expect(searchIo.stderr()).toContain('ktx wiki write'); + expect(searchIo.stderr()).toContain('ktx ingest '); }); it('uses configured embeddings for semantic wiki search', async () => { diff --git a/packages/cli/src/knowledge.ts b/packages/cli/src/knowledge.ts index 2e039dea..b4585c0e 100644 --- a/packages/cli/src/knowledge.ts +++ b/packages/cli/src/knowledge.ts @@ -113,7 +113,7 @@ export async function runKtxKnowledge( const pages = await listLocalKnowledgePages(project, { userId: args.userId }); if (pages.length === 0) { io.stderr.write( - `No local wiki pages found in ${project.projectDir}. Create one with \`ktx wiki write --summary --content \` or run ingest.\n`, + `No local wiki pages found in ${project.projectDir}. 
Add Markdown files under wiki/ or run \`ktx ingest \`.\n`, ); } else { io.stderr.write( diff --git a/packages/cli/src/local-adapters.test.ts b/packages/cli/src/local-adapters.test.ts index b7491920..12f8d652 100644 --- a/packages/cli/src/local-adapters.test.ts +++ b/packages/cli/src/local-adapters.test.ts @@ -67,6 +67,38 @@ describe('CLI local ingest adapters', () => { ]); }); + it('registers Postgres historic SQL from connection context query history', async () => { + await writeProject( + tempDir, + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' readonly: true', + ' context:', + ' queryHistory:', + ' enabled: true', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + ); + const project = await loadKtxProject({ projectDir: tempDir }); + + const adapters = createKtxCliLocalIngestAdapters(project, { + historicSqlConnectionId: 'warehouse', + sqlAnalysis: sqlAnalysisStub(), + }); + + expect(adapters.find((adapter) => adapter.source === 'historic-sql')?.skillNames).toEqual([ + 'historic_sql_table_digest', + 'historic_sql_patterns', + ]); + }); + it('registers BigQuery historic SQL from the requested connection', async () => { await writeProject( tempDir, @@ -135,4 +167,34 @@ describe('CLI local ingest adapters', () => { 'historic_sql_patterns', ]); }); + + it('uses query-history wording for public BigQuery capability errors', async () => { + await writeProject( + tempDir, + [ + 'project: warehouse', + 'connections:', + ' bq:', + ' driver: bigquery', + ' readonly: true', + ' dataset_id: analytics', + ' credentials_json: "{}"', + ' context:', + ' queryHistory:', + ' enabled: true', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + ); + const project = await loadKtxProject({ projectDir: tempDir }); + + expect(() => + createKtxCliLocalIngestAdapters(project, { + historicSqlConnectionId: 'bq', + sqlAnalysis: sqlAnalysisStub(), + }), + ).toThrow('Query 
history BigQuery connection requires credentials_json.project_id'); + }); }); diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts index 010a7188..daa8f63b 100644 --- a/packages/cli/src/local-adapters.ts +++ b/packages/cli/src/local-adapters.ts @@ -180,19 +180,37 @@ function historicSqlRecord(connection: unknown): Record | null } function enabledHistoricSqlDialect(connection: unknown): 'postgres' | 'bigquery' | 'snowflake' | null { - const historicSql = historicSqlRecord(connection); - if (historicSql?.enabled !== true) { + const direct = historicSqlRecord(connection); + const context = + connection && typeof connection === 'object' && !Array.isArray(connection) + ? (connection as { context?: unknown }).context + : null; + const queryHistory = + context && typeof context === 'object' && !Array.isArray(context) + ? (context as { queryHistory?: unknown }).queryHistory + : null; + const enabled = + queryHistory && typeof queryHistory === 'object' && !Array.isArray(queryHistory) + ? (queryHistory as { enabled?: unknown }).enabled === true + : direct?.enabled === true; + if (!enabled) { return null; } - const dialect = String(historicSql.dialect ?? '').toLowerCase(); - return dialect === 'postgres' || dialect === 'bigquery' || dialect === 'snowflake' ? dialect : null; + const driver = String((connection as { driver?: unknown })?.driver ?? '').toLowerCase(); + if (driver === 'postgres' || driver === 'postgresql') return 'postgres'; + if (driver === 'bigquery') return 'bigquery'; + if (driver === 'snowflake') return 'snowflake'; + const legacyDialect = String(direct?.dialect ?? '').toLowerCase(); + return legacyDialect === 'postgres' || legacyDialect === 'bigquery' || legacyDialect === 'snowflake' + ? 
legacyDialect + : null; } function createEphemeralPostgresHistoricSqlClient(project: KtxLocalProject, connectionId: string) { const connection = project.config.connections[connectionId] as KtxPostgresConnectionConfig | undefined; const inputDriver = connection?.driver ?? 'unknown'; if (!isKtxPostgresConnectionConfig(connection)) { - throw new Error(`Historic SQL local ingest requires a Postgres connection, got ${String(inputDriver)}`); + throw new Error(`Query history ingest requires a Postgres connection, got ${String(inputDriver)}`); } return { async executeQuery(sql: string, params?: unknown[]) { @@ -213,7 +231,7 @@ function createEphemeralBigQueryHistoricSqlClient(project: KtxLocalProject, conn const connection = project.config.connections[connectionId] as KtxBigQueryConnectionConfig | undefined; const inputDriver = connection?.driver ?? 'unknown'; if (!isKtxBigQueryConnectionConfig(connection)) { - throw new Error(`Historic SQL local ingest requires a BigQuery connection, got ${String(inputDriver)}`); + throw new Error(`Query history ingest requires a BigQuery connection, got ${String(inputDriver)}`); } return { async executeQuery(query: string) { @@ -243,7 +261,7 @@ async function createEphemeralSnowflakeHistoricSqlClient( const connection = project.config.connections[connectionId]; const inputDriver = connection?.driver ?? 'unknown'; if (!connectorModule.isKtxSnowflakeConnectionConfig(connection)) { - throw new Error(`Historic SQL local ingest requires a Snowflake connection, got ${String(inputDriver)}`); + throw new Error(`Query history ingest requires a Snowflake connection, got ${String(inputDriver)}`); } return { async executeQuery(query: string) { @@ -270,7 +288,7 @@ function bigQueryProjectId(connection: KtxBigQueryConnectionConfig, env: NodeJS. const resolved = raw.startsWith('env:') ? env[raw.slice('env:'.length)] ?? 
'' : raw; const parsed = JSON.parse(resolved) as { project_id?: unknown }; if (typeof parsed.project_id !== 'string' || parsed.project_id.trim().length === 0) { - throw new Error('Historic SQL BigQuery connection requires credentials_json.project_id'); + throw new Error('Query history BigQuery connection requires credentials_json.project_id'); } return parsed.project_id; } @@ -307,7 +325,7 @@ function historicSqlOptionsForLocalRun(project: KtxLocalProject, options: KtxCli if (dialect === 'bigquery') { const inputDriver = connection?.driver ?? 'unknown'; if (!isKtxBigQueryConnectionConfig(connection)) { - throw new Error(`Historic SQL local ingest requires a BigQuery connection, got ${String(inputDriver)}`); + throw new Error(`Query history ingest requires a BigQuery connection, got ${String(inputDriver)}`); } return { ...base, diff --git a/packages/cli/src/next-steps.test.ts b/packages/cli/src/next-steps.test.ts index facb4eb8..d7904555 100644 --- a/packages/cli/src/next-steps.test.ts +++ b/packages/cli/src/next-steps.test.ts @@ -10,8 +10,8 @@ describe('KTX demo next steps', () => { it('uses supported context-build commands before agent usage', () => { expect(KTX_CONTEXT_BUILD_COMMANDS).toEqual([ { - command: 'ktx setup', - description: 'Build or resume agent-ready context from configured sources', + command: 'ktx ingest --all', + description: 'Build or refresh agent-ready context from configured connections', }, { command: 'ktx status', @@ -64,8 +64,10 @@ describe('KTX demo next steps', () => { }).join('\n'); expect(rendered).toContain('Build KTX context next.'); - expect(rendered).toContain('primary-source scans and context-source ingests'); - expect(rendered).toContain('ktx setup'); + expect(rendered).toContain('Run ingest to build database schema context before context-source ingest.'); + expect(rendered).toContain('ktx ingest --all'); + expect(rendered).not.toContain('resume'); + expect(rendered).not.toContain('scan'); expect(rendered).toContain('ktx status'); 
expect(rendered).not.toContain('ktx agent context --json'); expect(rendered).not.toContain('ktx serve --mcp'); diff --git a/packages/cli/src/next-steps.ts b/packages/cli/src/next-steps.ts index ee7535d7..c36c5591 100644 --- a/packages/cli/src/next-steps.ts +++ b/packages/cli/src/next-steps.ts @@ -1,7 +1,7 @@ export const KTX_CONTEXT_BUILD_COMMANDS = [ { - command: 'ktx setup', - description: 'Build or resume agent-ready context from configured sources', + command: 'ktx ingest --all', + description: 'Build or refresh agent-ready context from configured connections', }, { command: 'ktx status', @@ -69,7 +69,7 @@ export function formatSetupNextStepLines(state: KtxSetupNextStepState, indent = if (!state.contextReady) { return [ `${indent}Build KTX context next.`, - `${indent}Preferred route: run the CLI build; it covers primary-source scans and context-source ingests.`, + `${indent}Run ingest to build database schema context before context-source ingest.`, ...commandLines(KTX_CONTEXT_BUILD_COMMANDS, indent), ]; } diff --git a/packages/cli/src/print-command-tree.test.ts b/packages/cli/src/print-command-tree.test.ts index 1385d37d..86ef451e 100644 --- a/packages/cli/src/print-command-tree.test.ts +++ b/packages/cli/src/print-command-tree.test.ts @@ -23,6 +23,16 @@ describe('renderKtxCommandTree', () => { expect(output).not.toContain('│ ├── mapping'); expect(output).not.toContain('│ ├── metabase'); expect(output).not.toContain('│ ├── notion'); + expect(output).not.toContain('scan '); + expect(output).not.toContain('│ ├── status'); + expect(output).not.toContain('│ ├── replay'); + expect(output).not.toContain('│ └── replay'); + expect(output).not.toContain('│ ├── run'); + expect(output).not.toContain('│ ├── watch'); + expect(output).not.toContain('│ └── watch'); + expect(output).not.toContain('│ ├── read'); + expect(output).not.toContain('│ ├── write'); + expect(output).not.toContain('│ └── write'); }); it('ends with a single trailing newline', () => { diff --git 
a/packages/cli/src/project-dir.test.ts b/packages/cli/src/project-dir.test.ts index 02502b35..7d25e56d 100644 --- a/packages/cli/src/project-dir.test.ts +++ b/packages/cli/src/project-dir.test.ts @@ -32,10 +32,9 @@ describe('project directory defaults', () => { const connection = vi.fn(async () => 0); const doctor = vi.fn(async () => 0); - const ingest = vi.fn(async () => 0); - const scan = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); const setup = vi.fn(async () => 0); - const deps: KtxCliDeps = { connection, doctor, ingest, scan, setup }; + const deps: KtxCliDeps = { connection, doctor, publicIngest, setup }; const cases: Array<{ argv: string[]; @@ -55,12 +54,6 @@ describe('project directory defaults', () => { expected: { command: 'project', projectDir: '/tmp/ktx-env-project' }, expectedStderr: 'Project: /tmp/ktx-env-project\n', }, - { - argv: ['ingest', 'status', 'run-1'], - spy: ingest, - expected: { command: 'status', projectDir: '/tmp/ktx-env-project', runId: 'run-1', outputMode: 'plain' }, - expectedStderr: 'Project: /tmp/ktx-env-project\n', - }, { argv: ['setup', '--no-input'], spy: setup, @@ -68,9 +61,9 @@ describe('project directory defaults', () => { expectedStderr: '', }, { - argv: ['scan', 'warehouse'], - spy: scan, - expected: { command: 'run', projectDir: '/tmp/ktx-env-project', connectionId: 'warehouse' }, + argv: ['ingest', 'warehouse', '--no-input'], + spy: publicIngest, + expected: { command: 'run', projectDir: '/tmp/ktx-env-project', targetConnectionId: 'warehouse' }, expectedStderr: 'Project: /tmp/ktx-env-project\n', }, ]; @@ -86,30 +79,33 @@ describe('project directory defaults', () => { it('lets explicit global --project-dir override KTX_PROJECT_DIR before and after nested commands', async () => { process.env.KTX_PROJECT_DIR = '/tmp/ktx-env-project'; - const scan = vi.fn(async () => 0); - const ingest = vi.fn(async () => 0); - const scanIo = makeIo(); - const ingestIo = makeIo(); + const publicIngest = vi.fn(async () => 
0); + const beforeCommandIo = makeIo(); + const afterCommandIo = makeIo(); await expect( - runKtxCli(['--project-dir', '/tmp/ktx-explicit-project', 'scan', 'warehouse'], scanIo.io, { scan }), + runKtxCli(['--project-dir', '/tmp/ktx-explicit-project', 'ingest', 'warehouse', '--no-input'], beforeCommandIo.io, { + publicIngest, + }), ).resolves.toBe(0); await expect( - runKtxCli(['ingest', 'status', 'run-1', '--project-dir=/tmp/ktx-explicit-project'], ingestIo.io, { - ingest, + runKtxCli(['ingest', 'warehouse', '--project-dir=/tmp/ktx-explicit-project', '--no-input'], afterCommandIo.io, { + publicIngest, }), ).resolves.toBe(0); - expect(scan).toHaveBeenCalledWith( + expect(publicIngest).toHaveBeenNthCalledWith( + 1, expect.objectContaining({ command: 'run', projectDir: '/tmp/ktx-explicit-project' }), - scanIo.io, + beforeCommandIo.io, ); - expect(ingest).toHaveBeenCalledWith( - expect.objectContaining({ command: 'status', projectDir: '/tmp/ktx-explicit-project' }), - ingestIo.io, + expect(publicIngest).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ command: 'run', projectDir: '/tmp/ktx-explicit-project' }), + afterCommandIo.io, ); - expect(scanIo.stderr()).toBe('Project: /tmp/ktx-explicit-project\n'); - expect(ingestIo.stderr()).toBe('Project: /tmp/ktx-explicit-project\n'); + expect(beforeCommandIo.stderr()).toBe('Project: /tmp/ktx-explicit-project\n'); + expect(afterCommandIo.stderr()).toBe('Project: /tmp/ktx-explicit-project\n'); }); it('uses nearest ancestor containing ktx.yaml when no explicit or environment project-dir exists', async () => { @@ -126,18 +122,18 @@ describe('project directory defaults', () => { await writeFile(join(projectDir, 'ktx.yaml'), 'project: warehouse\n', 'utf-8'); const expectedProjectDir = await realpath(projectDir); - const scan = vi.fn(async () => 0); + const publicIngest = vi.fn(async () => 0); const testIo = makeIo(); try { process.chdir(nestedDir); - await expect(runKtxCli(['scan', 'warehouse'], testIo.io, { scan 
})).resolves.toBe(0); + await expect(runKtxCli(['ingest', 'warehouse', '--no-input'], testIo.io, { publicIngest })).resolves.toBe(0); } finally { process.chdir(originalCwd); await rm(root, { recursive: true, force: true }); } - expect(scan).toHaveBeenCalledWith( + expect(publicIngest).toHaveBeenCalledWith( expect.objectContaining({ command: 'run', projectDir: expectedProjectDir }), testIo.io, ); diff --git a/packages/cli/src/public-ingest-copy.test.ts b/packages/cli/src/public-ingest-copy.test.ts new file mode 100644 index 00000000..d13696df --- /dev/null +++ b/packages/cli/src/public-ingest-copy.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from 'vitest'; +import { + publicDatabaseIngestMessage, + publicIngestOutputLine, + publicQueryHistoryMessage, +} from './public-ingest-copy.js'; + +describe('public ingest copy sanitizers', () => { + it('maps database scan progress into schema-context wording', () => { + expect(publicDatabaseIngestMessage('Preparing scan')).toBe('Preparing database ingest'); + expect(publicDatabaseIngestMessage('Inspecting database schema')).toBe('Reading database schema'); + expect(publicDatabaseIngestMessage('Writing schema artifacts')).toBe('Writing schema context'); + expect(publicDatabaseIngestMessage('Enriching schema metadata')).toBe('Building enriched schema context'); + }); + + it('maps database scan failure text into public database ingest wording', () => { + expect( + publicDatabaseIngestMessage( + 'KTX scan enrichment failed after structural scan completed: embedding service timed out', + ), + ).toBe('Database enrichment failed after schema context completed: embedding service timed out'); + expect(publicDatabaseIngestMessage('structural scan wrote partial artifacts')).toBe( + 'schema context wrote partial artifacts', + ); + expect(publicDatabaseIngestMessage('scan results may be less complete')).toBe( + 'database context may be less complete', + ); + }); + + it('maps query-history adapter progress into public wording', 
() => { + expect(publicQueryHistoryMessage('Fetching source files for warehouse/historic-sql', 'warehouse')).toBe( + 'Fetching query history for warehouse', + ); + expect(publicQueryHistoryMessage('Curating warehouse/historic-sql tasks', 'warehouse')).toBe( + 'Curating warehouse query history tasks', + ); + expect(publicQueryHistoryMessage('historic SQL local ingest failed', 'warehouse')).toBe( + 'query history local ingest failed', + ); + }); + + it('sanitizes captured public output lines across database and query-history internals', () => { + expect( + publicIngestOutputLine( + 'KTX scan enrichment failed after structural scan completed in raw-sources/warehouse/live-database/sync-1', + ), + ).toBe('Database enrichment failed after schema context completed in raw-sources/warehouse/database schema/sync-1'); + expect(publicIngestOutputLine('Historic SQL local ingest requires a configured reader')).toBe( + 'query history local ingest requires a configured reader', + ); + }); +}); diff --git a/packages/cli/src/public-ingest-copy.ts b/packages/cli/src/public-ingest-copy.ts new file mode 100644 index 00000000..be1206c1 --- /dev/null +++ b/packages/cli/src/public-ingest-copy.ts @@ -0,0 +1,42 @@ +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +const DATABASE_INGEST_REPLACEMENTS: Array<[RegExp, string]> = [ + [/\bPreparing scan\b/gi, 'Preparing database ingest'], + [/\bInspecting database schema\b/gi, 'Reading database schema'], + [/\bWriting schema artifacts\b/gi, 'Writing schema context'], + [/\bEnriching schema metadata\b/gi, 'Building enriched schema context'], + [ + /\bKTX scan enrichment failed after structural scan completed\b/gi, + 'Database enrichment failed after schema context completed', + ], + [/\bstructural scan\b/gi, 'schema context'], + [/\benriched scan\b/gi, 'deep database ingest'], + [/\bscan results\b/gi, 'database context'], +]; + +export function publicDatabaseIngestMessage(message: string): 
string { + return DATABASE_INGEST_REPLACEMENTS.reduce( + (current, [pattern, replacement]) => current.replace(pattern, replacement), + message, + ); +} + +export function publicQueryHistoryMessage(message: string, connectionId?: string): string { + let current = message; + if (connectionId && connectionId.length > 0) { + const escapedConnectionId = escapeRegExp(connectionId); + current = current + .replace( + new RegExp(`Fetching source files for ${escapedConnectionId}/historic-sql`, 'i'), + `Fetching query history for ${connectionId}`, + ) + .replace(`${connectionId}/historic-sql`, `${connectionId} query history`); + } + return current.replace(/\bhistoric-sql\b/g, 'query history').replace(/\bhistoric SQL\b/gi, 'query history'); +} + +export function publicIngestOutputLine(line: string): string { + return publicQueryHistoryMessage(publicDatabaseIngestMessage(line)).replace(/\blive-database\b/g, 'database schema'); +} diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index 1c133a19..e1c0e612 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -1,12 +1,25 @@ import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '@ktx/context/project'; import { describe, expect, it, vi } from 'vitest'; -import { buildPublicIngestPlan, type KtxPublicIngestProject, runKtxPublicIngest } from './public-ingest.js'; +import { + buildPublicIngestPlan, + type KtxPublicIngestDeps, + type KtxPublicIngestProject, + runKtxPublicIngest, +} from './public-ingest.js'; -function makeIo(options: { isTTY?: boolean } = {}) { +function makeIo(options: { isTTY?: boolean; interactive?: boolean } = {}) { let stdout = ''; let stderr = ''; return { io: { + ...(options.interactive + ? 
{ + stdin: { + isTTY: true, + setRawMode: vi.fn(), + }, + } + : {}), stdout: { isTTY: options.isTTY, write: (chunk: string) => { @@ -34,6 +47,40 @@ function projectWithConnections(connections: KtxProjectConfig['connections']): K }; } +function deepReadyProject( + connections: KtxProjectConfig['connections'], + relationshipsEnabled = true, +): KtxPublicIngestProject { + const config = buildDefaultKtxProjectConfig('warehouse'); + return { + projectDir: '/tmp/project', + config: { + ...config, + connections, + llm: { + ...config.llm, + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + ...config.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + relationships: { + ...config.scan.relationships, + enabled: relationshipsEnabled, + }, + }, + }, + }; +} + describe('buildPublicIngestPlan', () => { it('plans warehouse connections as scan targets and source connections as source ingest targets', () => { const project = projectWithConnections({ @@ -48,16 +95,19 @@ describe('buildPublicIngestPlan', () => { { connectionId: 'warehouse', driver: 'postgres', - operation: 'scan', - debugCommand: 'ktx scan warehouse --debug', - steps: ['scan'], + operation: 'database-ingest', + debugCommand: 'ktx ingest warehouse --debug', + steps: ['database-schema'], + databaseDepth: 'fast', + detectRelationships: false, + queryHistory: { enabled: false }, }, { connectionId: 'docs', driver: 'notion', operation: 'source-ingest', adapter: 'notion', - debugCommand: 'ktx ingest run --connection-id docs --adapter notion --debug', + debugCommand: 'ktx ingest docs --debug', steps: ['source-ingest', 'memory-update'], }, { @@ -65,10 +115,11 @@ describe('buildPublicIngestPlan', () => { driver: 'metabase', operation: 'source-ingest', adapter: 'metabase', - debugCommand: 'ktx ingest run --connection-id 
prod_metabase --adapter metabase --debug', + debugCommand: 'ktx ingest prod_metabase --debug', steps: ['source-ingest', 'memory-update'], }, ], + warnings: [], }); }); @@ -80,9 +131,616 @@ describe('buildPublicIngestPlan', () => { ); }); + it('resolves database depth from flags, stored context, and defaults', () => { + const project = projectWithConnections({ + fast_default: { driver: 'postgres' }, + deep_default: { driver: 'postgres', context: { depth: 'deep' } }, + docs: { driver: 'notion' }, + }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'fast_default', + all: false, + queryHistory: 'default', + }).targets[0], + ).toMatchObject({ connectionId: 'fast_default', databaseDepth: 'fast', queryHistory: { enabled: false } }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'deep_default', + all: false, + queryHistory: 'default', + }).targets[0], + ).toMatchObject({ connectionId: 'deep_default', databaseDepth: 'deep' }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'docs', + all: false, + depth: 'deep', + queryHistory: 'default', + }).warnings, + ).toEqual(['--deep affects database ingest only; ignoring it for docs.']); + }); + + it('upgrades effective depth when query history is explicitly enabled', () => { + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false } } }, + }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + depth: 'fast', + queryHistory: 'enabled', + queryHistoryWindowDays: 30, + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + queryHistory: { enabled: true, windowDays: 30, dialect: 'postgres' }, + }); + expect(plan.warnings).toEqual(['--query-history requires deep ingest; running warehouse with 
--deep.']); + }); + + it('warns and skips query history for unsupported database drivers', () => { + const project = projectWithConnections({ local: { driver: 'sqlite' } }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'local', + all: false, + queryHistory: 'enabled', + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'local', + databaseDepth: 'fast', + queryHistory: { enabled: false, unsupported: true }, + }); + expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); + }); + + it('aggregates unsupported query-history warnings for all database targets', () => { + const plan = buildPublicIngestPlan( + deepReadyProject({ + local: { driver: 'sqlite' }, + mysql_warehouse: { driver: 'mysql' }, + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }), + { + projectDir: '/tmp/project', + all: true, + depth: 'deep', + queryHistory: 'enabled', + }, + ); + + expect(plan.targets).toEqual([ + expect.objectContaining({ + connectionId: 'local', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'mysql_warehouse', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'warehouse', + queryHistory: expect.objectContaining({ enabled: true, dialect: 'postgres' }), + steps: ['database-schema', 'query-history'], + }), + ]); + expect(plan.warnings).toEqual([ + '--query-history is not supported for 2 database connections (mysql, sqlite); running schema ingest for those connections.', + ]); + }); + + it('aggregates stored unsupported query-history config warnings for all database targets', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + local: { driver: 'sqlite', context: { queryHistory: { enabled: true } } }, + mysql_warehouse: { driver: 'mysql', context: { 
queryHistory: { enabled: true } } }, + }), + { + projectDir: '/tmp/project', + all: true, + queryHistory: 'default', + }, + ); + + expect(plan.targets).toEqual([ + expect.objectContaining({ + connectionId: 'local', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + expect.objectContaining({ + connectionId: 'mysql_warehouse', + queryHistory: { enabled: false, unsupported: true }, + steps: ['database-schema'], + }), + ]); + expect(plan.warnings).toEqual([ + '2 database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.', + ]); + }); + + it('treats query-history window override as current-run query-history enablement', () => { + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false, windowDays: 90 } } }, + }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + queryHistoryWindowDays: 30, + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + queryHistory: { enabled: true, dialect: 'postgres', windowDays: 30 }, + steps: ['database-schema', 'query-history'], + }); + }); + + it('adds a schema-first notice when query history is explicitly enabled', () => { + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + + expect( + buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'enabled', + }).notices, + ).toEqual(['Schema ingest runs before query history for warehouse.']); + }); + + it('warns and skips query-history window override for unsupported database drivers', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + local: { driver: 'sqlite' }, + }), + { + projectDir: '/tmp/project', + 
targetConnectionId: 'local', + all: false, + queryHistory: 'default', + queryHistoryWindowDays: 30, + }, + ); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'local', + databaseDepth: 'fast', + queryHistory: { enabled: false, windowDays: 30, unsupported: true }, + steps: ['database-schema'], + }); + expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); + }); + + it('aggregates ignored database-depth warnings for all source targets', () => { + const plan = buildPublicIngestPlan( + projectWithConnections({ + warehouse: { driver: 'postgres' }, + docs: { driver: 'notion' }, + dbt: { driver: 'dbt' }, + }), + { + projectDir: '/tmp/project', + all: true, + depth: 'deep', + queryHistory: 'default', + }, + ); + + expect(plan.warnings).toEqual(['--deep ignored for 2 non-database sources.']); + }); + + it('records a preflight failure for deep database ingest when readiness config is missing', () => { + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + + const plan = buildPublicIngestPlan(project, { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + }); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + preflightFailure: + 'warehouse requires deep ingest readiness: model configuration, scan enrichment mode, scan embeddings. 
Run ktx setup or rerun with --fast.', + }); + }); + + it('honors scan.relationships.enabled when planning deep database ingest', () => { + const plan = buildPublicIngestPlan( + deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }, false), + { + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + queryHistory: 'default', + }, + ); + + expect(plan.targets[0]).toMatchObject({ + connectionId: 'warehouse', + databaseDepth: 'deep', + detectRelationships: false, + }); + }); }); describe('runKtxPublicIngest', () => { + it('maps fast and deep database targets to scan internals', async () => { + const io = makeIo(); + const project = deepReadyProject({ + fast: { driver: 'postgres' }, + deep: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled', queryHistory: 'default' }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(0); + + expect(runScan).toHaveBeenNthCalledWith( + 1, + expect.objectContaining({ connectionId: 'deep', mode: 'enriched', detectRelationships: true }), + expect.anything(), + ); + expect(runScan).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ connectionId: 'fast', mode: 'structural', detectRelationships: false }), + expect.anything(), + ); + }); + + it('runs query history after schema ingest with current-run window override', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true, windowDays: 90 } } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn>(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + cliVersion: 
'0.0.0-test', + runtimeInstallPolicy: 'never', + queryHistory: 'enabled', + queryHistoryWindowDays: 30, + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(runScan).toHaveBeenCalledWith( + expect.objectContaining({ connectionId: 'warehouse', mode: 'enriched' }), + expect.anything(), + ); + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + connectionId: 'warehouse', + adapter: 'historic-sql', + allowImplicitAdapter: true, + cliVersion: '0.0.0-test', + runtimeInstallPolicy: 'never', + historicSqlPullConfigOverride: expect.objectContaining({ dialect: 'postgres', windowDays: 30 }), + }), + expect.anything(), + ); + }); + + it('preserves configured query-history pull fields while overriding the current-run window', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { + driver: 'postgres', + enabled_tables: ['orbit_analytics.int_active_contract_arr'], + context: { + queryHistory: { + enabled: true, + windowDays: 90, + minExecutions: 7, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + orchestrators: { mode: 'mark-only' }, + dropFailedBelow: { errorRate: 0.5, executions: 3 }, + }, + redactionPatterns: ['(?i)secret'], + }, + }, + }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn>(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + queryHistoryWindowDays: 30, + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + const ingestArgs = runIngest.mock.calls[0]?.[0] as + | Extract>[0], { command: 'run' }> + | undefined; + expect(ingestArgs).toMatchObject({ + command: 'run', + connectionId: 
'warehouse', + adapter: 'historic-sql', + allowImplicitAdapter: true, + historicSqlPullConfigOverride: { + dialect: 'postgres', + windowDays: 30, + minExecutions: 7, + concurrency: 3, + staleArchiveAfterDays: 120, + filters: { + dropTrivialProbes: true, + serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, + orchestrators: { mode: 'mark-only' }, + dropFailedBelow: { errorRate: 0.5, executions: 3 }, + }, + redactionPatterns: ['(?i)secret'], + enabledTables: ['orbit_analytics.int_active_contract_arr'], + }, + }); + expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled'); + }); + + it('prints the schema-first notice for explicit query-history runs', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + }); + + it('suppresses internal scan output for public database ingest summaries', async () => { + const io = makeIo(); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const runScan = vi.fn(async (_args, scanIo) => { + scanIo.stdout.write('KTX scan completed\n'); + scanIo.stdout.write('Mode: structural\n'); + scanIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n'); + scanIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n'); + return 0; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: 
false, + json: false, + inputMode: 'disabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Ingest finished\n'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).not.toContain('KTX scan completed'); + expect(io.stdout()).not.toContain('Mode: structural'); + expect(io.stdout()).not.toContain('Report: raw-sources'); + expect(io.stdout()).not.toContain('live-database'); + }); + + it('sanitizes captured database scan failure details in direct public output', async () => { + const io = makeIo(); + const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }); + const runScan = vi.fn(async (_args, scanIo) => { + scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n'); + return 1; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + depth: 'deep', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain( + 'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.', + ); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep'); + expect(io.stdout()).not.toContain('KTX scan enrichment failed'); + expect(io.stdout()).not.toContain('structural scan'); + }); + + it('suppresses lower-level source report output during direct public source ingest', async () => { + const io = makeIo(); + const project = projectWithConnections({ + docs: { driver: 'notion' }, + }); + const runIngest = vi.fn(async (_args, ingestIo) => { + ingestIo.stdout.write('Report: report-docs-1\n'); + ingestIo.stdout.write('Adapter: notion\n'); + ingestIo.stdout.write('Saved memory: 2 wiki, 0 SL\n'); + return 0; + }); + + 
await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'docs', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Ingest finished'); + expect(io.stdout()).toContain('docs'); + expect(io.stdout()).toContain('Source ingest'); + expect(io.stdout()).not.toContain('Report: report-docs-1'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('notion\n'); + expect(io.stderr()).toBe(''); + }); + + it('suppresses historic-sql report output during direct public query-history ingest', async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async (_args, ingestIo) => { + ingestIo.stdout.write('Report: report-query-history-1\n'); + ingestIo.stdout.write('Adapter: historic-sql\n'); + ingestIo.stdout.write('Saved memory: 1 wiki, 1 SL\n'); + return 0; + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.'); + expect(io.stdout()).toContain('Ingest finished'); + expect(io.stdout()).toContain('warehouse'); + expect(io.stdout()).toContain('done'); + expect(io.stdout()).not.toContain('Report: report-query-history-1'); + expect(io.stdout()).not.toContain('Adapter:'); + expect(io.stdout()).not.toContain('historic-sql'); + expect(io.stderr()).toBe(''); + }); + + it('delegates interactive TTY public ingest to the foreground context-build view', async () => { + const 
io = makeIo({ isTTY: true, interactive: true }); + const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const runContextBuild = vi.fn(async () => ({ exitCode: 0 })); + const runScan = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + depth: 'fast', + queryHistory: 'default', + }, + io.io, + { loadProject: vi.fn(async () => project), runContextBuild, runScan }, + ), + ).resolves.toBe(0); + + expect(runContextBuild).toHaveBeenCalledWith( + project, + expect.objectContaining({ + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + entrypoint: 'ingest', + depth: 'fast', + queryHistory: 'default', + }), + io.io, + ); + expect(runScan).not.toHaveBeenCalled(); + }); + it('runs all independent targets and reports partial failures', async () => { const io = makeIo(); const project = projectWithConnections({ @@ -105,14 +763,15 @@ describe('runKtxPublicIngest', () => { ).resolves.toBe(1); expect(runIngest).toHaveBeenCalledWith( - { + expect.objectContaining({ command: 'run', projectDir: '/tmp/project', connectionId: 'prod_metabase', adapter: 'metabase', + allowImplicitAdapter: true, outputMode: 'plain', inputMode: 'disabled', - }, + }), expect.anything(), ); expect(runScan).toHaveBeenCalledWith( @@ -127,13 +786,68 @@ describe('runKtxPublicIngest', () => { expect.anything(), ); expect(io.stdout()).toContain('Ingest finished with partial failures'); - expect(io.stdout()).toContain('warehouse failed at scan.'); - expect(io.stdout()).toContain('Debug: ktx scan warehouse --debug'); + expect(io.stdout()).toContain('warehouse failed at database-schema.'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast'); + expect(io.stdout()).not.toContain('Debug:'); + }); + + it('prints query-history retry guidance for query-history facet failures', 
async () => { + const io = makeIo(); + const project = deepReadyProject({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 1); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'disabled', + queryHistory: 'enabled', + }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(1); + + expect(io.stdout()).toContain('warehouse failed at query-history.'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).not.toContain('historic-sql'); + }); + + it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => { + const io = makeIo(); + const project = projectWithConnections({ + warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + docs: { driver: 'notion' }, + }); + const runScan = vi.fn(async () => 0); + const runIngest = vi.fn(async () => 0); + + await expect( + runKtxPublicIngest( + { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' }, + io.io, + { loadProject: vi.fn(async () => project), runScan, runIngest }, + ), + ).resolves.toBe(1); + + expect(runScan).not.toHaveBeenCalled(); + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ command: 'run', connectionId: 'docs', adapter: 'notion' }), + expect.anything(), + ); + expect(io.stdout()).toContain('warehouse requires deep ingest readiness'); }); it('can request enriched relationship scans for setup-managed context builds', async () => { const io = makeIo(); - const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); const runScan = vi.fn(async () => 0); await expect( @@ 
-164,7 +878,7 @@ describe('runKtxPublicIngest', () => { detectRelationships: true, dryRun: false, }, - io.io, + expect.objectContaining({ capturedOutput: expect.any(Function) }), ); }); @@ -236,50 +950,44 @@ describe('runKtxPublicIngest', () => { adapter: 'dbt', sourceDir: '/repo/dbt', }), - io.io, + expect.objectContaining({ capturedOutput: expect.any(Function) }), ); }); - it('routes public status and watch to the ingest status renderer', async () => { + it('bypasses adapter allow-lists for connection-centric source ingest', async () => { const runIngest = vi.fn(async () => 0); - const statusIo = makeIo(); - const watchIo = makeIo(); + const io = makeIo(); await expect( runKtxPublicIngest( - { command: 'status', projectDir: '/tmp/ktx', json: false, inputMode: 'disabled' }, - statusIo.io, - { runIngest }, - ), - ).resolves.toBe(0); - await expect( - runKtxPublicIngest( - { command: 'watch', projectDir: '/tmp/ktx', runId: 'run-1', json: false, inputMode: 'auto' }, - watchIo.io, - { runIngest }, + { + command: 'run', + projectDir: '/tmp/ktx', + targetConnectionId: 'docs', + all: false, + json: false, + inputMode: 'disabled', + }, + io.io, + { + loadProject: async () => + projectWithConnections({ + docs: { driver: 'notion' }, + }), + runIngest, + }, ), ).resolves.toBe(0); - expect(runIngest).toHaveBeenNthCalledWith( - 1, - { - command: 'status', - projectDir: '/tmp/ktx', - outputMode: 'plain', - inputMode: 'disabled', - }, - statusIo.io, - ); - expect(runIngest).toHaveBeenNthCalledWith( - 2, - { - command: 'watch', - projectDir: '/tmp/ktx', - runId: 'run-1', - outputMode: 'viz', - inputMode: 'auto', - }, - watchIo.io, + expect(runIngest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + connectionId: 'docs', + adapter: 'notion', + allowImplicitAdapter: true, + }), + expect.objectContaining({ capturedOutput: expect.any(Function) }), ); }); + }); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 
b126e702..7916a711 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -2,47 +2,70 @@ import { type KtxLocalProject, type KtxProjectConnectionConfig, loadKtxProject } import type { KtxProgressPort } from '@ktx/context/scan'; import type { KtxCliIo } from './index.js'; import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js'; +import { + type KtxDatabaseContextDepth, + databaseContextDepth, + deepReadinessGaps, + isDatabaseDriver, + normalizeConnectionDriver, +} from './ingest-depth.js'; +import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; +import { publicIngestOutputLine } from './public-ingest-copy.js'; import type { KtxScanArgs, KtxScanDeps } from './scan.js'; import { profileMark } from './startup-profile.js'; profileMark('module:public-ingest'); -type KtxPublicIngestStepName = 'scan' | 'source-ingest' | 'enrich' | 'memory-update'; +type KtxPublicIngestStepName = 'database-schema' | 'query-history' | 'source-ingest' | 'memory-update'; type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; type KtxPublicIngestInputMode = 'auto' | 'disabled'; +type KtxPublicIngestDepth = KtxDatabaseContextDepth; +type KtxPublicIngestQueryHistoryFlag = 'default' | 'enabled' | 'disabled'; +type HistoricSqlDialect = 'postgres' | 'bigquery' | 'snowflake'; export type KtxPublicIngestArgs = - | { - command: 'run'; - projectDir: string; - targetConnectionId?: string; - all: boolean; - json: boolean; - inputMode: KtxPublicIngestInputMode; - scanMode?: Extract['mode']; - detectRelationships?: boolean; - } - | { - command: 'status' | 'watch'; - projectDir: string; - runId?: string; - json: boolean; - inputMode: KtxPublicIngestInputMode; - }; + { + command: 'run'; + projectDir: string; + targetConnectionId?: string; + all: boolean; + json: boolean; + inputMode: KtxPublicIngestInputMode; + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + 
queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + detectRelationships?: boolean; + cliVersion?: string; + runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; + }; export interface KtxPublicIngestPlanTarget { connectionId: string; driver: string; - operation: 'scan' | 'source-ingest'; + operation: 'database-ingest' | 'source-ingest'; adapter?: string; sourceDir?: string; debugCommand: string; steps: KtxPublicIngestStepName[]; + databaseDepth?: KtxPublicIngestDepth; + detectRelationships?: boolean; + preflightFailure?: string; + queryHistory?: { + enabled: boolean; + dialect?: HistoricSqlDialect; + windowDays?: number; + pullConfig?: Record; + unsupported?: boolean; + skippedStoredByFast?: boolean; + }; } export interface KtxPublicIngestPlan { projectDir: string; targets: KtxPublicIngestPlanTarget[]; + warnings: string[]; + notices?: string[]; } export interface KtxPublicIngestTargetResult { @@ -58,12 +81,35 @@ export interface KtxPublicIngestTargetResult { export type KtxPublicIngestProject = Pick; +type KtxPublicIngestPhaseKey = 'database-schema' | 'query-history' | 'source-ingest'; + export interface KtxPublicIngestDeps { loadProject?: (options: Parameters[0]) => Promise; runScan?: (args: KtxScanArgs, io: KtxCliIo, deps?: KtxScanDeps) => Promise; runIngest?: (args: KtxIngestArgs, io: KtxCliIo, deps?: KtxIngestDeps) => Promise; + runContextBuild?: ( + project: KtxPublicIngestProject, + args: KtxPublicContextBuildArgs, + io: KtxCliIo, + ) => Promise<{ exitCode: number }>; scanProgress?: KtxProgressPort; ingestProgress?: (update: KtxIngestProgressUpdate) => void; + onPhaseStart?: (phaseKey: KtxPublicIngestPhaseKey) => void; + onPhaseEnd?: (phaseKey: KtxPublicIngestPhaseKey, status: 'done' | 'failed' | 'skipped', summary?: string) => void; +} + +interface KtxPublicContextBuildArgs { + projectDir: string; + inputMode: 'auto' | 'disabled'; + targetConnectionId?: string; + all?: boolean; + depth?: KtxPublicIngestDepth; + queryHistory?: 
KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + detectRelationships?: boolean; + cliVersion?: string; + runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; } const sourceAdapterByDriver = new Map([ @@ -77,21 +123,178 @@ const sourceAdapterByDriver = new Map([ ['lookml', 'lookml'], ]); -const warehouseDrivers = new Set([ - 'sqlite', - 'postgres', - 'postgresql', - 'mysql', - 'clickhouse', - 'sqlserver', - 'bigquery', - 'snowflake', +const queryHistoryDialectByDriver = new Map([ + ['postgres', 'postgres'], + ['postgresql', 'postgres'], + ['bigquery', 'bigquery'], + ['snowflake', 'snowflake'], ]); -function normalizedDriver(connection: KtxProjectConnectionConfig): string { - return String(connection.driver ?? '') - .trim() - .toLowerCase(); +interface KtxUnsupportedQueryHistoryWarning { + connectionId: string; + driver: string; + reason: 'explicit' | 'stored'; +} + +interface KtxPublicIngestWarningAccumulator { + warnings: string[]; + ignoredDepthForSources: string[]; + ignoredQueryHistoryForSources: string[]; + unsupportedQueryHistoryForDatabases: KtxUnsupportedQueryHistoryWarning[]; +} + +function createWarningAccumulator(): KtxPublicIngestWarningAccumulator { + return { + warnings: [], + ignoredDepthForSources: [], + ignoredQueryHistoryForSources: [], + unsupportedQueryHistoryForDatabases: [], + }; +} + +function sourceIgnoredWarning(option: string, connectionIds: string[], all: boolean): string | null { + if (connectionIds.length === 0) { + return null; + } + if (all) { + const sourceLabel = + connectionIds.length === 1 ? 
'1 non-database source' : `${connectionIds.length} non-database sources`; + return `${option} ignored for ${sourceLabel}.`; + } + return `${option} affects database ingest only; ignoring it for ${connectionIds[0]}.`; +} + +function unsupportedDriverList(entries: KtxUnsupportedQueryHistoryWarning[]): string { + return [...new Set(entries.map((entry) => entry.driver))] + .sort((left, right) => left.localeCompare(right)) + .join(', '); +} + +function unsupportedQueryHistoryWarnings( + entries: KtxUnsupportedQueryHistoryWarning[], + all: boolean, +): string[] { + if (entries.length === 0) { + return []; + } + + const warnings: string[] = []; + const explicitEntries = entries.filter((entry) => entry.reason === 'explicit'); + const storedEntries = entries.filter((entry) => entry.reason === 'stored'); + + if (explicitEntries.length === 1 || (!all && explicitEntries.length > 0)) { + warnings.push( + ...explicitEntries.map( + (entry) => + `--query-history is not supported for ${entry.driver}; running schema ingest for ${entry.connectionId}.`, + ), + ); + } else if (explicitEntries.length > 1) { + warnings.push( + `--query-history is not supported for ${explicitEntries.length} database connections (${unsupportedDriverList( + explicitEntries, + )}); running schema ingest for those connections.`, + ); + } + + if (storedEntries.length === 1 || (!all && storedEntries.length > 0)) { + warnings.push( + ...storedEntries.map( + (entry) => + `${entry.connectionId} has query history enabled in ktx.yaml, but ${entry.driver} does not support it; running schema ingest.`, + ), + ); + } else if (storedEntries.length > 1) { + warnings.push( + `${storedEntries.length} database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.`, + ); + } + + return warnings; +} + +function finalizeWarnings( + accumulator: KtxPublicIngestWarningAccumulator, + args: { + all: boolean; + depth?: KtxPublicIngestDepth; + 
queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + }, +): string[] { + const warnings = [ + ...accumulator.warnings, + ...unsupportedQueryHistoryWarnings(accumulator.unsupportedQueryHistoryForDatabases, args.all), + ]; + const depthOption = args.depth ? `--${args.depth}` : null; + if (depthOption) { + const warning = sourceIgnoredWarning(depthOption, accumulator.ignoredDepthForSources, args.all); + if (warning) warnings.push(warning); + } + if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { + const warning = sourceIgnoredWarning('--query-history', accumulator.ignoredQueryHistoryForSources, args.all); + if (warning) warnings.push(warning); + } + return warnings; +} + +function schemaFirstQueryHistoryNotice( + targets: KtxPublicIngestPlanTarget[], + args: { queryHistory?: KtxPublicIngestQueryHistoryFlag }, +): string | null { + if (args.queryHistory !== 'enabled') { + return null; + } + const queryHistoryTargets = targets.filter((target) => target.queryHistory?.enabled === true); + if (queryHistoryTargets.length === 0) { + return null; + } + if (queryHistoryTargets.length === 1) { + return `Schema ingest runs before query history for ${queryHistoryTargets[0].connectionId}.`; + } + return `Schema ingest runs before query history for ${queryHistoryTargets.length} database connections.`; +} + +function storedQueryHistory(connection: KtxProjectConnectionConfig): Record { + const context = connection.context; + const contextRecord = + context && typeof context === 'object' && !Array.isArray(context) ? (context as Record) : {}; + const value = contextRecord.queryHistory; + return typeof value === 'object' && value !== null && !Array.isArray(value) ? (value as Record) : {}; +} + +function positiveInteger(value: unknown): number | undefined { + return typeof value === 'number' && Number.isInteger(value) && value > 0 ? 
value : undefined; +} + +function enabledTablesForConnection(connection: KtxProjectConnectionConfig): string[] | undefined { + const raw = connection.enabled_tables; + if (!Array.isArray(raw)) { + return undefined; + } + const tables = raw.filter((value): value is string => typeof value === 'string' && value.trim().length > 0); + return tables.length > 0 ? tables : undefined; +} + +function queryHistoryPullConfig(input: { + stored: Record; + dialect: HistoricSqlDialect; + windowDays?: number; + enabledTables?: string[]; +}): Record { + const { enabled: _enabled, dialect: _dialect, ...storedConfig } = input.stored; + return { + ...storedConfig, + dialect: input.dialect, + ...(input.enabledTables ? { enabledTables: input.enabledTables } : {}), + ...(input.windowDays !== undefined ? { windowDays: input.windowDays } : {}), + }; +} + +function depthFromLegacyScanMode( + mode: Extract['mode'] | undefined, +): KtxPublicIngestDepth | undefined { + return mode === 'enriched' || mode === 'relationships' ? 'deep' : undefined; } function sourceDirForConnection(connection: KtxProjectConnectionConfig): string | undefined { @@ -99,29 +302,141 @@ function sourceDirForConnection(connection: KtxProjectConnectionConfig): string return typeof value === 'string' && value.trim().length > 0 ? 
value.trim() : undefined; } -function targetForConnection(connectionId: string, connection: KtxProjectConnectionConfig): KtxPublicIngestPlanTarget { - const driver = normalizedDriver(connection); +function resolveDatabaseTargetOptions(input: { + connectionId: string; + driver: string; + connection: KtxProjectConnectionConfig; + args: { + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + }; + warnings: KtxPublicIngestWarningAccumulator; +}): Pick { + const storedQh = storedQueryHistory(input.connection); + const dialect = queryHistoryDialectByDriver.get(input.driver); + const explicitQueryHistory = input.args.queryHistory ?? 'default'; + const storedEnabled = storedQh.enabled === true; + const windowOverrideRequested = input.args.queryHistoryWindowDays !== undefined; + const requestedQh = + explicitQueryHistory === 'enabled' || + (explicitQueryHistory !== 'disabled' && (windowOverrideRequested || storedEnabled)); + let depth = + input.args.depth ?? depthFromLegacyScanMode(input.args.scanMode) ?? databaseContextDepth(input.connection) ?? 'fast'; + const queryHistory = { + enabled: false, + ...(input.args.queryHistoryWindowDays !== undefined + ? { windowDays: input.args.queryHistoryWindowDays } + : positiveInteger(storedQh.windowDays) !== undefined + ? { windowDays: positiveInteger(storedQh.windowDays) } + : {}), + }; + + if (requestedQh && !dialect) { + input.warnings.unsupportedQueryHistoryForDatabases.push({ + connectionId: input.connectionId, + driver: input.driver, + reason: + explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined ? 
'explicit' : 'stored', + }); + return { + databaseDepth: depth, + queryHistory: { ...queryHistory, unsupported: true }, + steps: ['database-schema'], + }; + } + + if (requestedQh && dialect) { + if (depth === 'fast') { + input.warnings.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); + } + depth = 'deep'; + return { + databaseDepth: depth, + queryHistory: { + ...queryHistory, + enabled: true, + dialect, + pullConfig: queryHistoryPullConfig({ + stored: storedQh, + dialect, + windowDays: queryHistory.windowDays, + enabledTables: enabledTablesForConnection(input.connection), + }), + }, + steps: ['database-schema', 'query-history'], + }; + } + + if (input.args.depth === 'fast' && explicitQueryHistory !== 'enabled' && storedEnabled) { + input.warnings.warnings.push( + `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, + ); + return { + databaseDepth: 'fast', + queryHistory: { ...queryHistory, skippedStoredByFast: true }, + steps: ['database-schema'], + }; + } + + return { + databaseDepth: depth, + queryHistory, + steps: ['database-schema'], + }; +} + +function targetForConnection( + connectionId: string, + connection: KtxProjectConnectionConfig, + projectConfig: KtxPublicIngestProject['config'], + args: { + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + }, + warnings: KtxPublicIngestWarningAccumulator, +): KtxPublicIngestPlanTarget { + const driver = normalizeConnectionDriver(connection); const adapter = sourceAdapterByDriver.get(driver); const sourceDir = sourceDirForConnection(connection); if (adapter) { + if (args.depth) { + warnings.ignoredDepthForSources.push(connectionId); + } + if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { + warnings.ignoredQueryHistoryForSources.push(connectionId); + } return { connectionId, driver, 
operation: 'source-ingest', adapter, ...(sourceDir ? { sourceDir } : {}), - debugCommand: `ktx ingest run --connection-id ${connectionId} --adapter ${adapter} --debug`, + debugCommand: `ktx ingest ${connectionId} --debug`, steps: ['source-ingest', 'memory-update'], }; } - if (warehouseDrivers.has(driver)) { + if (isDatabaseDriver(driver)) { + const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); + const gaps = options.databaseDepth === 'deep' ? deepReadinessGaps(projectConfig) : []; return { connectionId, driver, - operation: 'scan', - debugCommand: `ktx scan ${connectionId} --debug`, - steps: ['scan'], + operation: 'database-ingest', + debugCommand: `ktx ingest ${connectionId} --debug`, + detectRelationships: options.databaseDepth === 'deep' && projectConfig.scan.relationships.enabled, + ...(gaps.length > 0 + ? { + preflightFailure: `${connectionId} requires deep ingest readiness: ${gaps.join( + ', ', + )}. Run ktx setup or rerun with --fast.`, + } + : {}), + ...options, }; } @@ -130,7 +445,15 @@ function targetForConnection(connectionId: string, connection: KtxProjectConnect export function buildPublicIngestPlan( project: KtxPublicIngestProject, - args: { projectDir: string; targetConnectionId?: string; all: boolean }, + args: { + projectDir: string; + targetConnectionId?: string; + all: boolean; + depth?: KtxPublicIngestDepth; + queryHistory?: KtxPublicIngestQueryHistoryFlag; + queryHistoryWindowDays?: number; + scanMode?: Extract['mode']; + }, ): KtxPublicIngestPlan { if (!args.all && !args.targetConnectionId) { throw new Error('Context build requires a connection id or all targets'); @@ -146,26 +469,40 @@ export function buildPublicIngestPlan( throw new Error('No configured connections are eligible for ingest'); } - const targets = selected.map(([connectionId, connection]) => targetForConnection(connectionId, connection)); + const warnings = createWarningAccumulator(); + const targets = selected.map(([connectionId, 
connection]) => + targetForConnection(connectionId, connection, project.config, args, warnings), + ); + const orderedTargets = [ + ...targets.filter((t) => t.operation === 'database-ingest'), + ...targets.filter((t) => t.operation === 'source-ingest'), + ]; + const notice = schemaFirstQueryHistoryNotice(orderedTargets, args); return { projectDir: args.projectDir, - targets: [...targets.filter((t) => t.operation === 'scan'), ...targets.filter((t) => t.operation === 'source-ingest')], + targets: orderedTargets, + warnings: finalizeWarnings(warnings, args), + ...(notice ? { notices: [notice] } : {}), }; } function defaultSteps(target: KtxPublicIngestPlanTarget): KtxPublicIngestTargetResult['steps'] { return [ { - operation: 'scan', - status: target.steps.includes('scan') ? 'not-run' : 'skipped', - ...(target.operation === 'scan' ? { debugCommand: target.debugCommand } : {}), + operation: 'database-schema', + status: target.steps.includes('database-schema') ? 'not-run' : 'skipped', + ...(target.operation === 'database-ingest' ? { debugCommand: target.debugCommand } : {}), + }, + { + operation: 'query-history', + status: target.steps.includes('query-history') ? 'not-run' : 'skipped', + ...(target.operation === 'database-ingest' ? { debugCommand: target.debugCommand } : {}), }, { operation: 'source-ingest', status: target.steps.includes('source-ingest') ? 'not-run' : 'skipped', ...(target.operation === 'source-ingest' ? { debugCommand: target.debugCommand } : {}), }, - { operation: 'enrich', status: 'skipped' }, { operation: 'memory-update', status: target.steps.includes('memory-update') ? 'not-run' : 'skipped', @@ -174,8 +511,49 @@ function defaultSteps(target: KtxPublicIngestPlanTarget): KtxPublicIngestTargetR ]; } -function markTargetResult(target: KtxPublicIngestPlanTarget, status: 'done' | 'failed'): KtxPublicIngestTargetResult { - const failedOperation = target.operation === 'scan' ? 
'scan' : 'source-ingest'; +function retryCommandForTarget( + target: KtxPublicIngestPlanTarget, + args: Extract, +): string { + const projectPart = ` --project-dir ${args.projectDir}`; + const depthPart = target.databaseDepth ? ` --${target.databaseDepth}` : ''; + const queryHistoryPart = target.queryHistory?.enabled === true ? ' --query-history' : ''; + const windowPart = + target.queryHistory?.enabled === true && target.queryHistory.windowDays !== undefined + ? ` --query-history-window-days ${target.queryHistory.windowDays}` + : ''; + return `ktx ingest ${target.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; +} + +function trimTrailingPeriod(value: string): string { + return value.endsWith('.') ? value.slice(0, -1) : value; +} + +function failureDetailWithRetry(input: { + target: KtxPublicIngestPlanTarget; + args: Extract; + failedOperation: KtxPublicIngestStepName; + failureDetail?: string; +}): string { + const detail = input.failureDetail?.trim(); + const base = + detail && detail.startsWith(`${input.target.connectionId} `) + ? detail + : detail + ? `${input.target.connectionId} failed: ${detail}` + : `${input.target.connectionId} failed at ${input.failedOperation}.`; + return `${trimTrailingPeriod(base)}. Retry: ${retryCommandForTarget(input.target, input.args)}`; +} + +function markTargetResult( + target: KtxPublicIngestPlanTarget, + args: Extract, + status: 'done' | 'failed', + failedOperation?: KtxPublicIngestStepName, + failureDetail?: string, +): KtxPublicIngestTargetResult { + const selectedFailedOperation = + failedOperation ?? (target.operation === 'database-ingest' ? 
'database-schema' : 'source-ingest'); return { connectionId: target.connectionId, driver: target.driver, @@ -186,8 +564,17 @@ function markTargetResult(target: KtxPublicIngestPlanTarget, status: 'done' | 'f if (status === 'done') { return { ...step, status: 'done' }; } - if (step.operation === failedOperation) { - return { ...step, status: 'failed', detail: `${target.connectionId} failed at ${failedOperation}.` }; + if (step.operation === selectedFailedOperation) { + return { + ...step, + status: 'failed', + detail: failureDetailWithRetry({ + target, + args, + failedOperation: selectedFailedOperation, + failureDetail, + }), + }; } return { ...step, status: 'not-run' }; }), @@ -206,13 +593,16 @@ function renderPlainResults(results: KtxPublicIngestTargetResult[], io: KtxCliIo const failures = results.filter(resultFailed); io.stdout.write(failures.length > 0 ? 'Ingest finished with partial failures\n' : 'Ingest finished\n'); io.stdout.write('\n'); - io.stdout.write('Source Scan Source ingest Enrich Memory update\n'); + io.stdout.write('Source Database schema Query history Source ingest Memory update\n'); for (const result of results) { io.stdout.write( - `${result.connectionId.padEnd(14)} ${stepStatus(result, 'scan').padEnd(9)} ${stepStatus( + `${result.connectionId.padEnd(14)} ${stepStatus(result, 'database-schema').padEnd(16)} ${stepStatus( + result, + 'query-history', + ).padEnd(14)} ${stepStatus( result, 'source-ingest', - ).padEnd(14)} ${stepStatus(result, 'enrich').padEnd(8)} ${stepStatus(result, 'memory-update')}\n`, + ).padEnd(14)} ${stepStatus(result, 'memory-update')}\n`, ); } @@ -227,9 +617,6 @@ function renderPlainResults(results: KtxPublicIngestTargetResult[], io: KtxCliIo continue; } io.stdout.write(` ${failedStep.detail ?? 
`${result.connectionId} failed.`}\n`); - if (failedStep.debugCommand) { - io.stdout.write(` Debug: ${failedStep.debugCommand}\n`); - } } } @@ -242,27 +629,154 @@ function sourceIngestOutputMode(args: Extract, + io: KtxCliIo, +): boolean { + return args.inputMode === 'auto' && args.json !== true && io.stdout.isTTY === true && hasInteractiveInput(io); +} + +interface CapturedPublicIngestIo extends KtxCliIo { + capturedOutput(): string; +} + +function createCapturedPublicIngestIo(): CapturedPublicIngestIo { + let output = ''; + return { + stdout: { + isTTY: false, + write(chunk: string) { + output += chunk; + }, + }, + stderr: { + write(chunk: string) { + output += chunk; + }, + }, + capturedOutput() { + return output; + }, + }; +} + +const INTERNAL_STATUS_LINE_RE = + /^(Report|Run|Job|Status|Adapter|Connection|Sync|Diff|Tasks|Work units|Failed tasks|Saved memory|Provenance rows):\s*/; + +function firstCapturedFailureLine(output: string): string | undefined { + return output + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + .filter((line) => !line.startsWith('KTX scan completed')) + .filter((line) => !INTERNAL_STATUS_LINE_RE.test(line)) + .map(publicIngestOutputLine) + .find((line) => line.length > 0); +} + export async function executePublicIngestTarget( target: KtxPublicIngestPlanTarget, args: Extract, io: KtxCliIo, deps: KtxPublicIngestDeps, ): Promise { - if (target.operation === 'scan') { + if (target.preflightFailure) { + if (target.operation === 'database-ingest') { + deps.onPhaseEnd?.('database-schema', 'failed', target.preflightFailure); + if (target.queryHistory?.enabled === true) { + deps.onPhaseEnd?.('query-history', 'skipped'); + } + } else { + deps.onPhaseEnd?.('source-ingest', 'failed', target.preflightFailure); + } + return { + connectionId: target.connectionId, + driver: target.driver, + steps: defaultSteps(target).map((step) => + step.operation === 'database-schema' + ? 
{ + ...step, + status: 'failed', + detail: target.preflightFailure, + } + : step, + ), + }; + } + + if (target.operation === 'database-ingest') { const { runKtxScan } = await import('./scan.js'); const scanArgs: KtxScanArgs = { command: 'run', projectDir: args.projectDir, connectionId: target.connectionId, - mode: args.scanMode ?? 'structural', - detectRelationships: args.detectRelationships ?? false, + mode: target.databaseDepth === 'deep' ? 'enriched' : 'structural', + detectRelationships: target.detectRelationships === true, dryRun: false, + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), }; const runScan = deps.runScan ?? runKtxScan; - const exitCode = deps.scanProgress - ? await runScan(scanArgs, io, { progress: deps.scanProgress }) - : await runScan(scanArgs, io); - return markTargetResult(target, exitCode === 0 ? 'done' : 'failed'); + const capturedScanIo = deps.scanProgress ? null : createCapturedPublicIngestIo(); + const scanIo = capturedScanIo ?? io; + deps.onPhaseStart?.('database-schema'); + const scanExitCode = deps.scanProgress + ? await runScan(scanArgs, scanIo, { progress: deps.scanProgress }) + : await runScan(scanArgs, scanIo); + if (scanExitCode !== 0) { + deps.onPhaseEnd?.('database-schema', 'failed'); + if (target.queryHistory?.enabled === true) { + deps.onPhaseEnd?.('query-history', 'skipped'); + } + return markTargetResult( + target, + args, + 'failed', + 'database-schema', + capturedScanIo ? firstCapturedFailureLine(capturedScanIo.capturedOutput()) : undefined, + ); + } + deps.onPhaseEnd?.('database-schema', 'done'); + + if (target.queryHistory?.enabled === true) { + const { runKtxIngest } = await import('./ingest.js'); + const runIngest = deps.runIngest ?? 
runKtxIngest; + const ingestArgs: KtxIngestArgs = { + command: 'run', + projectDir: args.projectDir, + connectionId: target.connectionId, + adapter: 'historic-sql', + outputMode: sourceIngestOutputMode(args, io), + inputMode: args.inputMode, + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), + allowImplicitAdapter: true, + historicSqlPullConfigOverride: + target.queryHistory.pullConfig ?? { + dialect: target.queryHistory.dialect, + ...(target.queryHistory.windowDays !== undefined ? { windowDays: target.queryHistory.windowDays } : {}), + }, + }; + const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo(); + const ingestIo = capturedIngestIo ?? io; + deps.onPhaseStart?.('query-history'); + const qhExitCode = deps.ingestProgress + ? await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress }) + : await runIngest(ingestArgs, ingestIo); + if (qhExitCode !== 0) { + deps.onPhaseEnd?.('query-history', 'failed'); + return markTargetResult( + target, + args, + 'failed', + 'query-history', + capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined, + ); + } + deps.onPhaseEnd?.('query-history', 'done'); + } + + return markTargetResult(target, args, 'done'); } const { runKtxIngest } = await import('./ingest.js'); @@ -274,12 +788,25 @@ export async function executePublicIngestTarget( ...(target.sourceDir ? { sourceDir: target.sourceDir } : {}), outputMode: sourceIngestOutputMode(args, io), inputMode: args.inputMode, + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), + allowImplicitAdapter: true, }; const runIngest = deps.runIngest ?? runKtxIngest; + const capturedIngestIo = deps.ingestProgress ? null : createCapturedPublicIngestIo(); + const ingestIo = capturedIngestIo ?? 
io; + deps.onPhaseStart?.('source-ingest'); const exitCode = deps.ingestProgress - ? await runIngest(ingestArgs, io, { progress: deps.ingestProgress }) - : await runIngest(ingestArgs, io); - return markTargetResult(target, exitCode === 0 ? 'done' : 'failed'); + ? await runIngest(ingestArgs, ingestIo, { progress: deps.ingestProgress }) + : await runIngest(ingestArgs, ingestIo); + deps.onPhaseEnd?.('source-ingest', exitCode === 0 ? 'done' : 'failed'); + return markTargetResult( + target, + args, + exitCode === 0 ? 'done' : 'failed', + 'source-ingest', + capturedIngestIo ? firstCapturedFailureLine(capturedIngestIo.capturedOutput()) : undefined, + ); } export async function runKtxPublicIngest( @@ -287,25 +814,44 @@ export async function runKtxPublicIngest( io: KtxCliIo, deps: KtxPublicIngestDeps = {}, ): Promise { - if (args.command !== 'run') { - const { runKtxIngest } = await import('./ingest.js'); - return await (deps.runIngest ?? runKtxIngest)( + const loadProject = deps.loadProject ?? loadKtxProject; + const project = await loadProject({ projectDir: args.projectDir }); + if (shouldUseForegroundContextBuildView(args, io)) { + const { runContextBuild } = await import('./context-build-view.js'); + const contextBuild = deps.runContextBuild ?? runContextBuild; + const result = await contextBuild( + project, { - command: args.command, projectDir: args.projectDir, - ...(args.runId ? { runId: args.runId } : {}), - outputMode: args.json ? 'json' : args.command === 'watch' ? 'viz' : 'plain', + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all, + entrypoint: 'ingest', inputMode: args.inputMode, + ...(args.depth ? { depth: args.depth } : {}), + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? 
{ detectRelationships: args.detectRelationships } : {}), + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), }, io, ); + return result.exitCode; } - const loadProject = deps.loadProject ?? loadKtxProject; - const project = await loadProject({ projectDir: args.projectDir }); const plan = buildPublicIngestPlan(project, args); const results: KtxPublicIngestTargetResult[] = []; + if (!args.json) { + for (const notice of plan.notices ?? []) { + io.stdout.write(`${notice}\n`); + } + for (const warning of plan.warnings) { + io.stderr.write(`Warning: ${warning}\n`); + } + } + for (const target of plan.targets) { results.push(await executePublicIngestTarget(target, args, io, deps)); } diff --git a/packages/cli/src/runtime.test.ts b/packages/cli/src/runtime.test.ts index 8151a4b3..a147f966 100644 --- a/packages/cli/src/runtime.test.ts +++ b/packages/cli/src/runtime.test.ts @@ -368,4 +368,71 @@ describe('runKtxRuntime', () => { expect(io.stdout()).toContain('PASS Managed Python runtime: Runtime ready at /runtime/0.2.0'); expect(io.stderr()).toBe(''); }); + + it('returns success when the installed runtime is ready but source assets are missing', async () => { + const io = makeIo(); + const deps: KtxRuntimeDeps = { + readStatus: vi.fn(async (): Promise => ({ + kind: 'ready', + detail: 'Runtime ready at /runtime/0.2.0', + layout: { + cliVersion: '0.2.0', + runtimeRoot: '/runtime', + versionDir: '/runtime/0.2.0', + venvDir: '/runtime/0.2.0/.venv', + manifestPath: '/runtime/0.2.0/manifest.json', + installLogPath: '/runtime/0.2.0/install.log', + assetDir: '/assets/python', + assetManifestPath: '/assets/python/manifest.json', + pythonPath: '/runtime/0.2.0/.venv/bin/python', + daemonPath: '/runtime/0.2.0/.venv/bin/ktx-daemon', + daemonStatePath: '/runtime/0.2.0/daemon.json', + daemonStdoutPath: '/runtime/0.2.0/daemon.stdout.log', + daemonStderrPath: 
'/runtime/0.2.0/daemon.stderr.log', + }, + manifest: { + schemaVersion: 1, + cliVersion: '0.2.0', + installedAt: '2026-05-11T00:00:00.000Z', + asset: { + schemaVersion: 1, + distributionName: 'kaelio-ktx', + normalizedName: 'kaelio_ktx', + version: '0.1.0', + wheel: { + file: 'kaelio_ktx-0.1.0-py3-none-any.whl', + sha256: 'a'.repeat(64), + bytes: 10, + }, + }, + features: ['core'], + python: { + executable: '/runtime/0.2.0/.venv/bin/python', + daemonExecutable: '/runtime/0.2.0/.venv/bin/ktx-daemon', + }, + installLog: '/runtime/0.2.0/install.log', + }, + })), + doctorRuntime: vi.fn(async (): Promise => [ + { id: 'uv', label: 'uv', status: 'pass', detail: 'uv 0.9.5' }, + { + id: 'asset', + label: 'Bundled Python wheel', + status: 'fail', + detail: 'Missing bundled Python runtime manifest: /assets/python/manifest.json', + fix: 'Run: pnpm run artifacts:check', + }, + { id: 'runtime', label: 'Managed Python runtime', status: 'pass', detail: 'Runtime ready at /runtime/0.2.0' }, + ]), + }; + + await expect(runKtxRuntime({ command: 'status', cliVersion: '0.2.0', json: false }, io.io, deps)).resolves.toBe( + 0, + ); + + expect(io.stdout()).toContain('status: ready'); + expect(io.stdout()).toContain('FAIL Bundled Python wheel: Missing bundled Python runtime manifest'); + expect(io.stdout()).toContain('PASS Managed Python runtime: Runtime ready at /runtime/0.2.0'); + expect(io.stderr()).toBe(''); + }); }); diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index 8bb3fc7c..e64efd40 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -150,8 +150,8 @@ function writeRuntimeChecks(io: KtxCliIo, checks: ManagedPythonRuntimeDoctorChec } } -function hasRuntimeCheckFailures(checks: ManagedPythonRuntimeDoctorCheck[]): boolean { - return checks.some((check) => check.status === 'fail'); +function hasRuntimeStatusFailure(status: ManagedPythonRuntimeStatus): boolean { + return status.kind !== 'ready'; } export async function runKtxRuntime( @@ 
-203,7 +203,7 @@ export async function runKtxRuntime( writeStatus(io, status); writeRuntimeChecks(io, checks); } - return hasRuntimeCheckFailures(checks) ? 1 : 0; + return hasRuntimeStatusFailure(status) ? 1 : 0; } const _exhaustive: never = args; return _exhaustive; diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index 12fd332a..66c8f6fe 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -1,13 +1,21 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { readKtxSetupState, writeKtxSetupState } from '@ktx/context/project'; +import { + buildDefaultKtxProjectConfig, + parseKtxProjectConfig, + readKtxSetupState, + serializeKtxProjectConfig, + type KtxProjectConfig, + writeKtxSetupState, +} from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { contextBuildCommands, readKtxSetupContextState, runKtxSetupContextStep, + type KtxSetupContextDeps, writeKtxSetupContextState, } from './setup-context.js'; @@ -32,39 +40,79 @@ function makeIo() { }; } -async function writeReadyProject(projectDir: string) { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: revenue', - 'setup:', - ' database_connection_ids:', - ' - warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - ' url: env:DATABASE_URL', - ' docs:', - ' driver: notion', - ' auth_token_ref: env:NOTION_TOKEN', - ' crawl_mode: all_accessible', - 'llm:', - ' provider:', - ' backend: anthropic', - ' models:', - ' default: claude-sonnet-4-6', - 'ingest:', - ' embeddings:', - ' backend: openai', - ' model: text-embedding-3-small', - ' dimensions: 1536', - 'scan:', - ' enrichment:', - ' mode: llm', - '', - ].join('\n'), - 'utf-8', - ); +type ReadyProjectOverrides = Omit, 'ingest' | 'llm' | 'scan'> & { + ingest?: Partial; + llm?: Partial; + scan?: 
Omit, 'enrichment' | 'relationships'> & { + enrichment?: Partial; + relationships?: Partial; + }; +}; + +async function writeReadyProject(projectDir: string, overrides: ReadyProjectOverrides = {}) { + const defaults = buildDefaultKtxProjectConfig('revenue'); + const readyConfig: KtxProjectConfig = { + ...defaults, + setup: { database_connection_ids: ['warehouse'] }, + connections: { + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', context: { depth: 'deep' } }, + docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' }, + }, + llm: { + provider: { backend: 'anthropic' }, + models: { default: 'claude-sonnet-4-6' }, + }, + ingest: { + ...defaults.ingest, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + scan: { + ...defaults.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + }, + }; + const nextConfig: KtxProjectConfig = { + ...readyConfig, + ...overrides, + setup: overrides.setup ?? readyConfig.setup, + connections: overrides.connections ?? readyConfig.connections, + llm: { + ...readyConfig.llm, + ...overrides.llm, + provider: overrides.llm?.provider ?? readyConfig.llm.provider, + models: overrides.llm?.models ?? readyConfig.llm.models, + }, + ingest: { + ...readyConfig.ingest, + ...overrides.ingest, + embeddings: overrides.ingest?.embeddings ?? readyConfig.ingest.embeddings, + workUnits: overrides.ingest?.workUnits ?? readyConfig.ingest.workUnits, + }, + scan: { + ...readyConfig.scan, + ...overrides.scan, + enrichment: { + ...readyConfig.scan.enrichment, + ...(overrides.scan?.enrichment ?? {}), + }, + relationships: { + ...readyConfig.scan.relationships, + ...(overrides.scan?.relationships ?? 
{}), + }, + }, + }; + await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig(nextConfig), 'utf-8'); await writeKtxSetupState(projectDir, { completed_steps: ['project', 'llm', 'embeddings', 'databases', 'sources'], }); @@ -73,7 +121,15 @@ async function writeReadyProject(projectDir: string) { async function writeScanReport( projectDir: string, syncId: string, - report: { mode: string; tableDescriptions: string; columnDescriptions: string; embeddings: string }, + report: { + mode: string; + tableDescriptions: string; + columnDescriptions: string; + embeddings: string; + manifestShards?: string[]; + completedStages?: string[]; + relationships?: { accepted: number; review: number; rejected: number; skipped: number }; + }, ) { const reportDir = join(projectDir, 'raw-sources', 'warehouse', 'live-database', syncId); await mkdir(reportDir, { recursive: true }); @@ -85,7 +141,7 @@ async function writeScanReport( mode: report.mode, dryRun: false, artifactPaths: { - manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + manifestShards: report.manifestShards ?? ['semantic-layer/warehouse/_schema/public.yaml'], enrichmentArtifacts: report.mode === 'enriched' ? [`raw-sources/warehouse/live-database/${syncId}/enrichment/descriptions.json`] @@ -95,9 +151,11 @@ async function writeScanReport( tableDescriptions: report.tableDescriptions, columnDescriptions: report.columnDescriptions, embeddings: report.embeddings, + ...(report.relationships ? { relationships: report.relationships } : {}), }, enrichmentState: { - completedStages: report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : [], + completedStages: + report.completedStages ?? (report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : []), failedStages: report.tableDescriptions === 'failed' ? 
['descriptions'] : [], }, createdAt: syncId, @@ -108,12 +166,19 @@ async function writeScanReport( ); } -async function writeReadyEnrichedScanReport(projectDir: string, syncId = '2026-05-09T10:00:00.000Z') { +async function writeReadyEnrichedScanReport( + projectDir: string, + syncId = '2026-05-09T10:00:00.000Z', + overrides: Partial[2]> = {}, +) { await writeScanReport(projectDir, syncId, { mode: 'enriched', tableDescriptions: 'completed', columnDescriptions: 'completed', embeddings: 'completed', + completedStages: ['descriptions', 'embeddings', 'relationships'], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + ...overrides, }); } @@ -145,7 +210,7 @@ describe('setup context build state', () => { sourceProgress: [ { connectionId: 'warehouse', - operation: 'scan', + operation: 'database-ingest', status: 'running', percent: 42, message: 'Generating descriptions 4/10 tables', @@ -157,18 +222,18 @@ describe('setup context build state', () => { const state = await readKtxSetupContextState(tempDir); expect(state).toMatchObject({ runId: 'setup-context-local-abc123', - status: 'running', + status: 'stale', primarySourceConnectionIds: ['warehouse'], contextSourceConnectionIds: ['docs'], commands: { - watch: `ktx setup --project-dir ${tempDir}`, + build: `ktx setup --project-dir ${tempDir}`, status: `ktx status --project-dir ${tempDir}`, - resume: `ktx setup --project-dir ${tempDir}`, }, + failureReason: 'Previous foreground context build did not finish. 
Rerun setup or ktx ingest.', sourceProgress: [ { connectionId: 'warehouse', - operation: 'scan', + operation: 'database-ingest', status: 'running', percent: 42, message: 'Generating descriptions 4/10 tables', @@ -185,7 +250,6 @@ describe('setup context build state', () => { const io = makeIo(); const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, - detached: false, reportIds: ['report-docs-1'], artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], })); @@ -214,11 +278,9 @@ describe('setup context build state', () => { expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled', - scanMode: 'enriched', - detectRelationships: true, }), io.io, - expect.objectContaining({ onDetach: expect.any(Function) }), + expect.objectContaining({ onSourceProgress: expect.any(Function) }), ); expect(verifyContextReady).toHaveBeenCalledWith(tempDir); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); @@ -231,6 +293,8 @@ describe('setup context build state', () => { artifactPaths: ['raw-sources/warehouse/live-database/sync-1/scan-report.json'], }); expect(io.stdout()).toContain('KTX context is ready for agents.'); + expect(io.stdout()).toContain('Databases:'); + expect(io.stdout()).not.toContain(['Primary sources', ':'].join('')); }); it('records only failed sources as retryable when the context build fails', async () => { @@ -238,12 +302,11 @@ describe('setup context build state', () => { const io = makeIo(); const runContextBuildMock = vi.fn(async (_project, _args, _io, hooks) => { hooks.onSourceProgress?.([ - { connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 }, + { connectionId: 'warehouse', operation: 'database-ingest', status: 'done', elapsedMs: 1000 }, { connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 }, ]); return { exitCode: 1, - detached: false, reportIds: ['report-docs-failed'], artifactPaths: 
['raw-sources/docs/notion/sync-1/ingest-report.json'], }; @@ -268,7 +331,7 @@ describe('setup context build state', () => { artifactPaths: ['raw-sources/docs/notion/sync-1/ingest-report.json'], retryableFailedTargets: ['docs'], sourceProgress: [ - { connectionId: 'warehouse', operation: 'scan', status: 'done', elapsedMs: 1000 }, + { connectionId: 'warehouse', operation: 'database-ingest', status: 'done', elapsedMs: 1000 }, { connectionId: 'docs', operation: 'source-ingest', status: 'failed', elapsedMs: 2000 }, ], }); @@ -282,7 +345,9 @@ describe('setup context build state', () => { await writeFile(join(tempDir, 'wiki', 'global', 'metrics.md'), '# Metrics\n'); await writeReadyEnrichedScanReport(tempDir); const io = makeIo(); - const runContextBuildMock = vi.fn(async () => ({ exitCode: 0, detached: false })); + const runContextBuildMock = vi.fn>(async () => ({ + exitCode: 0, + })); await expect( runKtxSetupContextStep( @@ -312,6 +377,7 @@ describe('setup context build state', () => { contextSourceConnectionIds: ['docs'], }); expect(io.stdout()).toContain('KTX context is ready for agents.'); + expect(io.stdout()).not.toContain(['Primary sources', ':'].join('')); }); it('does not mark context ready until primary scans have completed description enrichment', async () => { @@ -327,7 +393,7 @@ describe('setup context build state', () => { const io = makeIo(); const runContextBuildMock = vi.fn(async () => { await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z'); - return { exitCode: 0, detached: false }; + return { exitCode: 0 }; }); await expect( @@ -346,32 +412,183 @@ describe('setup context build state', () => { expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); }); - it('does not treat schema-only scan shards as completed setup context', async () => { - await writeReadyProject(tempDir); + it('treats fast database context as ready from schema manifest shards without AI artifacts', async () => { + await 
writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } }, + }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { recursive: true }); await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); - const io = makeIo(); - const runContextBuildMock = vi.fn(async () => { - await mkdir(join(tempDir, 'wiki', 'global'), { recursive: true }); - await writeFile(join(tempDir, 'wiki', 'global', 'metrics.md'), '# Metrics\n'); - await writeReadyEnrichedScanReport(tempDir); - return { exitCode: 0, detached: false }; + await writeScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + mode: 'structural', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], }); + const io = makeIo(); + const runContextBuildMock = vi.fn>(async () => ({ + exitCode: 0, + })); await expect( runKtxSetupContextStep( { projectDir: tempDir, inputMode: 'disabled' }, io.io, { - runIdFactory: () => 'setup-context-local-schema-only', - now: () => new Date('2026-05-09T10:00:00.000Z'), runContextBuild: runContextBuildMock, }, ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-schema-only' }); + ).resolves.toMatchObject({ status: 'ready' }); + + expect(runContextBuildMock).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Existing context artifacts were found from setup ingest.'); + }); + + it('stores fast context depth non-interactively when deep readiness is missing', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); + const io = makeIo(); + const 
runContextBuildMock = vi.fn>(async () => ({ + exitCode: 0, + })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock, verifyContextReady }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'fast' }); + expect(runContextBuildMock).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled' }), + expect.anything(), + expect.anything(), + ); + expect(runContextBuildMock.mock.calls[0]?.[1]).not.toMatchObject({ + scanMode: 'enriched', + detectRelationships: true, + }); + }); + + it('prompts for database context depth after final readiness is known', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret + models: { default: 'gpt-test' }, + }, + scan: { + enrichment: { + mode: 'llm', + embeddings: { backend: 'openai', model: 'text-embedding-3-small', dimensions: 1536 }, + }, + }, + }); + const io = makeIo(); + const select = vi.fn(async () => 'deep'); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + runContextBuild: runContextBuildMock, + verifyContextReady, + }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + 
expect(select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('How much database context should KTX build?'), + }), + ); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' }); + }); + + it('requires completed relationships for deep context when relationship discovery is enabled', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + }, + scan: { relationships: { enabled: true } }, + }); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + completedStages: ['descriptions', 'embeddings'], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => { + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:01:00.000Z', { + completedStages: ['descriptions', 'embeddings', 'relationships'], + relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 }, + }); + return { exitCode: 0 }; + }); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock }, + ), + ).resolves.toMatchObject({ status: 'ready' }); expect(runContextBuildMock).toHaveBeenCalledOnce(); - expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); + }); + + it('does not require relationships for deep context when relationship discovery is disabled', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + }, + scan: { 
relationships: { enabled: false } }, + }); + await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true }); + await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n'); + await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + completedStages: ['descriptions', 'embeddings'], + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(runContextBuildMock).not.toHaveBeenCalled(); }); it('refuses empty setup context builds', async () => { @@ -404,280 +621,63 @@ describe('setup context build state', () => { ), ).resolves.toEqual({ status: 'failed', projectDir: tempDir }); - expect(io.stderr()).toContain('No primary or context sources are configured for a KTX context build.'); + expect(io.stderr()).toContain('No databases or context sources are configured for a KTX context build.'); }); - it('watches an already-running setup context build from the resume prompt', async () => { + it('normalizes legacy detached and paused setup context states to stale', async () => { await writeReadyProject(tempDir); await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-resume-watch', - status: 'detached', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: ['docs'], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), - }); - const io = makeIo(); - const completeRun = async () => { - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-resume-watch', - status: 'completed', - startedAt: '2026-05-09T10:00:00.000Z', - 
updatedAt: '2026-05-09T10:02:00.000Z', - completedAt: '2026-05-09T10:02:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: ['docs'], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-resume-watch'), - }); - }; - const select = vi.fn(async (options: { options: Array<{ value: string; label: string }> }) => { - expect(options.options.map((option) => option.label)).toContain('Watch progress'); - return 'watch'; - }); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto' }, - io.io, - { - prompts: { select, cancel: vi.fn() }, - sleep: completeRun, - watchIntervalMs: 1, - }, - ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-resume-watch' }); - expect(io.stdout()).toContain('KTX context built: detached'); - expect(io.stdout()).toContain('KTX context built: yes'); - }); - - it('auto-watches a running build without prompting when autoWatch is true', async () => { - await writeReadyProject(tempDir); - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-auto-watch', - status: 'detached', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', + runId: 'setup-context-local-old', + status: 'detached' as never, + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', primarySourceConnectionIds: ['warehouse'], contextSourceConnectionIds: [], reportIds: [], artifactPaths: [], retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), - }); - const io = makeIo(); - const completeRun = async () => { - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-auto-watch', - status: 'completed', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:02:00.000Z', - completedAt: '2026-05-09T10:02:00.000Z', - primarySourceConnectionIds: ['warehouse'], - 
contextSourceConnectionIds: [], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-auto-watch'), - }); - }; - const select = vi.fn(async () => { - throw new Error('should not prompt when autoWatch is true'); + commands: contextBuildCommands(tempDir, 'setup-context-local-old'), }); - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, - io.io, - { - prompts: { select, cancel: vi.fn() }, - sleep: completeRun, - watchIntervalMs: 1, - }, - ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-auto-watch' }); - expect(select).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('KTX context built: yes'); + await expect(readKtxSetupContextState(tempDir)).resolves.toMatchObject({ + status: 'stale', + failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.', + }); }); - it('renders the progress view when watching a build with sourceProgress', async () => { - await writeReadyProject(tempDir); - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-progress', - status: 'detached', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: ['docs'], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), - sourceProgress: [ - { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, - { connectionId: 'docs', operation: 'source-ingest' as const, status: 'running' as const, startedAtMs: Date.now() - 5000 }, - ], + it('starts a fresh foreground build when a stale running state is found', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } } 
}, }); - const io = makeIo(); - const completeRun = async () => { - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-progress', - status: 'completed', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:02:00.000Z', - completedAt: '2026-05-09T10:02:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: ['docs'], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-progress'), - sourceProgress: [ - { connectionId: 'warehouse', operation: 'scan' as const, status: 'done' as const, elapsedMs: 30000 }, - { connectionId: 'docs', operation: 'source-ingest' as const, status: 'done' as const, elapsedMs: 60000 }, - ], - }); - }; - const select = vi.fn(async () => 'watch'); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto' }, - io.io, - { - prompts: { select, cancel: vi.fn() }, - sleep: completeRun, - watchIntervalMs: 1, - }, - ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress' }); - - const output = io.stdout(); - expect(output).toContain('Building KTX context'); - expect(output).toContain('Primary sources:'); - expect(output).toContain('warehouse'); - expect(output).toContain('Context sources:'); - expect(output).toContain('docs'); - expect(output).not.toContain('KTX context built: detached'); - }); - - it('re-renders the compact progress view when watched source messages change', async () => { - await writeReadyProject(tempDir); await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-progress-message', - status: 'detached', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: [], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 
'setup-context-local-progress-message'), - sourceProgress: [ - { - connectionId: 'warehouse', - operation: 'scan' as const, - status: 'running' as const, - startedAtMs: Date.now() - 5000, - percent: 35, - message: 'Inspecting database schema', - updatedAtMs: 1000, - }, - ], - }); - const io = makeIo(); - let polls = 0; - const updateRun = async () => { - polls++; - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-progress-message', - status: polls === 1 ? 'detached' : 'completed', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: polls === 1 ? '2026-05-09T10:00:01.000Z' : '2026-05-09T10:00:02.000Z', - ...(polls === 1 ? {} : { completedAt: '2026-05-09T10:00:02.000Z' }), - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: [], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-progress-message'), - sourceProgress: [ - { - connectionId: 'warehouse', - operation: 'scan' as const, - status: polls === 1 ? ('running' as const) : ('done' as const), - startedAtMs: Date.now() - 5000, - elapsedMs: polls === 1 ? undefined : 6000, - percent: polls === 1 ? 76 : undefined, - message: polls === 1 ? 'Building embeddings 3/4 batches' : undefined, - updatedAtMs: polls === 1 ? 2000 : undefined, - summaryText: polls === 1 ? 
undefined : '42 tables', - }, - ], - }); - }; - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, - io.io, - { - sleep: updateRun, - watchIntervalMs: 1, - }, - ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-progress-message' }); - - expect(io.stdout()).toContain('Inspecting database schema'); - expect(io.stdout()).toContain('Building embeddings 3/4 batches'); - expect(io.stdout()).toContain('warehouse'); - }); - - it('supports d to detach from the progress watch view', async () => { - await writeReadyProject(tempDir); - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-detach', + runId: 'setup-context-local-running', status: 'running', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', primarySourceConnectionIds: ['warehouse'], contextSourceConnectionIds: [], reportIds: [], artifactPaths: [], retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-detach'), - sourceProgress: [ - { connectionId: 'warehouse', operation: 'scan' as const, status: 'running' as const, startedAtMs: Date.now() }, - ], + commands: contextBuildCommands(tempDir, 'setup-context-local-running'), }); const io = makeIo(); - let triggerDetach: (() => void) | null = null; + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); await expect( runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto', autoWatch: true }, + { projectDir: tempDir, inputMode: 'disabled' }, io.io, - { - sleep: async () => { triggerDetach?.(); }, - watchIntervalMs: 1, - setupKeystroke: (onDetach) => { - triggerDetach = onDetach; - return () => {}; - }, - }, + { runContextBuild: 
runContextBuildMock, verifyContextReady }, ), - ).resolves.toMatchObject({ status: 'detached' }); + ).resolves.toMatchObject({ status: 'ready' }); - const output = io.stdout(); - expect(output).toContain('Building KTX context'); - expect(output).toContain('Context build continuing in the background.'); - expect(output).toContain('Resume: ktx setup --project-dir'); + expect(runContextBuildMock).toHaveBeenCalledOnce(); }); }); diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index 44185f18..413230b1 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -10,13 +10,15 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { buildPublicIngestPlan } from './public-ingest.js'; +import { + type KtxDatabaseContextDepth, + databaseContextDepth, +} from './ingest-depth.js'; +import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; +import { ensureSetupDatabaseContextDepths } from './setup-database-context-depth.js'; import { type ContextBuildSourceProgressUpdate, - createRepainter, - defaultSetupKeystroke, - renderContextBuildView, runContextBuild, - viewStateFromSourceProgress, } from './context-build-view.js'; import { createKtxSetupPromptAdapter, @@ -26,8 +28,6 @@ import { export type KtxSetupContextBuildStatus = | 'not_started' | 'running' - | 'detached' - | 'paused' | 'completed' | 'failed' | 'interrupted' @@ -35,10 +35,7 @@ export type KtxSetupContextBuildStatus = export interface KtxSetupContextCommands { build: string; - watch: string; status: string; - stop: string; - resume: string; } export interface KtxSetupContextState { @@ -61,7 +58,6 @@ export interface KtxSetupContextStatusSummary { ready: boolean; status: KtxSetupContextBuildStatus; runId?: string; - watchCommand?: string; statusCommand?: string; retryCommand?: string; detail?: string; @@ -78,8 +74,6 @@ export interface KtxSetupContextReadiness { export type 
KtxSetupContextResult = | { status: 'ready'; projectDir: string; runId: string } | { status: 'skipped'; projectDir: string } - | { status: 'detached'; projectDir: string; runId: string } - | { status: 'paused'; projectDir: string; runId: string } | { status: 'back'; projectDir: string } | { status: 'missing-input'; projectDir: string } | { status: 'failed'; projectDir: string }; @@ -91,12 +85,8 @@ export interface KtxSetupContextStepArgs { allowEmpty?: boolean; prompt?: boolean; autoWatch?: boolean; -} - -interface KtxSetupContextWatchArgs { - projectDir: string; - runId?: string; - inputMode: 'auto' | 'disabled'; + cliVersion?: string; + runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; } export interface KtxSetupContextPromptAdapter { @@ -110,9 +100,6 @@ export interface KtxSetupContextDeps { now?: () => Date; runContextBuild?: typeof runContextBuild; verifyContextReady?: (projectDir: string) => Promise; - sleep?: (ms: number) => Promise; - watchIntervalMs?: number; - setupKeystroke?: (onDetach: () => void, onCtrlC: () => void) => (() => void) | null; } interface KtxSetupContextTargets { @@ -123,7 +110,6 @@ interface KtxSetupContextTargets { const SETUP_CONTEXT_STATE_PATH = ['.ktx', 'setup', 'context-build.json'] as const; const LIVE_DATABASE_ADAPTER = 'live-database'; const SCAN_REPORT_FILE = 'scan-report.json'; -const DEFAULT_WATCH_INTERVAL_MS = 2_000; function createPromptAdapter(): KtxSetupContextPromptAdapter { return createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); @@ -146,10 +132,7 @@ export function contextBuildCommands(projectDir: string, runId?: string): KtxSet const resolvedProjectDir = resolve(projectDir); return { build: `ktx setup --project-dir ${resolvedProjectDir}`, - watch: `ktx setup --project-dir ${resolvedProjectDir}`, status: `ktx status --project-dir ${resolvedProjectDir}`, - stop: `ktx setup --project-dir ${resolvedProjectDir}`, - resume: `ktx setup --project-dir ${resolvedProjectDir}`, }; } @@ -169,8 +152,18 @@ function 
normalizeState(projectDir: string, value: unknown): KtxSetupContextStat if (typeof value !== 'object' || value === null || Array.isArray(value)) { return notStartedState(projectDir); } - const record = value as Partial; - const status = record.status ?? 'not_started'; + const record = value as Record; + const rawStatus = typeof record.status === 'string' ? record.status : 'not_started'; + const legacyActive = rawStatus === 'detached' || rawStatus === 'paused' || rawStatus === 'running'; + const status: KtxSetupContextBuildStatus = legacyActive + ? 'stale' + : rawStatus === 'completed' || + rawStatus === 'failed' || + rawStatus === 'interrupted' || + rawStatus === 'not_started' || + rawStatus === 'stale' + ? rawStatus + : 'not_started'; const runId = typeof record.runId === 'string' && record.runId.length > 0 ? record.runId : undefined; return { ...(runId ? { runId } : {}), @@ -194,12 +187,16 @@ function normalizeState(projectDir: string, value: unknown): KtxSetupContextStat ? record.retryableFailedTargets.filter((item): item is string => typeof item === 'string') : [], commands: contextBuildCommands(projectDir, runId), - ...(typeof record.failureReason === 'string' ? { failureReason: record.failureReason } : {}), + ...(typeof record.failureReason === 'string' + ? { failureReason: record.failureReason } + : legacyActive + ? { failureReason: 'Previous foreground context build did not finish. Rerun setup or ktx ingest.' } + : {}), ...(normalizeSourceProgress(record.sourceProgress) ? 
{ sourceProgress: normalizeSourceProgress(record.sourceProgress) } : {}), }; } -const VALID_SOURCE_OPERATIONS = new Set(['scan', 'source-ingest']); +const VALID_SOURCE_OPERATIONS = new Set(['database-ingest', 'source-ingest']); const VALID_SOURCE_STATUSES = new Set(['queued', 'running', 'done', 'failed']); function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpdate[] | undefined { @@ -213,7 +210,7 @@ function normalizeSourceProgress(value: unknown): ContextBuildSourceProgressUpda if (!VALID_SOURCE_STATUSES.has(String(rec.status))) continue; entries.push({ connectionId: rec.connectionId, - operation: rec.operation as 'scan' | 'source-ingest', + operation: rec.operation as 'database-ingest' | 'source-ingest', status: rec.status as 'queued' | 'running' | 'done' | 'failed', ...(typeof rec.startedAtMs === 'number' ? { startedAtMs: rec.startedAtMs } : {}), ...(typeof rec.elapsedMs === 'number' ? { elapsedMs: rec.elapsedMs } : {}), @@ -272,7 +269,7 @@ export function setupContextStatusFromState( ready, status, ...(state.runId ? { runId: state.runId } : {}), - ...(state.runId ? { watchCommand: state.commands.watch, statusCommand: state.commands.status } : {}), + ...(state.runId ? { statusCommand: state.commands.status } : {}), retryCommand: state.commands.build, ...(state.failureReason ? 
{ detail: state.failureReason } : {}), }; @@ -289,7 +286,7 @@ function listContextTargets(project: KtxLocalProject): KtxSetupContextTargets { const plan = buildPublicIngestPlan(project, { projectDir: project.projectDir, all: true }); return { primarySourceConnectionIds: plan.targets - .filter((target) => target.operation === 'scan') + .filter((target) => target.operation === 'database-ingest') .map((target) => target.connectionId), contextSourceConnectionIds: plan.targets .filter((target) => target.operation === 'source-ingest') @@ -297,27 +294,6 @@ function listContextTargets(project: KtxLocalProject): KtxSetupContextTargets { }; } -function missingCapabilities(project: KtxLocalProject): string[] { - const missing: string[] = []; - const llm = project.config.llm; - if (llm.provider.backend === 'none' || !llm.models.default) { - missing.push('Models are not ready.'); - } - const embeddings = project.config.ingest.embeddings; - if ( - embeddings.backend === 'none' || - embeddings.backend === 'deterministic' || - !embeddings.model || - embeddings.dimensions <= 0 - ) { - missing.push('Embeddings are not ready.'); - } - if (project.config.scan.enrichment.mode === 'none') { - missing.push('Scan enrichment is not configured.'); - } - return missing; -} - async function hasFileWithExtension( root: string, extensions: Set, @@ -387,7 +363,21 @@ async function readLatestScanReport(projectDir: string, connectionId: string): P return reports.at(-1)?.report ?? null; } -function scanReportHasCompletedDescriptionEnrichment(report: unknown, connectionId: string): boolean { +function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean { + if (!isRecord(report)) { + return false; + } + if (report.connectionId !== connectionId || report.dryRun === true) { + return false; + } + return stringArrayValue(isRecord(report.artifactPaths) ? 
report.artifactPaths.manifestShards : undefined).length > 0; +} + +function scanReportHasCompletedDeepEnrichment( + report: unknown, + connectionId: string, + relationshipsRequired: boolean, +): boolean { if (!isRecord(report)) { return false; } @@ -404,19 +394,39 @@ function scanReportHasCompletedDescriptionEnrichment(report: unknown, connection report.enrichment.embeddings === 'completed' && completedStages.includes('descriptions') && completedStages.includes('embeddings') && + (!relationshipsRequired || completedStages.includes('relationships')) && stringArrayValue(report.artifactPaths.manifestShards).length > 0 ); } +function scanReportSatisfiesDepth(input: { + report: unknown; + connectionId: string; + depth: KtxDatabaseContextDepth; + relationshipsRequired: boolean; +}): boolean { + if (input.depth === 'fast') { + return scanReportHasSchemaManifest(input.report, input.connectionId); + } + return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired); +} + async function verifyPrimarySourceScans( - projectDir: string, + project: KtxLocalProject, connectionIds: string[], ): Promise<{ ready: boolean; details: string[] }> { const details: string[] = []; + const relationshipsRequired = project.config.scan.relationships.enabled; for (const connectionId of connectionIds) { - const report = await readLatestScanReport(projectDir, connectionId); - if (!scanReportHasCompletedDescriptionEnrichment(report, connectionId)) { - details.push(`${connectionId}: enriched database scan with AI descriptions has not completed.`); + const connection = project.config.connections[connectionId]; + const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; + const report = await readLatestScanReport(project.projectDir, connectionId); + if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) { + details.push( + depth === 'fast' + ? 
`${connectionId}: schema context has not completed.` + : `${connectionId}: deep database context has not completed.`, + ); } } return { ready: details.length === 0, details }; @@ -425,7 +435,7 @@ async function verifyPrimarySourceScans( async function defaultVerifyContextReady(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); const targets = listContextTargets(project); - const primarySourceScans = await verifyPrimarySourceScans(projectDir, targets.primarySourceConnectionIds); + const primarySourceScans = await verifyPrimarySourceScans(project, targets.primarySourceConnectionIds); const semanticLayerContextReady = await hasFileWithExtension( join(projectDir, 'semantic-layer'), new Set(['.yaml', '.yml']), @@ -481,14 +491,21 @@ function writeSkippedContext(projectDir: string, io: KtxCliIo): void { io.stdout.write(`Check status:\n ktx status --project-dir ${resolve(projectDir)}\n`); } -function writeSuccess(readiness: KtxSetupContextReadiness, targets: KtxSetupContextTargets, io: KtxCliIo): void { +function writeSuccess( + project: KtxLocalProject, + readiness: KtxSetupContextReadiness, + targets: KtxSetupContextTargets, + io: KtxCliIo, +): void { io.stdout.write('\nKTX context is ready for agents.\n\n'); - io.stdout.write('Primary sources:\n'); + io.stdout.write('Databases:\n'); if (targets.primarySourceConnectionIds.length === 0) { io.stdout.write(' none\n'); } else { for (const connectionId of targets.primarySourceConnectionIds) { - io.stdout.write(` ${connectionId}: enriched scan complete\n`); + const connection = project.config.connections[connectionId]; + const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; + io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 
'deep context complete' : 'schema context complete'}\n`); } } io.stdout.write('\nContext sources:\n'); @@ -556,22 +573,11 @@ async function runBuild( { projectDir: args.projectDir, inputMode: args.inputMode, - scanMode: 'enriched', - detectRelationships: true, + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), }, io, { - onDetach: () => { - const resolvedDir = resolve(args.projectDir); - mkdirSync(join(resolvedDir, '.ktx', 'setup'), { recursive: true }); - const detachedState = normalizeState(resolvedDir, { - ...runningState, - status: 'detached', - updatedAt: new Date().toISOString(), - ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), - }); - writeFileSync(statePath(resolvedDir), `${JSON.stringify(detachedState, null, 2)}\n`); - }, onSourceProgress: (sources) => { lastSourceProgress = sources; try { @@ -591,18 +597,6 @@ async function runBuild( ); const completedReportIds = buildResult.reportIds ?? []; const completedArtifactPaths = buildResult.artifactPaths ?? []; - if (buildResult.detached) { - const updatedAt = now().toISOString(); - await writeKtxSetupContextState(args.projectDir, { - ...runningState, - status: 'detached', - updatedAt, - reportIds: completedReportIds, - artifactPaths: completedArtifactPaths, - ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), - }); - return { status: 'detached', projectDir: args.projectDir, runId }; - } if (buildResult.exitCode !== 0) { const updatedAt = now().toISOString(); await writeKtxSetupContextState(args.projectDir, { @@ -650,7 +644,7 @@ async function runBuild( retryableFailedTargets: [], ...(lastSourceProgress ? 
{ sourceProgress: lastSourceProgress } : {}), }); - writeSuccess(readiness, targets, io); + writeSuccess(project, readiness, targets, io); return { status: 'ready', projectDir: args.projectDir, runId }; } @@ -692,64 +686,31 @@ export async function runKtxSetupContextStep( deps: KtxSetupContextDeps = {}, ): Promise { try { - const project = await loadKtxProject({ projectDir: args.projectDir }); + let project = await loadKtxProject({ projectDir: args.projectDir }); + const prompts = deps.prompts ?? createPromptAdapter(); + const depthProject = await ensureSetupDatabaseContextDepths({ + project, + args, + prompts, + }); + if (depthProject === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + project = depthProject; const existingState = await readKtxSetupContextState(args.projectDir); const completedSteps = (await readKtxSetupState(args.projectDir)).completed_steps; if (completedSteps.includes('context') && existingState.status === 'completed') { return { status: 'ready', projectDir: args.projectDir, runId: existingState.runId ?? 'setup-context-completed' }; } - if ( - (existingState.status === 'running' || existingState.status === 'detached') && - args.inputMode !== 'disabled' + args.allowEmpty === true && + (!completedSteps.includes('databases') || !completedSteps.includes('sources')) ) { - if (args.autoWatch) { - const watched = await watchContextStatus( - { - projectDir: args.projectDir, - ...(existingState.runId ? { runId: existingState.runId } : {}), - inputMode: args.inputMode, - }, - existingState, - io, - deps, - ); - return setupResultFromWatchedState(args.projectDir, watched.state); - } - const prompts = deps.prompts ?? 
createPromptAdapter(); - const choice = await prompts.select({ - message: - 'A context build is running in the background.\n\n' + - 'You can watch it until it finishes, check its status once, or start a fresh build.', - options: [ - { value: 'watch', label: 'Watch progress' }, - { value: 'status', label: 'Check status' }, - { value: 'rebuild', label: 'Start a fresh context build' }, - { value: 'back', label: 'Back' }, - ], - }); - if (choice === 'watch') { - const watched = await watchContextStatus( - { - projectDir: args.projectDir, - ...(existingState.runId ? { runId: existingState.runId } : {}), - inputMode: args.inputMode, - }, - existingState, - io, - deps, - ); - return setupResultFromWatchedState(args.projectDir, watched.state); - } - if (choice === 'status') { - const commands = contextBuildCommands(args.projectDir, existingState.runId); - io.stdout.write(`\nRun: ${commands.status}\n`); - io.stdout.write(`Log: ${join(resolve(args.projectDir), '.ktx', 'setup', 'context-build.log')}\n`); - return { status: 'detached', projectDir: args.projectDir, runId: existingState.runId ?? 
'' }; - } - if (choice === 'back') { - return { status: 'back', projectDir: args.projectDir }; - } + return { status: 'skipped', projectDir: args.projectDir }; + } + + if (existingState.status === 'stale') { + io.stdout.write('Previous context build state is stale; starting a fresh foreground build.\n'); } const targets = listContextTargets(project); @@ -757,16 +718,19 @@ export async function runKtxSetupContextStep( if (args.allowEmpty === true) { return { status: 'skipped', projectDir: args.projectDir }; } - io.stderr.write('No primary or context sources are configured for a KTX context build.\n'); + io.stderr.write('No databases or context sources are configured for a KTX context build.\n'); return { status: 'failed', projectDir: args.projectDir }; } - const missing = missingCapabilities(project); - if (missing.length > 0) { + const preflightPlan = buildPublicIngestPlan(project, { projectDir: project.projectDir, all: true }); + const preflightFailures = preflightPlan.targets.flatMap((target) => + target.preflightFailure ? [`${target.connectionId}: ${target.preflightFailure}`] : [], + ); + if (preflightFailures.length > 0) { if (args.allowEmpty === true) { return { status: 'skipped', projectDir: args.projectDir }; } - writeMissingCapabilities(missing, io); + writeMissingCapabilities(preflightFailures, io); return { status: 'missing-input', projectDir: args.projectDir }; } @@ -778,7 +742,7 @@ export async function runKtxSetupContextStep( } if (args.inputMode !== 'disabled' && args.prompt !== false) { - const choice = await promptForBuild(deps.prompts ?? 
createPromptAdapter()); + const choice = await promptForBuild(prompts); if (choice === 'back') { return { status: 'back', projectDir: args.projectDir }; } @@ -794,183 +758,3 @@ export async function runKtxSetupContextStep( return { status: 'failed', projectDir: args.projectDir }; } } - -function stateMatchesRunId(state: KtxSetupContextState, runId: string | undefined): boolean { - return !runId || state.runId === runId; -} - -function isActiveStatus(status: KtxSetupContextBuildStatus): boolean { - return status === 'running' || status === 'detached'; -} - -function watchExitCode(status: KtxSetupContextBuildStatus): number { - return status === 'failed' || status === 'interrupted' || status === 'stale' ? 1 : 0; -} - -function defaultSleep(ms: number): Promise { - return new Promise((resolveSleep) => setTimeout(resolveSleep, ms)); -} - -function writeContextStatus(state: KtxSetupContextState, io: KtxCliIo): void { - io.stdout.write(`KTX context built: ${state.status === 'completed' ? 'yes' : state.status.replaceAll('_', ' ')}\n`); - if (state.runId) { - io.stdout.write(`Run: ${state.runId}\n`); - io.stdout.write(`Watch: ${state.commands.watch}\n`); - io.stdout.write(`Status: ${state.commands.status}\n`); - } - if (state.failureReason) { - io.stdout.write(`Detail: ${state.failureReason}\n`); - } -} - -async function watchContextStatus( - args: KtxSetupContextWatchArgs, - initialState: KtxSetupContextState, - io: KtxCliIo, - deps: KtxSetupContextDeps, -): Promise<{ exitCode: number; state: KtxSetupContextState }> { - if (initialState.sourceProgress && initialState.sourceProgress.length > 0) { - return watchContextStatusWithProgressView(args, initialState, io, deps); - } - return watchContextStatusText(args, initialState, io, deps); -} - -async function watchContextStatusText( - args: KtxSetupContextWatchArgs, - initialState: KtxSetupContextState, - io: KtxCliIo, - deps: KtxSetupContextDeps, -): Promise<{ exitCode: number; state: KtxSetupContextState }> { - const sleep 
= deps.sleep ?? defaultSleep; - const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; - let state = initialState; - let lastRenderedStatus = ''; - - io.stdout.write('KTX context build\n'); - while (true) { - const renderedStatus = `${state.status}:${state.updatedAt ?? ''}:${state.completedAt ?? ''}:${state.failureReason ?? ''}`; - if (renderedStatus !== lastRenderedStatus) { - writeContextStatus(state, io); - lastRenderedStatus = renderedStatus; - } - - if (!isActiveStatus(state.status)) { - return { exitCode: watchExitCode(state.status), state }; - } - - await sleep(intervalMs); - state = await readKtxSetupContextState(args.projectDir); - if (!stateMatchesRunId(state, args.runId)) { - io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); - return { exitCode: 1, state }; - } - } -} - -async function watchContextStatusWithProgressView( - args: KtxSetupContextWatchArgs, - initialState: KtxSetupContextState, - io: KtxCliIo, - deps: KtxSetupContextDeps, -): Promise<{ exitCode: number; state: KtxSetupContextState }> { - const sleep = deps.sleep ?? defaultSleep; - const intervalMs = deps.watchIntervalMs ?? DEFAULT_WATCH_INTERVAL_MS; - const isTTY = io.stdout.isTTY === true; - const repainter = isTTY ? createRepainter(io) : null; - const projectDir = resolve(args.projectDir); - const viewOpts = { styled: isTTY, showHint: true, projectDir }; - let state = initialState; - let lastProgressKey = ''; - let detached = false; - - let viewState = viewStateFromSourceProgress(state.sourceProgress ?? [], Date.now(), - state.startedAt ? new Date(state.startedAt).getTime() : undefined); - - const cleanupKeystroke = (isTTY || deps.setupKeystroke) - ? (deps.setupKeystroke ?? 
defaultSetupKeystroke)( - () => { detached = true; }, - () => { detached = true; }, - ) - : null; - - let spinnerInterval: ReturnType | null = null; - if (repainter) { - repainter.paint(renderContextBuildView(viewState, viewOpts)); - spinnerInterval = setInterval(() => { - viewState.frame++; - const now = Date.now(); - viewState.totalElapsedMs = viewState.startedAt !== null ? now - viewState.startedAt : 0; - for (const t of [...viewState.primarySources, ...viewState.contextSources]) { - if (t.status === 'running' && t.startedAt !== null) { - t.elapsedMs = now - t.startedAt; - } - } - repainter.paint(renderContextBuildView(viewState, viewOpts)); - }, 140); - } - - try { - while (true) { - if (!repainter) { - const currentKey = JSON.stringify( - state.sourceProgress?.map((s) => ({ - id: s.connectionId, - status: s.status, - percent: s.percent, - message: s.message, - summaryText: s.summaryText, - updatedAtMs: s.updatedAtMs, - })), - ); - if (currentKey !== lastProgressKey || !isActiveStatus(state.status)) { - io.stdout.write(renderContextBuildView(viewState, viewOpts)); - lastProgressKey = currentKey; - } - } - - if (!isActiveStatus(state.status)) { - return { exitCode: watchExitCode(state.status), state }; - } - if (detached) break; - - await sleep(intervalMs); - if (detached) break; - - try { - state = await readKtxSetupContextState(args.projectDir); - } catch { - continue; - } - - if (!stateMatchesRunId(state, args.runId)) { - io.stderr.write(`KTX setup context run "${args.runId}" was not found.\n`); - return { exitCode: 1, state }; - } - - const now = Date.now(); - const startedAtMs = state.startedAt ? new Date(state.startedAt).getTime() : undefined; - viewState = viewStateFromSourceProgress(state.sourceProgress ?? 
[], now, startedAtMs); - } - } finally { - if (spinnerInterval) clearInterval(spinnerInterval); - cleanupKeystroke?.(); - } - - io.stdout.write('\n\nContext build continuing in the background.\n'); - io.stdout.write(`Resume: ktx setup --project-dir ${projectDir}\n`); - io.stdout.write(`Status: ktx status --project-dir ${projectDir}\n`); - return { exitCode: 0, state }; -} - -function setupResultFromWatchedState(projectDir: string, state: KtxSetupContextState): KtxSetupContextResult { - if (state.status === 'completed') { - return { status: 'ready', projectDir, runId: state.runId ?? 'setup-context-completed' }; - } - if (state.status === 'paused') { - return { status: 'paused', projectDir, runId: state.runId ?? '' }; - } - if (state.status === 'running' || state.status === 'detached') { - return { status: 'detached', projectDir, runId: state.runId ?? '' }; - } - return { status: 'failed', projectDir }; -} diff --git a/packages/cli/src/setup-database-context-depth.ts b/packages/cli/src/setup-database-context-depth.ts new file mode 100644 index 00000000..27683b61 --- /dev/null +++ b/packages/cli/src/setup-database-context-depth.ts @@ -0,0 +1,131 @@ +import { writeFile } from 'node:fs/promises'; +import { + type KtxLocalProject, + type KtxProjectConnectionConfig, + loadKtxProject, + serializeKtxProjectConfig, +} from '@ktx/context/project'; +import { + type KtxDatabaseContextDepth, + databaseContextDepth, + deepReadinessGaps, + isDatabaseDriver, + normalizeConnectionDriver, + recommendedDatabaseContextDepth, + withDatabaseContextDepth, +} from './ingest-depth.js'; +import type { KtxSetupPromptOption } from './setup-prompts.js'; + +export interface KtxSetupDatabaseContextDepthArgs { + inputMode: 'auto' | 'disabled'; +} + +export interface KtxSetupDatabaseContextDepthPromptAdapter { + select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; +} + +function databaseConnectionsNeedingDepth(project: KtxLocalProject): string[] { + return 
Object.entries(project.config.connections) + .filter(([, connection]) => isDatabaseDriver(normalizeConnectionDriver(connection))) + .filter(([, connection]) => databaseContextDepth(connection) === undefined) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +async function chooseSetupDatabaseContextDepth(input: { + project: KtxLocalProject; + args: KtxSetupDatabaseContextDepthArgs; + prompts: KtxSetupDatabaseContextDepthPromptAdapter; +}): Promise { + const recommended = recommendedDatabaseContextDepth(input.project.config); + if (input.args.inputMode === 'disabled') { + return recommended; + } + + const deepReady = deepReadinessGaps(input.project.config).length === 0; + const options = + recommended === 'deep' + ? [ + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'back', label: 'Back' }, + ] + : [ + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'back', label: 'Back' }, + ]; + + const choice = await input.prompts.select({ + message: + 'How much database context should KTX build?\n\n' + + (deepReady + ? 'Deep is available because model, embedding, and scan enrichment are configured.' 
+ : 'Fast is recommended because model, embedding, or scan enrichment is not configured.'), + options, + }); + if (choice === 'back') { + return 'back'; + } + if (choice === 'fast' || choice === 'deep') { + return choice; + } + return recommended; +} + +async function writeDatabaseContextDepths( + project: KtxLocalProject, + connectionIds: string[], + depth: KtxDatabaseContextDepth, +): Promise { + if (connectionIds.length === 0) { + return project; + } + const nextConnections = { ...project.config.connections }; + for (const connectionId of connectionIds) { + const connection = nextConnections[connectionId]; + if (connection) { + nextConnections[connectionId] = withDatabaseContextDepth(connection, depth); + } + } + const nextConfig = { ...project.config, connections: nextConnections }; + await writeFile(project.configPath, serializeKtxProjectConfig(nextConfig), 'utf-8'); + return await loadKtxProject({ projectDir: project.projectDir }); +} + +export async function ensureSetupDatabaseContextDepths(input: { + project: KtxLocalProject; + args: KtxSetupDatabaseContextDepthArgs; + prompts: KtxSetupDatabaseContextDepthPromptAdapter; +}): Promise { + const missingDepthConnectionIds = databaseConnectionsNeedingDepth(input.project); + if (missingDepthConnectionIds.length === 0) { + return input.project; + } + + const depth = await chooseSetupDatabaseContextDepth(input); + if (depth === 'back') { + return 'back'; + } + return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, depth); +} + +export async function applySetupDatabaseContextDepth(input: { + project: KtxLocalProject; + connection: KtxProjectConnectionConfig; + args: KtxSetupDatabaseContextDepthArgs; + prompts: KtxSetupDatabaseContextDepthPromptAdapter; +}): Promise { + if ( + !isDatabaseDriver(normalizeConnectionDriver(input.connection)) || + databaseContextDepth(input.connection) !== undefined + ) { + return input.connection; + } + + const depth = await 
chooseSetupDatabaseContextDepth(input); + if (depth === 'back') { + return 'back'; + } + return withDatabaseContextDepth(input.connection, depth); +} diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index d3a55fba..8e53f0bf 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -86,7 +86,15 @@ function makePromptAdapter(options: { const passwordValues = [...(options.passwordValues ?? [])]; return { multiselect: vi.fn(async () => multiselectValues.shift() ?? ['postgres']), - select: vi.fn(async () => selectValues.shift() ?? 'finish'), + select: vi.fn(async ({ message }) => { + if (message.includes('How much database context should KTX build?')) { + const nextValue = selectValues[0]; + return nextValue === 'fast' || nextValue === 'deep' || nextValue === 'back' + ? (selectValues.shift() ?? 'fast') + : 'fast'; + } + return selectValues.shift() ?? 'finish'; + }), text: vi.fn(async () => (textValues.length > 0 ? textValues.shift() : '')), password: vi.fn(async () => (passwordValues.length > 0 ? 
passwordValues.shift() : '')), cancel: vi.fn(), @@ -118,7 +126,7 @@ describe('setup databases step', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('shows every supported primary source in the interactive checklist', async () => { + it('shows every supported database in the interactive checklist', async () => { const prompts = makePromptAdapter({ multiselectValues: [['back']] }); const result = await runKtxSetupDatabasesStep( @@ -130,7 +138,7 @@ describe('setup databases step', () => { expect(result.status).toBe('back'); expect(prompts.multiselect).toHaveBeenCalledWith({ message: - 'Which primary sources should KTX connect to?\n' + + 'Which databases should KTX connect to?\n' + 'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', options: [ { value: 'sqlite', label: 'SQLite' }, @@ -145,7 +153,7 @@ describe('setup databases step', () => { }); }); - it('requires choosing a primary source after an empty interactive selection', async () => { + it('requires choosing a database after an empty interactive selection', async () => { const io = makeIo(); const prompts = makePromptAdapter({ multiselectValues: [[], ['back']], @@ -161,12 +169,12 @@ describe('setup databases step', () => { expect(result.status).toBe('back'); expect(prompts.select).not.toHaveBeenCalled(); expect(io.stdout()).toContain( - 'KTX cannot work without at least one primary source. Select a source or press Escape to go back.', + 'KTX cannot work without at least one database. 
Select a database or press Escape to go back.', ); expect(prompts.multiselect).toHaveBeenCalledTimes(2); }); - it('lets Back from connection method selection return to primary source selection when adding a new driver', async () => { + it('lets Back from connection method selection return to database selection when adding a new driver', async () => { const prompts = makePromptAdapter({ multiselectValues: [['postgres'], ['back']], selectValues: ['back'], @@ -189,12 +197,12 @@ describe('setup databases step', () => { }); expect(prompts.multiselect).toHaveBeenCalledTimes(2); expect(vi.mocked(prompts.multiselect).mock.calls[1]?.[0].message).toBe( - 'Which primary sources should KTX connect to?\n' + + 'Which databases should KTX connect to?\n' + 'Use Up/Down to move, Space to select or unselect, Enter to confirm, Escape to go back, or Ctrl+C to exit.', ); }); - it('offers connection URL paste first for URL-capable primary sources', async () => { + it('offers connection URL paste first for URL-capable databases', async () => { const cases: Array<{ driver: KtxSetupDatabaseDriver; label: string }> = [ { driver: 'postgres', label: 'PostgreSQL' }, { driver: 'mysql', label: 'MySQL' }, @@ -319,10 +327,12 @@ describe('setup databases step', () => { }); expect(testConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything()); expect(scanConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything()); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); expect(config.connections['postgres-warehouse']).toEqual({ driver: 'postgres', url: 'env:DATABASE_URL', + context: { depth: 'fast' }, }); }); @@ -545,7 +555,7 @@ describe('setup databases step', () => { } }); - it('lets Back from connection method selection return to primary source selection', async () => { + it('lets Back from 
connection method selection return to database selection', async () => { const prompts = makePromptAdapter({ multiselectValues: [['postgres'], ['back']], selectValues: ['back'], @@ -553,11 +563,19 @@ describe('setup databases step', () => { }); const testConnection = vi.fn(async () => 0); const scanConnection = vi.fn(async () => 0); + const listSchemas = vi.fn(async () => []); + const listTables = vi.fn(async () => []); const result = await runKtxSetupDatabasesStep( - { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, makeIo().io, - { prompts, testConnection, scanConnection }, + { prompts, testConnection, scanConnection, listSchemas, listTables }, ); expect(result.status).toBe('back'); @@ -574,7 +592,7 @@ describe('setup databases step', () => { expect(scanConnection).not.toHaveBeenCalled(); }); - it('shows a configured primary source menu instead of the type checklist when a primary source exists', async () => { + it('shows a configured database menu instead of the type checklist when a database exists', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -596,7 +614,13 @@ describe('setup databases step', () => { const scanConnection = vi.fn(async () => 0); const result = await runKtxSetupDatabasesStep( - { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, makeIo().io, { prompts, testConnection, scanConnection }, ); @@ -604,18 +628,18 @@ describe('setup databases step', () => { expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] }); expect(prompts.multiselect).not.toHaveBeenCalled(); expect(prompts.select).toHaveBeenCalledWith({ - message: 'Primary sources already configured: warehouse\nWhat 
would you like to do?', + message: 'Databases already configured: warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); expect(testConnection).not.toHaveBeenCalled(); expect(scanConnection).not.toHaveBeenCalled(); }); - it('preserves existing primary source ids when adding another source from the configured menu', async () => { + it('preserves existing database ids when adding another database from the configured menu', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -641,7 +665,13 @@ describe('setup databases step', () => { const scanConnection = vi.fn(async () => 0); const result = await runKtxSetupDatabasesStep( - { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, makeIo().io, { prompts, testConnection, scanConnection }, ); @@ -657,20 +687,21 @@ describe('setup databases step', () => { required: true, })); expect(prompts.select).toHaveBeenCalledWith({ - message: 'Primary sources already configured: warehouse\nWhat would you like to do?', + message: 'Databases already configured: warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); 
expect(testConnection).toHaveBeenCalledTimes(1); expect(testConnection).toHaveBeenCalledWith(tempDir, 'mysql-warehouse', expect.anything()); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'mysql-warehouse']); }); - it('lets users add another primary source after completing the first one', async () => { + it('lets users add another database after completing the first one', async () => { const prompts = makePromptAdapter({ multiselectValues: [['postgres'], ['mysql']], selectValues: ['url', 'add', 'url', 'continue'], @@ -680,7 +711,13 @@ describe('setup databases step', () => { const scanConnection = vi.fn(async () => 0); const result = await runKtxSetupDatabasesStep( - { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, makeIo().io, { prompts, testConnection, scanConnection }, ); @@ -696,11 +733,11 @@ describe('setup databases step', () => { required: true, })); expect(prompts.select).toHaveBeenCalledWith({ - message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?', + message: 'Databases already configured: postgres-warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); @@ -718,7 +755,13 @@ 
describe('setup databases step', () => { const scanConnection = vi.fn(async () => 0); const result = await runKtxSetupDatabasesStep( - { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] }, + { + projectDir: tempDir, + inputMode: 'auto', + skipDatabases: false, + databaseSchemas: [], + disableQueryHistory: true, + }, io.io, { prompts, testConnection, scanConnection }, ); @@ -733,13 +776,13 @@ describe('setup databases step', () => { initialValues: ['postgres'], required: true, })); - expect(io.stdout()).not.toContain('KTX cannot work without at least one primary source'); - expect(prompts.select).toHaveBeenNthCalledWith(2, { - message: 'Primary sources already configured: postgres-warehouse\nWhat would you like to do?', + expect(io.stdout()).not.toContain('KTX cannot work without at least one database'); + expect(prompts.select).toHaveBeenNthCalledWith(3, { + message: 'Databases already configured: postgres-warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); }); @@ -778,18 +821,18 @@ describe('setup databases step', () => { initialValues: ['postgres'], required: true, })); - expect(io.stdout()).not.toContain('KTX cannot work without at least one primary source'); + expect(io.stdout()).not.toContain('KTX cannot work without at least one database'); expect(prompts.select).toHaveBeenNthCalledWith(2, { - message: 'Primary sources already configured: warehouse\nWhat would you like to do?', + message: 'Databases already configured: warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an 
existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); }); - it('returns from primary source edit selection back to the configured source menu', async () => { + it('returns from database edit selection back to the configured source menu', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -820,18 +863,18 @@ describe('setup databases step', () => { expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] }); expect(prompts.select).toHaveBeenNthCalledWith(2, { - message: 'Primary source to edit', + message: 'Database to edit', options: [ { value: 'warehouse', label: 'warehouse (PostgreSQL)' }, { value: 'back', label: 'Back' }, ], }); expect(prompts.select).toHaveBeenNthCalledWith(3, { - message: 'Primary sources already configured: warehouse\nWhat would you like to do?', + message: 'Databases already configured: warehouse\nWhat would you like to do?', options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }); expect(testConnection).not.toHaveBeenCalled(); @@ -864,12 +907,13 @@ describe('setup databases step', () => { }); let primaryMenuCount = 0; vi.mocked(prompts.select).mockImplementation(async (options) => { - if (options.message === 'Primary sources already configured: warehouse\nWhat would you like to do?') { + if (options.message === 'Databases already configured: warehouse\nWhat would you like to do?') { primaryMenuCount += 1; return primaryMenuCount === 1 ? 
'edit' : 'continue'; } - if (options.message === 'Primary source to edit') return 'warehouse'; + if (options.message === 'Database to edit') return 'warehouse'; if (options.message === 'How do you want to connect to PostgreSQL?') return 'url'; + if (options.message.startsWith('Enable query-history ingest')) return 'no'; return 'back'; }); const testConnection = vi.fn(async () => 0); @@ -909,7 +953,7 @@ describe('setup databases step', () => { }); }); - it('preselects existing schema and table choices when editing a primary source', async () => { + it('preselects existing schema and table choices when editing a database', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -936,12 +980,13 @@ describe('setup databases step', () => { }); let primaryMenuCount = 0; vi.mocked(prompts.select).mockImplementation(async (options) => { - if (options.message === 'Primary sources already configured: warehouse\nWhat would you like to do?') { + if (options.message === 'Databases already configured: warehouse\nWhat would you like to do?') { primaryMenuCount += 1; return primaryMenuCount === 1 ? 'edit' : 'continue'; } - if (options.message === 'Primary source to edit') return 'warehouse'; + if (options.message === 'Database to edit') return 'warehouse'; if (options.message === 'How do you want to connect to PostgreSQL?') return 'url'; + if (options.message.startsWith('Enable query-history ingest')) return 'no'; return 'back'; }); const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']); @@ -1008,12 +1053,13 @@ describe('setup databases step', () => { }); let primaryMenuCount = 0; vi.mocked(prompts.select).mockImplementation(async (options) => { - if (options.message === 'Primary sources already configured: warehouse\nWhat would you like to do?') { + if (options.message === 'Databases already configured: warehouse\nWhat would you like to do?') { primaryMenuCount += 1; return primaryMenuCount === 1 ? 
'edit' : 'continue'; } - if (options.message === 'Primary source to edit') return 'warehouse'; + if (options.message === 'Database to edit') return 'warehouse'; if (options.message === 'How do you want to connect to PostgreSQL?') return 'url'; + if (options.message.startsWith('Enable query-history ingest')) return 'no'; return 'back'; }); const testConnection = vi.fn(async () => 0); @@ -1074,12 +1120,13 @@ describe('setup databases step', () => { const prompts = makePromptAdapter({ textValues: ['env:DATABASE_URL'] }); let primaryMenuCount = 0; vi.mocked(prompts.select).mockImplementation(async (options) => { - if (options.message === 'Primary sources already configured: warehouse\nWhat would you like to do?') { + if (options.message === 'Databases already configured: warehouse\nWhat would you like to do?') { primaryMenuCount += 1; return primaryMenuCount === 1 ? 'edit' : 'continue'; } - if (options.message === 'Primary source to edit') return 'warehouse'; + if (options.message === 'Database to edit') return 'warehouse'; if (options.message === 'How do you want to connect to PostgreSQL?') return 'url'; + if (options.message.startsWith('Enable query-history ingest')) return 'no'; return 'back'; }); const testConnection = vi.fn(async () => 0); @@ -1116,7 +1163,7 @@ describe('setup databases step', () => { }); }); - it('restores an existing primary source edit when the follow-up scan fails', async () => { + it('restores an existing database edit when the follow-up scan fails', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -1141,9 +1188,10 @@ describe('setup databases step', () => { textValues: ['env:DATABASE_URL'], }); vi.mocked(prompts.select).mockImplementation(async (options) => { - if (options.message === 'Primary sources already configured: warehouse\nWhat would you like to do?') return 'edit'; - if (options.message === 'Primary source to edit') return 'warehouse'; + if (options.message === 'Databases already configured: warehouse\nWhat would you 
like to do?') return 'edit'; + if (options.message === 'Database to edit') return 'warehouse'; if (options.message === 'How do you want to connect to PostgreSQL?') return 'url'; + if (options.message.startsWith('Enable query-history ingest')) return 'no'; return 'back'; }); const listTables = vi.fn(async () => [ @@ -1186,15 +1234,15 @@ describe('setup databases step', () => { databaseDrivers: ['postgres'], databaseSchemas: [], skipDatabases: false, + disableQueryHistory: true, }, makeIo().io, { prompts, testConnection, scanConnection }, ); expect(result.status).toBe('ready'); - expect(prompts.select).toHaveBeenCalledTimes(2); - expect(vi.mocked(prompts.select).mock.calls[0]?.[0].message).toBe('How do you want to connect to PostgreSQL?'); - expect(vi.mocked(prompts.select).mock.calls[1]?.[0].message).toBe('How do you want to connect to PostgreSQL?'); + const selectMessages = vi.mocked(prompts.select).mock.calls.map(([options]) => options.message); + expect(selectMessages.filter((message) => message === 'How do you want to connect to PostgreSQL?')).toHaveLength(2); expect(testConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything()); }); @@ -1219,15 +1267,15 @@ describe('setup databases step', () => { expect(prompts.select).toHaveBeenNthCalledWith(2, { message: 'Some PostgreSQL connection details are missing.\n' + - 'Continue entering details, or go back to primary source selection.', + 'Continue entering details, or go back to database selection.', options: [ { value: 'retry', label: 'Continue entering PostgreSQL details' }, - { value: 'back', label: 'Back to primary source selection' }, + { value: 'back', label: 'Back to database selection' }, ], }); }); - it('lets Escape from connection name return to primary source selection', async () => { + it('lets Escape from connection name return to database selection', async () => { const prompts = makePromptAdapter({ multiselectValues: [['postgres'], ['back']], textValues: [undefined], @@ -1276,7 
+1324,8 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); const connection = config.connections['postgres-warehouse']; expect(connection).toMatchObject({ driver: 'postgres', @@ -1315,7 +1364,8 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); const connection = config.connections['postgres-warehouse']; expect(connection.url).toBe(`file:${resolve(tempDir, '.ktx/secrets/postgres-warehouse-url')}`); expect(connection.driver).toBe('postgres'); @@ -1336,7 +1386,7 @@ describe('setup databases step', () => { return 0; }); const scanConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => { - commandIo.stdout.write('Scanning postgres-warehouse for context. Large primary sources can take a while.\n'); + commandIo.stdout.write('Scanning postgres-warehouse for context. 
Large databases can take a while.\n'); commandIo.stdout.write('[5%] Preparing scan\n'); commandIo.stdout.write('[15%] Inspecting database schema\n'); commandIo.stdout.write('[55%] Semantic layer comparison found 2 changes across 2 tables\n'); @@ -1390,23 +1440,18 @@ describe('setup databases step', () => { ].join('\n'), ); expect(io.stdout()).not.toContain('Tables: 2'); - expect(io.stdout()).toContain( - [ - '◇ Scanning postgres-warehouse', - '│ Running structural scan…', - '│', - ].join('\n'), - ); - expect(io.stdout()).toContain( - [ - '◇ Scan complete for postgres-warehouse', - '│ Changes: 2 new tables', - '│ Report: raw-sources/postgres-warehouse/live-database/.../scan-report.json', - '│', - '◇ Primary source ready', - '│ postgres-warehouse · PostgreSQL · structural scan complete', - ].join('\n'), - ); + expect(io.stdout()).toContain('◇ Building schema context for postgres-warehouse'); + expect(io.stdout()).toContain('│ Running fast database ingest…'); + expect(io.stdout()).toContain('◇ Schema context complete for postgres-warehouse'); + expect(io.stdout()).toContain('│ Changes: 2 new tables'); + expect(io.stdout()).toContain('◇ Database ready'); + expect(io.stdout()).not.toContain(['Primary source', 'ready'].join(' ')); + expect(io.stdout()).toContain('│ postgres-warehouse · PostgreSQL · schema context complete'); + expect(io.stdout()).not.toContain('Scanning postgres-warehouse'); + expect(io.stdout()).not.toContain('Scan complete for postgres-warehouse'); + expect(io.stdout()).not.toContain('structural scan complete'); + expect(io.stdout()).not.toContain('Report: raw-sources'); + expect(io.stdout()).not.toContain('live-database'); expect(io.stdout()).not.toContain('[5%] Preparing scan'); expect(io.stdout()).not.toContain('What changed'); expect(io.stdout()).not.toContain('Next:'); @@ -1556,6 +1601,7 @@ describe('setup databases step', () => { databaseUrl: 'env:DATABASE_URL', databaseSchemas: ['public'], skipDatabases: false, + disableQueryHistory: true, }, 
io.io, { testConnection, scanConnection, listSchemas }, @@ -1570,12 +1616,14 @@ describe('setup databases step', () => { driver: 'postgres', url: 'env:DATABASE_URL', schemas: ['public'], + context: { queryHistory: { enabled: false }, depth: 'fast' }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], }); expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases'); - expect(io.stdout()).toContain('Primary source ready'); + expect(io.stdout()).toContain('Database ready'); + expect(io.stdout()).not.toContain(['Primary source', 'ready'].join(' ')); expect(io.stdout()).not.toContain('DATABASE_URL='); }); @@ -1607,6 +1655,7 @@ describe('setup databases step', () => { expect(config.connections.warehouse).toEqual({ driver: 'sqlite', path: './warehouse.sqlite', + context: { depth: 'fast' }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], @@ -1684,9 +1733,11 @@ describe('setup databases step', () => { const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' }); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); - expect(io.stderr()).toContain('Structural scan failed for warehouse.'); - expect(io.stderr()).toContain('│ Structural scan failed for warehouse.'); - expect(io.stderr()).not.toMatch(/^Structural scan failed for warehouse\./m); + expect(io.stderr()).toContain('Fast database ingest failed for warehouse.'); + expect(io.stderr()).toContain('│ Fast database ingest failed for warehouse.'); + expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --fast --debug`); + expect(io.stderr()).not.toContain('Structural scan failed for warehouse.'); + expect(io.stderr()).not.toMatch(/^Fast database ingest failed for warehouse\./m); }); it('prints the native SQLite rebuild command when scanning hits a 
Node ABI mismatch', async () => { @@ -1725,7 +1776,8 @@ describe('setup databases step', () => { expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('│ Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('Fix: pnpm run native:rebuild'); - expect(io.stderr()).toContain(`Retry: ktx scan --project-dir ${tempDir} warehouse`); + expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir} --fast`); + expect(io.stderr()).not.toContain('ktx scan'); expect(io.stderr()).not.toContain('npm rebuild'); expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m); }); @@ -1781,10 +1833,11 @@ describe('setup databases step', () => { expect(scanConnection).toHaveBeenCalledTimes(2); expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('Rebuilding Native SQLite with pnpm run native:rebuild…'); - expect(io.stdout()).toContain('◇ Scan complete for warehouse'); + expect(io.stdout()).toContain('◇ Schema context complete for warehouse'); + expect(io.stdout()).toContain('│ Changes: 0 changes across 56 tables'); }); - it('writes Historic SQL config for supported Snowflake databases after validation succeeds', async () => { + it('writes query history config for supported Snowflake databases after validation succeeds', async () => { const io = makeIo(); const result = await runKtxSetupDatabasesStep( { @@ -1793,10 +1846,10 @@ describe('setup databases step', () => { databaseDrivers: ['snowflake'], databaseConnectionId: 'snowflake', databaseSchemas: [], - enableHistoricSql: true, - historicSqlWindowDays: 30, - historicSqlServiceAccountPatterns: ['^svc_'], - historicSqlRedactionPatterns: ['(?i)secret'], + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], skipDatabases: false, }, 
io.io, @@ -1811,28 +1864,34 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); expect(config.connections.snowflake).toMatchObject({ driver: 'snowflake', authMethod: 'password', - historicSql: { - enabled: true, - dialect: 'snowflake', - windowDays: 30, - filters: { - dropTrivialProbes: true, - serviceAccounts: { - patterns: ['^svc_'], - mode: 'exclude', + context: { + queryHistory: { + enabled: true, + windowDays: 30, + filters: { + dropTrivialProbes: true, + serviceAccounts: { + patterns: ['^svc_'], + mode: 'exclude', + }, }, + redactionPatterns: ['(?i)secret'], }, - redactionPatterns: ['(?i)secret'], }, }); - expect(config.ingest.adapters).toContain('historic-sql'); + expect(config.connections.snowflake.historicSql).toBeUndefined(); + expect(configText).not.toContain('live-database'); + expect(configText).not.toContain('historic-sql'); + expect(configText).not.toMatch(/^\s+adapters:/m); + expect(config.ingest.adapters).toEqual([]); }); - it('writes Postgres Historic SQL config with minExecutions and ignores window/redaction output', async () => { + it('writes Postgres query history config with minExecutions and ignores window/redaction output', async () => { const io = makeIo(); const result = await runKtxSetupDatabasesStep( { @@ -1842,11 +1901,11 @@ describe('setup databases step', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: ['public'], - enableHistoricSql: true, - historicSqlWindowDays: 30, - historicSqlMinExecutions: 12, - historicSqlServiceAccountPatterns: ['^svc_'], - historicSqlRedactionPatterns: ['(?i)secret'], + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + 
queryHistoryRedactionPatterns: ['(?i)secret'], skipDatabases: false, }, io.io, @@ -1858,33 +1917,126 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL', schemas: ['public'], - historicSql: { - enabled: true, - dialect: 'postgres', - minExecutions: 12, - filters: { - dropTrivialProbes: true, - serviceAccounts: { - patterns: ['^svc_'], - mode: 'exclude', + context: { + queryHistory: { + enabled: true, + minExecutions: 12, + filters: { + dropTrivialProbes: true, + serviceAccounts: { + patterns: ['^svc_'], + mode: 'exclude', + }, }, }, }, }); - expect(config.connections.warehouse.historicSql).not.toHaveProperty('windowDays'); - expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns'); - expect(config.ingest.adapters).toContain('historic-sql'); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + const warehouseContext = + config.connections.warehouse.context && + typeof config.connections.warehouse.context === 'object' && + !Array.isArray(config.connections.warehouse.context) + ? 
(config.connections.warehouse.context as Record) + : {}; + expect(warehouseContext.queryHistory).not.toHaveProperty('windowDays'); + expect(warehouseContext.queryHistory).not.toHaveProperty('redactionPatterns'); + expect(configText).not.toContain('live-database'); + expect(configText).not.toContain('historic-sql'); + expect(configText).not.toMatch(/^\s+adapters:/m); + expect(config.ingest.adapters).toEqual([]); expect(config.ingest.workUnits.maxConcurrency).toBe(6); - expect(io.stdout()).toContain('Historic SQL probe...'); + expect(io.stdout()).toContain('Query history probe...'); + expect(io.stdout()).not.toContain('Historic SQL probe...'); expect(io.stdout()).toContain('pg_stat_statements ready'); }); - it('writes Historic SQL config for supported existing database connections', async () => { + it('asks interactive Postgres setup whether to enable query history', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + 'llm:', + ' provider:', + ' backend: anthropic', + ' models:', + ' default: claude-sonnet-4-6', + 'scan:', + ' enrichment:', + ' mode: llm', + ' embeddings:', + ' backend: openai', + ' model: text-embedding-3-small', + ' dimensions: 1536', + '', + ].join('\n'), + 'utf-8', + ); + const io = makeIo(); + const prompts = makePromptAdapter({ selectValues: ['yes', 'deep'] }); + const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseConnectionIds: ['warehouse'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + prompts, + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlProbe, + }, + ); + + expect(result.status).toBe('ready'); + expect(prompts.select).toHaveBeenCalledWith({ + message: 'Enable query-history ingest for this PostgreSQL 
connection?', + options: [ + { value: 'yes', label: 'Enable query history' }, + { value: 'no', label: 'Do not enable query history' }, + { value: 'back', label: 'Back' }, + ], + }); + expect(prompts.select).toHaveBeenNthCalledWith( + 2, + expect.objectContaining({ + message: expect.stringContaining('How much database context should KTX build?'), + }), + ); + expect(historicSqlProbe).toHaveBeenCalledWith({ + projectDir: tempDir, + connectionId: 'warehouse', + dialect: 'postgres', + }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse).toMatchObject({ + context: { + queryHistory: { + enabled: true, + minExecutions: 5, + filters: { dropTrivialProbes: true }, + }, + depth: 'deep', + }, + }); + }); + + it('writes query history config for supported existing database connections', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -1906,8 +2058,8 @@ describe('setup databases step', () => { inputMode: 'disabled', databaseConnectionIds: ['analytics'], databaseSchemas: [], - enableHistoricSql: true, - historicSqlWindowDays: 45, + enableQueryHistory: true, + queryHistoryWindowDays: 45, skipDatabases: false, }, io.io, @@ -1918,22 +2070,28 @@ describe('setup databases step', () => { ); expect(result.status).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'); + const config = parseKtxProjectConfig(configText); expect(config.connections.analytics).toMatchObject({ - historicSql: { - enabled: true, - dialect: 'bigquery', - windowDays: 45, - filters: { - dropTrivialProbes: true, + context: { + queryHistory: { + enabled: true, + windowDays: 45, + filters: { + dropTrivialProbes: true, + }, + redactionPatterns: [], }, - redactionPatterns: [], }, }); - expect(config.ingest.adapters).toContain('historic-sql'); + 
expect(config.connections.analytics.historicSql).toBeUndefined(); + expect(configText).not.toContain('live-database'); + expect(configText).not.toContain('historic-sql'); + expect(configText).not.toMatch(/^\s+adapters:/m); + expect(config.ingest.adapters).toEqual([]); }); - it('enables Historic SQL on an existing Postgres connection', async () => { + it('enables query history on an existing Postgres connection', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -1954,8 +2112,8 @@ describe('setup databases step', () => { inputMode: 'disabled', databaseConnectionIds: ['warehouse'], databaseSchemas: [], - enableHistoricSql: true, - historicSqlMinExecutions: 8, + enableQueryHistory: true, + queryHistoryMinExecutions: 8, skipDatabases: false, }, io.io, @@ -1969,18 +2127,94 @@ describe('setup databases step', () => { expect(result.status).toBe('ready'); const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ - historicSql: { + context: { + queryHistory: { + enabled: true, + minExecutions: 8, + filters: { + dropTrivialProbes: true, + }, + }, + }, + }); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + }); + + it('migrates legacy historicSql to context.queryHistory during database setup', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' readonly: true', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' windowDays: 45', + ' minExecutions: 9', + ' concurrency: 3', + ' staleArchiveAfterDays: 120', + ' filters:', + ' dropTrivialProbes: true', + ' serviceAccounts:', + ' mode: exclude', + ' patterns:', + " - '^svc_'", + ' orchestrators:', + ' mode: exclude', + ' patterns:', + ' - airflow', + ' dropFailedBelow: 2', + ' redactionPatterns:', + " - '(?i)secret'", + '', + ].join('\n'), + 'utf-8', + ); + + const io = makeIo(); + + await expect( + 
runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseConnectionIds: ['warehouse'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlProbe: vi.fn(async () => ({ ok: true, lines: [] })), + }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.historicSql).toBeUndefined(); + expect(config.connections.warehouse.context).toMatchObject({ + queryHistory: { enabled: true, - dialect: 'postgres', - minExecutions: 8, + windowDays: 45, + minExecutions: 9, + concurrency: 3, + staleArchiveAfterDays: 120, filters: { dropTrivialProbes: true, + serviceAccounts: { mode: 'exclude', patterns: ['^svc_'] }, + orchestrators: { mode: 'exclude', patterns: ['airflow'] }, + dropFailedBelow: 2, }, + redactionPatterns: ['(?i)secret'], }, }); }); - it('prints a non-blocking Postgres Historic SQL probe failure after connection test succeeds', async () => { + it('prints a non-blocking Postgres query history probe failure after connection test succeeds', async () => { const io = makeIo(); const historicSqlProbe = vi.fn(async () => ({ ok: false, @@ -1999,7 +2233,7 @@ describe('setup databases step', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: [], - enableHistoricSql: true, + enableQueryHistory: true, skipDatabases: false, }, io.io, @@ -2018,12 +2252,13 @@ describe('setup databases step', () => { dialect: 'postgres', }), ); - expect(io.stdout()).toContain('Historic SQL probe...'); + expect(io.stdout()).toContain('Query history probe...'); + expect(io.stdout()).not.toContain('Historic SQL probe...'); expect(io.stdout()).toContain('pg_stat_statements extension is not installed'); expect(io.stdout()).toContain('Setup written; first ingest run will fail until fixed.'); }); - 
it('does not run the Historic SQL probe when the regular connection test fails', async () => { + it('does not run the query history probe when the regular connection test fails', async () => { const io = makeIo(); const historicSqlProbe = vi.fn(async () => ({ ok: true, lines: [] })); @@ -2035,7 +2270,7 @@ describe('setup databases step', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: [], - enableHistoricSql: true, + enableQueryHistory: true, skipDatabases: false, }, io.io, @@ -2068,7 +2303,35 @@ describe('setup databases step', () => { expect(io.stderr()).toContain('Missing database connection id'); }); - it('leaves setup incomplete when primary sources are skipped', async () => { + it('accepts former ingest subcommand names as non-interactive database connection ids', async () => { + const io = makeIo(); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + databaseDrivers: ['postgres'], + databaseConnectionId: 'replay', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + }, + ); + + expect(result.status).toBe('ready'); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.replay).toMatchObject({ + driver: 'postgres', + url: 'env:DATABASE_URL', + }); + }); + + it('leaves setup incomplete when databases are skipped', async () => { const io = makeIo(); const result = await runKtxSetupDatabasesStep( @@ -2077,7 +2340,7 @@ describe('setup databases step', () => { ); expect(result.status).toBe('skipped'); - expect(io.stdout()).toContain('KTX cannot work until you add a primary source.'); + expect(io.stdout()).toContain('KTX cannot work until you add a database.'); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); }); }); diff --git 
a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index c21ab6d1..1fd76faa 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -21,6 +21,7 @@ import { } from './database-tree-picker.js'; import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxScan } from './scan.js'; +import { applySetupDatabaseContextDepth } from './setup-database-context-depth.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; import { createKtxSetupPromptAdapter, @@ -47,12 +48,12 @@ export interface KtxSetupDatabasesArgs { databaseConnectionId?: string; databaseUrl?: string; databaseSchemas: string[]; - enableHistoricSql?: boolean; - disableHistoricSql?: boolean; - historicSqlWindowDays?: number; - historicSqlMinExecutions?: number; - historicSqlServiceAccountPatterns?: string[]; - historicSqlRedactionPatterns?: string[]; + enableQueryHistory?: boolean; + disableQueryHistory?: boolean; + queryHistoryWindowDays?: number; + queryHistoryMinExecutions?: number; + queryHistoryServiceAccountPatterns?: string[]; + queryHistoryRedactionPatterns?: string[]; skipDatabases: boolean; } @@ -203,7 +204,7 @@ function missingConnectionDetailsPrompt( label: string, canReturnToDriverSelection: boolean, ): { message: string; options: Array<{ value: string; label: string }> } { - const backDestination = canReturnToDriverSelection ? 'primary source selection' : 'the previous setup step'; + const backDestination = canReturnToDriverSelection ? 
'database selection' : 'the previous setup step'; return { message: `Some ${label} connection details are missing.\n` + @@ -234,6 +235,12 @@ function unique(values: string[]): string[] { return [...new Set(values.filter((value) => value.trim().length > 0))]; } +function assertSafeDatabaseConnectionId(connectionId: string): void { + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { + throw new Error(`Unsafe connection id: ${connectionId}`); + } +} + function stringConfigField(connection: KtxProjectConnectionConfig | undefined, field: string): string | undefined { const value = connection?.[field]; return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined; @@ -251,6 +258,48 @@ function historicSqlConfigRecord(connection: KtxProjectConnectionConfig | undefi : null; } +function contextRecord(connection: KtxProjectConnectionConfig | undefined): Record { + const context = connection?.context; + return context && typeof context === 'object' && !Array.isArray(context) ? (context as Record) : {}; +} + +function queryHistoryConfigRecord(connection: KtxProjectConnectionConfig | undefined): Record | null { + const queryHistory = contextRecord(connection).queryHistory; + return queryHistory && typeof queryHistory === 'object' && !Array.isArray(queryHistory) + ? 
(queryHistory as Record) + : null; +} + +function stripLegacyHistoricSql(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const { historicSql: _historicSql, ...rest } = connection as KtxProjectConnectionConfig & { + historicSql?: unknown; + }; + return rest; +} + +function withQueryHistoryConfig( + connection: KtxProjectConnectionConfig, + queryHistory: Record, +): KtxProjectConnectionConfig { + return { + ...stripLegacyHistoricSql(connection), + context: { + ...contextRecord(connection), + queryHistory, + }, + }; +} + +function migrateLegacyHistoricSqlConnection(connection: KtxProjectConnectionConfig): KtxProjectConnectionConfig { + const existingQueryHistory = queryHistoryConfigRecord(connection); + const legacy = historicSqlConfigRecord(connection); + if (existingQueryHistory || !legacy) { + return existingQueryHistory ? stripLegacyHistoricSql(connection) : connection; + } + const { dialect: _dialect, ...queryHistory } = legacy; + return withQueryHistoryConfig(connection, queryHistory); +} + function historicSqlProbeFailureLines(error: unknown): string[] { if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError') { return [ @@ -268,7 +317,7 @@ function historicSqlProbeFailureLines(error: unknown): string[] { if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { return [` FAIL ${error.message}`]; } - return [` FAIL Historic SQL probe failed: ${error instanceof Error ? error.message : String(error)}`]; + return [` FAIL Query history probe failed: ${error instanceof Error ? 
error.message : String(error)}`]; } async function defaultHistoricSqlProbe(input: KtxSetupHistoricSqlProbeInput): Promise { @@ -492,11 +541,11 @@ function configuredPrimarySourcesPrompt(connectionIds: string[]): { options: Array<{ value: string; label: string }>; } { return { - message: `Primary sources already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`, + message: `Databases already configured: ${connectionIds.join(', ')}\nWhat would you like to do?`, options: [ - { value: 'continue', label: 'Continue to knowledge sources' }, - { value: 'edit', label: 'Edit an existing primary source' }, - { value: 'add', label: 'Add additional primary sources' }, + { value: 'continue', label: 'Continue to context sources' }, + { value: 'edit', label: 'Edit an existing database' }, + { value: 'add', label: 'Add another database' }, ], }; } @@ -868,68 +917,61 @@ async function maybeApplyHistoricSqlConfig(input: { }): Promise { const dialect = HISTORIC_SQL_DIALECT_BY_DRIVER[input.driver]; if (!dialect) { - if (input.args.enableHistoricSql === true) { + if (input.args.enableQueryHistory === true) { throw new Error( - `Historic SQL setup is only supported for Snowflake, BigQuery, and Postgres, not ${driverLabel(input.driver)}.`, + `Query history setup is only supported for Snowflake, BigQuery, and Postgres, not ${driverLabel(input.driver)}.`, ); } return input.connection; } - let enabled = input.args.enableHistoricSql === true; - if (input.args.disableHistoricSql === true) { + let enabled = input.args.enableQueryHistory === true; + if (input.args.disableQueryHistory === true) { enabled = false; - } else if (input.args.inputMode !== 'disabled' && input.args.enableHistoricSql !== true && dialect !== 'postgres') { + } else if (input.args.inputMode !== 'disabled' && input.args.enableQueryHistory !== true) { const choice = await input.prompts.select({ - message: `Enable Historic SQL query-history ingest for this ${driverLabel(input.driver)} connection?`, + 
message: `Enable query-history ingest for this ${driverLabel(input.driver)} connection?`, options: [ - { value: 'yes', label: 'Enable Historic SQL' }, - { value: 'no', label: 'Do not enable Historic SQL' }, + { value: 'yes', label: 'Enable query history' }, + { value: 'no', label: 'Do not enable query history' }, { value: 'back', label: 'Back' }, ], }); if (choice === 'back') return 'back'; - enabled = choice === 'yes'; + if (choice === 'yes') { + enabled = true; + } else if (choice === 'no') { + enabled = false; + } else { + return input.connection; + } } - if (dialect === 'postgres' && input.args.enableHistoricSql !== true && input.args.disableHistoricSql !== true) { - return input.connection; - } - - const existing = - typeof input.connection.historicSql === 'object' && input.connection.historicSql !== null - ? (input.connection.historicSql as Record) - : {}; + const existingRecord = queryHistoryConfigRecord(input.connection) ?? historicSqlConfigRecord(input.connection) ?? {}; + const { dialect: _dialect, ...existing } = existingRecord; if (!enabled) { - return { ...input.connection, historicSql: { ...existing, enabled: false, dialect } }; + return withQueryHistoryConfig(input.connection, { ...existing, enabled: false }); } const common: Record = { ...existing, enabled: true, - dialect, - filters: historicSqlFiltersForSetup(input.args.historicSqlServiceAccountPatterns), + filters: historicSqlFiltersForSetup(input.args.queryHistoryServiceAccountPatterns), }; if (dialect === 'postgres') { - return { - ...input.connection, - historicSql: { - ...common, - minExecutions: input.args.historicSqlMinExecutions ?? 5, - }, - }; + return withQueryHistoryConfig(input.connection, { + ...common, + minExecutions: input.args.queryHistoryMinExecutions ?? 5, + }); } - return { - ...input.connection, - historicSql: { - ...common, - windowDays: input.args.historicSqlWindowDays ?? 90, - redactionPatterns: input.args.historicSqlRedactionPatterns ?? 
[], - }, - }; + return withQueryHistoryConfig(input.connection, { + ...common, + windowDays: input.args.queryHistoryWindowDays ?? 90, + redactionPatterns: input.args.queryHistoryRedactionPatterns ?? [], + }); } function historicSqlFiltersForSetup(patterns: string[] | undefined) { @@ -1143,20 +1185,6 @@ function summarizeScanChanges(output: string): string { return 'no table changes'; } -function shortenScanReportPath(path: string): string { - const normalized = path.trim(); - const liveDatabaseMarker = '/live-database/'; - const markerIndex = normalized.indexOf(liveDatabaseMarker); - if (markerIndex === -1) { - return normalized; - } - const filename = normalized.split('/').at(-1); - if (!filename) { - return normalized; - } - return `${normalized.slice(0, markerIndex + liveDatabaseMarker.length)}.../${filename}`; -} - function writeSetupSection(io: KtxCliIo, title: string, lines: string[]): void { io.stdout.write(`◇ ${title}\n`); for (const line of lines) { @@ -1171,22 +1199,24 @@ async function writeConnectionConfig(input: { connection: KtxProjectConnectionConfig; }): Promise { const project = await loadKtxProject({ projectDir: input.projectDir }); + const migratedConnections = Object.fromEntries( + Object.entries(project.config.connections).map(([connectionId, connection]) => [ + connectionId, + migrateLegacyHistoricSqlConnection(connection), + ]), + ); + const nextConnection = migrateLegacyHistoricSqlConnection(input.connection); const config = { ...project.config, connections: { - ...project.config.connections, - [input.connectionId]: input.connection, + ...migratedConnections, + [input.connectionId]: nextConnection, }, }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); - const historicSql = - typeof input.connection.historicSql === 'object' && - input.connection.historicSql !== null && - !Array.isArray(input.connection.historicSql) - ? 
(input.connection.historicSql as Record) - : null; - if (historicSql?.enabled === true) { + const queryHistory = queryHistoryConfigRecord(nextConnection); + if (queryHistory?.enabled === true) { await ensureHistoricSqlIngestDefaults(input.projectDir); } } @@ -1464,41 +1494,43 @@ async function maybeConfigureDatabaseScope(input: { async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - const adapters = project.config.ingest.adapters.includes('historic-sql') - ? project.config.ingest.adapters - : [...project.config.ingest.adapters, 'historic-sql']; const maxConcurrency = Math.max( project.config.ingest.workUnits.maxConcurrency, HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY, ); - if ( - adapters === project.config.ingest.adapters && - maxConcurrency === project.config.ingest.workUnits.maxConcurrency - ) { + if (maxConcurrency === project.config.ingest.workUnits.maxConcurrency) { return; } await writeFile( project.configPath, - serializeKtxProjectConfig( - { - ...project.config, - ingest: { - ...project.config.ingest, - adapters, - workUnits: { - ...project.config.ingest.workUnits, - maxConcurrency, - }, + serializeKtxProjectConfig({ + ...project.config, + ingest: { + ...project.config.ingest, + workUnits: { + ...project.config.ingest.workUnits, + maxConcurrency, }, }, - ), + }), 'utf-8', ); } async function markDatabasesComplete(projectDir: string, connectionIds: string[]): Promise { const project = await loadKtxProject({ projectDir }); - const config = setKtxSetupDatabaseConnectionIds(project.config, unique(connectionIds)); + const config = setKtxSetupDatabaseConnectionIds( + { + ...project.config, + connections: Object.fromEntries( + Object.entries(project.config.connections).map(([connectionId, connection]) => [ + connectionId, + migrateLegacyHistoricSqlConnection(connection), + ]), + ), + }, + unique(connectionIds), + ); await writeFile(project.configPath, serializeKtxProjectConfig(config), 
'utf-8'); await markKtxSetupStateStepComplete(projectDir, 'databases'); } @@ -1511,12 +1543,13 @@ async function maybeRunHistoricSqlSetupProbe(input: { }): Promise { const project = await loadKtxProject({ projectDir: input.projectDir }); const connection = project.config.connections[input.connectionId]; - const historicSql = historicSqlConfigRecord(connection); - if (historicSql?.enabled !== true || historicSql.dialect !== 'postgres') { + const queryHistory = queryHistoryConfigRecord(connection) ?? historicSqlConfigRecord(connection); + const driver = normalizeDriver(connection?.driver); + if (queryHistory?.enabled !== true || driver !== 'postgres') { return; } - input.io.stdout.write('│ Historic SQL probe...\n'); + input.io.stdout.write('│ Query history probe...\n'); const probe = input.deps.historicSqlProbe ?? defaultHistoricSqlProbe; const result = await probe({ projectDir: input.projectDir, @@ -1537,7 +1570,11 @@ async function applyHistoricSqlConfigToExistingConnection(input: { args: KtxSetupDatabasesArgs; prompts: KtxSetupDatabasesPromptAdapter; }): Promise<'back' | void> { - if (input.args.enableHistoricSql !== true && input.args.disableHistoricSql !== true) { + if ( + input.args.inputMode === 'disabled' && + input.args.enableQueryHistory !== true && + input.args.disableQueryHistory !== true + ) { return; } @@ -1555,10 +1592,45 @@ async function applyHistoricSqlConfigToExistingConnection(input: { prompts: input.prompts, }); if (withHistoricSql === 'back') return 'back'; - await writeConnectionConfig({ + const withContextDepth = await maybeApplyContextDepthConfig({ projectDir: input.projectDir, connectionId: input.connectionId, connection: withHistoricSql, + args: input.args, + prompts: input.prompts, + }); + if (withContextDepth === 'back') return 'back'; + await writeConnectionConfig({ + projectDir: input.projectDir, + connectionId: input.connectionId, + connection: withContextDepth, + }); +} + +async function maybeApplyContextDepthConfig(input: { + 
projectDir: string; + connectionId: string; + connection: KtxProjectConnectionConfig; + args: KtxSetupDatabasesArgs; + prompts: KtxSetupDatabasesPromptAdapter; +}): Promise { + const project = await loadKtxProject({ projectDir: input.projectDir }); + return await applySetupDatabaseContextDepth({ + project: { + ...project, + config: { + ...project.config, + connections: { + ...project.config.connections, + [input.connectionId]: input.connection, + }, + }, + }, + connection: input.connection, + args: { + inputMode: input.args.inputMode === 'disabled' || input.args.databaseUrl ? 'disabled' : input.args.inputMode, + }, + prompts: input.prompts, }); } @@ -1600,8 +1672,8 @@ async function validateAndScanConnection(input: { io: input.io, deps: input.deps, }); - writeSetupSection(input.io, `Scanning ${input.connectionId}`, [ - 'Running structural scan…', + writeSetupSection(input.io, `Building schema context for ${input.connectionId}`, [ + 'Running fast database ingest…', ]); let scanIo = createBufferedCommandIo(); let scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); @@ -1610,11 +1682,11 @@ async function validateAndScanConnection(input: { if (nativeSqliteDetail) { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), - [ - `Structural scan failed for ${input.connectionId}.`, - 'Native SQLite is built for a different Node.js ABI.', - `Detail: ${nativeSqliteDetail}`, - 'Rebuilding Native SQLite with pnpm run native:rebuild…', + [ + `Fast database ingest failed for ${input.connectionId}.`, + 'Native SQLite is built for a different Node.js ABI.', + `Detail: ${nativeSqliteDetail}`, + 'Rebuilding Native SQLite with pnpm run native:rebuild…', ].join('\n'), ); const rebuildNativeSqlite = input.deps.rebuildNativeSqlite ?? defaultRebuildNativeSqlite; @@ -1622,7 +1694,7 @@ async function validateAndScanConnection(input: { if (rebuildCode === 0) { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), - 'Native SQLite rebuild complete. 
Retrying structural scan…', + 'Native SQLite rebuild complete. Retrying fast database ingest…', ); const retryScanIo = createBufferedCommandIo(); scanCode = await scanConnection(input.projectDir, input.connectionId, retryScanIo); @@ -1633,10 +1705,10 @@ async function validateAndScanConnection(input: { (chunk) => input.io.stderr.write(chunk), [ rebuildCode === 0 - ? `Structural scan still failed for ${input.connectionId} after rebuilding Native SQLite.` + ? `Fast database ingest still failed for ${input.connectionId} after rebuilding Native SQLite.` : `Native SQLite rebuild failed for ${input.connectionId}.`, 'Fix: pnpm run native:rebuild', - `Retry: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast`, ].join('\n'), ); } @@ -1645,8 +1717,8 @@ async function validateAndScanConnection(input: { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), [ - `Structural scan failed for ${input.connectionId}.`, - `Debug command: ktx scan --project-dir ${input.projectDir} ${input.connectionId}`, + `Fast database ingest failed for ${input.connectionId}.`, + `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast --debug`, ].join('\n'), ); } @@ -1655,17 +1727,13 @@ async function validateAndScanConnection(input: { } } const scanOutput = scanIo.stdoutText(); - const reportPath = readOutputValue(scanOutput, 'Report'); writeSetupSection( input.io, - `Scan complete for ${input.connectionId}`, - [ - `Changes: ${summarizeScanChanges(scanOutput)}`, - ...(reportPath ? 
[`Report: ${shortenScanReportPath(reportPath)}`] : []), - ], + `Schema context complete for ${input.connectionId}`, + [`Changes: ${summarizeScanChanges(scanOutput)}`], ); - writeSetupSection(input.io, 'Primary source ready', [ - `${input.connectionId} · ${driverDisplay} · structural scan complete`, + writeSetupSection(input.io, 'Database ready', [ + `${input.connectionId} · ${driverDisplay} · schema context complete`, ]); return 'ready'; } @@ -1684,14 +1752,14 @@ async function chooseDrivers( } if (args.inputMode === 'disabled') { io.stderr.write( - 'KTX cannot work without a primary source. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n', + 'KTX cannot work without a database. Pass --database or --database-connection-id, or pass --skip-databases to leave setup incomplete.\n', ); return 'missing-input'; } while (true) { const initialValues = unique(options?.initialDrivers ?? []); const choices = await prompts.multiselect({ - message: withMultiselectNavigation('Which primary sources should KTX connect to?'), + message: withMultiselectNavigation('Which databases should KTX connect to?'), options: [...DRIVER_OPTIONS], ...(initialValues.length > 0 ? { initialValues } : {}), required: options?.hasPrimarySources === true, @@ -1707,7 +1775,7 @@ async function chooseDrivers( return 'back'; } - io.stdout.write('│ KTX cannot work without at least one primary source. Select a source or press Escape to go back.\n'); + io.stdout.write('│ KTX cannot work without at least one database. 
Select a database or press Escape to go back.\n'); } } @@ -1718,10 +1786,12 @@ async function chooseConnectionIdForDriver(input: { prompts: KtxSetupDatabasesPromptAdapter; }): Promise<{ kind: 'existing' | 'new' | 'edit'; connectionId: string } | 'back' | 'missing-input'> { if (input.args.databaseConnectionId) { + assertSafeDatabaseConnectionId(input.args.databaseConnectionId); return { kind: 'new', connectionId: input.args.databaseConnectionId }; } if (input.args.inputMode === 'disabled') { if (!input.args.databaseConnectionId) return 'missing-input'; + assertSafeDatabaseConnectionId(input.args.databaseConnectionId); return { kind: 'new', connectionId: input.args.databaseConnectionId }; } @@ -1737,6 +1807,7 @@ async function chooseConnectionIdForDriver(input: { }); if (entered === undefined) return 'back'; const connectionId = entered.trim() || defaultId; + assertSafeDatabaseConnectionId(connectionId); return connectionId ? { kind: 'new', connectionId } : 'missing-input'; } @@ -1766,6 +1837,7 @@ async function chooseConnectionIdForDriver(input: { }); if (entered === undefined) continue; const connectionId = entered.trim() || defaultId; + assertSafeDatabaseConnectionId(connectionId); return connectionId ? { kind: 'new', connectionId } : 'missing-input'; } } @@ -1785,7 +1857,7 @@ async function choosePrimarySourceToEdit(input: { .filter((option): option is { value: string; label: string } => option !== null); if (options.length === 0) return 'back'; const choice = await input.prompts.select({ - message: 'Primary source to edit', + message: 'Database to edit', options: [...options, { value: 'back', label: 'Back' }], }); return choice === 'back' ? 
'back' : choice; @@ -1803,7 +1875,7 @@ async function runPrimarySourceFullEdit(input: { const existing = project.config.connections[input.connectionId]; const driver = normalizeDriver(existing?.driver); if (!existing || !driver) { - input.io.stderr.write(`Connection "${input.connectionId}" is not a configured primary source.\n`); + input.io.stderr.write(`Connection "${input.connectionId}" is not a configured database.\n`); return 'failed'; } @@ -1872,7 +1944,7 @@ export async function runKtxSetupDatabasesStep( deps: KtxSetupDatabasesDeps = {}, ): Promise { if (args.skipDatabases) { - io.stdout.write('│ Primary source setup skipped. KTX cannot work until you add a primary source.\n'); + io.stdout.write('│ Database setup skipped. KTX cannot work until you add a database.\n'); return { status: 'skipped', projectDir: args.projectDir }; } @@ -1970,7 +2042,7 @@ export async function runKtxSetupDatabasesStep( if (drivers === 'missing-input') return { status: 'missing-input', projectDir: args.projectDir }; if (drivers.length === 0) { await markDatabasesComplete(args.projectDir, []); - io.stdout.write('│ KTX cannot work without a primary source.\n'); + io.stdout.write('│ KTX cannot work without a database.\n'); return { status: 'skipped', projectDir: args.projectDir }; } @@ -1978,12 +2050,18 @@ export async function runKtxSetupDatabasesStep( for (const driver of drivers) { const project = await loadKtxProject({ projectDir: args.projectDir }); - const connectionChoice = await chooseConnectionIdForDriver({ - driver, - connections: project.config.connections, - args, - prompts, - }); + let connectionChoice: Awaited>; + try { + connectionChoice = await chooseConnectionIdForDriver({ + driver, + connections: project.config.connections, + args, + prompts, + }); + } catch (error) { + io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); + return { status: 'failed', projectDir: args.projectDir }; + } if (connectionChoice === 'back') { if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; returnToDriverSelection = true; @@ -2061,10 +2139,22 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - await writeConnectionConfig({ + const withContextDepth = await maybeApplyContextDepthConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withHistoricSql, + args, + prompts, + }); + if (withContextDepth === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withContextDepth, }); } else { const existing = project.config.connections[connectionChoice.connectionId]; @@ -2074,10 +2164,22 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - await writeConnectionConfig({ + const withContextDepth = await maybeApplyContextDepthConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withHistoricSql, + args, + prompts, + }); + if (withContextDepth === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withContextDepth, }); } @@ -2100,11 +2202,11 @@ export async function runKtxSetupDatabasesStep( } if (args.inputMode === 'disabled') return { status: 'failed', projectDir: args.projectDir }; const action = await prompts.select({ - message: `Primary source setup failed for ${connectionChoice.connectionId}`, + message: `Database setup failed for 
${connectionChoice.connectionId}`, options: [ { value: 'retry', label: 'Retry connection test' }, { value: 're-enter', label: 'Re-enter connection details' }, - { value: 'skip', label: 'Skip this primary source' }, + { value: 'skip', label: 'Skip this database' }, { value: 'back', label: 'Back' }, ], }); @@ -2145,10 +2247,22 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - await writeConnectionConfig({ + const withContextDepth = await maybeApplyContextDepthConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withHistoricSql, + args, + prompts, + }); + if (withContextDepth === 'back') { + if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; + returnToDriverSelection = true; + break; + } + await writeConnectionConfig({ + projectDir: args.projectDir, + connectionId: connectionChoice.connectionId, + connection: withContextDepth, }); setupStatus = await validateAndScanConnection({ projectDir: args.projectDir, @@ -2174,7 +2288,7 @@ export async function runKtxSetupDatabasesStep( } if (selectedConnectionIds.length === 0) { - io.stderr.write('No primary source connections completed setup.\n'); + io.stderr.write('No database connections completed setup.\n'); return { status: 'failed', projectDir: args.projectDir }; } diff --git a/packages/cli/src/setup-demo-tour.test.ts b/packages/cli/src/setup-demo-tour.test.ts index a8b63974..f571f91b 100644 --- a/packages/cli/src/setup-demo-tour.test.ts +++ b/packages/cli/src/setup-demo-tour.test.ts @@ -134,6 +134,17 @@ describe('buildDemoReplayTimeline', () => { expect(timeline[i].delayMs).toBeGreaterThanOrEqual(timeline[i - 1].delayMs); } }); + + it('uses schema-context wording for database progress', () => { + const renderedTimeline = timeline + .map((event) => [event.detailLine, event.summaryText].filter(Boolean).join(' ')) + .join('\n'); + + expect(renderedTimeline).toContain('reading schema'); + 
expect(renderedTimeline).toContain('56 tables'); + expect(renderedTimeline).not.toContain('scanning'); + expect(renderedTimeline).not.toContain('scanned'); + }); }); describe('DEMO_REPLAY_TARGETS', () => { @@ -145,8 +156,8 @@ describe('DEMO_REPLAY_TARGETS', () => { expect(DEMO_REPLAY_TARGETS.contextSources).toHaveLength(3); }); - it('primary source is a scan operation', () => { - expect(DEMO_REPLAY_TARGETS.primarySources[0].operation).toBe('scan'); + it('primary source is a database-ingest operation', () => { + expect(DEMO_REPLAY_TARGETS.primarySources[0].operation).toBe('database-ingest'); }); it('context sources are source-ingest operations', () => { diff --git a/packages/cli/src/setup-demo-tour.ts b/packages/cli/src/setup-demo-tour.ts index 0f0b6c8b..35640026 100644 --- a/packages/cli/src/setup-demo-tour.ts +++ b/packages/cli/src/setup-demo-tour.ts @@ -30,7 +30,7 @@ function dim(text: string): string { function createDemoTarget( connectionId: string, - operation: 'scan' | 'source-ingest', + operation: 'database-ingest' | 'source-ingest', driver: string, ): KtxPublicIngestPlanTarget { const adapter = operation === 'source-ingest' ? driver : undefined; @@ -40,9 +40,9 @@ function createDemoTarget( operation, ...(adapter ? { adapter } : {}), debugCommand: `ktx setup --project-dir `, - steps: operation === 'scan' - ? ['scan', 'enrich', 'memory-update'] - : ['source-ingest', 'enrich', 'memory-update'], + steps: operation === 'database-ingest' + ? 
['database-schema'] + : ['source-ingest', 'memory-update'], }; } @@ -56,6 +56,7 @@ function createTargetState(target: KtxPublicIngestPlanTarget): ContextBuildTarge startedAt: null, elapsedMs: 0, progressUpdatedAtMs: null, + phases: [], }; } @@ -195,7 +196,7 @@ export interface DemoReplayEvent { export const DEMO_REPLAY_TARGETS = { primarySources: [ - createDemoTarget('postgres-warehouse', 'scan', 'postgres'), + createDemoTarget('postgres-warehouse', 'database-ingest', 'postgres'), ], contextSources: [ createDemoTarget('dbt-main', 'source-ingest', 'dbt'), @@ -206,10 +207,10 @@ export const DEMO_REPLAY_TARGETS = { export function buildDemoReplayTimeline(): DemoReplayEvent[] { return [ - // postgres-warehouse: scan + // postgres-warehouse: database schema context { delayMs: 0, connectionId: 'postgres-warehouse', status: 'running', detailLine: null, summaryText: null }, - { delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] scanning tables...', summaryText: null }, - { delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables scanned' }, + { delayMs: 1200, connectionId: 'postgres-warehouse', status: 'running', detailLine: '[50%] reading schema...', summaryText: null }, + { delayMs: 2400, connectionId: 'postgres-warehouse', status: 'done', detailLine: null, summaryText: '56 tables' }, // dbt-main { delayMs: 2400, connectionId: 'dbt-main', status: 'running', detailLine: null, summaryText: null }, { delayMs: 3600, connectionId: 'dbt-main', status: 'running', detailLine: '[60%] ingesting models...', summaryText: null }, diff --git a/packages/cli/src/setup-ready-menu.test.ts b/packages/cli/src/setup-ready-menu.test.ts index 643d8b3d..d37b81a0 100644 --- a/packages/cli/src/setup-ready-menu.test.ts +++ b/packages/cli/src/setup-ready-menu.test.ts @@ -37,7 +37,7 @@ describe('setup ready menu', () => { options: [ { value: 'models', label: 'Models' }, { value: 'embeddings', label: 
'Embeddings' }, - { value: 'databases', label: 'Primary sources' }, + { value: 'databases', label: 'Databases' }, { value: 'sources', label: 'Context sources' }, { value: 'context', label: 'Rebuild KTX context' }, { value: 'agents', label: 'Agent integration' }, diff --git a/packages/cli/src/setup-ready-menu.ts b/packages/cli/src/setup-ready-menu.ts index c975d991..70ee7d60 100644 --- a/packages/cli/src/setup-ready-menu.ts +++ b/packages/cli/src/setup-ready-menu.ts @@ -44,7 +44,7 @@ export async function runKtxSetupReadyChangeMenu( options: [ { value: 'models', label: 'Models' }, { value: 'embeddings', label: 'Embeddings' }, - { value: 'databases', label: 'Primary sources' }, + { value: 'databases', label: 'Databases' }, { value: 'sources', label: 'Context sources' }, { value: 'context', label: 'Rebuild KTX context' }, { value: 'agents', label: 'Agent integration' }, diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index 339282e1..d1d541b8 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -255,6 +255,37 @@ describe('setup sources step', () => { expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined(); }); + it('accepts former ingest subcommand names as interactive source connection ids', async () => { + await addPrimarySource(); + const io = makeIo(); + const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'workspace=ok' })); + + const result = await runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'auto', + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + { + prompts: prompts({ + multiselect: [['notion']], + text: ['status', 'env:NOTION_TOKEN'], + select: ['env', 'all_accessible'], + }), + validateNotion, + }, + ); + + expect(result.status).toBe('ready'); + const config = await readConfig(); + expect(config.connections.status).toMatchObject({ + driver: 'notion', + auth_token_ref: 
'env:NOTION_TOKEN', + }); + }); + it('uses selected Notion roots when root page ids are provided even if crawl mode says all accessible', async () => { await addPrimarySource(); const validateNotion = vi.fn(async () => ({ ok: true as const, detail: 'roots=1' })); @@ -756,7 +787,7 @@ describe('setup sources step', () => { expect(testPrompts.text).toHaveBeenCalledTimes(4); }); - it('enables the dbt adapter when adding a dbt source connection', async () => { + it('adds a dbt source connection and enables its adapter', async () => { await addPrimarySource(); const validateDbt = vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })); @@ -776,7 +807,10 @@ describe('setup sources step', () => { ), ).resolves.toEqual({ status: 'ready', projectDir, connectionIds: ['dbt-main'] }); - expect((await readConfig()).ingest.adapters).toContain('dbt'); + const configText = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); + expect(configText).not.toContain('live-database'); + expect(configText).not.toContain('historic-sql'); + expect((await readConfig()).ingest.adapters).toEqual(['dbt']); }); it('lets interactive setup retry or continue after initial source ingest fails', async () => { @@ -805,7 +839,9 @@ describe('setup sources step', () => { expect(runInitialIngest).toHaveBeenCalledTimes(1); expect((await readConfig()).connections['dbt-main']).toMatchObject({ driver: 'dbt', source_dir: '/repo/dbt' }); expect(io.stdout()).toContain('Context source saved without a completed context build for dbt-main.'); - expect(io.stdout()).toContain('Run later: ktx ingest run --connection-id dbt-main --adapter '); + expect(io.stdout()).toContain('Run later: ktx ingest dbt-main'); + expect(io.stdout()).not.toContain('ktx ingest run --connection-id'); + expect(io.stdout()).not.toContain('--adapter'); }); it('retries initial source ingest from the failure menu', async () => { @@ -1472,7 +1508,7 @@ describe('setup sources step', () => { } }); - it('does not offer 
context sources until a primary source exists', async () => { + it('does not offer context sources until a database exists', async () => { const io = makeIo(); const testPrompts = prompts({ multiselect: [['notion']] }); @@ -1485,7 +1521,7 @@ describe('setup sources step', () => { ).resolves.toEqual({ status: 'skipped', projectDir }); expect(testPrompts.multiselect).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Connect a primary source before adding context sources.'); + expect(io.stdout()).toContain('Connect a database before adding context sources.'); expect(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); }); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index a1ca531a..c55f3fd8 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -775,7 +775,7 @@ async function runInitialSourceIngestWithRecovery(input: { } if (action === 'continue') { input.io.stdout.write(`│ Context source saved without a completed context build for ${input.connectionId}.\n`); - input.io.stdout.write(`│ Run later: ktx ingest run --connection-id ${input.connectionId} --adapter \n`); + input.io.stdout.write(`│ Run later: ktx ingest ${input.connectionId}\n`); return 'continue'; } return 'back'; @@ -1786,7 +1786,7 @@ export async function runKtxSetupSourcesStep( const prompts = deps.prompts ?? 
createPromptAdapter(); const project = await loadKtxProject({ projectDir: args.projectDir }); if (!hasPrimarySource(project.config)) { - const message = 'Connect a primary source before adding context sources.'; + const message = 'Connect a database before adding context sources.'; if (args.source) { io.stderr.write(`${message}\n`); return { status: 'failed', projectDir: args.projectDir }; diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 246484d3..b0c28d7c 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -7,7 +7,7 @@ import { writeKtxSetupState } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js'; -import { contextBuildCommands, writeKtxSetupContextState } from './setup-context.js'; +import { contextBuildCommands, readKtxSetupContextState, writeKtxSetupContextState } from './setup-context.js'; import { runDemoTour } from './setup-demo-tour.js'; import { formatKtxSetupStatus, readKtxSetupStatus, runKtxSetup } from './setup.js'; @@ -297,10 +297,10 @@ describe('setup status', () => { await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ context: { ready: false, - status: 'running', + status: 'stale', runId: 'setup-context-local-abc123', - watchCommand: `ktx setup --project-dir ${tempDir}`, statusCommand: `ktx status --project-dir ${tempDir}`, + detail: 'Previous foreground context build did not finish. 
Rerun setup or ktx ingest.', }, }); }); @@ -377,6 +377,8 @@ describe('setup status', () => { expect(rendered).toContain(`KTX project: ${tempDir}`); expect(rendered).toContain('Project ready: yes'); expect(rendered).toContain('LLM ready: no'); + expect(rendered).toContain('Databases configured: no'); + expect(rendered).not.toContain(['Primary sources', 'configured'].join(' ')); expect(rendered).toContain('KTX context built: no'); expect(rendered).not.toContain('No KTX project found.'); }); @@ -1141,11 +1143,11 @@ describe('setup status', () => { expect(databasePrompts.select).not.toHaveBeenCalled(); expect(testIo.stdout()).toContain( - 'KTX cannot work without at least one primary source. Select a source or press Escape to go back.', + 'KTX cannot work without at least one database. Select a database or press Escape to go back.', ); expect(embeddings).toHaveBeenCalledTimes(2); expect(embeddings).toHaveBeenNthCalledWith(2, expect.objectContaining({ forcePrompt: true }), testIo.io); - expect(testIo.stderr()).not.toContain('No primary sources selected.'); + expect(testIo.stderr()).not.toContain('No databases selected.'); }); it('lets Back from the first setup step return to the entry menu instead of exiting', async () => { @@ -1221,6 +1223,11 @@ describe('setup status', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: ['public'], + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], skipDatabases: false, skipSources: true, }, @@ -1237,6 +1244,11 @@ describe('setup status', () => { databaseConnectionId: 'warehouse', databaseUrl: 'env:DATABASE_URL', databaseSchemas: ['public'], + enableQueryHistory: true, + queryHistoryWindowDays: 30, + queryHistoryMinExecutions: 12, + queryHistoryServiceAccountPatterns: ['^svc_'], + queryHistoryRedactionPatterns: ['(?i)secret'], skipDatabases: false, }), 
testIo.io, @@ -1621,51 +1633,7 @@ describe('setup status', () => { expect(io.stderr()).toContain('KTX context is not ready for agents.'); }); - it('does not install agents when full setup context build is detached', async () => { - const calls: string[] = []; - const io = makeIo(); - await writeFile(join(tempDir, 'ktx.yaml'), ['project: revenue', 'connections: {}', ''].join('\n'), 'utf-8'); - - await expect( - runKtxSetup( - { - command: 'run', - projectDir: tempDir, - mode: 'existing', - agents: false, - inputMode: 'disabled', - yes: true, - cliVersion: '0.2.0', - skipLlm: true, - skipEmbeddings: true, - skipDatabases: true, - skipSources: true, - skipAgents: false, - databaseSchemas: [], - }, - io.io, - { - context: async () => { - calls.push('context'); - return { status: 'detached', projectDir: tempDir, runId: 'setup-context-local-test' }; - }, - agents: async () => { - calls.push('agents'); - return { - status: 'ready', - projectDir: tempDir, - installs: [{ target: 'codex', scope: 'project', mode: 'cli' }], - }; - }, - }, - ), - ).resolves.toBe(0); - - expect(calls).toEqual(['context']); - }); - - it('resumes an active context build before prompting for earlier setup steps', async () => { - const io = makeIo(); + it('does not offer background watch choices from setup status', async () => { await writeFile( join(tempDir, 'ktx.yaml'), [ @@ -1682,122 +1650,22 @@ describe('setup status', () => { 'utf-8', ); await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-active', + runId: 'setup-context-local-stale', status: 'running', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', + startedAt: '2026-05-09T09:00:00.000Z', + updatedAt: '2026-05-09T09:00:00.000Z', primarySourceConnectionIds: ['warehouse'], contextSourceConnectionIds: [], reportIds: [], artifactPaths: [], retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-active'), - }); - const context = vi.fn(async () => ({ - status: 
'detached' as const, - projectDir: tempDir, - runId: 'setup-context-local-active', - })); - const databases = vi.fn(async () => { - throw new Error('database setup should not run while context build is active'); + commands: contextBuildCommands(tempDir, 'setup-context-local-stale'), }); - await expect( - runKtxSetup( - { - command: 'run', - projectDir: tempDir, - mode: 'existing', - agents: false, - inputMode: 'auto', - yes: false, - cliVersion: '0.2.0', - skipLlm: false, - skipEmbeddings: false, - skipDatabases: false, - skipSources: false, - skipAgents: false, - databaseSchemas: [], - }, - io.io, - { context, databases }, - ), - ).resolves.toBe(0); - - expect(context).toHaveBeenCalledWith( - { projectDir: tempDir, inputMode: 'auto', allowEmpty: true }, - io.io, - ); - expect(databases).not.toHaveBeenCalled(); - }); - - it('skips entry menu and auto-watches when context build is active and showEntryMenu is true', async () => { - const io = makeIo(); - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'project: revenue', - 'setup:', - ' database_connection_ids:', - ' - warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - ' url: env:DATABASE_URL', - '', - ].join('\n'), - 'utf-8', - ); - await writeKtxSetupContextState(tempDir, { - runId: 'setup-context-local-active', - status: 'detached', - startedAt: '2026-05-09T10:00:00.000Z', - updatedAt: '2026-05-09T10:00:00.000Z', - primarySourceConnectionIds: ['warehouse'], - contextSourceConnectionIds: [], - reportIds: [], - artifactPaths: [], - retryableFailedTargets: [], - commands: contextBuildCommands(tempDir, 'setup-context-local-active'), - }); - const context = vi.fn(async () => ({ - status: 'detached' as const, - projectDir: tempDir, - runId: 'setup-context-local-active', - })); - const entryMenuSelect = vi.fn(async () => 'exit'); - - await expect( - runKtxSetup( - { - command: 'run', - projectDir: tempDir, - mode: 'existing', - agents: false, - inputMode: 'auto', - yes: false, - cliVersion: 
'0.2.0', - skipLlm: false, - skipEmbeddings: false, - skipDatabases: false, - skipSources: false, - skipAgents: false, - databaseSchemas: [], - showEntryMenu: true, - }, - io.io, - { - context, - entryMenuDeps: { prompts: { select: entryMenuSelect, cancel: vi.fn() } }, - }, - ), - ).resolves.toBe(0); - - expect(entryMenuSelect).not.toHaveBeenCalled(); - expect(context).toHaveBeenCalledWith( - { projectDir: tempDir, inputMode: 'auto', allowEmpty: true, autoWatch: true }, - io.io, - ); + const status = await readKtxSetupStatus(tempDir); + expect(status.context.status).toBe('stale'); + const state = await readKtxSetupContextState(tempDir); + expect(state.status).toBe('stale'); }); it('routes a ready project menu selection to agent setup', async () => { diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index ee7d227e..d89a4eec 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -90,12 +90,12 @@ export type KtxSetupArgs = databaseConnectionId?: string; databaseUrl?: string; databaseSchemas: string[]; - enableHistoricSql?: boolean; - disableHistoricSql?: boolean; - historicSqlWindowDays?: number; - historicSqlMinExecutions?: number; - historicSqlServiceAccountPatterns?: string[]; - historicSqlRedactionPatterns?: string[]; + enableQueryHistory?: boolean; + disableQueryHistory?: boolean; + queryHistoryWindowDays?: number; + queryHistoryMinExecutions?: number; + queryHistoryServiceAccountPatterns?: string[]; + queryHistoryRedactionPatterns?: string[]; skipDatabases: boolean; source?: KtxSetupSourceType; sourceConnectionId?: string; @@ -371,16 +371,13 @@ export function formatKtxSetupStatus(status: KtxSetupStatus): string { `Embeddings ready: ${formatReady(status.embeddings.ready)}${ status.embeddings.model ? 
` (${status.embeddings.model})` : '' }`, - `Primary sources configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`, + `Databases configured: ${formatConnectionList(status.databases.map((database) => database.connectionId))}`, `Context sources configured: ${formatConnectionList(status.sources.map((source) => source.connectionId))}`, `KTX context built: ${formatContextBuilt(status.context)}`, `Agent integration ready: ${formatReady(status.agents.some((agent) => agent.ready))}${ status.agents.length > 0 ? ` (${status.agents.map((agent) => `${agent.target}:${agent.scope}`).join(', ')})` : '' }`, ]; - if (!status.context.ready && status.context.watchCommand && status.context.status === 'running') { - lines.push(`Resume: ${status.context.watchCommand}`); - } if (!status.context.ready && status.context.status === 'failed' && status.context.detail) { lines.push(`Retry: ${status.context.retryCommand ?? `ktx setup --project-dir ${status.project.path}`}`); } @@ -412,7 +409,7 @@ function setupContextReady(status: KtxSetupStatus): boolean { } function setupContextActive(status: KtxSetupStatus): boolean { - return status.context.status === 'running' || status.context.status === 'detached'; + return status.context.status === 'running'; } function writeContextNotReadyForAgents(projectDir: string, io: KtxCliIo): void { @@ -627,17 +624,17 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup ...(args.databaseConnectionId ? { databaseConnectionId: args.databaseConnectionId } : {}), ...(args.databaseUrl ? { databaseUrl: args.databaseUrl } : {}), databaseSchemas: args.databaseSchemas, - ...(args.enableHistoricSql !== undefined ? { enableHistoricSql: args.enableHistoricSql } : {}), - ...(args.disableHistoricSql !== undefined ? { disableHistoricSql: args.disableHistoricSql } : {}), - ...(args.historicSqlWindowDays !== undefined ? 
{ historicSqlWindowDays: args.historicSqlWindowDays } : {}), - ...(args.historicSqlMinExecutions !== undefined - ? { historicSqlMinExecutions: args.historicSqlMinExecutions } + ...(args.enableQueryHistory !== undefined ? { enableQueryHistory: args.enableQueryHistory } : {}), + ...(args.disableQueryHistory !== undefined ? { disableQueryHistory: args.disableQueryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.queryHistoryMinExecutions !== undefined + ? { queryHistoryMinExecutions: args.queryHistoryMinExecutions } : {}), - ...(args.historicSqlServiceAccountPatterns - ? { historicSqlServiceAccountPatterns: args.historicSqlServiceAccountPatterns } + ...(args.queryHistoryServiceAccountPatterns + ? { queryHistoryServiceAccountPatterns: args.queryHistoryServiceAccountPatterns } : {}), - ...(args.historicSqlRedactionPatterns - ? { historicSqlRedactionPatterns: args.historicSqlRedactionPatterns } + ...(args.queryHistoryRedactionPatterns + ? 
{ queryHistoryRedactionPatterns: args.queryHistoryRedactionPatterns } : {}), skipDatabases: args.skipDatabases || !shouldRunDatabases, }, @@ -683,6 +680,8 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup inputMode: args.inputMode, forcePrompt: forcePromptSteps.has('context') || runOnly === 'context', allowEmpty: true, + cliVersion: args.cliVersion, + runtimeInstallPolicy: setupRuntimeInstallPolicy(args), }, io, ); diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 6864b13d..537f373c 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -26,12 +26,13 @@ function isExecFailure(error: unknown): error is ExecFailure { return error instanceof Error && ('stdout' in error || 'stderr' in error || 'code' in error); } -async function runBuiltCli(args: string[], options: { env?: NodeJS.ProcessEnv } = {}): Promise { +async function runBuiltCli(args: string[], options: { cwd?: string; env?: NodeJS.ProcessEnv } = {}): Promise { try { const result = await execFileAsync(process.execPath, [CLI_BIN, ...args], { + ...(options.cwd ? { cwd: options.cwd } : {}), encoding: 'utf8', timeout: 20_000, - ...(options.env ? { env: options.env } : {}), + env: options.env ?? 
process.env, }); return { code: 0, @@ -50,28 +51,6 @@ async function runBuiltCli(args: string[], options: { env?: NodeJS.ProcessEnv } } } -async function writeWarehouseConfig(projectDir: string): Promise { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - 'ingest:', - ' adapters:', - ' - fake', - '', - ].join('\n'), - 'utf-8', - ); -} - -async function writeSourceFixture(sourceDir: string): Promise { - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); -} - function createSqliteWarehouse(dbPath: string): void { const db = new Database(dbPath); try { @@ -160,33 +139,23 @@ describe('standalone built ktx CLI smoke', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('reports missing local ingest LLM config through the built binary', async () => { + it('rejects old low-level ingest flags through the built binary', async () => { const projectDir = join(tempDir, 'project'); - const sourceDir = join(tempDir, 'source'); const init = await runSetupNewProject(projectDir); expectSetupStderr(init); expect(init.stdout).toContain(`Project: ${projectDir}`); - await writeWarehouseConfig(projectDir); - await writeSourceFixture(sourceDir); - const run = await runBuiltCli([ 'ingest', 'run', - '--project-dir', - projectDir, '--connection-id', 'warehouse', '--adapter', 'fake', - '--source-dir', - sourceDir, ]); expect(run).toMatchObject({ code: 1, stdout: '' }); - expect(run.stderr).toContain( - 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', - ); + expect(run.stderr).toContain("unknown option '--connection-id'"); }); it('rejects the removed agent command through the built binary', async () => { @@ -202,7 +171,10 @@ describe('standalone built ktx CLI smoke', () => { expect(result.stdout).toMatch(/KTX status/); if 
(result.stdout.includes('No project here yet.')) { - expect(result.stdout).toContain('Before you can run ktx setup'); + expect(result.stdout).toContain('ktx setup'); + } else { + expect(result.stdout).toContain('Node 22+'); + expect(result.stdout).toContain('Workspace-local CLI'); } expect(result.stdout).toContain('Node 22+'); expect(result.stdout).toContain('Workspace-local CLI'); @@ -210,8 +182,8 @@ describe('standalone built ktx CLI smoke', () => { expect([0, 1]).toContain(result.code); }); - it('runs structural and enriched scans through the built binary with manifest artifacts', async () => { - const projectDir = join(tempDir, 'scan-project'); + it('runs fast public database ingest through the built binary with manifest artifacts', async () => { + const projectDir = join(tempDir, 'database-ingest-project'); const init = await runSetupNewProject(projectDir); expectSetupStderr(init); @@ -225,43 +197,19 @@ describe('standalone built ktx CLI smoke', () => { expect(connectionTest.stdout).toContain('Driver: sqlite'); expect(connectionTest.stdout).toContain('Tables: 2'); - const structural = await runBuiltCli(['scan', 'warehouse', '--project-dir', projectDir]); - expectProjectStderr(structural, projectDir); - expect(structural.stdout).toContain('Status: done'); - expect(structural.stdout).toContain('Mode: structural'); - expect(structural.stdout).toContain('Schema shards: 1'); + const ingest = await runBuiltCli(['ingest', 'warehouse', '--project-dir', projectDir, '--fast', '--no-input']); + expectProjectStderr(ingest, projectDir); + expect(ingest.stdout).toContain('Ingest finished'); + expect(ingest.stdout).toContain('warehouse'); + expect(ingest.stdout).toContain('Database schema'); + expect(ingest.stdout).toContain('warehouse done'); + expect(ingest.stdout).not.toContain('KTX scan completed'); - const structuralManifest = await readFile( - join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), - 'utf-8', - ); - 
expect(structuralManifest).toContain('customers:'); - expect(structuralManifest).toContain('orders:'); - expect(structuralManifest).toContain('source: formal'); - expect(structuralManifest).not.toContain('ai:'); - - const providerlessEnriched = await runBuiltCli([ - 'scan', - 'warehouse', - '--project-dir', - projectDir, - '--mode', - 'enriched', - ]); - expectProjectStderr(providerlessEnriched, projectDir); - expect(providerlessEnriched.stdout).toContain('Mode: enriched'); - expect(providerlessEnriched.stdout).toContain('Relationships'); - expect(providerlessEnriched.stdout).toContain('Accepted: 1'); - expect(providerlessEnriched.stdout).toContain('scan_enrichment_backend_not_configured'); - expect(providerlessEnriched.stdout).toContain('Enrichment artifacts: 3'); - await writeSqliteScanConfig(projectDir, dbPath, true); - const enriched = await runBuiltCli(['scan', 'warehouse', '--project-dir', projectDir, '--mode', 'enriched']); - expectProjectStderr(enriched, projectDir); - expect(enriched.stdout).toContain('Mode: enriched'); - expect(enriched.stdout).toContain('Enrichment artifacts:'); - - const enrichedManifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); - expect(enrichedManifest).toContain('Deterministic description'); + const manifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); + expect(manifest).toContain('customers:'); + expect(manifest).toContain('orders:'); + expect(manifest).toContain('source: formal'); + expect(manifest).not.toContain('ai:'); }, 30_000); it('parses gateway LLM config and OpenAI enrichment embeddings used by standalone scans without network calls', async () => { diff --git a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 59afa811..08686355 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -5,6 +5,7 @@ import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig, } from 
'@ktx/context/project'; +import type { PostgresPgssProbeResult } from '@ktx/context/ingest'; import type { DoctorCheck } from './doctor.js'; type ProjectStatusLevel = 'ok' | 'warn' | 'fail'; @@ -32,6 +33,11 @@ interface ConnectionStatus extends ProjectStatusLine { driver: string; } +interface QueryHistoryStatus extends ProjectStatusLine { + connection: string; + dialect: 'postgres'; +} + interface PipelineStatus { adapters: string[]; enrichmentMode: string; @@ -70,6 +76,7 @@ export interface ProjectStatus { embeddings: EmbeddingsStatus; storage: StorageStatus; connections: ConnectionStatus[]; + queryHistory: QueryHistoryStatus[]; pipeline: PipelineStatus; warnings: WarningItem[]; verdict: ProjectVerdict; @@ -294,6 +301,144 @@ function buildConnectionStatus( } } +interface PostgresQueryHistoryProbeInput { + projectDir: string; + connectionId: string; + connection: KtxProjectConnectionConfig; + env: NodeJS.ProcessEnv; +} + +type PostgresQueryHistoryProbe = ( + input: PostgresQueryHistoryProbeInput, +) => Promise; + +function recordValue(value: unknown): Record | null { + return value && typeof value === 'object' && !Array.isArray(value) ? (value as Record) : null; +} + +function queryHistoryRecord(connection: KtxProjectConnectionConfig): Record | null { + const context = recordValue(connection.context); + return recordValue(context?.queryHistory); +} + +function legacyHistoricSqlRecord(connection: KtxProjectConnectionConfig): Record | null { + return recordValue(connection.historicSql); +} + +function isEnabledPostgresQueryHistory(connection: KtxProjectConnectionConfig): boolean { + const queryHistory = queryHistoryRecord(connection); + if (queryHistory) { + return queryHistory.enabled === true; + } + const legacy = legacyHistoricSqlRecord(connection); + return legacy?.enabled === true && legacy.dialect === 'postgres'; +} + +function isPostgresDriver(connection: KtxProjectConnectionConfig): boolean { + const driver = String(connection.driver ?? 
'').toLowerCase(); + return driver === 'postgres' || driver === 'postgresql'; +} + +function queryHistoryFailureFix(error: unknown, connectionId: string, projectDir: string): string { + if (error instanceof Error && error.name === 'HistoricSqlExtensionMissingError' && 'remediation' in error) { + return String(error.remediation); + } + if (error instanceof Error && error.name === 'HistoricSqlGrantsMissingError' && 'remediation' in error) { + return String(error.remediation); + } + if (error instanceof Error && error.name === 'HistoricSqlVersionUnsupportedError') { + return 'Use PostgreSQL 14 or newer, or disable query history for this connection'; + } + return `Fix connections.${connectionId} Postgres settings, then rerun \`ktx status --project-dir ${projectDir}\``; +} + +function failureDetail(error: unknown): string { + if (error instanceof Error && error.message.trim().length > 0) { + return error.message.trim().split('\n')[0] ?? error.message.trim(); + } + return String(error); +} + +function readinessDetail(result: PostgresPgssProbeResult): string { + const warningText = result.warnings.length > 0 ? ` with warnings: ${result.warnings.join('; ')}` : ''; + const info = result.info ?? []; + const infoText = info.length > 0 ? `; info: ${info.join('; ')}` : ''; + return `pg_stat_statements ready (${result.pgServerVersion})${warningText}${infoText}`; +} + +async function defaultPostgresQueryHistoryProbe( + input: PostgresQueryHistoryProbeInput, +): Promise { + const [{ PostgresPgssReader }, { KtxPostgresHistoricSqlQueryClient, isKtxPostgresConnectionConfig }] = + await Promise.all([import('@ktx/context/ingest'), import('@ktx/connector-postgres')]); + + const inputDriver = input.connection.driver ?? 
'unknown'; + if (!isKtxPostgresConnectionConfig(input.connection)) { + throw new Error(`Native PostgreSQL connector cannot run driver "${inputDriver}"`); + } + + const client = new KtxPostgresHistoricSqlQueryClient({ + connectionId: input.connectionId, + connection: input.connection, + env: input.env, + }); + try { + return await new PostgresPgssReader().probe(client); + } finally { + await client.cleanup(); + } +} + +async function buildQueryHistoryStatus( + project: KtxLocalProject, + options: BuildProjectStatusOptions, +): Promise { + const targets = Object.entries(project.config.connections) + .filter(([, connection]) => isEnabledPostgresQueryHistory(connection)) + .sort(([left], [right]) => left.localeCompare(right)); + + const probe = options.postgresQueryHistoryProbe ?? defaultPostgresQueryHistoryProbe; + const env = options.env ?? process.env; + const statuses: QueryHistoryStatus[] = []; + for (const [connectionId, connection] of targets) { + if (!isPostgresDriver(connection)) { + statuses.push({ + connection: connectionId, + dialect: 'postgres', + status: 'fail', + detail: `connections.${connectionId}.context.queryHistory is enabled but driver is ${String(connection.driver)}`, + fix: `Set connections.${connectionId}.driver to postgres or disable query history for this connection`, + }); + continue; + } + + try { + const result = await probe({ projectDir: project.projectDir, connectionId, connection, env }); + statuses.push({ + connection: connectionId, + dialect: 'postgres', + status: result.warnings.length > 0 ? 'warn' : 'ok', + detail: readinessDetail(result), + ...(result.warnings.length > 0 + ? 
{ + fix: `Update the Postgres parameter group or config, then rerun \`ktx status --project-dir ${project.projectDir}\``, + } + : {}), + }); + } catch (error) { + statuses.push({ + connection: connectionId, + dialect: 'postgres', + status: 'fail', + detail: failureDetail(error), + fix: queryHistoryFailureFix(error, connectionId, project.projectDir), + }); + } + } + + return statuses; +} + const ADAPTER_DRIVER_REQUIREMENT: Record = { 'live-database': ['postgres', 'postgresql', 'mysql', 'snowflake', 'bigquery', 'clickhouse', 'sqlite', 'sqlserver'], dbt: ['dbt', 'dbt-core', 'dbt-cloud'], @@ -411,6 +556,7 @@ function buildVerdict( llm: LlmStatus, embeddings: EmbeddingsStatus, connections: ConnectionStatus[], + queryHistory: QueryHistoryStatus[], warnings: WarningItem[], ): { verdict: ProjectVerdict; reason: string; nextActions: string[] } { if (llm.status === 'fail') { @@ -420,6 +566,14 @@ function buildVerdict( nextActions: ['ktx setup'], }; } + const failedQueryHistory = queryHistory.filter((entry) => entry.status === 'fail').length; + if (failedQueryHistory > 0) { + return { + verdict: 'blocked', + reason: `Query history readiness failed for ${failedQueryHistory} connection${failedQueryHistory === 1 ? '' : 's'}.`, + nextActions: ['ktx status --verbose'], + }; + } const reasons: string[] = []; if (llm.status === 'warn') reasons.push('LLM credentials missing'); @@ -432,6 +586,10 @@ function buildVerdict( } const missing = connections.filter((c) => c.status !== 'ok').length; if (missing > 0) reasons.push(`${missing} connection${missing === 1 ? '' : 's'} need configuration`); + const queryHistoryWarnings = queryHistory.filter((entry) => entry.status === 'warn').length; + if (queryHistoryWarnings > 0) { + reasons.push(`${queryHistoryWarnings} query history warning${queryHistoryWarnings === 1 ? '' : 's'}`); + } if (warnings.length > 0) reasons.push(`${warnings.length} config warning${warnings.length === 1 ? 
'' : 's'}`); if (reasons.length === 0) { @@ -451,9 +609,10 @@ function buildVerdict( export interface BuildProjectStatusOptions { env?: NodeJS.ProcessEnv; + postgresQueryHistoryProbe?: PostgresQueryHistoryProbe; } -export function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): ProjectStatus { +export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise { const env = options.env ?? process.env; const config = project.config; @@ -463,9 +622,10 @@ export function buildProjectStatus(project: KtxLocalProject, options: BuildProje const connections = Object.entries(config.connections).map(([name, conn]) => buildConnectionStatus(name, conn, env), ); + const queryHistory = await buildQueryHistoryStatus(project, options); const pipeline = buildPipelineStatus(config); const warnings = buildWarnings(config, connections, llm, embeddings); - const { verdict, reason, nextActions } = buildVerdict(llm, embeddings, connections, warnings); + const { verdict, reason, nextActions } = buildVerdict(llm, embeddings, connections, queryHistory, warnings); return { projectName: config.project, @@ -474,6 +634,7 @@ export function buildProjectStatus(project: KtxLocalProject, options: BuildProje embeddings, storage, connections, + queryHistory, pipeline, warnings, verdict, @@ -580,6 +741,21 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec } lines.push(''); + if (status.queryHistory.length > 0) { + lines.push(` ${bold('Query history')}`); + const connectionWidth = Math.max(...status.queryHistory.map((entry) => entry.connection.length)); + for (const entry of status.queryHistory) { + lines.push( + ` ${sym(entry.status)} ${entry.connection.padEnd(connectionWidth)} ${dim(entry.dialect)} ${entry.detail}`, + ); + if (entry.fix && entry.status !== 'ok') { + const indent = 6 + connectionWidth + 3 + entry.dialect.length + 3; + lines.push(`${' '.repeat(indent)}${dim(`→ 
${entry.fix}`)}`); + } + } + lines.push(''); + } + // Pipeline lines.push(` ${bold('Pipeline')}`); const pipelineLabelWidth = Math.max('Adapters'.length, 'Enrichment'.length, 'Research agent'.length); diff --git a/packages/context/src/agent/agent-runner.service.test.ts b/packages/context/src/agent/agent-runner.service.test.ts index 8f405841..3208bda7 100644 --- a/packages/context/src/agent/agent-runner.service.test.ts +++ b/packages/context/src/agent/agent-runner.service.test.ts @@ -52,11 +52,8 @@ describe('AgentRunnerService.runLoop', () => { telemetryTags: { source: 'test' }, }); const call = (generateText as any).mock.calls[0][0]; - expect(call.messages).toEqual([ - { role: 'system', content: 'SYS' }, - { role: 'user', content: 'USR' }, - ]); - expect(call.system).toBeUndefined(); + expect(call.system).toEqual({ role: 'system', content: 'SYS' }); + expect(call.messages).toEqual([{ role: 'user', content: 'USR' }]); expect(call.prompt).toBeUndefined(); expect(call.tools).toEqual(tools); expect(call.stopWhen).toBe(17); @@ -81,10 +78,8 @@ describe('AgentRunnerService.runLoop', () => { expect(llmProvider.getModel).toHaveBeenCalledWith('candidateExtraction'); expect(generateText).toHaveBeenCalledWith( expect.objectContaining({ - messages: [ - { role: 'system', content: 'system' }, - { role: 'user', content: 'user' }, - ], + system: { role: 'system', content: 'system' }, + messages: [{ role: 'user', content: 'user' }], }), ); }); diff --git a/packages/context/src/agent/agent-runner.service.ts b/packages/context/src/agent/agent-runner.service.ts index 92daad8f..11a0715c 100644 --- a/packages/context/src/agent/agent-runner.service.ts +++ b/packages/context/src/agent/agent-runner.service.ts @@ -36,6 +36,14 @@ export interface AgentRunnerServiceDeps { logger?: KtxLogger; } +function splitSystemPromptMessages(messages: ReturnType['messages']) { + const systemMessages = messages.filter((message) => message.role === 'system'); + return { + system: systemMessages.length === 0 
? undefined : systemMessages.length === 1 ? systemMessages[0] : systemMessages, + messages: messages.filter((message) => message.role !== 'system'), + }; +} + export class AgentRunnerService { private readonly logger: KtxLogger; @@ -54,6 +62,7 @@ export class AgentRunnerService { tools: params.toolSet, model, }); + const promptMessages = splitSystemPromptMessages(built.messages); await this.deps.debugRequestRecorder?.record( summarizeKtxLlmDebugRequest({ @@ -76,7 +85,8 @@ export class AgentRunnerService { experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({ source: params.telemetryTags.operationName ?? 'ktx-agent-runner', }), - messages: built.messages, + ...(promptMessages.system ? { system: promptMessages.system } : {}), + messages: promptMessages.messages, tools: built.tools as Record, onStepFinish: async () => { stepIndex += 1; diff --git a/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts index 06e5702c..b9ee73b3 100644 --- a/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/bigquery-query-history-reader.test.ts @@ -103,7 +103,7 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => { for await (const row of reader.fetchAggregated( client, { start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') }, - { dialect: 'bigquery', minExecutions: 5, windowDays: 90, filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, + { dialect: 'bigquery', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, )) { rows.push(row); } @@ -136,6 +136,7 @@ describe('BigQueryHistoricSqlQueryHistoryReader', () => { dialect: 'bigquery', minExecutions: 5, windowDays: 90, + enabledTables: [], 
filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90, diff --git a/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-reader.test.ts index 5e4744bb..a91171cd 100644 --- a/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-reader.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/postgres-pgss-reader.test.ts @@ -215,7 +215,7 @@ describe('PostgresPgssReader aggregate path', () => { for await (const row of reader.fetchAggregated( { executeQuery }, { start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') }, - { dialect: 'postgres', minExecutions: 5, filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, + { dialect: 'postgres', minExecutions: 5, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, )) { rows.push(row); } diff --git a/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts index 92b05132..b33183d7 100644 --- a/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/snowflake-query-history-reader.test.ts @@ -102,7 +102,7 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => { for await (const row of reader.fetchAggregated( client, { start: new Date('2026-02-10T00:00:00.000Z'), end: new Date('2026-05-11T00:00:00.000Z') }, - { dialect: 'snowflake', minExecutions: 5, windowDays: 90, filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, + { dialect: 'snowflake', minExecutions: 5, windowDays: 90, enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90 }, )) { rows.push(row); } @@ -135,6 
+135,7 @@ describe('SnowflakeHistoricSqlQueryHistoryReader', () => { dialect: 'snowflake', minExecutions: 5, windowDays: 90, + enabledTables: [], filters: { dropTrivialProbes: true }, redactionPatterns: [], staleArchiveAfterDays: 90, diff --git a/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts b/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts index d1610054..b2af032f 100644 --- a/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/stage-unified.test.ts @@ -237,6 +237,80 @@ describe('stageHistoricSqlAggregatedSnapshot', () => { expect(patternsJson).toContain('[REDACTED]'); }); + it('limits staged table artifacts to configured enabled tables', async () => { + const stagedDir = await tempDir(); + const reader: HistoricSqlReader = { + async probe() { + return { warnings: [], info: [] }; + }, + async *fetchAggregated() { + yield aggregate({ + templateId: 'selected-qualified', + canonicalSql: 'select count(*) from orbit_analytics.int_active_contract_arr', + }); + yield aggregate({ + templateId: 'selected-unqualified', + canonicalSql: 'select count(*) from int_customer_health_signals', + }); + yield aggregate({ + templateId: 'unselected', + canonicalSql: 'select count(*) from orbit_raw.accounts', + }); + }, + }; + const sqlAnalysis: SqlAnalysisPort = { + analyzeForFingerprint: vi.fn(), + analyzeBatch: vi.fn(async () => new Map([ + [ + 'selected-qualified', + { + tablesTouched: ['orbit_analytics.int_active_contract_arr'], + columnsByClause: { select: [], where: [], join: [], groupBy: [] }, + }, + ], + [ + 'selected-unqualified', + { + tablesTouched: ['int_customer_health_signals'], + columnsByClause: { select: [], where: [], join: [], groupBy: [] }, + }, + ], + [ + 'unselected', + { + tablesTouched: ['orbit_raw.accounts'], + columnsByClause: { select: [], where: [], join: [], groupBy: [] }, + }, + ], + ])), + }; + + await 
stageHistoricSqlAggregatedSnapshot({ + stagedDir, + connectionId: 'warehouse', + queryClient: {}, + reader, + sqlAnalysis, + pullConfig: { + dialect: 'postgres', + enabledTables: [ + 'orbit_analytics.int_active_contract_arr', + 'orbit_analytics.int_customer_health_signals', + ], + }, + now: new Date('2026-05-11T12:00:00.000Z'), + }); + + expect(await readdir(join(stagedDir, 'tables'))).toEqual([ + 'int_customer_health_signals.json', + 'orbit_analytics.int_active_contract_arr.json', + ]); + const manifest = await readJson>(stagedDir, 'manifest.json'); + expect(manifest.touchedTableCount).toBe(2); + const patterns = await readJson>(stagedDir, 'patterns-input.json'); + expect(patterns.templates.map((entry: any) => entry.id)).toEqual(['selected-qualified', 'selected-unqualified']); + }); + it('preserves full patterns audit input and writes bounded cross-table pattern shards', async () => { const stagedDir = await tempDir(); const largeSql = `select * from public.orders o join public.customers c on c.id = o.customer_id where payload = '${'x'.repeat(8000)}'`; diff --git a/packages/context/src/ingest/adapters/historic-sql/stage-unified.ts b/packages/context/src/ingest/adapters/historic-sql/stage-unified.ts index c2975042..18675b11 100644 --- a/packages/context/src/ingest/adapters/historic-sql/stage-unified.ts +++ b/packages/context/src/ingest/adapters/historic-sql/stage-unified.ts @@ -39,9 +39,15 @@ interface StageHistoricSqlAggregatedSnapshotInput { interface ParsedTemplate { template: AggregatedTemplate; tablesTouched: string[]; + includedTables: string[]; columnsByClause: Record; } +interface EnabledTableFilter { + exact: Set; + uniqueUnqualified: Set; +} + interface TableAccumulator { table: string; executions: number; @@ -103,6 +109,45 @@ function shouldDropTemplate(template: AggregatedTemplate, config: HistoricSqlUni return false; } +function normalizeTableIdentifier(value: string): string { + return value.trim().toLowerCase(); +} + +function 
unqualifiedTableIdentifier(value: string): string { + const parts = normalizeTableIdentifier(value).split('.').filter(Boolean); + return parts.at(-1) ?? ''; +} + +function buildEnabledTableFilter(enabledTables: string[]): EnabledTableFilter | null { + if (enabledTables.length === 0) { + return null; + } + const exact = new Set(enabledTables.map(normalizeTableIdentifier).filter((value) => value.length > 0)); + const unqualifiedCounts = new Map(); + for (const table of exact) { + const unqualified = unqualifiedTableIdentifier(table); + if (unqualified.length > 0) { + unqualifiedCounts.set(unqualified, (unqualifiedCounts.get(unqualified) ?? 0) + 1); + } + } + return { + exact, + uniqueUnqualified: new Set( + [...unqualifiedCounts.entries()] + .filter(([, count]) => count === 1) + .map(([table]) => table), + ), + }; +} + +function isEnabledTable(table: string, filter: EnabledTableFilter | null): boolean { + if (!filter) { + return true; + } + const normalized = normalizeTableIdentifier(table); + return filter.exact.has(normalized) || filter.uniqueUnqualified.has(unqualifiedTableIdentifier(normalized)); +} + function historicSqlWindowDays(config: HistoricSqlUnifiedPullConfig): number { return 'windowDays' in config ? config.windowDays : 90; } @@ -235,6 +280,7 @@ function toPatternsInput(parsedTemplates: ParsedTemplate[]): StagedPatternsInput export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSqlAggregatedSnapshotInput): Promise { const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig); + const enabledTableFilter = buildEnabledTableFilter(config.enabledTables); const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns); const now = input.now ?? 
new Date(); const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000); @@ -263,12 +309,14 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql continue; } const tablesTouched = [...new Set(parsed.tablesTouched)].filter((table) => table.length > 0).sort(); - if (tablesTouched.length === 0) { + const includedTables = tablesTouched.filter((table) => isEnabledTable(table, enabledTableFilter)); + if (includedTables.length === 0) { continue; } parsedTemplates.push({ template: redactTemplateSql(template, redactors), tablesTouched, + includedTables, columnsByClause: Object.fromEntries( Object.entries(parsed.columnsByClause).map(([clause, columns]) => [clause, [...new Set(columns)].sort()]), ), @@ -277,7 +325,7 @@ export async function stageHistoricSqlAggregatedSnapshot(input: StageHistoricSql const byTable = new Map(); for (const parsed of parsedTemplates) { - for (const table of parsed.tablesTouched) { + for (const table of parsed.includedTables) { const acc = byTable.get(table) ?? 
accumulatorFor(table); addTemplate(acc, parsed); byTable.set(table, acc); diff --git a/packages/context/src/ingest/adapters/historic-sql/types.ts b/packages/context/src/ingest/adapters/historic-sql/types.ts index 353baaf7..ddb1846a 100644 --- a/packages/context/src/ingest/adapters/historic-sql/types.ts +++ b/packages/context/src/ingest/adapters/historic-sql/types.ts @@ -10,6 +10,7 @@ const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']); const historicSqlCommonPullConfigSchema = z.object({ minExecutions: z.number().int().nonnegative().default(5), + enabledTables: z.array(z.string().min(1)).default([]), filters: z.object({ serviceAccounts: z.object({ patterns: z.array(z.string()).default([]), diff --git a/packages/context/src/ingest/local-adapters.test.ts b/packages/context/src/ingest/local-adapters.test.ts index d8575ae8..e8cbf5a5 100644 --- a/packages/context/src/ingest/local-adapters.test.ts +++ b/packages/context/src/ingest/local-adapters.test.ts @@ -194,6 +194,7 @@ describe('local ingest adapters', () => { await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).resolves.toEqual({ dialect: 'postgres', minExecutions: 7, + enabledTables: [], filters: { serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' }, dropTrivialProbes: true, @@ -203,6 +204,45 @@ describe('local ingest adapters', () => { }); }); + it('maps connection context.queryHistory to historic-sql pull config', async () => { + const project = projectWithConnections({ + warehouse: { + driver: 'postgres', + context: { + queryHistory: { + enabled: true, + windowDays: 45, + minExecutions: 7, + filters: { dropTrivialProbes: true }, + }, + }, + }, + }); + const adapter = { source: 'historic-sql' } as never; + + await expect(localPullConfigForAdapter(project, adapter, 'warehouse')).resolves.toMatchObject({ + dialect: 'postgres', + minExecutions: 7, + filters: { dropTrivialProbes: true }, + }); + }); + + it('prefers context.queryHistory over legacy historicSql', 
async () => { + const project = projectWithConnections({ + warehouse: { + driver: 'postgres', + historicSql: { enabled: true, dialect: 'postgres', windowDays: 90 }, + context: { queryHistory: { enabled: true, windowDays: 30 } }, + }, + }); + const adapter = { source: 'historic-sql' } as never; + + await expect(localPullConfigForAdapter(project, adapter, 'warehouse')).resolves.toMatchObject({ + dialect: 'postgres', + minExecutions: 5, + }); + }); + it('rejects local historic-sql pulls when the connection has not enabled historic SQL', async () => { const historicSql = createDefaultLocalIngestAdapters(project, { historicSql: { @@ -234,7 +274,7 @@ describe('local ingest adapters', () => { }); await expect(localPullConfigForAdapter(postgresProject, historicSql!, 'warehouse')).rejects.toThrow( - 'Connection "warehouse" does not have historicSql.enabled: true', + 'Connection "warehouse" does not have context.queryHistory.enabled: true', ); }); diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts index 1012ad01..2b13327a 100644 --- a/packages/context/src/ingest/local-adapters.ts +++ b/packages/context/src/ingest/local-adapters.ts @@ -53,6 +53,7 @@ export interface DefaultLocalIngestAdaptersOptions { postgresQueryClient?: KtxPostgresQueryClient; now?: () => Date; }; + historicSqlPullConfigOverride?: Record; looker?: { daemonBaseUrl?: string; client?: Pick; @@ -163,6 +164,28 @@ function isRecord(value: unknown): value is Record { return typeof value === 'object' && value !== null && !Array.isArray(value); } +const historicSqlDialectByDriver = new Map([ + ['postgres', 'postgres'], + ['postgresql', 'postgres'], + ['bigquery', 'bigquery'], + ['snowflake', 'snowflake'], +]); + +function queryHistoryRecord(connection: unknown): Record | null { + if (!isRecord(connection)) return null; + const context = isRecord(connection.context) ? connection.context : null; + const queryHistory = isRecord(context?.queryHistory) ? 
context.queryHistory : null; + return queryHistory; +} + +function queryHistoryPullConfig(connection: unknown): Record | null { + const queryHistory = queryHistoryRecord(connection); + if (queryHistory?.enabled !== true || !isRecord(connection)) return null; + const dialect = historicSqlDialectByDriver.get(String(connection.driver ?? '').toLowerCase()); + if (!dialect) return null; + return { ...queryHistory, dialect }; +} + function stringField(value: unknown): string | null { return typeof value === 'string' && value.trim().length > 0 ? value.trim() : null; } @@ -213,14 +236,21 @@ export async function localPullConfigForAdapter( ): Promise { if (adapter.source === 'metabase') { throw new Error( - 'Metabase scheduled pulls fan out by mapping. Call runLocalMetabaseIngest() or use `ktx ingest run --adapter metabase --connection-id ` from the CLI.', + 'Metabase scheduled pulls fan out by mapping. Call runLocalMetabaseIngest() or use `ktx ingest ` from the CLI.', ); } const connection = project.config.connections[connectionId]; if (adapter.source === HISTORIC_SQL_SOURCE_KEY) { + if (options.historicSqlPullConfigOverride) { + return historicSqlUnifiedPullConfigSchema.parse(options.historicSqlPullConfigOverride); + } + const queryHistory = queryHistoryPullConfig(connection); + if (queryHistory) { + return historicSqlUnifiedPullConfigSchema.parse(queryHistory); + } const historicSql = isRecord(connection?.historicSql) ? 
connection.historicSql : null; if (historicSql?.enabled !== true) { - throw new Error(`Connection "${connectionId}" does not have historicSql.enabled: true`); + throw new Error(`Connection "${connectionId}" does not have context.queryHistory.enabled: true`); } return historicSqlUnifiedPullConfigSchema.parse({ ...historicSql, diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index e9be5a14..c6bd0539 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -56,7 +56,7 @@ describe('createLocalBundleIngestRuntime', () => { }), ).toThrow( [ - 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', `Configure an Anthropic provider, then rerun ingest:`, ` ktx setup --project-dir ${project.projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, ].join('\n'), diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 2a3c9943..047b7ee6 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -571,7 +571,7 @@ function nextLocalJobId(): string { function localIngestLlmProviderGuardMessage(projectDir: string): string { return [ - 'ktx ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'ktx ingest requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', 'Configure an Anthropic provider, then rerun ingest:', ` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, ].join('\n'); diff --git 
a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index ee6b8ee9..b81132d9 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -2,6 +2,21 @@ import { describe, expect, it } from 'vitest'; import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js'; describe('KTX project config', () => { + it.each(['status', 'replay', 'run', 'watch'])('accepts former ingest subcommand name "%s" as a connection id', (connectionId) => { + expect( + parseKtxProjectConfig(` +project: reserved-test +connections: + ${connectionId}: + driver: postgres +`), + ).toMatchObject({ + connections: { + [connectionId]: { driver: 'postgres' }, + }, + }); + }); + it('builds the default standalone project config', () => { expect(buildDefaultKtxProjectConfig('warehouse')).toEqual({ project: 'warehouse', @@ -21,7 +36,7 @@ describe('KTX project config', () => { models: {}, }, ingest: { - adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'], + adapters: [], embeddings: { backend: 'deterministic', model: 'deterministic', @@ -67,13 +82,12 @@ describe('KTX project config', () => { const parsed = parseKtxProjectConfig(serialized); expect(serialized).toContain('project: warehouse'); - expect(serialized).toContain('live-database'); - expect(serialized).toContain('notion'); + expect(serialized).not.toContain('live-database'); expect(serialized).toContain( ' embeddings:\n backend: deterministic\n model: deterministic\n dimensions: 8', ); expect(parsed.project).toBe('warehouse'); - expect(parsed.ingest.adapters).toEqual(['live-database', 'lookml', 'metabase', 'metricflow', 'notion']); + expect(parsed.ingest.adapters).toEqual([]); expect(parsed.ingest.embeddings).toEqual({ backend: 'deterministic', model: 'deterministic', diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index 0c345473..6cd4d8fe 100644 --- 
a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -391,7 +391,7 @@ export function buildDefaultKtxProjectConfig(projectName = 'ktx-project'): KtxPr models: {}, }, ingest: { - adapters: ['live-database', 'lookml', 'metabase', 'metricflow', 'notion'], + adapters: [], embeddings: { backend: 'deterministic', model: 'deterministic', @@ -484,6 +484,9 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig { ...(isRecord(scanEnrichment.embeddings) ? { embeddings: scanEmbeddings } : {}), }; const parsedScanRelationships = parseScanRelationshipConfig(scanRelationships, defaults.scan.relationships); + const parsedConnections = isRecord(parsed.connections) + ? (parsed.connections as Record) + : defaults.connections; return { project: project.trim(), @@ -494,9 +497,7 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig { }, } : {}), - connections: isRecord(parsed.connections) - ? (parsed.connections as Record) - : defaults.connections, + connections: parsedConnections, storage: { state: storage.state === 'sqlite' ? 'sqlite' : defaults.storage.state, search: storage.search === 'sqlite-fts5' ? 'sqlite-fts5' : defaults.storage.search, @@ -529,5 +530,15 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig { } export function serializeKtxProjectConfig(config: KtxProjectConfig): string { - return `${YAML.stringify(config, { indent: 2, lineWidth: 0 }).trimEnd()}\n`; + const serializedConfig = + config.ingest.adapters.length === 0 + ? 
{ + ...config, + ingest: { + embeddings: config.ingest.embeddings, + workUnits: config.ingest.workUnits, + }, + } + : config; + return `${YAML.stringify(serializedConfig, { indent: 2, lineWidth: 0 }).trimEnd()}\n`; } diff --git a/packages/context/src/project/index.ts b/packages/context/src/project/index.ts index 8ea92bf6..aaec44ed 100644 --- a/packages/context/src/project/index.ts +++ b/packages/context/src/project/index.ts @@ -6,7 +6,11 @@ export type { KtxSearchBackend, KtxStorageState, } from './config.js'; -export { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js'; +export { + buildDefaultKtxProjectConfig, + parseKtxProjectConfig, + serializeKtxProjectConfig, +} from './config.js'; export type { LocalGitFileStoreDeps } from './local-git-file-store.js'; export { LocalGitFileStore } from './local-git-file-store.js'; export { ktxLocalStateDbPath } from './local-state-db.js'; diff --git a/packages/context/src/scan/local-scan.test.ts b/packages/context/src/scan/local-scan.test.ts index 581f02d7..6e9076c6 100644 --- a/packages/context/src/scan/local-scan.test.ts +++ b/packages/context/src/scan/local-scan.test.ts @@ -119,6 +119,22 @@ async function writeLiveDatabaseConfig(projectDir: string): Promise { ); } +async function writeDatabaseConfigWithoutIngestAdapters(projectDir: string): Promise { + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); +} + function fetchOnlyAdapter(options: { extractedAt?: () => string } = {}): SourceAdapter { return { source: 'live-database', @@ -243,6 +259,27 @@ describe('local scan', () => { }); }); + it('runs a structural database scan when live-database is not listed in ktx.yaml', async () => { + await writeDatabaseConfigWithoutIngestAdapters(project.projectDir); + project = await loadKtxProject({ projectDir: 
project.projectDir }); + + const result = await runLocalScan({ + project, + adapters: [fetchOnlyAdapter()], + connectionId: 'warehouse', + jobId: 'scan-run-without-public-adapter', + now: () => new Date('2026-04-29T09:10:00.000Z'), + }); + + expect(result.report).toMatchObject({ + connectionId: 'warehouse', + runId: 'scan-run-without-public-adapter', + artifactPaths: { + reportPath: 'raw-sources/warehouse/live-database/2026-04-29-091000-scan-run-without-public-adapter/scan-report.json', + }, + }); + }); + it('reuses scan report and raw-source paths when the same local scan run id is retried', async () => { const first = await runLocalScan({ project, diff --git a/packages/context/src/scan/local-scan.ts b/packages/context/src/scan/local-scan.ts index 7f3c00a0..362c3b2c 100644 --- a/packages/context/src/scan/local-scan.ts +++ b/packages/context/src/scan/local-scan.ts @@ -342,6 +342,22 @@ function createFilteredConnector(connector: KtxScanConnector, enabledTables: Set }; } +function withInternalLiveDatabaseAdapter(project: KtxLocalProject): KtxLocalProject { + if (project.config.ingest.adapters.includes(LIVE_DATABASE_ADAPTER)) { + return project; + } + return { + ...project, + config: { + ...project.config, + ingest: { + ...project.config.ingest, + adapters: [...project.config.ingest.adapters, LIVE_DATABASE_ADAPTER], + }, + }, + }; +} + export async function runLocalScan(options: RunLocalScanOptions): Promise { const mode = options.mode ?? 
'structural'; assertSupportedMode(mode); @@ -367,7 +383,7 @@ export async function runLocalScan(options: RunLocalScanOptions): Promise { const smoke = await readText('examples/postgres-historic/scripts/smoke.sh'); assert.match(examples, /postgres-historic/); - assert.match(examples, /unified Historic SQL artifacts/); - assert.match(readme, /--enable-historic-sql/); - assert.match(readme, /--historic-sql-min-executions 2/); + assert.doesNotMatch(examples, /Historic SQL/); + assert.doesNotMatch(examples, /historic-SQL/); + assert.match(examples, /query-history ingest via `pg_stat_statements`/); + assert.doesNotMatch(readme, new RegExp(['--enable-historic', 'sql'].join('-'))); + assert.doesNotMatch(readme, new RegExp(['--historic', 'sql-min-executions'].join('-'))); + assert.doesNotMatch(readme, /ktx ingest run --project-dir/); + assert.doesNotMatch(readme, /--adapter historic-sql/); + assert.match(readme, /--enable-query-history/); + assert.match(readme, /--query-history-min-executions 2/); assert.match(readme, /ktx status --project-dir/); - assert.match(readme, /Postgres Historic SQL/); + assert.match(readme, /Postgres query history/); assert.match(readme, /manifest\.json/); assert.match(readme, /tables\/\*\.json/); assert.match(readme, /patterns-input\.json/); @@ -89,7 +95,7 @@ describe('standalone example docs', () => { assert.match(smoke, /historic-sql-patterns-part-/); assert.match(smoke, /patterns-input\/part-/); assert.doesNotMatch(smoke, new RegExp(["unitKey === 'historic", 'sql', "patterns'"].join('-'))); - assert.match(smoke, /--historic-sql-min-executions 2/); + assert.match(smoke, /--query-history-min-executions 2/); assert.match(smoke, /KTX_RUNTIME_ROOT/); assert.match(smoke, /managedDaemon/); assert.match(smoke, /installPolicy: 'auto'/); @@ -129,6 +135,15 @@ describe('standalone example docs', () => { ); }); + it('checked-in example configs do not include public database adapters', async () => { + const localWarehouseConfig = await 
readFile('examples/local-warehouse/ktx.yaml', 'utf8'); + const orbitConfig = await readFile('examples/orbit-relationship-verification/ktx.yaml', 'utf8'); + const legacyPublicAdapter = new RegExp(['live', 'database'].join('-')); + + assert.doesNotMatch(localWarehouseConfig, legacyPublicAdapter); + assert.doesNotMatch(orbitConfig, legacyPublicAdapter); + }); + it('lists every workspace package in the contributor docs', async () => { const contributing = await readText('docs-site/content/docs/community/contributing.mdx'); @@ -222,18 +237,64 @@ describe('standalone example docs', () => { assert.doesNotMatch(readme, /python -m ktx_daemon semantic-validate/); }); - it('documents scan workflows in the docs site', async () => { + it('documents unified public ingest workflows in the docs site', async () => { const rootReadme = await readText('README.md'); + const cliMeta = await readText('docs-site/content/docs/cli-reference/meta.json'); + const ingestReference = await readText('docs-site/content/docs/cli-reference/ktx-ingest.mdx'); + const devReference = await readText('docs-site/content/docs/cli-reference/ktx-dev.mdx'); + const setupReference = await readText('docs-site/content/docs/cli-reference/ktx-setup.mdx'); const buildingContext = await readText('docs-site/content/docs/guides/building-context.mdx'); - const scanReference = await readText('docs-site/content/docs/cli-reference/ktx-scan.mdx'); + const contextSources = await readText('docs-site/content/docs/integrations/context-sources.mdx'); + const contextAsCode = await readText('docs-site/content/docs/concepts/context-as-code.mdx'); + const quickstart = await readText('docs-site/content/docs/getting-started/quickstart.mdx'); + const primarySources = await readText('docs-site/content/docs/integrations/primary-sources.mdx'); + const examplesIndex = await readText('examples/README.md'); + const localWarehouseReadme = await readText('examples/local-warehouse/README.md'); + + assert.match(ingestReference, /ktx ingest /); 
+ assert.match(ingestReference, /ktx ingest --all --deep/); + assert.match(ingestReference, /--query-history-window-days /); + assert.match(buildingContext, /ktx ingest /); + assert.match(buildingContext, /ktx ingest --all/); + assert.match(contextSources, /ktx ingest /); + assert.match(contextAsCode, /ktx ingest --all --no-input/); + assert.match(quickstart, /schema context/); + assert.match(primarySources, /context:\n queryHistory:/); + assert.match(rootReadme, /Databases configured: yes \(postgres-warehouse\)/); + assert.match(quickstart, /Databases:\n postgres-warehouse: deep context complete/); + assert.match(quickstart, /Databases configured: yes \(postgres-warehouse\)/); + assert.match(setupReference, /Databases configured: yes \(postgres-warehouse\)/); + assert.doesNotMatch(rootReadme, new RegExp(['Primary sources', 'configured'].join(' '))); + assert.doesNotMatch(quickstart, new RegExp(['Primary', 'sources'].join(' '))); + assert.doesNotMatch(setupReference, new RegExp(['Primary sources', 'configured'].join(' '))); + + assert.doesNotMatch(cliMeta, /ktx-scan/); + assert.doesNotMatch(ingestReference, /ktx ingest run/); + assert.doesNotMatch(ingestReference, /ktx ingest status/); + assert.doesNotMatch(ingestReference, /ktx ingest replay/); + assert.doesNotMatch(ingestReference, /--adapter/); + assert.doesNotMatch(ingestReference, /ktx ingest watch/); + assert.doesNotMatch(ingestReference, /live-database/); + assert.doesNotMatch(devReference, /ktx scan/); + assert.doesNotMatch(buildingContext, /ktx ingest watch/); + assert.doesNotMatch(buildingContext, /ktx ingest status/); + assert.doesNotMatch(buildingContext, /ktx ingest replay/); + assert.doesNotMatch(buildingContext, /historic-sql/); + assert.doesNotMatch(buildingContext, /live-database/); + assert.doesNotMatch(contextSources, /ktx ingest run --connection-id/); + assert.doesNotMatch(contextSources, /--adapter /); + assert.doesNotMatch(contextAsCode, /ktx ingest run --connection-id/); + 
assert.doesNotMatch(quickstart, /Historic SQL/); + assert.doesNotMatch(quickstart, /--enable-historic-sql/); + assert.doesNotMatch(quickstart, /press d<\/kbd> to detach/); + assert.doesNotMatch(primarySources, /historicSql/); + assert.doesNotMatch(primarySources, /Historic SQL/); + assert.doesNotMatch(examplesIndex, /ktx ingest run --project-dir/); + assert.doesNotMatch(localWarehouseReadme, /ktx ingest run --project-dir/); - assert.match(buildingContext, /ktx scan /); - assert.match(buildingContext, /ktx status/); - assert.doesNotMatch(buildingContext, /ktx scan status /); - assert.doesNotMatch(buildingContext, /ktx scan report /); - assert.match(scanReference, /ktx scan \[options\]/); assert.match(rootReadme, /raw-sources\//); - assert.match(rootReadme, /live-database\//); + assert.doesNotMatch(rootReadme, new RegExp(`${['live', 'database'].join('-')}/`)); + assert.doesNotMatch(rootReadme, /ktx scan/); assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/); assert.doesNotMatch(rootReadme, /ktx ingest run --project-dir/); assert.doesNotMatch(rootReadme, /ktx ingest status --project-dir/); diff --git a/scripts/installed-live-database-smoke.mjs b/scripts/installed-live-database-smoke.mjs index 88213366..fae97850 100644 --- a/scripts/installed-live-database-smoke.mjs +++ b/scripts/installed-live-database-smoke.mjs @@ -95,34 +95,23 @@ export function buildKtxYaml(postgresUrl) { 'storage:', ' state: sqlite', ' search: sqlite-fts5', - 'ingest:', - ' adapters:', - ' - live-database', '', ].join('\n'); } -export function buildLiveDatabaseIngestArgs(projectDir, databaseIntrospectionUrl) { +export function buildLiveDatabaseIngestArgs(projectDir, _databaseIntrospectionUrl, connectionId = 'warehouse') { return [ 'exec', 'ktx', 'ingest', - 'run', + connectionId, '--project-dir', projectDir, - '--connection-id', - 'warehouse', - '--adapter', - 'live-database', - '--database-introspection-url', - databaseIntrospectionUrl, + '--fast', + '--no-input', ]; } -export 
function buildLiveDatabaseStatusArgs(projectDir, runId) { - return ['exec', 'ktx', 'ingest', 'status', '--project-dir', projectDir, runId]; -} - async function run(command, args, options = {}) { process.stdout.write(`$ ${command} ${args.join(' ')}\n`); return new Promise((resolve) => { @@ -173,7 +162,7 @@ function requireOutput(label, result, pattern) { function getRunId(stdout) { const match = stdout.match(/^Run: (.+)$/m); if (!match) { - throw new Error(`ingest run output did not include a run id\nstdout:\n${stdout}`); + throw new Error(`ingest output did not include a run id\nstdout:\n${stdout}`); } return match[1]; } @@ -323,24 +312,11 @@ async function main() { env: managedRuntimeEnv(cleanInstallDir), timeout: 120_000, }); - requireSuccess('ktx ingest run live-database', ingestRun); - requireOutput('ktx ingest run live-database', ingestRun, /Status: done/); - requireOutput('ktx ingest run live-database', ingestRun, /Adapter: live-database/); - requireOutput('ktx ingest run live-database', ingestRun, /Diff: \+4\/~0\/-0\/=0/); - requireOutput('ktx ingest run live-database', ingestRun, /Raw files: 4/); - requireOutput('ktx ingest run live-database', ingestRun, /Work units: 2/); + requireSuccess('ktx ingest warehouse --fast', ingestRun); + requireOutput('ktx ingest warehouse --fast', ingestRun, /Ingest finished/); + requireOutput('ktx ingest warehouse --fast', ingestRun, /Database schema/); const runId = getRunId(ingestRun.stdout); - const ingestStatus = await run('pnpm', buildLiveDatabaseStatusArgs(projectDir, runId), { - cwd: cleanInstallDir, - env: managedRuntimeEnv(cleanInstallDir), - timeout: 30_000, - }); - requireSuccess('ktx ingest status live-database', ingestStatus); - requireOutput('ktx ingest status live-database', ingestStatus, new RegExp(`Run: ${runId}`)); - requireOutput('ktx ingest status live-database', ingestStatus, /Status: done/); - requireOutput('ktx ingest status live-database', ingestStatus, /Raw files: 4/); - requireOutput('ktx ingest 
status live-database', ingestStatus, /Work units: 2/); await assertPathExists(join(projectDir, '.ktx', 'db.sqlite'), 'SQLite local ingest state'); process.stdout.write(`Installed live-database artifact smoke passed: ${runId}\n`); } finally { diff --git a/scripts/installed-live-database-smoke.test.mjs b/scripts/installed-live-database-smoke.test.mjs index 0a45cdf5..3eda6cf0 100644 --- a/scripts/installed-live-database-smoke.test.mjs +++ b/scripts/installed-live-database-smoke.test.mjs @@ -5,7 +5,6 @@ import { buildDockerRunArgs, buildKtxYaml, buildLiveDatabaseIngestArgs, - buildLiveDatabaseStatusArgs, buildPostgresUrl, buildPostgresReadyArgs, buildSeedSql, @@ -50,7 +49,7 @@ describe('installed live-database artifact smoke helpers', () => { ); }); - it('writes a live-database-only KTX project config with SQLite local state', () => { + it('writes a public database ingest KTX project config with SQLite local state', () => { assert.equal( buildKtxYaml('postgresql://ktx:postgres@127.0.0.1:15432/warehouse'), // pragma: allowlist secret [ @@ -62,9 +61,6 @@ describe('installed live-database artifact smoke helpers', () => { 'storage:', ' state: sqlite', ' search: sqlite-fts5', - 'ingest:', - ' adapters:', - ' - live-database', '', ].join('\n'), ); @@ -97,30 +93,17 @@ describe('installed live-database artifact smoke helpers', () => { ]); }); - it('builds installed CLI live-database ingest and status commands', () => { + it('builds the installed CLI public database ingest command', () => { assert.deepEqual(buildLiveDatabaseIngestArgs('/tmp/project', 'http://127.0.0.1:8765'), [ 'exec', 'ktx', 'ingest', - 'run', + 'warehouse', '--project-dir', '/tmp/project', - '--connection-id', - 'warehouse', - '--adapter', - 'live-database', - '--database-introspection-url', - 'http://127.0.0.1:8765', + '--fast', + '--no-input', ]); - assert.deepEqual(buildLiveDatabaseStatusArgs('/tmp/project', 'local-run-1'), [ - 'exec', - 'ktx', - 'ingest', - 'status', - '--project-dir', - '/tmp/project', - 
'local-run-1', - ]); }); }); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index 90155636..219f3e3c 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -518,7 +518,7 @@ function requireSuccess(label, result) { assert.equal(result.stderr, '', label + ' wrote unexpected stderr'); } -function requireProjectStderr(label, result, projectDir) { +function requireSuccessWithProjectStderr(label, result, projectDir) { assert.equal( result.code, 0, @@ -527,6 +527,15 @@ function requireProjectStderr(label, result, projectDir) { assert.equal(result.stderr, 'Project: ' + projectDir + '\\n', label + ' wrote unexpected stderr'); } +function requireExitCodeWithProjectStderr(label, result, projectDir, expectedCode) { + assert.equal( + result.code, + expectedCode, + label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr, + ); + assert.equal(result.stderr, 'Project: ' + projectDir + '\\n', label + ' wrote unexpected stderr'); +} + function requireSuccessWithStderr(label, result, stderrPattern) { assert.equal( result.code, @@ -559,12 +568,6 @@ function requireIncludes(values, expected, label) { assert.ok(values.includes(expected), label + ' did not include ' + expected + ': ' + values.join(', ')); } -function getRunId(stdout) { - const match = stdout.match(/^Run: (.+)$/m); - assert.ok(match, 'ingest run output did not include a run id'); - return match[1]; -} - async function writeSqliteWarehouse(projectDir) { const database = new DatabaseSync(join(projectDir, 'warehouse.db')); try { @@ -588,7 +591,6 @@ process.env.KTX_RUNTIME_ROOT = join(root, 'managed-runtime'); let daemonStarted = false; try { const projectDir = join(root, 'project'); - const sourceDir = join(root, 'source'); const version = await run('pnpm', ['exec', 'ktx', '--version']); requireSuccess('ktx public package version', version); @@ -619,7 +621,6 @@ try { '--skip-agents', ]); requireSuccess('ktx setup', 
init); - requireOutput('ktx setup', init, /Project: /); const emptyProjectDir = join(root, 'empty-project'); const emptyInit = await run('pnpm', [ @@ -652,10 +653,6 @@ try { 'scan:', ' enrichment:', ' mode: deterministic', - 'ingest:', - ' adapters:', - ' - fake', - ' - live-database', '', ].join('\\n'), 'utf-8', @@ -818,52 +815,32 @@ try { requireOutput('ktx dev runtime stop', runtimeStop, /Stopped KTX Python daemon/); process.stdout.write('ktx dev runtime daemon lifecycle verified\\n'); - const structuralScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse', + const structuralScan = await run('pnpm', ['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, + '--fast', + '--no-input', ]); - requireProjectStderr('ktx scan structural', structuralScan, projectDir); - requireOutput('ktx scan structural', structuralScan, /Status: done/); - requireOutput('ktx scan structural', structuralScan, /Mode: structural/); - requireOutput('ktx scan structural', structuralScan, /Needs attention\\s+None/); - const structuralScanRunId = getRunId(structuralScan.stdout); + requireSuccessWithProjectStderr('ktx ingest fast', structuralScan, projectDir); + requireOutput('ktx ingest fast', structuralScan, /Ingest finished/); + requireOutput('ktx ingest fast', structuralScan, /Database schema/); + requireOutput('ktx ingest fast', structuralScan, /warehouse\\s+done/); await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml')); - process.stdout.write('ktx scan structural verified: ' + structuralScanRunId + '\\n'); + process.stdout.write('ktx ingest fast verified\\n'); - const enrichedScan = await run('pnpm', ['exec', 'ktx', 'scan', 'warehouse', + const enrichedScan = await run('pnpm', ['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, - '--mode', - 'enriched', + '--deep', + '--no-input', ]); - requireProjectStderr('ktx scan enriched', enrichedScan, projectDir); - requireOutput('ktx scan enriched', enrichedScan, /Status: done/); 
- requireOutput('ktx scan enriched', enrichedScan, /Mode: enriched/); - requireOutput('ktx scan enriched', enrichedScan, /Enrichment artifacts:/); - const enrichedScanRunId = getRunId(enrichedScan.stdout); - process.stdout.write('ktx scan enriched verified: ' + enrichedScanRunId + '\\n'); - - await mkdir(join(sourceDir, 'orders'), { recursive: true }); - await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\\n', 'utf-8'); - - const ingestRun = await run('pnpm', ['exec', 'ktx', 'ingest', 'run', - '--project-dir', - projectDir, - '--connection-id', - 'warehouse', - '--adapter', - 'fake', - '--source-dir', - sourceDir, - ]); - assert.equal(ingestRun.code, 1, 'ktx ingest run without an LLM provider must fail'); - assert.match( - ingestRun.stderr, - /ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway, or an injected agentRunner/, - ); + requireExitCodeWithProjectStderr('ktx ingest deep readiness guard', enrichedScan, projectDir, 1); + requireOutput('ktx ingest deep readiness guard', enrichedScan, /Ingest finished with partial failures/); + requireOutput('ktx ingest deep readiness guard', enrichedScan, /requires deep ingest readiness/); + process.stdout.write('ktx ingest deep readiness guard verified\\n'); await access(join(projectDir, '.ktx', 'db.sqlite')); - process.stdout.write('ktx ingest provider guard verified\\n'); + process.stdout.write('ktx ingest state verified\\n'); } finally { if (daemonStarted) { await run('pnpm', ['exec', 'ktx', 'dev', 'runtime', 'stop']); @@ -939,7 +916,7 @@ try { assert.ok([0, 1].includes(doctor.code), 'ktx status setup exit code must be 0 or 1'); requireStdout('ktx status setup', doctor, /KTX status/); requireStdout('ktx status setup', doctor, /No project here yet\\./); - requireStdout('ktx status setup', doctor, /Before you can run ktx setup/); + requireStdout('ktx status setup', doctor, /ktx setup/); requireStdout('ktx status setup', doctor, /Node 22\\+/); assert.equal(doctor.stderr, 
'', 'ktx status setup wrote unexpected stderr'); } finally { diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index 8e338c0c..1f0bf164 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -464,7 +464,7 @@ describe('verification snippets', () => { assert.match(source, /node:sqlite/); assert.match(source, /driver: sqlite/); assert.match(source, /path: warehouse\.db/); - assert.match(source, /live-database/); + assert.doesNotMatch(source, /live-database/); assert.match(source, /'--execute'/); assert.match(source, /"mode": "compile_only"/); assert.match(source, /"mode": "executed"/); @@ -488,18 +488,18 @@ describe('verification snippets', () => { assert.match(source, /ktx dev runtime stop/); assert.doesNotMatch(source, /ktx dev runtime prune/); assert.doesNotMatch(source, /staleRuntimeDir/); - assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'scan',\s*'warehouse'/); - assert.match(source, /'--mode',\s*'enriched'/); + assert.match(source, /run\('pnpm', \[\s*'exec',\s*'ktx',\s*'ingest',\s*'warehouse'/); + assert.match(source, /'--deep'/); assert.doesNotMatch(source, /'--enrich'/); - assert.match(source, /ktx scan structural verified/); - assert.match(source, /ktx scan enriched verified/); + assert.match(source, /ktx ingest fast verified/); + assert.match(source, /ktx ingest deep readiness guard verified/); assert.match(source, /enrichment:/); assert.match(source, /mode: deterministic/); - assert.match(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); + assert.doesNotMatch(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); assert.match(source, /access\(join\(projectDir, '\.ktx', 'db\.sqlite'\)\)/); assert.match(source, /SQLite wiki index/); - assert.match(source, /ktx ingest run requires llm\\.provider\\.backend: anthropic, vertex, or gateway/); - assert.match(source, /ktx ingest provider guard verified/); + assert.doesNotMatch(source, /ktx ingest run requires 
llm\\.provider\\.backend: anthropic, vertex, or gateway/); + assert.match(source, /ktx ingest state verified/); }); describe('npmCliSmokeSource', () => { @@ -511,6 +511,8 @@ describe('verification snippets', () => { assert.match(source, /Usage: ktx setup/); assert.doesNotMatch(source, new RegExp(["'demo'", "'--mode'", "'deterministic'"].join(', '))); assert.match(source, /'status', '--verbose', '--no-input'/); + assert.match(source, /KTX status/); + assert.match(source, /No project here yet/); assert.doesNotMatch(source, /function requireProjectStderr/); assert.match(source, /Object\.keys\(packageJson\.dependencies\)/); assert.match(source, /'@kaelio\/ktx'/); diff --git a/scripts/relationship-orbit-verification.mjs b/scripts/relationship-orbit-verification.mjs index 81f81187..5f9ada62 100644 --- a/scripts/relationship-orbit-verification.mjs +++ b/scripts/relationship-orbit-verification.mjs @@ -41,8 +41,8 @@ export function defaultOrbitVerificationProjectDir() { return defaultProjectDir; } -function shellCommand(argv) { - return ['pnpm', 'run', 'ktx', '--', ...argv].join(' '); +function internalScanCommand(input) { + return `internal runKtxScan connection=${input.connectionId} mode=relationships projectDir=${input.projectDir}`; } function firstNonEmptyLine(...values) { @@ -55,7 +55,7 @@ function firstNonEmptyLine(...values) { return line; } } - return 'Orbit scan command failed before producing diagnostic output'; + return 'Orbit relationship scan failed before producing diagnostic output'; } function parseArgs(argv) { @@ -88,8 +88,15 @@ function parseArgs(argv) { return options; } -export function buildOrbitScanArgv(input) { - return ['scan', input.connectionId, '--mode', 'relationships', '--project-dir', input.projectDir]; +export function buildOrbitScanArgs(input) { + return { + command: 'run', + projectDir: input.projectDir, + connectionId: input.connectionId, + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }; } export function 
extractRunId(stdout) { @@ -171,7 +178,7 @@ function formatBlocked(result) { '', '## Evidence', '', - '- Orbit verification was not executed because the current local Orbit scan command failed.', + '- Orbit verification was not executed because the current local Orbit relationship scan failed.', '- Re-run with `--report-path` to write verification evidence to a custom location.', '', 'Scan stdout:', @@ -228,6 +235,36 @@ async function runBufferedWorkspaceKtx(runner, argv, rootDir, execFile) { }; } +function cliScanModulePath(rootDir) { + return resolve(rootDir, 'packages/cli/dist/scan.js'); +} + +async function loadRunKtxScan(rootDir) { + const module = await import(pathToFileURL(cliScanModulePath(rootDir)).href); + return module.runKtxScan; +} + +async function runBufferedInternalScan(input) { + const stdout = new BufferWriter(); + const stderr = new BufferWriter(); + let runKtxScan = input.runKtxScan; + + if (!runKtxScan) { + const build = await runBufferedWorkspaceKtx(input.runner, ['--version'], input.rootDir, input.execFile); + if (build.exitCode !== 0) { + return build; + } + runKtxScan = await loadRunKtxScan(input.rootDir); + } + + const exitCode = await runKtxScan(input.scanArgs, { stdout, stderr }); + return { + exitCode, + stdout: stdout.text(), + stderr: stderr.text(), + }; +} + function orbitVerificationEnv(projectDir) { if (projectDir !== defaultProjectDir) { return process.env; @@ -253,8 +290,15 @@ export async function runOrbitVerification(options = {}) { const env = options.env ?? 
orbitVerificationEnv(projectDir); const runWithEnv = (argv, runnerOptions) => runner(argv, { ...runnerOptions, env }); - const scanArgv = buildOrbitScanArgv({ connectionId, projectDir }); - const scan = await runBufferedWorkspaceKtx(runWithEnv, scanArgv, rootDir, execFile); + const scanArgs = buildOrbitScanArgs({ connectionId, projectDir }); + const scanCommand = internalScanCommand({ connectionId, projectDir }); + const scan = await runBufferedInternalScan({ + scanArgs, + rootDir, + execFile, + runner: runWithEnv, + runKtxScan: options.runKtxScan, + }); let result; if (scan.exitCode !== 0) { @@ -263,7 +307,7 @@ export async function runOrbitVerification(options = {}) { date, connectionId, projectDir, - scanCommand: shellCommand(scanArgv), + scanCommand, scanExitCode: scan.exitCode, blocker: firstNonEmptyLine(scan.stderr, scan.stdout), scanStdout: scan.stdout, @@ -277,7 +321,7 @@ export async function runOrbitVerification(options = {}) { date, connectionId, projectDir, - scanCommand: shellCommand(scanArgv), + scanCommand, scanExitCode: scan.exitCode, blocker: 'KTX scan completed without printing a Run id', scanStdout: scan.stdout, @@ -291,7 +335,7 @@ export async function runOrbitVerification(options = {}) { date, connectionId, projectDir, - scanCommand: shellCommand(scanArgv), + scanCommand, scanExitCode: scan.exitCode, blocker: 'KTX scan completed without printing a report artifact path', scanStdout: scan.stdout, @@ -304,7 +348,7 @@ export async function runOrbitVerification(options = {}) { date, connectionId, projectDir, - scanCommand: shellCommand(scanArgv), + scanCommand, reportPath: fullScanReportPath, scanExitCode: scan.exitCode, scanStdout: scan.stdout, diff --git a/scripts/relationship-orbit-verification.test.mjs b/scripts/relationship-orbit-verification.test.mjs index a6dc3607..65a200c4 100644 --- a/scripts/relationship-orbit-verification.test.mjs +++ b/scripts/relationship-orbit-verification.test.mjs @@ -1,9 +1,8 @@ import assert from 
'node:assert/strict'; import { readFile } from 'node:fs/promises'; -import { dirname } from 'node:path'; import { describe, it } from 'node:test'; import { - buildOrbitScanArgv, + buildOrbitScanArgs, defaultOrbitVerificationProjectDir, extractReportPath, extractRunId, @@ -49,6 +48,14 @@ function successReportJson() { }); } +function successfulRunKtxScan(calls = []) { + return async (args, io) => { + calls.push(args); + io.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); + return 0; + }; +} + describe('relationship Orbit verification helper', () => { it('exposes the Orbit verification command from the KTX workspace package', async () => { const packageJson = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8')); @@ -59,20 +66,19 @@ describe('relationship Orbit verification helper', () => { ); }); - it('builds the current KTX launcher arguments for scan commands', () => { - assert.deepEqual(buildOrbitScanArgv({ connectionId: 'orbit', projectDir: '/tmp/orbit-project' }), [ - 'scan', - 'orbit', - '--mode', - 'relationships', - '--project-dir', - '/tmp/orbit-project', - ]); + it('builds the internal relationship scan arguments', () => { + assert.deepEqual(buildOrbitScanArgs({ connectionId: 'orbit', projectDir: '/tmp/orbit-project' }), { + command: 'run', + projectDir: '/tmp/orbit-project', + connectionId: 'orbit', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }); }); it('uses the checked-in Orbit verification project by default', async () => { - const calls = []; - const envs = []; + const scanCalls = []; const writes = []; const defaultProjectDir = defaultOrbitVerificationProjectDir(); @@ -83,27 +89,28 @@ describe('relationship Orbit verification helper', () => { writeFile: async (path, content) => { writes.push({ path, content }); }, - runWorkspaceKtx: async (argv, options) => { - calls.push(argv); - envs.push(options.env); - options.stdout.write('KTX 
scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); - return 0; - }, + runKtxScan: successfulRunKtxScan(scanCalls), readFile: async () => successReportJson(), }); assert.equal(result.status, 'success'); - assert.deepEqual(calls, [ - ['scan', 'orbit', '--mode', 'relationships', '--project-dir', defaultProjectDir], + assert.deepEqual(scanCalls, [ + { + command: 'run', + projectDir: defaultProjectDir, + connectionId: 'orbit', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }, ]); - assert.equal(envs[0].GIT_CEILING_DIRECTORIES, dirname(defaultProjectDir)); assert.equal(writes.length, 1); assert.match(writes[0].content, new RegExp(defaultProjectDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); }); it('uses KTX_PROJECT_DIR for the Orbit verification project override', async () => { const previousProjectDir = process.env.KTX_PROJECT_DIR; - const calls = []; + const scanCalls = []; try { process.env.KTX_PROJECT_DIR = '/tmp/orbit-project-from-env'; @@ -113,17 +120,20 @@ describe('relationship Orbit verification helper', () => { now: () => new Date('2026-05-07T10:00:00.000Z'), mkdir: async () => {}, writeFile: async () => {}, - runWorkspaceKtx: async (argv, options) => { - calls.push(argv); - options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); - return 0; - }, + runKtxScan: successfulRunKtxScan(scanCalls), readFile: async () => successReportJson(), }); assert.equal(result.projectDir, '/tmp/orbit-project-from-env'); - assert.deepEqual(calls, [ - ['scan', 'orbit', '--mode', 'relationships', '--project-dir', '/tmp/orbit-project-from-env'], + assert.deepEqual(scanCalls, [ + { + command: 'run', + projectDir: '/tmp/orbit-project-from-env', + connectionId: 'orbit', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }, ]); } finally { if (previousProjectDir === undefined) { @@ -146,7 +156,7 @@ describe('relationship Orbit verification 
helper', () => { date: '2026-05-07', connectionId: 'orbit', projectDir: '/tmp/orbit-project', - scanCommand: 'pnpm run ktx -- scan orbit --mode relationships --project-dir /tmp/orbit-project', + scanCommand: 'internal runKtxScan connection=orbit mode=relationships projectDir=/tmp/orbit-project', reportPath: '/tmp/orbit-project/reports/scan-report.json', scanExitCode: 0, scanStdout: 'KTX scan completed\nRun: scan-orbit-1\n', @@ -171,7 +181,7 @@ describe('relationship Orbit verification helper', () => { date: '2026-05-07', connectionId: 'orbit', projectDir: '/tmp/orbit-project', - scanCommand: 'pnpm run ktx -- scan orbit --mode relationships --project-dir /tmp/orbit-project', + scanCommand: 'internal runKtxScan connection=orbit mode=relationships projectDir=/tmp/orbit-project', scanExitCode: 1, blocker: 'Connection "orbit" was not found', scanStdout: '', @@ -180,12 +190,12 @@ describe('relationship Orbit verification helper', () => { assert.match(markdown, /Exit code: 1/); assert.match(markdown, /Connection "orbit" was not found/); - assert.match(markdown, /Orbit verification was not executed because the current local Orbit scan command failed/); + assert.match(markdown, /Orbit verification was not executed because the current local Orbit relationship scan failed/); assert.doesNotMatch(markdown, /scan\.enrichment\.mode is required/); }); it('runs scan then reads the report artifact and writes success Markdown', async () => { - const calls = []; + const scanCalls = []; const writes = []; const result = await runOrbitVerification({ connectionId: 'orbit', @@ -196,24 +206,27 @@ describe('relationship Orbit verification helper', () => { writeFile: async (path, content) => { writes.push({ path, content }); }, - runWorkspaceKtx: async (argv, options) => { - calls.push(argv); - options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n Report: reports/scan-report.json\n'); - return 0; - }, + runKtxScan: successfulRunKtxScan(scanCalls), readFile: async 
() => successReportJson(), }); assert.equal(result.status, 'success'); - assert.deepEqual(calls, [ - ['scan', 'orbit', '--mode', 'relationships', '--project-dir', '/tmp/orbit-project'], + assert.deepEqual(scanCalls, [ + { + command: 'run', + projectDir: '/tmp/orbit-project', + connectionId: 'orbit', + mode: 'relationships', + detectRelationships: true, + dryRun: false, + }, ]); assert.equal(writes.length, 1); assert.equal(writes[0].path, '/tmp/orbit-report.md'); assert.match(writes[0].content, /Accepted: 14/); }); - it('writes blocked Markdown when the scan command fails before a run id exists', async () => { + it('writes blocked Markdown when the internal scan fails before a run id exists', async () => { const writes = []; const result = await runOrbitVerification({ connectionId: 'orbit', @@ -224,8 +237,8 @@ describe('relationship Orbit verification helper', () => { writeFile: async (path, content) => { writes.push({ path, content }); }, - runWorkspaceKtx: async (_argv, options) => { - options.stderr.write('Connection "orbit" was not found\n'); + runKtxScan: async (_args, io) => { + io.stderr.write('Connection "orbit" was not found\n'); return 1; }, }); @@ -236,7 +249,7 @@ describe('relationship Orbit verification helper', () => { assert.match(writes[0].content, /Connection "orbit" was not found/); }); - it('runs the workspace launcher in buffered mode so real scan errors are captured', async () => { + it('runs the workspace launcher in buffered mode when preparing the internal scan module', async () => { let sawExecFile = false; const result = await runOrbitVerification({ connectionId: 'orbit', @@ -246,7 +259,8 @@ describe('relationship Orbit verification helper', () => { mkdir: async () => {}, writeFile: async () => {}, execFile: async () => ({ stdout: '', stderr: '' }), - runWorkspaceKtx: async (_argv, options) => { + runWorkspaceKtx: async (argv, options) => { + assert.deepEqual(argv, ['--version']); sawExecFile = typeof options.execFile === 'function'; 
options.stderr.write('ENOENT: no such file or directory, open \'/tmp/orbit-project/ktx.yaml\'\n'); return 1; diff --git a/scripts/run-ktx.test.mjs b/scripts/run-ktx.test.mjs index 3263ef30..1533b67c 100644 --- a/scripts/run-ktx.test.mjs +++ b/scripts/run-ktx.test.mjs @@ -152,7 +152,7 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t const logs = []; let sourceMtimeMs = 3000; - const exitCode = await runWorkspaceKtx(['scan', 'orbit', '--mode', 'relationships'], { + const exitCode = await runWorkspaceKtx(['status', '--json', '--no-input'], { rootDir: '/workspace/ktx', access: async () => undefined, stat: async (path) => ({ @@ -174,7 +174,7 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t sourceMtimeMs = 1000; return { stdout: 'build ok\n', stderr: '' }; } - return { stdout: 'scan ok\n', stderr: '' }; + return { stdout: '{"status":"ready"}\n', stderr: '' }; }, stdout: { write: (chunk) => logs.push(['stdout', chunk]) }, stderr: { write: (chunk) => logs.push(['stderr', chunk]) }, @@ -185,12 +185,12 @@ test('runWorkspaceKtx rebuilds before running when workspace sources are newer t calls.map((call) => [call.command, call.args]), [ ['pnpm', ['run', 'build']], - [process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'scan', 'orbit', '--mode', 'relationships']], + [process.execPath, ['/workspace/ktx/packages/cli/dist/bin.js', 'status', '--json', '--no-input']], ], ); assert.deepEqual(logs, [ ['stderr', 'KTX CLI build output is stale. Rebuilding it now with `pnpm run build`...\n'], ['stdout', 'build ok\n'], - ['stdout', 'scan ok\n'], + ['stdout', '{"status":"ready"}\n'], ]); });