diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml new file mode 100644 index 00000000..b7d90c43 --- /dev/null +++ b/.github/workflows/star-history.yml @@ -0,0 +1,72 @@ +name: Refresh star history chart + +on: + schedule: + # Twice daily at 06:00 and 18:00 UTC. + - cron: "0 6,18 * * *" + workflow_dispatch: + +permissions: + contents: write + +env: + DO_NOT_TRACK: "1" + KTX_TELEMETRY_DISABLED: "1" + NEXT_TELEMETRY_DISABLED: "1" + +concurrency: + group: star-history-refresh + cancel-in-progress: true + +jobs: + refresh: + name: Regenerate assets/star-history.svg + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + # RELEASE_PAT can push to the protected main branch; the default + # GITHUB_TOKEN is rejected by the branch-protection hook (GH006). + token: ${{ secrets.RELEASE_PAT }} + + - name: Fetch fresh star-history SVG + run: | + set -euo pipefail + # cachebust forces star-history to regenerate instead of serving its + # own server-side cache; --location follows the slug-normalizing 301. + url="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date&cachebust=${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + curl --fail --location --silent --show-error \ + --retry 3 --retry-delay 5 --max-time 60 \ + -o assets/star-history.svg.new "$url" + # Guard against error pages / truncated responses before overwriting. + if ! grep -q "</svg>" assets/star-history.svg.new; then + echo "Downloaded file is not a valid SVG; aborting." >&2 + exit 1 + fi + if [ "$(wc -c < assets/star-history.svg.new)" -lt 1000 ]; then + echo "Downloaded SVG is suspiciously small; aborting." >&2 + exit 1 + fi + # The star-history API returns the SVG without a trailing newline, + # which end-of-file-fixer rewrites whenever pre-commit runs + # --all-files on a PR. 
Because the refresh commit below uses [skip ci], + # the hook never runs against it here, so an un-normalized file + # silently breaks the pre-commit check on every open PR. Normalize to + # exactly one trailing newline before committing. + printf '%s\n' "$(cat assets/star-history.svg.new)" > assets/star-history.svg + rm -f assets/star-history.svg.new + + - name: Commit if changed + run: | + set -euo pipefail + if git diff --quiet -- assets/star-history.svg; then + echo "Star-history chart unchanged; nothing to commit." + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add assets/star-history.svg + # [skip ci] keeps this housekeeping commit from triggering KTX CI. + git commit -m "chore: refresh star history chart [skip ci]" + git push diff --git a/AGENTS.md b/AGENTS.md index 3d8c1725..2aa0dbed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -350,8 +350,9 @@ error messages — including the disambiguation rule for the overloaded word `source` (semantic / primary / context / source of truth) — see [`docs/terminology.md`](docs/terminology.md). Follow that file when choosing between near-synonyms (e.g. `connector` vs `adapter`, `data agent` vs -`database agent`, `fast ingest` vs `schema ingest`). Product-name rules in -this section take precedence over anything in that file when they conflict. +`database agent`, `context-source ingest` vs `source ingest`). Product-name +rules in this section take precedence over anything in that file when they +conflict. ### Updating `docs-site/` After Code Changes diff --git a/README.md b/README.md index 23b2fa0a..686ece22 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,6 @@ event catalog and opt-out options.

- ktx Star History Chart + ktx Star History Chart

diff --git a/assets/star-history.svg b/assets/star-history.svg new file mode 100644 index 00000000..3f6c4a04 --- /dev/null +++ b/assets/star-history.svg @@ -0,0 +1 @@ +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index d4e06881..db3b1c0e 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -5,9 +5,11 @@ description: "Build or refresh ktx context, or capture text into ktx memory." `ktx ingest` builds or refreshes **ktx** context from configured connections, and can also capture free-form text into **ktx** memory. Database connections build -schema context. Context-source connections ingest metadata from tools such as -dbt, Looker, Metabase, MetricFlow, LookML, and Notion. Pass `--text` or -`--file` to capture inline text or text files into memory instead. +enriched context — schema plus AI-generated descriptions, embeddings, and +relationship evidence — and require a configured model and embeddings. +Context-source connections ingest metadata from tools such as dbt, Looker, +Metabase, MetricFlow, LookML, and Notion. Pass `--text` or `--file` to capture +inline text or text files into memory instead. ## Command signature @@ -29,8 +31,6 @@ connection is selected. 
| Flag | Description | Default | |------|-------------|---------| | `--all` | Ingest all configured connections (same as bare invocation) | `false` | -| `--fast` | Use deterministic fast database ingest | Stored connection default, or `fast` | -| `--deep` | Use deep database ingest with AI-generated descriptions, embeddings, and relationship evidence | Stored connection default, or `fast` | | `--query-history` | Include database query-history usage patterns | Stored connection default | | `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | | `--query-history-window-days ` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default | @@ -44,12 +44,12 @@ connection is selected. | `--yes` | Install required managed runtime features without prompting | `false` | | `--no-input` | Disable interactive terminal input | - | -`--fast` and `--deep` are mutually exclusive. Depth flags apply only to -database connections. Query-history flags apply only to database connections +Database ingest always builds enriched context and requires a configured model +and embeddings (run `ktx setup`); connections without that configuration fail +before any work starts. Query-history flags apply only to database connections that support query history. The window flag applies to BigQuery and Snowflake; Postgres reads the current `pg_stat_statements` aggregate data instead of a -time-windowed history table. Query-history ingest runs after fast ingest and -requires deep ingest readiness. +time-windowed history table. Query-history ingest runs after the schema scan. When more than one connection is selected, database ingest runs first, then context-source ingest and memory updates run for context-source connections. 
@@ -72,14 +72,8 @@ ktx ingest # Build one database or context-source connection ktx ingest warehouse -# Force deterministic fast database ingest -ktx ingest warehouse --fast - -# Force deep database ingest with AI enrichment -ktx ingest warehouse --deep - # Include query-history usage patterns -ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history # Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 @@ -154,8 +148,8 @@ KTX_INGEST_TRACE_LEVEL=trace ktx ingest metabase | Error | Cause | Recovery | |-------|-------|----------| | Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` | -| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` | -| Query history is unsupported | The selected database driver does not support query history | Run fast ingest without query-history flags | +| Enrichment is not configured | Database ingest needs a model, embeddings, and scan-enrichment configuration | Run `ktx setup` to configure a model and embeddings | +| Query history is unsupported | The selected database driver does not support query history | Run ingest without query-history flags | | Python runtime is missing | The selected ingest target needs runtime-backed SQL analysis or source parsing | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx admin runtime install` command | -| Context-source options were ignored | Depth and query-history flags were supplied for a context-source connection | Omit database-only flags when ingesting context-source connections | +| Context-source options were ignored | Query-history flags were supplied for a context-source connection | Omit database-only flags when ingesting context-source connections | | Text ingest stops early | `--fail-fast` was 
used and one item failed | Fix the failed item or rerun without `--fail-fast` to collect all failures | diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 2c19bd07..0da7b339 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -131,8 +131,8 @@ BigQuery; and `databases` for ClickHouse. Query history setup is supported for Postgres, BigQuery, and Snowflake. The window flag applies to BigQuery and Snowflake; Postgres reads the current `pg_stat_statements` aggregate data instead of a time-windowed history table. -Enabling query history makes deep ingest readiness matter for later -`ktx ingest` runs. +Later `ktx ingest` runs build enriched context and need a configured model and +embeddings, including when query history is enabled. When query history is enabled for PostgreSQL, Snowflake, or BigQuery, `ktx setup` runs a non-blocking readiness probe after the connection test @@ -160,9 +160,9 @@ sources. This is equivalent to passing `--skip-sources` in scripted setup. 
| `--source-git-url ` | Git URL for dbt, MetricFlow, or LookML | | `--source-branch ` | Git branch for context-source setup | | `--source-subpath ` | Repo subpath for context-source setup | -| `--source-auth-token-ref ` | `env:` or `file:` credential reference for source repo auth | +| `--source-auth-token-ref ` | `env:` or `file:` credential reference for source repo auth or Notion integration token | | `--source-url ` | Source service URL for Metabase or Looker | -| `--source-api-key-ref ` | `env:` or `file:` API key reference for Metabase or Notion | +| `--source-api-key-ref ` | `env:` or `file:` API key reference for Metabase | | `--source-client-id ` | Looker client id | | `--source-client-secret-ref ` | `env:` or `file:` Looker client secret reference | | `--source-warehouse-connection-id ` | Warehouse connection id used for context-source mapping | @@ -221,6 +221,14 @@ ktx setup \ --source-warehouse-connection-id warehouse \ --metabase-database-id 1 +# Add a Notion source that crawls selected root pages +ktx setup \ + --source notion \ + --source-connection-id notion-main \ + --source-auth-token-ref env:NOTION_TOKEN \ + --notion-crawl-mode selected_roots \ + --notion-root-page-id abc123def456 + # Install project-scoped agent integration for Codex ktx setup --agents --target codex ``` diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index 4a919d45..13105851 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -66,8 +66,9 @@ read, how to think, and where to put the results. ## Minimal config A working `ktx.yaml` needs one entry in `connections`. Everything else accepts -defaults. The example below is enough for `ktx ingest warehouse` to run a fast -schema scan against a local Postgres. +defaults. 
The example below registers a local Postgres connection; building +context with `ktx ingest warehouse` also needs a model and embeddings, which +`ktx setup` configures. ```yaml connections: @@ -123,7 +124,7 @@ context-source drivers share the map. Warehouse connections are open objects: the listed fields are validated, and any other field is preserved and passed through to the connector. Use -`enabled_tables` to scope deep ingest to a specific list of +`enabled_tables` to scope ingest to a specific list of `schema.table` names - useful for smoke tests. ```yaml diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 7402d6d9..66f46a79 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -236,7 +236,7 @@ Testing warehouse Connection test passed Building schema context for warehouse - Running fast database ingest + Running database scan ``` If setup exits early, rerun `ktx setup` in the same directory. **ktx** keeps @@ -268,13 +268,13 @@ Agent integration ready: yes (codex:project) For a structured check inside scripts, use `ktx status --json`. -When setup builds deep context, its final context check looks like: +When setup finishes building context, its final context check looks like: ```text ktx context is ready for agents. 
Databases: - warehouse: deep context complete + warehouse: database context complete Context sources: dbt_main: memory update complete @@ -326,7 +326,7 @@ ktx setup \ Then build context: ```bash -ktx ingest warehouse --fast +ktx ingest warehouse ``` See [ktx setup](/docs/cli-reference/ktx-setup) for the full automation flag diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index d6d58053..b806c424 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -24,7 +24,9 @@ external metadata can attach to known warehouse tables. ## Database ingest -Database ingest records table, column, type, constraint, and row-count context. +Database ingest always builds enriched context: tables, columns, types, +constraints, and row counts, plus AI-generated descriptions, embeddings, and +relationship evidence. ```bash # Build one configured database connection @@ -34,23 +36,8 @@ ktx ingest warehouse ktx ingest --all ``` -Depth controls how much context **ktx** builds: - -| Flag | Best for | What it does | -|------|----------|--------------| -| `--fast` | First setup, quick refreshes, CI smoke checks | Deterministic fast ingest with tables, columns, types, constraints, and row counts | -| `--deep` | Agent-ready context for real analysis | Fast ingest plus deep enrichment with descriptions, embeddings, relationship evidence, and optional query history | - -Examples: - -```bash -ktx ingest warehouse --fast -ktx ingest warehouse --deep -ktx ingest --all --deep -``` - -Deep ingest needs LLM and embedding readiness. Otherwise run `ktx setup` or use -`--fast`. +Enriched ingest needs a configured model and embeddings. Run `ktx setup` first; +connections without that configuration fail before any work starts. With `claude-code`, **ktx** agent loops can invoke only the **ktx** MCP tools for the current run. 
@@ -64,7 +51,7 @@ Enable it during setup, store it under `connections..context.queryHistory`, or request it for one run: ```bash -ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history # Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 ``` @@ -74,8 +61,8 @@ for one run. ## Relationship evidence -**ktx** scores relationship candidates during supported deep database ingest. The -public CLI does not expose separate relationship review subcommands. +**ktx** scores relationship candidates during database ingest. The public CLI +does not expose separate relationship review subcommands. ## Context-source ingest @@ -159,7 +146,7 @@ After interactive setup: ```bash ktx status -ktx ingest --all --deep +ktx ingest --all ktx status ``` @@ -176,8 +163,8 @@ ktx wiki "revenue" --json --limit 10 | Symptom | Likely cause | Recovery | |---------|--------------|----------| | Connection not configured | The connection id is missing from `ktx.yaml` | Add it with `ktx setup` | -| Deep readiness is missing | LLM or embeddings are not setup-ready | Run `ktx setup`, or rerun with `--fast` | -| Query history is unsupported | The selected database driver does not expose query history | Run fast ingest without query-history flags | +| Enrichment is not configured | LLM or embeddings are not setup-ready | Run `ktx setup` to configure a model and embeddings | +| Query history is unsupported | The selected database driver does not expose query history | Run ingest without query-history flags | | No connections configured | The project has no entries under `connections` | Run `ktx setup` and add a database or context-source connection | -| Context-source flags have no effect | Depth and query-history flags were supplied for a context-source connector | Use those flags only for database connections | +| Context-source flags have no effect | Query-history flags were supplied for a context-source connector 
| Use query-history flags only for database connections | | Text ingest stops early | `--fail-fast` stopped on the first failed item | Fix the item or rerun without `--fail-fast` | diff --git a/docs-site/content/docs/guides/serving-agents.mdx b/docs-site/content/docs/guides/serving-agents.mdx index 4c1ced4b..133739b7 100644 --- a/docs-site/content/docs/guides/serving-agents.mdx +++ b/docs-site/content/docs/guides/serving-agents.mdx @@ -111,12 +111,13 @@ non-obvious terms. Agents can refresh context when the user asks them to: ```bash -ktx ingest warehouse --fast +ktx ingest warehouse ktx ingest ktx ingest --file docs/revenue-notes.md --connection-id warehouse ``` -Use `--deep` only when LLM and embedding setup is ready. +Database ingest builds enriched context and requires a configured model and +embeddings; run `ktx setup` first if they are not ready. ## Good agent behavior diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index 81b8d400..6cb2d26f 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -517,5 +517,5 @@ No authentication required - SQLite is file-based. 
The file must be readable by | Connection URL appears in git diff | A literal credential URL was written to `ktx.yaml` | Replace it with `env:NAME` or `file:/path/to/secret` and rotate exposed credentials | | Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | | Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest --query-history` or `ktx setup` | -| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context | +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on schema-level context without column statistics | | Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test ` and check the `ktx sl query` flags | diff --git a/docs/terminology.md b/docs/terminology.md index 9da59456..4c9ec3cb 100644 --- a/docs/terminology.md +++ b/docs/terminology.md @@ -77,8 +77,6 @@ maintains, validates, and serves that layer. 
| Connection ref in prose | **connection id** (lowercase, two words) | "connection ID" | | CLI arg/flag literal | `connectionId` (code font) | — | | File path placeholder | `` (code font) | — | -| Fast schema mode | **fast ingest** | schema ingest, schema-only ingest | -| AI-enriched mode | **deep ingest** | AI-enriched ingest | | Ingest of a primary connection | **database ingest** | — | | Ingest of a context-source connection | **context-source ingest** | bare "source ingest" | | Wiki capture | **text ingest** | — | diff --git a/packages/cli/src/commands/ingest-commands.ts b/packages/cli/src/commands/ingest-commands.ts index 9ffd2562..b5efe443 100644 --- a/packages/cli/src/commands/ingest-commands.ts +++ b/packages/cli/src/commands/ingest-commands.ts @@ -29,8 +29,6 @@ export function registerIngestCommands( .usage('[options] [connectionId]') .argument('[connectionId]', 'Configured connection id to ingest (omit to ingest all)') .option('--all', 'Ingest all configured connections', false) - .addOption(new Option('--fast', 'Use deterministic database schema ingest').conflicts('deep')) - .addOption(new Option('--deep', 'Use AI-enriched database ingest').conflicts('fast')) .addOption(new Option('--query-history', 'Include database query-history usage patterns').conflicts('noQueryHistory')) .addOption(new Option('--no-query-history', 'Skip database query-history usage patterns')) .option('--query-history-window-days ', 'Query-history lookback window for this run', parsePositiveIntegerOption) @@ -87,8 +85,6 @@ export function registerIngestCommands( all: selection.kind === 'all', json: options.json === true, inputMode: options.input === false ? 'disabled' : 'auto', - ...(options.fast === true ? { depth: 'fast' as const } : {}), - ...(options.deep === true ? { depth: 'deep' as const } : {}), queryHistory, ...(options.queryHistoryWindowDays !== undefined ? 
{ queryHistoryWindowDays: options.queryHistoryWindowDays } : {}), cliVersion: context.packageInfo.version, diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 54628346..19f980bd 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -308,9 +308,14 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo .addOption(new Option('--source-git-url ', 'Git URL for dbt, MetricFlow, or LookML').hideHelp()) .addOption(new Option('--source-branch ', 'Git branch for source setup').hideHelp()) .addOption(new Option('--source-subpath ', 'Repo subpath for source setup').hideHelp()) - .addOption(new Option('--source-auth-token-ref ', 'env: or file: credential ref for source repo auth').hideHelp()) + .addOption( + new Option( + '--source-auth-token-ref ', + 'env: or file: credential ref for source repo auth or Notion integration token', + ).hideHelp(), + ) .addOption(new Option('--source-url ', 'Source service URL for Metabase or Looker').hideHelp()) - .addOption(new Option('--source-api-key-ref ', 'env: or file: API key ref for Metabase or Notion').hideHelp()) + .addOption(new Option('--source-api-key-ref ', 'env: or file: API key ref for Metabase').hideHelp()) .addOption(new Option('--source-client-id ', 'Looker client id').hideHelp()) .addOption(new Option('--source-client-secret-ref ', 'env: or file: Looker client secret ref').hideHelp()) .addOption(new Option('--source-warehouse-connection-id ', 'Mapped warehouse connection id').hideHelp()) diff --git a/packages/cli/src/connection-drivers.ts b/packages/cli/src/connection-drivers.ts new file mode 100644 index 00000000..4f10e663 --- /dev/null +++ b/packages/cli/src/connection-drivers.ts @@ -0,0 +1,21 @@ +import type { KtxProjectConnectionConfig } from './context/project/config.js'; + +const KTX_DATABASE_DRIVER_IDS = new Set([ + 'sqlite', + 'postgres', + 'mysql', + 'clickhouse', + 
'sqlserver', + 'bigquery', + 'snowflake', +]); + +export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { + return String(connection.driver ?? '') + .trim() + .toLowerCase(); +} + +export function isDatabaseDriver(driver: string): boolean { + return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); +} diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 9a06d39a..4b5be38b 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -88,7 +88,6 @@ export interface ContextBuildArgs { targetConnectionId?: string; all?: boolean; entrypoint?: 'setup' | 'ingest'; - depth?: Extract['depth']; queryHistory?: Extract['queryHistory']; queryHistoryWindowDays?: number; scanMode?: Extract['scanMode']; @@ -371,19 +370,17 @@ function retryCommand(input: { projectDir?: string; entrypoint?: 'setup' | 'ingest'; connectionId?: string; - depth?: 'fast' | 'deep'; queryHistory?: boolean; queryHistoryWindowDays?: number; }): string { const projectPart = input.projectDir ? ` --project-dir ${input.projectDir}` : ''; if (input.entrypoint === 'ingest' && input.connectionId) { - const depthPart = input.depth ? ` --${input.depth}` : ''; const queryHistoryPart = input.queryHistory ? ' --query-history' : ''; const windowPart = input.queryHistory && input.queryHistoryWindowDays !== undefined ? ` --query-history-window-days ${input.queryHistoryWindowDays}` : ''; - return `ktx ingest ${input.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; + return `ktx ingest ${input.connectionId}${projectPart}${queryHistoryPart}${windowPart}`; } return input.projectDir ? 
`ktx setup --project-dir ${input.projectDir}` : 'ktx setup'; } @@ -746,7 +743,6 @@ function appendRetryIfNeeded(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`; @@ -769,7 +765,6 @@ function failureTextForTarget(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`, @@ -784,7 +779,6 @@ function failureTextForTarget(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`, @@ -868,7 +862,6 @@ export async function runContextBuild( projectDir: args.projectDir, ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), all: args.all ?? true, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? { scanMode: args.scanMode } : {}), @@ -935,7 +928,6 @@ export async function runContextBuild( all: args.all ?? true, json: false, inputMode: args.inputMode, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? 
{ scanMode: args.scanMode } : {}), diff --git a/packages/cli/src/context/ingest/local-ingest.ts b/packages/cli/src/context/ingest/local-ingest.ts index 2351d420..ec8a72f4 100644 --- a/packages/cli/src/context/ingest/local-ingest.ts +++ b/packages/cli/src/context/ingest/local-ingest.ts @@ -13,6 +13,7 @@ import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } fro import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; import type { MemoryFlowEventSink } from './memory-flow/types.js'; import { buildSyncId } from './raw-sources-paths.js'; +import { ingestReportOutcome } from './reports.js'; import type { IngestReportBody, IngestReportSnapshot } from './reports.js'; import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; import type { IngestBundleResult, IngestJobContext, IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js'; @@ -79,7 +80,7 @@ export interface LocalMetabaseFanoutProgress { metabaseDatabaseId: number; targetConnectionId: string; jobId: string; - status: 'done' | 'failed'; + status: 'done' | 'partial' | 'failed'; }): void; } @@ -232,11 +233,11 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise child.report.body.failedWorkUnits.length === 0).length; - if (succeeded === children.length) { + const outcomes = children.map((child) => ingestReportOutcome(child.report)); + if (outcomes.every((outcome) => outcome === 'done')) { return 'all_succeeded'; } - if (succeeded === 0) { + if (outcomes.every((outcome) => outcome === 'error')) { return 'all_failed'; } return 'partial_failure'; @@ -401,12 +402,13 @@ export async function runLocalMetabaseIngest( error, }); } + const childOutcome = ingestReportOutcome(child.report); options.progress?.onMetabaseChildCompleted?.({ metabaseConnectionId, metabaseDatabaseId: childPlan.metabaseDatabaseId, targetConnectionId, jobId: child.report.jobId, - status: child.report.body.failedWorkUnits.length > 0 ? 
'failed' : 'done', + status: childOutcome === 'error' ? 'failed' : childOutcome, }); children.push({ jobId: child.report.jobId, diff --git a/packages/cli/src/context/ingest/memory-flow/events.ts b/packages/cli/src/context/ingest/memory-flow/events.ts index 020ce5ae..92cebe0f 100644 --- a/packages/cli/src/context/ingest/memory-flow/events.ts +++ b/packages/cli/src/context/ingest/memory-flow/events.ts @@ -1,5 +1,6 @@ import type { MemoryAction } from '../../../context/memory/types.js'; import type { LocalIngestRunRecord } from '../local-stage-ingest.js'; +import { ingestReportOutcome } from '../reports.js'; import type { IngestReportSnapshot } from '../reports.js'; import type { MemoryFlowActionDetail, @@ -72,7 +73,7 @@ function fullModeMetadata(input: { } function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] { - return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; + return ingestReportOutcome(report) === 'error' ? 'error' : 'done'; } function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent { diff --git a/packages/cli/src/context/ingest/reports.ts b/packages/cli/src/context/ingest/reports.ts index ea02a31a..09f92170 100644 --- a/packages/cli/src/context/ingest/reports.ts +++ b/packages/cli/src/context/ingest/reports.ts @@ -146,6 +146,20 @@ export function savedMemoryCountsForReport(report: IngestReportSnapshot): Ingest }; } +/** @internal */ +export type IngestReportOutcome = 'done' | 'partial' | 'error'; + +export function ingestReportOutcome(report: IngestReportSnapshot): IngestReportOutcome { + if (report.body.status === 'failed') { + return 'error'; + } + if (report.body.failedWorkUnits.length === 0) { + return 'done'; + } + const { wikiCount, slCount } = savedMemoryCountsForReport(report); + return wikiCount + slCount > 0 ? 
'partial' : 'error'; +} + export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex { return { jobId, diff --git a/packages/cli/src/context/project/driver-schemas.ts b/packages/cli/src/context/project/driver-schemas.ts index 6b4dc017..f9a3639f 100644 --- a/packages/cli/src/context/project/driver-schemas.ts +++ b/packages/cli/src/context/project/driver-schemas.ts @@ -30,7 +30,7 @@ function warehouseConnectionSchema(driver: .array(z.string().min(1)) .optional() .describe( - 'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing deep ingest on a single table.', + 'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing ingest on a single table.', ), }) .describe( diff --git a/packages/cli/src/ingest-depth.ts b/packages/cli/src/ingest-depth.ts deleted file mode 100644 index b8957763..00000000 --- a/packages/cli/src/ingest-depth.ts +++ /dev/null @@ -1,75 +0,0 @@ -import type { KtxProjectConfig, KtxProjectConnectionConfig } from './context/project/config.js'; - -export type KtxDatabaseContextDepth = 'fast' | 'deep'; - -const KTX_DATABASE_DRIVER_IDS = new Set([ - 'sqlite', - 'postgres', - 'mysql', - 'clickhouse', - 'sqlserver', - 'bigquery', - 'snowflake', -]); - -export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { - return String(connection.driver ?? 
'') - .trim() - .toLowerCase(); -} - -export function isDatabaseDriver(driver: string): boolean { - return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); -} - -function connectionContextRecord(connection: KtxProjectConnectionConfig): Record { - const context = connection.context; - return typeof context === 'object' && context !== null && !Array.isArray(context) - ? (context as Record) - : {}; -} - -export function databaseContextDepth(connection: KtxProjectConnectionConfig): KtxDatabaseContextDepth | undefined { - const depth = connectionContextRecord(connection).depth; - return depth === 'fast' || depth === 'deep' ? depth : undefined; -} - -export function withDatabaseContextDepth( - connection: KtxProjectConnectionConfig, - depth: KtxDatabaseContextDepth, -): KtxProjectConnectionConfig { - return { - ...connection, - context: { - ...connectionContextRecord(connection), - depth, - }, - }; -} - -export function deepReadinessGaps(config: KtxProjectConfig): string[] { - const gaps: string[] = []; - if (config.llm.provider.backend === 'none' || !config.llm.models.default) { - gaps.push('model configuration'); - } - - if (config.scan.enrichment.mode !== 'llm') { - gaps.push('scan enrichment mode'); - } - - const embeddings = config.scan.enrichment.embeddings; - if ( - !embeddings || - embeddings.backend === 'none' || - !embeddings.model || - embeddings.dimensions <= 0 - ) { - gaps.push('scan embeddings'); - } - - return gaps; -} - -export function recommendedDatabaseContextDepth(config: KtxProjectConfig): KtxDatabaseContextDepth { - return deepReadinessGaps(config).length === 0 ? 
'deep' : 'fast'; -} diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index fb8c9a29..ad5ba270 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -2,7 +2,7 @@ import { buildMemoryFlowViewModel } from './context/ingest/memory-flow/view-mode import { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './context/ingest/memory-flow/live-buffer.js'; import { formatMemoryFlowFinalSummary } from './context/ingest/memory-flow/summary.js'; import { getLatestLocalIngestStatus, getLocalIngestStatus, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, runLocalIngest, runLocalMetabaseIngest } from './context/ingest/local-ingest.js'; -import { type IngestReportSnapshot, savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { type IngestReportSnapshot, ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ingestReportToMemoryFlowReplay } from './context/ingest/memory-flow/events.js'; import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/memory-flow/types.js'; import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js'; @@ -93,10 +93,6 @@ export interface KtxIngestDeps { runtimeIo?: KtxIngestIo; } -function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { - return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 
'error' : 'done'; -} - const REPORT_SOURCE_LABELS = new Map([ ['live-database', 'Database schema'], ['historic-sql', 'Query history'], @@ -193,7 +189,7 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void if (report.body.tracePath) { io.stdout.write(`Trace: ${report.body.tracePath}\n`); } - io.stdout.write(`Status: ${reportStatus(report)}\n`); + io.stdout.write(`Status: ${ingestReportOutcome(report)}\n`); io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); io.stdout.write(`Sync: ${report.body.syncId}\n`); @@ -231,7 +227,7 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng } io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { - const status = reportStatus(child.report); + const status = ingestReportOutcome(child.report); io.stdout.write( `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`, ); @@ -595,7 +591,7 @@ function initialRunMemoryFlowInput( } function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput { - const status = reportStatus(report); + const status = ingestReportOutcome(report) === 'error' ? 'error' : 'done'; return { ...snapshot, runId: report.runId, @@ -777,7 +773,7 @@ export async function runKtxIngest( } finally { plainProgress?.flush(); } - return result.status === 'all_succeeded' ? 0 : 1; + return result.status === 'all_failed' ? 1 : 0; } const jobId = deps.jobIdFactory?.(); @@ -846,7 +842,7 @@ export async function runKtxIngest( liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 
1 : 0; } plainProgress?.flush(); await writeReportRecord(result.report, runOutputMode, io, { @@ -854,7 +850,7 @@ export async function runKtxIngest( renderStoredMemoryFlow: deps.renderStoredMemoryFlow, env, }); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 1 : 0; } finally { plainProgress?.flush(); liveTui?.close(); diff --git a/packages/cli/src/public-ingest-copy.ts b/packages/cli/src/public-ingest-copy.ts index be1206c1..86423f74 100644 --- a/packages/cli/src/public-ingest-copy.ts +++ b/packages/cli/src/public-ingest-copy.ts @@ -12,7 +12,7 @@ const DATABASE_INGEST_REPLACEMENTS: Array<[RegExp, string]> = [ 'Database enrichment failed after schema context completed', ], [/\bstructural scan\b/gi, 'schema context'], - [/\benriched scan\b/gi, 'deep database ingest'], + [/\benriched scan\b/gi, 'database ingest'], [/\bscan results\b/gi, 'database context'], ]; diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 60bceecd..25fe30dd 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -1,16 +1,10 @@ import { getKtxCliPackageInfo } from './cli-runtime.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; -import type { KtxProjectConnectionConfig } from './context/project/config.js'; +import type { KtxProjectConfig, KtxProjectConnectionConfig } from './context/project/config.js'; import type { KtxProgressPort } from './context/scan/types.js'; import type { KtxCliIo } from './index.js'; import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, - deepReadinessGaps, - isDatabaseDriver, - normalizeConnectionDriver, -} from './ingest-depth.js'; +import { isDatabaseDriver, normalizeConnectionDriver } from './connection-drivers.js'; import { ensureManagedPythonCommandRuntime, type 
KtxManagedPythonInstallPolicy, @@ -29,7 +23,6 @@ profileMark('module:public-ingest'); type KtxPublicIngestStepName = 'database-schema' | 'query-history' | 'source-ingest' | 'memory-update'; type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; type KtxPublicIngestInputMode = 'auto' | 'disabled'; -type KtxPublicIngestDepth = KtxDatabaseContextDepth; type KtxPublicIngestQueryHistoryFlag = 'default' | 'enabled' | 'disabled'; type HistoricSqlDialect = 'postgres' | 'bigquery' | 'snowflake'; @@ -41,7 +34,6 @@ export type KtxPublicIngestArgs = all: boolean; json: boolean; inputMode: KtxPublicIngestInputMode; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -58,7 +50,6 @@ export interface KtxPublicIngestPlanTarget { sourceDir?: string; debugCommand: string; steps: KtxPublicIngestStepName[]; - databaseDepth?: KtxPublicIngestDepth; detectRelationships?: boolean; preflightFailure?: string; queryHistory?: { @@ -67,7 +58,6 @@ export interface KtxPublicIngestPlanTarget { windowDays?: number; pullConfig?: Record; unsupported?: boolean; - skippedStoredByFast?: boolean; }; } @@ -121,7 +111,6 @@ interface KtxPublicContextBuildArgs { inputMode: 'auto' | 'disabled'; targetConnectionId?: string; all?: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -154,7 +143,6 @@ interface KtxUnsupportedQueryHistoryWarning { interface KtxPublicIngestWarningAccumulator { warnings: string[]; - ignoredDepthForSources: string[]; ignoredQueryHistoryForSources: string[]; unsupportedQueryHistoryForDatabases: KtxUnsupportedQueryHistoryWarning[]; } @@ -162,7 +150,6 @@ interface KtxPublicIngestWarningAccumulator { function createWarningAccumulator(): KtxPublicIngestWarningAccumulator { return { warnings: [], - ignoredDepthForSources: [], ignoredQueryHistoryForSources: [], 
unsupportedQueryHistoryForDatabases: [], }; @@ -233,7 +220,6 @@ function finalizeWarnings( accumulator: KtxPublicIngestWarningAccumulator, args: { all: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; }, @@ -242,11 +228,6 @@ function finalizeWarnings( ...accumulator.warnings, ...unsupportedQueryHistoryWarnings(accumulator.unsupportedQueryHistoryForDatabases, args.all), ]; - const depthOption = args.depth ? `--${args.depth}` : null; - if (depthOption) { - const warning = sourceIgnoredWarning(depthOption, accumulator.ignoredDepthForSources, args.all); - if (warning) warnings.push(warning); - } if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { const warning = sourceIgnoredWarning('--query-history', accumulator.ignoredQueryHistoryForSources, args.all); if (warning) warnings.push(warning); @@ -317,13 +298,12 @@ function resolveDatabaseTargetOptions(input: { driver: string; connection: KtxProjectConnectionConfig; args: { - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; }; warnings: KtxPublicIngestWarningAccumulator; -}): Pick { +}): Pick { const storedQh = storedQueryHistory(input.connection); const dialect = queryHistoryDialectByDriver.get(input.driver); const explicitQueryHistory = input.args.queryHistory ?? 'default'; @@ -332,7 +312,6 @@ function resolveDatabaseTargetOptions(input: { const requestedQh = explicitQueryHistory === 'enabled' || (explicitQueryHistory !== 'disabled' && (windowOverrideRequested || storedEnabled)); - let depth = input.args.depth ?? databaseContextDepth(input.connection) ?? 'fast'; const queryHistory = { enabled: false, ...(input.args.queryHistoryWindowDays !== undefined @@ -350,19 +329,13 @@ function resolveDatabaseTargetOptions(input: { explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined ? 
'explicit' : 'stored', }); return { - databaseDepth: depth, queryHistory: { ...queryHistory, unsupported: true }, steps: ['database-schema'], }; } if (requestedQh && dialect) { - if (depth === 'fast') { - input.warnings.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); - } - depth = 'deep'; return { - databaseDepth: depth, queryHistory: { ...queryHistory, enabled: true, @@ -378,30 +351,35 @@ function resolveDatabaseTargetOptions(input: { }; } - if (input.args.depth === 'fast' && explicitQueryHistory !== 'enabled' && storedEnabled) { - input.warnings.warnings.push( - `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, - ); - return { - databaseDepth: 'fast', - queryHistory: { ...queryHistory, skippedStoredByFast: true }, - steps: ['database-schema'], - }; - } - return { - databaseDepth: depth, queryHistory, steps: ['database-schema'], }; } +function enrichmentReadinessGaps(config: KtxProjectConfig): string[] { + const gaps: string[] = []; + if (config.llm.provider.backend === 'none' || !config.llm.models.default) { + gaps.push('model configuration'); + } + + if (config.scan.enrichment.mode !== 'llm') { + gaps.push('scan enrichment mode'); + } + + const embeddings = config.scan.enrichment.embeddings; + if (!embeddings || embeddings.backend === 'none' || !embeddings.model || embeddings.dimensions <= 0) { + gaps.push('scan embeddings'); + } + + return gaps; +} + function targetForConnection( connectionId: string, connection: KtxProjectConnectionConfig, projectConfig: KtxPublicIngestProject['config'], args: { - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -412,9 +390,6 @@ function targetForConnection( const adapter = sourceAdapterByDriver.get(driver); const sourceDir = sourceDirForConnection(connection); if (adapter) { - if (args.depth) { - 
warnings.ignoredDepthForSources.push(connectionId); - } if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { warnings.ignoredQueryHistoryForSources.push(connectionId); } @@ -431,18 +406,18 @@ function targetForConnection( if (isDatabaseDriver(driver)) { const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); - const gaps = options.databaseDepth === 'deep' ? deepReadinessGaps(projectConfig) : []; + const gaps = enrichmentReadinessGaps(projectConfig); return { connectionId, driver, operation: 'database-ingest', debugCommand: `ktx ingest ${connectionId} --debug`, - detectRelationships: options.databaseDepth === 'deep' && projectConfig.scan.relationships.enabled, + detectRelationships: projectConfig.scan.relationships.enabled, ...(gaps.length > 0 ? { - preflightFailure: `${connectionId} requires deep ingest readiness: ${gaps.join( + preflightFailure: `${connectionId} cannot be ingested: enrichment is not configured (${gaps.join( ', ', - )}. Run ktx setup or rerun with --fast.`, + )}). Run ktx setup to configure a model and embeddings.`, } : {}), ...options, @@ -458,7 +433,6 @@ export function buildPublicIngestPlan( projectDir: string; targetConnectionId?: string; all: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -522,13 +496,12 @@ function retryCommandForTarget( args: Extract, ): string { const projectPart = ` --project-dir ${args.projectDir}`; - const depthPart = target.databaseDepth ? ` --${target.databaseDepth}` : ''; const queryHistoryPart = target.queryHistory?.enabled === true ? ' --query-history' : ''; const windowPart = target.queryHistory?.enabled === true && target.queryHistory.windowDays !== undefined ? 
` --query-history-window-days ${target.queryHistory.windowDays}` : ''; - return `ktx ingest ${target.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; + return `ktx ingest ${target.connectionId}${projectPart}${queryHistoryPart}${windowPart}`; } function trimTrailingPeriod(value: string): string { @@ -830,7 +803,7 @@ export async function executePublicIngestTarget( command: 'run', projectDir: args.projectDir, connectionId: target.connectionId, - mode: target.databaseDepth === 'deep' ? 'enriched' : 'structural', + mode: 'enriched', detectRelationships: target.detectRelationships === true, dryRun: false, ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), @@ -979,7 +952,6 @@ export async function runKtxPublicIngest( all: args.all, entrypoint: 'ingest', inputMode: args.inputMode, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? 
{ scanMode: args.scanMode } : {}), diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index dc289278..63b4dbdf 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -7,12 +7,7 @@ import { serializeKtxProjectConfig } from './context/project/config.js'; import type { KtxCliIo } from './cli-runtime.js'; import { errorMessage, writePrefixedLines } from './clack.js'; import { buildPublicIngestPlan } from './public-ingest.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, -} from './ingest-depth.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; -import { ensureSetupDatabaseContextDepths } from './setup-database-context-depth.js'; import { type ContextBuildSourceProgressUpdate, runContextBuild, @@ -353,16 +348,6 @@ async function readLatestScanReport(projectDir: string, connectionId: string): P return reports.at(-1)?.report ?? null; } -function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean { - if (!isRecord(report)) { - return false; - } - if (report.connectionId !== connectionId || report.dryRun === true) { - return false; - } - return stringArrayValue(isRecord(report.artifactPaths) ? 
report.artifactPaths.manifestShards : undefined).length > 0; -} - function scanReportHasCompletedDeepEnrichment( report: unknown, connectionId: string, @@ -389,18 +374,6 @@ function scanReportHasCompletedDeepEnrichment( ); } -function scanReportSatisfiesDepth(input: { - report: unknown; - connectionId: string; - depth: KtxDatabaseContextDepth; - relationshipsRequired: boolean; -}): boolean { - if (input.depth === 'fast') { - return scanReportHasSchemaManifest(input.report, input.connectionId); - } - return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired); -} - async function verifyPrimarySourceScans( project: KtxLocalProject, connectionIds: string[], @@ -408,15 +381,9 @@ async function verifyPrimarySourceScans( const details: string[] = []; const relationshipsRequired = project.config.scan.relationships.enabled; for (const connectionId of connectionIds) { - const connection = project.config.connections[connectionId]; - const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; const report = await readLatestScanReport(project.projectDir, connectionId); - if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) { - details.push( - depth === 'fast' - ? 
`${connectionId}: schema context has not completed.` - : `${connectionId}: deep database context has not completed.`, - ); + if (!scanReportHasCompletedDeepEnrichment(report, connectionId, relationshipsRequired)) { + details.push(`${connectionId}: database context has not completed.`); } } return { ready: details.length === 0, details }; @@ -482,7 +449,6 @@ function writeSkippedContext(projectDir: string, io: KtxCliIo): void { } function writeSuccess( - project: KtxLocalProject, readiness: KtxSetupContextReadiness, targets: KtxSetupContextTargets, io: KtxCliIo, @@ -493,9 +459,7 @@ function writeSuccess( io.stdout.write(' none\n'); } else { for (const connectionId of targets.primarySourceConnectionIds) { - const connection = project.config.connections[connectionId]; - const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; - io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 'deep context complete' : 'schema context complete'}\n`); + io.stdout.write(` ${connectionId}: database context complete\n`); } } io.stdout.write('\nContext sources:\n'); @@ -636,7 +600,7 @@ async function runBuild( failureReason: undefined, ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); - writeSuccess(project, readiness, targets, io); + writeSuccess(readiness, targets, io); return { status: 'ready', projectDir: args.projectDir, runId }; } @@ -678,17 +642,8 @@ export async function runKtxSetupContextStep( deps: KtxSetupContextDeps = {}, ): Promise { try { - let project = await loadKtxProject({ projectDir: args.projectDir }); + const project = await loadKtxProject({ projectDir: args.projectDir }); const prompts = deps.prompts ?? 
createPromptAdapter(); - const depthProject = await ensureSetupDatabaseContextDepths({ - project, - args, - prompts, - }); - if (depthProject === 'back') { - return { status: 'back', projectDir: args.projectDir }; - } - project = depthProject; const existingState = await readKtxSetupContextState(args.projectDir); const completedSteps = (await readKtxSetupState(args.projectDir)).completed_steps; if (completedSteps.includes('context') && existingState.status === 'completed') { diff --git a/packages/cli/src/setup-database-context-depth.ts b/packages/cli/src/setup-database-context-depth.ts deleted file mode 100644 index 20df813c..00000000 --- a/packages/cli/src/setup-database-context-depth.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { writeFile } from 'node:fs/promises'; -import { type KtxLocalProject, loadKtxProject } from './context/project/project.js'; -import { type KtxProjectConnectionConfig, serializeKtxProjectConfig } from './context/project/config.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, - deepReadinessGaps, - isDatabaseDriver, - normalizeConnectionDriver, - recommendedDatabaseContextDepth, - withDatabaseContextDepth, -} from './ingest-depth.js'; -import type { KtxSetupPromptOption } from './setup-prompts.js'; - -export interface KtxSetupDatabaseContextDepthArgs { - inputMode: 'auto' | 'disabled'; -} - -export interface KtxSetupDatabaseContextDepthPromptAdapter { - select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; -} - -function databaseConnectionsNeedingDepth(project: KtxLocalProject): string[] { - return Object.entries(project.config.connections) - .filter(([, connection]) => isDatabaseDriver(normalizeConnectionDriver(connection))) - .filter(([, connection]) => databaseContextDepth(connection) === undefined) - .map(([connectionId]) => connectionId) - .sort((left, right) => left.localeCompare(right)); -} - -async function chooseSetupDatabaseContextDepth(input: { - project: KtxLocalProject; - args: 
KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - const recommended = recommendedDatabaseContextDepth(input.project.config); - if (input.args.inputMode === 'disabled') { - return recommended; - } - - const deepReady = deepReadinessGaps(input.project.config).length === 0; - const options = - recommended === 'deep' - ? [ - { - value: 'deep', - label: 'Deep: AI descriptions, embeddings, relationships, slower', - hint: 'recommended', - }, - { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, - { value: 'back', label: 'Back' }, - ] - : [ - { value: 'fast', label: 'Fast: schema only, no AI, quickest', hint: 'recommended' }, - { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, - { value: 'back', label: 'Back' }, - ]; - - const choice = await input.prompts.select({ - message: - 'How much database context should KTX build?\n\n' + - (deepReady - ? 'Deep is available because model, embedding, and scan enrichment are configured.' 
- : 'Fast is recommended because model, embedding, or scan enrichment is not configured.'), - options, - }); - if (choice === 'back') { - return 'back'; - } - if (choice === 'fast' || choice === 'deep') { - return choice; - } - return recommended; -} - -async function writeDatabaseContextDepths( - project: KtxLocalProject, - connectionIds: string[], - depth: KtxDatabaseContextDepth, -): Promise { - if (connectionIds.length === 0) { - return project; - } - const nextConnections = { ...project.config.connections }; - for (const connectionId of connectionIds) { - const connection = nextConnections[connectionId]; - if (connection) { - nextConnections[connectionId] = withDatabaseContextDepth(connection, depth); - } - } - const nextConfig = { ...project.config, connections: nextConnections }; - await writeFile(project.configPath, serializeKtxProjectConfig(nextConfig), 'utf-8'); - return await loadKtxProject({ projectDir: project.projectDir }); -} - -export async function ensureSetupDatabaseContextDepths(input: { - project: KtxLocalProject; - args: KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - const missingDepthConnectionIds = databaseConnectionsNeedingDepth(input.project); - if (missingDepthConnectionIds.length === 0) { - return input.project; - } - - const depth = await chooseSetupDatabaseContextDepth(input); - if (depth === 'back') { - return 'back'; - } - return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, depth); -} - -export async function applySetupDatabaseContextDepth(input: { - project: KtxLocalProject; - connection: KtxProjectConnectionConfig; - args: KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - if ( - !isDatabaseDriver(normalizeConnectionDriver(input.connection)) || - databaseContextDepth(input.connection) !== undefined - ) { - return input.connection; - } - - const depth = await 
chooseSetupDatabaseContextDepth(input); - if (depth === 'back') { - return 'back'; - } - return withDatabaseContextDepth(input.connection, depth); -} diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index eb364228..09db1bde 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -29,7 +29,6 @@ import { } from './database-tree-picker.js'; import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxScan } from './scan.js'; -import { applySetupDatabaseContextDepth } from './setup-database-context-depth.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; import { emitTelemetryEvent } from './telemetry/index.js'; @@ -1614,45 +1613,10 @@ async function applyHistoricSqlConfigToExistingConnection(input: { prompts: input.prompts, }); if (withHistoricSql === 'back') return 'back'; - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: input.projectDir, - connectionId: input.connectionId, - connection: withHistoricSql, - args: input.args, - prompts: input.prompts, - }); - if (withContextDepth === 'back') return 'back'; await writeConnectionConfig({ projectDir: input.projectDir, connectionId: input.connectionId, - connection: withContextDepth, - }); -} - -async function maybeApplyContextDepthConfig(input: { - projectDir: string; - connectionId: string; - connection: KtxProjectConnectionConfig; - args: KtxSetupDatabasesArgs; - prompts: KtxSetupDatabasesPromptAdapter; -}): Promise { - const project = await loadKtxProject({ projectDir: input.projectDir }); - return await applySetupDatabaseContextDepth({ - project: { - ...project, - config: { - ...project.config, - connections: { - ...project.config.connections, - [input.connectionId]: input.connection, - }, - }, - }, - connection: input.connection, - args: { - inputMode: input.args.inputMode === 
'disabled' || input.args.databaseUrl ? 'disabled' : input.args.inputMode, - }, - prompts: input.prompts, + connection: withHistoricSql, }); } @@ -1698,7 +1662,7 @@ async function validateAndScanConnection(input: { deps: input.deps, }); writeSetupSection(input.io, `Building schema context for ${input.connectionId}`, [ - 'Running fast database ingest…', + 'Running database scan…', ]); let scanIo = createBufferedCommandIo(); let scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); @@ -1708,7 +1672,7 @@ async function validateAndScanConnection(input: { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), [ - `Fast database ingest failed for ${input.connectionId}.`, + `Database scan failed for ${input.connectionId}.`, 'Native SQLite is built for a different Node.js ABI.', `Detail: ${nativeSqliteDetail}`, 'Rebuilding Native SQLite with pnpm run native:rebuild…', @@ -1719,7 +1683,7 @@ async function validateAndScanConnection(input: { if (rebuildCode === 0) { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), - 'Native SQLite rebuild complete. Retrying fast database ingest…', + 'Native SQLite rebuild complete. Retrying database scan…', ); const retryScanIo = createBufferedCommandIo(); scanCode = await scanConnection(input.projectDir, input.connectionId, retryScanIo); @@ -1730,10 +1694,10 @@ async function validateAndScanConnection(input: { (chunk) => input.io.stderr.write(chunk), [ rebuildCode === 0 - ? `Fast database ingest still failed for ${input.connectionId} after rebuilding Native SQLite.` + ? 
`Database scan still failed for ${input.connectionId} after rebuilding Native SQLite.` : `Native SQLite rebuild failed for ${input.connectionId}.`, 'Fix: pnpm run native:rebuild', - `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast`, + `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir}`, ].join('\n'), ); } @@ -1742,8 +1706,8 @@ async function validateAndScanConnection(input: { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), [ - `Fast database ingest failed for ${input.connectionId}.`, - `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast --debug`, + `Database scan failed for ${input.connectionId}.`, + `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --debug`, ].join('\n'), ); } @@ -2167,22 +2131,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); } else { @@ -2193,22 +2145,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await 
writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); } @@ -2291,22 +2231,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); setupStatus = await validateAndScanConnection({ diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index dea1cd43..4f0a94bc 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -217,6 +217,39 @@ function credentialRef(value: string | undefined, label: string): string { return ref; } +type SourceCredentialFlag = { + field: 'sourceAuthTokenRef' | 'sourceApiKeyRef' | 'sourceClientSecretRef'; + flag: string; +}; + +// Each connector reads exactly one credential ref; the flag name mirrors the +// ktx.yaml field it writes (auth_token_ref / api_key_ref / client_secret_ref). 
+const SOURCE_CREDENTIAL_FLAG: Record = { + dbt: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + metricflow: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + lookml: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + notion: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + metabase: { field: 'sourceApiKeyRef', flag: '--source-api-key-ref' }, + looker: { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, +}; + +const ALL_SOURCE_CREDENTIAL_FLAGS: SourceCredentialFlag[] = [ + { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + { field: 'sourceApiKeyRef', flag: '--source-api-key-ref' }, + { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, +]; + +// Reject a credential ref flag the chosen source does not read, so a wrong flag +// fails loudly instead of being silently dropped (KLO-724). +function assertSourceCredentialFlags(source: KtxSetupSourceType, args: KtxSetupSourcesArgs): void { + const allowed = SOURCE_CREDENTIAL_FLAG[source]; + for (const { field, flag } of ALL_SOURCE_CREDENTIAL_FLAGS) { + if (args[field] && field !== allowed.field) { + throw new Error(`${flag} does not apply to --source ${source}; use ${allowed.flag}.`); + } + } +} + async function chooseSourceCredentialRef(input: { prompts: KtxSetupSourcesPromptAdapter; projectDir: string; @@ -515,7 +548,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC } return { driver: 'notion', - auth_token_ref: credentialRef(args.sourceApiKeyRef, 'Notion token ref'), + auth_token_ref: credentialRef(args.sourceAuthTokenRef, 'Notion token ref'), crawl_mode: crawlMode, ...(rootPageIds.length > 0 ? { root_page_ids: rootPageIds } : {}), root_database_ids: [], @@ -1295,10 +1328,10 @@ async function promptForInteractiveSource( label: 'Notion integration token', envName: 'NOTION_TOKEN', secretFileName: `${currentState.sourceConnectionId ?? 
'notion-main'}-token`, - existingRef: currentState.sourceApiKeyRef, + existingRef: currentState.sourceAuthTokenRef, }); if (ref === 'back') return 'back'; - currentState.sourceApiKeyRef = ref; + currentState.sourceAuthTokenRef = ref; return 'next'; }, async (currentState) => { @@ -1326,7 +1359,7 @@ async function promptForInteractiveSource( connectionId, connection: { driver: 'notion', - auth_token_ref: credentialRef(currentState.sourceApiKeyRef, 'Notion token ref'), + auth_token_ref: credentialRef(currentState.sourceAuthTokenRef, 'Notion token ref'), crawl_mode: 'selected_roots', root_page_ids: currentState.notionRootPageIds ?? [], root_database_ids: [], @@ -1516,7 +1549,7 @@ function sourceArgsFromExistingConnection(input: { return sourceArgs; } - sourceArgs.sourceApiKeyRef = stringField(input.connection.auth_token_ref); + sourceArgs.sourceAuthTokenRef = stringField(input.connection.auth_token_ref); sourceArgs.notionCrawlMode = input.connection.crawl_mode === 'all_accessible' ? 'all_accessible' : 'selected_roots'; if (Array.isArray(input.connection.root_page_ids)) { @@ -1817,6 +1850,10 @@ export async function runKtxSetupSourcesStep( return { status: 'skipped', projectDir: args.projectDir }; } + if (args.source) { + assertSourceCredentialFlags(args.source, args); + } + const prompts = deps.prompts ?? 
createPromptAdapter(); const project = await loadKtxProject({ projectDir: args.projectDir }); if (!hasPrimarySource(project.config)) { diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 74056542..ebc04c87 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,7 +1,7 @@ import { existsSync } from 'node:fs'; import { basename, join, resolve } from 'node:path'; import { getLatestLocalIngestStatus } from './context/ingest/local-ingest.js'; -import { savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ktxLocalStateDbPath } from './context/project/local-state-db.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { readKtxSetupState } from './context/project/setup-config.js'; @@ -306,7 +306,7 @@ function sourceConnections(config: Awaited>['c type LocalIngestStatusReport = NonNullable>>; function reportHasSavedContext(report: LocalIngestStatusReport): boolean { - if (report.body.failedWorkUnits.length > 0) { + if (ingestReportOutcome(report) === 'error') { return false; } const counts = savedMemoryCountsForReport(report); diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json index 13642c49..628c8f4b 100644 --- a/packages/cli/src/telemetry/events.schema.json +++ b/packages/cli/src/telemetry/events.schema.json @@ -365,7 +365,6 @@ "embeddings", "secrets", "databases", - "database-context-depth", "sources", "context", "agents", diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts index e73001ed..5e5b5335 100644 --- a/packages/cli/src/telemetry/events.ts +++ b/packages/cli/src/telemetry/events.ts @@ -38,7 +38,6 @@ const setupStepSchema = telemetryCommonEnvelopeSchema 'embeddings', 'secrets', 'databases', - 'database-context-depth', 'sources', 'context', 'agents', diff --git 
a/packages/cli/test/context-build-view.test.ts b/packages/cli/test/context-build-view.test.ts index 5936afa9..40e33606 100644 --- a/packages/cli/test/context-build-view.test.ts +++ b/packages/cli/test/context-build-view.test.ts @@ -228,11 +228,11 @@ describe('renderContextBuildView', () => { const rendered = renderContextBuildView(state, { styled: false, - warnings: ['--deep affects database ingest only; ignoring it for docs.'], + warnings: ['--query-history affects database ingest only; ignoring it for docs.'], }); expect(rendered).toContain('Warnings:'); - expect(rendered).toContain('--deep affects database ingest only; ignoring it for docs.'); + expect(rendered).toContain('--query-history affects database ingest only; ignoring it for docs.'); }); it('renders public notices in the foreground view before warnings', () => { @@ -243,7 +243,6 @@ describe('renderContextBuildView', () => { operation: 'database-ingest', debugCommand: 'ktx ingest warehouse --debug', steps: ['database-schema', 'query-history'], - databaseDepth: 'deep', detectRelationships: true, queryHistory: { enabled: true, dialect: 'postgres' }, }, @@ -252,12 +251,12 @@ describe('renderContextBuildView', () => { const rendered = renderContextBuildView(state, { styled: false, notices: ['Schema ingest runs before query history for warehouse.'], - warnings: ['--query-history requires deep ingest; running warehouse with --deep.'], + warnings: ['--query-history is not supported for sqlite; running schema ingest for local.'], }); expect(rendered.indexOf('Notices:')).toBeLessThan(rendered.indexOf('Warnings:')); expect(rendered).toContain('Schema ingest runs before query history for warehouse.'); - expect(rendered).toContain('--query-history requires deep ingest; running warehouse with --deep.'); + expect(rendered).toContain('--query-history is not supported for sqlite; running schema ingest for local.'); }); it('renders dynamic separator matching header width', () => { @@ -653,7 +652,6 @@ 
describe('runContextBuild', () => { inputMode: 'disabled', targetConnectionId: 'warehouse', all: false, - depth: 'fast', queryHistory: 'default', }, io.io, @@ -665,7 +663,6 @@ describe('runContextBuild', () => { expect(executeTarget.mock.calls[0]?.[0]).toMatchObject({ connectionId: 'warehouse', operation: 'database-ingest', - databaseDepth: 'fast', }); expect(io.stdout()).toContain('Databases:'); expect(io.stdout()).toContain('warehouse'); @@ -716,7 +713,7 @@ describe('runContextBuild', () => { it('renders localhost SQL analysis refusal as a runtime failure during query history', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { targetIo.stderr.write('connect ECONNREFUSED 127.0.0.1:8765\n'); @@ -751,7 +748,7 @@ describe('runContextBuild', () => { it('uses captured query-history stderr instead of generic failed-at detail', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { targetIo.stdout.write('KTX scan completed\n'); @@ -768,7 +765,7 @@ describe('runContextBuild', () => { operation: 'query-history', status: 'failed', detail: - 'warehouse failed at query-history. Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history', + 'warehouse failed at query-history. 
Retry: ktx ingest warehouse --project-dir /tmp/project --query-history', }, { operation: 'source-ingest', status: 'skipped' }, { operation: 'memory-update', status: 'skipped' }, @@ -785,7 +782,7 @@ describe('runContextBuild', () => { expect(result).toEqual({ exitCode: 1 }); expect(io.stdout()).toContain('Missing bundled Python runtime manifest: /tmp/assets/python/manifest.json.'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command'); expect(io.stdout()).not.toContain('warehouse failed at query-history'); expect(io.stdout().match(/Retry: /g)).toHaveLength(1); @@ -899,12 +896,12 @@ describe('runContextBuild', () => { const io = makeIo(); const project: KtxPublicIngestProject = { ...projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }), config: { ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config, connections: { - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }, llm: { provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret diff --git a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts index 06822aa2..8fb89bd0 100644 --- a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts +++ b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts @@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js'; import { LocalMetabaseDiscoveryCache } from 
'../../../src/context/ingest/adapters/metabase/local-source-state-store.js'; import { getLocalIngestStatus, runLocalMetabaseIngest } from '../../../src/context/ingest/local-ingest.js'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; import type { ChunkResult, FetchContext, SourceAdapter } from '../../../src/context/ingest/types.js'; class TestAgentRunner implements AgentRunnerPort { @@ -202,6 +203,24 @@ describe('runLocalMetabaseIngest', () => { expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']); }); + it('keeps a child that saved memory out of all_failed when another child fails', async () => { + await seedMetabaseState(); + const agentRunner = new TestAgentRunner(); + const ids = ['metabase-child-1', 'metabase-child-2']; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner, + jobIdFactory: () => ids.shift() ?? 'metabase-child-extra', + }); + + expect(result.status).toBe('partial_failure'); + expect(ingestReportOutcome(result.children[0].report)).toBe('done'); + expect(ingestReportOutcome(result.children[1].report)).toBe('error'); + }); + it('captures fetch-time child failures and continues later mappings', async () => { await seedMetabaseState(); project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' }; diff --git a/packages/cli/test/context/ingest/memory-flow/events.test.ts b/packages/cli/test/context/ingest/memory-flow/events.test.ts index e29405a4..cb0e72c8 100644 --- a/packages/cli/test/context/ingest/memory-flow/events.test.ts +++ b/packages/cli/test/context/ingest/memory-flow/events.test.ts @@ -166,7 +166,7 @@ describe('memory-flow event mapping', () => { runId: 'run-1', connectionId: 'warehouse', adapter: 'lookml', - status: 'error', + status: 'done', sourceDir: null, syncId: 'sync-2', reportId: 'report-1', @@ -308,7 +308,7 @@ describe('memory-flow 
event mapping', () => { sourceReportPath: 'report-1', fallbackReason: null, }); - expect(replay.status).toBe('error'); + expect(replay.status).toBe('done'); expect(replay.reportId).toBe('report-1'); expect(replay.reportPath).toBe('report-1'); expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' }); diff --git a/packages/cli/test/context/ingest/reports.test.ts b/packages/cli/test/context/ingest/reports.test.ts new file mode 100644 index 00000000..5fc24f6d --- /dev/null +++ b/packages/cli/test/context/ingest/reports.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from 'vitest'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; +import type { IngestReportSnapshot } from '../../../src/context/ingest/reports.js'; + +function report(body: Partial): IngestReportSnapshot { + return { + id: 'r', + runId: 'run', + jobId: 'job', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: '2026-05-29T00:00:00.000Z', + body: { + syncId: 'sync', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + ...body, + }, + }; +} + +const savingWorkUnit = { + unitKey: 'ok', + rawFiles: ['cards/1.json'], + status: 'success' as const, + actions: [{ target: 'sl' as const, type: 'updated' as const, key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], +}; + +const failedWorkUnit = { + unitKey: 'bad', + rawFiles: ['cards/2.json'], + status: 'failed' as const, + reason: 'tool write failed', + actions: [], + touchedSlSources: [], +}; + +describe('ingestReportOutcome', () => { + it('returns done when there are no failed work units', () => { + expect(ingestReportOutcome(report({ 
workUnits: [savingWorkUnit] }))).toBe('done'); + }); + + it('returns partial when failed work units coexist with saved memory', () => { + expect( + ingestReportOutcome(report({ workUnits: [savingWorkUnit, failedWorkUnit], failedWorkUnits: ['bad'] })), + ).toBe('partial'); + }); + + it('returns error when failed work units produced no saved memory', () => { + expect(ingestReportOutcome(report({ workUnits: [failedWorkUnit], failedWorkUnits: ['bad'] }))).toBe('error'); + }); + + it('returns error for a stage-level failure even if artifacts were recorded', () => { + expect(ingestReportOutcome(report({ status: 'failed', workUnits: [savingWorkUnit], failedWorkUnits: [] }))).toBe( + 'error', + ); + }); +}); diff --git a/packages/cli/test/index.test.ts b/packages/cli/test/index.test.ts index a60c48f2..bd17e641 100644 --- a/packages/cli/test/index.test.ts +++ b/packages/cli/test/index.test.ts @@ -702,7 +702,7 @@ describe('runKtxCli', () => { const publicIngest = vi.fn().mockResolvedValue(0); await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', 'warehouse', '--fast', '--no-input'], testIo.io, { + runKtxCli(['--project-dir', tempDir, 'ingest', 'warehouse', '--no-input'], testIo.io, { publicIngest, }), ).resolves.toBe(0); @@ -715,7 +715,6 @@ describe('runKtxCli', () => { all: false, json: false, inputMode: 'disabled', - depth: 'fast', queryHistory: 'default', cliVersion, runtimeInstallPolicy: 'never', @@ -725,12 +724,12 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(`Project: ${tempDir}\n`); }); - it('routes public ingest --all --deep with JSON output', async () => { + it('routes public ingest --all with JSON output', async () => { const testIo = makeIo(); const publicIngest = vi.fn().mockResolvedValue(0); await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', '--all', '--deep', '--json'], testIo.io, { + runKtxCli(['--project-dir', tempDir, 'ingest', '--all', '--json'], testIo.io, { publicIngest, }), ).resolves.toBe(0); @@ -742,7 +741,6 @@ 
describe('runKtxCli', () => { all: true, json: true, inputMode: 'auto', - depth: 'deep', queryHistory: 'default', cliVersion, runtimeInstallPolicy: 'prompt', @@ -786,20 +784,6 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input'); }); - it('rejects mutually exclusive public ingest depth flags before dispatch', async () => { - const testIo = makeIo(); - const publicIngest = vi.fn().mockResolvedValue(0); - - await expect( - runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--deep'], testIo.io, { - publicIngest, - }), - ).resolves.toBe(1); - - expect(publicIngest).not.toHaveBeenCalled(); - expect(testIo.stderr()).toMatch(/option '--(deep|fast)' cannot be used with option '--(fast|deep)'/); - }); - it.each(['run', 'status', 'watch', 'replay'])( 'routes former ingest subcommand name "%s" as a connection id', async (connectionId) => { @@ -890,8 +874,6 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toContain('Usage: ktx ingest'); expect(testIo.stdout()).toContain('Build or inspect KTX context'); expect(testIo.stdout()).toContain('--all'); - expect(testIo.stdout()).toContain('--fast'); - expect(testIo.stdout()).toContain('--deep'); expect(testIo.stdout()).toContain('--query-history'); expect(testIo.stdout()).toContain('--no-query-history'); expect(testIo.stdout()).toContain('--query-history-window-days '); diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts index eef751ba..f5cd1ac5 100644 --- a/packages/cli/test/ingest.test.ts +++ b/packages/cli/test/ingest.test.ts @@ -403,7 +403,7 @@ describe('runKtxIngest', () => { expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); - it('returns a non-zero code when Metabase fanout has failed children', async () => { + it('returns a non-zero code when a Metabase fanout child fully fails', async () => { const projectDir = join(tempDir, 'project'); await 
writeMetabaseConfig(projectDir); const io = makeIo(); @@ -441,7 +441,7 @@ describe('runKtxIngest', () => { { runLocalMetabaseIngest: async () => ({ metabaseConnectionId: 'prod-metabase', - status: 'partial_failure', + status: 'all_failed', totals: { workUnits: 1, failedWorkUnits: 1 }, children: [ { @@ -467,9 +467,83 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(1); - expect(io.stdout()).toContain('Metabase fanout: partial_failure'); - expect(io.stdout()).toContain('Failed tasks: 1'); + expect(io.stdout()).toContain('Metabase fanout: all_failed'); expect(io.stdout()).toContain('status=error'); + }); + + it('exits 0 and reports status=partial when a Metabase child saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + body: { + failedWorkUnits: ['metabase-db-2'], + workUnits: [ + { + unitKey: 'metabase-db-1', + rawFiles: ['cards/1.json'], + status: 'success', + actions: [{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], + }, + { + unitKey: 'metabase-db-2', + rawFiles: ['cards/2.json'], + status: 'failed', + reason: 'bad SQL', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 'prod-metabase', + status: 'partial_failure', + totals: { workUnits: 2, failedWorkUnits: 1 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', 
+ syncId: 'sync-a', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 2, + failedWorkUnits: ['metabase-db-2'], + artifactsWritten: 1, + commitSha: 'abc', + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Metabase fanout: partial_failure'); + expect(io.stdout()).toContain('status=partial'); expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); @@ -1140,6 +1214,63 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Status: error\n'); }); + it('exits 0 and reports Status: partial when a single-source ingest saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const partialReport = localFakeBundleReport('local-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + const runLocal = vi.fn(async (_input: RunLocalIngestOptions) => ({ + result: { + jobId: 'local-job-partial', + runId: partialReport.runId, + syncId: partialReport.body.syncId, + diffSummary: partialReport.body.diffSummary, + workUnitCount: partialReport.body.workUnits.length, + failedWorkUnits: partialReport.body.failedWorkUnits, + artifactsWritten: 1, + commitSha: partialReport.body.commitSha, + }, + report: partialReport, + })); + + const io = makeIo(); + await expect( + 
runKtxIngest( + { command: 'run', projectDir, connectionId: 'warehouse', adapter: 'fake', sourceDir, outputMode: 'plain' }, + io.io, + { runLocalIngest: runLocal, jobIdFactory: () => 'local-job-partial' }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Status: partial\n'); + }); + it('prints trace path and error status for stored failed ingest reports', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/test/public-ingest.test.ts b/packages/cli/test/public-ingest.test.ts index b926793c..41289208 100644 --- a/packages/cli/test/public-ingest.test.ts +++ b/packages/cli/test/public-ingest.test.ts @@ -88,7 +88,7 @@ function deepReadyProject( describe('buildPublicIngestPlan', () => { it('plans warehouse connections as scan targets and source connections as source ingest targets', () => { - const project = projectWithConnections({ + const project = deepReadyProject({ warehouse: { driver: 'postgres' }, prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, docs: { driver: 'notion' }, @@ -103,8 +103,7 @@ describe('buildPublicIngestPlan', () => { operation: 'database-ingest', debugCommand: 'ktx ingest warehouse --debug', steps: ['database-schema'], - databaseDepth: 'fast', - detectRelationships: false, + detectRelationships: true, queryHistory: { enabled: false }, }, { @@ -139,61 +138,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets.map((target) => target.connectionId).sort()).toEqual(['docs', 'warehouse']); }); - it('resolves database depth from flags, stored context, and defaults', () => { - const project = projectWithConnections({ - fast_default: { driver: 'postgres' }, - deep_default: { driver: 'postgres', context: { depth: 'deep' } }, - docs: { driver: 'notion' }, - }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'fast_default', - all: false, - queryHistory: 'default', - }).targets[0], 
- ).toMatchObject({ connectionId: 'fast_default', databaseDepth: 'fast', queryHistory: { enabled: false } }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'deep_default', - all: false, - queryHistory: 'default', - }).targets[0], - ).toMatchObject({ connectionId: 'deep_default', databaseDepth: 'deep' }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'docs', - all: false, - depth: 'deep', - queryHistory: 'default', - }).warnings, - ).toEqual(['--deep affects database ingest only; ignoring it for docs.']); - }); - - it('does not infer deep ingest from legacy scanMode values', () => { - const project = projectWithConnections({ - warehouse: { driver: 'postgres' }, - }); - - const plan = buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'warehouse', - all: false, - scanMode: 'enriched', - }); - - expect(plan.targets[0]).toMatchObject({ - connectionId: 'warehouse', - databaseDepth: 'fast', - steps: ['database-schema'], - }); - }); - it('rejects stale local Looker source driver aliases', () => { const project = projectWithConnections({ local_looker: { driver: 'local_looker' } as never, @@ -204,8 +148,8 @@ describe('buildPublicIngestPlan', () => { ); }); - it('upgrades effective depth when query history is explicitly enabled', () => { - const project = projectWithConnections({ + it('enables query history when explicitly requested even if stored config disables it', () => { + const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false } } }, }); @@ -213,17 +157,16 @@ describe('buildPublicIngestPlan', () => { projectDir: '/tmp/project', targetConnectionId: 'warehouse', all: false, - depth: 'fast', queryHistory: 'enabled', queryHistoryWindowDays: 30, }); expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', queryHistory: { enabled: true, windowDays: 
30, dialect: 'postgres' }, + steps: ['database-schema', 'query-history'], }); - expect(plan.warnings).toEqual(['--query-history requires deep ingest; running warehouse with --deep.']); + expect(plan.warnings).toEqual([]); }); it('warns and skips query history for unsupported database drivers', () => { @@ -238,7 +181,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'local', - databaseDepth: 'fast', queryHistory: { enabled: false, unsupported: true }, }); expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); @@ -249,12 +191,11 @@ describe('buildPublicIngestPlan', () => { deepReadyProject({ local: { driver: 'sqlite' }, mysql_warehouse: { driver: 'mysql' }, - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }), { projectDir: '/tmp/project', all: true, - depth: 'deep', queryHistory: 'enabled', }, ); @@ -326,7 +267,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', queryHistory: { enabled: true, dialect: 'postgres', windowDays: 30 }, steps: ['database-schema', 'query-history'], }); @@ -334,7 +274,7 @@ describe('buildPublicIngestPlan', () => { it('adds a schema-first notice when query history is explicitly enabled', () => { const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); expect( @@ -363,34 +303,15 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'local', - databaseDepth: 'fast', queryHistory: { enabled: false, windowDays: 30, unsupported: true }, steps: ['database-schema'], }); expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); }); - it('aggregates ignored database-depth warnings for all source targets', () => { - const 
plan = buildPublicIngestPlan( - projectWithConnections({ - warehouse: { driver: 'postgres' }, - docs: { driver: 'notion' }, - dbt: { driver: 'dbt' }, - }), - { - projectDir: '/tmp/project', - all: true, - depth: 'deep', - queryHistory: 'default', - }, - ); - - expect(plan.warnings).toEqual(['--deep ignored for 2 non-database sources.']); - }); - - it('records a preflight failure for deep database ingest when readiness config is missing', () => { + it('records a preflight failure for database ingest when enrichment readiness config is missing', () => { const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const plan = buildPublicIngestPlan(project, { @@ -402,15 +323,14 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', preflightFailure: - 'warehouse requires deep ingest readiness: model configuration, scan enrichment mode, scan embeddings. Run ktx setup or rerun with --fast.', + 'warehouse cannot be ingested: enrichment is not configured (model configuration, scan enrichment mode, scan embeddings). 
Run ktx setup to configure a model and embeddings.', }); }); - it('honors scan.relationships.enabled when planning deep database ingest', () => { + it('honors scan.relationships.enabled when planning database ingest', () => { const plan = buildPublicIngestPlan( - deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }, false), + deepReadyProject({ warehouse: { driver: 'postgres' } }, false), { projectDir: '/tmp/project', targetConnectionId: 'warehouse', @@ -421,7 +341,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', detectRelationships: false, }); }); @@ -432,11 +351,11 @@ describe('runKtxPublicIngest', () => { vi.unstubAllEnvs(); }); - it('maps fast and deep database targets to scan internals', async () => { + it('maps database targets to enriched scan internals', async () => { const io = makeIo(); const project = deepReadyProject({ - fast: { driver: 'postgres' }, - deep: { driver: 'postgres', context: { depth: 'deep' } }, + first: { driver: 'postgres' }, + second: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); @@ -450,12 +369,12 @@ describe('runKtxPublicIngest', () => { expect(runScan).toHaveBeenNthCalledWith( 1, - expect.objectContaining({ connectionId: 'deep', mode: 'enriched', detectRelationships: true }), + expect.objectContaining({ connectionId: 'first', mode: 'enriched', detectRelationships: true }), expect.anything(), ); expect(runScan).toHaveBeenNthCalledWith( 2, - expect.objectContaining({ connectionId: 'fast', mode: 'structural', detectRelationships: false }), + expect.objectContaining({ connectionId: 'second', mode: 'enriched', detectRelationships: true }), expect.anything(), ); }); @@ -467,7 +386,7 @@ describe('runKtxPublicIngest', () => { try { await initKtxProject({ projectDir }); const io = makeIo({ isTTY: true }); - const project = projectWithConnections({ + const project = deepReadyProject({ warehouse: { driver: 
'sqlite', path: join(projectDir, 'warehouse.sqlite') }, }); @@ -614,7 +533,7 @@ describe('runKtxPublicIngest', () => { it('prints the schema-first notice for explicit query-history runs', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async () => 0); @@ -640,7 +559,7 @@ describe('runKtxPublicIngest', () => { it('suppresses internal scan output for public database ingest summaries', async () => { const io = makeIo(); - const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); const runScan = vi.fn(async (_args, scanIo) => { scanIo.stdout.write('KTX scan completed\n'); scanIo.stdout.write('Mode: structural\n'); @@ -674,7 +593,7 @@ describe('runKtxPublicIngest', () => { it('sanitizes captured database scan failure details in direct public output', async () => { const io = makeIo(); - const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); const runScan = vi.fn(async (_args, scanIo) => { scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n'); return 1; @@ -689,7 +608,6 @@ describe('runKtxPublicIngest', () => { all: false, json: false, inputMode: 'disabled', - depth: 'deep', }, io.io, { loadProject: vi.fn(async () => project), runScan }, @@ -699,7 +617,7 @@ describe('runKtxPublicIngest', () => { expect(io.stdout()).toContain( 'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.', ); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir 
/tmp/project'); expect(io.stdout()).not.toContain('KTX scan enrichment failed'); expect(io.stdout()).not.toContain('structural scan'); }); @@ -743,7 +661,7 @@ describe('runKtxPublicIngest', () => { it('suppresses historic-sql report output during direct public query-history ingest', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -794,7 +712,6 @@ describe('runKtxPublicIngest', () => { all: false, json: false, inputMode: 'auto', - depth: 'fast', queryHistory: 'default', }, io.io, @@ -809,7 +726,6 @@ describe('runKtxPublicIngest', () => { targetConnectionId: 'warehouse', all: false, entrypoint: 'ingest', - depth: 'fast', queryHistory: 'default', }), io.io, @@ -821,7 +737,7 @@ describe('runKtxPublicIngest', () => { const io = makeIo({ isTTY: true, interactive: true }); const calls: string[] = []; const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const ensureRuntime = vi.fn(async (): Promise => { calls.push('runtime'); @@ -923,10 +839,13 @@ describe('runKtxPublicIngest', () => { it('runs all independent targets and reports partial failures', async () => { const io = makeIo(); - const project = projectWithConnections({ - warehouse: { driver: 'postgres' }, - prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, - }); + const project = deepReadyProject( + { + warehouse: { driver: 'postgres' }, + prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, + }, + false, + ); const runScan = vi.fn(async () => 1); const runIngest = vi.fn(async () => 0); @@ -959,7 +878,7 @@ describe('runKtxPublicIngest', () => { command: 'run', projectDir: '/tmp/project', connectionId: 'warehouse', - mode: 'structural', + mode: 
'enriched', detectRelationships: false, dryRun: false, }, @@ -967,14 +886,14 @@ describe('runKtxPublicIngest', () => { ); expect(io.stdout()).toContain('Ingest finished with partial failures'); expect(io.stdout()).toContain('warehouse failed at database-schema.'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project'); expect(io.stdout()).not.toContain('Debug:'); }); it('skips the query-history facet but keeps the target green when query-history fails', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -1007,14 +926,14 @@ describe('runKtxPublicIngest', () => { 'Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history', ); expect(io.stdout()).not.toContain('warehouse failed: Error:'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('historic-sql'); }); it('prints the runtime artifact build hint for missing query-history runtime assets', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -1045,14 +964,14 @@ describe('runKtxPublicIngest', () => { expect(io.stdout()).toContain( 'In a source checkout, build the local runtime assets with: pnpm run artifacts:build', ); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir 
/tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command'); }); - it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => { + it('fails enrichment-readiness targets before work starts while continuing independent --all targets', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, docs: { driver: 'notion' }, }); const runScan = vi.fn(async () => 0); @@ -1071,12 +990,12 @@ describe('runKtxPublicIngest', () => { expect.objectContaining({ command: 'run', connectionId: 'docs', adapter: 'notion' }), expect.anything(), ); - expect(io.stdout()).toContain('warehouse requires deep ingest readiness'); + expect(io.stdout()).toContain('warehouse cannot be ingested: enrichment is not configured'); }); - it('does not infer enriched relationship scans from legacy scanMode values', async () => { + it('drives scan relationship detection from project config, not from legacy args', async () => { const io = makeIo(); - const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }, false); const runScan = vi.fn(async () => 0); await expect( @@ -1103,7 +1022,7 @@ describe('runKtxPublicIngest', () => { command: 'run', projectDir: '/tmp/project', connectionId: 'warehouse', - mode: 'structural', + mode: 'enriched', detectRelationships: false, dryRun: false, }, @@ -1113,7 +1032,7 @@ describe('runKtxPublicIngest', () => { it('prints stable JSON results', async () => { const io = makeIo(); - const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); await expect( 
runKtxPublicIngest( diff --git a/packages/cli/test/setup-context.test.ts b/packages/cli/test/setup-context.test.ts index 9757cc62..d04e24e1 100644 --- a/packages/cli/test/setup-context.test.ts +++ b/packages/cli/test/setup-context.test.ts @@ -1,7 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; +import { buildDefaultKtxProjectConfig, serializeKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -49,7 +49,7 @@ async function writeReadyProject(projectDir: string, overrides: ReadyProjectOver ...defaults, setup: { database_connection_ids: ['warehouse'] }, connections: { - warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' }, }, llm: { @@ -407,130 +407,10 @@ describe('setup context build state', () => { expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); }); - it('treats fast database context as ready from schema manifest shards without AI artifacts', async () => { + it('requires completed relationships for database context when relationship discovery is enabled', async () => { await writeReadyProject(tempDir, { connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } }, - }, - llm: { provider: { backend: 'none' }, models: {} }, - scan: { enrichment: { mode: 'none' } }, - }); - await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { 
recursive: true }); - await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); - await writeScanReport(tempDir, '2026-05-09T10:00:00.000Z', { - mode: 'structural', - tableDescriptions: 'skipped', - columnDescriptions: 'skipped', - embeddings: 'skipped', - manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], - }); - const io = makeIo(); - const runContextBuildMock = vi.fn>(async () => ({ - exitCode: 0, - })); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'disabled' }, - io.io, - { - runContextBuild: runContextBuildMock, - }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - expect(runContextBuildMock).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Existing context artifacts were found from setup ingest.'); - }); - - it('stores fast context depth non-interactively when deep readiness is missing', async () => { - await writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true } }, - llm: { provider: { backend: 'none' }, models: {} }, - scan: { enrichment: { mode: 'none' } }, - }); - const io = makeIo(); - const runContextBuildMock = vi.fn>(async () => ({ - exitCode: 0, - })); - const verifyContextReady = vi.fn(async () => ({ - ready: true, - agentContextReady: true, - semanticSearchReady: true, - details: ['ready'], - })); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'disabled' }, - io.io, - { runContextBuild: runContextBuildMock, verifyContextReady }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse.context).toMatchObject({ depth: 'fast' }); - expect(runContextBuildMock).toHaveBeenCalledWith( - expect.anything(), - expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled' }), - expect.anything(), - expect.anything(), - ); - 
expect(runContextBuildMock.mock.calls[0]?.[1]).not.toMatchObject({ - scanMode: 'enriched', - detectRelationships: true, - }); - }); - - it('prompts for database context depth after final readiness is known', async () => { - await writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true } }, - llm: { - provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret - models: { default: 'gpt-test' }, - }, - scan: { - enrichment: { - mode: 'llm', - embeddings: { backend: 'openai', model: 'text-embedding-3-small', dimensions: 1536 }, - }, - }, - }); - const io = makeIo(); - const select = vi.fn(async () => 'deep'); - const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); - const verifyContextReady = vi.fn(async () => ({ - ready: true, - agentContextReady: true, - semanticSearchReady: true, - details: ['ready'], - })); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto' }, - io.io, - { - prompts: { select, cancel: vi.fn() }, - runContextBuild: runContextBuildMock, - verifyContextReady, - }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - expect(select).toHaveBeenCalledWith( - expect.objectContaining({ - message: expect.stringContaining('How much database context should KTX build?'), - }), - ); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' }); - }); - - it('requires completed relationships for deep context when relationship discovery is enabled', async () => { - await writeReadyProject(tempDir, { - connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', readonly: true }, }, scan: { relationships: { enabled: true } }, }); @@ -560,10 +440,10 @@ describe('setup context build state', () => { expect(runContextBuildMock).toHaveBeenCalledOnce(); 
}); - it('does not require relationships for deep context when relationship discovery is disabled', async () => { + it('does not require relationships for database context when relationship discovery is disabled', async () => { await writeReadyProject(tempDir, { connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', readonly: true }, }, scan: { relationships: { enabled: false } }, }); @@ -620,7 +500,7 @@ describe('setup context build state', () => { it('starts a fresh foreground build when stale state is found', async () => { await writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } } }, + connections: { warehouse: { driver: 'postgres', readonly: true } }, }); await writeKtxSetupContextState(tempDir, { runId: 'setup-context-local-stale', diff --git a/packages/cli/test/setup-databases.test.ts b/packages/cli/test/setup-databases.test.ts index 15d27e3c..cf7acf3c 100644 --- a/packages/cli/test/setup-databases.test.ts +++ b/packages/cli/test/setup-databases.test.ts @@ -262,48 +262,6 @@ describe('setup databases step', () => { expect(prompts.select).toHaveBeenCalledTimes(1); }); - it('preserves context.depth when editing an existing database connection', async () => { - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'connections:', - ' warehouse:', - ' driver: sqlite', - ' path: ./warehouse.sqlite', - ' context:', - ' depth: deep', - '', - ].join('\n'), - 'utf-8', - ); - const prompts = makePromptAdapter({ - selectValues: ['edit', 'warehouse', 'continue'], - textValues: ['./warehouse.sqlite'], - }); - const testConnection = vi.fn(async () => 0); - const scanConnection = vi.fn(async () => 0); - const io = makeIo(); - const result = await runKtxSetupDatabasesStep( - { - projectDir: tempDir, - inputMode: 'auto', - skipDatabases: false, - databaseSchemas: [], - disableQueryHistory: true, - }, - io.io, - { prompts, 
testConnection, scanConnection }, - ); - - expect(result.status, io.stderr()).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse).toMatchObject({ - driver: 'sqlite', - path: './warehouse.sqlite', - context: { depth: 'deep' }, - }); - }); - it('labels existing database connections with the database type', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -376,7 +334,6 @@ describe('setup databases step', () => { expect(config.connections['postgres-warehouse']).toEqual({ driver: 'postgres', url: 'env:DATABASE_URL', - context: { depth: 'fast' }, }); }); @@ -1558,7 +1515,7 @@ describe('setup databases step', () => { ); expect(io.stdout()).not.toContain('Tables: 2'); expect(io.stdout()).toContain('◇ Building schema context for postgres-warehouse'); - expect(io.stdout()).toContain('│ Running fast database ingest…'); + expect(io.stdout()).toContain('│ Running database scan…'); expect(io.stdout()).toContain('◇ Schema context complete for postgres-warehouse'); expect(io.stdout()).toContain('│ Changes: 2 new tables'); expect(io.stdout()).toContain('◇ Database ready'); @@ -1907,7 +1864,7 @@ describe('setup databases step', () => { driver: 'postgres', url: 'env:DATABASE_URL', schemas: ['public'], - context: { queryHistory: { enabled: false }, depth: 'fast' }, + context: { queryHistory: { enabled: false } }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], @@ -1946,7 +1903,6 @@ describe('setup databases step', () => { expect(config.connections.warehouse).toEqual({ driver: 'sqlite', path: './warehouse.sqlite', - context: { depth: 'fast' }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], @@ -2023,11 +1979,11 @@ describe('setup databases step', () => { const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 
'env:DATABASE_URL' }); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); - expect(io.stderr()).toContain('Fast database ingest failed for warehouse.'); - expect(io.stderr()).toContain('│ Fast database ingest failed for warehouse.'); - expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --fast --debug`); + expect(io.stderr()).toContain('Database scan failed for warehouse.'); + expect(io.stderr()).toContain('│ Database scan failed for warehouse.'); + expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --debug`); expect(io.stderr()).not.toContain('Structural scan failed for warehouse.'); - expect(io.stderr()).not.toMatch(/^Fast database ingest failed for warehouse\./m); + expect(io.stderr()).not.toMatch(/^Database scan failed for warehouse\./m); }); it('prints the native SQLite rebuild command when scanning hits a Node ABI mismatch', async () => { @@ -2066,7 +2022,7 @@ describe('setup databases step', () => { expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('│ Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('Fix: pnpm run native:rebuild'); - expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir} --fast`); + expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir}`); expect(io.stderr()).not.toContain('ktx scan'); expect(io.stderr()).not.toContain('npm rebuild'); expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m); @@ -2364,7 +2320,7 @@ describe('setup databases step', () => { 'utf-8', ); const io = makeIo(); - const prompts = makePromptAdapter({ selectValues: ['yes', 'deep'] }); + const prompts = makePromptAdapter({ selectValues: ['yes'] }); const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); const historicSqlReadinessProbe = 
vi.fn(async () => ({ ok: true as const, @@ -2399,12 +2355,6 @@ describe('setup databases step', () => { { value: 'back', label: 'Back' }, ], }); - expect(prompts.select).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - message: expect.stringContaining('How much database context should KTX build?'), - }), - ); expect(historicSqlReadinessProbe).toHaveBeenCalledWith( expect.objectContaining({ projectDir: tempDir, @@ -2420,7 +2370,6 @@ describe('setup databases step', () => { minExecutions: 5, filters: { dropTrivialProbes: true }, }, - depth: 'deep', }, }); }); diff --git a/packages/cli/test/setup-sources.test.ts b/packages/cli/test/setup-sources.test.ts index b426ad10..784dcc46 100644 --- a/packages/cli/test/setup-sources.test.ts +++ b/packages/cli/test/setup-sources.test.ts @@ -260,7 +260,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + sourceAuthTokenRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'selected_roots', notionRootPageIds: ['page-1'], runInitialSourceIngest: false, @@ -281,6 +281,81 @@ describe('setup sources step', () => { expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined(); }); + it('rejects --source-api-key-ref for Notion and points at --source-auth-token-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'notion', + sourceConnectionId: 'notion-main', + sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + notionCrawlMode: 'selected_roots', + notionRootPageIds: ['page-1'], + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-api-key-ref does not apply to --source notion; use 
--source-auth-token-ref.'); + expect((await readConfig()).connections['notion-main']).toBeUndefined(); + }); + + it('rejects --source-auth-token-ref for Metabase and points at --source-api-key-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'metabase', + sourceConnectionId: 'prod_metabase', + sourceUrl: 'https://metabase.example.com', + sourceAuthTokenRef: 'env:METABASE_API_KEY', // pragma: allowlist secret + sourceWarehouseConnectionId: 'warehouse', + metabaseDatabaseId: 1, + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-auth-token-ref does not apply to --source metabase; use --source-api-key-ref.'); + }); + + it('rejects --source-client-secret-ref for dbt and points at --source-auth-token-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'dbt', + sourceConnectionId: 'dbt-main', + sourceClientSecretRef: 'env:DBT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-client-secret-ref does not apply to --source dbt; use --source-auth-token-ref.'); + }); + it('accepts former ingest subcommand names as interactive source connection ids', async () => { await addPrimarySource(); const io = makeIo(); @@ -323,7 +398,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + sourceAuthTokenRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'all_accessible', notionRootPageIds: ['page-1'], 
runInitialSourceIngest: false, diff --git a/packages/cli/test/setup.test.ts b/packages/cli/test/setup.test.ts index 0bc00919..da51e9af 100644 --- a/packages/cli/test/setup.test.ts +++ b/packages/cli/test/setup.test.ts @@ -398,6 +398,59 @@ describe('setup status', () => { expect(rendered).toContain('KTX context built: yes'); }); + it('reports context ready after a partial ingest report saved memory', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + 'ingest:', + ' embeddings:', + ' backend: none', + ' dimensions: 8', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] }); + await persistLocalBundleReport( + tempDir, + localFakeBundleReport('warehouse-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }), + ); + + const status = await readKtxSetupStatus(tempDir); + + expect(status.context).toMatchObject({ ready: true, status: 'completed' }); + }); + it('formats plain and JSON setup status payloads', async () => { const status = await readKtxSetupStatus(tempDir); const rendered = formatKtxSetupStatus(status); diff --git a/packages/cli/test/standalone-smoke.test.ts b/packages/cli/test/standalone-smoke.test.ts index 4007afcb..7dde8979 100644 --- a/packages/cli/test/standalone-smoke.test.ts +++ b/packages/cli/test/standalone-smoke.test.ts @@ -185,7 +185,7 @@ describe('standalone built ktx CLI smoke', () 
=> { expect([0, 1]).toContain(result.code); }); - it('runs fast public database ingest through the built binary with manifest artifacts', async () => { + it('blocks public database ingest through the built binary when enrichment is not configured', async () => { const projectDir = join(tempDir, 'database-ingest-project'); const init = await runSetupNewProject(projectDir); expectSetupStderr(init); @@ -200,19 +200,10 @@ describe('standalone built ktx CLI smoke', () => { expect(connectionTest.stdout).toContain('Driver: sqlite'); expect(connectionTest.stdout).toContain('Status: ok'); - const ingest = await runBuiltCli(['ingest', 'warehouse', '--project-dir', projectDir, '--fast', '--no-input']); - expectProjectStderr(ingest, projectDir); - expect(ingest.stdout).toContain('Ingest finished'); - expect(ingest.stdout).toContain('warehouse'); - expect(ingest.stdout).toContain('Database schema'); - expect(ingest.stdout).toContain('warehouse done'); + const ingest = await runBuiltCli(['ingest', 'warehouse', '--project-dir', projectDir, '--no-input']); + expect(ingest.code).toBe(1); + expect(ingest.stdout).toContain('warehouse cannot be ingested: enrichment is not configured'); expect(ingest.stdout).not.toContain('KTX scan completed'); - - const manifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); - expect(manifest).toContain('customers:'); - expect(manifest).toContain('orders:'); - expect(manifest).toContain('source: formal'); - expect(manifest).not.toContain('ai:'); }, 30_000); it('parses gateway LLM config and OpenAI enrichment embeddings used by standalone scans without network calls', async () => { diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json index 13642c49..628c8f4b 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -365,7 +365,6 @@ 
"embeddings", "secrets", "databases", - "database-context-depth", "sources", "context", "agents", diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index b196aaa1..2ea9ce27 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -257,7 +257,7 @@ describe('standalone example docs', () => { assert.match(primarySources, /context:\n queryHistory:/); assert.match(rootReadme, /`ktx ingest` \| Build context for every configured connection/); assert.doesNotMatch(rootReadme, /`ktx ingest `/); - assert.match(quickstart, /Databases:\n warehouse: deep context complete/); + assert.match(quickstart, /Databases:\n warehouse: database context complete/); assert.match(quickstart, /Databases configured: yes \(warehouse\)/); assert.match(setupReference, /Databases configured: yes \(postgres-warehouse\)/); assert.doesNotMatch(rootReadme, new RegExp(['Primary sources', 'configured'].join(' '))); diff --git a/scripts/installed-live-database-smoke.mjs b/scripts/installed-live-database-smoke.mjs index a11e38d2..20bad6b5 100644 --- a/scripts/installed-live-database-smoke.mjs +++ b/scripts/installed-live-database-smoke.mjs @@ -106,7 +106,6 @@ export function buildLiveDatabaseIngestArgs(projectDir, _databaseIntrospectionUr connectionId, '--project-dir', projectDir, - '--fast', '--no-input', ]; } @@ -152,20 +151,20 @@ function requireSuccess(label, result) { } } +function requireFailure(label, result) { + if (result.code === 0) { + throw new Error( + `${label} unexpectedly succeeded\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`, + ); + } +} + function requireOutput(label, result, pattern) { if (!pattern.test(result.stdout)) { throw new Error(`${label} output did not match ${pattern}\nstdout:\n${result.stdout}`); } } -function getRunId(stdout) { - const match = stdout.match(/^Run: (.+)$/m); - if (!match) { - throw new Error(`ingest output did not include a run id\nstdout:\n${stdout}`); - } - return match[1]; -} - async function 
requireDocker() { const result = await run('docker', ['info'], { timeout: 20_000 }); if (result.code !== 0) { @@ -310,13 +309,17 @@ async function main() { env: managedRuntimeEnv(cleanInstallDir), timeout: 120_000, }); - requireSuccess('ktx ingest warehouse --fast', ingestRun); - requireOutput('ktx ingest warehouse --fast', ingestRun, /Ingest finished/); - requireOutput('ktx ingest warehouse --fast', ingestRun, /Database schema/); + // ktx ingest now always builds enriched context and requires a configured + // model and embeddings. This smoke project has neither, so the database + // target fails the enrichment-readiness preflight before any work runs. + // This still exercises the packaged binary, daemon startup, and the live + // database connection end to end. + requireFailure('ktx ingest warehouse', ingestRun); + requireOutput('ktx ingest warehouse', ingestRun, /Ingest finished with partial failures/); + requireOutput('ktx ingest warehouse', ingestRun, /enrichment is not configured/); - const runId = getRunId(ingestRun.stdout); await assertPathExists(join(projectDir, '.ktx', 'db.sqlite'), 'SQLite local ingest state'); - process.stdout.write(`Installed live-database artifact smoke passed: ${runId}\n`); + process.stdout.write('Installed live-database artifact smoke passed: enrichment-readiness guard verified\n'); } finally { if (daemonStarted && cleanInstallDir) { await stopDaemon(cleanInstallDir); diff --git a/scripts/installed-live-database-smoke.test.mjs b/scripts/installed-live-database-smoke.test.mjs index ef618725..2ddeed5d 100644 --- a/scripts/installed-live-database-smoke.test.mjs +++ b/scripts/installed-live-database-smoke.test.mjs @@ -100,7 +100,6 @@ describe('installed live-database artifact smoke helpers', () => { 'warehouse', '--project-dir', '/tmp/project', - '--fast', '--no-input', ]); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index e1ff8c6c..d66d7f1a 100644 --- a/scripts/package-artifacts.mjs +++ 
b/scripts/package-artifacts.mjs @@ -512,15 +512,6 @@ function requireSuccess(label, result) { assert.equal(result.stderr, '', label + ' wrote unexpected stderr'); } -function requireSuccessWithProjectStderr(label, result, projectDir) { - assert.equal( - result.code, - 0, - label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr, - ); - assert.equal(result.stderr, 'Project: ' + projectDir + '\\n', label + ' wrote unexpected stderr'); -} - function requireExitCodeWithProjectStderr(label, result, projectDir, expectedCode) { assert.equal( result.code, @@ -860,27 +851,15 @@ try { requireOutput('ktx admin runtime stop', runtimeStop, /Stopped KTX daemon/); process.stdout.write('ktx admin runtime daemon lifecycle verified\\n'); - const structuralScan = await run( + const databaseIngest = await run( ...Object.values( - pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--fast', '--no-input']), + pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--no-input']), ), ); - requireSuccessWithProjectStderr('ktx ingest fast', structuralScan, projectDir); - requireOutput('ktx ingest fast', structuralScan, /Ingest finished/); - requireOutput('ktx ingest fast', structuralScan, /Database schema/); - requireOutput('ktx ingest fast', structuralScan, /warehouse\\s+done/); - await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml')); - process.stdout.write('ktx ingest fast verified\\n'); - - const enrichedScan = await run( - ...Object.values( - pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--deep', '--no-input']), - ), - ); - requireExitCodeWithProjectStderr('ktx ingest deep readiness guard', enrichedScan, projectDir, 1); - requireOutput('ktx ingest deep readiness guard', enrichedScan, /Ingest finished with partial failures/); - requireOutput('ktx ingest deep readiness guard', enrichedScan, /requires deep ingest 
readiness/); - process.stdout.write('ktx ingest deep readiness guard verified\\n'); + requireExitCodeWithProjectStderr('ktx ingest enrichment guard', databaseIngest, projectDir, 1); + requireOutput('ktx ingest enrichment guard', databaseIngest, /Ingest finished with partial failures/); + requireOutput('ktx ingest enrichment guard', databaseIngest, /enrichment is not configured/); + process.stdout.write('ktx ingest enrichment guard verified\\n'); await access(join(projectDir, '.ktx', 'db.sqlite')); process.stdout.write('ktx ingest state verified\\n'); diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index a1d2489d..ffc59ce6 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -530,10 +530,11 @@ describe('verification snippets', () => { assert.doesNotMatch(source, /ktx admin runtime prune/); assert.doesNotMatch(source, /staleRuntimeDir/); assert.match(source, /pnpmCommand\(\['exec', 'ktx', 'ingest', 'warehouse'/); - assert.match(source, /'--deep'/); + assert.doesNotMatch(source, /'--fast'/); + assert.doesNotMatch(source, /'--deep'/); assert.doesNotMatch(source, /'--enrich'/); - assert.match(source, /ktx ingest fast verified/); - assert.match(source, /ktx ingest deep readiness guard verified/); + assert.match(source, /ktx ingest enrichment guard verified/); + assert.match(source, /enrichment is not configured/); assert.match(source, /enrichment:/); assert.match(source, /mode: deterministic/); assert.doesNotMatch(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); diff --git a/skills/ktx/SKILL.md b/skills/ktx/SKILL.md index 3887fdc0..58893d7f 100644 --- a/skills/ktx/SKILL.md +++ b/skills/ktx/SKILL.md @@ -87,16 +87,17 @@ Do not discover these inputs across multiple setup runs. pass the database flags from the previous run** — setup validates current flags, not persisted `ktx.yaml` state. -4. **Run fast ingest** if setup did not already complete one: +4. 
**Build context** if setup did not already complete an ingest: ```bash - ktx ingest --fast --no-input + ktx ingest --no-input ``` - Note: `ktx ingest` rejects `--yes` together with `--no-input` - (*Choose only one runtime install mode*); `ktx setup` accepts both. Use - `--no-input` only for ingest. Do not run `--deep` ingest unless the user - explicitly asks for LLM-backed enrichment. + `ktx ingest` always builds enriched context and requires a configured model + and embeddings (set during setup); a database connection without them fails + with an enrichment-readiness error. Note: `ktx ingest` rejects `--yes` + together with `--no-input` (*Choose only one runtime install mode*); + `ktx setup` accepts both. Use `--no-input` only for ingest. 5. **Install agent integration:** @@ -138,7 +139,7 @@ ktx setup --no-input --yes --skip-databases --skip-llm --skip-embeddings \ # Notion ktx setup --no-input --yes --skip-databases --skip-llm --skip-embeddings \ --source notion --source-connection-id \ - --source-api-key-ref env:NOTION_TOKEN \ + --source-auth-token-ref env:NOTION_TOKEN \ --notion-crawl-mode selected_roots --notion-root-page-id ``` @@ -151,7 +152,7 @@ Notes: `--notion-root-page-id` (repeatable); use `all_accessible` to crawl everything the token can see. - After adding sources, ingest each new connection so its context is queryable: - `ktx ingest --fast --no-input`. + `ktx ingest --no-input`. ## Files to inspect