From 637891f0304d83809373691738203d8d9d14d4b4 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 29 May 2026 17:23:46 +0200 Subject: [PATCH 1/7] fix(cli): align Notion setup credential to --source-auth-token-ref (#236) Notion's setup path read --source-api-key-ref while writing the auth_token_ref config field, so --source-auth-token-ref was silently dropped. Align Notion to the flag=field convention every other connector follows: it now reads --source-auth-token-ref, and --source-api-key-ref becomes Metabase-only. Also add validation rejecting any credential-ref flag not applicable to the chosen --source, with a pointer to the correct flag, closing the silent-drop class for all connectors. Update CLI-reference docs, the ktx skill Notion example, and tests. Fixes KLO-724. --- .../content/docs/cli-reference/ktx-setup.mdx | 12 ++- packages/cli/src/commands/setup-commands.ts | 9 ++- packages/cli/src/setup-sources.ts | 47 +++++++++-- packages/cli/test/setup-sources.test.ts | 79 ++++++++++++++++++- skills/ktx/SKILL.md | 2 +- 5 files changed, 137 insertions(+), 12 deletions(-) diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 2c19bd07..415b0e6e 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -160,9 +160,9 @@ sources. This is equivalent to passing `--skip-sources` in scripted setup. 
| `--source-git-url ` | Git URL for dbt, MetricFlow, or LookML | | `--source-branch ` | Git branch for context-source setup | | `--source-subpath ` | Repo subpath for context-source setup | -| `--source-auth-token-ref ` | `env:` or `file:` credential reference for source repo auth | +| `--source-auth-token-ref ` | `env:` or `file:` credential reference for source repo auth or Notion integration token | | `--source-url ` | Source service URL for Metabase or Looker | -| `--source-api-key-ref ` | `env:` or `file:` API key reference for Metabase or Notion | +| `--source-api-key-ref ` | `env:` or `file:` API key reference for Metabase | | `--source-client-id ` | Looker client id | | `--source-client-secret-ref ` | `env:` or `file:` Looker client secret reference | | `--source-warehouse-connection-id ` | Warehouse connection id used for context-source mapping | @@ -221,6 +221,14 @@ ktx setup \ --source-warehouse-connection-id warehouse \ --metabase-database-id 1 +# Add a Notion source that crawls selected root pages +ktx setup \ + --source notion \ + --source-connection-id notion-main \ + --source-auth-token-ref env:NOTION_TOKEN \ + --notion-crawl-mode selected_roots \ + --notion-root-page-id abc123def456 + # Install project-scoped agent integration for Codex ktx setup --agents --target codex ``` diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 54628346..19f980bd 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -308,9 +308,14 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo .addOption(new Option('--source-git-url ', 'Git URL for dbt, MetricFlow, or LookML').hideHelp()) .addOption(new Option('--source-branch ', 'Git branch for source setup').hideHelp()) .addOption(new Option('--source-subpath ', 'Repo subpath for source setup').hideHelp()) - .addOption(new Option('--source-auth-token-ref ', 'env: or file: credential 
ref for source repo auth').hideHelp()) + .addOption( + new Option( + '--source-auth-token-ref ', + 'env: or file: credential ref for source repo auth or Notion integration token', + ).hideHelp(), + ) .addOption(new Option('--source-url ', 'Source service URL for Metabase or Looker').hideHelp()) - .addOption(new Option('--source-api-key-ref ', 'env: or file: API key ref for Metabase or Notion').hideHelp()) + .addOption(new Option('--source-api-key-ref ', 'env: or file: API key ref for Metabase').hideHelp()) .addOption(new Option('--source-client-id ', 'Looker client id').hideHelp()) .addOption(new Option('--source-client-secret-ref ', 'env: or file: Looker client secret ref').hideHelp()) .addOption(new Option('--source-warehouse-connection-id ', 'Mapped warehouse connection id').hideHelp()) diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index dea1cd43..4f0a94bc 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -217,6 +217,39 @@ function credentialRef(value: string | undefined, label: string): string { return ref; } +type SourceCredentialFlag = { + field: 'sourceAuthTokenRef' | 'sourceApiKeyRef' | 'sourceClientSecretRef'; + flag: string; +}; + +// Each connector reads exactly one credential ref; the flag name mirrors the +// ktx.yaml field it writes (auth_token_ref / api_key_ref / client_secret_ref). 
+const SOURCE_CREDENTIAL_FLAG: Record = { + dbt: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + metricflow: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + lookml: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + notion: { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + metabase: { field: 'sourceApiKeyRef', flag: '--source-api-key-ref' }, + looker: { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, +}; + +const ALL_SOURCE_CREDENTIAL_FLAGS: SourceCredentialFlag[] = [ + { field: 'sourceAuthTokenRef', flag: '--source-auth-token-ref' }, + { field: 'sourceApiKeyRef', flag: '--source-api-key-ref' }, + { field: 'sourceClientSecretRef', flag: '--source-client-secret-ref' }, +]; + +// Reject a credential ref flag the chosen source does not read, so a wrong flag +// fails loudly instead of being silently dropped (KLO-724). +function assertSourceCredentialFlags(source: KtxSetupSourceType, args: KtxSetupSourcesArgs): void { + const allowed = SOURCE_CREDENTIAL_FLAG[source]; + for (const { field, flag } of ALL_SOURCE_CREDENTIAL_FLAGS) { + if (args[field] && field !== allowed.field) { + throw new Error(`${flag} does not apply to --source ${source}; use ${allowed.flag}.`); + } + } +} + async function chooseSourceCredentialRef(input: { prompts: KtxSetupSourcesPromptAdapter; projectDir: string; @@ -515,7 +548,7 @@ function buildNotionConnection(args: KtxSetupSourcesArgs): KtxProjectConnectionC } return { driver: 'notion', - auth_token_ref: credentialRef(args.sourceApiKeyRef, 'Notion token ref'), + auth_token_ref: credentialRef(args.sourceAuthTokenRef, 'Notion token ref'), crawl_mode: crawlMode, ...(rootPageIds.length > 0 ? { root_page_ids: rootPageIds } : {}), root_database_ids: [], @@ -1295,10 +1328,10 @@ async function promptForInteractiveSource( label: 'Notion integration token', envName: 'NOTION_TOKEN', secretFileName: `${currentState.sourceConnectionId ?? 
'notion-main'}-token`, - existingRef: currentState.sourceApiKeyRef, + existingRef: currentState.sourceAuthTokenRef, }); if (ref === 'back') return 'back'; - currentState.sourceApiKeyRef = ref; + currentState.sourceAuthTokenRef = ref; return 'next'; }, async (currentState) => { @@ -1326,7 +1359,7 @@ async function promptForInteractiveSource( connectionId, connection: { driver: 'notion', - auth_token_ref: credentialRef(currentState.sourceApiKeyRef, 'Notion token ref'), + auth_token_ref: credentialRef(currentState.sourceAuthTokenRef, 'Notion token ref'), crawl_mode: 'selected_roots', root_page_ids: currentState.notionRootPageIds ?? [], root_database_ids: [], @@ -1516,7 +1549,7 @@ function sourceArgsFromExistingConnection(input: { return sourceArgs; } - sourceArgs.sourceApiKeyRef = stringField(input.connection.auth_token_ref); + sourceArgs.sourceAuthTokenRef = stringField(input.connection.auth_token_ref); sourceArgs.notionCrawlMode = input.connection.crawl_mode === 'all_accessible' ? 'all_accessible' : 'selected_roots'; if (Array.isArray(input.connection.root_page_ids)) { @@ -1817,6 +1850,10 @@ export async function runKtxSetupSourcesStep( return { status: 'skipped', projectDir: args.projectDir }; } + if (args.source) { + assertSourceCredentialFlags(args.source, args); + } + const prompts = deps.prompts ?? 
createPromptAdapter(); const project = await loadKtxProject({ projectDir: args.projectDir }); if (!hasPrimarySource(project.config)) { diff --git a/packages/cli/test/setup-sources.test.ts b/packages/cli/test/setup-sources.test.ts index b426ad10..784dcc46 100644 --- a/packages/cli/test/setup-sources.test.ts +++ b/packages/cli/test/setup-sources.test.ts @@ -260,7 +260,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + sourceAuthTokenRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'selected_roots', notionRootPageIds: ['page-1'], runInitialSourceIngest: false, @@ -281,6 +281,81 @@ describe('setup sources step', () => { expect((await readConfig()).connections['notion-main']?.last_successful_cursor).toBeUndefined(); }); + it('rejects --source-api-key-ref for Notion and points at --source-auth-token-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'notion', + sourceConnectionId: 'notion-main', + sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + notionCrawlMode: 'selected_roots', + notionRootPageIds: ['page-1'], + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-api-key-ref does not apply to --source notion; use --source-auth-token-ref.'); + expect((await readConfig()).connections['notion-main']).toBeUndefined(); + }); + + it('rejects --source-auth-token-ref for Metabase and points at --source-api-key-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'metabase', + sourceConnectionId: 'prod_metabase', + sourceUrl: 
'https://metabase.example.com', + sourceAuthTokenRef: 'env:METABASE_API_KEY', // pragma: allowlist secret + sourceWarehouseConnectionId: 'warehouse', + metabaseDatabaseId: 1, + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-auth-token-ref does not apply to --source metabase; use --source-api-key-ref.'); + }); + + it('rejects --source-client-secret-ref for dbt and points at --source-auth-token-ref', async () => { + await addPrimarySource(); + const io = makeIo(); + + await expect( + runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'dbt', + sourceConnectionId: 'dbt-main', + sourceClientSecretRef: 'env:DBT_SECRET', // pragma: allowlist secret + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + {}, + ), + ).resolves.toEqual({ status: 'failed', projectDir }); + + expect(io.stderr()).toContain('--source-client-secret-ref does not apply to --source dbt; use --source-auth-token-ref.'); + }); + it('accepts former ingest subcommand names as interactive source connection ids', async () => { await addPrimarySource(); const io = makeIo(); @@ -323,7 +398,7 @@ describe('setup sources step', () => { inputMode: 'disabled', source: 'notion', sourceConnectionId: 'notion-main', - sourceApiKeyRef: 'env:NOTION_TOKEN', // pragma: allowlist secret + sourceAuthTokenRef: 'env:NOTION_TOKEN', // pragma: allowlist secret notionCrawlMode: 'all_accessible', notionRootPageIds: ['page-1'], runInitialSourceIngest: false, diff --git a/skills/ktx/SKILL.md b/skills/ktx/SKILL.md index 3887fdc0..0eaa03e3 100644 --- a/skills/ktx/SKILL.md +++ b/skills/ktx/SKILL.md @@ -138,7 +138,7 @@ ktx setup --no-input --yes --skip-databases --skip-llm --skip-embeddings \ # Notion ktx setup --no-input --yes --skip-databases --skip-llm --skip-embeddings \ --source notion --source-connection-id \ - --source-api-key-ref env:NOTION_TOKEN \ + 
--source-auth-token-ref env:NOTION_TOKEN \ --notion-crawl-mode selected_roots --notion-root-page-id ``` From 3f0d11e07d3696beb5f9d172efd3091e950d2b34 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 29 May 2026 17:41:04 +0200 Subject: [PATCH 2/7] feat(cli)!: remove fast mode; ktx ingest always builds enriched context (KLO-721) (#237) Fast mode (the ktx ingest --fast/--deep database-ingest depth toggle) is removed. ktx ingest now always builds the full enriched ("deep") context. There is no structural fallback: a database connection without a configured model and embeddings fails the enrichment-readiness preflight before any work runs, with a 'Run ktx setup to configure a model and embeddings' hint. - Remove --fast/--deep flags, the per-connection context.depth field, and the ktx setup depth prompt (delete setup-database-context-depth.ts). - Rename ingest-depth.ts -> connection-drivers.ts; ingest always requests scan mode 'enriched'; readiness gate (enrichmentReadinessGaps) runs for every database target. - Drop the database-context-depth telemetry step (Node + Python schema mirrors regenerated). - Update CLI, setup, context-build view, docs, the public ktx skill, and the release-smoke / artifacts scripts (now assert the no-LLM guard failure). ktx status --fast (a separate network-probe flag) is unchanged. Follow-ups: KLO-726 (live progress for ktx ingest --all), KLO-727 (restore credentialed successful-ingest release smoke coverage). 
--- AGENTS.md | 5 +- .../content/docs/cli-reference/ktx-ingest.mdx | 32 ++-- .../content/docs/cli-reference/ktx-setup.mdx | 4 +- .../content/docs/configuration/ktx-yaml.mdx | 7 +- .../docs/getting-started/quickstart.mdx | 8 +- .../content/docs/guides/building-context.mdx | 37 ++-- .../content/docs/guides/serving-agents.mdx | 5 +- .../docs/integrations/primary-sources.mdx | 2 +- docs/terminology.md | 2 - packages/cli/src/commands/ingest-commands.ts | 4 - packages/cli/src/connection-drivers.ts | 21 +++ packages/cli/src/context-build-view.ts | 10 +- .../cli/src/context/project/driver-schemas.ts | 2 +- packages/cli/src/ingest-depth.ts | 75 -------- packages/cli/src/public-ingest-copy.ts | 2 +- packages/cli/src/public-ingest.ts | 82 +++------ packages/cli/src/setup-context.ts | 55 +----- .../cli/src/setup-database-context-depth.ts | 131 -------------- packages/cli/src/setup-databases.ts | 94 ++-------- packages/cli/src/telemetry/events.schema.json | 1 - packages/cli/src/telemetry/events.ts | 1 - packages/cli/test/context-build-view.test.ts | 23 +-- packages/cli/test/index.test.ts | 24 +-- packages/cli/test/public-ingest.test.ts | 171 +++++------------- packages/cli/test/setup-context.test.ts | 134 +------------- packages/cli/test/setup-databases.test.ts | 67 +------ packages/cli/test/standalone-smoke.test.ts | 17 +- .../ktx_daemon/telemetry/events.schema.json | 1 - scripts/examples-docs.test.mjs | 2 +- scripts/installed-live-database-smoke.mjs | 31 ++-- .../installed-live-database-smoke.test.mjs | 1 - scripts/package-artifacts.mjs | 33 +--- scripts/package-artifacts.test.mjs | 7 +- skills/ktx/SKILL.md | 15 +- 34 files changed, 222 insertions(+), 884 deletions(-) create mode 100644 packages/cli/src/connection-drivers.ts delete mode 100644 packages/cli/src/ingest-depth.ts delete mode 100644 packages/cli/src/setup-database-context-depth.ts diff --git a/AGENTS.md b/AGENTS.md index 3d8c1725..2aa0dbed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -350,8 +350,9 @@ error messages — 
including the disambiguation rule for the overloaded word `source` (semantic / primary / context / source of truth) — see [`docs/terminology.md`](docs/terminology.md). Follow that file when choosing between near-synonyms (e.g. `connector` vs `adapter`, `data agent` vs -`database agent`, `fast ingest` vs `schema ingest`). Product-name rules in -this section take precedence over anything in that file when they conflict. +`database agent`, `context-source ingest` vs `source ingest`). Product-name +rules in this section take precedence over anything in that file when they +conflict. ### Updating `docs-site/` After Code Changes diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index d4e06881..db3b1c0e 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -5,9 +5,11 @@ description: "Build or refresh ktx context, or capture text into ktx memory." `ktx ingest` builds or refreshes **ktx** context from configured connections, and can also capture free-form text into **ktx** memory. Database connections build -schema context. Context-source connections ingest metadata from tools such as -dbt, Looker, Metabase, MetricFlow, LookML, and Notion. Pass `--text` or -`--file` to capture inline text or text files into memory instead. +enriched context — schema plus AI-generated descriptions, embeddings, and +relationship evidence — and require a configured model and embeddings. +Context-source connections ingest metadata from tools such as dbt, Looker, +Metabase, MetricFlow, LookML, and Notion. Pass `--text` or `--file` to capture +inline text or text files into memory instead. ## Command signature @@ -29,8 +31,6 @@ connection is selected. 
| Flag | Description | Default | |------|-------------|---------| | `--all` | Ingest all configured connections (same as bare invocation) | `false` | -| `--fast` | Use deterministic fast database ingest | Stored connection default, or `fast` | -| `--deep` | Use deep database ingest with AI-generated descriptions, embeddings, and relationship evidence | Stored connection default, or `fast` | | `--query-history` | Include database query-history usage patterns | Stored connection default | | `--no-query-history` | Skip database query-history usage patterns for this run | Stored connection default | | `--query-history-window-days ` | BigQuery/Snowflake query-history lookback window for this run | Stored connection default | @@ -44,12 +44,12 @@ connection is selected. | `--yes` | Install required managed runtime features without prompting | `false` | | `--no-input` | Disable interactive terminal input | - | -`--fast` and `--deep` are mutually exclusive. Depth flags apply only to -database connections. Query-history flags apply only to database connections +Database ingest always builds enriched context and requires a configured model +and embeddings (run `ktx setup`); connections without that configuration fail +before any work starts. Query-history flags apply only to database connections that support query history. The window flag applies to BigQuery and Snowflake; Postgres reads the current `pg_stat_statements` aggregate data instead of a -time-windowed history table. Query-history ingest runs after fast ingest and -requires deep ingest readiness. +time-windowed history table. Query-history ingest runs after the schema scan. When more than one connection is selected, database ingest runs first, then context-source ingest and memory updates run for context-source connections. 
@@ -72,14 +72,8 @@ ktx ingest # Build one database or context-source connection ktx ingest warehouse -# Force deterministic fast database ingest -ktx ingest warehouse --fast - -# Force deep database ingest with AI enrichment -ktx ingest warehouse --deep - # Include query-history usage patterns -ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history # Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 @@ -154,8 +148,8 @@ KTX_INGEST_TRACE_LEVEL=trace ktx ingest metabase | Error | Cause | Recovery | |-------|-------|----------| | Connection not configured | The connection id is not present in `ktx.yaml` | Add the connection with `ktx setup` or update `ktx.yaml` | -| Deep readiness is missing | `--deep` or query history needs model, embedding, and scan-enrichment configuration | Run `ktx setup` or rerun with `--fast` | -| Query history is unsupported | The selected database driver does not support query history | Run fast ingest without query-history flags | +| Enrichment is not configured | Database ingest needs a model, embeddings, and scan-enrichment configuration | Run `ktx setup` to configure a model and embeddings | +| Query history is unsupported | The selected database driver does not support query history | Run ingest without query-history flags | | Python runtime is missing | The selected ingest target needs runtime-backed SQL analysis or source parsing | Accept the interactive prompt, rerun with `--yes`, or run the suggested `ktx admin runtime install` command | -| Context-source options were ignored | Depth and query-history flags were supplied for a context-source connection | Omit database-only flags when ingesting context-source connections | +| Context-source options were ignored | Query-history flags were supplied for a context-source connection | Omit database-only flags when ingesting context-source connections | | Text ingest stops early | `--fail-fast` was 
used and one item failed | Fix the failed item or rerun without `--fail-fast` to collect all failures | diff --git a/docs-site/content/docs/cli-reference/ktx-setup.mdx b/docs-site/content/docs/cli-reference/ktx-setup.mdx index 415b0e6e..0da7b339 100644 --- a/docs-site/content/docs/cli-reference/ktx-setup.mdx +++ b/docs-site/content/docs/cli-reference/ktx-setup.mdx @@ -131,8 +131,8 @@ BigQuery; and `databases` for ClickHouse. Query history setup is supported for Postgres, BigQuery, and Snowflake. The window flag applies to BigQuery and Snowflake; Postgres reads the current `pg_stat_statements` aggregate data instead of a time-windowed history table. -Enabling query history makes deep ingest readiness matter for later -`ktx ingest` runs. +Later `ktx ingest` runs build enriched context and need a configured model and +embeddings, including when query history is enabled. When query history is enabled for PostgreSQL, Snowflake, or BigQuery, `ktx setup` runs a non-blocking readiness probe after the connection test diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index 4a919d45..13105851 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -66,8 +66,9 @@ read, how to think, and where to put the results. ## Minimal config A working `ktx.yaml` needs one entry in `connections`. Everything else accepts -defaults. The example below is enough for `ktx ingest warehouse` to run a fast -schema scan against a local Postgres. +defaults. The example below registers a local Postgres connection; building +context with `ktx ingest warehouse` also needs a model and embeddings, which +`ktx setup` configures. ```yaml connections: @@ -123,7 +124,7 @@ context-source drivers share the map. Warehouse connections are open objects: the listed fields are validated, and any other field is preserved and passed through to the connector. 
Use -`enabled_tables` to scope deep ingest to a specific list of +`enabled_tables` to scope ingest to a specific list of `schema.table` names - useful for smoke tests. ```yaml diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 7402d6d9..66f46a79 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -236,7 +236,7 @@ Testing warehouse Connection test passed Building schema context for warehouse - Running fast database ingest + Running database scan ``` If setup exits early, rerun `ktx setup` in the same directory. **ktx** keeps @@ -268,13 +268,13 @@ Agent integration ready: yes (codex:project) For a structured check inside scripts, use `ktx status --json`. -When setup builds deep context, its final context check looks like: +When setup finishes building context, its final context check looks like: ```text ktx context is ready for agents. Databases: - warehouse: deep context complete + warehouse: database context complete Context sources: dbt_main: memory update complete @@ -326,7 +326,7 @@ ktx setup \ Then build context: ```bash -ktx ingest warehouse --fast +ktx ingest warehouse ``` See [ktx setup](/docs/cli-reference/ktx-setup) for the full automation flag diff --git a/docs-site/content/docs/guides/building-context.mdx b/docs-site/content/docs/guides/building-context.mdx index d6d58053..b806c424 100644 --- a/docs-site/content/docs/guides/building-context.mdx +++ b/docs-site/content/docs/guides/building-context.mdx @@ -24,7 +24,9 @@ external metadata can attach to known warehouse tables. ## Database ingest -Database ingest records table, column, type, constraint, and row-count context. +Database ingest always builds enriched context: tables, columns, types, +constraints, and row counts, plus AI-generated descriptions, embeddings, and +relationship evidence. 
```bash # Build one configured database connection @@ -34,23 +36,8 @@ ktx ingest warehouse ktx ingest --all ``` -Depth controls how much context **ktx** builds: - -| Flag | Best for | What it does | -|------|----------|--------------| -| `--fast` | First setup, quick refreshes, CI smoke checks | Deterministic fast ingest with tables, columns, types, constraints, and row counts | -| `--deep` | Agent-ready context for real analysis | Fast ingest plus deep enrichment with descriptions, embeddings, relationship evidence, and optional query history | - -Examples: - -```bash -ktx ingest warehouse --fast -ktx ingest warehouse --deep -ktx ingest --all --deep -``` - -Deep ingest needs LLM and embedding readiness. Otherwise run `ktx setup` or use -`--fast`. +Enriched ingest needs a configured model and embeddings. Run `ktx setup` first; +connections without that configuration fail before any work starts. With `claude-code`, **ktx** agent loops can invoke only the **ktx** MCP tools for the current run. @@ -64,7 +51,7 @@ Enable it during setup, store it under `connections..context.queryHistory`, or request it for one run: ```bash -ktx ingest warehouse --deep --query-history +ktx ingest warehouse --query-history # Set the lookback window for BigQuery or Snowflake query history ktx ingest warehouse --query-history-window-days 30 ``` @@ -74,8 +61,8 @@ for one run. ## Relationship evidence -**ktx** scores relationship candidates during supported deep database ingest. The -public CLI does not expose separate relationship review subcommands. +**ktx** scores relationship candidates during database ingest. The public CLI +does not expose separate relationship review subcommands. 
## Context-source ingest @@ -159,7 +146,7 @@ After interactive setup: ```bash ktx status -ktx ingest --all --deep +ktx ingest --all ktx status ``` @@ -176,8 +163,8 @@ ktx wiki "revenue" --json --limit 10 | Symptom | Likely cause | Recovery | |---------|--------------|----------| | Connection not configured | The connection id is missing from `ktx.yaml` | Add it with `ktx setup` | -| Deep readiness is missing | LLM or embeddings are not setup-ready | Run `ktx setup`, or rerun with `--fast` | -| Query history is unsupported | The selected database driver does not expose query history | Run fast ingest without query-history flags | +| Enrichment is not configured | LLM or embeddings are not setup-ready | Run `ktx setup` to configure a model and embeddings | +| Query history is unsupported | The selected database driver does not expose query history | Run ingest without query-history flags | | No connections configured | The project has no entries under `connections` | Run `ktx setup` and add a database or context-source connection | -| Context-source flags have no effect | Depth and query-history flags were supplied for a context-source connector | Use those flags only for database connections | +| Context-source flags have no effect | Query-history flags were supplied for a context-source connector | Use query-history flags only for database connections | | Text ingest stops early | `--fail-fast` stopped on the first failed item | Fix the item or rerun without `--fail-fast` | diff --git a/docs-site/content/docs/guides/serving-agents.mdx b/docs-site/content/docs/guides/serving-agents.mdx index 4c1ced4b..133739b7 100644 --- a/docs-site/content/docs/guides/serving-agents.mdx +++ b/docs-site/content/docs/guides/serving-agents.mdx @@ -111,12 +111,13 @@ non-obvious terms. 
Agents can refresh context when the user asks them to: ```bash -ktx ingest warehouse --fast +ktx ingest warehouse ktx ingest ktx ingest --file docs/revenue-notes.md --connection-id warehouse ``` -Use `--deep` only when LLM and embedding setup is ready. +Database ingest builds enriched context and requires a configured model and +embeddings; run `ktx setup` first if they are not ready. ## Good agent behavior diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index 81b8d400..6cb2d26f 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -517,5 +517,5 @@ No authentication required - SQLite is file-based. The file must be readable by | Connection URL appears in git diff | A literal credential URL was written to `ktx.yaml` | Replace it with `env:NAME` or `file:/path/to/secret` and rotate exposed credentials | | Database ingest returns no tables | Schema, database, or project filter is wrong, or the user lacks metadata permissions | Verify the schema list and grant metadata read permissions | | Query history is empty | Query history extension or warehouse history view is unavailable | Enable the warehouse-specific history feature, then rerun `ktx ingest --query-history` or `ktx setup` | -| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on fast schema context | +| Column statistics are missing | Connector cannot access stats tables or the warehouse does not expose them | Grant stats permissions where supported; otherwise rely on schema-level context without column statistics | | Semantic query execution fails | Connection is missing, unreachable, or query execution is disabled | Run `ktx connection test ` and check the `ktx sl query` flags | diff --git a/docs/terminology.md b/docs/terminology.md index 
9da59456..4c9ec3cb 100644 --- a/docs/terminology.md +++ b/docs/terminology.md @@ -77,8 +77,6 @@ maintains, validates, and serves that layer. | Connection ref in prose | **connection id** (lowercase, two words) | "connection ID" | | CLI arg/flag literal | `connectionId` (code font) | — | | File path placeholder | `` (code font) | — | -| Fast schema mode | **fast ingest** | schema ingest, schema-only ingest | -| AI-enriched mode | **deep ingest** | AI-enriched ingest | | Ingest of a primary connection | **database ingest** | — | | Ingest of a context-source connection | **context-source ingest** | bare "source ingest" | | Wiki capture | **text ingest** | — | diff --git a/packages/cli/src/commands/ingest-commands.ts b/packages/cli/src/commands/ingest-commands.ts index 9ffd2562..b5efe443 100644 --- a/packages/cli/src/commands/ingest-commands.ts +++ b/packages/cli/src/commands/ingest-commands.ts @@ -29,8 +29,6 @@ export function registerIngestCommands( .usage('[options] [connectionId]') .argument('[connectionId]', 'Configured connection id to ingest (omit to ingest all)') .option('--all', 'Ingest all configured connections', false) - .addOption(new Option('--fast', 'Use deterministic database schema ingest').conflicts('deep')) - .addOption(new Option('--deep', 'Use AI-enriched database ingest').conflicts('fast')) .addOption(new Option('--query-history', 'Include database query-history usage patterns').conflicts('noQueryHistory')) .addOption(new Option('--no-query-history', 'Skip database query-history usage patterns')) .option('--query-history-window-days ', 'Query-history lookback window for this run', parsePositiveIntegerOption) @@ -87,8 +85,6 @@ export function registerIngestCommands( all: selection.kind === 'all', json: options.json === true, inputMode: options.input === false ? 'disabled' : 'auto', - ...(options.fast === true ? { depth: 'fast' as const } : {}), - ...(options.deep === true ? 
{ depth: 'deep' as const } : {}), queryHistory, ...(options.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: options.queryHistoryWindowDays } : {}), cliVersion: context.packageInfo.version, diff --git a/packages/cli/src/connection-drivers.ts b/packages/cli/src/connection-drivers.ts new file mode 100644 index 00000000..4f10e663 --- /dev/null +++ b/packages/cli/src/connection-drivers.ts @@ -0,0 +1,21 @@ +import type { KtxProjectConnectionConfig } from './context/project/config.js'; + +const KTX_DATABASE_DRIVER_IDS = new Set([ + 'sqlite', + 'postgres', + 'mysql', + 'clickhouse', + 'sqlserver', + 'bigquery', + 'snowflake', +]); + +export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { + return String(connection.driver ?? '') + .trim() + .toLowerCase(); +} + +export function isDatabaseDriver(driver: string): boolean { + return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); +} diff --git a/packages/cli/src/context-build-view.ts b/packages/cli/src/context-build-view.ts index 9a06d39a..4b5be38b 100644 --- a/packages/cli/src/context-build-view.ts +++ b/packages/cli/src/context-build-view.ts @@ -88,7 +88,6 @@ export interface ContextBuildArgs { targetConnectionId?: string; all?: boolean; entrypoint?: 'setup' | 'ingest'; - depth?: Extract['depth']; queryHistory?: Extract['queryHistory']; queryHistoryWindowDays?: number; scanMode?: Extract['scanMode']; @@ -371,19 +370,17 @@ function retryCommand(input: { projectDir?: string; entrypoint?: 'setup' | 'ingest'; connectionId?: string; - depth?: 'fast' | 'deep'; queryHistory?: boolean; queryHistoryWindowDays?: number; }): string { const projectPart = input.projectDir ? ` --project-dir ${input.projectDir}` : ''; if (input.entrypoint === 'ingest' && input.connectionId) { - const depthPart = input.depth ? ` --${input.depth}` : ''; const queryHistoryPart = input.queryHistory ? 
' --query-history' : ''; const windowPart = input.queryHistory && input.queryHistoryWindowDays !== undefined ? ` --query-history-window-days ${input.queryHistoryWindowDays}` : ''; - return `ktx ingest ${input.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; + return `ktx ingest ${input.connectionId}${projectPart}${queryHistoryPart}${windowPart}`; } return input.projectDir ? `ktx setup --project-dir ${input.projectDir}` : 'ktx setup'; } @@ -746,7 +743,6 @@ function appendRetryIfNeeded(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`; @@ -769,7 +765,6 @@ function failureTextForTarget(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`, @@ -784,7 +779,6 @@ function failureTextForTarget(input: { projectDir: input.projectDir, entrypoint: input.entrypoint, connectionId: input.target.connectionId, - depth: input.target.databaseDepth, queryHistory: input.target.queryHistory?.enabled === true, queryHistoryWindowDays: input.target.queryHistory?.windowDays, })}`, @@ -868,7 +862,6 @@ export async function runContextBuild( projectDir: args.projectDir, ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), all: args.all ?? true, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? { scanMode: args.scanMode } : {}), @@ -935,7 +928,6 @@ export async function runContextBuild( all: args.all ?? 
true, json: false, inputMode: args.inputMode, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? { scanMode: args.scanMode } : {}), diff --git a/packages/cli/src/context/project/driver-schemas.ts b/packages/cli/src/context/project/driver-schemas.ts index 6b4dc017..f9a3639f 100644 --- a/packages/cli/src/context/project/driver-schemas.ts +++ b/packages/cli/src/context/project/driver-schemas.ts @@ -30,7 +30,7 @@ function warehouseConnectionSchema(driver: .array(z.string().min(1)) .optional() .describe( - 'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing deep ingest on a single table.', + 'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing ingest on a single table.', ), }) .describe( diff --git a/packages/cli/src/ingest-depth.ts b/packages/cli/src/ingest-depth.ts deleted file mode 100644 index b8957763..00000000 --- a/packages/cli/src/ingest-depth.ts +++ /dev/null @@ -1,75 +0,0 @@ -import type { KtxProjectConfig, KtxProjectConnectionConfig } from './context/project/config.js'; - -export type KtxDatabaseContextDepth = 'fast' | 'deep'; - -const KTX_DATABASE_DRIVER_IDS = new Set([ - 'sqlite', - 'postgres', - 'mysql', - 'clickhouse', - 'sqlserver', - 'bigquery', - 'snowflake', -]); - -export function normalizeConnectionDriver(connection: KtxProjectConnectionConfig): string { - return String(connection.driver ?? 
'') - .trim() - .toLowerCase(); -} - -export function isDatabaseDriver(driver: string): boolean { - return KTX_DATABASE_DRIVER_IDS.has(driver.trim().toLowerCase()); -} - -function connectionContextRecord(connection: KtxProjectConnectionConfig): Record { - const context = connection.context; - return typeof context === 'object' && context !== null && !Array.isArray(context) - ? (context as Record) - : {}; -} - -export function databaseContextDepth(connection: KtxProjectConnectionConfig): KtxDatabaseContextDepth | undefined { - const depth = connectionContextRecord(connection).depth; - return depth === 'fast' || depth === 'deep' ? depth : undefined; -} - -export function withDatabaseContextDepth( - connection: KtxProjectConnectionConfig, - depth: KtxDatabaseContextDepth, -): KtxProjectConnectionConfig { - return { - ...connection, - context: { - ...connectionContextRecord(connection), - depth, - }, - }; -} - -export function deepReadinessGaps(config: KtxProjectConfig): string[] { - const gaps: string[] = []; - if (config.llm.provider.backend === 'none' || !config.llm.models.default) { - gaps.push('model configuration'); - } - - if (config.scan.enrichment.mode !== 'llm') { - gaps.push('scan enrichment mode'); - } - - const embeddings = config.scan.enrichment.embeddings; - if ( - !embeddings || - embeddings.backend === 'none' || - !embeddings.model || - embeddings.dimensions <= 0 - ) { - gaps.push('scan embeddings'); - } - - return gaps; -} - -export function recommendedDatabaseContextDepth(config: KtxProjectConfig): KtxDatabaseContextDepth { - return deepReadinessGaps(config).length === 0 ? 
'deep' : 'fast'; -} diff --git a/packages/cli/src/public-ingest-copy.ts b/packages/cli/src/public-ingest-copy.ts index be1206c1..86423f74 100644 --- a/packages/cli/src/public-ingest-copy.ts +++ b/packages/cli/src/public-ingest-copy.ts @@ -12,7 +12,7 @@ const DATABASE_INGEST_REPLACEMENTS: Array<[RegExp, string]> = [ 'Database enrichment failed after schema context completed', ], [/\bstructural scan\b/gi, 'schema context'], - [/\benriched scan\b/gi, 'deep database ingest'], + [/\benriched scan\b/gi, 'database ingest'], [/\bscan results\b/gi, 'database context'], ]; diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 60bceecd..25fe30dd 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -1,16 +1,10 @@ import { getKtxCliPackageInfo } from './cli-runtime.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; -import type { KtxProjectConnectionConfig } from './context/project/config.js'; +import type { KtxProjectConfig, KtxProjectConnectionConfig } from './context/project/config.js'; import type { KtxProgressPort } from './context/scan/types.js'; import type { KtxCliIo } from './index.js'; import type { KtxIngestArgs, KtxIngestDeps, KtxIngestProgressUpdate } from './ingest.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, - deepReadinessGaps, - isDatabaseDriver, - normalizeConnectionDriver, -} from './ingest-depth.js'; +import { isDatabaseDriver, normalizeConnectionDriver } from './connection-drivers.js'; import { ensureManagedPythonCommandRuntime, type KtxManagedPythonInstallPolicy, @@ -29,7 +23,6 @@ profileMark('module:public-ingest'); type KtxPublicIngestStepName = 'database-schema' | 'query-history' | 'source-ingest' | 'memory-update'; type KtxPublicIngestStepStatus = 'done' | 'skipped' | 'failed' | 'not-run'; type KtxPublicIngestInputMode = 'auto' | 'disabled'; -type KtxPublicIngestDepth = KtxDatabaseContextDepth; type 
KtxPublicIngestQueryHistoryFlag = 'default' | 'enabled' | 'disabled'; type HistoricSqlDialect = 'postgres' | 'bigquery' | 'snowflake'; @@ -41,7 +34,6 @@ export type KtxPublicIngestArgs = all: boolean; json: boolean; inputMode: KtxPublicIngestInputMode; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -58,7 +50,6 @@ export interface KtxPublicIngestPlanTarget { sourceDir?: string; debugCommand: string; steps: KtxPublicIngestStepName[]; - databaseDepth?: KtxPublicIngestDepth; detectRelationships?: boolean; preflightFailure?: string; queryHistory?: { @@ -67,7 +58,6 @@ export interface KtxPublicIngestPlanTarget { windowDays?: number; pullConfig?: Record; unsupported?: boolean; - skippedStoredByFast?: boolean; }; } @@ -121,7 +111,6 @@ interface KtxPublicContextBuildArgs { inputMode: 'auto' | 'disabled'; targetConnectionId?: string; all?: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -154,7 +143,6 @@ interface KtxUnsupportedQueryHistoryWarning { interface KtxPublicIngestWarningAccumulator { warnings: string[]; - ignoredDepthForSources: string[]; ignoredQueryHistoryForSources: string[]; unsupportedQueryHistoryForDatabases: KtxUnsupportedQueryHistoryWarning[]; } @@ -162,7 +150,6 @@ interface KtxPublicIngestWarningAccumulator { function createWarningAccumulator(): KtxPublicIngestWarningAccumulator { return { warnings: [], - ignoredDepthForSources: [], ignoredQueryHistoryForSources: [], unsupportedQueryHistoryForDatabases: [], }; @@ -233,7 +220,6 @@ function finalizeWarnings( accumulator: KtxPublicIngestWarningAccumulator, args: { all: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; }, @@ -242,11 +228,6 @@ function finalizeWarnings( ...accumulator.warnings, 
...unsupportedQueryHistoryWarnings(accumulator.unsupportedQueryHistoryForDatabases, args.all), ]; - const depthOption = args.depth ? `--${args.depth}` : null; - if (depthOption) { - const warning = sourceIgnoredWarning(depthOption, accumulator.ignoredDepthForSources, args.all); - if (warning) warnings.push(warning); - } if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { const warning = sourceIgnoredWarning('--query-history', accumulator.ignoredQueryHistoryForSources, args.all); if (warning) warnings.push(warning); @@ -317,13 +298,12 @@ function resolveDatabaseTargetOptions(input: { driver: string; connection: KtxProjectConnectionConfig; args: { - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; }; warnings: KtxPublicIngestWarningAccumulator; -}): Pick { +}): Pick { const storedQh = storedQueryHistory(input.connection); const dialect = queryHistoryDialectByDriver.get(input.driver); const explicitQueryHistory = input.args.queryHistory ?? 'default'; @@ -332,7 +312,6 @@ function resolveDatabaseTargetOptions(input: { const requestedQh = explicitQueryHistory === 'enabled' || (explicitQueryHistory !== 'disabled' && (windowOverrideRequested || storedEnabled)); - let depth = input.args.depth ?? databaseContextDepth(input.connection) ?? 'fast'; const queryHistory = { enabled: false, ...(input.args.queryHistoryWindowDays !== undefined @@ -350,19 +329,13 @@ function resolveDatabaseTargetOptions(input: { explicitQueryHistory === 'enabled' || input.args.queryHistoryWindowDays !== undefined ? 
'explicit' : 'stored', }); return { - databaseDepth: depth, queryHistory: { ...queryHistory, unsupported: true }, steps: ['database-schema'], }; } if (requestedQh && dialect) { - if (depth === 'fast') { - input.warnings.warnings.push(`--query-history requires deep ingest; running ${input.connectionId} with --deep.`); - } - depth = 'deep'; return { - databaseDepth: depth, queryHistory: { ...queryHistory, enabled: true, @@ -378,30 +351,35 @@ function resolveDatabaseTargetOptions(input: { }; } - if (input.args.depth === 'fast' && explicitQueryHistory !== 'enabled' && storedEnabled) { - input.warnings.warnings.push( - `${input.connectionId} has query history enabled in ktx.yaml, but --fast skips query-history processing.`, - ); - return { - databaseDepth: 'fast', - queryHistory: { ...queryHistory, skippedStoredByFast: true }, - steps: ['database-schema'], - }; - } - return { - databaseDepth: depth, queryHistory, steps: ['database-schema'], }; } +function enrichmentReadinessGaps(config: KtxProjectConfig): string[] { + const gaps: string[] = []; + if (config.llm.provider.backend === 'none' || !config.llm.models.default) { + gaps.push('model configuration'); + } + + if (config.scan.enrichment.mode !== 'llm') { + gaps.push('scan enrichment mode'); + } + + const embeddings = config.scan.enrichment.embeddings; + if (!embeddings || embeddings.backend === 'none' || !embeddings.model || embeddings.dimensions <= 0) { + gaps.push('scan embeddings'); + } + + return gaps; +} + function targetForConnection( connectionId: string, connection: KtxProjectConnectionConfig, projectConfig: KtxPublicIngestProject['config'], args: { - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -412,9 +390,6 @@ function targetForConnection( const adapter = sourceAdapterByDriver.get(driver); const sourceDir = sourceDirForConnection(connection); if (adapter) { - if (args.depth) { - 
warnings.ignoredDepthForSources.push(connectionId); - } if (args.queryHistory === 'enabled' || args.queryHistoryWindowDays !== undefined) { warnings.ignoredQueryHistoryForSources.push(connectionId); } @@ -431,18 +406,18 @@ function targetForConnection( if (isDatabaseDriver(driver)) { const options = resolveDatabaseTargetOptions({ connectionId, driver, connection, args, warnings }); - const gaps = options.databaseDepth === 'deep' ? deepReadinessGaps(projectConfig) : []; + const gaps = enrichmentReadinessGaps(projectConfig); return { connectionId, driver, operation: 'database-ingest', debugCommand: `ktx ingest ${connectionId} --debug`, - detectRelationships: options.databaseDepth === 'deep' && projectConfig.scan.relationships.enabled, + detectRelationships: projectConfig.scan.relationships.enabled, ...(gaps.length > 0 ? { - preflightFailure: `${connectionId} requires deep ingest readiness: ${gaps.join( + preflightFailure: `${connectionId} cannot be ingested: enrichment is not configured (${gaps.join( ', ', - )}. Run ktx setup or rerun with --fast.`, + )}). Run ktx setup to configure a model and embeddings.`, } : {}), ...options, @@ -458,7 +433,6 @@ export function buildPublicIngestPlan( projectDir: string; targetConnectionId?: string; all: boolean; - depth?: KtxPublicIngestDepth; queryHistory?: KtxPublicIngestQueryHistoryFlag; queryHistoryWindowDays?: number; scanMode?: Extract['mode']; @@ -522,13 +496,12 @@ function retryCommandForTarget( args: Extract, ): string { const projectPart = ` --project-dir ${args.projectDir}`; - const depthPart = target.databaseDepth ? ` --${target.databaseDepth}` : ''; const queryHistoryPart = target.queryHistory?.enabled === true ? ' --query-history' : ''; const windowPart = target.queryHistory?.enabled === true && target.queryHistory.windowDays !== undefined ? 
` --query-history-window-days ${target.queryHistory.windowDays}` : ''; - return `ktx ingest ${target.connectionId}${projectPart}${depthPart}${queryHistoryPart}${windowPart}`; + return `ktx ingest ${target.connectionId}${projectPart}${queryHistoryPart}${windowPart}`; } function trimTrailingPeriod(value: string): string { @@ -830,7 +803,7 @@ export async function executePublicIngestTarget( command: 'run', projectDir: args.projectDir, connectionId: target.connectionId, - mode: target.databaseDepth === 'deep' ? 'enriched' : 'structural', + mode: 'enriched', detectRelationships: target.detectRelationships === true, dryRun: false, ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), @@ -979,7 +952,6 @@ export async function runKtxPublicIngest( all: args.all, entrypoint: 'ingest', inputMode: args.inputMode, - ...(args.depth ? { depth: args.depth } : {}), ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), ...(args.scanMode ? 
{ scanMode: args.scanMode } : {}), diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index dc289278..63b4dbdf 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -7,12 +7,7 @@ import { serializeKtxProjectConfig } from './context/project/config.js'; import type { KtxCliIo } from './cli-runtime.js'; import { errorMessage, writePrefixedLines } from './clack.js'; import { buildPublicIngestPlan } from './public-ingest.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, -} from './ingest-depth.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; -import { ensureSetupDatabaseContextDepths } from './setup-database-context-depth.js'; import { type ContextBuildSourceProgressUpdate, runContextBuild, @@ -353,16 +348,6 @@ async function readLatestScanReport(projectDir: string, connectionId: string): P return reports.at(-1)?.report ?? null; } -function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean { - if (!isRecord(report)) { - return false; - } - if (report.connectionId !== connectionId || report.dryRun === true) { - return false; - } - return stringArrayValue(isRecord(report.artifactPaths) ? 
report.artifactPaths.manifestShards : undefined).length > 0; -} - function scanReportHasCompletedDeepEnrichment( report: unknown, connectionId: string, @@ -389,18 +374,6 @@ function scanReportHasCompletedDeepEnrichment( ); } -function scanReportSatisfiesDepth(input: { - report: unknown; - connectionId: string; - depth: KtxDatabaseContextDepth; - relationshipsRequired: boolean; -}): boolean { - if (input.depth === 'fast') { - return scanReportHasSchemaManifest(input.report, input.connectionId); - } - return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired); -} - async function verifyPrimarySourceScans( project: KtxLocalProject, connectionIds: string[], @@ -408,15 +381,9 @@ async function verifyPrimarySourceScans( const details: string[] = []; const relationshipsRequired = project.config.scan.relationships.enabled; for (const connectionId of connectionIds) { - const connection = project.config.connections[connectionId]; - const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; const report = await readLatestScanReport(project.projectDir, connectionId); - if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) { - details.push( - depth === 'fast' - ? 
`${connectionId}: schema context has not completed.` - : `${connectionId}: deep database context has not completed.`, - ); + if (!scanReportHasCompletedDeepEnrichment(report, connectionId, relationshipsRequired)) { + details.push(`${connectionId}: database context has not completed.`); } } return { ready: details.length === 0, details }; @@ -482,7 +449,6 @@ function writeSkippedContext(projectDir: string, io: KtxCliIo): void { } function writeSuccess( - project: KtxLocalProject, readiness: KtxSetupContextReadiness, targets: KtxSetupContextTargets, io: KtxCliIo, @@ -493,9 +459,7 @@ function writeSuccess( io.stdout.write(' none\n'); } else { for (const connectionId of targets.primarySourceConnectionIds) { - const connection = project.config.connections[connectionId]; - const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; - io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 'deep context complete' : 'schema context complete'}\n`); + io.stdout.write(` ${connectionId}: database context complete\n`); } } io.stdout.write('\nContext sources:\n'); @@ -636,7 +600,7 @@ async function runBuild( failureReason: undefined, ...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}), }); - writeSuccess(project, readiness, targets, io); + writeSuccess(readiness, targets, io); return { status: 'ready', projectDir: args.projectDir, runId }; } @@ -678,17 +642,8 @@ export async function runKtxSetupContextStep( deps: KtxSetupContextDeps = {}, ): Promise { try { - let project = await loadKtxProject({ projectDir: args.projectDir }); + const project = await loadKtxProject({ projectDir: args.projectDir }); const prompts = deps.prompts ?? 
createPromptAdapter(); - const depthProject = await ensureSetupDatabaseContextDepths({ - project, - args, - prompts, - }); - if (depthProject === 'back') { - return { status: 'back', projectDir: args.projectDir }; - } - project = depthProject; const existingState = await readKtxSetupContextState(args.projectDir); const completedSteps = (await readKtxSetupState(args.projectDir)).completed_steps; if (completedSteps.includes('context') && existingState.status === 'completed') { diff --git a/packages/cli/src/setup-database-context-depth.ts b/packages/cli/src/setup-database-context-depth.ts deleted file mode 100644 index 20df813c..00000000 --- a/packages/cli/src/setup-database-context-depth.ts +++ /dev/null @@ -1,131 +0,0 @@ -import { writeFile } from 'node:fs/promises'; -import { type KtxLocalProject, loadKtxProject } from './context/project/project.js'; -import { type KtxProjectConnectionConfig, serializeKtxProjectConfig } from './context/project/config.js'; -import { - type KtxDatabaseContextDepth, - databaseContextDepth, - deepReadinessGaps, - isDatabaseDriver, - normalizeConnectionDriver, - recommendedDatabaseContextDepth, - withDatabaseContextDepth, -} from './ingest-depth.js'; -import type { KtxSetupPromptOption } from './setup-prompts.js'; - -export interface KtxSetupDatabaseContextDepthArgs { - inputMode: 'auto' | 'disabled'; -} - -export interface KtxSetupDatabaseContextDepthPromptAdapter { - select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; -} - -function databaseConnectionsNeedingDepth(project: KtxLocalProject): string[] { - return Object.entries(project.config.connections) - .filter(([, connection]) => isDatabaseDriver(normalizeConnectionDriver(connection))) - .filter(([, connection]) => databaseContextDepth(connection) === undefined) - .map(([connectionId]) => connectionId) - .sort((left, right) => left.localeCompare(right)); -} - -async function chooseSetupDatabaseContextDepth(input: { - project: KtxLocalProject; - args: 
KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - const recommended = recommendedDatabaseContextDepth(input.project.config); - if (input.args.inputMode === 'disabled') { - return recommended; - } - - const deepReady = deepReadinessGaps(input.project.config).length === 0; - const options = - recommended === 'deep' - ? [ - { - value: 'deep', - label: 'Deep: AI descriptions, embeddings, relationships, slower', - hint: 'recommended', - }, - { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, - { value: 'back', label: 'Back' }, - ] - : [ - { value: 'fast', label: 'Fast: schema only, no AI, quickest', hint: 'recommended' }, - { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, - { value: 'back', label: 'Back' }, - ]; - - const choice = await input.prompts.select({ - message: - 'How much database context should KTX build?\n\n' + - (deepReady - ? 'Deep is available because model, embedding, and scan enrichment are configured.' 
- : 'Fast is recommended because model, embedding, or scan enrichment is not configured.'), - options, - }); - if (choice === 'back') { - return 'back'; - } - if (choice === 'fast' || choice === 'deep') { - return choice; - } - return recommended; -} - -async function writeDatabaseContextDepths( - project: KtxLocalProject, - connectionIds: string[], - depth: KtxDatabaseContextDepth, -): Promise { - if (connectionIds.length === 0) { - return project; - } - const nextConnections = { ...project.config.connections }; - for (const connectionId of connectionIds) { - const connection = nextConnections[connectionId]; - if (connection) { - nextConnections[connectionId] = withDatabaseContextDepth(connection, depth); - } - } - const nextConfig = { ...project.config, connections: nextConnections }; - await writeFile(project.configPath, serializeKtxProjectConfig(nextConfig), 'utf-8'); - return await loadKtxProject({ projectDir: project.projectDir }); -} - -export async function ensureSetupDatabaseContextDepths(input: { - project: KtxLocalProject; - args: KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - const missingDepthConnectionIds = databaseConnectionsNeedingDepth(input.project); - if (missingDepthConnectionIds.length === 0) { - return input.project; - } - - const depth = await chooseSetupDatabaseContextDepth(input); - if (depth === 'back') { - return 'back'; - } - return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, depth); -} - -export async function applySetupDatabaseContextDepth(input: { - project: KtxLocalProject; - connection: KtxProjectConnectionConfig; - args: KtxSetupDatabaseContextDepthArgs; - prompts: KtxSetupDatabaseContextDepthPromptAdapter; -}): Promise { - if ( - !isDatabaseDriver(normalizeConnectionDriver(input.connection)) || - databaseContextDepth(input.connection) !== undefined - ) { - return input.connection; - } - - const depth = await 
chooseSetupDatabaseContextDepth(input); - if (depth === 'back') { - return 'back'; - } - return withDatabaseContextDepth(input.connection, depth); -} diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index eb364228..09db1bde 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -29,7 +29,6 @@ import { } from './database-tree-picker.js'; import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxScan } from './scan.js'; -import { applySetupDatabaseContextDepth } from './setup-database-context-depth.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; import { emitTelemetryEvent } from './telemetry/index.js'; @@ -1614,45 +1613,10 @@ async function applyHistoricSqlConfigToExistingConnection(input: { prompts: input.prompts, }); if (withHistoricSql === 'back') return 'back'; - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: input.projectDir, - connectionId: input.connectionId, - connection: withHistoricSql, - args: input.args, - prompts: input.prompts, - }); - if (withContextDepth === 'back') return 'back'; await writeConnectionConfig({ projectDir: input.projectDir, connectionId: input.connectionId, - connection: withContextDepth, - }); -} - -async function maybeApplyContextDepthConfig(input: { - projectDir: string; - connectionId: string; - connection: KtxProjectConnectionConfig; - args: KtxSetupDatabasesArgs; - prompts: KtxSetupDatabasesPromptAdapter; -}): Promise { - const project = await loadKtxProject({ projectDir: input.projectDir }); - return await applySetupDatabaseContextDepth({ - project: { - ...project, - config: { - ...project.config, - connections: { - ...project.config.connections, - [input.connectionId]: input.connection, - }, - }, - }, - connection: input.connection, - args: { - inputMode: input.args.inputMode === 
'disabled' || input.args.databaseUrl ? 'disabled' : input.args.inputMode, - }, - prompts: input.prompts, + connection: withHistoricSql, }); } @@ -1698,7 +1662,7 @@ async function validateAndScanConnection(input: { deps: input.deps, }); writeSetupSection(input.io, `Building schema context for ${input.connectionId}`, [ - 'Running fast database ingest…', + 'Running database scan…', ]); let scanIo = createBufferedCommandIo(); let scanCode = await scanConnection(input.projectDir, input.connectionId, scanIo); @@ -1708,7 +1672,7 @@ async function validateAndScanConnection(input: { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), [ - `Fast database ingest failed for ${input.connectionId}.`, + `Database scan failed for ${input.connectionId}.`, 'Native SQLite is built for a different Node.js ABI.', `Detail: ${nativeSqliteDetail}`, 'Rebuilding Native SQLite with pnpm run native:rebuild…', @@ -1719,7 +1683,7 @@ async function validateAndScanConnection(input: { if (rebuildCode === 0) { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), - 'Native SQLite rebuild complete. Retrying fast database ingest…', + 'Native SQLite rebuild complete. Retrying database scan…', ); const retryScanIo = createBufferedCommandIo(); scanCode = await scanConnection(input.projectDir, input.connectionId, retryScanIo); @@ -1730,10 +1694,10 @@ async function validateAndScanConnection(input: { (chunk) => input.io.stderr.write(chunk), [ rebuildCode === 0 - ? `Fast database ingest still failed for ${input.connectionId} after rebuilding Native SQLite.` + ? 
`Database scan still failed for ${input.connectionId} after rebuilding Native SQLite.` : `Native SQLite rebuild failed for ${input.connectionId}.`, 'Fix: pnpm run native:rebuild', - `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast`, + `Retry: ktx ingest ${input.connectionId} --project-dir ${input.projectDir}`, ].join('\n'), ); } @@ -1742,8 +1706,8 @@ async function validateAndScanConnection(input: { writePrefixedLines( (chunk) => input.io.stderr.write(chunk), [ - `Fast database ingest failed for ${input.connectionId}.`, - `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --fast --debug`, + `Database scan failed for ${input.connectionId}.`, + `Debug command: ktx ingest ${input.connectionId} --project-dir ${input.projectDir} --debug`, ].join('\n'), ); } @@ -2167,22 +2131,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); } else { @@ -2193,22 +2145,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await 
writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); } @@ -2291,22 +2231,10 @@ export async function runKtxSetupDatabasesStep( returnToDriverSelection = true; break; } - const withContextDepth = await maybeApplyContextDepthConfig({ - projectDir: args.projectDir, - connectionId: connectionChoice.connectionId, - connection: withHistoricSql, - args, - prompts, - }); - if (withContextDepth === 'back') { - if (!canReturnToDriverSelection) return { status: 'back', projectDir: args.projectDir }; - returnToDriverSelection = true; - break; - } await writeConnectionConfig({ projectDir: args.projectDir, connectionId: connectionChoice.connectionId, - connection: withContextDepth, + connection: withHistoricSql, io, }); setupStatus = await validateAndScanConnection({ diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json index 13642c49..628c8f4b 100644 --- a/packages/cli/src/telemetry/events.schema.json +++ b/packages/cli/src/telemetry/events.schema.json @@ -365,7 +365,6 @@ "embeddings", "secrets", "databases", - "database-context-depth", "sources", "context", "agents", diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts index e73001ed..5e5b5335 100644 --- a/packages/cli/src/telemetry/events.ts +++ b/packages/cli/src/telemetry/events.ts @@ -38,7 +38,6 @@ const setupStepSchema = telemetryCommonEnvelopeSchema 'embeddings', 'secrets', 'databases', - 'database-context-depth', 'sources', 'context', 'agents', diff --git a/packages/cli/test/context-build-view.test.ts b/packages/cli/test/context-build-view.test.ts index 5936afa9..40e33606 100644 --- a/packages/cli/test/context-build-view.test.ts +++ b/packages/cli/test/context-build-view.test.ts @@ -228,11 +228,11 @@ describe('renderContextBuildView', () => { const rendered = renderContextBuildView(state, { styled: false, - warnings: 
['--deep affects database ingest only; ignoring it for docs.'], + warnings: ['--query-history affects database ingest only; ignoring it for docs.'], }); expect(rendered).toContain('Warnings:'); - expect(rendered).toContain('--deep affects database ingest only; ignoring it for docs.'); + expect(rendered).toContain('--query-history affects database ingest only; ignoring it for docs.'); }); it('renders public notices in the foreground view before warnings', () => { @@ -243,7 +243,6 @@ describe('renderContextBuildView', () => { operation: 'database-ingest', debugCommand: 'ktx ingest warehouse --debug', steps: ['database-schema', 'query-history'], - databaseDepth: 'deep', detectRelationships: true, queryHistory: { enabled: true, dialect: 'postgres' }, }, @@ -252,12 +251,12 @@ describe('renderContextBuildView', () => { const rendered = renderContextBuildView(state, { styled: false, notices: ['Schema ingest runs before query history for warehouse.'], - warnings: ['--query-history requires deep ingest; running warehouse with --deep.'], + warnings: ['--query-history is not supported for sqlite; running schema ingest for local.'], }); expect(rendered.indexOf('Notices:')).toBeLessThan(rendered.indexOf('Warnings:')); expect(rendered).toContain('Schema ingest runs before query history for warehouse.'); - expect(rendered).toContain('--query-history requires deep ingest; running warehouse with --deep.'); + expect(rendered).toContain('--query-history is not supported for sqlite; running schema ingest for local.'); }); it('renders dynamic separator matching header width', () => { @@ -653,7 +652,6 @@ describe('runContextBuild', () => { inputMode: 'disabled', targetConnectionId: 'warehouse', all: false, - depth: 'fast', queryHistory: 'default', }, io.io, @@ -665,7 +663,6 @@ describe('runContextBuild', () => { expect(executeTarget.mock.calls[0]?.[0]).toMatchObject({ connectionId: 'warehouse', operation: 'database-ingest', - databaseDepth: 'fast', }); 
expect(io.stdout()).toContain('Databases:'); expect(io.stdout()).toContain('warehouse'); @@ -716,7 +713,7 @@ describe('runContextBuild', () => { it('renders localhost SQL analysis refusal as a runtime failure during query history', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { targetIo.stderr.write('connect ECONNREFUSED 127.0.0.1:8765\n'); @@ -751,7 +748,7 @@ describe('runContextBuild', () => { it('uses captured query-history stderr instead of generic failed-at detail', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep', queryHistory: { enabled: true } } }, + warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true } } }, }); const executeTarget = vi.fn(async (target, _args, targetIo) => { targetIo.stdout.write('KTX scan completed\n'); @@ -768,7 +765,7 @@ describe('runContextBuild', () => { operation: 'query-history', status: 'failed', detail: - 'warehouse failed at query-history. Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history', + 'warehouse failed at query-history. 
Retry: ktx ingest warehouse --project-dir /tmp/project --query-history', }, { operation: 'source-ingest', status: 'skipped' }, { operation: 'memory-update', status: 'skipped' }, @@ -785,7 +782,7 @@ describe('runContextBuild', () => { expect(result).toEqual({ exitCode: 1 }); expect(io.stdout()).toContain('Missing bundled Python runtime manifest: /tmp/assets/python/manifest.json.'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command'); expect(io.stdout()).not.toContain('warehouse failed at query-history'); expect(io.stdout().match(/Retry: /g)).toHaveLength(1); @@ -899,12 +896,12 @@ describe('runContextBuild', () => { const io = makeIo(); const project: KtxPublicIngestProject = { ...projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }), config: { ...projectWithConnections({ warehouse: { driver: 'postgres' } }).config, connections: { - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }, llm: { provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret diff --git a/packages/cli/test/index.test.ts b/packages/cli/test/index.test.ts index a60c48f2..bd17e641 100644 --- a/packages/cli/test/index.test.ts +++ b/packages/cli/test/index.test.ts @@ -702,7 +702,7 @@ describe('runKtxCli', () => { const publicIngest = vi.fn().mockResolvedValue(0); await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', 'warehouse', '--fast', '--no-input'], testIo.io, { + runKtxCli(['--project-dir', tempDir, 'ingest', 'warehouse', '--no-input'], testIo.io, { publicIngest, }), ).resolves.toBe(0); @@ -715,7 +715,6 @@ describe('runKtxCli', () => { all: false, json: false, 
inputMode: 'disabled', - depth: 'fast', queryHistory: 'default', cliVersion, runtimeInstallPolicy: 'never', @@ -725,12 +724,12 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(`Project: ${tempDir}\n`); }); - it('routes public ingest --all --deep with JSON output', async () => { + it('routes public ingest --all with JSON output', async () => { const testIo = makeIo(); const publicIngest = vi.fn().mockResolvedValue(0); await expect( - runKtxCli(['--project-dir', tempDir, 'ingest', '--all', '--deep', '--json'], testIo.io, { + runKtxCli(['--project-dir', tempDir, 'ingest', '--all', '--json'], testIo.io, { publicIngest, }), ).resolves.toBe(0); @@ -742,7 +741,6 @@ describe('runKtxCli', () => { all: true, json: true, inputMode: 'auto', - depth: 'deep', queryHistory: 'default', cliVersion, runtimeInstallPolicy: 'prompt', @@ -786,20 +784,6 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input'); }); - it('rejects mutually exclusive public ingest depth flags before dispatch', async () => { - const testIo = makeIo(); - const publicIngest = vi.fn().mockResolvedValue(0); - - await expect( - runKtxCli(['--project-dir', '/tmp/project', 'ingest', 'warehouse', '--fast', '--deep'], testIo.io, { - publicIngest, - }), - ).resolves.toBe(1); - - expect(publicIngest).not.toHaveBeenCalled(); - expect(testIo.stderr()).toMatch(/option '--(deep|fast)' cannot be used with option '--(fast|deep)'/); - }); - it.each(['run', 'status', 'watch', 'replay'])( 'routes former ingest subcommand name "%s" as a connection id', async (connectionId) => { @@ -890,8 +874,6 @@ describe('runKtxCli', () => { expect(testIo.stdout()).toContain('Usage: ktx ingest'); expect(testIo.stdout()).toContain('Build or inspect KTX context'); expect(testIo.stdout()).toContain('--all'); - expect(testIo.stdout()).toContain('--fast'); - expect(testIo.stdout()).toContain('--deep'); expect(testIo.stdout()).toContain('--query-history'); 
expect(testIo.stdout()).toContain('--no-query-history'); expect(testIo.stdout()).toContain('--query-history-window-days '); diff --git a/packages/cli/test/public-ingest.test.ts b/packages/cli/test/public-ingest.test.ts index b926793c..41289208 100644 --- a/packages/cli/test/public-ingest.test.ts +++ b/packages/cli/test/public-ingest.test.ts @@ -88,7 +88,7 @@ function deepReadyProject( describe('buildPublicIngestPlan', () => { it('plans warehouse connections as scan targets and source connections as source ingest targets', () => { - const project = projectWithConnections({ + const project = deepReadyProject({ warehouse: { driver: 'postgres' }, prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, docs: { driver: 'notion' }, @@ -103,8 +103,7 @@ describe('buildPublicIngestPlan', () => { operation: 'database-ingest', debugCommand: 'ktx ingest warehouse --debug', steps: ['database-schema'], - databaseDepth: 'fast', - detectRelationships: false, + detectRelationships: true, queryHistory: { enabled: false }, }, { @@ -139,61 +138,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets.map((target) => target.connectionId).sort()).toEqual(['docs', 'warehouse']); }); - it('resolves database depth from flags, stored context, and defaults', () => { - const project = projectWithConnections({ - fast_default: { driver: 'postgres' }, - deep_default: { driver: 'postgres', context: { depth: 'deep' } }, - docs: { driver: 'notion' }, - }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'fast_default', - all: false, - queryHistory: 'default', - }).targets[0], - ).toMatchObject({ connectionId: 'fast_default', databaseDepth: 'fast', queryHistory: { enabled: false } }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'deep_default', - all: false, - queryHistory: 'default', - }).targets[0], - ).toMatchObject({ connectionId: 'deep_default', 
databaseDepth: 'deep' }); - - expect( - buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'docs', - all: false, - depth: 'deep', - queryHistory: 'default', - }).warnings, - ).toEqual(['--deep affects database ingest only; ignoring it for docs.']); - }); - - it('does not infer deep ingest from legacy scanMode values', () => { - const project = projectWithConnections({ - warehouse: { driver: 'postgres' }, - }); - - const plan = buildPublicIngestPlan(project, { - projectDir: '/tmp/project', - targetConnectionId: 'warehouse', - all: false, - scanMode: 'enriched', - }); - - expect(plan.targets[0]).toMatchObject({ - connectionId: 'warehouse', - databaseDepth: 'fast', - steps: ['database-schema'], - }); - }); - it('rejects stale local Looker source driver aliases', () => { const project = projectWithConnections({ local_looker: { driver: 'local_looker' } as never, @@ -204,8 +148,8 @@ describe('buildPublicIngestPlan', () => { ); }); - it('upgrades effective depth when query history is explicitly enabled', () => { - const project = projectWithConnections({ + it('enables query history when explicitly requested even if stored config disables it', () => { + const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false } } }, }); @@ -213,17 +157,16 @@ describe('buildPublicIngestPlan', () => { projectDir: '/tmp/project', targetConnectionId: 'warehouse', all: false, - depth: 'fast', queryHistory: 'enabled', queryHistoryWindowDays: 30, }); expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', queryHistory: { enabled: true, windowDays: 30, dialect: 'postgres' }, + steps: ['database-schema', 'query-history'], }); - expect(plan.warnings).toEqual(['--query-history requires deep ingest; running warehouse with --deep.']); + expect(plan.warnings).toEqual([]); }); it('warns and skips query history for unsupported database drivers', () => { @@ -238,7 +181,6 @@ 
describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'local', - databaseDepth: 'fast', queryHistory: { enabled: false, unsupported: true }, }); expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); @@ -249,12 +191,11 @@ describe('buildPublicIngestPlan', () => { deepReadyProject({ local: { driver: 'sqlite' }, mysql_warehouse: { driver: 'mysql' }, - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }), { projectDir: '/tmp/project', all: true, - depth: 'deep', queryHistory: 'enabled', }, ); @@ -326,7 +267,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', queryHistory: { enabled: true, dialect: 'postgres', windowDays: 30 }, steps: ['database-schema', 'query-history'], }); @@ -334,7 +274,7 @@ describe('buildPublicIngestPlan', () => { it('adds a schema-first notice when query history is explicitly enabled', () => { const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); expect( @@ -363,34 +303,15 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'local', - databaseDepth: 'fast', queryHistory: { enabled: false, windowDays: 30, unsupported: true }, steps: ['database-schema'], }); expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']); }); - it('aggregates ignored database-depth warnings for all source targets', () => { - const plan = buildPublicIngestPlan( - projectWithConnections({ - warehouse: { driver: 'postgres' }, - docs: { driver: 'notion' }, - dbt: { driver: 'dbt' }, - }), - { - projectDir: '/tmp/project', - all: true, - depth: 'deep', - queryHistory: 'default', - }, - ); - - expect(plan.warnings).toEqual(['--deep ignored for 2 non-database 
sources.']); - }); - - it('records a preflight failure for deep database ingest when readiness config is missing', () => { + it('records a preflight failure for database ingest when enrichment readiness config is missing', () => { const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const plan = buildPublicIngestPlan(project, { @@ -402,15 +323,14 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', preflightFailure: - 'warehouse requires deep ingest readiness: model configuration, scan enrichment mode, scan embeddings. Run ktx setup or rerun with --fast.', + 'warehouse cannot be ingested: enrichment is not configured (model configuration, scan enrichment mode, scan embeddings). Run ktx setup to configure a model and embeddings.', }); }); - it('honors scan.relationships.enabled when planning deep database ingest', () => { + it('honors scan.relationships.enabled when planning database ingest', () => { const plan = buildPublicIngestPlan( - deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }, false), + deepReadyProject({ warehouse: { driver: 'postgres' } }, false), { projectDir: '/tmp/project', targetConnectionId: 'warehouse', @@ -421,7 +341,6 @@ describe('buildPublicIngestPlan', () => { expect(plan.targets[0]).toMatchObject({ connectionId: 'warehouse', - databaseDepth: 'deep', detectRelationships: false, }); }); @@ -432,11 +351,11 @@ describe('runKtxPublicIngest', () => { vi.unstubAllEnvs(); }); - it('maps fast and deep database targets to scan internals', async () => { + it('maps database targets to enriched scan internals', async () => { const io = makeIo(); const project = deepReadyProject({ - fast: { driver: 'postgres' }, - deep: { driver: 'postgres', context: { depth: 'deep' } }, + first: { driver: 'postgres' }, + second: { driver: 'postgres' }, }); const 
runScan = vi.fn(async () => 0); @@ -450,12 +369,12 @@ describe('runKtxPublicIngest', () => { expect(runScan).toHaveBeenNthCalledWith( 1, - expect.objectContaining({ connectionId: 'deep', mode: 'enriched', detectRelationships: true }), + expect.objectContaining({ connectionId: 'first', mode: 'enriched', detectRelationships: true }), expect.anything(), ); expect(runScan).toHaveBeenNthCalledWith( 2, - expect.objectContaining({ connectionId: 'fast', mode: 'structural', detectRelationships: false }), + expect.objectContaining({ connectionId: 'second', mode: 'enriched', detectRelationships: true }), expect.anything(), ); }); @@ -467,7 +386,7 @@ describe('runKtxPublicIngest', () => { try { await initKtxProject({ projectDir }); const io = makeIo({ isTTY: true }); - const project = projectWithConnections({ + const project = deepReadyProject({ warehouse: { driver: 'sqlite', path: join(projectDir, 'warehouse.sqlite') }, }); @@ -614,7 +533,7 @@ describe('runKtxPublicIngest', () => { it('prints the schema-first notice for explicit query-history runs', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async () => 0); @@ -640,7 +559,7 @@ describe('runKtxPublicIngest', () => { it('suppresses internal scan output for public database ingest summaries', async () => { const io = makeIo(); - const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); const runScan = vi.fn(async (_args, scanIo) => { scanIo.stdout.write('KTX scan completed\n'); scanIo.stdout.write('Mode: structural\n'); @@ -674,7 +593,7 @@ describe('runKtxPublicIngest', () => { it('sanitizes captured database scan failure details in direct public output', async () => { const io = makeIo(); - const project = deepReadyProject({ warehouse: { 
driver: 'postgres', context: { depth: 'deep' } } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); const runScan = vi.fn(async (_args, scanIo) => { scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n'); return 1; @@ -689,7 +608,6 @@ describe('runKtxPublicIngest', () => { all: false, json: false, inputMode: 'disabled', - depth: 'deep', }, io.io, { loadProject: vi.fn(async () => project), runScan }, @@ -699,7 +617,7 @@ describe('runKtxPublicIngest', () => { expect(io.stdout()).toContain( 'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.', ); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project'); expect(io.stdout()).not.toContain('KTX scan enrichment failed'); expect(io.stdout()).not.toContain('structural scan'); }); @@ -743,7 +661,7 @@ describe('runKtxPublicIngest', () => { it('suppresses historic-sql report output during direct public query-history ingest', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -794,7 +712,6 @@ describe('runKtxPublicIngest', () => { all: false, json: false, inputMode: 'auto', - depth: 'fast', queryHistory: 'default', }, io.io, @@ -809,7 +726,6 @@ describe('runKtxPublicIngest', () => { targetConnectionId: 'warehouse', all: false, entrypoint: 'ingest', - depth: 'fast', queryHistory: 'default', }), io.io, @@ -821,7 +737,7 @@ describe('runKtxPublicIngest', () => { const io = makeIo({ isTTY: true, interactive: true }); const calls: string[] = []; const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + 
warehouse: { driver: 'postgres' }, }); const ensureRuntime = vi.fn(async (): Promise => { calls.push('runtime'); @@ -923,10 +839,13 @@ describe('runKtxPublicIngest', () => { it('runs all independent targets and reports partial failures', async () => { const io = makeIo(); - const project = projectWithConnections({ - warehouse: { driver: 'postgres' }, - prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, - }); + const project = deepReadyProject( + { + warehouse: { driver: 'postgres' }, + prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' }, + }, + false, + ); const runScan = vi.fn(async () => 1); const runIngest = vi.fn(async () => 0); @@ -959,7 +878,7 @@ describe('runKtxPublicIngest', () => { command: 'run', projectDir: '/tmp/project', connectionId: 'warehouse', - mode: 'structural', + mode: 'enriched', detectRelationships: false, dryRun: false, }, @@ -967,14 +886,14 @@ describe('runKtxPublicIngest', () => { ); expect(io.stdout()).toContain('Ingest finished with partial failures'); expect(io.stdout()).toContain('warehouse failed at database-schema.'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project'); expect(io.stdout()).not.toContain('Debug:'); }); it('skips the query-history facet but keeps the target green when query-history fails', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -1007,14 +926,14 @@ describe('runKtxPublicIngest', () => { 'Query history failed for 60 tasks. 
First failure: Google Cloud authentication failed while analyzing query history', ); expect(io.stdout()).not.toContain('warehouse failed: Error:'); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('historic-sql'); }); it('prints the runtime artifact build hint for missing query-history runtime assets', async () => { const io = makeIo(); const project = deepReadyProject({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, }); const runScan = vi.fn(async () => 0); const runIngest = vi.fn(async (_args, ingestIo) => { @@ -1045,14 +964,14 @@ describe('runKtxPublicIngest', () => { expect(io.stdout()).toContain( 'In a source checkout, build the local runtime assets with: pnpm run artifacts:build', ); - expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history'); + expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --query-history'); expect(io.stdout()).not.toContain('Then retry the runtime-backed KTX command'); }); - it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => { + it('fails enrichment-readiness targets before work starts while continuing independent --all targets', async () => { const io = makeIo(); const project = projectWithConnections({ - warehouse: { driver: 'postgres', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres' }, docs: { driver: 'notion' }, }); const runScan = vi.fn(async () => 0); @@ -1071,12 +990,12 @@ describe('runKtxPublicIngest', () => { expect.objectContaining({ command: 'run', connectionId: 'docs', adapter: 'notion' }), expect.anything(), ); - expect(io.stdout()).toContain('warehouse requires deep ingest readiness'); + 
expect(io.stdout()).toContain('warehouse cannot be ingested: enrichment is not configured'); }); - it('does not infer enriched relationship scans from legacy scanMode values', async () => { + it('drives scan relationship detection from project config, not from legacy args', async () => { const io = makeIo(); - const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }, false); const runScan = vi.fn(async () => 0); await expect( @@ -1103,7 +1022,7 @@ describe('runKtxPublicIngest', () => { command: 'run', projectDir: '/tmp/project', connectionId: 'warehouse', - mode: 'structural', + mode: 'enriched', detectRelationships: false, dryRun: false, }, @@ -1113,7 +1032,7 @@ describe('runKtxPublicIngest', () => { it('prints stable JSON results', async () => { const io = makeIo(); - const project = projectWithConnections({ warehouse: { driver: 'postgres' } }); + const project = deepReadyProject({ warehouse: { driver: 'postgres' } }); await expect( runKtxPublicIngest( diff --git a/packages/cli/test/setup-context.test.ts b/packages/cli/test/setup-context.test.ts index 9757cc62..d04e24e1 100644 --- a/packages/cli/test/setup-context.test.ts +++ b/packages/cli/test/setup-context.test.ts @@ -1,7 +1,7 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; +import { buildDefaultKtxProjectConfig, serializeKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -49,7 +49,7 @@ async function writeReadyProject(projectDir: string, overrides: ReadyProjectOver 
...defaults, setup: { database_connection_ids: ['warehouse'] }, connections: { - warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' }, }, llm: { @@ -407,130 +407,10 @@ describe('setup context build state', () => { expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); }); - it('treats fast database context as ready from schema manifest shards without AI artifacts', async () => { + it('requires completed relationships for database context when relationship discovery is enabled', async () => { await writeReadyProject(tempDir, { connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } }, - }, - llm: { provider: { backend: 'none' }, models: {} }, - scan: { enrichment: { mode: 'none' } }, - }); - await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { recursive: true }); - await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); - await writeScanReport(tempDir, '2026-05-09T10:00:00.000Z', { - mode: 'structural', - tableDescriptions: 'skipped', - columnDescriptions: 'skipped', - embeddings: 'skipped', - manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], - }); - const io = makeIo(); - const runContextBuildMock = vi.fn>(async () => ({ - exitCode: 0, - })); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'disabled' }, - io.io, - { - runContextBuild: runContextBuildMock, - }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - expect(runContextBuildMock).not.toHaveBeenCalled(); - expect(io.stdout()).toContain('Existing context artifacts were found from setup ingest.'); - }); - - it('stores fast context depth non-interactively when deep readiness is missing', async () => { - await 
writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true } }, - llm: { provider: { backend: 'none' }, models: {} }, - scan: { enrichment: { mode: 'none' } }, - }); - const io = makeIo(); - const runContextBuildMock = vi.fn>(async () => ({ - exitCode: 0, - })); - const verifyContextReady = vi.fn(async () => ({ - ready: true, - agentContextReady: true, - semanticSearchReady: true, - details: ['ready'], - })); - - await expect( - runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'disabled' }, - io.io, - { runContextBuild: runContextBuildMock, verifyContextReady }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse.context).toMatchObject({ depth: 'fast' }); - expect(runContextBuildMock).toHaveBeenCalledWith( - expect.anything(), - expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled' }), - expect.anything(), - expect.anything(), - ); - expect(runContextBuildMock.mock.calls[0]?.[1]).not.toMatchObject({ - scanMode: 'enriched', - detectRelationships: true, - }); - }); - - it('prompts for database context depth after final readiness is known', async () => { - await writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true } }, - llm: { - provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret - models: { default: 'gpt-test' }, - }, - scan: { - enrichment: { - mode: 'llm', - embeddings: { backend: 'openai', model: 'text-embedding-3-small', dimensions: 1536 }, - }, - }, - }); - const io = makeIo(); - const select = vi.fn(async () => 'deep'); - const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); - const verifyContextReady = vi.fn(async () => ({ - ready: true, - agentContextReady: true, - semanticSearchReady: true, - details: ['ready'], - })); - - await expect( - 
runKtxSetupContextStep( - { projectDir: tempDir, inputMode: 'auto' }, - io.io, - { - prompts: { select, cancel: vi.fn() }, - runContextBuild: runContextBuildMock, - verifyContextReady, - }, - ), - ).resolves.toMatchObject({ status: 'ready' }); - - expect(select).toHaveBeenCalledWith( - expect.objectContaining({ - message: expect.stringContaining('How much database context should KTX build?'), - }), - ); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' }); - }); - - it('requires completed relationships for deep context when relationship discovery is enabled', async () => { - await writeReadyProject(tempDir, { - connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', readonly: true }, }, scan: { relationships: { enabled: true } }, }); @@ -560,10 +440,10 @@ describe('setup context build state', () => { expect(runContextBuildMock).toHaveBeenCalledOnce(); }); - it('does not require relationships for deep context when relationship discovery is disabled', async () => { + it('does not require relationships for database context when relationship discovery is disabled', async () => { await writeReadyProject(tempDir, { connections: { - warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } }, + warehouse: { driver: 'postgres', readonly: true }, }, scan: { relationships: { enabled: false } }, }); @@ -620,7 +500,7 @@ describe('setup context build state', () => { it('starts a fresh foreground build when stale state is found', async () => { await writeReadyProject(tempDir, { - connections: { warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } } }, + connections: { warehouse: { driver: 'postgres', readonly: true } }, }); await writeKtxSetupContextState(tempDir, { runId: 'setup-context-local-stale', diff --git 
a/packages/cli/test/setup-databases.test.ts b/packages/cli/test/setup-databases.test.ts index 15d27e3c..cf7acf3c 100644 --- a/packages/cli/test/setup-databases.test.ts +++ b/packages/cli/test/setup-databases.test.ts @@ -262,48 +262,6 @@ describe('setup databases step', () => { expect(prompts.select).toHaveBeenCalledTimes(1); }); - it('preserves context.depth when editing an existing database connection', async () => { - await writeFile( - join(tempDir, 'ktx.yaml'), - [ - 'connections:', - ' warehouse:', - ' driver: sqlite', - ' path: ./warehouse.sqlite', - ' context:', - ' depth: deep', - '', - ].join('\n'), - 'utf-8', - ); - const prompts = makePromptAdapter({ - selectValues: ['edit', 'warehouse', 'continue'], - textValues: ['./warehouse.sqlite'], - }); - const testConnection = vi.fn(async () => 0); - const scanConnection = vi.fn(async () => 0); - const io = makeIo(); - const result = await runKtxSetupDatabasesStep( - { - projectDir: tempDir, - inputMode: 'auto', - skipDatabases: false, - databaseSchemas: [], - disableQueryHistory: true, - }, - io.io, - { prompts, testConnection, scanConnection }, - ); - - expect(result.status, io.stderr()).toBe('ready'); - const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); - expect(config.connections.warehouse).toMatchObject({ - driver: 'sqlite', - path: './warehouse.sqlite', - context: { depth: 'deep' }, - }); - }); - it('labels existing database connections with the database type', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -376,7 +334,6 @@ describe('setup databases step', () => { expect(config.connections['postgres-warehouse']).toEqual({ driver: 'postgres', url: 'env:DATABASE_URL', - context: { depth: 'fast' }, }); }); @@ -1558,7 +1515,7 @@ describe('setup databases step', () => { ); expect(io.stdout()).not.toContain('Tables: 2'); expect(io.stdout()).toContain('◇ Building schema context for postgres-warehouse'); - expect(io.stdout()).toContain('│ Running fast database 
ingest…'); + expect(io.stdout()).toContain('│ Running database scan…'); expect(io.stdout()).toContain('◇ Schema context complete for postgres-warehouse'); expect(io.stdout()).toContain('│ Changes: 2 new tables'); expect(io.stdout()).toContain('◇ Database ready'); @@ -1907,7 +1864,7 @@ describe('setup databases step', () => { driver: 'postgres', url: 'env:DATABASE_URL', schemas: ['public'], - context: { queryHistory: { enabled: false }, depth: 'fast' }, + context: { queryHistory: { enabled: false } }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], @@ -1946,7 +1903,6 @@ describe('setup databases step', () => { expect(config.connections.warehouse).toEqual({ driver: 'sqlite', path: './warehouse.sqlite', - context: { depth: 'fast' }, }); expect(config.setup).toEqual({ database_connection_ids: ['warehouse'], @@ -2023,11 +1979,11 @@ describe('setup databases step', () => { const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' }); expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:'); - expect(io.stderr()).toContain('Fast database ingest failed for warehouse.'); - expect(io.stderr()).toContain('│ Fast database ingest failed for warehouse.'); - expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --fast --debug`); + expect(io.stderr()).toContain('Database scan failed for warehouse.'); + expect(io.stderr()).toContain('│ Database scan failed for warehouse.'); + expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --debug`); expect(io.stderr()).not.toContain('Structural scan failed for warehouse.'); - expect(io.stderr()).not.toMatch(/^Fast database ingest failed for warehouse\./m); + expect(io.stderr()).not.toMatch(/^Database scan failed for warehouse\./m); }); it('prints the native SQLite rebuild 
command when scanning hits a Node ABI mismatch', async () => { @@ -2066,7 +2022,7 @@ describe('setup databases step', () => { expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('│ Native SQLite is built for a different Node.js ABI.'); expect(io.stderr()).toContain('Fix: pnpm run native:rebuild'); - expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir} --fast`); + expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir}`); expect(io.stderr()).not.toContain('ktx scan'); expect(io.stderr()).not.toContain('npm rebuild'); expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m); @@ -2364,7 +2320,7 @@ describe('setup databases step', () => { 'utf-8', ); const io = makeIo(); - const prompts = makePromptAdapter({ selectValues: ['yes', 'deep'] }); + const prompts = makePromptAdapter({ selectValues: ['yes'] }); const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); const historicSqlReadinessProbe = vi.fn(async () => ({ ok: true as const, @@ -2399,12 +2355,6 @@ describe('setup databases step', () => { { value: 'back', label: 'Back' }, ], }); - expect(prompts.select).toHaveBeenNthCalledWith( - 2, - expect.objectContaining({ - message: expect.stringContaining('How much database context should KTX build?'), - }), - ); expect(historicSqlReadinessProbe).toHaveBeenCalledWith( expect.objectContaining({ projectDir: tempDir, @@ -2420,7 +2370,6 @@ describe('setup databases step', () => { minExecutions: 5, filters: { dropTrivialProbes: true }, }, - depth: 'deep', }, }); }); diff --git a/packages/cli/test/standalone-smoke.test.ts b/packages/cli/test/standalone-smoke.test.ts index 4007afcb..7dde8979 100644 --- a/packages/cli/test/standalone-smoke.test.ts +++ b/packages/cli/test/standalone-smoke.test.ts @@ -185,7 +185,7 @@ describe('standalone built ktx CLI smoke', () => { expect([0, 1]).toContain(result.code); }); - 
it('runs fast public database ingest through the built binary with manifest artifacts', async () => { + it('blocks public database ingest through the built binary when enrichment is not configured', async () => { const projectDir = join(tempDir, 'database-ingest-project'); const init = await runSetupNewProject(projectDir); expectSetupStderr(init); @@ -200,19 +200,10 @@ describe('standalone built ktx CLI smoke', () => { expect(connectionTest.stdout).toContain('Driver: sqlite'); expect(connectionTest.stdout).toContain('Status: ok'); - const ingest = await runBuiltCli(['ingest', 'warehouse', '--project-dir', projectDir, '--fast', '--no-input']); - expectProjectStderr(ingest, projectDir); - expect(ingest.stdout).toContain('Ingest finished'); - expect(ingest.stdout).toContain('warehouse'); - expect(ingest.stdout).toContain('Database schema'); - expect(ingest.stdout).toContain('warehouse done'); + const ingest = await runBuiltCli(['ingest', 'warehouse', '--project-dir', projectDir, '--no-input']); + expect(ingest.code).toBe(1); + expect(ingest.stdout).toContain('warehouse cannot be ingested: enrichment is not configured'); expect(ingest.stdout).not.toContain('KTX scan completed'); - - const manifest = await readFile(join(projectDir, 'semantic-layer/warehouse/_schema/public.yaml'), 'utf-8'); - expect(manifest).toContain('customers:'); - expect(manifest).toContain('orders:'); - expect(manifest).toContain('source: formal'); - expect(manifest).not.toContain('ai:'); }, 30_000); it('parses gateway LLM config and OpenAI enrichment embeddings used by standalone scans without network calls', async () => { diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json index 13642c49..628c8f4b 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -365,7 +365,6 @@ "embeddings", "secrets", "databases", - 
"database-context-depth", "sources", "context", "agents", diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index b196aaa1..2ea9ce27 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -257,7 +257,7 @@ describe('standalone example docs', () => { assert.match(primarySources, /context:\n queryHistory:/); assert.match(rootReadme, /`ktx ingest` \| Build context for every configured connection/); assert.doesNotMatch(rootReadme, /`ktx ingest `/); - assert.match(quickstart, /Databases:\n warehouse: deep context complete/); + assert.match(quickstart, /Databases:\n warehouse: database context complete/); assert.match(quickstart, /Databases configured: yes \(warehouse\)/); assert.match(setupReference, /Databases configured: yes \(postgres-warehouse\)/); assert.doesNotMatch(rootReadme, new RegExp(['Primary sources', 'configured'].join(' '))); diff --git a/scripts/installed-live-database-smoke.mjs b/scripts/installed-live-database-smoke.mjs index a11e38d2..20bad6b5 100644 --- a/scripts/installed-live-database-smoke.mjs +++ b/scripts/installed-live-database-smoke.mjs @@ -106,7 +106,6 @@ export function buildLiveDatabaseIngestArgs(projectDir, _databaseIntrospectionUr connectionId, '--project-dir', projectDir, - '--fast', '--no-input', ]; } @@ -152,20 +151,20 @@ function requireSuccess(label, result) { } } +function requireFailure(label, result) { + if (result.code === 0) { + throw new Error( + `${label} unexpectedly succeeded\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`, + ); + } +} + function requireOutput(label, result, pattern) { if (!pattern.test(result.stdout)) { throw new Error(`${label} output did not match ${pattern}\nstdout:\n${result.stdout}`); } } -function getRunId(stdout) { - const match = stdout.match(/^Run: (.+)$/m); - if (!match) { - throw new Error(`ingest output did not include a run id\nstdout:\n${stdout}`); - } - return match[1]; -} - async function requireDocker() { const result = await 
run('docker', ['info'], { timeout: 20_000 }); if (result.code !== 0) { @@ -310,13 +309,17 @@ async function main() { env: managedRuntimeEnv(cleanInstallDir), timeout: 120_000, }); - requireSuccess('ktx ingest warehouse --fast', ingestRun); - requireOutput('ktx ingest warehouse --fast', ingestRun, /Ingest finished/); - requireOutput('ktx ingest warehouse --fast', ingestRun, /Database schema/); + // ktx ingest now always builds enriched context and requires a configured + // model and embeddings. This smoke project has neither, so the database + // target fails the enrichment-readiness preflight before any work runs. + // This still exercises the packaged binary, daemon startup, and the live + // database connection end to end. + requireFailure('ktx ingest warehouse', ingestRun); + requireOutput('ktx ingest warehouse', ingestRun, /Ingest finished with partial failures/); + requireOutput('ktx ingest warehouse', ingestRun, /enrichment is not configured/); - const runId = getRunId(ingestRun.stdout); await assertPathExists(join(projectDir, '.ktx', 'db.sqlite'), 'SQLite local ingest state'); - process.stdout.write(`Installed live-database artifact smoke passed: ${runId}\n`); + process.stdout.write('Installed live-database artifact smoke passed: enrichment-readiness guard verified\n'); } finally { if (daemonStarted && cleanInstallDir) { await stopDaemon(cleanInstallDir); diff --git a/scripts/installed-live-database-smoke.test.mjs b/scripts/installed-live-database-smoke.test.mjs index ef618725..2ddeed5d 100644 --- a/scripts/installed-live-database-smoke.test.mjs +++ b/scripts/installed-live-database-smoke.test.mjs @@ -100,7 +100,6 @@ describe('installed live-database artifact smoke helpers', () => { 'warehouse', '--project-dir', '/tmp/project', - '--fast', '--no-input', ]); diff --git a/scripts/package-artifacts.mjs b/scripts/package-artifacts.mjs index e1ff8c6c..d66d7f1a 100644 --- a/scripts/package-artifacts.mjs +++ b/scripts/package-artifacts.mjs @@ -512,15 +512,6 @@ 
function requireSuccess(label, result) { assert.equal(result.stderr, '', label + ' wrote unexpected stderr'); } -function requireSuccessWithProjectStderr(label, result, projectDir) { - assert.equal( - result.code, - 0, - label + ' failed with code ' + result.code + '\\nstdout:\\n' + result.stdout + '\\nstderr:\\n' + result.stderr, - ); - assert.equal(result.stderr, 'Project: ' + projectDir + '\\n', label + ' wrote unexpected stderr'); -} - function requireExitCodeWithProjectStderr(label, result, projectDir, expectedCode) { assert.equal( result.code, @@ -860,27 +851,15 @@ try { requireOutput('ktx admin runtime stop', runtimeStop, /Stopped KTX daemon/); process.stdout.write('ktx admin runtime daemon lifecycle verified\\n'); - const structuralScan = await run( + const databaseIngest = await run( ...Object.values( - pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--fast', '--no-input']), + pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--no-input']), ), ); - requireSuccessWithProjectStderr('ktx ingest fast', structuralScan, projectDir); - requireOutput('ktx ingest fast', structuralScan, /Ingest finished/); - requireOutput('ktx ingest fast', structuralScan, /Database schema/); - requireOutput('ktx ingest fast', structuralScan, /warehouse\\s+done/); - await access(join(projectDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml')); - process.stdout.write('ktx ingest fast verified\\n'); - - const enrichedScan = await run( - ...Object.values( - pnpmCommand(['exec', 'ktx', 'ingest', 'warehouse', '--project-dir', projectDir, '--deep', '--no-input']), - ), - ); - requireExitCodeWithProjectStderr('ktx ingest deep readiness guard', enrichedScan, projectDir, 1); - requireOutput('ktx ingest deep readiness guard', enrichedScan, /Ingest finished with partial failures/); - requireOutput('ktx ingest deep readiness guard', enrichedScan, /requires deep ingest readiness/); - process.stdout.write('ktx ingest deep 
readiness guard verified\\n'); + requireExitCodeWithProjectStderr('ktx ingest enrichment guard', databaseIngest, projectDir, 1); + requireOutput('ktx ingest enrichment guard', databaseIngest, /Ingest finished with partial failures/); + requireOutput('ktx ingest enrichment guard', databaseIngest, /enrichment is not configured/); + process.stdout.write('ktx ingest enrichment guard verified\\n'); await access(join(projectDir, '.ktx', 'db.sqlite')); process.stdout.write('ktx ingest state verified\\n'); diff --git a/scripts/package-artifacts.test.mjs b/scripts/package-artifacts.test.mjs index a1d2489d..ffc59ce6 100644 --- a/scripts/package-artifacts.test.mjs +++ b/scripts/package-artifacts.test.mjs @@ -530,10 +530,11 @@ describe('verification snippets', () => { assert.doesNotMatch(source, /ktx admin runtime prune/); assert.doesNotMatch(source, /staleRuntimeDir/); assert.match(source, /pnpmCommand\(\['exec', 'ktx', 'ingest', 'warehouse'/); - assert.match(source, /'--deep'/); + assert.doesNotMatch(source, /'--fast'/); + assert.doesNotMatch(source, /'--deep'/); assert.doesNotMatch(source, /'--enrich'/); - assert.match(source, /ktx ingest fast verified/); - assert.match(source, /ktx ingest deep readiness guard verified/); + assert.match(source, /ktx ingest enrichment guard verified/); + assert.match(source, /enrichment is not configured/); assert.match(source, /enrichment:/); assert.match(source, /mode: deterministic/); assert.doesNotMatch(source, /run\('pnpm', \['exec', 'ktx', 'ingest', 'run'/); diff --git a/skills/ktx/SKILL.md b/skills/ktx/SKILL.md index 0eaa03e3..58893d7f 100644 --- a/skills/ktx/SKILL.md +++ b/skills/ktx/SKILL.md @@ -87,16 +87,17 @@ Do not discover these inputs across multiple setup runs. pass the database flags from the previous run** — setup validates current flags, not persisted `ktx.yaml` state. -4. **Run fast ingest** if setup did not already complete one: +4. 
**Build context** if setup did not already complete one: ```bash - ktx ingest --fast --no-input + ktx ingest --no-input ``` - Note: `ktx ingest` rejects `--yes` together with `--no-input` - (*Choose only one runtime install mode*); `ktx setup` accepts both. Use - `--no-input` only for ingest. Do not run `--deep` ingest unless the user - explicitly asks for LLM-backed enrichment. + `ktx ingest` always builds enriched context and requires a configured model + and embeddings (set during setup); a database connection without them fails + with an enrichment-readiness error. Note: `ktx ingest` rejects `--yes` + together with `--no-input` (*Choose only one runtime install mode*); + `ktx setup` accepts both. Use `--no-input` only for ingest. 5. **Install agent integration:** @@ -151,7 +152,7 @@ Notes: `--notion-root-page-id` (repeatable); use `all_accessible` to crawl everything the token can see. - After adding sources, ingest each new connection so its context is queryable: - `ktx ingest --fast --no-input`. + `ktx ingest --no-input`. 
## Files to inspect From 53a6f8d1112adbb282205525ddc10b2690fc250d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 00:42:59 +0200 Subject: [PATCH 3/7] fix(cli): treat artifact-producing ingests with failures as partial (#238) * fix(cli): derive ingest outcomes from saved artifacts * fix(cli): treat artifact-producing ingests with failures as partial * fix(cli): route memory-flow run status through shared ingest outcome * fix(cli): treat partial ingest as saved context in setup status * test(cli): align memory-flow replay expectations with partial ingests --- .../cli/src/context/ingest/local-ingest.ts | 12 +- .../src/context/ingest/memory-flow/events.ts | 3 +- packages/cli/src/context/ingest/reports.ts | 14 ++ packages/cli/src/ingest.ts | 18 +-- packages/cli/src/setup.ts | 4 +- .../ingest/local-metabase-ingest.test.ts | 19 +++ .../context/ingest/memory-flow/events.test.ts | 4 +- .../cli/test/context/ingest/reports.test.ts | 71 +++++++++ packages/cli/test/ingest.test.ts | 139 +++++++++++++++++- packages/cli/test/setup.test.ts | 53 +++++++ 10 files changed, 312 insertions(+), 25 deletions(-) create mode 100644 packages/cli/test/context/ingest/reports.test.ts diff --git a/packages/cli/src/context/ingest/local-ingest.ts b/packages/cli/src/context/ingest/local-ingest.ts index 2351d420..ec8a72f4 100644 --- a/packages/cli/src/context/ingest/local-ingest.ts +++ b/packages/cli/src/context/ingest/local-ingest.ts @@ -13,6 +13,7 @@ import { localPullConfigForAdapter, type DefaultLocalIngestAdaptersOptions } fro import { createLocalBundleIngestRuntime } from './local-bundle-runtime.js'; import type { MemoryFlowEventSink } from './memory-flow/types.js'; import { buildSyncId } from './raw-sources-paths.js'; +import { ingestReportOutcome } from './reports.js'; import type { IngestReportBody, IngestReportSnapshot } from './reports.js'; import { SqliteBundleIngestStore } from './sqlite-bundle-ingest-store.js'; import type { IngestBundleResult, IngestJobContext, 
IngestJobPhase, IngestTrigger, SourceAdapter } from './types.js'; @@ -79,7 +80,7 @@ export interface LocalMetabaseFanoutProgress { metabaseDatabaseId: number; targetConnectionId: string; jobId: string; - status: 'done' | 'failed'; + status: 'done' | 'partial' | 'failed'; }): void; } @@ -232,11 +233,11 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise child.report.body.failedWorkUnits.length === 0).length; - if (succeeded === children.length) { + const outcomes = children.map((child) => ingestReportOutcome(child.report)); + if (outcomes.every((outcome) => outcome === 'done')) { return 'all_succeeded'; } - if (succeeded === 0) { + if (outcomes.every((outcome) => outcome === 'error')) { return 'all_failed'; } return 'partial_failure'; @@ -401,12 +402,13 @@ export async function runLocalMetabaseIngest( error, }); } + const childOutcome = ingestReportOutcome(child.report); options.progress?.onMetabaseChildCompleted?.({ metabaseConnectionId, metabaseDatabaseId: childPlan.metabaseDatabaseId, targetConnectionId, jobId: child.report.jobId, - status: child.report.body.failedWorkUnits.length > 0 ? 'failed' : 'done', + status: childOutcome === 'error' ? 
'failed' : childOutcome, }); children.push({ jobId: child.report.jobId, diff --git a/packages/cli/src/context/ingest/memory-flow/events.ts b/packages/cli/src/context/ingest/memory-flow/events.ts index 020ce5ae..92cebe0f 100644 --- a/packages/cli/src/context/ingest/memory-flow/events.ts +++ b/packages/cli/src/context/ingest/memory-flow/events.ts @@ -1,5 +1,6 @@ import type { MemoryAction } from '../../../context/memory/types.js'; import type { LocalIngestRunRecord } from '../local-stage-ingest.js'; +import { ingestReportOutcome } from '../reports.js'; import type { IngestReportSnapshot } from '../reports.js'; import type { MemoryFlowActionDetail, @@ -72,7 +73,7 @@ function fullModeMetadata(input: { } function reportStatus(report: IngestReportSnapshot): MemoryFlowReplayInput['status'] { - return report.body.failedWorkUnits.length > 0 ? 'error' : 'done'; + return ingestReportOutcome(report) === 'error' ? 'error' : 'done'; } function reportCreatedEvent(report: IngestReportSnapshot): MemoryFlowEvent { diff --git a/packages/cli/src/context/ingest/reports.ts b/packages/cli/src/context/ingest/reports.ts index ea02a31a..09f92170 100644 --- a/packages/cli/src/context/ingest/reports.ts +++ b/packages/cli/src/context/ingest/reports.ts @@ -146,6 +146,20 @@ export function savedMemoryCountsForReport(report: IngestReportSnapshot): Ingest }; } +/** @internal */ +export type IngestReportOutcome = 'done' | 'partial' | 'error'; + +export function ingestReportOutcome(report: IngestReportSnapshot): IngestReportOutcome { + if (report.body.status === 'failed') { + return 'error'; + } + if (report.body.failedWorkUnits.length === 0) { + return 'done'; + } + const { wikiCount, slCount } = savedMemoryCountsForReport(report); + return wikiCount + slCount > 0 ? 
'partial' : 'error'; +} + export function buildStageIndexFromReportBody(jobId: string, connectionId: string, body: IngestReportBody): StageIndex { return { jobId, diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index fb8c9a29..ad5ba270 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -2,7 +2,7 @@ import { buildMemoryFlowViewModel } from './context/ingest/memory-flow/view-mode import { createMemoryFlowLiveBuffer, sanitizeMemoryFlowError } from './context/ingest/memory-flow/live-buffer.js'; import { formatMemoryFlowFinalSummary } from './context/ingest/memory-flow/summary.js'; import { getLatestLocalIngestStatus, getLocalIngestStatus, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, type RunLocalIngestOptions, runLocalIngest, runLocalMetabaseIngest } from './context/ingest/local-ingest.js'; -import { type IngestReportSnapshot, savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { type IngestReportSnapshot, ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ingestReportToMemoryFlowReplay } from './context/ingest/memory-flow/events.js'; import type { MemoryFlowEvent, MemoryFlowReplayInput } from './context/ingest/memory-flow/types.js'; import { renderMemoryFlowReplay } from './context/ingest/memory-flow/render.js'; @@ -93,10 +93,6 @@ export interface KtxIngestDeps { runtimeIo?: KtxIngestIo; } -function reportStatus(report: IngestReportSnapshot): 'done' | 'error' { - return report.body.status === 'failed' || report.body.failedWorkUnits.length > 0 ? 
'error' : 'done'; -} - const REPORT_SOURCE_LABELS = new Map([ ['live-database', 'Database schema'], ['historic-sql', 'Query history'], @@ -193,7 +189,7 @@ function writeReportStatus(report: IngestReportSnapshot, io: KtxIngestIo): void if (report.body.tracePath) { io.stdout.write(`Trace: ${report.body.tracePath}\n`); } - io.stdout.write(`Status: ${reportStatus(report)}\n`); + io.stdout.write(`Status: ${ingestReportOutcome(report)}\n`); io.stdout.write(`Source: ${reportSourceLabel(report.sourceKey)}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); io.stdout.write(`Sync: ${report.body.syncId}\n`); @@ -231,7 +227,7 @@ function writeMetabaseFanoutStatus(result: LocalMetabaseFanoutResult, io: KtxIng } io.stdout.write(`Saved memory: ${counts.wikiCount} wiki, ${counts.slCount} SL\n`); for (const child of result.children) { - const status = reportStatus(child.report); + const status = ingestReportOutcome(child.report); io.stdout.write( `- target=${child.targetConnectionId} database=${child.metabaseDatabaseId} status=${status} job=${child.jobId} report=${child.report.id}\n`, ); @@ -595,7 +591,7 @@ function initialRunMemoryFlowInput( } function finalRunMemoryFlowInput(snapshot: MemoryFlowReplayInput, report: IngestReportSnapshot): MemoryFlowReplayInput { - const status = reportStatus(report); + const status = ingestReportOutcome(report) === 'error' ? 'error' : 'done'; return { ...snapshot, runId: report.runId, @@ -777,7 +773,7 @@ export async function runKtxIngest( } finally { plainProgress?.flush(); } - return result.status === 'all_succeeded' ? 0 : 1; + return result.status === 'all_failed' ? 1 : 0; } const jobId = deps.jobIdFactory?.(); @@ -846,7 +842,7 @@ export async function runKtxIngest( liveTui?.close(); liveTui = null; io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 
1 : 0; } plainProgress?.flush(); await writeReportRecord(result.report, runOutputMode, io, { @@ -854,7 +850,7 @@ export async function runKtxIngest( renderStoredMemoryFlow: deps.renderStoredMemoryFlow, env, }); - return reportStatus(result.report) === 'done' ? 0 : 1; + return ingestReportOutcome(result.report) === 'error' ? 1 : 0; } finally { plainProgress?.flush(); liveTui?.close(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 74056542..ebc04c87 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,7 +1,7 @@ import { existsSync } from 'node:fs'; import { basename, join, resolve } from 'node:path'; import { getLatestLocalIngestStatus } from './context/ingest/local-ingest.js'; -import { savedMemoryCountsForReport } from './context/ingest/reports.js'; +import { ingestReportOutcome, savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ktxLocalStateDbPath } from './context/project/local-state-db.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { readKtxSetupState } from './context/project/setup-config.js'; @@ -306,7 +306,7 @@ function sourceConnections(config: Awaited>['c type LocalIngestStatusReport = NonNullable>>; function reportHasSavedContext(report: LocalIngestStatusReport): boolean { - if (report.body.failedWorkUnits.length > 0) { + if (ingestReportOutcome(report) === 'error') { return false; } const counts = savedMemoryCountsForReport(report); diff --git a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts index 06822aa2..8fb89bd0 100644 --- a/packages/cli/test/context/ingest/local-metabase-ingest.test.ts +++ b/packages/cli/test/context/ingest/local-metabase-ingest.test.ts @@ -6,6 +6,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js'; import { 
LocalMetabaseDiscoveryCache } from '../../../src/context/ingest/adapters/metabase/local-source-state-store.js'; import { getLocalIngestStatus, runLocalMetabaseIngest } from '../../../src/context/ingest/local-ingest.js'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; import type { ChunkResult, FetchContext, SourceAdapter } from '../../../src/context/ingest/types.js'; class TestAgentRunner implements AgentRunnerPort { @@ -202,6 +203,24 @@ describe('runLocalMetabaseIngest', () => { expect(result.children[1]?.report.body.failedWorkUnits).toEqual(['metabase-db-2']); }); + it('keeps a child that saved memory out of all_failed when another child fails', async () => { + await seedMetabaseState(); + const agentRunner = new TestAgentRunner(); + const ids = ['metabase-child-1', 'metabase-child-2']; + + const result = await runLocalMetabaseIngest({ + project, + adapters: [new FakeMetabaseSourceAdapter()], + metabaseConnectionId: 'prod-metabase', + agentRunner, + jobIdFactory: () => ids.shift() ?? 
'metabase-child-extra', + }); + + expect(result.status).toBe('partial_failure'); + expect(ingestReportOutcome(result.children[0].report)).toBe('done'); + expect(ingestReportOutcome(result.children[1].report)).toBe('error'); + }); + it('captures fetch-time child failures and continues later mappings', async () => { await seedMetabaseState(); project.config.connections.warehouse_c = { driver: 'postgres', url: 'postgres://localhost/c' }; diff --git a/packages/cli/test/context/ingest/memory-flow/events.test.ts b/packages/cli/test/context/ingest/memory-flow/events.test.ts index e29405a4..cb0e72c8 100644 --- a/packages/cli/test/context/ingest/memory-flow/events.test.ts +++ b/packages/cli/test/context/ingest/memory-flow/events.test.ts @@ -166,7 +166,7 @@ describe('memory-flow event mapping', () => { runId: 'run-1', connectionId: 'warehouse', adapter: 'lookml', - status: 'error', + status: 'done', sourceDir: null, syncId: 'sync-2', reportId: 'report-1', @@ -308,7 +308,7 @@ describe('memory-flow event mapping', () => { sourceReportPath: 'report-1', fallbackReason: null, }); - expect(replay.status).toBe('error'); + expect(replay.status).toBe('done'); expect(replay.reportId).toBe('report-1'); expect(replay.reportPath).toBe('report-1'); expect(replay.events[0]).toMatchObject({ type: 'source_acquired', emittedAt: '2026-05-01T10:00:00.000Z' }); diff --git a/packages/cli/test/context/ingest/reports.test.ts b/packages/cli/test/context/ingest/reports.test.ts new file mode 100644 index 00000000..5fc24f6d --- /dev/null +++ b/packages/cli/test/context/ingest/reports.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from 'vitest'; +import { ingestReportOutcome } from '../../../src/context/ingest/reports.js'; +import type { IngestReportSnapshot } from '../../../src/context/ingest/reports.js'; + +function report(body: Partial): IngestReportSnapshot { + return { + id: 'r', + runId: 'run', + jobId: 'job', + connectionId: 'warehouse', + sourceKey: 'metabase', + createdAt: 
'2026-05-29T00:00:00.000Z', + body: { + syncId: 'sync', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + commitSha: null, + workUnits: [], + failedWorkUnits: [], + reconciliationSkipped: false, + conflictsResolved: [], + evictionsApplied: [], + unmappedFallbacks: [], + evictionInputs: [], + unresolvedCards: [], + supersededBy: null, + overrideOf: null, + provenanceRows: [], + toolTranscripts: [], + ...body, + }, + }; +} + +const savingWorkUnit = { + unitKey: 'ok', + rawFiles: ['cards/1.json'], + status: 'success' as const, + actions: [{ target: 'sl' as const, type: 'updated' as const, key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], +}; + +const failedWorkUnit = { + unitKey: 'bad', + rawFiles: ['cards/2.json'], + status: 'failed' as const, + reason: 'tool write failed', + actions: [], + touchedSlSources: [], +}; + +describe('ingestReportOutcome', () => { + it('returns done when there are no failed work units', () => { + expect(ingestReportOutcome(report({ workUnits: [savingWorkUnit] }))).toBe('done'); + }); + + it('returns partial when failed work units coexist with saved memory', () => { + expect( + ingestReportOutcome(report({ workUnits: [savingWorkUnit, failedWorkUnit], failedWorkUnits: ['bad'] })), + ).toBe('partial'); + }); + + it('returns error when failed work units produced no saved memory', () => { + expect(ingestReportOutcome(report({ workUnits: [failedWorkUnit], failedWorkUnits: ['bad'] }))).toBe('error'); + }); + + it('returns error for a stage-level failure even if artifacts were recorded', () => { + expect(ingestReportOutcome(report({ status: 'failed', workUnits: [savingWorkUnit], failedWorkUnits: [] }))).toBe( + 'error', + ); + }); +}); diff --git a/packages/cli/test/ingest.test.ts b/packages/cli/test/ingest.test.ts index eef751ba..f5cd1ac5 100644 --- a/packages/cli/test/ingest.test.ts +++ b/packages/cli/test/ingest.test.ts @@ -403,7 +403,7 @@ describe('runKtxIngest', () => { 
expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); - it('returns a non-zero code when Metabase fanout has failed children', async () => { + it('returns a non-zero code when a Metabase fanout child fully fails', async () => { const projectDir = join(tempDir, 'project'); await writeMetabaseConfig(projectDir); const io = makeIo(); @@ -441,7 +441,7 @@ describe('runKtxIngest', () => { { runLocalMetabaseIngest: async () => ({ metabaseConnectionId: 'prod-metabase', - status: 'partial_failure', + status: 'all_failed', totals: { workUnits: 1, failedWorkUnits: 1 }, children: [ { @@ -467,9 +467,83 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(1); - expect(io.stdout()).toContain('Metabase fanout: partial_failure'); - expect(io.stdout()).toContain('Failed tasks: 1'); + expect(io.stdout()).toContain('Metabase fanout: all_failed'); expect(io.stdout()).toContain('status=error'); + }); + + it('exits 0 and reports status=partial when a Metabase child saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + const report = localFakeBundleReport('metabase-child-1', { + id: 'report-metabase-child-1', + runId: 'run-a', + jobId: 'metabase-child-1', + connectionId: 'warehouse_a', + sourceKey: 'metabase', + body: { + failedWorkUnits: ['metabase-db-2'], + workUnits: [ + { + unitKey: 'metabase-db-1', + rawFiles: ['cards/1.json'], + status: 'success', + actions: [{ target: 'sl', type: 'updated', key: 'warehouse.orders', detail: 'measure' }], + touchedSlSources: [], + }, + { + unitKey: 'metabase-db-2', + rawFiles: ['cards/2.json'], + status: 'failed', + reason: 'bad SQL', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async () => ({ + metabaseConnectionId: 
'prod-metabase', + status: 'partial_failure', + totals: { workUnits: 2, failedWorkUnits: 1 }, + children: [ + { + jobId: 'metabase-child-1', + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + result: { + jobId: 'metabase-child-1', + runId: 'run-a', + syncId: 'sync-a', + diffSummary: { added: 1, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 2, + failedWorkUnits: ['metabase-db-2'], + artifactsWritten: 1, + commitSha: 'abc', + }, + report, + }, + ], + }), + }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Metabase fanout: partial_failure'); + expect(io.stdout()).toContain('status=partial'); expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); @@ -1140,6 +1214,63 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Status: error\n'); }); + it('exits 0 and reports Status: partial when a single-source ingest saved memory despite a failure', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const partialReport = localFakeBundleReport('local-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }); + const runLocal = vi.fn(async (_input: RunLocalIngestOptions) => ({ + result: { + jobId: 'local-job-partial', + runId: partialReport.runId, + syncId: 
partialReport.body.syncId, + diffSummary: partialReport.body.diffSummary, + workUnitCount: partialReport.body.workUnits.length, + failedWorkUnits: partialReport.body.failedWorkUnits, + artifactsWritten: 1, + commitSha: partialReport.body.commitSha, + }, + report: partialReport, + })); + + const io = makeIo(); + await expect( + runKtxIngest( + { command: 'run', projectDir, connectionId: 'warehouse', adapter: 'fake', sourceDir, outputMode: 'plain' }, + io.io, + { runLocalIngest: runLocal, jobIdFactory: () => 'local-job-partial' }, + ), + ).resolves.toBe(0); + + expect(io.stdout()).toContain('Status: partial\n'); + }); + it('prints trace path and error status for stored failed ingest reports', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/test/setup.test.ts b/packages/cli/test/setup.test.ts index 0bc00919..da51e9af 100644 --- a/packages/cli/test/setup.test.ts +++ b/packages/cli/test/setup.test.ts @@ -398,6 +398,59 @@ describe('setup status', () => { expect(rendered).toContain('KTX context built: yes'); }); + it('reports context ready after a partial ingest report saved memory', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'setup:', + ' database_connection_ids:', + ' - warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + 'ingest:', + ' embeddings:', + ' backend: none', + ' dimensions: 8', + '', + ].join('\n'), + 'utf-8', + ); + await writeKtxSetupState(tempDir, { completed_steps: ['project', 'databases'] }); + await persistLocalBundleReport( + tempDir, + localFakeBundleReport('warehouse-job-partial', { + connectionId: 'warehouse', + sourceKey: 'fake', + body: { + failedWorkUnits: ['orders-bad'], + workUnits: [ + { + unitKey: 'orders-ok', + rawFiles: ['orders/orders.json'], + status: 'success', + actions: [{ target: 'wiki', type: 'created', key: 'wiki/orders.md', detail: 'orders' }], + touchedSlSources: [], + }, + { + 
unitKey: 'orders-bad', + rawFiles: ['orders/bad.json'], + status: 'failed', + reason: 'writer tool failed', + actions: [], + touchedSlSources: [], + }, + ], + }, + }), + ); + + const status = await readKtxSetupStatus(tempDir); + + expect(status.context).toMatchObject({ ready: true, status: 'completed' }); + }); + it('formats plain and JSON setup status payloads', async () => { const status = await readKtxSetupStatus(tempDir); const rendered = formatKtxSetupStatus(status); From 08d08d8ea00639f9a8198566805cd955eadcad0b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 12:07:15 +0200 Subject: [PATCH 4/7] ci: refresh README star history chart twice daily Point the README chart at a committed assets/star-history.svg instead of the star-history API URL so GitHub serves it directly and bypasses the Camo proxy cache. A scheduled workflow regenerates the SVG at 06:00/18:00 UTC, busting star-history's server-side cache, and commits it when it changes. --- .github/workflows/star-history.yml | 61 ++++++++++++++++++++++++++++++ README.md | 2 +- assets/star-history.svg | 1 + 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/star-history.yml create mode 100644 assets/star-history.svg diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml new file mode 100644 index 00000000..ec484b05 --- /dev/null +++ b/.github/workflows/star-history.yml @@ -0,0 +1,61 @@ +name: Refresh star history chart + +on: + schedule: + # Twice daily at 06:00 and 18:00 UTC. 
+ - cron: "0 6,18 * * *" + workflow_dispatch: + +permissions: + contents: write + +env: + DO_NOT_TRACK: "1" + KTX_TELEMETRY_DISABLED: "1" + NEXT_TELEMETRY_DISABLED: "1" + +concurrency: + group: star-history-refresh + cancel-in-progress: true + +jobs: + refresh: + name: Regenerate assets/star-history.svg + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Fetch fresh star-history SVG + run: | + set -euo pipefail + # cachebust forces star-history to regenerate instead of serving its + # own server-side cache; --location follows the slug-normalizing 301. + url="https://api.star-history.com/svg?repos=Kaelio/ktx&type=Date&cachebust=${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + curl --fail --location --silent --show-error \ + --retry 3 --retry-delay 5 --max-time 60 \ + -o assets/star-history.svg.new "$url" + # Guard against error pages / truncated responses before overwriting. + if ! grep -q "</svg>" assets/star-history.svg.new; then + echo "Downloaded file is not a valid SVG; aborting." >&2 + exit 1 + fi + if [ "$(wc -c < assets/star-history.svg.new)" -lt 1000 ]; then + echo "Downloaded SVG is suspiciously small; aborting." >&2 + exit 1 + fi + mv assets/star-history.svg.new assets/star-history.svg + + - name: Commit if changed + run: | + set -euo pipefail + if git diff --quiet -- assets/star-history.svg; then + echo "Star-history chart unchanged; nothing to commit." + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add assets/star-history.svg + # [skip ci] keeps this housekeeping commit from triggering KTX CI. + git commit -m "chore: refresh star history chart [skip ci]" + git push diff --git a/README.md b/README.md index 23b2fa0a..686ece22 100644 --- a/README.md +++ b/README.md @@ -248,6 +248,6 @@ event catalog and opt-out options.

- ktx Star History Chart + ktx Star History Chart

diff --git a/assets/star-history.svg b/assets/star-history.svg new file mode 100644 index 00000000..3f6c4a04 --- /dev/null +++ b/assets/star-history.svg @@ -0,0 +1 @@ +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars From ba06f7078af69fdba2184186ca5cc53c65427ea2 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 16:01:47 +0200 Subject: [PATCH 5/7] ci: push star-history refresh to protected main with RELEASE_PAT (#239) The scheduled star-history workflow checked out with the default GITHUB_TOKEN, so its git push to main was rejected by the branch protection hook (GH006). Check out with RELEASE_PAT instead, matching release.yml, whose semantic-release step already pushes to the protected main branch with the same token. --- .github/workflows/star-history.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml index ec484b05..e67a0517 100644 --- a/.github/workflows/star-history.yml +++ b/.github/workflows/star-history.yml @@ -25,6 +25,10 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + # RELEASE_PAT can push to the protected main branch; the default + # GITHUB_TOKEN is rejected by the branch-protection hook (GH006). 
+ token: ${{ secrets.RELEASE_PAT }} - name: Fetch fresh star-history SVG run: | From 54d6e877335a7218dc2ec795ff85529403ac6bde Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 30 May 2026 14:02:55 +0000 Subject: [PATCH 6/7] chore: refresh star history chart [skip ci] --- assets/star-history.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/star-history.svg b/assets/star-history.svg index 3f6c4a04..246ba5a0 100644 --- a/assets/star-history.svg +++ b/assets/star-history.svg @@ -1 +1 @@ -star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars \ No newline at end of file From cbbcf8e8bdd1560b3d0c73e47abd36eb5d8c6f23 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Sat, 30 May 2026 17:44:27 +0200 Subject: [PATCH 7/7] ci: normalize star-history.svg trailing newline (#241) The star-history refresh workflow committed the API's SVG verbatim, but the response has no trailing newline. Because the refresh commit uses [skip ci], the file never ran end-of-file-fixer at commit time, so pre-commit's `--all-files` run failed end-of-file-fixer on every open PR (e.g. #240), even PRs that never touched the file. Normalize the downloaded SVG to exactly one trailing newline in the workflow (idempotent, so the "unchanged" guard still works), and fix the currently committed file so open PRs go green now. --- .github/workflows/star-history.yml | 9 ++++++++- assets/star-history.svg | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/star-history.yml b/.github/workflows/star-history.yml index e67a0517..b7d90c43 100644 --- a/.github/workflows/star-history.yml +++ b/.github/workflows/star-history.yml @@ -48,7 +48,14 @@ jobs: echo "Downloaded SVG is suspiciously small; aborting." 
>&2 exit 1 fi - mv assets/star-history.svg.new assets/star-history.svg + # The star-history API returns the SVG without a trailing newline, + # which end-of-file-fixer rewrites whenever pre-commit runs + # --all-files on a PR. Because the refresh commit below uses [skip ci], + # the hook never runs against it here, so an un-normalized file + # silently breaks the pre-commit check on every open PR. Normalize to + # exactly one trailing newline before committing. + printf '%s\n' "$(cat assets/star-history.svg.new)" > assets/star-history.svg + rm -f assets/star-history.svg.new - name: Commit if changed run: | diff --git a/assets/star-history.svg b/assets/star-history.svg index 246ba5a0..3f6c4a04 100644 --- a/assets/star-history.svg +++ b/assets/star-history.svg @@ -1 +1 @@ -star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars \ No newline at end of file +star-history.comMay 17May 24 100200300400kaelio/ktxStar HistoryDateGitHub Stars