From 1bd29c7eb1675d2eb5e7fbd3d0268acbba4661d3 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Mon, 11 May 2026 22:35:07 +0200 Subject: [PATCH] Fix historic SQL ingest setup and progress --- ...-historic-sql-redesign-manual-test-plan.md | 459 ++++++++++++++++++ packages/cli/src/commands/ingest-commands.ts | 2 +- packages/cli/src/index.test.ts | 21 +- packages/cli/src/ingest-viz.test.ts | 8 +- packages/cli/src/ingest.test.ts | 97 ++++ packages/cli/src/ingest.ts | 129 ++++- packages/cli/src/setup-context.ts | 3 + packages/cli/src/setup.test.ts | 60 +++ .../connector-postgres/src/connector.test.ts | 19 + packages/connector-postgres/src/connector.ts | 14 +- .../historic-sql/evidence-tool.test.ts | 9 + .../adapters/historic-sql/evidence-tool.ts | 81 +++- .../ktx-daemon/src/ktx_daemon/sql_analysis.py | 4 +- python/ktx-daemon/tests/test_sql_analysis.py | 5 + 14 files changed, 877 insertions(+), 34 deletions(-) create mode 100644 docs/superpowers/plans/2026-05-11-historic-sql-redesign-manual-test-plan.md diff --git a/docs/superpowers/plans/2026-05-11-historic-sql-redesign-manual-test-plan.md b/docs/superpowers/plans/2026-05-11-historic-sql-redesign-manual-test-plan.md new file mode 100644 index 00000000..ba539195 --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-historic-sql-redesign-manual-test-plan.md @@ -0,0 +1,459 @@ +# External Hosted Postgres Discovery Manual Test Plan + +This plan tests KTX from the point of view of a new external user who discovers +the public CLI and connects the hosted Kaelio demo Postgres database as the +source. It starts with the credential-free seeded demo, then creates a real KTX +project that reads from `start.kaelio.com`. + +The plan avoids writing the database password into this repository. Keep the +password in a local environment variable and configure KTX with +`env:KTX_DEMO_DATABASE_URL`. + +## Scope + +Use this plan when the goal is to test KTX as an external user with the hosted +demo database. The commands use the published package shape through +`npx @kaelio/ktx`. If you are testing from this repository, you can replace +`npx @kaelio/ktx` with the local `ktx` alias. + +The required checks cover: + +- Running the packaged seeded demo without credentials. +- Creating a new project that points to the hosted Postgres demo source. +- Verifying the connection through the public CLI. +- Running public ingest against the hosted database. +- Searching semantic-layer sources through `agent sl list --query`. +- Running the Postgres historic-SQL readiness doctor. +- Running the historic-SQL adapter when the demo database exposes query + history and local LLM configuration is available. +- Searching generated historic-SQL usage and pattern pages when historic-SQL + ingest runs. + +## Prerequisites + +Prepare a clean terminal before starting. The required path needs Node and +network access to `start.kaelio.com:5432`. The optional historic-SQL ingest path +also needs `uv` and an LLM provider configured for KTX. + +1. Confirm Node 22 or newer is available: + + ```bash + node --version + ``` + + Expected: the version is `v22` or newer. + +2. Confirm the hosted Postgres endpoint is reachable from your network: + + ```bash + nc -vz start.kaelio.com 5432 + ``` + + Expected: the command reports that the TCP connection succeeds. If `nc` is + unavailable, continue and let `ktx connection test` perform the real check. + +3. Create an isolated test parent: + + ```bash + export KTX_EXTERNAL_PARENT="$(mktemp -d)" + export KTX_SEEDED_PROJECT="$KTX_EXTERNAL_PARENT/seeded-demo" + export KTX_HOSTED_PROJECT="$KTX_EXTERNAL_PARENT/hosted-postgres" + export KTX_RUNTIME_ROOT="$KTX_EXTERNAL_PARENT/managed-runtime" + ``` + + Expected: every file created by this test stays under + `$KTX_EXTERNAL_PARENT`. + +4. Set the hosted database URL without committing the password: + + ```bash + read -rsp "Demo database password: " KTX_DEMO_DB_PASSWORD + printf '\n' + export KTX_DEMO_DATABASE_URL="postgresql://kaelio_demo:${KTX_DEMO_DB_PASSWORD}" + export KTX_DEMO_DATABASE_URL="${KTX_DEMO_DATABASE_URL}@start.kaelio.com:5432/demo?sslmode=prefer" + unset KTX_DEMO_DB_PASSWORD + ``` + + Expected: `KTX_DEMO_DATABASE_URL` is set only in your shell. The project + config will store `env:KTX_DEMO_DATABASE_URL`, not the literal URL. + + The hosted demo endpoint uses libpq-style `sslmode=prefer`, which means + "try SSL, then fall back to non-SSL." KTX handles this mode explicitly for + the Node Postgres connector so the setup check can connect to the hosted + demo database. + +5. Verify the required shell variables before running any `ktx` commands: + + ```bash + : "${KTX_EXTERNAL_PARENT:?Run prerequisite step 3 in this shell first}" + : "${KTX_SEEDED_PROJECT:?Run prerequisite step 3 in this shell first}" + : "${KTX_HOSTED_PROJECT:?Run prerequisite step 3 in this shell first}" + : "${KTX_RUNTIME_ROOT:?Run prerequisite step 3 in this shell first}" + : "${KTX_DEMO_DATABASE_URL:?Run prerequisite step 4 in this shell first}" + ``` + + Expected: the command prints nothing and exits zero. If it prints a shell + error, rerun the referenced prerequisite in the same terminal before + continuing. + +## Step 1: Run the packaged seeded demo + +Start with the shortest public path. The seeded demo uses packaged data and +prebuilt context, so it must not ask for an LLM key. + +1. Run the seeded demo: + + ```bash + npx @kaelio/ktx setup demo \ + --project-dir "$KTX_SEEDED_PROJECT" \ + --plain \ + --no-input + ``` + + Expected: output includes `Mode: seeded`, `Source: packaged demo project`, + and `LLM calls: none`. + +2. Inspect the seeded demo: + + ```bash + npx @kaelio/ktx setup demo inspect \ + --project-dir "$KTX_SEEDED_PROJECT" \ + --json > "$KTX_EXTERNAL_PARENT/seeded-inspect.json" + ``` + + Expected: the JSON reports seeded mode, semantic-layer sources, knowledge + pages, and `reports/seeded-demo-report.json`. + +3. Search seeded semantic-layer sources: + + ```bash + npx @kaelio/ktx agent sl list \ + --project-dir "$KTX_SEEDED_PROJECT" \ + --json \ + --query "revenue" \ + > "$KTX_EXTERNAL_PARENT/seeded-sl-search.json" + ``` + + Expected: the command exits zero and returns at least one source with a + numeric `score`. + +## Step 2: Create a hosted Postgres project + +Create a new KTX project that uses the hosted demo database as the warehouse +source. This step enables historic SQL in the config, but it does not require +LLM credentials yet. + +If an earlier setup attempt failed after creating `$KTX_HOSTED_PROJECT/ktx.yaml`, +start a fresh test project before rerunning the `--new` command: + +```bash +export KTX_HOSTED_PROJECT="$KTX_EXTERNAL_PARENT/hosted-postgres-retry" +``` + +1. Create the project and connection: + + ```bash + npx @kaelio/ktx setup \ + --project-dir "${KTX_HOSTED_PROJECT:?Run prerequisite step 3 first}" \ + --new \ + --skip-llm \ + --skip-embeddings \ + --skip-sources \ + --skip-agents \ + --database postgres \ + --new-database-connection-id warehouse \ + --database-url env:KTX_DEMO_DATABASE_URL \ + --database-schema public \ + --enable-historic-sql \ + --historic-sql-min-executions 2 \ + --yes \ + --no-input + ``` + + Expected: `$KTX_HOSTED_PROJECT/ktx.yaml` exists and contains a `warehouse` + Postgres connection whose URL is `env:KTX_DEMO_DATABASE_URL`. + +2. Confirm the password was not written to disk: + + ```bash + grep -R "start.kaelio.com:5432/demo" "$KTX_HOSTED_PROJECT" || true + ``` + + Expected: no matches are printed. + +3. Inspect the generated connection config: + + ```bash + sed -n '1,120p' "$KTX_HOSTED_PROJECT/ktx.yaml" + ``` + + Expected: the `warehouse` connection has `driver: postgres`, + `url: env:KTX_DEMO_DATABASE_URL` or an equivalent URL reference, and + `historicSql.enabled: true`. + +## Step 3: Test the hosted connection + +Run the public connection check before ingest. This verifies that the external +user can reach and introspect the hosted source. + +1. Test the connection: + + ```bash + npx @kaelio/ktx connection test warehouse \ + --project-dir "$KTX_HOSTED_PROJECT" + ``` + + Expected: output includes `Driver: postgres` and a positive table count. + +2. List configured connections: + + ```bash + npx @kaelio/ktx connection list \ + --project-dir "$KTX_HOSTED_PROJECT" + ``` + + Expected: output includes the `warehouse` connection. + +## Step 4: Run public ingest + +Run the public ingest command. For warehouse connections, this performs the +database scan path and writes local context files that agent search can use. + +1. Run ingest: + + ```bash + npx @kaelio/ktx ingest warehouse \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --no-input + ``` + + Expected: output reports that ingest finished and that the `scan` step is + `done`. + +2. Inspect the latest public ingest status: + + ```bash + npx @kaelio/ktx ingest status \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --no-input + ``` + + Expected: the status references the hosted `warehouse` source and a + completed scan. + +3. Confirm semantic-layer files exist: + + ```bash + find "$KTX_HOSTED_PROJECT/semantic-layer/warehouse" \ + -name '*.yaml' -print | head + ``` + + Expected: at least one semantic-layer YAML file is printed. + +## Step 5: Search the hosted database context + +Use the agent-facing semantic-layer search command after ingest. This validates +the discovery path that agents use for database analysis. + +1. Run semantic-layer search: + + ```bash + npx @kaelio/ktx agent sl list \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --connection-id warehouse \ + --json \ + --query "orders revenue customers" \ + > "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" + ``` + + Expected: the command exits zero. + +2. Validate search metadata: + + ```bash + node - "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" <<'NODE' + const { readFileSync } = require('node:fs'); + const result = JSON.parse(readFileSync(process.argv[2], 'utf8')); + const assert = (ok, message) => { + if (!ok) throw new Error(message); + }; + assert(Array.isArray(result.sources), 'sources missing'); + assert(result.sources.length > 0, 'no semantic-layer hits'); + assert(Number.isFinite(result.sources[0].score), 'score missing'); + console.log('hosted semantic-layer search ok'); + NODE + ``` + + Expected: the script prints `hosted semantic-layer search ok`. + +3. Read the top source: + + ```bash + node - "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" \ + > "$KTX_EXTERNAL_PARENT/hosted-top-source-name.txt" <<'NODE' + const { readFileSync } = require('node:fs'); + const result = JSON.parse(readFileSync(process.argv[2], 'utf8')); + process.stdout.write(result.sources[0].name); + NODE + + npx @kaelio/ktx agent sl read \ + "$(cat "$KTX_EXTERNAL_PARENT/hosted-top-source-name.txt")" \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --connection-id warehouse \ + --json \ + > "$KTX_EXTERNAL_PARENT/hosted-sl-read.json" + ``` + + Expected: the JSON includes the full semantic-layer source. + +## Step 6: Check historic-SQL readiness + +Run the Postgres historic-SQL doctor. This determines whether the hosted demo +database exposes the query-history prerequisites needed for the redesign's +historic-SQL adapter. + +1. Run doctor: + + ```bash + npx @kaelio/ktx dev doctor \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --no-input + ``` + + Expected: output includes a `Postgres Historic SQL (warehouse)` check. + +2. Interpret the result: + + - `PASS` means the hosted source is ready for the optional historic-SQL + ingest path. + - `WARN` or `FAIL` means the external discovery test still covers scan and + semantic-layer search, but historic-SQL query-history ingestion is blocked + by database permissions or configuration. + +## Step 7: Optional historic-SQL ingest + +Run this section only when the doctor passes and the KTX project has an LLM +provider configured. Historic-SQL table and pattern curation uses LLM-backed +skills, so this path is not credential-free. + +1. Configure LLM and embeddings if you skipped them during setup: + + ```bash + npx @kaelio/ktx setup \ + --project-dir "$KTX_HOSTED_PROJECT" + ``` + + Expected: `npx @kaelio/ktx setup status --project-dir "$KTX_HOSTED_PROJECT"` + reports that LLM and embedding setup are ready. + +2. Run historic-SQL ingest: + + ```bash + npx @kaelio/ktx dev ingest run \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --connection-id warehouse \ + --adapter historic-sql \ + --plain \ + --yes \ + --no-input + ``` + + Expected: the command exits zero and schedules `historic-sql-table-` and + `historic-sql-patterns-` WorkUnits when the database has qualifying query + history. + +3. Locate the latest historic-SQL manifest: + + ```bash + find "$KTX_HOSTED_PROJECT/raw-sources/warehouse/historic-sql" \ + -name manifest.json -print | sort | tail -n 1 + ``` + + Expected: a manifest path is printed. + +4. Search for generated usage: + + ```bash + npx @kaelio/ktx agent sl list \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --connection-id warehouse \ + --json \ + --query "common filters joins usage" \ + > "$KTX_EXTERNAL_PARENT/historic-sl-search.json" + ``` + + Expected: hits produced from historic-SQL usage include `score`, and hits + with projected usage include `frequencyTier` and `snippet`. + +5. Search for generated pattern pages: + + ```bash + npx @kaelio/ktx agent wiki search "historic sql pattern" \ + --project-dir "$KTX_HOSTED_PROJECT" \ + --json \ + --limit 10 \ + > "$KTX_EXTERNAL_PARENT/historic-wiki-search.json" + ``` + + Expected: results include pages whose keys start with `historic-sql/` when + the run produced cross-table patterns. + +## Step 8: Record results + +Capture the result in a way that separates the external discovery path from the +optional historic-SQL path. + +1. Save useful outputs: + + ```bash + mkdir -p "$KTX_EXTERNAL_PARENT/results" + cp "$KTX_EXTERNAL_PARENT/seeded-inspect.json" \ + "$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true + cp "$KTX_EXTERNAL_PARENT/hosted-sl-search.json" \ + "$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true + cp "$KTX_EXTERNAL_PARENT/hosted-sl-read.json" \ + "$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true + cp "$KTX_EXTERNAL_PARENT/historic-sl-search.json" \ + "$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true + cp "$KTX_EXTERNAL_PARENT/historic-wiki-search.json" \ + "$KTX_EXTERNAL_PARENT/results/" 2>/dev/null || true + ``` + + Expected: the results directory contains the JSON outputs created during the + run. + +2. Mark these areas as pass, fail, or blocked: + + - Public package discovery through `npx @kaelio/ktx`. + - Seeded demo without credentials. + - Hosted Postgres project setup. + - Hosted Postgres connection test. + - Public ingest scan. + - Semantic-layer search and read. + - Historic-SQL doctor. + - Historic-SQL ingest, if doctor and LLM setup allow it. + - Historic-SQL usage search, if ingest ran. + - Historic-SQL wiki pattern search, if ingest ran. + + Expected: every required external discovery area passes. Historic-SQL ingest + is pass, fail, or blocked based on the doctor result and local LLM + configuration. + +## Cleanup + +Remove the disposable project after collecting results. Keep it only when you +need the files for debugging. + +1. Stop the managed runtime: + + ```bash + npx @kaelio/ktx runtime stop || true + ``` + +2. Remove the test parent: + + ```bash + rm -rf "$KTX_EXTERNAL_PARENT" + ``` + + Expected: temporary projects and runtime files are removed. diff --git a/packages/cli/src/commands/ingest-commands.ts b/packages/cli/src/commands/ingest-commands.ts index 772c107d..5ad357e1 100644 --- a/packages/cli/src/commands/ingest-commands.ts +++ b/packages/cli/src/commands/ingest-commands.ts @@ -92,7 +92,7 @@ export function registerIngestCommands( sourceDir: options.sourceDir ? resolve(options.sourceDir) : undefined, databaseIntrospectionUrl: options.databaseIntrospectionUrl || undefined, cliVersion: context.packageInfo.version, - runtimeInstallPolicy: runtimeInstallPolicyFromFlags(options), + runtimeInstallPolicy: runtimeInstallPolicyFromFlags({ yes: options.yes }), ...(options.debugLlmRequestFile ? { debugLlmRequestFile: resolve(options.debugLlmRequestFile) } : {}), outputMode: outputMode(options), ...inputMode(options), diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index def47cd8..a575eeed 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -920,7 +920,7 @@ describe('runKtxCli', () => { sourceDir: tempDir, databaseIntrospectionUrl: undefined, cliVersion: '0.0.0-private', - runtimeInstallPolicy: 'never', + runtimeInstallPolicy: 'prompt', debugLlmRequestFile: `${tempDir}/debug.jsonl`, outputMode: 'json', inputMode: 'disabled', @@ -934,9 +934,9 @@ describe('runKtxCli', () => { expect(ingestReplayHelpIo.stderr()).toBe(''); }); - it('routes ingest managed runtime install policies', async () => { + it('routes ingest managed runtime install policy separately from visualization input mode', async () => { const autoIo = makeIo(); - const conflictIo = makeIo(); + const nonInteractiveIo = makeIo(); const ingest = vi.fn(async () => 0); await expect( @@ -972,10 +972,10 @@ describe('runKtxCli', () => { '--yes', '--no-input', ], - conflictIo.io, + nonInteractiveIo.io, { ingest }, ), - ).resolves.toBe(1); + ).resolves.toBe(0); expect(ingest).toHaveBeenCalledWith( expect.objectContaining({ @@ -985,7 +985,16 @@ describe('runKtxCli', () => { }), autoIo.io, ); - expect(conflictIo.stderr()).toContain('Choose only one runtime install mode: --yes or --no-input'); + expect(ingest).toHaveBeenCalledWith( + expect.objectContaining({ + command: 'run', + cliVersion: '0.0.0-private', + runtimeInstallPolicy: 'auto', + inputMode: 'disabled', + }), + nonInteractiveIo.io, + ); + expect(nonInteractiveIo.stderr()).toBe(''); }); it('dispatches public connection through the existing connection implementation', async () => { diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts index 936490d7..d7bcfe50 100644 --- a/packages/cli/src/ingest-viz.test.ts +++ b/packages/cli/src/ingest-viz.test.ts @@ -304,7 +304,7 @@ describe('runKtxIngest viz and replay', () => { expect(io.stdout()).toContain('KTX memory flow warehouse/fake done'); }); - it('does not attach a live memory-flow sink for plain run output', async () => { + it('attaches a plain progress memory-flow sink for interactive plain run output', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); const sourceDir = join(tempDir, 'source'); @@ -329,7 +329,8 @@ describe('runKtxIngest viz and replay', () => { ), ).resolves.toBe(0); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake'); expect(io.stdout()).toContain('Job: plain-run'); expect(io.stdout()).not.toContain('KTX memory flow'); }); @@ -403,7 +404,8 @@ describe('runKtxIngest viz and replay', () => { ).resolves.toBe(0); expect(startLiveMemoryFlow).not.toHaveBeenCalled(); - expect(runLocal).toHaveBeenCalledWith(expect.not.objectContaining({ memoryFlow: expect.anything() })); + expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() })); + expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake'); expect(io.stdout()).toContain('Job: raw-missing-viz-run'); expect(io.stdout()).not.toContain('KTX memory flow'); expect(io.stderr()).toContain( diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index f6907895..4e0d35e4 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -762,6 +762,103 @@ describe('runKtxIngest', () => { ); }); + it('prints live progress for plain local ingest in interactive terminals', async () => { + const projectDir = join(tempDir, 'historic-sql-progress-project'); + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: historic-sql-progress-project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' minExecutions: 2', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const createdAdapters: SourceAdapter[] = [ + { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const createAdapters = vi.fn(() => createdAdapters as never); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + expect(input.memoryFlow).toBeDefined(); + input.memoryFlow?.emit({ + type: 'source_acquired', + adapter: 'historic-sql', + trigger: 'manual_resync', + fileCount: 3, + }); + input.memoryFlow?.update({ syncId: 'sync-progress-1' }); + input.memoryFlow?.emit({ type: 'raw_snapshot_written', syncId: 'sync-progress-1', rawFileCount: 3 }); + input.memoryFlow?.emit({ type: 'diff_computed', added: 2, modified: 0, deleted: 0, unchanged: 1 }); + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'historic-sql-table-public-orders', + rawFiles: ['tables/public/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 1, workUnitCount: 1, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: 'historic-sql-table-public-orders', + skills: ['historic_sql_table_digest'], + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: 'historic-sql-table-public-orders', + status: 'success', + }); + input.memoryFlow?.emit({ type: 'saved', commitSha: null, wikiCount: 0, slCount: 1 }); + input.memoryFlow?.emit({ type: 'provenance_recorded', rowCount: 3 }); + input.memoryFlow?.emit({ type: 'report_created', runId: 'run-live-1', reportPath: 'report-live-1' }); + input.memoryFlow?.finish('done'); + return completedLocalBundleRun(input, input.jobId ?? 'historic-progress-job'); + }); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + outputMode: 'plain', + }, + io.io, + { + createAdapters, + runLocalIngest: runLocal, + jobIdFactory: () => 'historic-progress-job', + }, + ), + ).resolves.toBe(0); + + const stdout = io.stdout(); + expect(stdout).toContain('[5%] Fetching source files for warehouse/historic-sql'); + expect(stdout).toContain('[15%] Fetched 3 source files from historic-sql'); + expect(stdout).toContain('[45%] Planned 1 work unit'); + expect(stdout).toContain('[80%] Processed 1/1 work units'); + expect(stdout).toContain('[100%] Ingest completed'); + expect(stdout.indexOf('[5%] Fetching source files for warehouse/historic-sql')).toBeLessThan( + stdout.indexOf('Report: report-live-1'), + ); + expect(io.stderr()).toBe(''); + }); + it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index d9f4d434..5ce967a4 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -8,6 +8,7 @@ import { ingestReportToMemoryFlowReplay, type LocalMetabaseFanoutResult, type LocalMetabaseFanoutProgress, + type MemoryFlowEvent, type MemoryFlowReplayInput, type RunLocalIngestOptions, renderMemoryFlowReplay, @@ -170,6 +171,118 @@ function createMetabaseFanoutProgress( }; } +function formatDiffProgress(event: Extract): string { + return `+${event.added}/~${event.modified}/-${event.deleted}/=${event.unchanged}`; +} + +function completedWorkUnitCount(snapshot: MemoryFlowReplayInput): number { + return snapshot.events.filter((event) => event.type === 'work_unit_finished').length; +} + +function plainIngestEventProgress( + event: MemoryFlowEvent, + snapshot: MemoryFlowReplayInput, +): { percent: number; message: string } | null { + switch (event.type) { + case 'source_acquired': + return { + percent: 15, + message: `Fetched ${pluralize(event.fileCount, 'source file')} from ${event.adapter}`, + }; + case 'raw_snapshot_written': + return { + percent: 25, + message: `Wrote raw snapshot ${event.syncId} with ${pluralize(event.rawFileCount, 'file')}`, + }; + case 'diff_computed': + return { percent: 35, message: `Computed source diff ${formatDiffProgress(event)}` }; + case 'chunks_planned': + return { + percent: 45, + message: `Planned ${pluralize(event.workUnitCount, 'work unit')}`, + }; + case 'stage_skipped': + return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` }; + case 'work_unit_started': + return { percent: 55, message: `Processing ${event.unitKey}` }; + case 'work_unit_finished': { + const total = snapshot.plannedWorkUnits.length || completedWorkUnitCount(snapshot); + const completed = completedWorkUnitCount(snapshot); + const percent = total > 0 ? 55 + Math.round((completed / total) * 25) : 80; + return { + percent, + message: `Processed ${completed}/${total} work units`, + }; + } + case 'reconciliation_finished': + return { + percent: 85, + message: `Reconciled results with ${pluralize(event.conflictCount, 'conflict')} and ${pluralize( + event.fallbackCount, + 'fallback', + )}`, + }; + case 'saved': + return { + percent: 90, + message: `Saved memory updates (${event.wikiCount} wiki, ${event.slCount} SL)`, + }; + case 'provenance_recorded': + return { percent: 95, message: `Recorded ${pluralize(event.rowCount, 'provenance row')}` }; + case 'report_created': + return { percent: 98, message: `Created ingest report ${event.reportPath ?? event.runId}` }; + case 'scope_detected': + case 'work_unit_step': + case 'candidate_action': + return null; + } +} + +function shouldWritePlainIngestProgress( + outputMode: KtxIngestOutputMode, + io: KtxIngestIo, + env: NodeJS.ProcessEnv, +): boolean { + return outputMode === 'plain' && io.stdout.isTTY === true && env.CI !== 'true'; +} + +function createPlainIngestProgressRenderer( + args: Extract, + io: KtxIngestIo, +): { start(): void; update(snapshot: MemoryFlowReplayInput): void } { + let printedEvents = 0; + let lastPercent = 0; + let printedCompletion = false; + + const write = (percent: number, message: string) => { + const nextPercent = Math.max(lastPercent, Math.max(0, Math.min(100, percent))); + lastPercent = nextPercent; + io.stdout.write(`[${nextPercent}%] ${message}\n`); + }; + + return { + start() { + write(5, `Fetching source files for ${args.connectionId}/${args.adapter}`); + }, + update(snapshot) { + while (printedEvents < snapshot.events.length) { + const event = snapshot.events[printedEvents++]; + if (!event) { + continue; + } + const progress = plainIngestEventProgress(event, snapshot); + if (progress) { + write(progress.percent, progress.message); + } + } + if (!printedCompletion && snapshot.status !== 'running') { + printedCompletion = true; + write(100, snapshot.status === 'done' ? 'Ingest completed' : 'Ingest failed'); + } + }, + }; +} + function writeReportJson(report: IngestReportSnapshot, io: KtxIngestIo): void { io.stdout.write(`${JSON.stringify(report, null, 2)}\n`); } @@ -366,10 +479,14 @@ export async function runKtxIngest( }); const shouldUseLiveViz = runOutputMode === 'viz' && (args.inputMode ?? 'auto') === 'auto' && isInteractiveTerminal(io); - const initialMemoryFlow = shouldUseLiveViz ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined; + const plainProgress = shouldWritePlainIngestProgress(runOutputMode, io, env) + ? createPlainIngestProgressRenderer(args, io) + : null; + const initialMemoryFlow = + shouldUseLiveViz || plainProgress ? initialRunMemoryFlowInput(args, jobId ?? 'pending') : undefined; let latestMemoryFlowSnapshot: MemoryFlowReplayInput | null = initialMemoryFlow ?? null; - if (initialMemoryFlow && isTuiCapableIo(io)) { + if (shouldUseLiveViz && initialMemoryFlow && isTuiCapableIo(io)) { const startLiveMemoryFlow = deps.startLiveMemoryFlow ?? startLiveMemoryFlowTui; liveTui = await startLiveMemoryFlow(initialMemoryFlow, io); } @@ -382,13 +499,17 @@ export async function runKtxIngest( liveTui.update(snapshot); return; } - if (!liveTui) { + if (shouldUseLiveViz && !liveTui) { writeMemoryFlowInput(snapshot, io, { clear: true }); + return; } + plainProgress?.update(snapshot); }, }) : undefined; + plainProgress?.start(); + try { const result = await executeLocalIngest({ project, @@ -403,7 +524,7 @@ export async function runKtxIngest( ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), ...(memoryFlow ? { memoryFlow } : {}), }); - if (memoryFlow) { + if (shouldUseLiveViz && memoryFlow) { latestMemoryFlowSnapshot = memoryFlow.snapshot(); liveTui?.close(); liveTui = null; diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index f88635f4..fc7a1aef 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -767,6 +767,9 @@ export async function runKtxSetupContextStep( const missing = missingCapabilities(project); if (missing.length > 0) { + if (args.allowEmpty === true) { + return { status: 'skipped', projectDir: args.projectDir }; + } writeMissingCapabilities(missing, io); return { status: 'missing-input', projectDir: args.projectDir }; } diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 7cb0d0df..c8961e2a 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -1174,6 +1174,66 @@ describe('setup status', () => { expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']); }); + it('does not fail context build when prerequisites were explicitly skipped and agents are skipped', async () => { + const calls: string[] = []; + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DEMO_DATABASE_URL', + ' readonly: true', + '', + ].join('\n'), + 'utf-8', + ); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.2.0', + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + databaseSchemas: [], + }, + io.io, + { + model: async () => { + calls.push('model'); + return { status: 'skipped', projectDir: tempDir }; + }, + embeddings: async () => { + calls.push('embeddings'); + return { status: 'skipped', projectDir: tempDir }; + }, + databases: async () => { + calls.push('databases'); + return { status: 'skipped', projectDir: tempDir }; + }, + sources: async () => { + calls.push('sources'); + return { status: 'skipped', projectDir: tempDir }; + }, + }, + ), + ).resolves.toBe(0); + + expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']); + expect(io.stderr()).not.toContain('KTX cannot build agent-ready context yet.'); + }); + it('runs context after sources and before agents in full setup', async () => { const calls: string[] = []; const io = makeIo(); diff --git a/packages/connector-postgres/src/connector.test.ts b/packages/connector-postgres/src/connector.test.ts index 3bdfc109..297eb123 100644 --- a/packages/connector-postgres/src/connector.test.ts +++ b/packages/connector-postgres/src/connector.test.ts @@ -129,6 +129,25 @@ describe('KtxPostgresScanConnector', () => { options: '-c search_path=analytics,public', ssl: { rejectUnauthorized: false }, }); + const libpqPreferConfig = postgresPoolConfigFromConfig({ + connectionId: 'warehouse', + connection: { + driver: 'postgres', + url: 'env:DEMO_DATABASE_URL', + readonly: true, + }, + env: { + DEMO_DATABASE_URL: 'postgresql://reader@start.kaelio.com:5432/demo?sslmode=prefer', + }, + }); + expect(libpqPreferConfig).toMatchObject({ + host: 'start.kaelio.com', + port: 5432, + database: 'demo', + user: 'reader', + }); + expect(libpqPreferConfig).not.toHaveProperty('connectionString'); + expect(libpqPreferConfig).not.toHaveProperty('ssl'); expect(() => postgresPoolConfigFromConfig({ connectionId: 'warehouse', diff --git a/packages/connector-postgres/src/connector.ts b/packages/connector-postgres/src/connector.ts index a780663f..288ef25c 100644 --- a/packages/connector-postgres/src/connector.ts +++ b/packages/connector-postgres/src/connector.ts @@ -57,6 +57,8 @@ export interface KtxPostgresConnectionConfig { schema?: string; schemas?: string[]; ssl?: boolean; + sslmode?: string; + sslMode?: string; rejectUnauthorized?: boolean; readonly?: boolean; [key: string]: unknown; @@ -253,15 +255,22 @@ function numberValue(value: unknown): number | undefined { function parsePostgresUrl(url: string): Partial { const parsed = new URL(url); + const sslmode = parsed.searchParams.get('sslmode') ?? undefined; return { host: parsed.hostname, port: parsed.port ? Number(parsed.port) : undefined, database: parsed.pathname.replace(/^\/+/, '') || undefined, username: parsed.username ? decodeURIComponent(parsed.username) : undefined, password: parsed.password ? decodeURIComponent(parsed.password) : undefined, + ...(sslmode ? { sslmode } : {}), }; } +function normalizedSslMode(connection: KtxPostgresConnectionConfig): string | undefined { + const value = connection.sslmode ?? connection.sslMode; + return typeof value === 'string' && value.trim().length > 0 ? value.trim().toLowerCase() : undefined; +} + function schemasFromConnection(connection: KtxPostgresConnectionConfig): string[] { if (Array.isArray(connection.schemas) && connection.schemas.length > 0) { return connection.schemas.filter((schema): schema is string => typeof schema === 'string' && schema.length > 0); @@ -299,6 +308,7 @@ export function postgresPoolConfigFromConfig(input: { const database = stringConfigValue(merged, 'database', env); const user = stringConfigValue(merged, 'username', env) ?? stringConfigValue(merged, 'user', env); const password = stringConfigValue(merged, 'password', env); + const sslmode = normalizedSslMode(merged); if (!referencedUrl && !host) { throw new Error(`Native PostgreSQL connector requires connections.${input.connectionId}.host or url`); @@ -314,7 +324,7 @@ export function postgresPoolConfigFromConfig(input: { max: 10, idleTimeoutMillis: 30_000, connectionTimeoutMillis: 10_000, - ...(referencedUrl + ...(referencedUrl && sslmode !== 'prefer' && sslmode !== 'disable' ? { connectionString: referencedUrl } : { host, port: numberValue(merged.port) ?? 5432, database, user, password }), }; @@ -322,7 +332,7 @@ export function postgresPoolConfigFromConfig(input: { if (searchPathSchemas.length > 0) { config.options = `-c search_path=${searchPathSchemas.join(',')}`; } - if (merged.ssl) { + if (merged.ssl && sslmode !== 'prefer' && sslmode !== 'disable') { config.ssl = { rejectUnauthorized: merged.rejectUnauthorized ?? true }; } return config; diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts index a8219192..ae16d105 100644 --- a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts +++ b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.test.ts @@ -1,7 +1,16 @@ import { describe, expect, it, vi } from 'vitest'; +import { asSchema } from 'ai'; import { createEmitHistoricSqlEvidenceTool } from './evidence-tool.js'; describe('emit_historic_sql_evidence tool', () => { + it('exposes an AI SDK v6 tool input schema with top-level object type', async () => { + const tool = createEmitHistoricSqlEvidenceTool(); + + expect(await asSchema(tool.inputSchema).jsonSchema).toMatchObject({ + type: 'object', + }); + }); + it('writes table usage evidence to the ignored run evidence directory', async () => { const writeFile = vi.fn(async () => ({ success: true, commitHash: null })); const tool = createEmitHistoricSqlEvidenceTool(); diff --git a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts index a6641a74..29d66cb2 100644 --- a/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts +++ b/packages/context/src/ingest/adapters/historic-sql/evidence-tool.ts @@ -6,6 +6,42 @@ import { patternOutputSchema, tableUsageOutputSchema } from './skill-schemas.js' const SYSTEM_AUTHOR = 'System User'; const SYSTEM_EMAIL = 'system@example.com'; +const emitHistoricSqlEvidenceInputSchema = z + .object({ + kind: z.enum(['table_usage', 'pattern']), + table: z.string().min(1).optional(), + rawPath: z.string().min(1), + usage: tableUsageOutputSchema.optional(), + pattern: patternOutputSchema.optional(), + }) + .superRefine((input, ctx) => { + if (input.kind === 'table_usage') { + if (!input.table) { + ctx.addIssue({ + code: 'custom', + path: ['table'], + message: 'table is required when kind is table_usage', + }); + } + if (!input.usage) { + ctx.addIssue({ + code: 'custom', + path: ['usage'], + message: 'usage is required when kind is table_usage', + }); + } + } + if (input.kind === 'pattern' && !input.pattern) { + ctx.addIssue({ + code: 'custom', + path: ['pattern'], + message: 'pattern is required when kind is pattern', + }); + } + }); + +type EmitHistoricSqlEvidenceInput = z.infer; + interface EmitHistoricSqlEvidenceToolContext { connectionId?: string | null; session?: { @@ -23,30 +59,42 @@ interface EmitHistoricSqlEvidenceToolContext { }; } -function unitKeyForEvidence(input: { kind: string; table?: string; pattern?: { slug: string } }): string { +function unitKeyForEvidence(input: EmitHistoricSqlEvidenceInput): string { if (input.kind === 'table_usage') { return `historic-sql-table-${String(input.table).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`; } return `historic-sql-pattern-${String(input.pattern?.slug).replace(/[^a-zA-Z0-9]+/g, '-').replace(/^-+|-+$/g, '')}`; } +function evidenceEnvelope(input: EmitHistoricSqlEvidenceInput, connectionId: string) { + if (input.kind === 'table_usage') { + if (!input.table || !input.usage) { + throw new Error('Invalid historic-SQL table usage evidence input.'); + } + return { + kind: 'table_usage' as const, + connectionId, + table: input.table, + rawPath: input.rawPath, + usage: input.usage, + }; + } + if (!input.pattern) { + throw new Error('Invalid historic-SQL pattern evidence input.'); + } + return { + kind: 'pattern' as const, + connectionId, + rawPath: input.rawPath, + pattern: input.pattern, + }; +} + export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricSqlEvidenceToolContext) { return tool({ description: 'Record typed historic-SQL evidence for deterministic projection. Use this instead of wiki_write, sl_write_source, sl_edit_source, or context_candidate_write during historic-SQL WorkUnits.', - inputSchema: z.discriminatedUnion('kind', [ - z.object({ - kind: z.literal('table_usage'), - table: z.string().min(1), - rawPath: z.string().min(1), - usage: tableUsageOutputSchema, - }), - z.object({ - kind: z.literal('pattern'), - rawPath: z.string().min(1), - pattern: patternOutputSchema, - }), - ]), + inputSchema: emitHistoricSqlEvidenceInputSchema, execute: async (input, options): Promise => { const context = (options.experimental_context as EmitHistoricSqlEvidenceToolContext | undefined) ?? defaultContext; const ingest = context?.session?.ingest; @@ -56,7 +104,8 @@ export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricS } const unitKey = unitKeyForEvidence(input); - const content = serializeHistoricSqlEvidence({ ...input, connectionId: context.connectionId }); + const evidence = evidenceEnvelope(input, context.connectionId); + const content = serializeHistoricSqlEvidence(evidence); await configService.writeFile( historicSqlEvidencePath(ingest.runId, unitKey), content, @@ -65,7 +114,7 @@ export function createEmitHistoricSqlEvidenceTool(defaultContext?: EmitHistoricS `Record historic-SQL evidence: ${unitKey}`, { skipLock: true }, ); - const label = input.kind === 'table_usage' ? input.table : input.pattern.slug; + const label = evidence.kind === 'table_usage' ? evidence.table : evidence.pattern.slug; return `Recorded historic-SQL ${input.kind} evidence for ${label}.`; }, }); diff --git a/python/ktx-daemon/src/ktx_daemon/sql_analysis.py b/python/ktx-daemon/src/ktx_daemon/sql_analysis.py index 80e6d85b..9a222098 100644 --- a/python/ktx-daemon/src/ktx_daemon/sql_analysis.py +++ b/python/ktx-daemon/src/ktx_daemon/sql_analysis.py @@ -61,10 +61,10 @@ def _column_name(column: exp.Column) -> str: return str(column.name) -def _columns_from_nodes(nodes: list[exp.Expression | None]) -> list[str]: +def _columns_from_nodes(nodes: list[object]) -> list[str]: names: list[str] = [] for node in nodes: - if node is None: + if not isinstance(node, exp.Expression): continue names.extend(_column_name(column) for column in node.find_all(exp.Column)) return _ordered_unique(names) diff --git a/python/ktx-daemon/tests/test_sql_analysis.py b/python/ktx-daemon/tests/test_sql_analysis.py index ac800a09..c1fc35f8 100644 --- a/python/ktx-daemon/tests/test_sql_analysis.py +++ b/python/ktx-daemon/tests/test_sql_analysis.py @@ -3,6 +3,7 @@ from __future__ import annotations from ktx_daemon.sql_analysis import ( AnalyzeSqlBatchItem, AnalyzeSqlBatchRequest, + _columns_from_nodes, analyze_sql_batch_response, ) @@ -51,3 +52,7 @@ def test_analyze_sql_batch_returns_per_item_parse_errors() -> None: assert result.tables_touched == [] assert result.columns_by_clause == {} assert result.error is not None + + +def test_columns_from_nodes_ignores_non_expression_clause_values() -> None: + assert _columns_from_nodes([True, False, None]) == []