From 02e096a8e3d63d8f220b1b965da5f694105ca0c7 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 01:05:28 +0200 Subject: [PATCH 01/33] fix(cli): honor configured llm backends in setup --- packages/cli/src/setup-models.test.ts | 49 ++++++++++++ packages/cli/src/setup-models.ts | 30 ++++++-- packages/cli/src/setup.test.ts | 103 ++++++++++++++++++++++++++ packages/cli/src/setup.ts | 11 +-- 4 files changed, 177 insertions(+), 16 deletions(-) diff --git a/packages/cli/src/setup-models.test.ts b/packages/cli/src/setup-models.test.ts index 81c8b361..96092b25 100644 --- a/packages/cli/src/setup-models.test.ts +++ b/packages/cli/src/setup-models.test.ts @@ -676,4 +676,53 @@ describe('setup Anthropic model step', () => { ).resolves.toMatchObject({ status: 'ready' }); expect(healthCheck).not.toHaveBeenCalled(); }); + + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('preserves already configured $backend llm setup without asking for Anthropic credentials', async (fixture) => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'setup:', + ' database_connection_ids: []', + ' completed_steps:', + ' - project', + ' - llm', + 'connections: {}', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + ].join('\n'), + 'utf-8', + ); + + const healthCheck = vi.fn(async () => ({ ok: true as const })); + const io = makeIo(); + await expect( + runKtxSetupAnthropicModelStep({ projectDir: tempDir, inputMode: 'disabled', skipLlm: false }, io.io, { + healthCheck, + }), + 
).resolves.toMatchObject({ status: 'ready' }); + + expect(healthCheck).not.toHaveBeenCalled(); + expect(io.stdout()).toContain(`LLM ready: yes (${fixture.model})`); + expect(io.stderr()).not.toContain('Anthropic'); + }); }); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 28908849..5b0dea18 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -1,5 +1,6 @@ import { writeFile } from 'node:fs/promises'; import { cancel, isCancel, password, select, text } from '@clack/prompts'; +import { resolveLocalKtxLlmConfig } from '@ktx/context'; import { resolveKtxConfigReference } from '@ktx/context/core'; import { type KtxProjectConfig, @@ -170,13 +171,26 @@ export async function fetchAnthropicModels( return models.map((item, index) => ({ ...item, recommended: index === Math.max(recommendedIndex, 0) })); } -function hasCompletedLlm(config: KtxProjectConfig): boolean { - return ( - config.setup?.completed_steps.includes('llm') === true && - config.llm.provider.backend === 'anthropic' && - typeof config.llm.models.default === 'string' && - config.llm.models.default.length > 0 - ); +export function isKtxSetupLlmConfigReady(config: KtxProjectLlmConfig): boolean { + let resolved: KtxLlmConfig | null; + try { + resolved = resolveLocalKtxLlmConfig(config, process.env); + } catch { + return false; + } + if (!resolved) { + return false; + } + + if (resolved.backend === 'vertex') { + return typeof resolved.vertex?.location === 'string' && resolved.vertex.location.trim().length > 0; + } + + return resolved.backend === 'anthropic' || resolved.backend === 'gateway'; +} + +function hasUsableConfiguredLlm(config: KtxProjectConfig): boolean { + return isKtxSetupLlmConfigReady(config.llm); } function buildProjectLlmConfig( @@ -386,7 +400,7 @@ export async function runKtxSetupAnthropicModelStep( const project = await loadKtxProject({ projectDir: args.projectDir }); if ( args.forcePrompt !== true && - 
hasCompletedLlm(project.config) && + hasUsableConfiguredLlm(project.config) && !args.anthropicApiKeyEnv && !args.anthropicApiKeyFile && !args.anthropicModel diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index c8961e2a..ac2038ab 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -82,6 +82,38 @@ describe('setup status', () => { }); }); + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('reports configured $backend llm backends as setup-ready', async (fixture) => { + await mkdir(tempDir, { recursive: true }); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + 'connections: {}', + ].join('\n'), + 'utf-8', + ); + + await expect(readKtxSetupStatus(tempDir)).resolves.toMatchObject({ + llm: { backend: fixture.backend, ready: true, model: fixture.model }, + }); + }); + it('uses setup database connection ids when present', async () => { await writeFile( join(tempDir, 'ktx.yaml'), @@ -1174,6 +1206,77 @@ describe('setup status', () => { expect(calls).toEqual(['model', 'embeddings', 'databases', 'sources']); }); + it.each([ + { + backend: 'vertex', + providerLines: [' backend: vertex', ' vertex:', ' project: kaelio-dev', ' location: us-east5'], + model: 'claude-sonnet-4-6', + }, + { + backend: 'gateway', + providerLines: [' backend: gateway', ' gateway:', ' api_key: env:AI_GATEWAY_API_KEY'], + model: 'anthropic/claude-sonnet-4-6', + }, + ])('adds a dbt source in non-interactive setup with existing $backend llm config', async (fixture) => { + const io = makeIo(); + await writeFile( + join(tempDir, 'ktx.yaml'), + [ 
+ 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + ' - databases', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_URL', + 'llm:', + ' provider:', + ...fixture.providerLines, + ' models:', + ` default: ${fixture.model}`, + ].join('\n'), + 'utf-8', + ); + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'existing', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.2.0', + skipLlm: false, + skipEmbeddings: true, + skipDatabases: true, + source: 'dbt', + sourceConnectionId: 'dbt-main', + sourceGitUrl: 'https://github.com/Kaelio/klo-dbt-demo', + sourceBranch: 'main', + sourceProjectName: 'orbit_analytics', + sourceWarehouseConnectionId: 'warehouse', + skipSources: false, + databaseSchemas: [], + }, + io.io, + { + sourcesDeps: { validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'dbt project valid' })) }, + context: vi.fn(async () => ({ status: 'ready' as const, projectDir: tempDir, runId: 'setup-context-test' })), + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).not.toContain('Anthropic'); + expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).toContain('dbt-main:'); + }); + it('does not fail context build when prerequisites were explicitly skipped and agents are skipped', async () => { const calls: string[] = []; const io = makeIo(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 89c5dcdc..9859d80f 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -22,7 +22,7 @@ import { runKtxSetupDatabasesStep, } from './setup-databases.js'; import { type KtxSetupEmbeddingsDeps, runKtxSetupEmbeddingsStep } from './setup-embeddings.js'; -import { type KtxSetupModelDeps, runKtxSetupAnthropicModelStep } from './setup-models.js'; +import { type KtxSetupModelDeps, isKtxSetupLlmConfigReady, runKtxSetupAnthropicModelStep } from 
'./setup-models.js'; import { type KtxSetupProjectDeps, runKtxSetupProjectStep } from './setup-project.js'; import { isKtxPreAgentSetupReady, @@ -233,10 +233,6 @@ async function runKtxSetupDemoFromEntryMenu( ); } -function llmReady(status: KtxSetupStatus['llm']): boolean { - return status.backend === 'anthropic' && typeof status.model === 'string' && status.model.length > 0; -} - function embeddingsReady(status: KtxSetupStatus['embeddings']): boolean { return ( status.backend !== undefined && @@ -276,10 +272,9 @@ export async function readKtxSetupStatus(projectDir: string): Promise database.ready) && status.sources.every((source) => source.ready) From e05a6d43abb4aa7b6e69376cac675c6a94def809 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 01:07:47 +0200 Subject: [PATCH 02/33] fix(cli): report metabase ingest readiness --- packages/cli/src/setup.test.ts | 57 ++++++++++++++++++++++++++++++++++ packages/cli/src/setup.ts | 42 +++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index c8961e2a..44fc8c7d 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js'; import { contextBuildCommands, writeKtxSetupContextState } from './setup-context.js'; import { readKtxSetupStatus, runKtxSetup } from './setup.js'; @@ -274,6 +275,62 @@ describe('setup status', () => { }); }); + it('reports Vertex LLM and context ready after a successful Metabase ingest report', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + 
' - databases', + ' - sources', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' metabase:', + ' driver: metabase', + ' url: env:METABASE_URL', + ' api_key_ref: env:METABASE_API_KEY', + ' warehouse_connection_id: warehouse', + 'llm:', + ' provider:', + ' backend: vertex', + ' vertex:', + ' project: kaelio-dev', + ' location: us-east5', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + '', + ].join('\n'), + 'utf-8', + ); + await persistLocalBundleReport( + tempDir, + localFakeBundleReport('metabase-job-1', { + connectionId: 'warehouse', + sourceKey: 'metabase', + }), + ); + + const status = await readKtxSetupStatus(tempDir); + const io = makeIo(); + await expect(runKtxSetup({ command: 'status', projectDir: tempDir, json: false }, io.io)).resolves.toBe(0); + + expect(status.llm).toMatchObject({ backend: 'vertex', ready: true, model: 'claude-sonnet-4-6' }); + expect(status.context).toMatchObject({ ready: true, status: 'completed' }); + expect(io.stdout()).toContain('LLM ready: yes (claude-sonnet-4-6)'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + it('prints plain and JSON setup status', async () => { const plainIo = makeIo(); const jsonIo = makeIo(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 89c5dcdc..0b0c400d 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,7 +1,8 @@ import { existsSync } from 'node:fs'; import { join, resolve } from 'node:path'; import { cancel, isCancel, select } from '@clack/prompts'; -import { loadKtxProject } from '@ktx/context/project'; +import { getLatestLocalIngestStatus, savedMemoryCountsForReport } from '@ktx/context/ingest'; +import { ktxLocalStateDbPath, loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import type { KtxDemoArgs } from 
'./demo.js'; import { defaultDemoProjectDir } from './demo-assets.js'; @@ -152,6 +153,7 @@ export interface KtxSetupDeps { } const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']); +const READY_LLM_BACKENDS = new Set(['anthropic', 'vertex', 'gateway']); type KtxSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit'; type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents'; @@ -234,7 +236,12 @@ async function runKtxSetupDemoFromEntryMenu( } function llmReady(status: KtxSetupStatus['llm']): boolean { - return status.backend === 'anthropic' && typeof status.model === 'string' && status.model.length > 0; + return ( + status.backend !== undefined && + READY_LLM_BACKENDS.has(status.backend) && + typeof status.model === 'string' && + status.model.length > 0 + ); } function embeddingsReady(status: KtxSetupStatus['embeddings']): boolean { @@ -259,6 +266,31 @@ function sourceConnections(config: Awaited>['c .sort((left, right) => left.connectionId.localeCompare(right.connectionId)); } +type LocalIngestStatusReport = NonNullable>>; + +function reportHasSavedContext(report: LocalIngestStatusReport): boolean { + if (report.body.failedWorkUnits.length > 0) { + return false; + } + const counts = savedMemoryCountsForReport(report); + return counts.wikiCount > 0 || counts.slCount > 0; +} + +async function readIngestContextStatus(project: KtxLocalProject): Promise { + if (!existsSync(ktxLocalStateDbPath(project))) { + return null; + } + const report = await getLatestLocalIngestStatus(project); + if (!report || !reportHasSavedContext(report)) { + return null; + } + return { + ready: true, + status: 'completed', + runId: report.runId, + }; +} + export async function readKtxSetupStatus(projectDir: string): Promise { const resolvedProjectDir = resolve(projectDir); if (!existsSync(join(resolvedProjectDir, 'ktx.yaml'))) { @@ -291,6 +323,10 @@ export async function 
readKtxSetupStatus(projectDir: string): Promise Date: Tue, 12 May 2026 01:34:18 +0200 Subject: [PATCH 03/33] Relax boundary check for test fixtures --- scripts/check-boundaries.mjs | 2 +- scripts/check-boundaries.test.mjs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/check-boundaries.mjs b/scripts/check-boundaries.mjs index 86b98712..53455abd 100644 --- a/scripts/check-boundaries.mjs +++ b/scripts/check-boundaries.mjs @@ -95,7 +95,7 @@ function scansForContextProductionLlmBoundaries(relativePath) { } function scansForForbiddenIdentifiers(relativePath) { - return isCodeSource(relativePath) || isRuntimeAsset(relativePath); + return (isCodeSource(relativePath) && !isTestSource(relativePath)) || isRuntimeAsset(relativePath); } function skipsIdentifierScan(relativePath) { diff --git a/scripts/check-boundaries.test.mjs b/scripts/check-boundaries.test.mjs index 8d7fabdd..db8afafe 100644 --- a/scripts/check-boundaries.test.mjs +++ b/scripts/check-boundaries.test.mjs @@ -65,6 +65,13 @@ describe('scanFileContent', () => { assert.equal(scanFileContent('python/ktx-sl/openspec/specs/semantic-layer/spec.md', name).length, 0); }); + it('allows product identifiers in test fixtures', () => { + const name = lowerProductName(); + + assert.equal(scanFileContent('packages/cli/src/setup.test.ts', `project: ${name}-dev`).length, 0); + assert.equal(scanFileContent('packages/context/src/ingest/importer.test.ts', `email: system@${name}.dev`).length, 0); + }); + it('allows public package identifiers in release packaging and managed runtime source', () => { const name = lowerProductName(); From 3e9869340f654bfd6a6336f3be0fbc9d95ecb6f6 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 01:44:15 +0200 Subject: [PATCH 04/33] ci: parallelize KTX CI checks --- .github/workflows/ci.yml | 133 ++++++++++++++++++++++-- .github/workflows/release.yml | 6 +- scripts/ci-artifact-upload.test.mjs | 2 +- 
scripts/standalone-ci-workflow.test.mjs | 27 +++-- 4 files changed, 151 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 750c71d5..5d70d495 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,19 +15,20 @@ concurrency: cancel-in-progress: true jobs: - check: + typescript-checks: + name: TypeScript checks runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Setup pnpm - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 with: run_install: false - name: Setup Node.js - uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: "24" cache: "pnpm" @@ -39,19 +40,101 @@ jobs: - name: Run TypeScript checks run: pnpm run check - - name: Run slow TypeScript tests - run: pnpm run test:slow + slow-context-tests: + name: Slow context tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Build TypeScript packages + run: pnpm run build + + - name: Run slow context tests + run: pnpm --filter @ktx/context run test:slow + + slow-cli-tests: + name: Slow CLI tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm 
+ uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Build TypeScript packages + run: pnpm run build + + - name: Run slow CLI tests + run: pnpm --filter @ktx/cli run test:slow + + cli-smoke-tests: + name: CLI smoke tests + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile - name: Run CLI smoke tests run: pnpm run smoke + python-checks: + name: Python checks + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Setup Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.13" - name: Setup uv - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: enable-cache: true cache-dependency-glob: "uv.lock" @@ -62,11 +145,47 @@ jobs: - name: Run Python checks run: uv run pytest + artifact-checks: + name: Artifact checks + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Setup pnpm + uses: 
pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: "24" + cache: "pnpm" + cache-dependency-path: "pnpm-lock.yaml" + + - name: Install TypeScript dependencies + run: pnpm install --frozen-lockfile + + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.13" + + - name: Setup uv + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: true + cache-dependency-glob: "uv.lock" + + - name: Install Python dependencies + run: uv sync --all-packages + - name: Build and verify package artifacts run: pnpm run artifacts:check - name: Upload package artifacts - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: ktx-package-artifacts-${{ github.sha }} path: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 16c9f1e2..2a8f696e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,12 +24,12 @@ jobs: uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Setup pnpm - uses: pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061 # v4.2.0 + uses: pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0 # v6.0.7 with: run_install: false - name: Setup Node.js - uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: "24" cache: "pnpm" @@ -44,7 +44,7 @@ jobs: python-version: "3.13" - name: Setup uv - uses: astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b # v7.3.0 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: enable-cache: 
true cache-dependency-glob: "uv.lock" diff --git a/scripts/ci-artifact-upload.test.mjs b/scripts/ci-artifact-upload.test.mjs index 3fecdfbc..2c931cd0 100644 --- a/scripts/ci-artifact-upload.test.mjs +++ b/scripts/ci-artifact-upload.test.mjs @@ -31,7 +31,7 @@ describe('KTX CI artifact upload contract', () => { workflow, /name: Build and verify package artifacts\s+run: pnpm run artifacts:check\s+- name: Upload package artifacts/s, ); - assert.match(workflow, /uses: actions\/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f/); + assert.match(workflow, /uses: actions\/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a/); assert.match(workflow, /name: ktx-package-artifacts-\$\{\{ github\.sha \}\}/); assert.match(workflow, /dist\/artifacts\/manifest\.json/); assert.match(workflow, /dist\/artifacts\/npm\/\*\.tgz/); diff --git a/scripts/standalone-ci-workflow.test.mjs b/scripts/standalone-ci-workflow.test.mjs index 230b9e2f..195fce53 100644 --- a/scripts/standalone-ci-workflow.test.mjs +++ b/scripts/standalone-ci-workflow.test.mjs @@ -13,25 +13,39 @@ function assertIncludesAll(text, values) { } describe('standalone KTX CI workflow', () => { - it('runs the package checks from a filtered repository root', async () => { + it('runs package checks in parallel jobs from the repository root', async () => { const workflow = await readText('.github/workflows/ci.yml'); assert.match(workflow, /^name: KTX CI/m); assertIncludesAll(workflow, [ 'permissions:', 'contents: read', + 'typescript-checks:', + 'name: TypeScript checks', + 'slow-context-tests:', + 'name: Slow context tests', + 'slow-cli-tests:', + 'name: Slow CLI tests', + 'cli-smoke-tests:', + 'name: CLI smoke tests', + 'python-checks:', + 'name: Python checks', + 'artifact-checks:', + 'name: Artifact checks', 'actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd', - 'pnpm/action-setup@41ff72655975bd51cab0327fa583b6e92b6d3061', - 'actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238', + 
'pnpm/action-setup@739bfe42ca9233c5e6aca07c1a25a9d34aca49b0', + 'actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e', 'node-version: "24"', 'cache-dependency-path: "pnpm-lock.yaml"', 'pnpm install --frozen-lockfile', 'pnpm run check', - 'pnpm run test:slow', + 'pnpm run build', + 'pnpm --filter @ktx/context run test:slow', + 'pnpm --filter @ktx/cli run test:slow', 'pnpm run smoke', 'actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405', 'python-version: "3.13"', - 'astral-sh/setup-uv@eac588ad8def6316056a12d4907a9d4d84ff7a3b', + 'astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b', 'cache-dependency-glob: "uv.lock"', 'uv sync --all-packages', 'uv run pytest', @@ -42,13 +56,14 @@ describe('standalone KTX CI workflow', () => { assert.doesNotMatch(workflow, /cd ktx/); assert.doesNotMatch(workflow, /ktx\/pnpm-lock\.yaml/); assert.doesNotMatch(workflow, /ktx\/uv\.lock/); + assert.doesNotMatch(workflow, /run: pnpm run test:slow/); }); it('uploads verified artifacts from root-relative paths', async () => { const workflow = await readText('.github/workflows/ci.yml'); assertIncludesAll(workflow, [ - 'actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f', + 'actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a', 'name: ktx-package-artifacts-${{ github.sha }}', 'dist/artifacts/manifest.json', 'dist/artifacts/npm/*.tgz', From 1b552a38c27b14c2ecc2d33c487d079aa0b39e7c Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 23:32:10 -0700 Subject: [PATCH 05/33] docs: refresh setup and install guidance --- README.md | 362 +++++------------- .../content/docs/cli-reference/ktx-serve.mdx | 2 +- .../content/docs/community/contributing.mdx | 9 +- .../docs/getting-started/quickstart.mdx | 74 +--- 4 files changed, 128 insertions(+), 319 deletions(-) diff --git a/README.md b/README.md index 5f152cca..84592226 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

- Workspace-first context layer for database agents + The context layer for analytics agents

@@ -14,312 +14,154 @@ --- -KTX stores warehouse memory in a project directory, generates and validates -semantic-layer YAML, indexes knowledge, scans database schemas, and exposes the -result through a CLI and MCP server. +KTX turns warehouse metadata, semantic definitions, and business knowledge into +reviewable project files that agents can use while planning, querying, and +updating analytics work. -KTX projects are plain files: YAML, Markdown, SQLite state, and generated -artifacts. You can inspect them, commit them, and serve them to any MCP client. +A KTX project is a directory of plain files — YAML semantic sources, Markdown +knowledge pages, and SQLite state — that you commit to git and review in PRs, +just like dbt models. -## What KTX provides +## Who KTX is for -- Durable warehouse memory with semantic-layer sources and knowledge pages. -- Native scan connectors for SQLite, Postgres, MySQL, ClickHouse, SQL Server, - BigQuery, and Snowflake. -- Agentic ingest with provenance links, tool transcripts, and replay metadata. -- Local semantic-layer query planning and optional query execution. -- A stdio MCP server with tools for connections, knowledge, semantic-layer - sources, ingest reports, and replay. +KTX is built for analytics engineers and data teams who want data agents to +work on real analytics systems — not just generate one-off SQL. + +Use KTX when you want agents to: + +- **Generate SQL** from approved measures and joins +- **Repair semantic definitions** through reviewable diffs +- **Explain metric provenance** with warehouse evidence +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI + platforms + +Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, and +SQLite. 
## Quick start -Run the pre-seeded demo through the public npm package: +Install the CLI and run the setup wizard: ```bash -npx @kaelio/ktx setup demo --no-input -npx @kaelio/ktx setup demo inspect -``` - -The default demo uses packaged sample data and prebuilt context. It does not -require API keys, network access, or an LLM provider. - -To replay the packaged ingest run, use: - -```bash -npx @kaelio/ktx setup demo --mode replay --no-input -``` - -To run the full agentic demo with an LLM provider, set a provider key for the -current process: - -```bash -ANTHROPIC_API_KEY=$YOUR_ANTHROPIC_API_KEY \ - npx @kaelio/ktx setup demo --mode full --no-input -``` - -Interactive full-demo setup can prompt for a provider key without writing the -key to `ktx.yaml`. - -You can also install the CLI in a project or globally: - -```bash -npm install @kaelio/ktx -npx ktx --help npm install -g @kaelio/ktx -ktx --help +ktx setup ``` -## Build a local project +The wizard walks through six steps: configuring your LLM provider, setting up +embeddings, connecting your database, adding context sources (dbt, LookML, +Metabase, Looker, Notion), building context, and installing agent integration. -Create a project from a local workspace: +If it exits before completion, rerun `ktx setup` to resume where you left off. 
+ +Check your project status: ```bash -npm install @kaelio/ktx -PROJECT_DIR="$(mktemp -d)/ktx-demo" -npx ktx init "$PROJECT_DIR" --name ktx-demo +ktx status ``` -Create a SQLite warehouse: +``` +KTX project: /home/user/analytics +Project ready: yes +LLM ready: yes (claude-sonnet-4-6) +Embeddings ready: yes (text-embedding-3-small) +Primary sources configured: yes (postgres-warehouse) +Context sources configured: yes (dbt-main) +KTX context built: yes +Agent integration ready: yes (claude-code:project) +``` + +## What's in a project + +``` +my-project/ +├── ktx.yaml # Project configuration +├── semantic-layer/ +│ └── warehouse/ +│ ├── orders.yaml # Semantic source definitions +│ ├── customers.yaml +│ └── order_items.yaml +├── knowledge/ +│ ├── global/ +│ │ ├── revenue.md # Business definitions and rules +│ │ └── segment-classification.md +│ └── user/ +│ └── local/ +├── raw-sources/ +│ └── warehouse/ +│ └── live-database/ # Scan artifacts and reports +└── .ktx/ + └── db.sqlite # Local state (git-ignored) +``` + +Semantic sources and knowledge pages are committed to git. The `.ktx/` directory +holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the +next run. + +## Serve agents + +KTX integrates with coding agents through CLI skills, an MCP server, or both. +The setup wizard configures this automatically — here's what each mode looks +like. 
+ +**CLI skills** — the agent calls `ktx` commands directly through a skill file +installed in your agent's config (e.g., `.claude/skills/ktx/SKILL.md`): ```bash -python - "$PROJECT_DIR/demo.db" <<'PY' -import sqlite3 -import sys - -conn = sqlite3.connect(sys.argv[1]) -conn.executescript(""" -DROP TABLE IF EXISTS accounts; -CREATE TABLE accounts ( - account_id INTEGER PRIMARY KEY, - account_name TEXT NOT NULL, - segment TEXT NOT NULL, - region TEXT NOT NULL -); -INSERT INTO accounts VALUES - (1, 'Acme Analytics', 'Mid-Market', 'NA'), - (2, 'Beacon Bank', 'Enterprise', 'EMEA'), - (3, 'Cobalt Coffee', 'SMB', 'NA'), - (4, 'Delta Devices', 'Mid-Market', 'APAC'), - (5, 'Evergreen Energy', 'Enterprise', 'NA'); -""") -conn.close() -PY +ktx sl query --measure orders.revenue --dimension orders.status --format sql +ktx wiki search "revenue definition" +ktx sl validate orders ``` -Replace the generated `ktx.yaml`: +**MCP server** — the agent calls KTX tools over the Model Context Protocol: ```bash -cat > "$PROJECT_DIR/ktx.yaml" <" -memory: - auto_commit: true -YAML -``` - -Write and validate a semantic-layer source: - -```bash -npx ktx sl write accounts --project-dir "$PROJECT_DIR" \ - --connection-id warehouse --yaml 'name: accounts -table: accounts -description: CRM accounts with segmentation attributes. 
-grain: - - account_id -columns: - - name: account_id - type: number - - name: account_name - type: string - - name: segment - type: string - - name: region - type: string -measures: - - name: account_count - expr: count(account_id) -joins: [] -' - -npx ktx sl validate accounts --project-dir "$PROJECT_DIR" \ - --connection-id warehouse -``` - -Generate SQL and execute the query: - -```bash -npx ktx sl query --project-dir "$PROJECT_DIR" \ - --connection-id warehouse \ - --measure accounts.account_count \ - --dimension accounts.segment \ - --order-by accounts.account_count:desc \ - --limit 5 \ - --format sql - -npx ktx sl query --project-dir "$PROJECT_DIR" \ - --connection-id warehouse \ - --measure accounts.account_count \ - --dimension accounts.segment \ - --order-by accounts.account_count:desc \ - --limit 5 \ - --execute \ - --max-rows 5 -``` - -List and test the warehouse connection: - -```bash -npx ktx connection list --project-dir "$PROJECT_DIR" -npx ktx connection test warehouse --project-dir "$PROJECT_DIR" -``` - -The connection test prints the configured driver and discovered table count: - -```text -Driver: sqlite -Tables: 1 -``` - -### Scan the demo warehouse - -Scan artifacts are written under -`raw-sources/warehouse/live-database//` in the project directory. - -```bash - -SCAN_OUTPUT="$(npx ktx scan warehouse --project-dir "$PROJECT_DIR")" -printf '%s\n' "$SCAN_OUTPUT" -SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" -npx ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" -npx ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" -``` - -For non-SQLite drivers, prefer credential references such as `--url env:NAME` -or `--url file:PATH` over literal credential URLs. - -## Managed Python runtime - -KTX installs its Python runtime only when a Python-backed command needs it. -The runtime lives outside the npm cache, is versioned by the installed CLI -version, and is managed by `ktx runtime` commands. 
- -KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with -your system package manager or the official installer before running Python- -backed KTX commands. KTX doesn't download `uv` automatically; run -`ktx runtime doctor` if runtime installation fails: - -```bash -npx ktx runtime install --yes -npx ktx runtime status -npx ktx runtime doctor -npx ktx runtime start -npx ktx runtime stop -npx ktx runtime prune --dry-run -npx ktx runtime prune --yes -``` - -Use `runtime prune --dry-run` to preview stale runtime directories from older -CLI versions. Add `--yes` to remove those stale directories after daemon -processes are stopped. - -Commands such as `npx @kaelio/ktx sl query ... --yes` can install the core -runtime lazily from the bundled wheel. Local embeddings remain lazy; prepare -them only when you select local `sentence-transformers` embeddings: - -```bash -npx ktx runtime install --feature local-embeddings --yes -npx ktx runtime start --feature local-embeddings -``` - -## Serve MCP - -Start the stdio MCP server from the project directory: - -```bash -npx ktx serve --mcp stdio --project-dir "$PROJECT_DIR" \ +ktx serve --mcp stdio \ --user-id local \ --semantic-compute \ --execute-queries \ --yes ``` -The `--semantic-compute` flag uses the managed Python runtime when no explicit -semantic compute URL is provided. KTX starts or reuses the managed runtime as -needed. +This exposes tools for connections, knowledge search, semantic-layer sources, +validation, queries, ingestion, and replay. The `--semantic-compute` flag starts +the managed Python runtime for query planning automatically. -The MCP server exposes `connection_list`, `knowledge_search`, -`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`, -`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`, -`ingest_status`, `ingest_report`, and `ingest_replay`. 
+Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that +reads `.agents/` skills or MCP configuration. ## Workspace packages -- `packages/context`: core TypeScript context library. -- `packages/cli`: CLI wrapper over the context package. -- `packages/llm`: LLM and embedding provider helpers. -- `packages/connector-bigquery`: BigQuery scan connector. -- `packages/connector-clickhouse`: ClickHouse scan connector. -- `packages/connector-mysql`: MySQL scan connector. -- `packages/connector-postgres`: Postgres scan connector. -- `packages/connector-snowflake`: Snowflake scan connector. -- `packages/connector-sqlite`: SQLite scan connector. -- `packages/connector-sqlserver`: SQL Server scan connector. -- `python/ktx-sl`: semantic-layer engine. -- `python/ktx-daemon`: portable compute service for semantic-layer operations. +| Package | Purpose | +|---------|---------| +| `packages/cli` | CLI entry point | +| `packages/context` | Core context engine | +| `packages/llm` | LLM and embedding providers | +| `packages/connector-*` | Database connectors (Postgres, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, SQLite) | +| `python/ktx-sl` | Semantic-layer query planning | +| `python/ktx-daemon` | Portable compute service | ## Development -Install dependencies and run checks: - ```bash +git clone https://github.com/kaelio/ktx.git +cd ktx pnpm install +uv sync --all-groups +pnpm run build pnpm run check -uv sync --all-packages -source .venv/bin/activate -uv run pytest ``` -Use the optional development binary when you want a local `ktx-dev` command: +Use the development CLI for local testing: ```bash +pnpm run setup:dev pnpm run link:dev ktx-dev --help ``` The repository uses `pnpm` for TypeScript packages and `uv` for Python -packages. - -## Release status - -This repository builds one public npm artifact named `@kaelio/ktx`. The release -artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` -runtime wheel. 
The first public npm handoff is policy-gated through -`release-policy.json`, which keeps Python package publishing disabled because -KTX-owned Python code ships inside the npm package as a bundled wheel. The -`python/ktx-sl` and `python/ktx-daemon` directories remain source packages for -development, not public release artifacts. - -Build local package artifacts and verify the guarded dry-run publish path with: - -```bash -source .venv/bin/activate -pnpm run artifacts:check -pnpm run release:readiness -pnpm run release:npm-publish -``` - -Run the live npm publish only from the manual `KTX Release` workflow with the -`publish_live` input enabled after the `NPM_TOKEN` secret is configured. +packages. See [Contributing](docs-site/content/docs/community/contributing.mdx) +for full development setup, testing, and PR guidelines. ## License diff --git a/docs-site/content/docs/cli-reference/ktx-serve.mdx b/docs-site/content/docs/cli-reference/ktx-serve.mdx index ec0d2b28..3816b808 100644 --- a/docs-site/content/docs/cli-reference/ktx-serve.mdx +++ b/docs-site/content/docs/cli-reference/ktx-serve.mdx @@ -68,7 +68,7 @@ The MCP server is typically configured through `ktx setup --agents` rather than | Error | Cause | Recovery | |-------|-------|----------| -| Agent cannot start server | The agent config cannot find the `ktx` binary | Run `pnpm run link:dev` or use an absolute command path in the agent config | +| Agent cannot start server | The agent config cannot find the `ktx` binary | Install `@kaelio/ktx` globally with `npm install -g @kaelio/ktx` or use an absolute command path in the agent config | | Semantic tools are unavailable | Server was started without `--semantic-compute` | Add `--semantic-compute` or `--semantic-compute-url` to the server args | | Query execution is denied | Server was started without `--execute-queries` | Add `--execute-queries` only for trusted projects where read-only execution is intended | | Context resolves to wrong project | 
`KTX_PROJECT_DIR` is missing or points elsewhere | Set `KTX_PROJECT_DIR` to the project containing the intended `ktx.yaml` | diff --git a/docs-site/content/docs/community/contributing.mdx b/docs-site/content/docs/community/contributing.mdx index 8feb86c9..1b4e39ce 100644 --- a/docs-site/content/docs/community/contributing.mdx +++ b/docs-site/content/docs/community/contributing.mdx @@ -7,6 +7,11 @@ KTX is an open-source project and welcomes contributions — bug fixes, new conn ## Development setup +This page is for contributors working on the KTX repository. To install KTX for +an analytics project, use the published +[`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) package in the +[Quickstart](/docs/getting-started/quickstart). + ### Prerequisites - **Node.js 22+** and **pnpm** — for the TypeScript workspace @@ -44,7 +49,9 @@ pnpm run setup:dev pnpm run link:dev ``` -This makes the `ktx` command available globally, pointing at your local build. +This makes the `ktx-dev` command available globally, pointing at your local +build. Use this development binary when you need to test unpublished repository +changes. ## Repository structure diff --git a/docs-site/content/docs/getting-started/quickstart.mdx b/docs-site/content/docs/getting-started/quickstart.mdx index 91a17d05..61abc301 100644 --- a/docs-site/content/docs/getting-started/quickstart.mdx +++ b/docs-site/content/docs/getting-started/quickstart.mdx @@ -9,44 +9,30 @@ If you are a coding assistant trying to decide which KTX docs page to read, star ## Workflow summary -Use this sequence when an agent needs to set up KTX from a fresh checkout: +Use this sequence when you are setting up KTX in an analytics project: -1. `pnpm install` — install workspace dependencies. -2. `pnpm run setup:dev` — build local packages and prepare the development CLI. -3. `pnpm run link:dev` — link the `ktx` command for local use. -4. `ktx setup` — create or resume a KTX project. -5. `ktx status` — verify project readiness. -6. 
`ktx sl list` — confirm semantic-layer sources are available. -7. `ktx sl query ... --format sql` — compile a semantic query without executing it. +1. `npm install -g @kaelio/ktx` — install the published KTX CLI from npm. +2. `ktx setup` — create or resume a KTX project. The setup wizard is stateful. If it exits before completion, rerun `ktx setup` in the same project directory to resume from the first incomplete step. -## Prerequisites - -- **Node.js 22+** and **pnpm** -- An **Anthropic API key** for LLM-powered enrichment and ingestion -- A **database connection** — PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, or SQLite -- Optionally, a **dbt project**, **LookML repo**, **Metabase instance**, or other context source - ## Install and run setup -KTX is currently used from a local checkout or linked workspace CLI. Build and link the CLI first: +Install the published [`@kaelio/ktx`](https://www.npmjs.com/package/@kaelio/ktx) CLI: ```bash -git clone https://github.com/kaelio/ktx.git -cd ktx -pnpm install -pnpm run setup:dev -pnpm run link:dev +npm install -g @kaelio/ktx ``` -Then run the setup wizard in the directory where you want your KTX project: +Then run the setup wizard: ```bash ktx setup ``` -The wizard walks through six steps. You can go back at any point, and if you exit early, running `ktx setup` again resumes where you left off. +The local checkout flow is only for contributors working on KTX itself. See [Contributing](/docs/community/contributing) for that setup. + +The wizard walks through six steps. You can go back at any point, and if you exit early, rerunning `ktx setup` resumes where you left off. ## Step 1: Configure LLM @@ -86,10 +72,11 @@ KTX uses embeddings for semantic search over sources, wiki content, schema metad **OpenAI embeddings** use `text-embedding-3-small` (1536 dimensions) and require an `OPENAI_API_KEY`. -**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX Python daemon. No API key is needed. 
If you run the daemon as a long-lived HTTP service, start it with: +**Local embeddings** use `all-MiniLM-L6-v2` (384 dimensions) via the KTX managed Python runtime. No API key is needed. KTX can install and start the runtime during setup; to prepare it ahead of time, run: ```bash -ktx-daemon serve-http --host 127.0.0.1 --port 8765 +ktx runtime install --feature local-embeddings --yes +ktx runtime start --feature local-embeddings ``` ## Step 3: Connect a database @@ -208,12 +195,15 @@ Then select which agents to install for: │ ◻ Codex │ ◻ Cursor │ ◻ OpenCode +│ ◻ Custom agent (.agents) ``` **CLI mode** writes a skill file (e.g., `.claude/skills/ktx/SKILL.md`) that teaches the agent to call KTX commands directly. **MCP mode** writes an MCP server configuration (e.g., `.mcp.json`) that lets the agent call KTX tools like `sl_query`, `knowledge_search`, and `sl_write_source` over the Model Context Protocol. +**Custom agent** uses the universal `.agents` target for agents that can read project-local skills or MCP configuration. + ## Generated files KTX writes project state as plain files so agents can inspect and edit changes in git. @@ -247,44 +237,14 @@ KTX context built: yes Agent integration ready: yes (claude-code:project) ``` -List your semantic sources: - -```bash -ktx sl list -``` - -Query through the semantic layer: - -```bash -ktx sl query \ - --connection-id postgres-warehouse \ - --measure orders.total_revenue \ - --dimension orders.status \ - --order-by orders.total_revenue:desc \ - --limit 5 \ - --format sql -``` - -This outputs the generated SQL. 
Add `--execute` to run it against your warehouse: - -```bash -ktx sl query \ - --connection-id postgres-warehouse \ - --measure orders.total_revenue \ - --dimension orders.status \ - --order-by orders.total_revenue:desc \ - --limit 5 \ - --execute --max-rows 10 -``` - ## Common errors | Error or symptom | Likely cause | Recovery | |------------------|--------------|----------| -| `ktx: command not found` | The local CLI has not been linked | Run `pnpm run setup:dev` and `pnpm run link:dev` from the KTX checkout, then open a new shell | +| `ktx: command not found` | The KTX package is not installed globally, or the shell cannot find the global binary | Run `npm install -g @kaelio/ktx` and open a new shell | | LLM health check fails | Missing, invalid, or unauthorized Anthropic API key | Export `ANTHROPIC_API_KEY` or rerun `ktx setup` and choose the file-backed secret option | | OpenAI embedding check fails | `OPENAI_API_KEY` is missing when OpenAI embeddings are selected | Export `OPENAI_API_KEY`, or rerun setup and choose local sentence-transformers embeddings | -| Local embeddings hang or fail | The Python daemon cannot start or the local model runtime is unavailable | Run `uv sync --all-groups`, then start `ktx-daemon serve-http --host 127.0.0.1 --port 8765` and rerun setup | +| Local embeddings hang or fail | The managed Python runtime cannot start or the local model runtime is unavailable | Install `uv`, run `ktx runtime doctor`, then run `ktx runtime install --feature local-embeddings --yes` and rerun setup | | Database connection test fails | Credentials, network access, warehouse, database, or schema value is wrong | Test the same URL with the database's native client, then rerun `ktx connection add ... 
--force` or rerun setup | | `KTX context built: no` in `ktx status` | Setup saved configuration but did not build context | Run `ktx setup context build` or rerun `ktx setup` and choose to build context now | | Agent integration is incomplete | Setup skipped the agents step or the target was not installed | Run `ktx setup --agents --target codex --agent-install-mode both --project` using the target you need | From a0193b3fb09762ecf041354b61920a8a815612c2 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 23:32:12 -0700 Subject: [PATCH 06/33] docs: tighten context layer concepts --- .../content/docs/concepts/context-as-code.mdx | 56 +++++++++++-------- .../docs/concepts/the-context-layer.mdx | 30 +++++----- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/docs-site/content/docs/concepts/context-as-code.mdx b/docs-site/content/docs/concepts/context-as-code.mdx index 4ee25d88..3a2b1686 100644 --- a/docs-site/content/docs/concepts/context-as-code.mdx +++ b/docs-site/content/docs/concepts/context-as-code.mdx @@ -29,43 +29,51 @@ This reconciliation step is what separates auto-ingestion from a simple sync. A Auto-ingestion is designed to plug into a PR-based workflow. Run ingestion on a branch, review the changed YAML and Markdown files, and merge them the same way you merge dbt models or application code. -``` - dbt / Looker / Metabase KTX project repo - ┌──────────────┐ ┌──────────────────────┐ - │ Metadata │───ingestion──▶│ Branch: ingest/... 
│ - │ changes │ │ │ - └──────────────┘ │ + 3 new sources │ - │ ~ 2 updated joins │ - │ + 1 knowledge page │ - │ │ - │ ──── Open PR ──── │ - │ │ - │ Review semantic diff │ - │ Approve & merge │ - └──────────────────────┘ - │ - ▼ - Agents see updated - context immediately +```text +dbt / Looker / Metabase / Notion + | + v + metadata changes + | + v + nightly cron or CI ingest + | + v + branch: ingest/nightly + | + | + 3 new sources + | ~ 2 updated joins + | + 1 knowledge page + v + open PR + | + v + review semantic diff + | + v + approve & merge + | + v + agents see updated context ``` A typical branch shows a semantic diff: "this ingest added 3 new sources from dbt, updated 2 join definitions based on schema changes, and created 1 knowledge page from a Notion doc." Analytics engineers review the diff, verify that the new sources look correct, and merge. +Teams usually run this on demand while setting up a source, then schedule it once the source is stable. A cron job or CI schedule can run `ktx ingest --all --no-input` overnight on an ingest branch so the latest dbt manifests, BI metadata, and documentation updates are ready for review each morning. + Once merged, agents querying through KTX's MCP server or CLI see the updated context immediately. No deployment step, no cache invalidation, no restart. The files are the source of truth, and agents read them on every request. This workflow gives you the same review guarantees you have for dbt models. No semantic source reaches production without a human approving it. But unlike maintaining context manually, the heavy lifting — discovering new tables, drafting source definitions, extracting business rules from documentation — is done by the ingestion agent. You review and approve. You don't write from scratch. ## Feedback loops -Context improves over time through three feedback channels. +Context improves over time through two feedback channels. 
**Analyst corrections.** When an analytics engineer spots something wrong — a measure formula that doesn't match the business definition, a join that should be `many_to_one` instead of `one_to_many`, a knowledge page that's out of date — they edit the YAML or Markdown directly and commit. These corrections become part of the project's git history, and the next ingestion run respects them. If you manually fix a measure definition, KTX won't overwrite it on the next ingest. -**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, source definitions can be tightened with better filters or grain declarations, and relationship thresholds can be adjusted. +**Agent feedback.** When an agent queries the semantic layer and gets unexpected results — a query that returns no rows because of a bad filter, a join path that produces duplicated results — it can flag the issue. These signals feed back into the context: knowledge pages can note known data quality issues, and source definitions can be tightened with better filters, join paths, or grain declarations. -**Relationship calibration.** KTX infers foreign key relationships between tables automatically, even when the database has no declared constraints. It does this by analyzing column names, types, value distributions, and asking the LLM for proposals. Each inferred relationship gets a confidence score. You control two thresholds: `acceptThreshold` (relationships above this score are accepted automatically, default 0.85) and `reviewThreshold` (relationships between review and accept are flagged for human review, default 0.55). As you accept or reject proposals, the system learns which patterns match your schema conventions. 
- -Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Relationship calibration tunes the discovery process to your warehouse's conventions. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration. +Each of these channels makes the next ingestion cycle better. Analyst corrections teach the system what your team considers authoritative. Agent feedback surfaces gaps in coverage. Context is not a static artifact — it's a living system that converges toward accuracy with every iteration. ## Deterministic replay @@ -89,5 +97,5 @@ Use this page when an agent needs to explain review workflows, ingestion diffs, |------------|------------------|-----------| | Explain how generated context should be reviewed | The git workflow | [Building Context](/docs/guides/building-context) | | Diagnose why ingestion changed a semantic source | Auto-ingestion and Deterministic replay | [ktx ingest](/docs/cli-reference/ktx-ingest) | -| Explain how context improves over time | Feedback loops | [Link Detection](/docs/benchmarks/link-detection) | +| Explain how context improves over time | Feedback loops | [Building Context](/docs/guides/building-context) | | Tell a user what to commit | The git workflow | [Writing Context](/docs/guides/writing-context) | diff --git a/docs-site/content/docs/concepts/the-context-layer.mdx b/docs-site/content/docs/concepts/the-context-layer.mdx index 64a17730..953af89a 100644 --- a/docs-site/content/docs/concepts/the-context-layer.mdx +++ b/docs-site/content/docs/concepts/the-context-layer.mdx @@ -9,7 +9,7 @@ Give an agent access to your database and it will generate SQL. It might even pr The agent doesn't know that `orders.amount` includes refunds and needs a status filter. It doesn't know that `customers` should join to `orders` on `customer_id`, not `id`. 
It doesn't know that your team stopped using `legacy_segments` six months ago, or that "enterprise" means contracts over $100k, not just big logos. It sees column names and types. It doesn't see your business. -This isn't a model capability problem. GPT-4, Claude, and Gemini can all write correct SQL — when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision. +This isn't a model capability problem. Claude Code, Codex, and your BI agents can write correct SQL when they know what correct means. The gap is context: which tables matter, which joins are valid, which metrics are canonical, what the business terms actually refer to. Without that, agents produce plausible-looking artifacts that are subtly, dangerously wrong. Wrong enough to pass a glance, wrong enough to drive a decision. Analytics engineers already know this pain. It's the same reason you write dbt tests, maintain a data dictionary, and spend half of standup explaining why someone's dashboard number doesn't match the board deck. The difference is that agents make decisions at machine speed, so the wrong context propagates faster than a human can catch it. @@ -19,9 +19,9 @@ The industry has moved through three distinct approaches to getting AI and data **Wave one: database access.** Connect an LLM to a database, let it generate SQL. This works for simple lookups — "how many orders last week?" — but breaks on anything that requires business knowledge. The agent guesses at joins, invents metrics, and hallucinates table relationships. Every query is a coin flip. -**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. 
This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but it's still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter. +**Wave two: semantic layers and text-to-SQL.** Add structure. Define metrics in MetricFlow or Cube, expose schemas, build text-to-SQL pipelines. This is better — the agent knows that `revenue` means `sum(amount) where status != 'refunded'` — but building and maintaining that structure by hand is time-consuming, and the result is still limited. Semantic layers define what to calculate, not why, when, or how to interpret the result. The agent can compute net revenue but doesn't know about the February refund anomaly, the segment reclassification, or the fact that `enterprise` changed definition last quarter. -**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, data quality gotchas, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer. +**Wave three: agentic context.** AI is no longer just answering questions — it's generating dashboards, writing semantic definitions, proposing dbt models, creating tests and documentation. For that to work, agents need more than metric definitions. They need the full picture: business rules, known data quality issues, relationship maps, historical context, and the institutional knowledge that lives in your team's heads. They need a context layer. 
## What a context layer is @@ -29,6 +29,13 @@ A context layer is the infrastructure that gives agents the business knowledge t KTX organizes context into four pillars: +- Semantic sources +- Knowledge pages +- Scan artifacts +- Provenance + +Each pillar covers a different kind of context agents need before they can safely write SQL, update semantic definitions, or explain an analytics result. + **Semantic sources** are YAML definitions that describe your data in terms agents can reason about. Each source maps to a table or SQL query, declares its grain, defines typed columns, specifies valid joins, and exposes named measures with optional filters. This is where "revenue means `sum(amount)` excluding refunds" lives. ```yaml @@ -60,7 +67,7 @@ measures: expr: count(id) ``` -**Knowledge pages** are Markdown documents that capture business definitions, rules, and gotchas — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. +**Knowledge pages** are Markdown documents that capture business definitions, rules, and operating context — the kind of context that doesn't fit in a schema definition. Pages have structured frontmatter (summary, tags, semantic layer references) and free-form content. Agents search them when they need to understand why a metric works a certain way, not just how to compute it. ```markdown --- @@ -90,13 +97,12 @@ Together, these four pillars give agents enough context to produce analytics art ## How KTX compares -KTX is a context layer, and its structured core is an agent-native semantic layer. That matters. MetricFlow, Cube, and Malloy all give teams ways to model metrics, dimensions, joins, and generated SQL. 
KTX covers that same semantic-layer job, then adds the surrounding context agents need to use it well: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools. +KTX is a context layer with an agent-native semantic layer at its core. MetricFlow, Cube, and Malloy model metrics, dimensions, and joins, and generate SQL from those models. KTX covers that semantic-layer work, then adds the context agents need to use and maintain it: knowledge pages, schema scans, provenance, ingestion, validation, and MCP tools. -The primary user is different. MetricFlow is centered on dbt-style metric definitions. Cube is centered on a governed semantic runtime for BI, applications, and agents. Malloy is centered on an expressive modeling and query language. KTX is centered on agents that need to read a semantic model, change it, validate it, inspect the generated SQL, and leave a reviewable git diff. +The workflow is the difference. Traditional semantic layers are powerful, but they are usually built and maintained through manual modeling work, product-specific runtimes, or language-specific workflows. They are not agent-native by default, which makes them harder for agents to inspect, edit, validate, and review in a tight loop. KTX is designed for agents that need to read context, change semantic files, inspect generated SQL, and leave a reviewable git diff. 
| | KTX semantic layer | MetricFlow | Cube | Malloy | |---|---|---|---|---| -| **Design center** | Agent-native semantic modeling inside a broader context layer | Metric definitions and dbt semantic models | Governed serving layer for BI, embedded analytics, APIs, and agents | Semantic modeling and analytical query language | | **Model surface** | Plain YAML sources plus Markdown knowledge pages | YAML semantic models and metrics in a dbt project | YAML or JavaScript cubes, views, access policies, and pre-aggregations | `.malloy` models, query pipelines, notebooks, and annotations | | **What it models** | Sources, columns, measures, segments, joins, grain, filters, default time dimensions, and context references | Semantic models, entities, dimensions, measures, metrics, time grains, and metric types | Cubes, views, measures, dimensions, segments, joins, hierarchies, policies, and rollups | Sources, joins, dimensions, measures, calculations, nested results, and query pipelines | | **Agent edit loop** | First-class. Agents can patch small files, save imperfect drafts, run validation, query through MCP, inspect SQL, and refine in the same workflow | Possible, but the interface is a dbt/metric workflow rather than an agent context workflow | Possible through code-first models and platform APIs, but changes are tied to runtime deployment and governance concerns | Possible, but agents must operate in Malloy's language and compiler model | @@ -105,15 +111,7 @@ The primary user is different. 
MetricFlow is centered on dbt-style metric defini | **Context around semantics** | Built in: wiki pages, scan artifacts, relationship inference, ingest transcripts, replay, and agent-facing MCP tools | Primarily metric and dbt project context | Descriptions and `meta.ai_context` inside the semantic model, plus platform agent features | Annotations/tags can carry metadata; surrounding context depends on the application | | **Best fit** | Agents maintaining analytics code, metrics, joins, SQL, docs, and semantic definitions | Teams standardizing metrics inside dbt workflows | Production semantic APIs, BI integrations, access control, caching, and concurrency | Expressive modeling and exploratory analysis above SQL | -**Agent-native by design.** KTX's advantage is not just that the files are YAML. The whole loop is shaped for agents: sources are small, overlays can add measures or computed columns without copying entire generated schemas, writes are permissive so an agent can save a draft, and validation/query tools give immediate feedback. An agent can move from "this metric is wrong" to "here is the semantic diff, generated SQL, and supporting context" without leaving the project. - -**A semantic layer plus the context to use it.** Traditional semantic layers define what to calculate. KTX also stores why the definition exists, where it came from, what schema evidence supports it, and what an agent did when it changed. A measure can live next to a knowledge page about exclusions, a scan artifact that proves the join path, and an ingest transcript that explains the source of the definition. That is the difference between giving an agent a metric catalog and giving it operational memory. - -**Fan-out handling is explicit and reviewable.** KTX asks model authors and agents to declare grain and relationship direction. 
The planner uses that metadata to avoid silent row multiplication: it detects `one_to_many` fan-out paths, separates independent fact measures into aggregate-locality CTEs, and refuses filters that would be unsafe to apply after pre-aggregation. Cube, MetricFlow, and Malloy all have strong approaches to this class of problem, but KTX's approach is deliberately inspectable in the files and in the generated plan. - -**Where other systems are stronger.** KTX draws a clear product boundary around agent-native context and semantic modeling. Cube is stronger when you need a production semantic API with access policies, pre-aggregations, refresh workers, and high-concurrency serving. MetricFlow is stronger when your primary workflow is dbt-native metric standardization. Malloy is stronger when you want a full analytical language with nested query shapes. KTX is strongest when the semantic layer is the substrate agents will read, edit, validate, and extend as part of day-to-day analytics engineering. - -**When KTX replaces your semantic layer vs. works beside it.** If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow, LookML, Looker, Metabase, dbt, or Notion, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. +If you do not have a semantic layer, KTX can build an agent-native one from your database schema and enrich it with generated descriptions and knowledge pages. If you already use MetricFlow or LookML, KTX ingests from those tools and merges their context into KTX's files. You can keep your existing BI or metric-serving system while using KTX as the semantic and contextual surface agents work against. 
## The plain-files philosophy From 37150c0abc155fe40339db7fabba1a6ec194c261 Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 23:32:13 -0700 Subject: [PATCH 07/33] docs: remove stale benchmarks section --- .../docs/benchmarks/link-detection.mdx | 163 ------------------ docs-site/content/docs/benchmarks/meta.json | 5 - docs-site/content/docs/meta.json | 1 - 3 files changed, 169 deletions(-) delete mode 100644 docs-site/content/docs/benchmarks/link-detection.mdx delete mode 100644 docs-site/content/docs/benchmarks/meta.json diff --git a/docs-site/content/docs/benchmarks/link-detection.mdx b/docs-site/content/docs/benchmarks/link-detection.mdx deleted file mode 100644 index feb2f3f1..00000000 --- a/docs-site/content/docs/benchmarks/link-detection.mdx +++ /dev/null @@ -1,163 +0,0 @@ ---- -title: Link Detection -description: How KTX's relationship detection performs on real-world schemas. ---- - -KTX infers foreign key relationships between tables even when the database declares no primary keys or foreign key constraints. This is critical for analytics warehouses, where constraints are rarely enforced. This page documents the methodology, scoring pipeline, and a reproducible benchmark you can run yourself. - -## Agent usage notes - -Use this page when an agent needs to explain, tune, or verify relationship detection. 
- -| Agent task | Relevant section | Command | -|------------|------------------|---------| -| Explain why KTX inferred a join | Detection pipeline | `ktx dev scan relationships --status all` | -| Decide whether to accept or reject a candidate | Scoring and threshold configuration | `ktx dev scan relationships --accept ` | -| Tune thresholds from reviewed decisions | Broader benchmark suite and calibration | `ktx dev scan relationship-thresholds --connection ` | -| Reproduce the bundled benchmark | Reproducing the benchmark | `pnpm run relationships:verify-orbit` | - -## What this measures - -Most analytics warehouses — Snowflake, BigQuery, Redshift — don't enforce referential integrity constraints. Tables like `fct_product_events` reference `dim_accounts` by convention (`account_id` → `id`), but nothing in the schema says so. - -KTX's relationship detection discovers these links automatically. The benchmark measures how accurately it recovers known foreign key relationships from a schema with **all declared constraints removed** — the hardest operating mode. - -Metrics tracked: - -- **Accepted** — relationships scored above the accept threshold (default 0.85) and written to the project manifest -- **Review** — relationships scored between the review threshold (0.55) and accept threshold, flagged for human review -- **Rejected** — relationships scored below the review threshold -- **Skipped** — relationships not evaluated (e.g., filtered by candidate limits) - -## Methodology - -### Detection pipeline - -Relationship detection runs as a multi-stage pipeline during `ktx dev scan`: - -1. **Candidate generation** — scans the schema for potential FK relationships using multiple heuristics: exact column name matches, normalized table name matching, name inflection (singular/plural), column suffix patterns (`_id`, `_key`, `_code`, `_uuid`), self-references (`parent_id`, `manager_id`), and optionally embedding similarity and LLM proposals. - -2. 
**Column profiling** — samples up to 10,000 rows per column (configurable via `profile_sample_rows`) to collect statistics: row counts, null rates, distinct value counts, uniqueness ratios, sample values, and text length ranges. - -3. **Validation** — tests each candidate relationship against actual data by measuring target uniqueness, source coverage, violation ratio, and value overlap between child and parent columns. - -4. **Scoring** — combines 7 weighted signals into a confidence score: - -| Signal | Weight | What it captures | -|--------|--------|-----------------| -| Name similarity | 0.24 | How closely column/table names match FK conventions | -| Value overlap | 0.22 | What percentage of FK values exist in the PK column | -| Profile uniqueness | 0.22 | How unique the target column values are | -| Type compatibility | 0.10 | Whether data types are compatible (hard gate — score is 0 if incompatible) | -| Embedding similarity | 0.10 | Semantic similarity between column names | -| Profile null rate | 0.08 | Presence of non-null values | -| Structural prior | 0.04 | Baseline structural hints from schema conventions | - -Each signal is normalized to \[0, 1\], multiplied by its weight, and summed. The final confidence is `0.56 + (weighted_sum × 0.65)`, clamped to \[0, 1\]. - -5. **Graph resolution** — resolves conflicts when multiple candidates target the same column, detects primary keys (by name pattern and validation), and classifies each relationship into `accepted`, `review`, or `rejected` based on thresholds. - -### Threshold configuration - -```yaml -scan: - relationships: - accept_threshold: 0.85 - review_threshold: 0.55 -``` - -Relationships scoring above `accept_threshold` are automatically accepted into the project manifest. Those between `review_threshold` and `accept_threshold` are flagged for analyst review. Below `review_threshold`, they're rejected. 
- -### Test fixture - -The benchmark uses the **Orbit-style product warehouse** — a synthetic schema modeled after a real SaaS analytics warehouse with all declared constraints removed. The fixture is a SQLite database with 6 tables: - -| Table | Role | Estimated rows | -|-------|------|---------------| -| `dim_accounts` | Dimension | 3 | -| `dim_users` | Dimension | 4 | -| `dim_workspaces` | Dimension | 4 | -| `fct_product_events` | Fact | 5 | -| `fct_invoices` | Fact | 3 | -| `support_tickets` | Fact | 4 | - -**Ground truth:** 6 primary keys (one `id` column per table) and 9 foreign key relationships, all `many_to_one`: - -| Source column | Target | -|--------------|--------| -| `dim_users.account_id` | `dim_accounts.id` | -| `dim_workspaces.account_id` | `dim_accounts.id` | -| `dim_workspaces.user_id` | `dim_users.id` | -| `fct_product_events.account_id` | `dim_accounts.id` | -| `fct_product_events.user_id` | `dim_users.id` | -| `fct_product_events.workspace_id` | `dim_workspaces.id` | -| `fct_invoices.account_id` | `dim_accounts.id` | -| `support_tickets.account_id` | `dim_accounts.id` | -| `support_tickets.user_id` | `dim_users.id` | - -The fixture runs in multiple modes to isolate the contribution of each pipeline stage: with LLM disabled, profiling disabled, validation disabled, and embeddings disabled. - -## Results - -Results for the default configuration will be added after the benchmark run is finalized. 
- -## Reproducing the benchmark - -### Prerequisites - -- Node.js 22+ -- pnpm -- The KTX repository cloned and dependencies installed (`pnpm install`) - -### Running - -From the repository root: - -```bash -pnpm run relationships:verify-orbit -``` - -This runs `ktx dev scan` against the bundled SQLite fixture with enrichment disabled, then generates a verification report at: - -```text -examples/orbit-relationship-verification/reports/orbit-verification.md -``` - -The report includes the full relationship summary, enrichment details, artifact paths, and any warnings. - -### Custom project - -To run verification against your own database (e.g., a local Orbit project): - -```bash -KTX_ORBIT_PROJECT_DIR=/path/to/your-project pnpm run relationships:verify-orbit -``` - -### Configuration - -The benchmark project configuration lives at `examples/orbit-relationship-verification/ktx.yaml`: - -```yaml -scan: - enrichment: - backend: none - relationships: - enabled: true - llm_proposals: false - accept_threshold: 0.85 - review_threshold: 0.55 - profile_sample_rows: 10000 - validation_concurrency: 4 -``` - -Adjust `accept_threshold` and `review_threshold` to see how threshold changes affect the accepted/review/rejected distribution. Lower thresholds accept more relationships (higher recall, lower precision); higher thresholds are more conservative. - -## Broader benchmark suite - -Beyond the Orbit fixture, KTX includes a full benchmark corpus at `packages/context/test/fixtures/relationship-benchmarks/` with fixtures across multiple tiers: - -- **Unit** — minimal schemas testing individual heuristics -- **Row-bearing** — small schemas with data for validation testing -- **Product** — full warehouse schemas like the Orbit fixture - -Fixtures from public datasets (Chinook, Sakila, AdventureWorks, Northwind) supplement the synthetic fixtures. The benchmark runner measures precision, recall, and F1 for both primary key and foreign key detection across all fixtures and modes. 
diff --git a/docs-site/content/docs/benchmarks/meta.json b/docs-site/content/docs/benchmarks/meta.json deleted file mode 100644 index b75b5aba..00000000 --- a/docs-site/content/docs/benchmarks/meta.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "title": "Benchmarks", - "defaultOpen": true, - "pages": ["link-detection"] -} diff --git a/docs-site/content/docs/meta.json b/docs-site/content/docs/meta.json index 93dc227f..6fbee965 100644 --- a/docs-site/content/docs/meta.json +++ b/docs-site/content/docs/meta.json @@ -6,7 +6,6 @@ "concepts", "guides", "integrations", - "benchmarks", "cli-reference", "ai-resources", "community" From f693adf637a1acd19f58adac9c61c88b1188b45b Mon Sep 17 00:00:00 2001 From: Luca Martial Date: Mon, 11 May 2026 23:32:16 -0700 Subject: [PATCH 08/33] feat(docs): polish introduction page --- docs-site/app/docs/[[...slug]]/page.tsx | 24 +++-- docs-site/app/global.css | 79 ++++++++++++++- docs-site/components/code-block.tsx | 20 +++- docs-site/components/docs-page-actions.tsx | 2 +- .../docs/getting-started/introduction.mdx | 97 +++++++++++-------- 5 files changed, 167 insertions(+), 55 deletions(-) diff --git a/docs-site/app/docs/[[...slug]]/page.tsx b/docs-site/app/docs/[[...slug]]/page.tsx index 869454ec..1e0c18ad 100644 --- a/docs-site/app/docs/[[...slug]]/page.tsx +++ b/docs-site/app/docs/[[...slug]]/page.tsx @@ -17,6 +17,10 @@ function isDocsIndex(slug: string[] | undefined) { return slug === undefined || slug.length === 0 || slug.join("/") === ""; } +function isHeroPage(slug: string[] | undefined) { + return slug?.join("/") === "getting-started/introduction"; +} + export default async function Page(props: { params: Promise<{ slug?: string[] }>; }) { @@ -30,14 +34,22 @@ export default async function Page(props: { const MDX = page.data.body; + const hero = isHeroPage(params.slug); + return ( - {page.data.title} - {page.data.description} - + {!hero && ( + <> +

+ {page.data.title} + +
+ {page.data.description} + + )} diff --git a/docs-site/app/global.css b/docs-site/app/global.css index b1e3a4e9..f3425e78 100644 --- a/docs-site/app/global.css +++ b/docs-site/app/global.css @@ -262,6 +262,74 @@ figure[data-rehype-pretty-code-figure]:has(.ktx-code) { color: #c8c3bc !important; } +/* ── Mode D: Output preview (wizard prompts, status output) ── */ +.ktx-code-output { + background: var(--color-fd-muted); + border: 1px solid var(--color-fd-border); + border-left: 3px solid color-mix(in oklch, var(--color-fd-primary) 50%, var(--color-fd-border)); + position: relative; + box-shadow: 0 1px 2px rgba(27, 27, 24, 0.02); +} + +.dark .ktx-code-output { + background: #111a1e; + border-color: rgba(255, 255, 255, 0.05); + border-left-color: rgba(34, 211, 238, 0.25); +} + +.ktx-code-output:hover { + border-color: color-mix(in oklch, var(--color-fd-primary) 25%, var(--color-fd-border)); + border-left-color: var(--color-fd-primary); +} + +.dark .ktx-code-output:hover { + border-color: rgba(255, 255, 255, 0.08); + border-left-color: rgba(34, 211, 238, 0.45); +} + +.ktx-code-output-label { + position: absolute; + top: 8px; + right: 14px; + font-size: 10px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--color-fd-muted-foreground); + font-family: var(--font-display), var(--font-sans), sans-serif; + opacity: 0.4; + pointer-events: none; + z-index: 1; +} + +.ktx-code-output-copy { + position: absolute !important; + top: 6px !important; + right: 6px !important; + opacity: 0; + transform: translateY(-4px); + transition: opacity 0.2s var(--ktx-ease), transform 0.2s var(--ktx-ease); + z-index: 2; +} + +.ktx-code-output:hover .ktx-code-output-copy { + opacity: 0.5; + transform: translateY(0); +} + +.ktx-code-output:hover .ktx-code-output-label { + opacity: 0; +} + +.ktx-code-body-output { + background: transparent !important; + color: var(--ktx-ink-soft) !important; +} + +.dark .ktx-code-body-output { + color: #8a9da6 !important; +} 
+ /* ── Mode B: VS Code tab (filename) ───────── */ .ktx-code-tab { background: var(--color-fd-card); @@ -495,14 +563,20 @@ th { opacity: 0.7; } +/* Hide the vertical indicator lines in sidebar sections */ +#nd-sidebar div[data-state]::before, +#nd-sidebar a[data-active]::before { + content: none !important; + display: none !important; +} + /* Page link items */ #nd-sidebar a[data-active] { font-size: 14px; padding: 6px 12px; border-radius: 6px; margin-left: 0; - border-left: 2px solid transparent; - transition: background 0.15s ease, color 0.15s ease, border-color 0.15s ease; + transition: background 0.15s ease, color 0.15s ease; } #nd-sidebar a[data-active="false"]:hover { @@ -512,7 +586,6 @@ th { #nd-sidebar a[data-active="true"] { background: color-mix(in oklch, var(--color-fd-primary) 8%, transparent) !important; - border-left-color: var(--color-fd-primary) !important; color: var(--color-fd-primary) !important; font-weight: 500; } diff --git a/docs-site/components/code-block.tsx b/docs-site/components/code-block.tsx index d8045a5c..15ae5ce7 100644 --- a/docs-site/components/code-block.tsx +++ b/docs-site/components/code-block.tsx @@ -52,12 +52,11 @@ export function CodeBlock(props: Props) { const language = detectLanguage(props, children); const codeText = extractText(children); - const isTerminal = - (language !== null && TERMINAL_LANGS.has(language)) || - WIZARD_GLYPHS.test(codeText); + const isTerminal = language !== null && TERMINAL_LANGS.has(language); + const isOutput = !isTerminal && WIZARD_GLYPHS.test(codeText); const hasTitle = typeof title === "string" && title.length > 0; - // Mode A — Terminal + // Mode A — Terminal (commands the user types) if (isTerminal) { return (
@@ -80,6 +79,19 @@ export function CodeBlock(props: Props) { ); } + // Mode D — Output preview (wizard prompts, terminal output) + if (isOutput) { + return ( +
+ output + +
+          {children}
+        
+
+ ); + } + // Mode B — VS Code tab (filename present) if (hasTitle) { return ( diff --git a/docs-site/components/docs-page-actions.tsx b/docs-site/components/docs-page-actions.tsx index dd69cea3..95bf93a4 100644 --- a/docs-site/components/docs-page-actions.tsx +++ b/docs-site/components/docs-page-actions.tsx @@ -11,7 +11,7 @@ type Props = { export function DocsPageActions({ markdownUrl, mdxSource }: Props) { return ( -
+ ## Who KTX is for -KTX is built for analytics engineers and data teams who want data agents to work on real analytics systems, not just generate one-off SQL. +KTX is built for analytics engineers and data teams who want data agents to +work on real analytics systems — not just generate one-off SQL. Use KTX when you want agents to: -- Generate SQL from approved measures, dimensions, and joins -- Repair or extend semantic definitions through reviewable git diffs -- Explain where a metric definition came from and what business rules shape it -- Use warehouse scans and relationship evidence instead of guessing join paths -- Work alongside **dbt**, **LookML**, **MetricFlow**, **Looker**, **Metabase**, **Notion**, and BI platforms -- Work with warehouses like **PostgreSQL**, **Snowflake**, **BigQuery**, **ClickHouse**, **MySQL**, or **SQL Server** +- **Generate SQL** from approved measures and joins +- **Repair semantic definitions** through reviewable diffs +- **Explain metric provenance** with warehouse evidence +- **Work alongside** dbt, LookML, MetricFlow, Looker, Metabase, and modern BI platforms -If you've ever watched an agent confidently generate a query that joins on the wrong key or invents a metric that doesn't exist, KTX is the fix. +Works with PostgreSQL, Snowflake, BigQuery, ClickHouse, MySQL, and SQL Server. 
-## What KTX gives agents - -- **A semantic layer they can edit** — plain YAML sources with measures, dimensions, joins, grain, segments, filters, and computed columns -- **Safe query planning** — grain-aware SQL generation, fan-out detection, chasm-trap handling, and dialect transpilation -- **Business context** — Markdown knowledge pages for definitions, rules, exceptions, and data quality notes -- **Schema evidence** — warehouse scans with table metadata, column stats, constraints, and inferred relationships -- **Provenance** — ingest transcripts and replay metadata that explain where context came from and why it changed -- **An agent-facing API** — MCP and CLI tools for reading, writing, validating, searching, and querying context - -## How these docs are organized +## Explore the docs Set up KTX and build your first context in under 10 minutes. - - Machine-readable docs and prompt recipes for coding assistants. - - Understand what a context layer is, why agents need one, and how KTX compares to other semantic layers. + Understand what a context layer is and why agents need one. - Hands-on workflows for scanning, ingesting, writing semantic sources, and serving agents. - - - Setup details for every supported database, context source, and agent client. + Hands-on workflows for scanning, ingesting, writing, and serving. - Exhaustive flag and subcommand reference for every KTX command. + Complete flag and subcommand reference for every KTX command. -## Next steps - -- **Get hands-on** — follow the [Quickstart](/docs/getting-started/quickstart) to set up KTX with your own database in under 10 minutes. -- **Help a coding agent use the docs** — start with [AI Resources](/docs/ai-resources) or fetch [`/llms.txt`](/llms.txt). -- **Understand the theory** — read [The Context Layer](/docs/concepts/the-context-layer) to learn why schema access alone breaks on real analytics and how KTX addresses it. 
- ## Agent usage notes -Use this page as the high-level routing document for KTX docs. - | Agent task | Read next | |------------|-----------| | Discover machine-readable docs | [AI Resources](/docs/ai-resources) | From 106ce161ee419a091696eaf50edf61ce942c8136 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 10:25:54 +0200 Subject: [PATCH 09/33] fix(cli): support Metabase connection tests (#21) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- packages/cli/src/connection.test.ts | 58 ++++++++++++++++++++++++- packages/cli/src/connection.ts | 67 +++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index a54280be..ae593805 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -1,7 +1,8 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { initKtxProject, parseKtxProjectConfig } from '@ktx/context/project'; +import type { MetabaseRuntimeClient } from '@ktx/context/ingest'; +import { initKtxProject, parseKtxProjectConfig, serializeKtxProjectConfig } from '@ktx/context/project'; import type { KtxConnectionDriver, KtxScanConnector, KtxSchemaSnapshot } from '@ktx/context/scan'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnection } from './connection.js'; @@ -598,6 +599,61 @@ describe('runKtxConnection', () => { expect(io.stdout()).toContain('Tables: 2'); }); + it('tests a configured Metabase connection through the Metabase runtime client', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + const projectConfig = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + 
serializeKtxProjectConfig({ + ...projectConfig, + connections: { + ...projectConfig.connections, + prod_metabase: { + driver: 'metabase', + api_url: 'http://metabase.example.test', + api_key: 'mb_test', + }, + }, + }), + 'utf-8', + ); + const testConnection = vi.fn(async () => ({ success: true as const })); + const getDatabases = vi.fn(async () => [ + { id: 1, name: 'Analytics', engine: 'postgres', details: {}, is_sample: false }, + { id: 2, name: 'Sample Database', engine: 'h2', details: {}, is_sample: true }, + ]); + const cleanup = vi.fn(async () => undefined); + const createMetabaseClient = vi.fn( + async (): Promise> => ({ + testConnection, + getDatabases, + cleanup, + }), + ); + const createScanConnector = vi.fn(async () => { + throw new Error('native scanner should not be used for Metabase'); + }); + const io = makeIo(); + + await expect( + runKtxConnection({ command: 'test', projectDir, connectionId: 'prod_metabase' }, io.io, { + createScanConnector, + createMetabaseClient, + }), + ).resolves.toBe(0); + + expect(createScanConnector).not.toHaveBeenCalled(); + expect(createMetabaseClient).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), 'prod_metabase'); + expect(testConnection).toHaveBeenCalledTimes(1); + expect(getDatabases).toHaveBeenCalledTimes(1); + expect(cleanup).toHaveBeenCalledTimes(1); + expect(io.stdout()).toContain('Connection test passed: prod_metabase'); + expect(io.stdout()).toContain('Driver: metabase'); + expect(io.stdout()).toContain('Databases: 1'); + expect(io.stderr()).toBe(''); + }); + it('cleans up the native scan connector when connection testing fails', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir, projectName: 'warehouse' }); diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index aa6de7c2..1dde60ac 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -1,4 +1,10 @@ import { cancel, confirm, isCancel } from 
'@clack/prompts'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + type MetabaseRuntimeClient, + metabaseRuntimeConfigFromLocalConnection, +} from '@ktx/context/ingest'; import { type KtxLocalProject, loadKtxProject, serializeKtxProjectConfig } from '@ktx/context/project'; import type { KtxScanConnector } from '@ktx/context/scan'; import type { KtxConnectionMappingArgs } from './commands/connection-mapping.js'; @@ -61,6 +67,7 @@ interface KtxConnectionIo extends KtxCliIo { interface KtxConnectionDeps { createScanConnector?: typeof createKtxCliScanConnector; + createMetabaseClient?: typeof createDefaultMetabaseClient; runMapping?: (argv: string[], io: KtxCliIo) => Promise; prompts?: KtxConnectionPromptAdapter; } @@ -104,6 +111,12 @@ async function cleanupConnector(connector: KtxScanConnector | null): Promise> { + const factory = new DefaultMetabaseConnectionClientFactory( + (metabaseConnectionId) => + metabaseRuntimeConfigFromLocalConnection( + metabaseConnectionId, + project.config.connections[metabaseConnectionId], + ), + DEFAULT_METABASE_CLIENT_CONFIG, + ); + return factory.createClient(connectionId); +} + +async function testMetabaseConnection( + project: KtxLocalProject, + connectionId: string, + createMetabaseClient: typeof createDefaultMetabaseClient, +): Promise<{ driver: 'metabase'; databaseCount: number }> { + let client: Pick | null = null; + try { + client = await createMetabaseClient(project, connectionId); + const testResult = await client.testConnection(); + if (!testResult.success) { + throw new Error( + `Metabase connection test failed: ${testResult.error ?? testResult.message ?? 
'unknown error'}`, + ); + } + + const databases = await client.getDatabases(); + const databaseCount = databases.filter((database) => database.is_sample !== true).length; + if (databaseCount === 0) { + throw new Error('Metabase auth worked but no usable databases were returned'); + } + + return { driver: 'metabase', databaseCount }; + } finally { + await client?.cleanup(); + } +} + interface BufferedIo extends KtxCliIo { stdoutText(): string; stderrText(): string; @@ -399,6 +454,18 @@ export async function runKtxConnection( return 0; } + if (normalizedConnectionDriver(project, args.connectionId) === 'metabase') { + const result = await testMetabaseConnection( + project, + args.connectionId, + deps.createMetabaseClient ?? createDefaultMetabaseClient, + ); + io.stdout.write(`Connection test passed: ${args.connectionId}\n`); + io.stdout.write(`Driver: ${result.driver}\n`); + io.stdout.write(`Databases: ${result.databaseCount}\n`); + return 0; + } + const result = await testNativeConnection( project, args.connectionId, From 9d3b1015cc4db15c6cd76ef915ca3499ce4a623d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 10:25:56 +0200 Subject: [PATCH 10/33] fix: allow dbt ingest to discover warehouse schemas (#20) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- .../src/sl/semantic-layer.service.test.ts | 25 ++++++ .../context/src/sl/semantic-layer.service.ts | 8 +- .../src/sl/tools/sl-discover.tool.test.ts | 80 +++++++++++++++++++ .../context/src/sl/tools/sl-discover.tool.ts | 20 +++-- 4 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 packages/context/src/sl/tools/sl-discover.tool.test.ts diff --git a/packages/context/src/sl/semantic-layer.service.test.ts b/packages/context/src/sl/semantic-layer.service.test.ts index 3adde085..c89cbc28 100644 --- a/packages/context/src/sl/semantic-layer.service.test.ts +++ b/packages/context/src/sl/semantic-layer.service.test.ts @@ -38,6 +38,31 @@ const baseTable: 
SemanticLayerSource = { measures: [], }; +describe('listConnectionIdsWithNames', () => { + it('discovers local KTX connection ids from semantic-layer directories', async () => { + const configService = { + listFiles: vi.fn().mockResolvedValue({ + files: [ + 'semantic-layer/warehouse/_schema/public.yaml', + 'semantic-layer/dbt-main/orders.yaml', + 'semantic-layer/.gitkeep', + ], + }), + }; + const catalog = connectionCatalog(); + catalog.listEnabledConnections.mockImplementation(async (ids: string[]) => + ids.map((id) => ({ id, name: id, connectionType: id === 'warehouse' ? 'postgres' : 'dbt' })), + ); + const service = new SemanticLayerService(configService as never, catalog, pythonPort); + + await expect(service.listConnectionIdsWithNames()).resolves.toEqual([ + { id: 'dbt-main', name: 'dbt-main', connectionType: 'dbt' }, + { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }, + ]); + expect(catalog.listEnabledConnections).toHaveBeenCalledWith(['dbt-main', 'warehouse']); + }); +}); + describe('composeOverlay', () => { it('carries top-level segments from overlay into the composed source', () => { const overlay = { diff --git a/packages/context/src/sl/semantic-layer.service.ts b/packages/context/src/sl/semantic-layer.service.ts index ffae0b12..938763fe 100644 --- a/packages/context/src/sl/semantic-layer.service.ts +++ b/packages/context/src/sl/semantic-layer.service.ts @@ -12,6 +12,7 @@ interface WriteSourceOptions { } const SL_DIR_PREFIX = 'semantic-layer'; +const CONNECTION_ID_PATTERN = /^[a-zA-Z0-9][a-zA-Z0-9_-]*$/; function formatPortError(error: unknown, fallback: string): string { if (typeof error === 'string') { @@ -61,11 +62,12 @@ export class SemanticLayerService { async listConnectionIds(): Promise { try { const result = await this.configService.listFiles(SL_DIR_PREFIX); - // Directories under semantic-layer/ are connectionIds (UUIDs) - const uuidPattern = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + // Directories 
under semantic-layer/ are connectionIds. Local KTX projects use + // readable ids like "warehouse" and "dbt-main", not only UUIDs. return result.files .map((f) => f.replace(`${SL_DIR_PREFIX}/`, '').split('/')[0]) - .filter((name, i, arr) => uuidPattern.test(name) && arr.indexOf(name) === i); + .filter((name, i, arr) => CONNECTION_ID_PATTERN.test(name) && arr.indexOf(name) === i) + .sort(); } catch { return []; } diff --git a/packages/context/src/sl/tools/sl-discover.tool.test.ts b/packages/context/src/sl/tools/sl-discover.tool.test.ts new file mode 100644 index 00000000..3277d45d --- /dev/null +++ b/packages/context/src/sl/tools/sl-discover.tool.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { ToolContext, ToolSession } from '../../tools/index.js'; +import { createTouchedSlSources } from '../../tools/index.js'; +import type { SemanticLayerSource } from '../types.js'; +import { SlDiscoverTool } from './sl-discover.tool.js'; + +function makeTool() { + const semanticLayerService = { + listConnectionIdsWithNames: vi.fn(async () => [] as Array<{ id: string; name: string; connectionType: string }>), + loadAllSources: vi.fn(async () => [] as SemanticLayerSource[]), + }; + const slSearchService = { + search: vi.fn(async () => []), + }; + const tool = new SlDiscoverTool( + { + semanticLayerService: semanticLayerService as never, + slSearchService: slSearchService as never, + authorResolver: { resolve: vi.fn() }, + }, + { maxSources: 25, minRrfScore: 0, maxDetailedSources: 5 }, + ); + return { tool, semanticLayerService, slSearchService }; +} + +function makeContext(overrides: Partial = {}): ToolContext { + return { + sourceId: 'src', + messageId: 'msg', + userId: 'user', + ...overrides, + }; +} + +function makeSession(semanticLayerService: Record): ToolSession { + return { + connectionId: 'dbt-main', + isWorktreeScoped: true, + preHead: 'base', + touchedSlSources: createTouchedSlSources(), + actions: [], + semanticLayerService: 
semanticLayerService as never, + wikiService: {} as never, + configService: {} as never, + gitService: {} as never, + }; +} + +describe('SlDiscoverTool - session-scoped reads', () => { + it('discovers sources through context.session.semanticLayerService when a session is present', async () => { + const { tool, semanticLayerService } = makeTool(); + const sessionSemanticLayerService = { + listConnectionIdsWithNames: vi.fn().mockResolvedValue([ + { id: 'warehouse', name: 'warehouse', connectionType: 'postgres' }, + ]), + loadAllSources: vi.fn().mockResolvedValue([ + { + name: 'orders', + table: 'public.orders', + grain: ['order_id'], + columns: [{ name: 'order_id', type: 'string' }], + measures: [], + joins: [], + }, + ]), + }; + + const result = await tool.call({}, makeContext({ session: makeSession(sessionSemanticLayerService) })); + + expect(result.structured.totalSources).toBe(1); + expect(result.structured.sources[0]).toMatchObject({ + connectionId: 'warehouse', + name: 'orders', + columnCount: 1, + }); + expect(sessionSemanticLayerService.listConnectionIdsWithNames).toHaveBeenCalled(); + expect(sessionSemanticLayerService.loadAllSources).toHaveBeenCalledWith('warehouse'); + expect(semanticLayerService.listConnectionIdsWithNames).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/context/src/sl/tools/sl-discover.tool.ts b/packages/context/src/sl/tools/sl-discover.tool.ts index ed7c1854..97426b40 100644 --- a/packages/context/src/sl/tools/sl-discover.tool.ts +++ b/packages/context/src/sl/tools/sl-discover.tool.ts @@ -1,5 +1,6 @@ import { z } from 'zod'; import { DEFAULT_PRIORITY, resolveDescription } from '../descriptions.js'; +import type { SemanticLayerService } from '../semantic-layer.service.js'; import type { SemanticLayerSource } from '../types.js'; import type { ToolContext, ToolOutput } from '../../tools/index.js'; import { BaseSemanticLayerTool, type BaseSemanticLayerToolDeps } from './base-semantic-layer.tool.js'; @@ -66,13 +67,14 @@ Use this to 
understand what data is available before writing a semantic_query. return slDiscoverInputSchema; } - async call(input: SlDiscoverInput, _context: ToolContext): Promise> { + async call(input: SlDiscoverInput, context: ToolContext): Promise> { const { query, sourceName } = input; + const semanticLayerService = context.session?.semanticLayerService ?? this.semanticLayerService; // Resolve connectionId: use provided value, or auto-detect let connectionId = input.connectionId; if (!connectionId) { - const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + const connections = await semanticLayerService.listConnectionIdsWithNames(); if (connections.length === 0) { return { markdown: 'No semantic layer sources found. Run a schema scan first.', @@ -92,14 +94,14 @@ Use this to understand what data is available before writing a semantic_query. structured: { sources: [], totalSources: 0 }, }; } - return this.discoverAcrossConnections(connections, query); + return this.discoverAcrossConnections(semanticLayerService, connections, query); } } // If inspecting a specific source — show the SL interface (columns, measures, joins) // without the raw SQL. Use `sl_read_source` to see the full YAML including SQL. if (sourceName) { - const sources = await this.semanticLayerService.loadAllSources(connectionId); + const sources = await semanticLayerService.loadAllSources(connectionId); const source = sources.find((s) => s.name === sourceName); if (!source) { return { @@ -136,19 +138,20 @@ Use this to understand what data is available before writing a semantic_query. } // Single connection: list all sources - const connections = await this.semanticLayerService.listConnectionIdsWithNames(); + const connections = await semanticLayerService.listConnectionIdsWithNames(); const connInfo = connections.find((c) => c.id === connectionId); - return this.discoverForConnection(connectionId, connInfo?.name ?? 
connectionId, query); + return this.discoverForConnection(semanticLayerService, connectionId, connInfo?.name ?? connectionId, query); } private async discoverAcrossConnections( + semanticLayerService: SemanticLayerService, connections: Array<{ id: string; name: string; connectionType: string }>, query?: string, ): Promise> { // Load sources from all connections in parallel const results = await Promise.all( connections.map(async (conn) => { - const sources = await this.semanticLayerService.loadAllSources(conn.id); + const sources = await semanticLayerService.loadAllSources(conn.id); let filtered = sources; if (query) { filtered = await this.filterByQuery(conn.id, sources, query); @@ -205,11 +208,12 @@ Use this to understand what data is available before writing a semantic_query. } private async discoverForConnection( + semanticLayerService: SemanticLayerService, connectionId: string, connectionName: string, query?: string, ): Promise> { - const sources = await this.semanticLayerService.loadAllSources(connectionId); + const sources = await semanticLayerService.loadAllSources(connectionId); if (sources.length === 0) { return { From df2eeaa96cbf9c040953a1f0362b8cd906c72095 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 10:25:58 +0200 Subject: [PATCH 11/33] fix(cli): surface historic sql ingest progress (#18) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- packages/cli/src/ingest.test.ts | 91 ++++++++++++++++++++++++ packages/cli/src/ingest.ts | 56 ++++++++++++--- packages/cli/src/setup-databases.test.ts | 1 + packages/cli/src/setup-databases.ts | 24 +++++-- 4 files changed, 160 insertions(+), 12 deletions(-) diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 9fc4dc82..3b580cc1 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -918,6 +918,97 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); }); + it('prints plain WorkUnit step 
progress during long-running local ingest', async () => { + const projectDir = join(tempDir, 'historic-sql-step-progress-project'); + await mkdir(projectDir, { recursive: true }); + await writeFile( + join(projectDir, 'ktx.yaml'), + [ + 'project: historic-sql-step-progress-project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' minExecutions: 2', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const createdAdapters: SourceAdapter[] = [ + { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + input.memoryFlow?.update({ + plannedWorkUnits: [ + { + unitKey: 'historic-sql-table-public-orders', + rawFiles: ['tables/public/orders.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + { + unitKey: 'historic-sql-table-public-customers', + rawFiles: ['tables/public/customers.json'], + peerFileCount: 0, + dependencyCount: 0, + }, + ], + }); + input.memoryFlow?.emit({ type: 'chunks_planned', chunkCount: 2, workUnitCount: 2, evictionCount: 0 }); + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey: 'historic-sql-table-public-orders', + skills: ['historic_sql_table_digest'], + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_step', + unitKey: 'historic-sql-table-public-orders', + stepIndex: 7, + stepBudget: 40, + }); + input.memoryFlow?.emit({ + type: 'work_unit_finished', + unitKey: 'historic-sql-table-public-orders', + status: 'success', + }); + input.memoryFlow?.finish('done'); + return completedLocalBundleRun(input, input.jobId ?? 
'historic-step-progress-job'); + }); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + outputMode: 'plain', + }, + io.io, + { + env: interactiveEnv(), + createAdapters: vi.fn(() => createdAdapters as never), + runLocalIngest: runLocal, + jobIdFactory: () => 'historic-step-progress-job', + }, + ), + ).resolves.toBe(0); + + const stdout = io.stdout(); + expect(stdout).toContain('[45%] Planned 2 work units'); + expect(stdout).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); + expect(stdout).toContain('[58%] Processing 1/2 work units: historic-sql-table-public-orders step 7/40'); + expect(stdout).toContain('[68%] Processed 1/2 work units'); + }); + it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index a580b3d5..39bf21bb 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -168,13 +168,37 @@ function formatDiffProgress(event: Extract event.type === 'work_unit_finished').length; +function workUnitEventsThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): MemoryFlowEvent[] { + return snapshot.events.slice(0, eventIndex + 1); +} + +function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { + return workUnitEventsThrough(snapshot, eventIndex).filter((event) => event.type === 'work_unit_finished').length; +} + +function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { + if (snapshot.plannedWorkUnits.length > 0) { + return snapshot.plannedWorkUnits.length; + } + const planEvent = workUnitEventsThrough(snapshot, eventIndex) + .filter((event) => event.type === 'chunks_planned') + .at(-1); + 
return planEvent?.workUnitCount ?? completedWorkUnitCountThrough(snapshot, eventIndex); +} + +function workUnitOrdinalThrough(snapshot: MemoryFlowReplayInput, eventIndex: number, unitKey: string): number { + const events = workUnitEventsThrough(snapshot, eventIndex); + const startedIndex = events.findIndex((event) => event.type === 'work_unit_started' && event.unitKey === unitKey); + if (startedIndex === -1) { + return completedWorkUnitCountThrough(snapshot, eventIndex) + 1; + } + return events.slice(0, startedIndex + 1).filter((event) => event.type === 'work_unit_started').length; } function plainIngestEventProgress( event: MemoryFlowEvent, snapshot: MemoryFlowReplayInput, + eventIndex: number, ): { percent: number; message: string } | null { switch (event.type) { case 'source_acquired': @@ -196,11 +220,27 @@ function plainIngestEventProgress( }; case 'stage_skipped': return { percent: 45, message: `Skipped ${event.stage}: ${event.reason}` }; - case 'work_unit_started': - return { percent: 55, message: `Processing ${event.unitKey}` }; + case 'work_unit_started': { + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); + const progress = total > 0 ? `${ordinal}/${total} work units: ` : ''; + return { percent: 55, message: `Processing ${progress}${event.unitKey}` }; + } + case 'work_unit_step': { + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const completed = completedWorkUnitCountThrough(snapshot, eventIndex); + const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); + const stepFraction = event.stepBudget > 0 ? Math.min(1, event.stepIndex / event.stepBudget) : 0; + const percent = total > 0 ? 55 + Math.ceil(((completed + stepFraction) / total) * 25) : 55; + const progress = total > 0 ? 
`${ordinal}/${total} work units: ` : ''; + return { + percent, + message: `Processing ${progress}${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`, + }; + } case 'work_unit_finished': { - const total = snapshot.plannedWorkUnits.length || completedWorkUnitCount(snapshot); - const completed = completedWorkUnitCount(snapshot); + const total = plannedWorkUnitCountThrough(snapshot, eventIndex); + const completed = completedWorkUnitCountThrough(snapshot, eventIndex); const percent = total > 0 ? 55 + Math.round((completed / total) * 25) : 80; return { percent, @@ -225,7 +265,6 @@ function plainIngestEventProgress( case 'report_created': return { percent: 98, message: `Created ingest report ${event.reportPath ?? event.runId}` }; case 'scope_detected': - case 'work_unit_step': case 'candidate_action': return null; } @@ -259,11 +298,12 @@ function createPlainIngestProgressRenderer( }, update(snapshot) { while (printedEvents < snapshot.events.length) { + const eventIndex = printedEvents; const event = snapshot.events[printedEvents++]; if (!event) { continue; } - const progress = plainIngestEventProgress(event, snapshot); + const progress = plainIngestEventProgress(event, snapshot, eventIndex); if (progress) { write(progress.percent, progress.message); } diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 09b9d29f..a20df910 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -1295,6 +1295,7 @@ describe('setup databases step', () => { expect(config.connections.warehouse.historicSql).not.toHaveProperty('redactionPatterns'); expect(config.connections.warehouse.historicSql).not.toHaveProperty(legacyHistoricSqlServiceAccountPatternsKey); expect(config.ingest.adapters).toContain('historic-sql'); + expect(config.ingest.workUnits.maxConcurrency).toBe(6); expect(io.stdout()).toContain('Historic SQL probe...'); expect(io.stdout()).toContain('pg_stat_statements ready'); }); 
diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index bd554590..3d49f75b 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -14,6 +14,8 @@ import { runKtxScan } from './scan.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +const HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY = 6; + export type KtxSetupDatabaseDriver = | 'sqlite' | 'postgres' @@ -843,7 +845,7 @@ async function writeConnectionConfig(input: { ? (input.connection.historicSql as Record) : null; if (historicSql?.enabled === true) { - await ensureHistoricSqlAdapterEnabled(input.projectDir); + await ensureHistoricSqlIngestDefaults(input.projectDir); } } @@ -954,9 +956,19 @@ async function maybeConfigurePostgresSchemas(input: { return true; } -async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise { +async function ensureHistoricSqlIngestDefaults(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); - if (project.config.ingest.adapters.includes('historic-sql')) { + const adapters = project.config.ingest.adapters.includes('historic-sql') + ? 
project.config.ingest.adapters + : [...project.config.ingest.adapters, 'historic-sql']; + const maxConcurrency = Math.max( + project.config.ingest.workUnits.maxConcurrency, + HISTORIC_SQL_WORK_UNIT_MAX_CONCURRENCY, + ); + if ( + adapters === project.config.ingest.adapters && + maxConcurrency === project.config.ingest.workUnits.maxConcurrency + ) { return; } await writeFile( @@ -965,7 +977,11 @@ async function ensureHistoricSqlAdapterEnabled(projectDir: string): Promise Date: Tue, 12 May 2026 10:26:01 +0200 Subject: [PATCH 12/33] fix(cli): retry daemon health before startup failure (#17) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- .../cli/src/managed-python-daemon.test.ts | 35 +++++++++++++++++++ packages/cli/src/managed-python-daemon.ts | 9 +++++ 2 files changed, 44 insertions(+) diff --git a/packages/cli/src/managed-python-daemon.test.ts b/packages/cli/src/managed-python-daemon.test.ts index 24df2a78..4e7af22c 100644 --- a/packages/cli/src/managed-python-daemon.test.ts +++ b/packages/cli/src/managed-python-daemon.test.ts @@ -170,6 +170,41 @@ describe('managed Python daemon lifecycle', () => { }); }); + it('makes a final health probe before reporting startup failure', async () => { + const spawnDaemon = makeSpawn(5556); + const installRuntime = vi.fn(async () => installResult(tempDir)); + const fetch = vi + .fn() + .mockRejectedValueOnce(new Error('fetch failed')) + .mockResolvedValueOnce({ + ok: true, + status: 200, + json: async () => ({ status: 'healthy', version: '0.2.0' }), + text: async () => '', + }); + + const result = await startManagedPythonDaemon({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + features: ['core'], + installRuntime, + spawnDaemon, + fetch, + allocatePort: vi.fn(async () => 61234), + now: () => new Date('2026-05-11T00:00:00.000Z'), + startupTimeoutMs: 5, + pollIntervalMs: 20, + }); + + expect(result.status).toBe('started'); + expect(fetch).toHaveBeenCalledTimes(2); + 
expect(JSON.parse(await readFile(layout(tempDir).daemonStatePath, 'utf8'))).toMatchObject({ + pid: 5556, + port: 61234, + version: '0.2.0', + }); + }); + it('reuses a healthy daemon with the requested feature set', async () => { await mkdir(layout(tempDir).versionDir, { recursive: true }); await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`); diff --git a/packages/cli/src/managed-python-daemon.ts b/packages/cli/src/managed-python-daemon.ts index 4b128c63..2caf9182 100644 --- a/packages/cli/src/managed-python-daemon.ts +++ b/packages/cli/src/managed-python-daemon.ts @@ -273,6 +273,15 @@ async function waitForHealth(input: { lastDetail = health.detail; await delay(input.pollIntervalMs); } + const finalHealth = await healthOk({ + state: input.state, + cliVersion: input.cliVersion, + fetch: input.fetch, + }); + if (finalHealth.ok) { + return; + } + lastDetail = finalHealth.detail; throw new Error(`KTX Python daemon failed to start: ${lastDetail}. 
stderr: ${input.state.stderrLog}`); } From ff3e0edce32fab007b5a8d326d9017e3ed106ea8 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 10:26:05 +0200 Subject: [PATCH 13/33] fix(cli): harden managed runtime install (#16) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- .../cli/src/managed-python-runtime.test.ts | 32 +++++++++++++ packages/cli/src/managed-python-runtime.ts | 46 ++++++++++++++++--- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/managed-python-runtime.test.ts b/packages/cli/src/managed-python-runtime.test.ts index d100e409..fa14f480 100644 --- a/packages/cli/src/managed-python-runtime.test.ts +++ b/packages/cli/src/managed-python-runtime.test.ts @@ -161,6 +161,14 @@ describe('verifyRuntimeAsset', () => { await expect(verifyRuntimeAsset({ assetDir })).rejects.toThrow(/Unsafe runtime wheel filename/); }); + + it('reports the source-checkout artifact command when the bundled manifest is missing', async () => { + const assetDir = join(tempDir, 'packages', 'cli', 'assets', 'python'); + + await expect(verifyRuntimeAsset({ assetDir })).rejects.toThrow( + /Missing bundled Python runtime manifest.*pnpm run artifacts:build/s, + ); + }); }); describe('installManagedPythonRuntime', () => { @@ -210,6 +218,30 @@ describe('installManagedPythonRuntime', () => { expect(manifest.python.daemonExecutable).toBe(result.layout.daemonPath); }); + it('disables repo uv config for managed runtime uv commands', async () => { + const { assetDir } = await writeAsset(tempDir, 'core-wheel'); + const commands: Array<{ command: string; args: string[]; env?: NodeJS.ProcessEnv }> = []; + const exec: ManagedPythonRuntimeExec = vi.fn(async (command, args, options) => { + commands.push({ command, args, env: options?.env }); + return { stdout: command === 'uv' && args[0] === '--version' ? 
'uv 0.11.13\n' : '', stderr: '' }; + }); + + await installManagedPythonRuntime({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + assetDir, + env: { PATH: '/opt/homebrew/bin', UV_NO_CONFIG: '0' }, + features: ['core'], + exec, + }); + + expect(commands.map((call) => [call.command, call.args[0], call.env?.UV_NO_CONFIG, call.env?.PATH])).toEqual([ + ['uv', '--version', '1', '/opt/homebrew/bin'], + ['uv', 'venv', '1', '/opt/homebrew/bin'], + ['uv', 'pip', '1', '/opt/homebrew/bin'], + ]); + }); + it('installs the local-embeddings extra when requested', async () => { const { assetDir } = await writeAsset(tempDir, 'embedding-wheel'); const commands: Array<{ command: string; args: string[] }> = []; diff --git a/packages/cli/src/managed-python-runtime.ts b/packages/cli/src/managed-python-runtime.ts index 2b715b69..bb1a71d8 100644 --- a/packages/cli/src/managed-python-runtime.ts +++ b/packages/cli/src/managed-python-runtime.ts @@ -186,9 +186,28 @@ async function readJsonFile(path: string): Promise { return JSON.parse(await readFile(path, 'utf8')) as unknown; } +function isErrnoException(error: unknown, code: string): boolean { + return typeof error === 'object' && error !== null && 'code' in error && error.code === code; +} + export async function verifyRuntimeAsset(input: { assetDir: string }): Promise { const manifestPath = join(input.assetDir, 'manifest.json'); - const manifest = runtimeAssetManifestSchema.parse(await readJsonFile(manifestPath)); + let manifestData: unknown; + try { + manifestData = await readJsonFile(manifestPath); + } catch (error) { + if (isErrnoException(error, 'ENOENT')) { + throw new Error( + [ + `Missing bundled Python runtime manifest: ${manifestPath}`, + 'In a source checkout, build the local runtime assets with: pnpm run artifacts:build', + 'Then retry the runtime-backed KTX command.', + ].join('\n'), + ); + } + throw error; + } + const manifest = runtimeAssetManifestSchema.parse(manifestData); 
assertSafeWheelFilename(manifest.wheel.file); const wheelPath = join(input.assetDir, manifest.wheel.file); const wheel = await readFile(wheelPath); @@ -243,10 +262,11 @@ async function runLogged(input: { command: string; args: string[]; cwd?: string; + env?: NodeJS.ProcessEnv; }): Promise<{ stdout: string; stderr: string }> { await appendFile(input.logPath, `$ ${input.command} ${input.args.join(' ')}\n`); try { - const result = await input.exec(input.command, input.args, { cwd: input.cwd }); + const result = await input.exec(input.command, input.args, { cwd: input.cwd, env: input.env }); if (result.stdout) { await appendFile(input.logPath, result.stdout.endsWith('\n') ? result.stdout : `${result.stdout}\n`); } @@ -266,9 +286,13 @@ async function runLogged(input: { } } -async function ensureUv(exec: ManagedPythonRuntimeExec): Promise { +function managedRuntimeUvEnv(baseEnv: NodeJS.ProcessEnv): NodeJS.ProcessEnv { + return { ...baseEnv, UV_NO_CONFIG: '1' }; +} + +async function ensureUv(exec: ManagedPythonRuntimeExec, env?: NodeJS.ProcessEnv): Promise { try { - const result = await exec('uv', ['--version']); + const result = await exec('uv', ['--version'], { env }); return result.stdout.trim() || 'uv available'; } catch { throw new Error(MISSING_UV_RUNTIME_INSTALL_MESSAGE); @@ -282,6 +306,7 @@ export async function installManagedPythonRuntime( const exec = options.exec ?? defaultExec; const features = normalizeFeatures(options.features); const asset = await verifyRuntimeAsset({ assetDir: layout.assetDir }); + const uvEnv = managedRuntimeUvEnv(options.env ?? 
process.env); const existing = await readInstalledManifest(layout.manifestPath); if ( options.force !== true && @@ -298,14 +323,21 @@ export async function installManagedPythonRuntime( await rm(layout.versionDir, { recursive: true, force: true }); await mkdir(layout.versionDir, { recursive: true }); await writeFile(layout.installLogPath, ''); - await ensureUv(exec); - await runLogged({ exec, logPath: layout.installLogPath, command: 'uv', args: ['venv', layout.venvDir] }); + await ensureUv(exec, uvEnv); + await runLogged({ + exec, + logPath: layout.installLogPath, + command: 'uv', + args: ['venv', layout.venvDir], + env: uvEnv, + }); const wheelSpec = features.includes('local-embeddings') ? `${asset.wheelPath}[local-embeddings]` : asset.wheelPath; await runLogged({ exec, logPath: layout.installLogPath, command: 'uv', args: ['pip', 'install', '--python', layout.pythonPath, wheelSpec], + env: uvEnv, }); const manifest: InstalledKtxRuntimeManifest = { @@ -371,7 +403,7 @@ export async function doctorManagedPythonRuntime( const exec = options.exec ?? defaultExec; const checks: ManagedPythonRuntimeDoctorCheck[] = []; try { - const version = await ensureUv(exec); + const version = await ensureUv(exec, managedRuntimeUvEnv(options.env ?? 
process.env)); checks.push(check('pass', { id: 'uv', label: 'uv', detail: version })); } catch (error) { checks.push( From a2dcd4eb08027ef9dd7640bd6561068d75ae61bc Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 10:26:07 +0200 Subject: [PATCH 14/33] fix: guide dev ingest llm setup (#15) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- packages/cli/src/ingest.test.ts | 70 +++++++++++++++++++ .../src/ingest/local-bundle-runtime.test.ts | 8 ++- .../src/ingest/local-bundle-runtime.ts | 12 +++- 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 3b580cc1..59df5e86 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -32,6 +32,7 @@ import { writeWarehouseConfig, } from './ingest.test-utils.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; +import { runKtxSetup } from './setup.js'; describe('runKtxIngest', () => { let tempDir: string; @@ -105,6 +106,75 @@ describe('runKtxIngest', () => { expect(statusIo.stderr()).toBe(''); }); + it('prints provider setup guidance when a skip-llm setup project runs dev ingest', async () => { + const projectDir = join(tempDir, 'project'); + const setupIo = makeIo(); + await expect( + runKtxSetup( + { + command: 'run', + projectDir, + mode: 'new', + agents: false, + agentScope: 'project', + agentInstallMode: 'cli', + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.0.0-test', + skipLlm: true, + skipEmbeddings: true, + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:WAREHOUSE_URL', + databaseSchemas: [], + enableHistoricSql: true, + skipDatabases: false, + skipSources: true, + }, + setupIo.io, + { + databasesDeps: { + testConnection: async (_projectDir, _connectionId, io) => { + io.stdout.write('Driver: postgres\nTables: 1\n'); + return 0; + }, + scanConnection: async () => 0, + 
historicSqlProbe: async () => ({ ok: true, lines: ['PASS Historic SQL probe skipped in test'] }), + }, + context: async () => ({ status: 'skipped', projectDir }), + }, + ), + ).resolves.toBe(0); + + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + + const runIo = makeIo(); + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + sourceDir, + outputMode: 'plain', + }, + runIo.io, + ), + ).resolves.toBe(1); + + expect(runIo.stdout()).toBe(''); + expect(runIo.stderr()).toContain( + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + ); + expect(runIo.stderr()).toContain( + `ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, + ); + }); + it('routes metabase scheduled pulls to the fan-out runner and prints child summaries', async () => { const projectDir = join(tempDir, 'project'); await writeMetabaseConfig(projectDir); diff --git a/packages/context/src/ingest/local-bundle-runtime.test.ts b/packages/context/src/ingest/local-bundle-runtime.test.ts index 779c2cc3..d8cd3907 100644 --- a/packages/context/src/ingest/local-bundle-runtime.test.ts +++ b/packages/context/src/ingest/local-bundle-runtime.test.ts @@ -53,7 +53,13 @@ describe('createLocalBundleIngestRuntime', () => { project, adapters: [new FakeSourceAdapter()], }), - ).toThrow('ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner'); + ).toThrow( + [ + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + `Configure an Anthropic provider, then rerun ingest:`, + ` ktx setup --project-dir ${project.projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY 
--anthropic-model claude-sonnet-4-6 --no-input`, + ].join('\n'), + ); }); it('builds runner deps with local SQLite stores and context tools enabled', async () => { diff --git a/packages/context/src/ingest/local-bundle-runtime.ts b/packages/context/src/ingest/local-bundle-runtime.ts index 43d0247b..afcb2525 100644 --- a/packages/context/src/ingest/local-bundle-runtime.ts +++ b/packages/context/src/ingest/local-bundle-runtime.ts @@ -536,6 +536,14 @@ function nextLocalJobId(): string { return `local-${Date.now().toString(36)}`; } +function localIngestLlmProviderGuardMessage(projectDir: string): string { + return [ + 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner.', + 'Configure an Anthropic provider, then rerun ingest:', + ` ktx setup --project-dir ${projectDir} --anthropic-api-key-env ANTHROPIC_API_KEY --anthropic-model claude-sonnet-4-6 --no-input`, + ].join('\n'); +} + function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { agentRunner: AgentRunnerService; llmProvider?: KtxLlmProvider; @@ -548,9 +556,7 @@ function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { } if (!llmProvider) { - throw new Error( - 'ktx dev ingest run requires llm.provider.backend: anthropic, vertex, or gateway, or an injected agentRunner', - ); + throw new Error(localIngestLlmProviderGuardMessage(options.project.projectDir)); } return { From e7418fd75f000fca3eeca739be2997f9349319cf Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:13:14 +0200 Subject: [PATCH 15/33] feat(llm): add local AI SDK DevTools tracing --- .gitignore | 1 + README.md | 23 +++++ packages/llm/package.json | 1 + packages/llm/src/model-health.test.ts | 5 +- packages/llm/src/model-health.ts | 2 +- packages/llm/src/model-provider.test.ts | 132 +++++++++++++++++++++++- packages/llm/src/model-provider.ts | 43 +++++++- pnpm-lock.yaml | 14 +++ 8 files 
changed, 215 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index eb63517a..ed14196b 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ yarn-error.log* # Local project runtime state .ktx/ +**/.devtools/ *.db *.sqlite *.sqlite3 diff --git a/README.md b/README.md index 84592226..255aa51b 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,29 @@ pnpm run link:dev ktx-dev --help ``` +### Debug LLM traces + +KTX can capture local AI SDK DevTools traces for LLM calls that run through the +KTX provider. Enable it with an environment flag when running an LLM-backed +command: + +```bash +KTX_AI_DEVTOOLS_ENABLED=true ktx dev ingest run \ + --connection-id warehouse \ + --adapter metabase +``` + +Traces are written to `.devtools/generations.json` under the current working +directory. To inspect them, run: + +```bash +pnpm dlx @ai-sdk/devtools +``` + +Then open `http://localhost:4983`. These traces are local-development-only and +store prompts, model outputs, tool arguments/results, and raw provider payloads +in plain text. Do not enable this in production or for sensitive runs. + The repository uses `pnpm` for TypeScript packages and `uv` for Python packages. See [Contributing](docs-site/content/docs/community/contributing.mdx) for full development setup, testing, and PR guidelines. 
diff --git a/packages/llm/package.json b/packages/llm/package.json index fc7deeba..13f49666 100644 --- a/packages/llm/package.json +++ b/packages/llm/package.json @@ -27,6 +27,7 @@ }, "dependencies": { "@ai-sdk/anthropic": "3.0.71", + "@ai-sdk/devtools": "0.0.17", "@ai-sdk/google-vertex": "^4.0.112", "ai": "^6.0.168", "openai": "^6.25.0" diff --git a/packages/llm/src/model-health.test.ts b/packages/llm/src/model-health.test.ts index 003c12d5..d1b3df47 100644 --- a/packages/llm/src/model-health.test.ts +++ b/packages/llm/src/model-health.test.ts @@ -1,3 +1,4 @@ +import { wrapLanguageModel as defaultWrapLanguageModel } from 'ai'; import { describe, expect, it, vi } from 'vitest'; import { runKtxLlmHealthCheck } from './model-health.js'; @@ -7,6 +8,7 @@ describe('KTX LLM health check', () => { it('runs a minimal non-streaming model call through the configured provider', async () => { const generateText = vi.fn(async () => ({ text: 'ok' })); const createAnthropic = vi.fn(() => vi.fn(() => anthropicModel)); + const wrapLanguageModel = vi.fn(defaultWrapLanguageModel); await expect( runKtxLlmHealthCheck( @@ -15,7 +17,7 @@ describe('KTX LLM health check', () => { anthropic: { apiKey: 'sk-ant-test' }, modelSlots: { default: 'claude-sonnet-4-6' }, }, - { deps: { createAnthropic, generateText } }, + { deps: { createAnthropic, generateText, devtoolsEnabled: true, wrapLanguageModel } }, ), ).resolves.toEqual({ ok: true }); @@ -32,6 +34,7 @@ describe('KTX LLM health check', () => { maxOutputTokens: 8, }), ); + expect(wrapLanguageModel).not.toHaveBeenCalled(); }); it('returns a failed result without exposing secret values', async () => { diff --git a/packages/llm/src/model-health.ts b/packages/llm/src/model-health.ts index 131822b6..abbc2735 100644 --- a/packages/llm/src/model-health.ts +++ b/packages/llm/src/model-health.ts @@ -41,7 +41,7 @@ export async function runKtxLlmHealthCheck( ): Promise { try { const { generateText: runGenerateTextOverride, ...providerDeps } = 
options.deps ?? {}; - const provider = createKtxLlmProvider(config, providerDeps); + const provider = createKtxLlmProvider(config, { ...providerDeps, devtoolsEnabled: false }); const runGenerateText = runGenerateTextOverride ?? generateText; await withTimeout( runGenerateText({ diff --git a/packages/llm/src/model-provider.test.ts b/packages/llm/src/model-provider.test.ts index 55dd5da9..ff65a12a 100644 --- a/packages/llm/src/model-provider.test.ts +++ b/packages/llm/src/model-provider.test.ts @@ -1,10 +1,138 @@ -import type { LanguageModel } from 'ai'; +import { devToolsMiddleware as defaultDevToolsMiddleware } from '@ai-sdk/devtools'; +import { wrapLanguageModel as defaultWrapLanguageModel, type LanguageModel } from 'ai'; import { describe, expect, it, vi } from 'vitest'; -import { createKtxLlmProvider } from './model-provider.js'; +import { createKtxLlmProvider, type KtxLlmProviderFactoryDeps } from './model-provider.js'; const languageModel = (modelId: string, provider = 'test'): LanguageModel => ({ modelId, provider }) as LanguageModel; +const devtoolsMiddleware = (): ReturnType => ({ specificationVersion: 'v3' }); +const wrapWith = (model: LanguageModel) => + vi.fn((_options: Parameters[0]) => model as ReturnType); describe('createKtxLlmProvider', () => { + it('wraps language models with DevTools middleware when explicitly enabled', () => { + const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic'); + const wrappedModel = languageModel('claude-sonnet-4-6', 'anthropic-devtools'); + const middleware = devtoolsMiddleware(); + const wrapLanguageModel = wrapWith(wrappedModel); + const devToolsMiddleware = vi.fn(devtoolsMiddleware); + + const provider = createKtxLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { + createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)), + devtoolsEnabled: true, 
+ wrapLanguageModel, + devToolsMiddleware, + } satisfies KtxLlmProviderFactoryDeps, + ); + + expect(provider.getModel('default')).toBe(wrappedModel); + expect(devToolsMiddleware).toHaveBeenCalledTimes(1); + expect(wrapLanguageModel).toHaveBeenCalledWith({ + model: anthropicModel, + middleware, + modelId: 'claude-sonnet-4-6', + providerId: 'anthropic', + }); + }); + + it('does not wrap language models by default', () => { + const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic'); + const wrapLanguageModel = vi.fn(defaultWrapLanguageModel); + const devToolsMiddleware = vi.fn(defaultDevToolsMiddleware); + + const provider = createKtxLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { + createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)), + wrapLanguageModel, + devToolsMiddleware, + } satisfies KtxLlmProviderFactoryDeps, + ); + + expect(provider.getModel('default')).toBe(anthropicModel); + expect(wrapLanguageModel).not.toHaveBeenCalled(); + expect(devToolsMiddleware).not.toHaveBeenCalled(); + }); + + it('wraps language models when KTX_AI_DEVTOOLS_ENABLED is true', () => { + const originalEnv = process.env.KTX_AI_DEVTOOLS_ENABLED; + process.env.KTX_AI_DEVTOOLS_ENABLED = 'true'; + try { + const gatewayModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway'); + const wrappedModel = languageModel('anthropic/claude-sonnet-4-6', 'gateway-devtools'); + const wrapLanguageModel = wrapWith(wrappedModel); + + const provider = createKtxLlmProvider( + { + backend: 'gateway', + gateway: { baseURL: 'https://gateway.test/v1' }, + modelSlots: { default: 'anthropic/claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { + createGateway: vi.fn(() => vi.fn(() => gatewayModel)), + wrapLanguageModel, + devToolsMiddleware: vi.fn(devtoolsMiddleware), + } satisfies KtxLlmProviderFactoryDeps, + ); + 
+ expect(provider.getModel('default')).toBe(wrappedModel); + expect(wrapLanguageModel).toHaveBeenCalledTimes(1); + } finally { + if (originalEnv === undefined) { + delete process.env.KTX_AI_DEVTOOLS_ENABLED; + } else { + process.env.KTX_AI_DEVTOOLS_ENABLED = originalEnv; + } + } + }); + + it('does not wrap language models in production even when enabled', () => { + const originalNodeEnv = process.env.NODE_ENV; + process.env.NODE_ENV = 'production'; + try { + const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic'); + const wrapLanguageModel = vi.fn(defaultWrapLanguageModel); + const devToolsMiddleware = vi.fn(defaultDevToolsMiddleware); + + const provider = createKtxLlmProvider( + { + backend: 'anthropic', + anthropic: { apiKey: 'test-anthropic-key' }, // pragma: allowlist secret + modelSlots: { default: 'claude-sonnet-4-6' }, + promptCaching: { enabled: false }, + }, + { + createAnthropic: vi.fn(() => vi.fn(() => anthropicModel)), + devtoolsEnabled: true, + wrapLanguageModel, + devToolsMiddleware, + } satisfies KtxLlmProviderFactoryDeps, + ); + + expect(provider.getModel('default')).toBe(anthropicModel); + expect(wrapLanguageModel).not.toHaveBeenCalled(); + expect(devToolsMiddleware).not.toHaveBeenCalled(); + } finally { + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV; + } else { + process.env.NODE_ENV = originalNodeEnv; + } + } + }); + it('uses direct Anthropic with both beta headers', () => { const anthropicModel = languageModel('claude-sonnet-4-6', 'anthropic'); const anthropic = vi.fn(() => anthropicModel); diff --git a/packages/llm/src/model-provider.ts b/packages/llm/src/model-provider.ts index 66a9fddd..6dbdcb06 100644 --- a/packages/llm/src/model-provider.ts +++ b/packages/llm/src/model-provider.ts @@ -1,6 +1,7 @@ import { createAnthropic } from '@ai-sdk/anthropic'; +import { devToolsMiddleware } from '@ai-sdk/devtools'; import { createVertexAnthropic } from '@ai-sdk/google-vertex/anthropic'; -import { createGateway, 
generateText, type LanguageModel } from 'ai'; +import { createGateway, generateText, wrapLanguageModel, type LanguageModel } from 'ai'; import { createKtxToolCallRepairHandler } from './repair.js'; import type { KtxLlmConfig, @@ -21,6 +22,9 @@ export interface KtxLlmProviderFactoryDeps { createVertexAnthropic?: VertexAnthropicFactory; createGateway?: GatewayFactory; generateText?: typeof generateText; + devtoolsEnabled?: boolean; + wrapLanguageModel?: typeof wrapLanguageModel; + devToolsMiddleware?: typeof devToolsMiddleware; } const DEFAULT_PROMPT_CACHING: KtxPromptCachingConfig = { @@ -40,10 +44,27 @@ function resolvePromptCaching(config: KtxLlmConfig): KtxPromptCachingConfig { return { ...DEFAULT_PROMPT_CACHING, ...config.promptCaching }; } +function resolveDevtoolsEnabled(override: boolean | undefined): boolean { + if (process.env.NODE_ENV === 'production') { + return false; + } + if (override !== undefined) { + return override; + } + const value = process.env.KTX_AI_DEVTOOLS_ENABLED?.trim().toLowerCase(); + return value === 'true' || value === '1' || value === 'yes'; +} + export function modelIdFromLanguageModel(model: LanguageModel | string): string { return typeof model === 'string' ? model : ((model as { modelId?: string }).modelId ?? ''); } +function providerIdFromLanguageModel(model: Exclude): string | undefined { + return typeof (model as { provider?: unknown }).provider === 'string' + ? 
(model as { provider: string }).provider + : undefined; +} + export function isAnthropicProtocolModel(model: LanguageModel | string): boolean { const modelId = modelIdFromLanguageModel(model); return modelId.startsWith('claude-') || modelId.startsWith('anthropic/') || modelId.includes('/claude-'); @@ -53,6 +74,9 @@ class DefaultKtxLlmProvider implements KtxLlmProvider { private readonly promptCaching: KtxPromptCachingConfig; private readonly getModelByResolvedName: (modelId: string) => LanguageModel; private readonly runGenerateText: typeof generateText; + private readonly devtoolsEnabled: boolean; + private readonly runWrapLanguageModel: typeof wrapLanguageModel; + private readonly createDevToolsMiddleware: typeof devToolsMiddleware; constructor( private readonly config: KtxLlmConfig, @@ -60,6 +84,9 @@ class DefaultKtxLlmProvider implements KtxLlmProvider { ) { this.promptCaching = resolvePromptCaching(config); this.runGenerateText = deps.generateText ?? generateText; + this.devtoolsEnabled = resolveDevtoolsEnabled(deps.devtoolsEnabled); + this.runWrapLanguageModel = deps.wrapLanguageModel ?? wrapLanguageModel; + this.createDevToolsMiddleware = deps.devToolsMiddleware ?? devToolsMiddleware; this.getModelByResolvedName = this.createModelFactory(config, deps); } @@ -68,7 +95,7 @@ class DefaultKtxLlmProvider implements KtxLlmProvider { } getModelByName(modelId: string): LanguageModel { - return this.getModelByResolvedName(modelId); + return this.withDevtools(this.getModelByResolvedName(modelId)); } cacheMarker(ttl: KtxPromptCacheTtl, model?: LanguageModel | string) { @@ -113,6 +140,18 @@ class DefaultKtxLlmProvider implements KtxLlmProvider { return this.config.modelSlots[role] ?? 
this.config.modelSlots.default; } + private withDevtools(model: LanguageModel): LanguageModel { + if (!this.devtoolsEnabled || typeof model === 'string') { + return model; + } + return this.runWrapLanguageModel({ + model: model as Parameters[0]['model'], + middleware: this.createDevToolsMiddleware(), + modelId: modelIdFromLanguageModel(model), + providerId: providerIdFromLanguageModel(model), + }); + } + private createModelFactory(config: KtxLlmConfig, deps: KtxLlmProviderFactoryDeps): (modelId: string) => LanguageModel { if (config.backend === 'anthropic') { const anthropic = (deps.createAnthropic ?? createAnthropic)({ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e74389f9..0cd6dbeb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -355,6 +355,9 @@ importers: '@ai-sdk/anthropic': specifier: 3.0.71 version: 3.0.71(zod@4.4.3) + '@ai-sdk/devtools': + specifier: 0.0.17 + version: 0.0.17 '@ai-sdk/google-vertex': specifier: ^4.0.112 version: 4.0.118(zod@4.4.3) @@ -389,6 +392,11 @@ packages: peerDependencies: zod: ^3.25.76 || ^4.1.8 + '@ai-sdk/devtools@0.0.17': + resolution: {integrity: sha512-CJgo+3DMHOJbxxq1qTgnW4vpFXgBW1pHePMimBW4Go5FPU7iLqppoGX/UC798IXqlD3hncQRPfyBLZjbsJC91w==} + engines: {node: '>=18'} + hasBin: true + '@ai-sdk/gateway@3.0.104': resolution: {integrity: sha512-ZKX5n74io8VIRlhIMSLWVlvT3sXC8Z7cZ9GHuWBWZDVi96+62AIsWuLGvMfcBA1STYuSoDrp6rIziZmvrTq0TA==} engines: {node: '>=18'} @@ -4576,6 +4584,12 @@ snapshots: '@ai-sdk/provider-utils': 4.0.26(zod@4.4.3) zod: 4.4.3 + '@ai-sdk/devtools@0.0.17': + dependencies: + '@ai-sdk/provider': 3.0.10 + '@hono/node-server': 1.19.14(hono@4.12.15) + hono: 4.12.15 + '@ai-sdk/gateway@3.0.104(zod@4.3.6)': dependencies: '@ai-sdk/provider': 3.0.8 From 9e80add72c99b1fa67d2f7c72d74eaae172b6973 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:21:29 +0200 Subject: [PATCH 16/33] fix(context): make ingest adapter logging explicit --- 
.../src/ingest/adapters/looker/client.test.ts | 18 ++++++ .../src/ingest/adapters/looker/client.ts | 8 +-- .../ingest/adapters/metabase/client.test.ts | 21 +++++++ .../src/ingest/adapters/metabase/client.ts | 8 +-- .../ingest/adapters/metabase/fetch.test.ts | 36 +++++++++++ .../src/ingest/adapters/metabase/fetch.ts | 13 +++- .../src/ingest/adapters/notion/fetch.test.ts | 15 +++-- .../src/ingest/adapters/notion/fetch.ts | 61 +++++++++++++++---- 8 files changed, 150 insertions(+), 30 deletions(-) diff --git a/packages/context/src/ingest/adapters/looker/client.test.ts b/packages/context/src/ingest/adapters/looker/client.test.ts index a7d4e604..3b1822e0 100644 --- a/packages/context/src/ingest/adapters/looker/client.test.ts +++ b/packages/context/src/ingest/adapters/looker/client.test.ts @@ -112,6 +112,24 @@ describe('LookerClient', () => { }); }); + it('does not warn to console when optional prioritization inputs fail by default', async () => { + const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const fakeSdk = sdk({ + search_dashboards: vi.fn().mockRejectedValue(new Error('dashboards unavailable')), + search_looks: vi.fn().mockRejectedValue(new Error('looks unavailable')), + }); + const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); + + await expect(client.getSignals()).resolves.toMatchObject({ + dashboardUsage: [], + lookUsage: [], + scheduledPlans: [], + favorites: [], + }); + + expect(warn).not.toHaveBeenCalled(); + }); + it('maps dashboards, looks, folders, models, explores, users, and groups to staged DTOs', async () => { const fakeSdk = sdk(); const client = new LookerClient(params(), { sdkFactory: () => fakeSdk }); diff --git a/packages/context/src/ingest/adapters/looker/client.ts b/packages/context/src/ingest/adapters/looker/client.ts index 50b0b104..90f9f466 100644 --- a/packages/context/src/ingest/adapters/looker/client.ts +++ b/packages/context/src/ingest/adapters/looker/client.ts @@ -80,10 +80,10 @@ export 
interface LookerClientDeps { } const defaultLogger: LookerClientLogger = { - log: (message) => console.log(message), - warn: (message) => console.warn(message), - error: (message) => console.error(message), - debug: (message) => console.debug(message), + log: () => undefined, + warn: () => undefined, + error: () => undefined, + debug: () => undefined, }; class InlineLookerSettings extends NodeSettings { diff --git a/packages/context/src/ingest/adapters/metabase/client.test.ts b/packages/context/src/ingest/adapters/metabase/client.test.ts index f81939c6..1ee3fe93 100644 --- a/packages/context/src/ingest/adapters/metabase/client.test.ts +++ b/packages/context/src/ingest/adapters/metabase/client.test.ts @@ -72,6 +72,27 @@ describe('MetabaseClient retry exhaustion', () => { vi.restoreAllMocks(); }); + it('does not warn to console when retrying by default', async () => { + const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + globalThis.fetch = vi + .fn() + .mockRejectedValueOnce(Object.assign(new Error('read ECONNRESET'), { code: 'ECONNRESET' })) + .mockResolvedValueOnce(new Response(JSON.stringify([]), { status: 200 })); + + const client = new MetabaseClient( + { apiUrl: 'https://metabase.example.test', apiKey: 'key' }, + { + ...DEFAULT_METABASE_CLIENT_CONFIG, + baseDelayMs: 0, + maxRetries: 1, + }, + ); + + await client.getDatabases(); + + expect(warn).not.toHaveBeenCalled(); + }); + it('wraps an exhausted ECONNRESET retry chain with method, path, attempt count, and original cause', async () => { const sysErr = Object.assign(new Error('read ECONNRESET'), { code: 'ECONNRESET', diff --git a/packages/context/src/ingest/adapters/metabase/client.ts b/packages/context/src/ingest/adapters/metabase/client.ts index 70e70964..2b70bc79 100644 --- a/packages/context/src/ingest/adapters/metabase/client.ts +++ b/packages/context/src/ingest/adapters/metabase/client.ts @@ -25,10 +25,10 @@ export interface MetabaseClientLogger { } const defaultLogger: 
MetabaseClientLogger = { - log: (message) => console.log(message), - warn: (message) => console.warn(message), - error: (message) => console.error(message), - debug: (message) => console.debug(message), + log: () => undefined, + warn: () => undefined, + error: () => undefined, + debug: () => undefined, }; interface TemplateTagInfo { diff --git a/packages/context/src/ingest/adapters/metabase/fetch.test.ts b/packages/context/src/ingest/adapters/metabase/fetch.test.ts index a86350ac..c8d4f4fb 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch.test.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch.test.ts @@ -86,6 +86,7 @@ describe('fetchMetabaseBundle', () => { }); afterEach(async () => { + vi.restoreAllMocks(); await rm(stagedDir, { recursive: true, force: true }); }); @@ -115,6 +116,41 @@ describe('fetchMetabaseBundle', () => { expect(card.archived).toBe(false); }); + it('does not write Metabase fetch progress to console by default', async () => { + const log = vi.spyOn(console, 'log').mockImplementation(() => undefined); + const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + }); + + expect(log).not.toHaveBeenCalled(); + expect(warn).not.toHaveBeenCalled(); + }); + + it('routes Metabase fetch warnings through the injected logger', async () => { + const logger = { + log: vi.fn(), + warn: vi.fn(), + }; + clientFactory.__client.getCard.mockRejectedValueOnce(new Error('card read failed')); + + await fetchMetabaseBundle({ + pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, + stagedDir, + ctx: makeFetchContext(), + clientFactory, + sourceStateReader, + logger, + }); + + expect(logger.warn).toHaveBeenCalledWith('failed to load card 1: card read failed'); + }); + it('passes the Metabase source pull config and target fetch context to the 
client factory', async () => { await fetchMetabaseBundle({ pullConfig: { metabaseConnectionId, metabaseDatabaseId: 42 }, diff --git a/packages/context/src/ingest/adapters/metabase/fetch.ts b/packages/context/src/ingest/adapters/metabase/fetch.ts index f8cc1e12..9ccb2be6 100644 --- a/packages/context/src/ingest/adapters/metabase/fetch.ts +++ b/packages/context/src/ingest/adapters/metabase/fetch.ts @@ -21,9 +21,14 @@ class IngestInputError extends Error { } } -const logger = { - log: (message: string) => console.log(message), - warn: (message: string) => console.warn(message), +export interface MetabaseFetchLogger { + log(message: string): void; + warn(message: string): void; +} + +const noopMetabaseFetchLogger: MetabaseFetchLogger = { + log: () => undefined, + warn: () => undefined, }; export interface FetchMetabaseBundleParams { @@ -32,6 +37,7 @@ export interface FetchMetabaseBundleParams { ctx: FetchContext; clientFactory: MetabaseClientFactory; sourceStateReader: MetabaseSourceStateReader; + logger?: MetabaseFetchLogger; } interface CollectionNode { @@ -76,6 +82,7 @@ function resolvePath(index: Map, collectionId: export async function fetchMetabaseBundle(params: FetchMetabaseBundleParams): Promise { const pullConfig: MetabasePullConfig = parseMetabasePullConfig(params.pullConfig); + const logger = params.logger ?? 
noopMetabaseFetchLogger; const syncState = await params.sourceStateReader.getSourceState(pullConfig.metabaseConnectionId); const mapping = syncState.mappings.find( (m) => m.metabaseDatabaseId === pullConfig.metabaseDatabaseId && m.syncEnabled, diff --git a/packages/context/src/ingest/adapters/notion/fetch.test.ts b/packages/context/src/ingest/adapters/notion/fetch.test.ts index ae6d5fd5..b60170f7 100644 --- a/packages/context/src/ingest/adapters/notion/fetch.test.ts +++ b/packages/context/src/ingest/adapters/notion/fetch.test.ts @@ -89,12 +89,13 @@ describe('fetchNotionSnapshot', () => { }); it('logs skipped page materialization failures', async () => { - const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const logger = { warn: vi.fn() }; (client.retrievePage as ReturnType).mockRejectedValueOnce(new Error('Notion API failed')); const manifest = await fetchNotionSnapshot({ client, stagedDir, + logger, config: { authToken: 'secret', crawlMode: 'selected_roots', @@ -109,7 +110,7 @@ describe('fetchNotionSnapshot', () => { }); expect(manifest.skipped).toEqual([{ externalId: 'page-1', reason: 'Notion API failed' }]); - expect(warn).toHaveBeenCalledWith('Skipping Notion page page-1: Notion API failed'); + expect(logger.warn).toHaveBeenCalledWith('Skipping Notion page page-1: Notion API failed'); }); it('recursively fetches selected-root child pages and derives scoped links', async () => { @@ -191,7 +192,7 @@ describe('fetchNotionSnapshot', () => { }); it('truncates deeply nested block trees and records a warning', async () => { - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const logger = { warn: vi.fn() }; (client.listBlockChildren as ReturnType).mockImplementation((blockId: string) => { const currentDepth = blockId === 'page-1' ? 
0 : Number(blockId.replace('block-', '')); const nextDepth = currentDepth + 1; @@ -215,6 +216,7 @@ describe('fetchNotionSnapshot', () => { await fetchNotionSnapshot({ client, stagedDir, + logger, config: { authToken: 'secret', crawlMode: 'selected_roots', @@ -232,11 +234,11 @@ describe('fetchNotionSnapshot', () => { const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')); expect(blocks).toHaveLength(10); expect(manifest.warnings).toContain('maxBlockDepth reached for page page-1 at depth 10'); - expect(warnSpy).toHaveBeenCalledWith('maxBlockDepth reached for page page-1 at depth 10'); + expect(logger.warn).toHaveBeenCalledWith('maxBlockDepth reached for page page-1 at depth 10'); }); it('truncates pages at the per-page block cap and records a warning', async () => { - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const logger = { warn: vi.fn() }; (client.listBlockChildren as ReturnType).mockResolvedValue({ results: Array.from({ length: 2001 }, (_, index) => ({ id: `block-${index}`, @@ -250,6 +252,7 @@ describe('fetchNotionSnapshot', () => { await fetchNotionSnapshot({ client, stagedDir, + logger, config: { authToken: 'secret', crawlMode: 'selected_roots', @@ -267,7 +270,7 @@ describe('fetchNotionSnapshot', () => { const manifest = JSON.parse(await readFile(join(stagedDir, 'manifest.json'), 'utf-8')); expect(blocks).toHaveLength(2000); expect(manifest.warnings).toContain('maxBlocksPerPage reached for page page-1 at 2000 blocks'); - expect(warnSpy).toHaveBeenCalledWith('maxBlocksPerPage reached for page page-1 at 2000 blocks'); + expect(logger.warn).toHaveBeenCalledWith('maxBlocksPerPage reached for page page-1 at 2000 blocks'); }); it('uses all_accessible search for pages and data sources', async () => { diff --git a/packages/context/src/ingest/adapters/notion/fetch.ts b/packages/context/src/ingest/adapters/notion/fetch.ts index 6d0ee691..18974f83 100644 --- 
a/packages/context/src/ingest/adapters/notion/fetch.ts +++ b/packages/context/src/ingest/adapters/notion/fetch.ts @@ -12,10 +12,19 @@ import { type NotionPullConfig, } from './types.js'; +export interface NotionFetchLogger { + warn(message: string): void; +} + +const noopNotionFetchLogger: NotionFetchLogger = { + warn: () => undefined, +}; + interface FetchNotionSnapshotParams { client: NotionApi; config: NotionPullConfig; stagedDir: string; + logger?: NotionFetchLogger; } interface CrawlState { @@ -23,6 +32,7 @@ interface CrawlState { databaseCount: number; dataSourceCount: number; capped: boolean; + logger: NotionFetchLogger; skipped: Array<{ externalId: string; reason: string }>; warnings: string[]; materializedPageTargets: Set; @@ -44,9 +54,6 @@ interface NotionLinks { const DEFAULT_MAX_BLOCK_DEPTH = 10; const DEFAULT_MAX_BLOCKS_PER_PAGE = 2000; -const logger = { - warn: (message: string) => console.warn(message), -}; async function writeJson(path: string, value: unknown): Promise { await mkdir(dirname(path), { recursive: true }); @@ -58,7 +65,12 @@ async function writeText(path: string, value: string): Promise { await writeFile(path, value.endsWith('\n') ? 
value : `${value}\n`, 'utf-8'); } -function addWarning(warnings: string[], warning: string, logWarning = false): void { +function addWarning( + warnings: string[], + warning: string, + logWarning = false, + logger: NotionFetchLogger = noopNotionFetchLogger, +): void { if (!warnings.includes(warning)) { warnings.push(warning); if (logWarning) { @@ -119,11 +131,21 @@ async function visitPaginated(params: { } while (cursor); } -function addBlockCountWarning(state: BlockCollectionState, warnings: string[], pageId: string): void { +function addBlockCountWarning( + state: BlockCollectionState, + warnings: string[], + pageId: string, + logger: NotionFetchLogger, +): void { if (state.blockCountWarningWritten) { return; } - addWarning(warnings, `maxBlocksPerPage reached for page ${pageId} at ${DEFAULT_MAX_BLOCKS_PER_PAGE} blocks`, true); + addWarning( + warnings, + `maxBlocksPerPage reached for page ${pageId} at ${DEFAULT_MAX_BLOCKS_PER_PAGE} blocks`, + true, + logger, + ); state.blockCountWarningWritten = true; } @@ -134,18 +156,19 @@ async function collectBlockChildren(params: { depth: number; warnings: string[]; state: BlockCollectionState; + logger: NotionFetchLogger; }): Promise { let cursor: string | null = null; do { const remainingBlocks = DEFAULT_MAX_BLOCKS_PER_PAGE - params.state.blocks.length; if (remainingBlocks <= 0) { - addBlockCountWarning(params.state, params.warnings, params.pageId); + addBlockCountWarning(params.state, params.warnings, params.pageId, params.logger); return; } const page = await params.client.listBlockChildren(params.blockId, cursor, Math.min(remainingBlocks, 100)); for (let index = 0; index < page.results.length; index += 1) { if (params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE) { - addBlockCountWarning(params.state, params.warnings, params.pageId); + addBlockCountWarning(params.state, params.warnings, params.pageId, params.logger); return; } @@ -159,9 +182,10 @@ async function collectBlockChildren(params: { params.warnings, 
`maxBlockDepth reached for page ${params.pageId} at depth ${DEFAULT_MAX_BLOCK_DEPTH}`, true, + params.logger, ); } else if (params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE) { - addBlockCountWarning(params.state, params.warnings, params.pageId); + addBlockCountWarning(params.state, params.warnings, params.pageId, params.logger); return; } else { await collectBlockChildren({ @@ -171,6 +195,7 @@ async function collectBlockChildren(params: { depth: blockDepth, warnings: params.warnings, state: params.state, + logger: params.logger, }); } } @@ -179,7 +204,7 @@ async function collectBlockChildren(params: { params.state.blocks.length >= DEFAULT_MAX_BLOCKS_PER_PAGE && (index < page.results.length - 1 || page.hasMore) ) { - addBlockCountWarning(params.state, params.warnings, params.pageId); + addBlockCountWarning(params.state, params.warnings, params.pageId, params.logger); return; } } @@ -187,7 +212,12 @@ async function collectBlockChildren(params: { } while (cursor); } -async function collectBlockTree(client: NotionApi, pageId: string, warnings: string[]): Promise { +async function collectBlockTree( + client: NotionApi, + pageId: string, + warnings: string[], + logger: NotionFetchLogger, +): Promise { const state: BlockCollectionState = { blocks: [], blockCountWarningWritten: false }; await collectBlockChildren({ client, @@ -196,6 +226,7 @@ async function collectBlockTree(client: NotionApi, pageId: string, warnings: str depth: 0, warnings, state, + logger, }); return state.blocks; } @@ -341,7 +372,7 @@ async function materializePage(params: { if (params.skipDataSourceRows && !params.dataSourceId && parentDataSourceId(page)) { return; } - const blocks = await collectBlockTree(params.client, params.pageId, params.state.warnings); + const blocks = await collectBlockTree(params.client, params.pageId, params.state.warnings, params.state.logger); const metadata = normalizeNotionPageMetadata({ page, fallbackPath: params.fallbackPath, @@ -374,7 +405,9 @@ async function 
materializePage(params: { } } } catch (error) { - logger.warn(`Skipping Notion page ${params.pageId}: ${error instanceof Error ? error.message : String(error)}`); + params.state.logger.warn( + `Skipping Notion page ${params.pageId}: ${error instanceof Error ? error.message : String(error)}`, + ); params.state.skipped.push({ externalId: params.pageId, reason: error instanceof Error ? error.message : String(error), @@ -491,6 +524,7 @@ async function materializeDatabase(params: { export async function fetchNotionSnapshot(params: FetchNotionSnapshotParams): Promise { await mkdir(params.stagedDir, { recursive: true }); + const logger = params.logger ?? noopNotionFetchLogger; const configuredCursor = params.config.crawlMode === 'all_accessible' ? parseConfiguredCursor(params.config) : null; const continuedFromCursor = configuredCursor !== null; const state: CrawlState = { @@ -498,6 +532,7 @@ export async function fetchNotionSnapshot(params: FetchNotionSnapshotParams): Pr databaseCount: 0, dataSourceCount: 0, capped: false, + logger, skipped: [], warnings: [], materializedPageTargets: new Set(), From d5f484eb7e624d04d46a5c9bce178daecbc61a06 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:21:37 +0200 Subject: [PATCH 17/33] fix: standardize KTX environment variables --- .../orbit-relationship-verification/README.md | 2 +- .../memory-agent.service.ingest.test.ts | 24 ++++++++++++ .../src/memory/memory-agent.service.ts | 2 +- scripts/relationship-orbit-verification.mjs | 4 +- .../relationship-orbit-verification.test.mjs | 37 +++++++++++++++++++ 5 files changed, 65 insertions(+), 4 deletions(-) diff --git a/examples/orbit-relationship-verification/README.md b/examples/orbit-relationship-verification/README.md index 245411b6..126488a2 100644 --- a/examples/orbit-relationship-verification/README.md +++ b/examples/orbit-relationship-verification/README.md @@ -29,5 +29,5 @@ 
examples/orbit-relationship-verification/reports/orbit-verification.md Use a real local Orbit project by overriding the project directory: ```bash -KTX_ORBIT_PROJECT_DIR=/path/to/orbit-project pnpm run relationships:verify-orbit +KTX_PROJECT_DIR=/path/to/orbit-project pnpm run relationships:verify-orbit ``` diff --git a/packages/context/src/memory/memory-agent.service.ingest.test.ts b/packages/context/src/memory/memory-agent.service.ingest.test.ts index 710ba956..6375e494 100644 --- a/packages/context/src/memory/memory-agent.service.ingest.test.ts +++ b/packages/context/src/memory/memory-agent.service.ingest.test.ts @@ -37,6 +37,7 @@ interface BuiltMocks { agentRunner: any; slValidator: any; toolsetFactory: any; + logger: any; } const buildMocks = (overrides: Partial = {}): BuiltMocks => { @@ -131,6 +132,7 @@ const buildMocks = (overrides: Partial = {}): BuiltMocks => { getAllTools: vi.fn().mockReturnValue([]), }), }, + logger: { log: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() }, }; return { ...defaults, ...overrides }; @@ -179,6 +181,7 @@ const buildService = (mocks: BuiltMocks): MemoryAgentService => telemetry: { trackMemoryIngestion: mocks.eventTracker.trackEvent, }, + logger: mocks.logger, }); const baseInput = { @@ -238,6 +241,27 @@ describe('MemoryAgentService.ingest — session-branch orchestration', () => { expect(result.commitHash).toBe('cafebabe'); }); + it('logs prompt debug output when KTX_MEMORY_AGENT_DEBUG_PROMPTS is enabled', async () => { + const previousDebugPrompts = process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS; + const mocks = buildMocks(); + const svc = buildService(mocks); + + try { + process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS = '1'; + + await svc.ingest(baseInput); + + expect(mocks.logger.debug).toHaveBeenCalledWith(expect.stringContaining('[memory-agent prompt-debug] system=')); + expect(mocks.logger.debug).toHaveBeenCalledWith(expect.stringContaining('[memory-agent prompt-debug] user=')); + } finally { + if (previousDebugPrompts === 
undefined) { + delete process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS; + } else { + process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS = previousDebugPrompts; + } + } + }); + it('empty path: squash returns no touched paths → no enqueue, cleanup(empty), commitHash=null', async () => { const mocks = buildMocks(); mocks.gitService.squashMergeIntoMain.mockResolvedValue({ diff --git a/packages/context/src/memory/memory-agent.service.ts b/packages/context/src/memory/memory-agent.service.ts index fd1f0a6c..6f239053 100644 --- a/packages/context/src/memory/memory-agent.service.ts +++ b/packages/context/src/memory/memory-agent.service.ts @@ -192,7 +192,7 @@ export class MemoryAgentService { `[memory-agent] chat=${chatId} running (sourceType=${sourceType}, hasSL=${hasSL}, budget=${stepBudget}, model=${modelName})${signalsSuffix}${dialectSuffix}`, ); - if (process.env.MEMORY_AGENT_DEBUG_PROMPTS === '1') { + if (process.env.KTX_MEMORY_AGENT_DEBUG_PROMPTS === '1') { this.logger.debug(`[memory-agent prompt-debug] system=${systemPrompt}`); this.logger.debug(`[memory-agent prompt-debug] user=${prompt}`); } diff --git a/scripts/relationship-orbit-verification.mjs b/scripts/relationship-orbit-verification.mjs index 1c24a4e9..d1c97f56 100644 --- a/scripts/relationship-orbit-verification.mjs +++ b/scripts/relationship-orbit-verification.mjs @@ -62,7 +62,7 @@ function firstNonEmptyLine(...values) { function parseArgs(argv) { const options = { connectionId: process.env.KTX_ORBIT_CONNECTION_ID ?? 'orbit', - projectDir: process.env.KTX_ORBIT_PROJECT_DIR ?? defaultProjectDir, + projectDir: process.env.KTX_PROJECT_DIR ?? defaultProjectDir, reportPath: defaultReportPath, }; @@ -242,7 +242,7 @@ function orbitVerificationEnv(projectDir) { export async function runOrbitVerification(options = {}) { const connectionId = options.connectionId ?? process.env.KTX_ORBIT_CONNECTION_ID ?? 'orbit'; - const projectDir = options.projectDir ?? process.env.KTX_ORBIT_PROJECT_DIR ?? 
defaultProjectDir; + const projectDir = options.projectDir ?? process.env.KTX_PROJECT_DIR ?? defaultProjectDir; const reportPath = options.reportPath ?? defaultReportPath; const rootDir = options.rootDir ?? ktxRootDir; const runner = options.runWorkspaceKtx ?? runWorkspaceKtx; diff --git a/scripts/relationship-orbit-verification.test.mjs b/scripts/relationship-orbit-verification.test.mjs index c7cdaffc..017b2518 100644 --- a/scripts/relationship-orbit-verification.test.mjs +++ b/scripts/relationship-orbit-verification.test.mjs @@ -115,6 +115,43 @@ describe('relationship Orbit verification helper', () => { assert.match(writes[0].content, new RegExp(defaultProjectDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); }); + it('uses KTX_PROJECT_DIR for the Orbit verification project override', async () => { + const previousProjectDir = process.env.KTX_PROJECT_DIR; + const calls = []; + + try { + process.env.KTX_PROJECT_DIR = '/tmp/orbit-project-from-env'; + + const result = await runOrbitVerification({ + reportPath: '/tmp/orbit-report.md', + now: () => new Date('2026-05-07T10:00:00.000Z'), + mkdir: async () => {}, + writeFile: async () => {}, + runWorkspaceKtx: async (argv, options) => { + calls.push(argv); + if (argv[2] === 'report') { + options.stdout.write(successReportJson()); + return 0; + } + options.stdout.write('KTX scan completed\nRun: scan-orbit-1\nConnection: orbit\n'); + return 0; + }, + }); + + assert.equal(result.projectDir, '/tmp/orbit-project-from-env'); + assert.deepEqual(calls, [ + ['dev', 'scan', 'orbit', '--enrich', '--project-dir', '/tmp/orbit-project-from-env'], + ['dev', 'scan', 'report', '--json', '--project-dir', '/tmp/orbit-project-from-env', 'scan-orbit-1'], + ]); + } finally { + if (previousProjectDir === undefined) { + delete process.env.KTX_PROJECT_DIR; + } else { + process.env.KTX_PROJECT_DIR = previousProjectDir; + } + } + }); + it('extracts the run id from human scan output', () => { assert.equal(extractRunId(`KTX scan completed\nStatus: 
done\nRun: scan-orbit-1\nConnection: orbit\n`), 'scan-orbit-1'); assert.equal(extractRunId('KTX scan completed without a run line\n'), null); From d7fb092cb0fec651d7646f16d25bd306fce1501b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:26:34 +0200 Subject: [PATCH 18/33] feat(cli): route ingest adapter logs through operational logger --- packages/cli/src/ingest.test.ts | 97 +++++++++++++++---- packages/cli/src/ingest.ts | 3 + packages/cli/src/io/logger.test.ts | 65 +++++++++++++ packages/cli/src/io/logger.ts | 40 ++++++++ packages/cli/src/local-adapters.ts | 2 + .../adapters/looker/local-looker.adapter.ts | 6 +- .../metabase/local-metabase.adapter.ts | 10 +- .../adapters/metabase/metabase.adapter.ts | 4 +- .../ingest/adapters/notion/notion.adapter.ts | 10 +- packages/context/src/ingest/local-adapters.ts | 21 +++- 10 files changed, 232 insertions(+), 26 deletions(-) create mode 100644 packages/cli/src/io/logger.test.ts create mode 100644 packages/cli/src/io/logger.ts diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 59df5e86..8f7bf5b8 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -14,6 +14,7 @@ import { import { initKtxProject, ktxLocalStateDbPath, loadKtxProject } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { type KtxIngestArgs, runKtxIngest } from './ingest.js'; +import type { KtxCliLocalIngestAdaptersOptions } from './local-adapters.js'; import { CliLookerSlWritingAgentRunner, CliMetabaseAgentRunner, @@ -553,6 +554,46 @@ describe('runKtxIngest', () => { expect(io.stderr()).toBe(''); }); + it('keeps metabase JSON stdout free of operational adapter logs', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo(); + let adapterOptions: KtxCliLocalIngestAdaptersOptions | undefined; + + await 
expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'json', + }, + io.io, + { + createAdapters: (_project, options) => { + adapterOptions = options; + options?.logger?.warn('adapter warning'); + return []; + }, + runLocalMetabaseIngest: async (input) => { + input.adapters.find((adapter) => adapter.source === 'metabase'); + return { + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 0, failedWorkUnits: 0 }, + children: [], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(adapterOptions?.logger).toEqual(expect.objectContaining({ warn: expect.any(Function) })); + expect(() => JSON.parse(io.stdout())).not.toThrow(); + expect(io.stderr()).toBe(''); + }); + it('rejects source-dir uploads through the metabase fan-out route', async () => { const projectDir = join(tempDir, 'project'); await writeMetabaseConfig(projectDir); @@ -764,17 +805,22 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { - databaseIntrospectionUrl: 'http://127.0.0.1:8765', - }); + expect(createAdapters).toHaveBeenCalledWith( + expect.objectContaining({ projectDir }), + expect.objectContaining({ + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + logger: expect.any(Object), + }), + ); expect(runLocal).toHaveBeenCalledWith( expect.objectContaining({ adapters: createdAdapters, adapter: 'fake', connectionId: 'warehouse', - pullConfigOptions: { + pullConfigOptions: expect.objectContaining({ databaseIntrospectionUrl: 'http://127.0.0.1:8765', - }, + logger: expect.any(Object), + }), }), ); }); @@ -817,14 +863,19 @@ describe('runKtxIngest', () => { installPolicy: 'auto', io: io.io, }; - expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { - managedDaemon: expectedManagedDaemon, - }); + expect(createAdapters).toHaveBeenCalledWith( + expect.objectContaining({ 
projectDir }), + expect.objectContaining({ + managedDaemon: expectedManagedDaemon, + logger: expect.any(Object), + }), + ); expect(runLocal).toHaveBeenCalledWith( expect.objectContaining({ - pullConfigOptions: { + pullConfigOptions: expect.objectContaining({ managedDaemon: expectedManagedDaemon, - }, + logger: expect.any(Object), + }), }), ); }); @@ -878,9 +929,13 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { - historicSqlConnectionId: 'warehouse', - }); + expect(createAdapters).toHaveBeenCalledWith( + expect.objectContaining({ projectDir }), + expect.objectContaining({ + historicSqlConnectionId: 'warehouse', + logger: expect.any(Object), + }), + ); expect(runLocal).toHaveBeenCalledWith( expect.objectContaining({ adapters: createdAdapters, @@ -1119,15 +1174,19 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(createAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir }), { - looker: { - parser: pullConfigOptions.looker.parser, - }, - }); + expect(createAdapters).toHaveBeenCalledWith( + expect.objectContaining({ projectDir }), + expect.objectContaining({ + logger: expect.any(Object), + looker: { + parser: pullConfigOptions.looker.parser, + }, + }), + ); expect(runLocal).toHaveBeenCalledWith( expect.objectContaining({ agentRunner, - pullConfigOptions, + pullConfigOptions: expect.objectContaining(pullConfigOptions), }), ); }); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 39bf21bb..516d4dd6 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -18,6 +18,7 @@ import { } from '@ktx/context/ingest'; import { loadKtxProject } from '@ktx/context/project'; import { readIngestReportSnapshotFile } from './ingest-report-file.js'; +import { createCliOperationalLogger } from './io/logger.js'; import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import type { 
KtxManagedPythonInstallPolicy } from './managed-python-command.js'; import { type KtxMemoryFlowStdin, renderMemoryFlowInteractively } from './memory-flow-interactive.js'; @@ -475,11 +476,13 @@ export async function runKtxIngest( const executeLocalIngest = deps.runLocalIngest ?? runLocalIngest; const localIngestOptions = deps.localIngestOptions ?? {}; const managedDaemon = managedDaemonOptionsForIngestRun(args, io); + const operationalLogger = createCliOperationalLogger(io, args.outputMode); const adapterOptions = { ...(localIngestOptions.pullConfigOptions ?? {}), ...(args.databaseIntrospectionUrl ? { databaseIntrospectionUrl: args.databaseIntrospectionUrl } : {}), ...(managedDaemon ? { managedDaemon } : {}), ...(args.adapter === 'historic-sql' ? { historicSqlConnectionId: args.connectionId } : {}), + logger: operationalLogger, }; if (args.adapter === 'metabase' && args.sourceDir) { throw new Error('source-dir uploads are not supported for the Metabase fan-out adapter'); diff --git a/packages/cli/src/io/logger.test.ts b/packages/cli/src/io/logger.test.ts new file mode 100644 index 00000000..bf21a150 --- /dev/null +++ b/packages/cli/src/io/logger.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createCliOperationalLogger, createNoopOperationalLogger } from './logger.js'; + +function makeIo() { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + write: (chunk: string) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('createCliOperationalLogger', () => { + it('routes operational messages to stderr outside JSON mode', () => { + const io = makeIo(); + const logger = createCliOperationalLogger(io.io, 'plain'); + + logger.log('progress'); + logger.warn('warning'); + logger.error('failure'); + logger.debug?.('debug'); + + expect(io.stdout()).toBe(''); + 
expect(io.stderr()).toBe('progress\nwarning\nfailure\ndebug\n'); + }); + + it('suppresses operational messages in JSON mode by default', () => { + const io = makeIo(); + const logger = createCliOperationalLogger(io.io, 'json'); + + logger.log('progress'); + logger.warn('warning'); + logger.error('failure'); + logger.debug?.('debug'); + + expect(io.stdout()).toBe(''); + expect(io.stderr()).toBe(''); + }); +}); + +describe('createNoopOperationalLogger', () => { + it('never writes', () => { + const logger = createNoopOperationalLogger(); + const warn = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + + logger.log('progress'); + logger.warn('warning'); + logger.error('failure'); + logger.debug?.('debug'); + + expect(warn).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/cli/src/io/logger.ts b/packages/cli/src/io/logger.ts new file mode 100644 index 00000000..e9952254 --- /dev/null +++ b/packages/cli/src/io/logger.ts @@ -0,0 +1,40 @@ +import type { KtxCliIo } from '../cli-runtime.js'; +import type { KtxOutputMode } from './mode.js'; + +export interface KtxOperationalLogger { + log(message: string): void; + warn(message: string): void; + error(message: string): void; + debug?(message: string): void; +} + +export type KtxOperationalOutputMode = KtxOutputMode | 'viz'; + +function writeLine(io: KtxCliIo, message: string): void { + io.stderr.write(message.endsWith('\n') ? 
message : `${message}\n`); +} + +export function createNoopOperationalLogger(): KtxOperationalLogger { + return { + log: () => undefined, + warn: () => undefined, + error: () => undefined, + debug: () => undefined, + }; +} + +export function createCliOperationalLogger( + io: KtxCliIo, + mode: KtxOperationalOutputMode, +): KtxOperationalLogger { + if (mode === 'json') { + return createNoopOperationalLogger(); + } + + return { + log: (message) => writeLine(io, message), + warn: (message) => writeLine(io, message), + error: (message) => writeLine(io, message), + debug: (message) => writeLine(io, message), + }; +} diff --git a/packages/cli/src/local-adapters.ts b/packages/cli/src/local-adapters.ts index d0a5f571..8557674c 100644 --- a/packages/cli/src/local-adapters.ts +++ b/packages/cli/src/local-adapters.ts @@ -35,6 +35,7 @@ import { managedDaemonDatabaseIntrospectionOptions, type ManagedPythonCoreDaemonOptions, } from './managed-python-http.js'; +import type { KtxOperationalLogger } from './io/logger.js'; function hasSnowflakeDriver(connection: unknown): boolean { return ( @@ -162,6 +163,7 @@ export interface KtxCliLocalIngestAdaptersOptions extends DefaultLocalIngestAdap sqlAnalysis?: SqlAnalysisPort; sqlAnalysisUrl?: string; managedDaemon?: ManagedPythonCoreDaemonOptions; + logger?: KtxOperationalLogger; } function historicSqlRecord(connection: unknown): Record | null { diff --git a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts index c1869da9..a29fecd1 100644 --- a/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts +++ b/packages/context/src/ingest/adapters/looker/local-looker.adapter.ts @@ -1,4 +1,5 @@ import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js'; +import type { LookerClientLogger } from './client.js'; import { DefaultLookerClientFactory, DefaultLookerConnectionClientFactory, @@ -59,8 +60,11 @@ export 
function createLocalLookerCredentialResolver( export function createLocalLookerSourceAdapter( project: KtxLocalProject, env: NodeJS.ProcessEnv = process.env, + logger?: LookerClientLogger, ): LookerSourceAdapter { - const connectionFactory = new DefaultLookerConnectionClientFactory(createLocalLookerCredentialResolver(project, env)); + const connectionFactory = new DefaultLookerConnectionClientFactory(createLocalLookerCredentialResolver(project, env), { + ...(logger ? { logger } : {}), + }); return new LookerSourceAdapter({ clientFactory: new DefaultLookerClientFactory(connectionFactory), }); diff --git a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts index bd81413f..ec5e163e 100644 --- a/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/local-metabase.adapter.ts @@ -1,12 +1,17 @@ import type { KtxLocalProject, KtxProjectConnectionConfig } from '../../../project/index.js'; import { ktxLocalStateDbPath } from '../../../project/index.js'; import { resolveKtxConfigReference } from '../../../core/config-reference.js'; -import { DEFAULT_METABASE_CLIENT_CONFIG, DefaultMetabaseConnectionClientFactory } from './client.js'; +import { + DEFAULT_METABASE_CLIENT_CONFIG, + DefaultMetabaseConnectionClientFactory, + type MetabaseClientLogger, +} from './client.js'; import { IngestMetabaseClientFactory, type MetabaseClientConfig, type MetabaseClientRuntimeConfig, } from './client-port.js'; +import type { MetabaseFetchLogger } from './fetch.js'; import { LocalMetabaseSourceStateReader } from './local-source-state-store.js'; import { MetabaseSourceAdapter } from './metabase.adapter.js'; @@ -50,6 +55,7 @@ export function metabaseRuntimeConfigFromLocalConnection( interface CreateLocalMetabaseSourceAdapterOptions { env?: NodeJS.ProcessEnv; defaultClientConfig?: MetabaseClientConfig; + logger?: MetabaseClientLogger & 
MetabaseFetchLogger; } export function createLocalMetabaseSourceAdapter( @@ -65,9 +71,11 @@ export function createLocalMetabaseSourceAdapter( options.env, ), options.defaultClientConfig ?? DEFAULT_METABASE_CLIENT_CONFIG, + options.logger, ); return new MetabaseSourceAdapter({ clientFactory: new IngestMetabaseClientFactory(connectionFactory), sourceStateReader, + ...(options.logger ? { logger: options.logger } : {}), }); } diff --git a/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts b/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts index 1c0bb53b..cff59641 100644 --- a/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts +++ b/packages/context/src/ingest/adapters/metabase/metabase.adapter.ts @@ -4,7 +4,7 @@ import type { ChunkResult, DiffSet, FetchContext, ScopeDescriptor, SourceAdapter import { chunkMetabaseStagedDir } from './chunk.js'; import type { MetabaseClientFactory } from './client-port.js'; import { detectMetabaseStagedDir } from './detect.js'; -import { fetchMetabaseBundle } from './fetch.js'; +import { fetchMetabaseBundle, type MetabaseFetchLogger } from './fetch.js'; import { computeFetchScope, hashScope, isPathInMetabaseScope } from './fetch-scope.js'; import type { MetabaseSourceStateReader } from './source-state-port.js'; import { STAGED_FILES, stagedSyncConfigSchema } from './types.js'; @@ -12,6 +12,7 @@ import { STAGED_FILES, stagedSyncConfigSchema } from './types.js'; export interface MetabaseSourceAdapterDeps { clientFactory: MetabaseClientFactory; sourceStateReader: MetabaseSourceStateReader; + logger?: MetabaseFetchLogger; } export class MetabaseSourceAdapter implements SourceAdapter { @@ -31,6 +32,7 @@ export class MetabaseSourceAdapter implements SourceAdapter { ctx, clientFactory: this.deps.clientFactory, sourceStateReader: this.deps.sourceStateReader, + ...(this.deps.logger ? 
{ logger: this.deps.logger } : {}), }); } diff --git a/packages/context/src/ingest/adapters/notion/notion.adapter.ts b/packages/context/src/ingest/adapters/notion/notion.adapter.ts index 896ef69f..fba68cee 100644 --- a/packages/context/src/ingest/adapters/notion/notion.adapter.ts +++ b/packages/context/src/ingest/adapters/notion/notion.adapter.ts @@ -14,7 +14,7 @@ import type { import { chunkNotionStagedDir, describeNotionScope } from './chunk.js'; import { clusterNotionWorkUnits } from './cluster.js'; import { detectNotionStagedDir } from './detect.js'; -import { fetchNotionSnapshot } from './fetch.js'; +import { fetchNotionSnapshot, type NotionFetchLogger } from './fetch.js'; import { NotionClient } from './notion-client.js'; import { parseNotionPullConfig } from './pull-config.js'; import { type NotionMetadata, notionManifestSchema, notionMetadataSchema } from './types.js'; @@ -31,6 +31,7 @@ interface NotionPullSucceededContext { export interface NotionSourceAdapterDeps { onPullSucceeded?: (ctx: NotionPullSucceededContext) => Promise; + logger?: NotionFetchLogger; } export class NotionSourceAdapter implements SourceAdapter { @@ -48,7 +49,12 @@ export class NotionSourceAdapter implements SourceAdapter { async fetch(pullConfig: unknown, stagedDir: string, _ctx: FetchContext): Promise { const config = parseNotionPullConfig(pullConfig); - await fetchNotionSnapshot({ client: new NotionClient(config.authToken), config, stagedDir }); + await fetchNotionSnapshot({ + client: new NotionClient(config.authToken), + config, + stagedDir, + ...(this.deps.logger ? 
{ logger: this.deps.logger } : {}), + }); } chunk(stagedDir: string, diffSet?: DiffSet): Promise { diff --git a/packages/context/src/ingest/local-adapters.ts b/packages/context/src/ingest/local-adapters.ts index 93d6b063..dc4f50a4 100644 --- a/packages/context/src/ingest/local-adapters.ts +++ b/packages/context/src/ingest/local-adapters.ts @@ -19,6 +19,7 @@ import { } from './adapters/live-database/daemon-introspection.js'; import { LiveDatabaseSourceAdapter } from './adapters/live-database/live-database.adapter.js'; import { createDaemonLookerTableIdentifierParser } from './adapters/looker/daemon-table-identifier-parser.js'; +import type { LookerClientLogger } from './adapters/looker/client.js'; import { DefaultLookerConnectionClientFactory } from './adapters/looker/factory.js'; import { createLocalLookerCredentialResolver } from './adapters/looker/local-looker.adapter.js'; import { LocalLookerRuntimeStore } from './adapters/looker/local-runtime-store.js'; @@ -32,9 +33,12 @@ import type { LookerRuntimeClient } from './adapters/looker/fetch.js'; import { LookmlSourceAdapter } from './adapters/lookml/lookml.adapter.js'; import { pullConfigFromIntegrationConfig } from './adapters/lookml/pull-config.js'; import { createLocalMetabaseSourceAdapter } from './adapters/metabase/local-metabase.adapter.js'; +import type { MetabaseClientLogger } from './adapters/metabase/client.js'; +import type { MetabaseFetchLogger } from './adapters/metabase/fetch.js'; import { MetricflowSourceAdapter } from './adapters/metricflow/metricflow.adapter.js'; import { pullConfigFromMetricflowIntegration } from './adapters/metricflow/pull-config.js'; import { NotionSourceAdapter } from './adapters/notion/notion.adapter.js'; +import type { NotionFetchLogger } from './adapters/notion/fetch.js'; import { seedLocalMappingStateFromKtxYaml } from './local-mapping-reconcile.js'; import type { SourceAdapter } from './types.js'; @@ -56,14 +60,23 @@ export interface DefaultLocalIngestAdaptersOptions { 
parser?: LookerTableIdentifierParser; env?: NodeJS.ProcessEnv; }; + logger?: LocalIngestOperationalLogger; } +type LocalIngestOperationalLogger = MetabaseClientLogger & + MetabaseFetchLogger & + LookerClientLogger & + NotionFetchLogger; + export function createDefaultLocalIngestAdapters( project: KtxLocalProject, options: DefaultLocalIngestAdaptersOptions = {}, ): SourceAdapter[] { const lookerConnectionFactory = new DefaultLookerConnectionClientFactory( createLocalLookerCredentialResolver(project, options.looker?.env), + { + ...(options.logger ? { logger: options.logger } : {}), + }, ); const adapters: SourceAdapter[] = [ @@ -77,7 +90,9 @@ export function createDefaultLocalIngestAdapters( }), new LookmlSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }), new DbtSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }), - createLocalMetabaseSourceAdapter(project), + createLocalMetabaseSourceAdapter(project, { + ...(options.logger ? { logger: options.logger } : {}), + }), new LookerSourceAdapter({ clientFactory: { async createClient(config, ctx) { @@ -89,7 +104,9 @@ export function createDefaultLocalIngestAdapters( }, }), new MetricflowSourceAdapter({ homeDir: join(project.projectDir, '.ktx/cache') }), - new NotionSourceAdapter(), + new NotionSourceAdapter({ + ...(options.logger ? 
{ logger: options.logger } : {}), + }), ]; if (options.historicSql) { From 9409d50d1d16d8cec19b6f3e7b86bc493cfddf87 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:29:34 +0200 Subject: [PATCH 19/33] fix(cli): keep ingest progress off stdout --- packages/cli/src/ingest-viz.test.ts | 6 +- packages/cli/src/ingest.test-utils.ts | 2 +- packages/cli/src/ingest.test.ts | 122 +++++++++++++++++++++----- packages/cli/src/ingest.ts | 14 +-- 4 files changed, 111 insertions(+), 33 deletions(-) diff --git a/packages/cli/src/ingest-viz.test.ts b/packages/cli/src/ingest-viz.test.ts index 1347b3a8..6963d277 100644 --- a/packages/cli/src/ingest-viz.test.ts +++ b/packages/cli/src/ingest-viz.test.ts @@ -331,8 +331,9 @@ describe('runKtxIngest viz and replay', () => { ).resolves.toBe(0); expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake'); + expect(io.stderr()).toContain('[5%] Fetching source files for warehouse/fake'); expect(io.stdout()).toContain('Job: plain-run'); + expect(io.stdout()).not.toContain('[5%]'); expect(io.stdout()).not.toContain('KTX memory flow'); }); @@ -407,8 +408,9 @@ describe('runKtxIngest viz and replay', () => { expect(startLiveMemoryFlow).not.toHaveBeenCalled(); expect(runLocal).toHaveBeenCalledWith(expect.objectContaining({ memoryFlow: expect.anything() })); - expect(io.stdout()).toContain('[5%] Fetching source files for warehouse/fake'); + expect(io.stderr()).toContain('[5%] Fetching source files for warehouse/fake'); expect(io.stdout()).toContain('Job: raw-missing-viz-run'); + expect(io.stdout()).not.toContain('[5%]'); expect(io.stdout()).not.toContain('KTX memory flow'); expect(io.stderr()).toContain( 'Visualization requested but stdin raw mode is unavailable; printing plain output.', diff --git a/packages/cli/src/ingest.test-utils.ts 
b/packages/cli/src/ingest.test-utils.ts index a83b38be..71d85c6c 100644 --- a/packages/cli/src/ingest.test-utils.ts +++ b/packages/cli/src/ingest.test-utils.ts @@ -546,7 +546,7 @@ export async function runPublicMetabaseSyncModeCase(tempDir: string, input: Sync ), ).resolves.toBe(0); - expect(io.stderr()).toBe(''); + expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); expect(io.stdout()).toContain(`target=warehouse_a database=1 status=done job=${jobId}`); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 8f7bf5b8..2573262a 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -230,7 +230,7 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); expect(io.stdout()).toContain('warehouse_a'); expect(io.stdout()).toContain('metabase-child-1'); - expect(io.stderr()).toBe(''); + expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); it('returns a non-zero code when Metabase fan-out has failed children', async () => { @@ -300,7 +300,7 @@ describe('runKtxIngest', () => { expect(io.stdout()).toContain('Metabase fan-out: partial_failure'); expect(io.stdout()).toContain('Failed work units: 1'); expect(io.stdout()).toContain('status=error'); - expect(io.stderr()).toBe(''); + expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); }); it('prints Metabase fan-out progress before the final summary', async () => { @@ -374,12 +374,56 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(io.stdout()).toContain('Metabase ingest: prod-metabase'); - expect(io.stdout()).toContain('Targets: 1 mapped database'); - expect(io.stdout()).toContain('- database=1 target=warehouse_a status=running job=metabase-child-1'); - expect(io.stdout()).toContain('- database=1 target=warehouse_a status=done job=metabase-child-1'); + expect(io.stderr()).toContain('Metabase ingest: 
prod-metabase'); + expect(io.stderr()).toContain('Targets: 1 mapped database'); + expect(io.stderr()).toContain('- database=1 target=warehouse_a status=running job=metabase-child-1'); + expect(io.stderr()).toContain('- database=1 target=warehouse_a status=done job=metabase-child-1'); expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); - expect(io.stderr()).toBe(''); + expect(io.stdout()).not.toContain('status=running job=metabase-child-1'); + }); + + it('writes metabase fan-out progress to stderr and final result to stdout', async () => { + const projectDir = join(tempDir, 'project'); + await writeMetabaseConfig(projectDir); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'prod-metabase', + adapter: 'metabase', + outputMode: 'plain', + }, + io.io, + { + runLocalMetabaseIngest: async (input) => { + input.progress?.onMetabaseFanoutPlanned({ + metabaseConnectionId: 'prod-metabase', + children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }], + }); + input.progress?.onMetabaseChildStarted({ + metabaseConnectionId: 'prod-metabase', + metabaseDatabaseId: 1, + targetConnectionId: 'warehouse_a', + jobId: 'metabase-child-1', + }); + return { + metabaseConnectionId: 'prod-metabase', + status: 'all_succeeded', + totals: { workUnits: 0, failedWorkUnits: 0 }, + children: [], + }; + }, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); + expect(io.stderr()).toContain('status=running job=metabase-child-1'); + expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); + expect(io.stdout()).not.toContain('status=running job=metabase-child-1'); }); it('runs Metabase scheduled ingest through the public CLI command path with real fan-out', async () => { @@ -464,7 +508,8 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - expect(io.stderr()).toBe(''); + expect(io.stderr()).toContain('Metabase ingest: prod-metabase'); 
+ expect(io.stderr()).toContain('Targets: 2 mapped databases'); expect(io.stdout()).toContain('Metabase fan-out: all_succeeded'); expect(io.stdout()).toContain('Source: prod-metabase'); expect(io.stdout()).toContain('Children: 2'); @@ -1031,16 +1076,46 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - const stdout = io.stdout(); - expect(stdout).toContain('[5%] Fetching source files for warehouse/historic-sql'); - expect(stdout).toContain('[15%] Fetched 3 source files from historic-sql'); - expect(stdout).toContain('[45%] Planned 1 work unit'); - expect(stdout).toContain('[80%] Processed 1/1 work units'); - expect(stdout).toContain('[100%] Ingest completed'); - expect(stdout.indexOf('[5%] Fetching source files for warehouse/historic-sql')).toBeLessThan( - stdout.indexOf('Report: report-live-1'), - ); - expect(io.stderr()).toBe(''); + const stderr = io.stderr(); + expect(stderr).toContain('[5%] Fetching source files for warehouse/historic-sql'); + expect(stderr).toContain('[15%] Fetched 3 source files from historic-sql'); + expect(stderr).toContain('[45%] Planned 1 work unit'); + expect(stderr).toContain('[80%] Processed 1/1 work units'); + expect(stderr).toContain('[100%] Ingest completed'); + expect(io.stdout()).toContain('Report: report-live-1'); + expect(io.stdout()).not.toContain('[5%]'); + }); + + it('writes plain TTY ingest progress to stderr and final report to stdout', async () => { + const projectDir = join(tempDir, 'project'); + await writeWarehouseConfig(projectDir); + const sourceDir = join(tempDir, 'source'); + await mkdir(join(sourceDir, 'orders'), { recursive: true }); + await writeFile(join(sourceDir, 'orders', 'orders.json'), '{"name":"orders"}\n', 'utf-8'); + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => completedLocalBundleRun(input, 'local-job-1')); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'fake', + sourceDir, + 
outputMode: 'plain', + }, + io.io, + { + env: interactiveEnv(), + runLocalIngest: runLocal, + }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).toContain('[5%] Fetching source files for warehouse/fake'); + expect(io.stdout()).toContain('Report: report-live-1'); + expect(io.stdout()).not.toContain('[5%]'); }); it('prints plain WorkUnit step progress during long-running local ingest', async () => { @@ -1127,11 +1202,12 @@ describe('runKtxIngest', () => { ), ).resolves.toBe(0); - const stdout = io.stdout(); - expect(stdout).toContain('[45%] Planned 2 work units'); - expect(stdout).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); - expect(stdout).toContain('[58%] Processing 1/2 work units: historic-sql-table-public-orders step 7/40'); - expect(stdout).toContain('[68%] Processed 1/2 work units'); + const stderr = io.stderr(); + expect(stderr).toContain('[45%] Planned 2 work units'); + expect(stderr).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); + expect(stderr).toContain('[58%] Processing 1/2 work units: historic-sql-table-public-orders step 7/40'); + expect(stderr).toContain('[68%] Processed 1/2 work units'); + expect(io.stdout()).not.toContain('[45%]'); }); it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 516d4dd6..36b45a6a 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -143,22 +143,22 @@ function createMetabaseFanoutProgress( connectionId: string, io: KtxIngestIo, ): LocalMetabaseFanoutProgress { - io.stdout.write(`Metabase ingest: ${connectionId}\n`); - io.stdout.write('Checking mappings and scheduled-pull targets...\n'); + io.stderr.write(`Metabase ingest: ${connectionId}\n`); + io.stderr.write('Checking mappings and scheduled-pull targets...\n'); return { onMetabaseFanoutPlanned(event) { - io.stdout.write(`Targets: 
${pluralize(event.children.length, 'mapped database')}\n`); + io.stderr.write(`Targets: ${pluralize(event.children.length, 'mapped database')}\n`); for (const child of event.children) { - io.stdout.write(`- database=${child.metabaseDatabaseId} target=${child.targetConnectionId} status=queued\n`); + io.stderr.write(`- database=${child.metabaseDatabaseId} target=${child.targetConnectionId} status=queued\n`); } }, onMetabaseChildStarted(event) { - io.stdout.write( + io.stderr.write( `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=running job=${event.jobId}\n`, ); }, onMetabaseChildCompleted(event) { - io.stdout.write( + io.stderr.write( `- database=${event.metabaseDatabaseId} target=${event.targetConnectionId} status=${event.status} job=${event.jobId}\n`, ); }, @@ -290,7 +290,7 @@ function createPlainIngestProgressRenderer( const write = (percent: number, message: string) => { const nextPercent = Math.max(lastPercent, Math.max(0, Math.min(100, percent))); lastPercent = nextPercent; - io.stdout.write(`[${nextPercent}%] ${message}\n`); + io.stderr.write(`[${nextPercent}%] ${message}\n`); }; return { From 42365481ac861ff3108859f5dbe174d9ac02ab52 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:31:43 +0200 Subject: [PATCH 20/33] refactor(cli): centralize Clack prompt handling --- packages/cli/src/clack.ts | 58 ++++++++++++++++++- .../cli/src/managed-python-command.test.ts | 42 ++++++++++++++ packages/cli/src/managed-python-command.ts | 18 +++--- 3 files changed, 106 insertions(+), 12 deletions(-) diff --git a/packages/cli/src/clack.ts b/packages/cli/src/clack.ts index e7083df9..fc24f1e7 100644 --- a/packages/cli/src/clack.ts +++ b/packages/cli/src/clack.ts @@ -1,4 +1,4 @@ -import { spinner } from '@clack/prompts'; +import { cancel, confirm, isCancel, log, spinner } from '@clack/prompts'; export interface KtxCliSpinner { start(message: string): void; @@ -6,6 +6,62 @@ 
export interface KtxCliSpinner { error(message: string): void; } +export interface KtxCliPromptAdapter { + confirm(options: { message: string; initialValue?: boolean }): Promise; + cancel(message: string): void; + log: { + info(message: string): void; + warn(message: string): void; + error(message: string): void; + success(message: string): void; + step(message: string): void; + }; + spinner(): KtxCliSpinner; +} + +export class KtxCliPromptCancelledError extends Error { + constructor(message = 'Operation cancelled.') { + super(message); + this.name = 'KtxCliPromptCancelledError'; + } +} + export function createClackSpinner(): KtxCliSpinner { return spinner(); } + +export function createClackPromptAdapter(): KtxCliPromptAdapter { + return { + async confirm(options) { + const value = await confirm(options); + if (isCancel(value)) { + cancel('Operation cancelled.'); + throw new KtxCliPromptCancelledError(); + } + return value; + }, + cancel(message) { + cancel(message); + }, + log: { + info(message) { + log.info(message); + }, + warn(message) { + log.warn(message); + }, + error(message) { + log.error(message); + }, + success(message) { + log.success(message); + }, + step(message) { + log.step(message); + }, + }, + spinner() { + return createClackSpinner(); + }, + }; +} diff --git a/packages/cli/src/managed-python-command.test.ts b/packages/cli/src/managed-python-command.test.ts index d081c320..3dbf315a 100644 --- a/packages/cli/src/managed-python-command.test.ts +++ b/packages/cli/src/managed-python-command.test.ts @@ -214,6 +214,7 @@ describe('createManagedPythonSemanticLayerComputePort', () => { expect(confirmInstall).toHaveBeenCalledWith( 'KTX needs to install the core Python runtime. This downloads Python dependencies with uv. 
Continue?', + io.io, ); expect(installRuntime).toHaveBeenCalledWith({ cliVersion: '0.2.0', @@ -221,4 +222,45 @@ describe('createManagedPythonSemanticLayerComputePort', () => { force: false, }); }); + + it('uses injected runtime confirmation instead of reading process TTY directly', async () => { + const io = makeIo(); + const compute = { query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }; + const installRuntime = vi.fn(async (): Promise => installResult()); + const confirmInstall = vi.fn(async () => true); + + await expect( + createManagedPythonSemanticLayerComputePort({ + cliVersion: '0.2.0', + installPolicy: 'prompt', + io: io.io, + readStatus: async () => missingStatus(), + installRuntime, + confirmInstall, + createPythonCompute: () => compute, + }), + ).resolves.toBe(compute); + + expect(confirmInstall).toHaveBeenCalledWith( + 'KTX needs to install the core Python runtime. This downloads Python dependencies with uv. Continue?', + io.io, + ); + expect(io.stderr()).toContain('Installing KTX Python runtime (core) with uv...'); + }); + + it('can decide default runtime prompting from injected io capabilities', async () => { + const io = makeIo(); + Object.assign(io.io.stdout, { isTTY: false }); + + await expect( + createManagedPythonSemanticLayerComputePort({ + cliVersion: '0.2.0', + installPolicy: 'prompt', + io: io.io, + readStatus: async () => missingStatus(), + installRuntime: vi.fn(), + createPythonCompute: () => ({ query: vi.fn(), validateSources: vi.fn(), generateSources: vi.fn() }), + }), + ).rejects.toThrow('KTX Python runtime installation was cancelled'); + }); }); diff --git a/packages/cli/src/managed-python-command.ts b/packages/cli/src/managed-python-command.ts index 0a8a193c..ce7afe7b 100644 --- a/packages/cli/src/managed-python-command.ts +++ b/packages/cli/src/managed-python-command.ts @@ -1,6 +1,6 @@ -import { cancel, confirm, isCancel } from '@clack/prompts'; import { createPythonSemanticLayerComputePort, type 
KtxSemanticLayerComputePort } from '@ktx/context/daemon'; import type { KtxCliIo } from './cli-runtime.js'; +import { createClackPromptAdapter } from './clack.js'; import { installManagedPythonRuntime, readManagedPythonRuntimeStatus, @@ -36,7 +36,7 @@ export interface ManagedPythonCommandRuntime { export interface ManagedPythonCommandDeps { readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; installRuntime?: (options: ManagedPythonRuntimeInstallOptions) => Promise; - confirmInstall?: (message: string) => Promise; + confirmInstall?: (message: string, io: KtxCliIo) => Promise; } export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps { @@ -69,16 +69,12 @@ function hasFeature(manifest: InstalledKtxRuntimeManifest, feature: KtxRuntimeFe return manifest.features.includes(feature); } -async function defaultConfirmInstall(message: string): Promise { - if (process.stdin.isTTY !== true || process.stdout.isTTY !== true) { +async function defaultConfirmInstall(message: string, io: KtxCliIo): Promise { + if (io.stdout.isTTY !== true) { return false; } - const response = await confirm({ message, initialValue: true }); - if (isCancel(response)) { - cancel('Runtime installation cancelled.'); - return false; - } - return response === true; + const prompts = createClackPromptAdapter(); + return await prompts.confirm({ message, initialValue: true }); } export async function ensureManagedPythonCommandRuntime( @@ -99,7 +95,7 @@ export async function ensureManagedPythonCommandRuntime( if (options.installPolicy === 'prompt') { const confirmInstall = options.confirmInstall ?? defaultConfirmInstall; - const confirmed = await confirmInstall(installPrompt(feature)); + const confirmed = await confirmInstall(installPrompt(feature), options.io); if (!confirmed) { throw new Error(`KTX Python runtime installation was cancelled. 
Run: ${managedRuntimeInstallCommand(feature)}`); } From b95cb9b3347ed378741e25b48ac98f9ad9e6ff90 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:32:49 +0200 Subject: [PATCH 21/33] feat(cli): formalize dev-friendly result output --- packages/cli/src/io/print-list.ts | 15 ++++++++++++--- packages/cli/src/sl.test.ts | 12 ++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/io/print-list.ts b/packages/cli/src/io/print-list.ts index b66fa7ad..d2129d7d 100644 --- a/packages/cli/src/io/print-list.ts +++ b/packages/cli/src/io/print-list.ts @@ -28,6 +28,16 @@ export interface PrintListArgs { io: KtxCliIo; } +export interface KtxJsonResultEnvelope { + kind: string; + data: T; + meta?: Record; +} + +export function writeJsonResult(io: KtxCliIo, envelope: KtxJsonResultEnvelope): void { + io.stdout.write(`${JSON.stringify(envelope, null, 2)}\n`); +} + export function printList(args: PrintListArgs): void { switch (args.mode) { case 'json': @@ -61,12 +71,11 @@ function printListPlain(args: PrintListArgs): void { } function printListJson(args: PrintListArgs): void { - const envelope = { + writeJsonResult(args.io, { kind: 'list', data: { items: args.rows }, meta: { command: args.command }, - }; - args.io.stdout.write(`${JSON.stringify(envelope, null, 2)}\n`); + }); } function pluralize(count: number, singular: string): string { diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts index bd746b0b..8752d0ec 100644 --- a/packages/cli/src/sl.test.ts +++ b/packages/cli/src/sl.test.ts @@ -398,10 +398,18 @@ joins: [] listIo.io, ); expect(code).toBe(0); + expect(listIo.stderr()).toBe(''); const parsed = JSON.parse(listIo.stdout()); - expect(parsed.kind).toBe('list'); - expect(parsed.meta).toEqual({ command: 'sl list' }); + expect(parsed).toMatchObject({ + kind: 'list', + data: { + items: expect.any(Array), + }, + meta: { + command: 'sl list', + }, + }); 
expect(parsed.data.items).toHaveLength(1); expect(parsed.data.items[0]).toMatchObject({ connectionId: 'warehouse', From c35297b80ad3f889481b0b8a7f3f3831feca8c21 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 11:36:15 +0200 Subject: [PATCH 22/33] test(cli): cover output channel invariants --- packages/cli/src/index.test.ts | 18 ++++++++++++++++++ packages/cli/src/ingest.test.ts | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index a575eeed..8ee74016 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -2,6 +2,7 @@ import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { createRequire } from 'node:module'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; +import { initKtxProject } from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { @@ -310,6 +311,23 @@ describe('runKtxCli', () => { expect(testIo.stderr()).toBe(''); }); + it('keeps representative JSON command stdout parseable', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir, projectName: 'warehouse' }); + const commands = [ + ['--project-dir', projectDir, 'setup', 'status', '--json'], + ['--project-dir', projectDir, 'sl', 'list', '--json'], + ]; + + for (const argv of commands) { + const testIo = makeIo(); + await expect(runKtxCli(argv, testIo.io)).resolves.toBe(0); + + expect(() => JSON.parse(testIo.stdout())).not.toThrow(); + expect(testIo.stderr()).toBe(''); + } + }); + it('starts setup for bare ktx in a TTY when no project is discoverable', async () => { const { mkdtemp, realpath, rm } = await import('node:fs/promises'); const { tmpdir } = await import('node:os'); diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 2573262a..bf27be15 100644 --- 
a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -399,11 +399,11 @@ describe('runKtxIngest', () => { io.io, { runLocalMetabaseIngest: async (input) => { - input.progress?.onMetabaseFanoutPlanned({ + input.progress?.onMetabaseFanoutPlanned?.({ metabaseConnectionId: 'prod-metabase', children: [{ metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a' }], }); - input.progress?.onMetabaseChildStarted({ + input.progress?.onMetabaseChildStarted?.({ metabaseConnectionId: 'prod-metabase', metabaseDatabaseId: 1, targetConnectionId: 'warehouse_a', From 4c93a6e983c9e5241202f1c9adc568243d7900ef Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 12:02:26 +0200 Subject: [PATCH 23/33] fix(ci): update stale KTX test expectations (#32) Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- packages/cli/src/demo.test.ts | 10 +- packages/cli/src/standalone-smoke.test.ts | 16 +-- scripts/examples-docs.test.mjs | 129 ++++++++++++---------- 3 files changed, 86 insertions(+), 69 deletions(-) diff --git a/packages/cli/src/demo.test.ts b/packages/cli/src/demo.test.ts index 0cedba99..5c9fa7ac 100644 --- a/packages/cli/src/demo.test.ts +++ b/packages/cli/src/demo.test.ts @@ -336,8 +336,8 @@ describe('runKtxDemo', () => { notion: { pageCount: 8 }, }, generatedOutputs: { - semanticLayer: { manifestSourceCount: 6, fileCount: 6 }, - knowledge: { manifestPageCount: 10, fileCount: 10 }, + semanticLayer: { manifestSourceCount: 46, fileCount: 46 }, + knowledge: { manifestPageCount: 28, fileCount: 28 }, links: { manifestLinkCount: 23, linkCount: 23 }, reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 }, }, @@ -636,10 +636,10 @@ describe('runKtxDemo', () => { ).resolves.toBe(0); expect(seededIo.stdout()).toContain('Status: ready'); - expect(seededIo.stdout()).toContain('Semantic-layer sources: 6 manifest, 6 files'); - expect(seededIo.stdout()).toContain('Knowledge pages: 10 manifest, 10 files'); + 
expect(seededIo.stdout()).toContain('Semantic-layer sources: 46 manifest, 46 files'); + expect(seededIo.stdout()).toContain('Knowledge pages: 28 manifest, 28 files'); expect(seededIo.stdout()).not.toContain('Status: corrupt'); - expect(seededIo.stdout()).not.toContain('Semantic-layer sources: 6 manifest, 0 files'); + expect(seededIo.stdout()).not.toContain('Semantic-layer sources: 46 manifest, 0 files'); }); it('fails corrupted demo projects in no-input mode with reset guidance', async () => { diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 27f34b92..a7b6c049 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -370,7 +370,7 @@ describe('standalone built ktx CLI smoke', () => { totalFound: number; }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract', limit: 5 } })); expect(knowledgeSearch.totalFound).toBeGreaterThan(0); - expect(knowledgeSearch.results.map((result) => result.key)).toContain('arr-contract-first'); + expect(knowledgeSearch.results.map((result) => result.key)).toContain('orbit-arr-contract-first-definition'); const knowledgeRead = structuredContent<{ key: string; @@ -378,26 +378,26 @@ describe('standalone built ktx CLI smoke', () => { content: string; tags: string[]; slRefs: string[]; - }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'arr-contract-first' } })); - expect(knowledgeRead.key).toBe('arr-contract-first'); + }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'orbit-arr-contract-first-definition' } })); + expect(knowledgeRead.key).toBe('orbit-arr-contract-first-definition'); expect(knowledgeRead.summary).toContain('ARR'); expect(knowledgeRead.content).toContain('contract'); - expect(knowledgeRead.slRefs).toContain('orbit_demo.contracts'); + expect(knowledgeRead.slRefs).toContain('mart_arr_daily'); const slRead = structuredContent<{ sourceName: string; yaml: 
string }>( await client.callTool({ name: 'sl_read_source', - arguments: { connectionId: 'orbit_demo', sourceName: 'accounts' }, + arguments: { connectionId: 'postgres-warehouse', sourceName: 'mart_arr_daily' }, }), ); - expect(slRead.sourceName).toBe('accounts'); - expect(slRead.yaml).toContain('name: accounts'); + expect(slRead.sourceName).toBe('mart_arr_daily'); + expect(slRead.yaml).toContain('name: mart_arr_daily'); expect(slRead.yaml).toContain('measures:'); const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>( await client.callTool({ name: 'sl_validate', - arguments: { connectionId: 'orbit_demo', names: ['accounts', 'contracts'] }, + arguments: { connectionId: 'postgres-warehouse', names: ['mart_arr_daily'] }, }), ); expect(slValidate.success).toBe(true); diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index 24c83452..81c42b9c 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -135,72 +135,86 @@ describe('standalone example docs', () => { assert.doesNotMatch(readme, /--historic-sql-min-calls/); }); - it('lists every published TypeScript package in the package root README', async () => { - const rootReadme = await readText('README.md'); + it('lists every workspace package in the contributor docs', async () => { + const contributing = await readText('docs-site/content/docs/community/contributing.mdx'); - assert.match(rootReadme, /`packages\/context`/); - assert.match(rootReadme, /`packages\/cli`/); - assert.match(rootReadme, /`packages\/connector-bigquery`/); - assert.match(rootReadme, /`packages\/connector-clickhouse`/); - assert.match(rootReadme, /`packages\/connector-mysql`/); - assert.match(rootReadme, /`packages\/connector-postgres`/); - assert.match(rootReadme, /`packages\/connector-snowflake`/); - assert.match(rootReadme, /`packages\/connector-sqlite`/); - assert.match(rootReadme, /`packages\/connector-sqlserver`/); - assert.match(rootReadme, 
/`python\/ktx-sl`/); - assert.match(rootReadme, /`python\/ktx-daemon`/); + assert.match(contributing, /cli\/\s+# CLI entry point/); + assert.match(contributing, /context\/\s+# Core context engine/); + assert.match(contributing, /llm\/\s+# LLM client abstraction/); + assert.match(contributing, /connector-bigquery\/\s+# BigQuery connector/); + assert.match(contributing, /connector-clickhouse\/\s+# ClickHouse connector/); + assert.match(contributing, /connector-mysql\/\s+# MySQL connector/); + assert.match(contributing, /connector-postgres\/\s+# PostgreSQL connector/); + assert.match(contributing, /connector-snowflake\/\s+# Snowflake connector/); + assert.match(contributing, /connector-sqlite\/\s+# SQLite connector/); + assert.match(contributing, /connector-sqlserver\/\s+# SQL Server connector/); + assert.match(contributing, /ktx-sl\/\s+# Semantic layer/); + assert.match(contributing, /ktx-daemon\/\s+# Daemon/); }); it('documents every standalone MCP tool that the CLI server exposes', async () => { - const rootReadme = await readText('README.md'); + const servingAgents = await readText('docs-site/content/docs/guides/serving-agents.mdx'); - assert.match(rootReadme, /`connection_list`/); - assert.match(rootReadme, /`knowledge_search`/); - assert.match(rootReadme, /`knowledge_read`/); - assert.match(rootReadme, /`knowledge_write`/); - assert.match(rootReadme, /`sl_list_sources`/); - assert.match(rootReadme, /`sl_read_source`/); - assert.match(rootReadme, /`sl_write_source`/); - assert.match(rootReadme, /`sl_validate`/); - assert.match(rootReadme, /`sl_query`/); - assert.match(rootReadme, /`ingest_trigger`/); - assert.match(rootReadme, /`ingest_status`/); - assert.match(rootReadme, /`ingest_report`/); - assert.match(rootReadme, /`ingest_replay`/); + for (const tool of [ + 'connection_list', + 'connection_test', + 'knowledge_search', + 'knowledge_read', + 'knowledge_write', + 'sl_list_sources', + 'sl_read_source', + 'sl_write_source', + 'sl_validate', + 'sl_query', + 
'scan_trigger', + 'scan_status', + 'scan_report', + 'scan_list_artifacts', + 'scan_read_artifact', + 'ingest_trigger', + 'ingest_status', + 'ingest_report', + 'ingest_replay', + 'memory_capture', + 'memory_capture_status', + ]) { + assert.match(servingAgents, new RegExp(`\`${tool}\``)); + } }); - it('walks through ktx connection list and ktx connection test in the README quickstart', async () => { - const rootReadme = await readText('README.md'); + it('walks through connection testing in the quickstart and CLI reference', async () => { + const quickstart = await readText('docs-site/content/docs/getting-started/quickstart.mdx'); + const connectionReference = await readText('docs-site/content/docs/cli-reference/ktx-connection.mdx'); - assert.match(rootReadme, /connection list --project-dir/); - assert.match(rootReadme, /connection test warehouse --project-dir/); - assert.match(rootReadme, /Driver: sqlite/); - assert.match(rootReadme, /Tables: 1/); + assert.match(connectionReference, /ktx connection list/); + assert.match(connectionReference, /ktx connection test my-warehouse/); + assert.match(quickstart, /Connection test passed/); + assert.match(quickstart, /Driver: PostgreSQL .* Tables: 42/); }); - it('documents public npm and managed runtime usage in the README', async () => { + it('documents public npm and managed runtime usage', async () => { const rootReadme = await readText('README.md'); + const quickstart = await readText('docs-site/content/docs/getting-started/quickstart.mdx'); + const packageArtifacts = await readText('examples/package-artifacts/README.md'); - assert.match(rootReadme, publicPackagePattern('npx {package} setup demo --no-input')); - assert.match(rootReadme, publicPackagePattern('npx {package} sl query')); - assert.match(rootReadme, publicPackagePattern('npm install {package}')); assert.match(rootReadme, publicPackagePattern('npm install -g {package}')); - assert.match(rootReadme, /ktx runtime install/); - assert.match(rootReadme, /ktx runtime 
status/); - assert.match(rootReadme, /ktx runtime doctor/); - assert.match(rootReadme, /ktx runtime start/); - assert.match(rootReadme, /ktx runtime stop/); - assert.match(rootReadme, /ktx runtime prune --dry-run/); - assert.match(rootReadme, /ktx runtime prune --yes/); - assert.match(rootReadme, /KTX requires `uv` on `PATH`/); - assert.match(rootReadme, /KTX doesn't download `uv` automatically/); + assert.match(quickstart, publicPackagePattern('npm install -g {package}')); + assert.match(quickstart, /ktx runtime install --feature local-embeddings --yes/); + assert.match(quickstart, /ktx runtime start --feature local-embeddings/); + assert.match(quickstart, /Install `uv`, run `ktx runtime doctor`/); + assert.match(packageArtifacts, /requires `uv` on `PATH`/); + assert.match(packageArtifacts, /ktx runtime status/); + assert.match(packageArtifacts, /ktx runtime doctor/); + assert.match(packageArtifacts, /ktx runtime prune --dry-run/); + assert.match(packageArtifacts, /ktx runtime prune --yes/); assert.match( - rootReadme, - runtimeWheelPackagePattern( - 'release\\s+artifact manifest contains the public npm tarball and the\\s+bundled `{package}`\\s+runtime wheel', + packageArtifacts, + new RegExp( + `artifact manifest contains the public \`${escapeRegExp(publicNpmPackageName())}\` npm tarball and the\\s+bundled \`${escapeRegExp( + runtimeWheelPackageName(), + )}\` runtime wheel`, ), ); - assert.match(rootReadme, /source packages for\s+development, not public release artifacts/); assert.match(rootReadme, /ktx serve --mcp stdio/); assert.doesNotMatch(rootReadme, /uv run ktx-daemon serve-http/); assert.doesNotMatch(rootReadme, /--semantic-compute-url http:\/\/127\.0\.0\.1:8765/); @@ -232,14 +246,17 @@ describe('standalone example docs', () => { assert.doesNotMatch(readme, /python -m ktx_daemon semantic-validate/); }); - it('replaces the fake-ingest smoke with a ktx scan walkthrough in the README', async () => { + it('documents scan workflows in the docs site', async () 
=> { const rootReadme = await readText('README.md'); + const buildingContext = await readText('docs-site/content/docs/guides/building-context.mdx'); + const scanReference = await readText('docs-site/content/docs/cli-reference/ktx-scan.mdx'); - assert.match(rootReadme, /### Scan the demo warehouse/); - assert.match(rootReadme, /scan warehouse --project-dir/); - assert.match(rootReadme, /scan status --project-dir/); - assert.match(rootReadme, /scan report --project-dir/); - assert.match(rootReadme, /raw-sources\/warehouse\/live-database/); + assert.match(buildingContext, /ktx dev scan /); + assert.match(buildingContext, /ktx dev scan status /); + assert.match(buildingContext, /ktx dev scan report /); + assert.match(scanReference, /ktx dev scan \[options\]/); + assert.match(rootReadme, /raw-sources\//); + assert.match(rootReadme, /live-database\//); assert.doesNotMatch(rootReadme, /Run a local ingest smoke test/); assert.doesNotMatch(rootReadme, /ktx dev ingest run --project-dir/); assert.doesNotMatch(rootReadme, /ktx ingest status --project-dir/); From d830e8c46e33ce7b0b2c888d12039a499ef07333 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 12:24:25 +0200 Subject: [PATCH 24/33] docs: standardize env variable examples --- .../content/docs/integrations/primary-sources.mdx | 2 +- packages/cli/src/connection.test.ts | 6 +++--- packages/cli/src/index.test.ts | 4 ++-- packages/cli/src/standalone-smoke.test.ts | 6 +++--- .../context/src/connections/notion-config.test.ts | 14 +++++++------- .../context/src/ingest/local-stage-ingest.test.ts | 10 +++++----- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs-site/content/docs/integrations/primary-sources.mdx b/docs-site/content/docs/integrations/primary-sources.mdx index c36260d1..dcfd143f 100644 --- a/docs-site/content/docs/integrations/primary-sources.mdx +++ b/docs-site/content/docs/integrations/primary-sources.mdx @@ -213,7 +213,7 @@ For 
multiple datasets: | Method | Config | |--------|--------| | Service account JSON | `credentials_json: file:/path/to/key.json` | -| Environment variable | `credentials_json: env:GCP_CREDENTIALS_JSON` | +| Environment variable | `credentials_json: env:BIGQUERY_CREDENTIALS_JSON` | The project ID is extracted automatically from the service account JSON file. diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index ae593805..04c73cf1 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -477,7 +477,7 @@ describe('runKtxConnection', () => { force: false, allowLiteralCredentials: false, notion: { - authTokenRef: 'env:NOTION_AUTH_TOKEN', + authTokenRef: 'env:NOTION_TOKEN', crawlMode: 'all_accessible', rootPageIds: [], rootDatabaseIds: [], @@ -493,7 +493,7 @@ describe('runKtxConnection', () => { const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); expect(yaml).toContain('driver: notion'); - expect(yaml).toContain('auth_token_ref: env:NOTION_AUTH_TOKEN'); + expect(yaml).toContain('auth_token_ref: env:NOTION_TOKEN'); expect(yaml).toContain('crawl_mode: all_accessible'); expect(yaml).toContain('max_pages_per_run: 50'); expect(yaml).not.toContain('ntn_'); @@ -516,7 +516,7 @@ describe('runKtxConnection', () => { force: false, allowLiteralCredentials: false, notion: { - authTokenRef: 'env:NOTION_AUTH_TOKEN', + authTokenRef: 'env:NOTION_TOKEN', crawlMode: 'all_accessible', rootPageIds: [], rootDatabaseIds: ['database-1'], diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index a575eeed..8bc2a3a6 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -1964,7 +1964,7 @@ describe('runKtxCli', () => { '--project-dir', tempDir, '--token-env', - 'NOTION_AUTH_TOKEN', + 'NOTION_TOKEN', '--crawl-mode', 'selected_roots', '--root-page-id', @@ -1991,7 +1991,7 @@ describe('runKtxCli', () => { force: false, allowLiteralCredentials: false, notion: { 
- authTokenRef: 'env:NOTION_AUTH_TOKEN', + authTokenRef: 'env:NOTION_TOKEN', crawlMode: 'selected_roots', rootPageIds: ['page-1'], rootDatabaseIds: ['database-1'], diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 27f34b92..b1b30534 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -716,7 +716,7 @@ describe('standalone built ktx CLI smoke', () => { '--project-dir', projectDir, '--token-env', - 'NOTION_AUTH_TOKEN', + 'NOTION_TOKEN', '--crawl-mode', 'all_accessible', '--max-pages', @@ -729,7 +729,7 @@ describe('standalone built ktx CLI smoke', () => { const yaml = await readFile(join(projectDir, 'ktx.yaml'), 'utf-8'); expect(yaml).toContain('driver: notion'); - expect(yaml).toContain('auth_token_ref: env:NOTION_AUTH_TOKEN'); + expect(yaml).toContain('auth_token_ref: env:NOTION_TOKEN'); expect(yaml).toContain('crawl_mode: all_accessible'); expect(yaml).toContain('max_pages_per_run: 5'); expect(yaml).not.toContain('ntn_'); @@ -737,7 +737,7 @@ describe('standalone built ktx CLI smoke', () => { const parsed = parseKtxProjectConfig(yaml); expect(parsed.connections['notion-main']).toMatchObject({ driver: 'notion', - auth_token_ref: 'env:NOTION_AUTH_TOKEN', + auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible', }); }); diff --git a/packages/context/src/connections/notion-config.test.ts b/packages/context/src/connections/notion-config.test.ts index 33d1e110..8ad88c86 100644 --- a/packages/context/src/connections/notion-config.test.ts +++ b/packages/context/src/connections/notion-config.test.ts @@ -23,14 +23,14 @@ describe('standalone Notion connection config', () => { it('parses selected-root Notion config with safe defaults', () => { const parsed = parseNotionConnectionConfig({ driver: 'notion', - auth_token_ref: 'env:NOTION_AUTH_TOKEN', + auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'selected_roots', root_page_ids: ['page-1'], }); 
expect(parsed).toEqual({ driver: 'notion', - auth_token_ref: 'env:NOTION_AUTH_TOKEN', + auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'selected_roots', root_page_ids: ['page-1'], root_database_ids: [], @@ -70,7 +70,7 @@ describe('standalone Notion connection config', () => { expect(() => parseNotionConnectionConfig({ driver: 'notion', - auth_token_ref: 'env:NOTION_AUTH_TOKEN', + auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'selected_roots', }), ).toThrow('selected_roots requires at least one root page, database, or data source id'); @@ -81,8 +81,8 @@ describe('standalone Notion connection config', () => { await writeFile(tokenPath, 'ntn_file_token\n', 'utf-8'); await expect( - resolveNotionAuthToken('env:NOTION_AUTH_TOKEN', { - env: { NOTION_AUTH_TOKEN: 'ntn_env_token' }, + resolveNotionAuthToken('env:NOTION_TOKEN', { + env: { NOTION_TOKEN: 'ntn_env_token' }, }), ).resolves.toBe('ntn_env_token'); await expect(resolveNotionAuthToken(`file:${tokenPath}`)).resolves.toBe('ntn_file_token'); @@ -95,14 +95,14 @@ describe('standalone Notion connection config', () => { const pullConfig = await notionConnectionToPullConfig( parseNotionConnectionConfig({ driver: 'notion', - auth_token_ref: 'env:NOTION_AUTH_TOKEN', + auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible', max_pages_per_run: 12, max_knowledge_creates_per_run: 2, max_knowledge_updates_per_run: 7, last_successful_cursor: '{"phase":"all_accessible_pages","cursor":"cursor-1"}', }), - { env: { NOTION_AUTH_TOKEN: 'ntn_env_token' } }, + { env: { NOTION_TOKEN: 'ntn_env_token' } }, ); expect(pullConfig).toEqual({ diff --git a/packages/context/src/ingest/local-stage-ingest.test.ts b/packages/context/src/ingest/local-stage-ingest.test.ts index e24174fb..157bd96b 100644 --- a/packages/context/src/ingest/local-stage-ingest.test.ts +++ b/packages/context/src/ingest/local-stage-ingest.test.ts @@ -569,8 +569,8 @@ describe('local ingest', () => { }); it('passes resolved standalone Notion config into fetch 
adapters', async () => { - const priorToken = process.env.NOTION_AUTH_TOKEN; - process.env.NOTION_AUTH_TOKEN = 'ntn_local_test_token'; + const priorToken = process.env.NOTION_TOKEN; + process.env.NOTION_TOKEN = 'ntn_local_test_token'; try { await writeFile( join(project.projectDir, 'ktx.yaml'), @@ -579,7 +579,7 @@ describe('local ingest', () => { 'connections:', ' notion-main:', ' driver: notion', - ' auth_token_ref: env:NOTION_AUTH_TOKEN', + ' auth_token_ref: env:NOTION_TOKEN', ' crawl_mode: selected_roots', ' root_page_ids:', ' - page-1', @@ -666,9 +666,9 @@ describe('local ingest', () => { }); } finally { if (priorToken === undefined) { - delete process.env.NOTION_AUTH_TOKEN; + delete process.env.NOTION_TOKEN; } else { - process.env.NOTION_AUTH_TOKEN = priorToken; + process.env.NOTION_TOKEN = priorToken; } } }); From 085f68beec89f66e2a38665a66988daefd6b292b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 12:26:19 +0200 Subject: [PATCH 25/33] docs: refresh KTX demo readiness guidance --- README.md | 89 ++++++++++++++++++++++- packages/cli/src/demo.test.ts | 25 +++++-- packages/cli/src/standalone-smoke.test.ts | 18 ++--- 3 files changed, 117 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 84592226..696558a5 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ SQLite. Install the CLI and run the setup wizard: ```bash +npm install @kaelio/ktx npm install -g @kaelio/ktx ktx setup ``` @@ -70,6 +71,40 @@ KTX context built: yes Agent integration ready: yes (claude-code:project) ``` +Run the packaged demo without installing globally: + +```bash +npx @kaelio/ktx setup demo --no-input +npx @kaelio/ktx setup demo inspect +``` + +The default demo uses packaged sample data and prebuilt context. It does not +require API keys, network access, or an LLM provider. 
+ +Generate SQL from a semantic-layer source: + +```bash +npx @kaelio/ktx sl query --project-dir "$PROJECT_DIR" \ + --connection-id warehouse \ + --measure accounts.account_count \ + --dimension accounts.segment \ + --format sql +``` + +List and test a configured warehouse connection: + +```bash +ktx connection list --project-dir "$PROJECT_DIR" +ktx connection test warehouse --project-dir "$PROJECT_DIR" +``` + +The connection test prints the configured driver and discovered table count: + +```text +Driver: sqlite +Tables: 1 +``` + ## What's in a project ``` @@ -97,6 +132,47 @@ Semantic sources and knowledge pages are committed to git. The `.ktx/` directory holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the next run. +### Scan the demo warehouse + +Scan artifacts are written under +`raw-sources/warehouse/live-database//` in the project directory. + +```bash +SCAN_OUTPUT="$(ktx scan warehouse --project-dir "$PROJECT_DIR")" +printf '%s\n' "$SCAN_OUTPUT" +SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" +ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +``` + +For non-SQLite drivers, prefer credential references such as `--url env:NAME` +or `--url file:PATH` over literal credential URLs. + +## Managed Python runtime + +KTX installs its Python runtime only when a Python-backed command needs it. +The runtime lives outside the npm cache, is versioned by the installed CLI +version, and is managed by `ktx runtime` commands. + +KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with +your system package manager or the official installer before running Python- +backed KTX commands. 
KTX doesn't download `uv` automatically; run +`ktx runtime doctor` if runtime installation fails: + +```bash +ktx runtime install --yes +ktx runtime status +ktx runtime doctor +ktx runtime start +ktx runtime stop +ktx runtime prune --dry-run +ktx runtime prune --yes +``` + +The release artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` +runtime wheel. The `python/ktx-sl` and `python/ktx-daemon` directories remain +source packages for development, not public release artifacts. + ## Serve agents KTX integrates with coding agents through CLI skills, an MCP server, or both. @@ -126,6 +202,11 @@ This exposes tools for connections, knowledge search, semantic-layer sources, validation, queries, ingestion, and replay. The `--semantic-compute` flag starts the managed Python runtime for query planning automatically. +The standalone MCP server exposes `connection_list`, `knowledge_search`, +`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`, +`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`, +`ingest_status`, `ingest_report`, and `ingest_replay`. + Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that reads `.agents/` skills or MCP configuration. @@ -136,7 +217,13 @@ reads `.agents/` skills or MCP configuration. 
| `packages/cli` | CLI entry point | | `packages/context` | Core context engine | | `packages/llm` | LLM and embedding providers | -| `packages/connector-*` | Database connectors (Postgres, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, SQLite) | +| `packages/connector-bigquery` | BigQuery scan connector | +| `packages/connector-clickhouse` | ClickHouse scan connector | +| `packages/connector-mysql` | MySQL scan connector | +| `packages/connector-postgres` | Postgres scan connector | +| `packages/connector-snowflake` | Snowflake scan connector | +| `packages/connector-sqlite` | SQLite scan connector | +| `packages/connector-sqlserver` | SQL Server scan connector | | `python/ktx-sl` | Semantic-layer query planning | | `python/ktx-daemon` | Portable compute service | diff --git a/packages/cli/src/demo.test.ts b/packages/cli/src/demo.test.ts index 0cedba99..0b053ee6 100644 --- a/packages/cli/src/demo.test.ts +++ b/packages/cli/src/demo.test.ts @@ -11,6 +11,9 @@ import type { renderMemoryFlowTui } from './memory-flow-tui.js'; import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; +const SEEDED_DEMO_SEMANTIC_SOURCE_COUNT = 46; +const SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT = 28; + function makeIo(options: { isTTY?: boolean; columns?: number; rawMode?: boolean } = {}) { let stdout = ''; let stderr = ''; @@ -336,8 +339,14 @@ describe('runKtxDemo', () => { notion: { pageCount: 8 }, }, generatedOutputs: { - semanticLayer: { manifestSourceCount: 6, fileCount: 6 }, - knowledge: { manifestPageCount: 10, fileCount: 10 }, + semanticLayer: { + manifestSourceCount: SEEDED_DEMO_SEMANTIC_SOURCE_COUNT, + fileCount: SEEDED_DEMO_SEMANTIC_SOURCE_COUNT, + }, + knowledge: { + manifestPageCount: SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT, + fileCount: SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT, + }, links: { manifestLinkCount: 23, linkCount: 23 }, reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 }, }, @@ -636,10 
+645,16 @@ describe('runKtxDemo', () => { ).resolves.toBe(0); expect(seededIo.stdout()).toContain('Status: ready'); - expect(seededIo.stdout()).toContain('Semantic-layer sources: 6 manifest, 6 files'); - expect(seededIo.stdout()).toContain('Knowledge pages: 10 manifest, 10 files'); + expect(seededIo.stdout()).toContain( + `Semantic-layer sources: ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} manifest, ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} files`, + ); + expect(seededIo.stdout()).toContain( + `Knowledge pages: ${SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT} manifest, ${SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT} files`, + ); expect(seededIo.stdout()).not.toContain('Status: corrupt'); - expect(seededIo.stdout()).not.toContain('Semantic-layer sources: 6 manifest, 0 files'); + expect(seededIo.stdout()).not.toContain( + `Semantic-layer sources: ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} manifest, 0 files`, + ); }); it('fails corrupted demo projects in no-input mode with reset guidance', async () => { diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 27f34b92..0b15410c 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -368,9 +368,9 @@ describe('standalone built ktx CLI smoke', () => { const knowledgeSearch = structuredContent<{ results: Array<{ key: string; summary: string; score: number }>; totalFound: number; - }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract', limit: 5 } })); + }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract-first definition', limit: 10 } })); expect(knowledgeSearch.totalFound).toBeGreaterThan(0); - expect(knowledgeSearch.results.map((result) => result.key)).toContain('arr-contract-first'); + expect(knowledgeSearch.results.map((result) => result.key)).toContain('orbit-arr-contract-first-definition'); const knowledgeRead = structuredContent<{ key: string; @@ -378,26 +378,26 @@ describe('standalone built 
ktx CLI smoke', () => { content: string; tags: string[]; slRefs: string[]; - }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'arr-contract-first' } })); - expect(knowledgeRead.key).toBe('arr-contract-first'); + }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'orbit-arr-contract-first-definition' } })); + expect(knowledgeRead.key).toBe('orbit-arr-contract-first-definition'); expect(knowledgeRead.summary).toContain('ARR'); expect(knowledgeRead.content).toContain('contract'); - expect(knowledgeRead.slRefs).toContain('orbit_demo.contracts'); + expect(knowledgeRead.slRefs).toContain('mart_arr_daily'); const slRead = structuredContent<{ sourceName: string; yaml: string }>( await client.callTool({ name: 'sl_read_source', - arguments: { connectionId: 'orbit_demo', sourceName: 'accounts' }, + arguments: { connectionId: 'dbt-main', sourceName: 'mart_arr_daily' }, }), ); - expect(slRead.sourceName).toBe('accounts'); - expect(slRead.yaml).toContain('name: accounts'); + expect(slRead.sourceName).toBe('mart_arr_daily'); + expect(slRead.yaml).toContain('name: mart_arr_daily'); expect(slRead.yaml).toContain('measures:'); const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>( await client.callTool({ name: 'sl_validate', - arguments: { connectionId: 'orbit_demo', names: ['accounts', 'contracts'] }, + arguments: { connectionId: 'dbt-main', names: ['mart_arr_daily', 'stg_contracts'] }, }), ); expect(slValidate.success).toBe(true); From 69e546678facbb9ab1bc1742c450e8d7ca9f9790 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 12:56:46 +0200 Subject: [PATCH 26/33] docs: replace README logo with KTX lockup --- README.md | 2 +- assets/ktx-lockup.svg | 32 ++++++++++++++++++++++++++++++++ assets/ktx-readme-header.png | Bin 29179 -> 0 bytes 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 assets/ktx-lockup.svg delete mode 100644 assets/ktx-readme-header.png diff --git 
a/README.md b/README.md index 84592226..f8718542 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- KTX + KTX

diff --git a/assets/ktx-lockup.svg b/assets/ktx-lockup.svg new file mode 100644 index 00000000..f1bcd2dd --- /dev/null +++ b/assets/ktx-lockup.svg @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + ktx + diff --git a/assets/ktx-readme-header.png b/assets/ktx-readme-header.png deleted file mode 100644 index 11cfb4e47be1693a4eaa3b08bb5221aa77bfe342..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 29179 zcmb@uWmuG57dEUSprlBLA|l-)T}nzz4xLI%h;)f`w{(Lv1JW=c-60|jjda%_UGE-z z?&rDR@7MR^{lhVi*R^Ad0kp)(I?X?S46V-MeF9 z&5u2*euwG%sa5z3UrO42q!izF82z0UJVh__?j4N@n`Zc~Rmz^Zv1|n%oJUOc^-;)L zC`}R1p7)a*%LH0heU;NoL_}!3yi!uOXJP)O^-O=*Ved5sp94|uyLawy{(E+ezj>as zj@MvuO{I<ost!I#y8S4NAcW7yrn4NsNJ+v30*nSws92f&`L}QL4=0KAUsPlojET9wH;hPYlgdUA4z>PQ54#eoZDHEgcf9tj zij+L|MST(RxGuhE{xfB?}Q=E=daI+g%}Cv33eE^`qsd`;USWU2{@?!;60cuxY?nG29*nzVc4r=M_O?Vv zhkMyScAh}wv_qFwXL_q+$hWo8;VzKgsqKGX#y)y1>~F*Th~E8cf+)o_5ueMcd-p%D zu8|KL^0G3$5YtBZ{_?>uR?lOO+#IhmSRMv&o@unIzkxQ{}I?P z@jrHr9wJd{exzz@fnt-HWq}?B|16Haa|RuExPK&lq4R`EaO$JDx&jV5`af2}RqMV; zC63%QOn|4SgL4ezM8wS zVMGYry8Dsa-@VgAa|SgV9O%fj%4u*GQCyqrU8xyp{Coocd3)x=*#eCW$zt}Rgasoy z{(m+RUH?io)By`C&z(bq*ce@K*wj zX;IR{M!%$;{heOg1)1lyHnFnY!T9D)?DR>f{H2a%8^-38ks+PI%mevo8x+l+m(o;p z&;RqXdCbvslrK%*1T2y_2jPcH^BYxF^`wldraPsS#SeRGZ4C--W@IKp^_VO0oI1+@6Tm%HFN0kNfk|D|-G+xPiNBguG>K$s@d}b*5V3e%A6R)}aq_Fa@ zC+%h8ARN?zefIydwc4A1e$vtMNV?~2=lZOkoxIlWRv@;riq710*bFC1x5;#PS$GqBKc>TWOQ*cBYDsV}cPtR;{64~QK zJM!TtS&7&t~vpO|8Of4 zEW5q|y0H83y$yHx&+nB@q)g`@j&IT<@FZn#Zyz4nOw64Qc30r0X+(19rB*Nv40Ftb zEFA=Eo24WFGbC>5Ouf9F5k1~_0XHgYChd~j(Mk9E@AJsp5tctAqi6Un{vRt*N`_tB zsob-W1`p$3b}6VD+XjA|?0+^w1Q%I!I5Sr8x5+Hazc-`@YiyO!5Y zinL}I8h?W>Z!rQ|{}201F46nwQl0iA@`P8WR-|1$dn^>Zju>LJ3KE;8jj_Q!_C&uo z2JXUKC2q1X7a9}M*2eRbLcd4vf--g7Ew_F9t*F1^<_X_5K}!s%RnNnT@hZ0+<>w8k z>;PUh7DFl9;CdL}A`O=g%uEk$|Dmd`Zp7|Z|C}J4OWTozj^+6#D!WLldwUgs8jY>d z1-kv0l22j=mdl-zMct+24r}x``Q36{0=kr9dW`-agEW>}8~2mD5*;*$0_@@fDX5PI z6m=wYriOx|Zt_fUAhcrR;@YTTD5yc1?tdP0eDy@{TH#n>D1ACuyt?%IW+(JeaTe=2 
zua_J~YqS^`n46w&0{<-e8aJy__A>oVVu9u`!gM_7C9r0Eof=*fz#YzPH8~!TzinEO zaml^Oc)=rNMcg*o4>&#&rt?&F*7ZR)tDQ#%3szW4V3g$lgKO;)VxHY>5X`wqO~i-Nsep`e3}RR_9l(9qE&W-qC#BHtRMwGr ziv1Yo$ zJp!7P1lUbh*0QIDYr?*-2?&YFP2L3hUDvk|z)>IV*Yl?T zYiSjRU5|rt+ny~HtyJ?mEhiZ=;;x#4Md^(9N=E}HK(e0_8y}yYpRe(lN|~yCPFYJ! zy+&>6KJui9$jHdfj|fLEw6Q1dMXXsRpbjkz5+7SW8O>Drp`PbzO}hua!->44&i@&@&z1%bgYWfMK? z!@7D6{CiOywWBTrFP2L9GC8-`xMsn(hX&>l&JyP7+pl5G_kkqop~y-B=(uf}yoR0^ zD+Y&Y?#a?J@h?x?ShMT?Ec#q+xPfL(A-C}8e!16fitX=a!R<**K0cuLw0}OxegG7~Cx2EaHC!3x)fig3#awp*q!CMq4<8>&+Mw~`3Hk&0pUVmV*Hq9`t-St{nTuS^v(J0=oqwiv?8ivOo7|+PNRCvlzMkd zw@1`-wLe|jzsTH%#9=hLsE&FOHEP>XqCAh`rTW*Z9426HlpX$t+)A4=ahhzO%&$6$ z;SOSjj^5bKo56^}3Ro;DI5K~?zZO*LBc*IXVuFt+bPl)v^j46oskZd-Emd{fZ&|Sa zjhe)kHNoTOZ>TVwQBL!aQlwz)!3sSOV}_Q$dg+Kk$JEQT9QOwKI8f9n;`cDV`^5AY(Khymdz+J( zgoIqL+XxY^Z-eZXxk|a76kb34_w#ih)+e#u$J&FRgXd2MC-0;Bajh8^eufQNp}rMd z^p!vg75M`2zg&upOiXwaUiT#Y4Xeu!e$OZMSSo+ETXv`yi9~#gAwuf$KXx!G$iTqT zVWW^1OX5G_U*h$8farixmAuKR{-{y+M&U@O0E`z^K_4dPZJ6LO4YNPK^pW`ElKR-- zLY)uF$Y=L|i6G*(F`begi3#PR$)L-%L60&leD(kHXvjCjhhR*fOg~vO zZJ4V*zxAxFfn)x@NZMVa`T6Pkz&VqyZSs{a7nNPX8qmhvEb1n6L>3=^d+*4o< z^1{@gtp*pQh8#jlnx|>)7a5R8R>x-CPGALGxqdAtoWQSz zAP@*A+!Rc0Fg9M%8Bz#H+XjheZ;?4QD4kOqR5VCJz1j;0wC9AJKl~hxg5WtR*okKU zwJ&72`pN=TZ}N^ML|desC!F2CL@~40c35(FB?T*`U(J#Ht5akSCLhz=D-+yHKHNS- zu6u^tUg>gB_V$pmU-KIYN>`|#wk^5)Z2t@8qVLs}VnsMgBz!IyM6CMZ5=;;ScPr*e z$8GQQE!`7}i<@dz#`UZL{YS2@#dg!IK%aEjzl2Adoj1hoCT(uE{ONbT83mu`J-<40 z`axMQ%jxe<_}MS!@!9$m$+b5&$T|{DnC8?IzJFwRuAF|g%4L!x)KNb`ZaBwy7gX;Y z=cbFjCBDb*mj$=Jo)%F<``m6X=cbI`nB89)tLb2p_;?-K z7eABEw%5c=jt9(C^+N_ z_A-Gt&4Ai{)+!yZ>$fcdas0*clZS+!PD83p4$jvU3fBJ~b=>QZ?`hh5hl5TUc^yk8 zuv+izF8V2xHFA0W_+pK#VR)_Glu4bNW%y7JI8(3P=CbHH? zWEv3*Q87HbpfO>1md;h)N-N(jtiB;GSnGrtULqO?8`Nu)%y>d~m&VR z|9gBv0f#8`ijG#9>| zH2gx&z%haUj@#<9|7#GNc2V~~e>6?%LWqcF27W8kEo_Wp_UbIsO)e~SRJ8NgDdaV? 
zSGZf_r1M{1c^?)&92e4k%_E8A73n5|T2&H}Nzr=j@1rCbner3@W7F{hm<@mD& zyer!yKdJ?F{_U_hqp4R{RXnT9pKfmJK)mE5J@L%%KXJlmTul~LnT4%p$ zwR%YG2B9e}_Y2TZ?Xm3t^vdLX>z8v>dfc6UvjTq@>rVgui2ohXjd-=>>co;yAIV?T zD&*|6T;A2}(eSgp0ENi6pD`Ot{FhTs&!K?*i(5b z+pqqTtic5y^s8j*n{>gZpvTJFRGn2e9BVMS)tpmVr}379fS*~rB40Fu?$g2Y_fkwH>-^9 zYCeO?(%if5&FMy`nh9Z^r(@PLN(ZlK8F6h=GyGvYi9OOc+oSBrwdBDYw!9_$-l||6 z-CUnpqa2M?Q}ZI~=B(sol>I116|amU<$TYNs8GSCbub-DRi%Ab%TIwK7IpIbOG_z- zz#^)kb`9sIDP7_915gKyxtHnA6ag6q-vI^Xsgj-p6Lgu7&^an8xFWvvcXK)Zxcx~! zqWpM|gF?^|Bacl-Qd4uFE1f=HLs|KAfMVMG$;1Kj%-?~3AVBGo_m8uAAgr{-cQrPN zm#OP&Fc$5sD@c5ewJ(@1=!G|=_bj`zfA8RLl@fXFekm>(x*Cw0w#q!U4BgP zJ)H*1EMeI&*J7rZqqQs&3o&`T09!eFB=q&7!U+IGDRs`8S2G}`eF3+b;kLYuXShim zICUR_{meo7-sVsFI!l5A-Gg`*uI`qPiWId+=nSN9A1G!z-r2t=W*i@dy7kY7?a z-(BC#%v_)Tv|+KpKUxuEkILM)X+a$Vx5m3Tx$iHZ#u~C4p>jY(2jnmVWHPDUMX!gw5)*^`iqxq=P3La8Oz$-D z(N1s~7t^_F-M8R?XnycgoZ&&0=G{536B3_J#V?q0O;C9e&oYOWN>tg%=r4M7%5ji4 zpWeISK7nc5xKj(?TK}EKYhk|<%ZIrv0CDgp^hzIZq~J8>PO3d$Uo?jKknlk@VYVS=og7!J4gQFet>N_wixgh9*M z=?EdEDFIH&_k@{j+Cdg!A+Yx8KruQ6fJMAs=hM6J5IB0|tA9RtQdpPz^H|MFJn+x;r|@B9wGu2V z^D!n#-i1p=Mz-(wOewxT(Y0XTcs0tH_2j3e)u3IWIak$~X4Xse-XJwa_(^iMfWLgo zGr!6w0?Uo&t@iQE{<%)K(j&auLnUM_*q{MKWcqq=Y&<3sh0n;%Sz7C*5kS%%qfTZ! 
zr6Kmdvahw7bsKJ|bE&B0f%(g5pDl#(Qd*0=m=j`l*8Kc4Im@IbRus zy7F?ri%;!%>=$JRAdXcjBmg){K0?eKS9RAyKtxGTx{lgfdBz^Jc@D z(EaghO93M^tj^uRv|cqub((tIM`~^^9;bH`NX?dl+oIIA-R>=`8M+XL58WYb<~8{Jh%BjMsA3W`XhKhBizo*N!9#hY-7WZn22 zNR9113H$!;SZ`USoB_JT*53LOSKprmOKs7Od=>X(S{I9X$4JM8P~dwUVanZ`g%}!A z9hrG420Xsp5AkUzT|qUK*NHFIWINCIdR@p40;HBcf)OKA$RmomNHgS*lF+Xm;3Cif z*ifvSe#rk-^jsUHLG127Uuc|F{_Lw8FcbE$c!$T1QLVq}JzLUkmD@Qjx}Yo+x4>~nwO_`Kc5{g_(W57Zp+vuJqXqc)id z?(&0AnZ3j$(Od#4jSo1TpFI}zR4(eBagbKByMOeU&pRXE`gx<~;Ula4Q0#QUM{xX| z?uc7Z&E_aXAu(;|z$$Y3?%FrnE`%vF2JSBMye-m^h{634Dw8UL44)9BjYEPAIw-UJbMaXLz zg^scw&L7i7xroaMFGt(>Xpf667l0fp>o7pivbuf@yDPDiP?B0!Tjzmdw#eJLAjtB2 z)Z#lTJfQ)UtP?{uwRuq=w(y%xPY5x0F4dxzVct`^?hY^E(`CAqSb}Ix+hB z+4#EUCl@*BVSct@-g4kXzw59Q?!u(rH%9IMn(!;gCBvwo%gcnkUdv>zSmZ1XSo!W( zYQHSYN1wTESEqKLS767Luax*U9CH|KJ)8LOdJCiI!b6f8yL2uMa!T~R(Ky7o zyNEefUDdb5?I_bhPLaX5FkFl(C`-+-KVzQPWB!}cO38*Uyh_`tHQNC_H@yH&zW6QS zA8RqmUR(k>u!9XEbutrnVc{e(N*p2SGBvNFfV1yzkxGzKBsj{VQ-sr3t6 zxlHt|tOfA%*!c8_algK|mc>I!h3CWD_S-fUnD^qM**xJ9<}(0VmR#N_xeKPpLP>3O zNlXOzWGr0E^CcleFuCK7q=72Ly~O9hXtXgENZw#-7Nxs;u=GIU2{_djSRh%Cw7tHK z7Loo`ELG3c8D2UtnKb?Er-ySq2Rf6TI(hfUdM+!BJNguoTsr6 zWhk;s#C9Le2MM?wvO?8NH-pPp_mnEP2z=PI$JdRDdCaU(Ya$&rs_ZNq)i!dF#?=EH z)ja9!t94viee+#|@`_r}PZc4^wRHTo`YF-wY z(i@qKR-$`0C>{827@RvMpA8SN^^F~9% zM&K7HaEmxx5Ep#F1$43D>>zaWEcUxY`P@AoFS(Pu#^m9R+63y@a{utI z#Yo(HPwBUPj!VTUkf%rNeKRv&hezo&%n#tA!`*7zvwgzb)aB;z$BFzpkib(VcUZ7) zC6-sMGlLtO@W>d=K;s>Dv+1`winY`u%}BsyX2k78<3_U{he8fl%G+qach->uYxWdr z@CKelzYX^UXyCq;h>Pr7(^Qj=l=ppWYkfj($);*G#9bJJQzwY zoas9pkt8}99Y&egb?d9Bo2aXk)V_7I3Q0j9(P>tlM%tn?Ps?rkh`xO1-KGjVQCn4> zl!g!Tg982(HCgP7-yFz6BkGpE_IObu!_R)nuTE^*4Yv!{*%&}8Vx+b79m$Oy7)>?# zaOtAnlCBB+zFBV_$O~%=ibQDTA+$U?x%T%}{*Vcyedlvo%i=c`!$hyMr}~M~amsBR zf&$K3;hj@dzvq}ORP1v28g%c5T!Y8B7L8Ux>fj$)v(c0fDV>XLDBU26v-D1XH2tH= z)Jss_wI!YfyxMEPj`a*0B7g+!OsJruIySR)OinuQFMRd^#=aZ^4XQ8#&c1!eM=JJp z9kQ5Ld)1GfI5818S9bsLJf6(gf#3+mrxjvv2*;Hf+CW{Xf$RKzA0a?6HU=VeL=)MLHUU7n`e;Bl@JnVkE}RX9sk`~!QP 
z+YKa?0>lGCqw*DQ2cbn~QzNBPB3p2xM!q^1nVe~)=VzqqbO4pK39}ZX$ZLA?1 z0}|(Ymvh(O9+=9hJ%BR^Hy_+p@C61D1)+JLF?QUFBSW9 zG6CGj+^#n%Okp8Ro%%WBStvySRBN0`ySBUZ9lhft!OyL&SrKGj6MS_5K;vijxD%K9 zy~)fv#Cwt=%Jx`zs19jJSZHl85w44$$Pi@Yci+<5JI2W$u-8S_+~C z&M?`JEG?^VXPTKU=UZu~{9qNbUG8L2H9KHKy69*@*CUEKjmZO=7K2|guqM{tk>cDE zG5~3bYrEti7ec10*YR2UqT-lRSakX)p)F=uGcGr4Z^)+2pdOMDfb&=+Xd33eOys$+ zEA5UTXv^!5H1(Hwm4p@+My@9Ceq;A5`MI!%*~QlH@bQlQZ(7BLf#m>JkPp_rm6yRS zqyB){LqprSs{QtRtB_xFxTI4HV$=8DZ+X1ST){cunVegum_`F7cn^U&fai5xJP>li zd$BlZZS3>I^(ei)y>r69dxO14Q3#|-$BY2{%gVAO{@R5r0j8dAM;MZqh#B9)RoNJe zKDxXjB>({liN8=l1brOi#gJQa^p|kHK1gq62knlh+0uq(V7Cyv}nqf;o~?ox;0^SKR&V=@YL=Dq`|^n(Jvw7#?9!y?fa+vE4d`8_Ej$BK&xd`@H8mJr*}k{_6aPEE z3xkllgsU+%#OC9Mz=HsVl%LP2^oc`+_ZtWZ2{X&e-t0Ui=R{y&l$ozSzH2@dYJp&6 zl#EEl3$U!QCcgw$L%`&?bLMg9{3#9gfqJpG{7ZH+Ek1glm5#)tvV^pxrBn$eJL#~_ zq}LN^6>s@;#3mUul}^cJ(E;6}Is7tUDF7!JG`hNh~4J!Rx&B>@&YZ z8%Pg&bwhZMXQ!E7rtv6vB(?-Zg5v8P$l7V4>$tnO=pZNulaQ)xB_%ySRs7zf+e=m7 z5QNX4SJ=*PaQ%72rAL&gR7VrOw#caL*MLRg532WG#;oa*6Wc*c#3a9#yh|#FS2-&N@{6dt5!uv6m{GMOCj%oGdfu`JDAj zmX0{?^Y^i_MxRdkmTfoOS^lec5gWZS6hBu29fi&qA-c6^DMe->71mP35?M2$fkd^A*JdBWy+HKcAUEaoYiHveCG!FD-3m2RH z60-X~sYTJ-_7_k2`)1(1HGjkiC!Vn>kNj}@bJs;dz2o3)K=9%n$G4@*7ysCZoQ@uJ zyM#YRmL5t;4~?6=W)a{auGyK6Z^V_DB8YR+=@<;kCm<8DB>^X?P1O)Yvw#_h(q;8j zq!J>fz@j1kX$kB);Xcy$LT9(Kh_}w(H{|>de*B|w!1OzBz42Pky7S(NbjjouXbdjL zwHT}^>gA!|I@L_M08`RHK@jKlJQ73;PsSp@HpuNb>|5bBTi9Wmng?E7a;Nq~xfIwS z6V&L+IWfvl7d@gEChXvPR*&vT zDa;rl1o~?eRD~rTrrYA{AnX%R?QnOK6_vquk_Lk!5N9pmIIv#4(Vmu1;8Jw+;w@@i zEWCUVD2{IhQmE;M*pa#4MigXHhiZ~xK%?gwSritT%{Zw9-qdQ40r&YuG&-?WG8ahF;yG zxXa}|7sB;c#h7SzhuNpdY9&$m_-n1F<#OQKtF|T9^8C)8N5F3u%stXTBF3`bgxe;n z{MU?EU(&Z{$_7Gx4R_h z@rvtR@3QJ4gUCy~ZPSp2`+63^n{hKKe}EF0p+@C@1o=oS;@jIQ(L(P9?K2Y!EJ6L5*!| zHL6v|S#Olh@NM@=ilEc0(iy&VCA|T%hA)OU^K;O5>j4sxkZH&P-@KBZB@1-4fsRdC z88CIU#%nNu|3}Xa;k<7bwjpvC1e=%zPz|-$2HMryU%&Ddcj&755-uaeq%Q0oNXjdR zQEVo1@J^+QG0$tX6{j?FyKMCdGQ$XZ3lMVTbPpI_hiUz`W#i#mKD;ndaEyiHMnm>R zD!x^eJ0J3E{$3)Nbz63%nUy0Xa-c9m!|n>In9fAjn`)EZEKE#o3tXFq+myvRN;Q<* 
zI)GHDKS;@#x=1Y02B?D^mOUR_JQEH`m*dQqR+d!+NwCCVx5#UneKH@*^ln=2)GOhh0M{#(#6EjAp&YxF_1b0*#~@kG#|v z#9ga+W2Q`(oOS9wV^EXmp-b|`9zMJ`h{x^HYKxuvo#X-OaD>+4z0eK+N9VYiu&g_5 zoSfx`=d34H5$%dcr5mh>B#mC(7*-Vjgz&(py0Mn& z*~;!&e)MF^aAAyO$88B|b5fB`CB2^He%+?v7*wDAsPf9}3fBc!gU3u*DteZLnMr}g zGvCCjm8XhXl3KIe%tPc%mnU^#E}e&TZaj61t_#x=Uonu53U<)d5 z3ES13KwW|eLh|Yi`|He;nI72*&eG9>Ru8GLu7U&Njy3U_UciZ(hEW0*FXD>mSsCWVki>}%GvcH6#Dle8rjf;8i#E`| zxhfKmbq>Rcj?CtYC_!?_pdE|c7%t!z_6LE9KG`}|N+Mn>R<%2vKU1&(7HHk~#1$j> zKwb2K8lbv?z$@e*_Qh03*5xshaWI++GkLe#jfI<+QQd!a&MAIvHB3aO;3krj(|7^- z{vOZTr#PP?-Quch@hMkl#NFt0<;c1Z#q-dB5|!8ua&1)H)nD$q}ziOS;f-CCJtC8nIhI z@ZK=kr4ruR10d9>YDmJ1Dt|vGS#ewne?Vf*sLhE5%z03W8Ml{L0TTVN+Y;{!#{-Vd z(``DCc1k(KbUhKD=L5^#opLG3ZyryyLo&cao}p!<4UhkP*ic_rNL?C|^{JRRr6156 z>X?Sd0iNYP?6l;Z((uu*pcSdrAgiRnN1Yy0;pRh1xD{!OpZT5I22lbuI8qTZ7NVei zh8CLKj{49?sqLiB&yN5OiD`_*gTq~Pa+LJsdDLplliB>`5*Xh}6PPY6gah$%J@Ou4 zqi+V39)oRS=AoHnh-45pC{s9u9a$fXxOLit5)~ob{$%}^(*xqySd#MkFT%79+#rxD zWxu*b@cul=o5Q-BQ0(R>9lnkri!$0^`ZvEokbQBuCxs#r(m~2k_M$r4qtFeDGz>(M z?iUuG3E8k{C--$383k|CWE>p|4ic`=-6TzeG#S*SFXAYC<|#kup|p33HmJNI-x6}C0D<#^=OP7&j@K=j_<8*%A3%7+ z!%o&R(1t)%<&>YPIcqw>SbG=W0{?%yXHFahD5aT|#i6)3$TBD^P_7wv;a)smN$81} zy5B=dL@NXZ#vr_%U1m1Q|Mu-WOSqzWE-bo65eKT@=iQ2m=c9CBX7MiQQDF{-BJM3B zr`zK=0SF6P!}Sr2Hd=Wpfcg&k+(;vI+`;|mDJds0TN@xYd~2=kA|1?Nfv@O@IgS`n^JxQ5lF&wme#l!L50UW;k<{+C4Zg zMUKRYV&Mf(9}9W-uib9jtMEzg88pWeLjMhbM5KN9G?>k0ivYPV83nfF8_L~x5?vQ( zgQ@2h?rj?{Y!CK?A}sVSW4Smi%+>?L8_^foLz0jL3tB~MPhrp=z`n*w-ER!5Fi*9w zDhnvlzxSnhBJfYB+|@WR2X03F#%P+_x_d_Kvy>v-HR9+t+7#=#uaovtxSOLoUP?Zs z;{+(uvCI3Vg|PV;)8F8#LVj`al9)Jf7)kVmlP5$H5_Cg}xbczPv=#dZ;94ed%$nDN zYU5At5fTz!AR7zs<$|>hzOABxDB@q_!=>=YPVCBAk*nQd#sJ1TNF2w^8wKL1u()$ElXK(45zY`3;mN4;pWgU%|UU_x;@Zuv5?NW+;(zg4RmxV zUU!15-ip>s4JG{c?c+bZf;K60s8(|K_^CB?`NN~ zI+BE52c~OjWQ9OkxFs4g3}Es69@@}dbPRjmASFH0>vFgKm_`93;;u#WpO{1fNY;gX zN^uHBJ9OqAsLha9WNH@90*Y`=Sha$Q8450xfQZrs6%}5I@7_KMEojB+)TW*s!XZ9y zD4;H3I=>a~c!%&EbGNw5yI&b1Sx(8XK|zU%>gR{7JeNm(@koh4N3A`>P6ZtMoIbuy z9x27j@Et#3r3c*rP=WAY4uGK< 
z!@b+O`-WD#xOh-<*y(tdq-hwds}by zt6e;(;>yoBP{%~446hKQ-AweqKO+k$E<2D1+Jlw~=5BTBURbUVYd9~mWCa;X+1mlv zC1Q42u$-&TgQd3@X1l$p*lopaV}oSgJJq~%L)h!tLhmBRf~6%Du*0$MILf12r{C36 zv#^BZkBY2L{?hu2sTiCbK{u3k z;4Ta#$CuFoa2hB588&fn0q7+_GB0+va?hjJLhHlWlBaq*mlr}-x(mt!^=qU(<1X#r z+=_sz5(?Ls5%R+FH{{kohpTZgwU>S1AAoMmF&NnqKWRd@wJM-3>Y7Lf9LUXD(*>Ks zG!ky}dL0TRZE>^`Z@rDUR4+yfP0P7T^*p60hHlbflj*$fOGP>;GA?k3+b2rjJ9nX8&P1o@?0G^CXruaE;}tczu*gl28Wuybd(wWt|D$u z^+vBZC z>Yq$i7?gDsoE*DbAsL)!Pxm*8jsi(vh~gI%<-gshMat+c-mVu_nPS@a=A2qovz^CJ zL<|bz35`kyq}!I!@K>LK3wye0ToGUBm)+^A@yH3Bxf(wA}s09t=8?ka8K$Q?cMlby~J^(>q$VnP@n$ zb17g0ZUqaV`uCj+e$Xn0?Jn><`0huK&&IeDG$%=*Q6SyR2KaNGZXrCHd@a<>cQ&iH zqq`IVsTpD68pxHcA;zrNh_h2N#UwR(ia#Sq z%z73`~yfldm(GgPP%bByOXUrM6)Gj z76;sKO;-21vSCSdq9^LslZ9`C#pR3cQ;In01r=J-(_Gznj=bB{@pL&(B>MgnZkrup=RhSD&iQw8-X(T30JOry%=D3K?H7Bjodg?YSRh3YLf-wX~& z9VI%h~UD2?3rcUj-C&itf0Si58OY4zo(M5liTWE=e(!|6L^Rh`gX^|7Y&3w zS76Z$+F89NHQYfY=U3u?$PD%@@nf>8^vi~!C39={DPX<{wXopWT0qDEml37v4otC2 zDDY-Wa0m687*!4vgaIIPCK{?ad?ZsTP9b2Ao)2cGVu3qSx`iWU!@i?wt^ zswqYsYZ6clH>@MC09p+0)iSv0OH}a_Vvv_i)VKYH(<)m&J;&xgtI~9?TDnEs?z^lO zSj6i@L^Oh_bm^a!nH!^wDZ78{F)@t;SUoL(N;6*g86oqL9!i>+IJsL(A~IP|ub)vs zi<**DcWU@y`cB>OOJq*+EJPhTGN(z+LkT$AitDdjb`WYb%7rRFe{L?F`4SZEF$WTu zhz73rY_1MiEwKi6KgK&4SkXOy@Ng6b6by_t8h@;(?jI~dsJVn5#Sv)qG7Y0H7^{GL zecapQK6|B&`z|V$r>|sorO3q3Q$B_3mbovQXnG_7c5-lNKl98X)uAP^Uw~pdqnFKj zM55V6s&u+KxbF*%*_^80&3HJ}cIZ1r(e0a%0Kb+8`u z`1vG0z>kJd(Pxn;$KMvSJ3=gfe{R)&XKN+3A@DIT7B&mTuT74frJ$gS)gCM2uNz8w zL1&f*<=&i@!^$rX*jemKgs;yke9$H3(t^4a)9n?qU`c2p5GZ`~WeUGdROujof$b2j z#W#IO;9M&MyN>s4Q;Cv}PU~&?!28`rYauMIyiSz-7(CWJha47s0 zrxau=sm`?z=ZPF`Z60~ExE+-rgZM|!TKbgf{pKvnhkowLkL-LdCHxUKm>{VFXy4$r1i4AWc$IhEOC^FLxDywXx>kif1V_j=-`e38|X(p<(ct{k6 z)i$wjCjQrXfVR;yuy|~83AtYoQ0MixJB=yG2isdvZp z2UTr!DO@sE5q=MgJF7a^vIZR~1j9ulI21xu5ii~tFhJ|YDHN_dhIM&RY_5X2>b396 zB?u`7ojWZa&G0u4w&f`>m8;kb&EueRQ}KU+qlK01mBitLq{}A+Vgs}HrXfl@ukuSM z@hXcJp#36T!fxF1uuit$54f#-KazN-UAnC6{QKbI^E)D;0xzc{(T2yDcBh>zvDXtn 
zL+tUF)gYk}sYS`Rc}yfzcF;7G?8^XR|?^W|G&1P0d?kx>ZEN*^;KR>L9ZHI<;8$k9V$V$^hRPvvVLNJH(q7@yC=U9KI~%8I|-^ z+zVc|rsv>UVuVncpDxPJ?>ZvzU6a)-a;u%J!D#C?uc(w5MTz|~T3q6@rkEBR{TX1N zeQ9%r+fPFnuf*u&+fvbk5H`!A;1XJB2#L=E)}@~%9FJ@SQ)Ij@!LRnJs?^#r=#I&&VZa}QI`a9Snge>2sd%ebheA`lpNB^^gQev@&%|6+|E zd(wa&mR{WMQJkM&9B)a)K+mYNTYZVn_v8LBXcgkR18yIOe2l>hw_5lpC@ZtF1L!sH z0fdlq@#(Dd=WXqy9r$7V6AnDkH8c6+woX8xXA|?Yg1F~I-TRS8`8`_SH8H(pmy^E7 z1GwqJF_aLI*oA22lzQ9yAp4H%XMPB0V5H77|3FRj1LWWv>ELRl)p!#o;3>#|V^Ju1 zKFUn7WCvACt^uru09^ECHF?1QC=R(@rG^h_^g?|Yf=|fE7hu75dUkg2Ng;&i7=!(c z>)K$mwM8LNVX5j}7YVqOHvYuEiT5F=-I3rpsxM=_@P(Sa_9+L4=cQ!bo40X_05BFQ z?3xBzOm7OTH`s@u8cb>zL@pZ{9h)tW0Y zMhjIlHob1i?xOhPyY>>9n{@z`1nVg@t2<`gWBZ5OrwSYOCoY%DRToH9UJ_K?`w!CqUxpX|P1;(A*wqyw$QS<}d&d#vTZB9dR2qjJ=bFP(WZ zrJsM2X1TRNJ@re-9dUCx=kVnrtjzU59bleWl~;>$P2Cv#A%U$~qnnYb$z6^}Sx?X?eN! z$RHK43Cs6(U-f8|(jAq?YmIS>+?DmDA1SAzX=|wZ0!KhqYtIA9rPt<{5n7x(CDgXS zqY}i&?#*Yy$kJ!2I~fEF#NTzBAIKFo4_-=Hq_BjO3bxY2p`vb2n7%(*A}08-hi~MM zV9eP&YD6#lgB=_4e6;kGhfWcRuz`Lcyfz)&Mu?v5D|$Wj{lftfXtmRctnru`No!t4 z7`uJvt=_-m?d1bOzK*4x(*#mGnd!M? 
zr(M$JGr09sW_yJL-98oFEzy3^Ru5lE42j44nJXZyQpOu%HZuR1$Q#^IkRDzUEhj>d zWv9V{vqe=_S)xlK1IZQ`S7K4R6d5&(Rg0Cx%X1V7+`>EN@X1E-8*nLO?CDo&dMz7c z(xQUqQiS5b6;6l!{R&fBr=gAsjDTi#8Y0O~@cR&Vz>h(2oC&S*nB1Y$L#{zWGNQZb z0j2FDl(smzk@K-$Jb*2>fr5t@G8%~qfo99YX90M>#E1F9#iNH(`Zl;QZMV52^ye|3 zdJx&OYeClI#wvWP%g^r&nofTr z{PDiEPu)J#5*}rBfwKrPR96bHeMe&;riyY&LbwHhvSbYP=pb9#A2B5npvI}bqLQiV zz(ZYPhG|n}!Eg0zQADVx8ZDXlSiEoa-HS*i4FK@$#8?PI%d4F=7{dA}lV2gF!R#B# zJopt3nd!w*?P}8P$APFC$A;OlwYVv^_c;_@N@#i5C@pKYV=3j;y6?G6okEWH?rjOX z6S$zFda1ppv5|5=v7W2=K3%6?crTt7if^s~JNe9(s z*!MWrrR{&5^vDn}J$EeZIxT|7CWNlC=y%-re!%$!!ofyoDN41n0&X42q`?T?(Un3`XpK&)@HP@x1we9$wy@v+KIAIQc#2R_K-sN-*EXZT@{fGGo@MVfm#nWLH6W;Oynm-+AOz(5ERHL0p)?o z&Y@bR6}+p4icfZCigW8D@(+Ms7@V^9MX`cgV}g2_(&C)#kWH=tMyE zAulDA1j8iBXV8k#+>V`;~HHyz%EGxi>2YGJcuA$6X&fGXp8hSAa`UrI6-;2>Z^@|3u=) z?tILLb{zUET}Ynbe0|85_Ya51=dBtfH-J6`Zp6ghT^RwZ4S{mA0ETE8aD_Em6V9VD zSsA+1M1UJ@Wc}gp#C^lBHd*gAWCG~Q=a+sfaF^Thmrpp00-jUEmtPO}{b(3C9F+jQ zk$fW|E_28-8pjG44aba98j_M{0QeuMx~#gz$+l1V7dzi@GGc`Lc$t$I+{g1p; zGTlUrJC+ zWXdd^kdTym(E>kVHw{0(j1S^FTzsxB%&8j>;&#^snwyA4f@3NBw$1^dYRHK!mgk`I zU`KX%o(gafr70L;TS~||zYBgM02!AJG%^D8_sSle%h;c`whOvW0u$I$tJ$^4ukZ7@;-z;<)&Hmp16&wMZq5oO?UV${V~osd=9utsc> zraUG~T6pjAGJl2K={ah5s>+cC=+VSe;mS%oxRP@k^F;7eOVJNPOx|6>(H|S1run`! 
ztTOOuNVaBcngu{o@nHCY6gn+n4_3aJznwyRA@`fyVd*#&osX-UmO&Pxz{KUt1Fq+X(@J!Ui78e7xPWr+4}@6SD~j&ldME7pfu;ny@_- z6q^FO0>jVGXMq|T&ap7lj9S6T=5pIcH$3)^%!RKzB7)_snQGUo^VT4sq0RRLQPwy6 zsv5kvKc##f)MnP$Z6$t>cBTtCJ1G8~p5#JHV9HYtdH1v<$6!Y5MZFWTtKe^**av*s zsEQE{7>DavP7E~$)jm<2&Ox*~F|b-|_KvrP068d)j}3i{;|V)#PA?@f^Zt}10Mp=6 zgn#~=NXECQz=3!gK?zT`DW1G9AxfTHwh-$mcI6#vd9?e~?LJW7w*)kHT7|(YZLmS5 zqM^E((E;HUj_6rqy022Owe;=t3qMhDQRQHhNaDT#a+XW;MKdM8QemJ@&}j1;ZG42} zNPgPEs+K`P|Inb};nlW`uSz(|qq9o@Kg+~JES>d8t;@NN>1T`Hd1U_NY46ZI8@u23 zkxOcT6z9sWh;n>u4m3ddp4)5`rs>A;DN~9BC1|Azg8i~;V}W;2sBJfLaunbLmKt}1 z1I-D)oj8ms?lwl)*s>f3M3#uZ{HS4(W3pK2o3WC!_*i>5>%!C|8X>+~Yjm+(Gp*3pfd#=cq@+#^_ zo412IkADLos#Q~YPc;KZW?9>OEhZyVP_YjJ?tc6Z9wvxKwdW(O@D1;lm&XbYw|4!~1*ZhkJ3Pog82Cm_=DM_pOeO~h{5TbGSxrvL(e`wOx!)tb><)aw zV?@m{%%@A1c74V4B5vD{Y?cE=GnMh@bZLb@t7>@tI+t!`>AmAa7=9LAFF6bLDK3v9?Aeo@JrM|CN1-Iy*GH6 z9FPs}b2qh2sn4w$f6F#x5J@4)YZP-29&AV^ zu#-pMMA2gixlz&_ct%X-m zV{kL2vl@#9&{T_Jw=C?Z2jJLbVj$K&_Sx7ydUrX#fN*E-ru=kq!{e)&c4h#u1MImp z(4lO=3>5OlvJpnZX6e7K0qWG5Kxvn*y7I^+T)f$f=yGXwBX$VFN9(E2Ln{)K_^SPmoZeZWSbMkcgc*1#U?Xe?6_wW&rojc*T z>9Y(y$M_g!q2%92WD}ufox8=(3fw?#67N_TP|1?>vFSbb+JwX;U_kZWO)9h!SnC z-J(-tC^~1L>Dw}I>-p_El{?T8a53Ol9k4WH*-)fHvjNz^>npHuP%-Z|=q|U}R0NPk zIEcNxSF7rQ6Q)UU&Un~9B!8=a(phY4QZjpZGCudwH1n>Ss@zhV5>Ue%K@X^Z4q3|- z$S$7%0AL9QGUeRUBKcC5^(kTUU0P`zk^J5PD;adUco{q7iskJdv`gd~8kVnMdk_L$kumA!|C{ z@fs5`y_yvlh+-{(cCYt;_)TXhFrtB{?dvrL@9ht86*^mU)W^4L8!>>9xC~o)2y_HG z>eTeR#QeQXi!Y$2DE6IBV^|r8in@WagnOQh4u%CK94Bk!{%HuU`QxOHuUD)Sr0%2& zBLc#@^Tvke?jPrqfmBeR*&S=+t~oj8$gO*wPTovXcpH-^^ELoZi0CQ)LZQM|Tc5|b zM75weUCFI<0gXhKBtvty9QY&7KgXv<0#OL=4EjSu67%UGzurxB&rt7bWbz@_=L#Nx zxwJG)ymG%_sf^B!hUUh{2EWi_{YIli#QKaiI9h{?l~pyui|y^L4J8%-c+MN6x$19; zf)(BkD``G|mAMBUB3*QEcoZBLkQIP9ubnhK8sqO+K&3aygZD0-=6^abKP#OsNeovE ztJ{csO|pY~9iw|`x0HR(%1@e!=Cfy3B~b9?Xui_s9{s~S>9VjH%r5ZL8ghbeF@xA{ z9~<;a#qZ2)cHF`B#FpI*OO<^RvX6a(_zFUnp`^RZ-wGE?6d^^LtV|JyJHn@)D477) z=8j3%=`{nfAAv0PY*oR6va4=g!toQuL;x`R?XfS7gnjYf%resQEb>}+m65`+Y1o)u 
zqgwIn^JvZ)XBgIF;R|asH3;XulY-MY3Z^}-BXoDCk6hE|NUGi@64&tP>0OFfl;o{>+KaUqXf_a|>HC+=hG zyjUCCk9@?>xO@LL#8D&(Jo{ajC>c4H;kh{fwBuXb`_ef7FNN2Ye4&kCL0p%%>BX3a z{cu^(#`Lko2S;@a>nf+&v$X1opJ_F|V>Nqg8X0RuoI1_E>$fyt)QeqI)HV0HT5C+x!Z<&x zb#lzUgj=*kpJRB+OpoZQ=xzoZw{IzT6bow3H~6%Pxh(c>zcN#XG<Esh~pL*D2p3xwunOX>*c49?ds z-|ENm%%HheO%^lcw;U;cjW`;%>D{6-FKTZgT^;OgD2K6J7i-e<~Fi{xx>^XA&74~Q8^qYsii7_A1 zTi&D*&JNj3<&}H)fF*MzsyW=a3JS|JL^0@$AzTa?D_%Zro_hbr$2xoILK&B@uea?JLGF~QIpM@RFXc?pdU)3 z!OtjsuXwg@uV;>?SKv|WpFNd2@GTK{ zyH}QXLAqIk2bU#Q8TXB0k|Vzo6XSL|JFlSp?eFD92JYUyMpzzksH!2?`U1a~jFZZrjZD zCo1I{_YrdaYz#m7yjvjc_Lo9^eMa-@S3^0B3Iw07Rokp}ah0oYeZgiDcTScB4I>ZO z(L#I=E%h-wB^%1c_hE>=}sr_J>oFr7BBQM!&4zz4q1Y?{Tr- zrohg*Q%zjvL;CIvGchd4f9Moy#8dR1lv!3$2b=|&C#FJ%BE9FKO~x=(7`{} zn40>EqS7qtI@RFCwNHAsJ2f+)?_BT83ynuD8)l(N{QN}T%&8DL-gU*^3<}+>YiG~> zQ*de9U*42nVJqD1(7Cg^^+W@G+g)!+R7Xtq=9azw4o1DuJ}%%O4nbl?1!`ffdF`3y zY9bHg?T1Jp`^eG;7(YP^zOu5;98VQ$F<4#CVdzOLMn0Fo>p>RD=i|Q|XWBXRq=!x_ z`R}oBY2HD+JKwC`D!Z5pT%8f87q?G6bNk!1U({CTNh<__1z3q#yjfRd6C(@uggy7! z-Ym_5cIpFTlYEcrOoQj11dWNY=WcOtezV> zVP<@LDj@TG<8Y#QC@l2ZjtWt*AEmAWoz;xSqhSY{P1Fb~hNcZ5k|VvHNLkklgOCud6UiuiG72$5?!_zV~(H?u5Tf66X&aWd~X4o^%$>Vq-4AchDyC4!`Vp|V~)Nz-| znZTE98T8|x;BKvq(l(RQKHejUGjyp6h!6QNlgKeV$wVc#Zx zeRB~XZ4-v<-ayjkj|FJrb^^#56AJA;(_4NTZi#yo^-!C&(166hI9)*s#NHx*4ZKlM zHxFMZqvFXp&-2|)QO=L<;?aIL5ivXKy_2*wm9KN~WD7GNP`jz2K21$nBx=sjUpQkys;`L$uzvVy+s$F zuMx#5phZExSZ;|ll`B!Zobz1X-U?cLAe#lk2=30*S%pSrJ<^ubh>Hz%&-~?CQ;mz; z$8L8N>;E0_L~Miw0TK775dD(6AVIoGB`bT9c|A$^cJ6N8R%9t1xB0HO9rYq2N^wQ3u zkGys`fOYjA>H*$!cE1Sm+~~wqon96s_uUd9#4F!ML(3x|&2)^xf#GO>ls z;E zL4A=&=I|Z=)^ypdt(w6@2!m}`x?kKl`0MW)^@P~a!3^0dvy07)eQwmJK7~1<^OBCO zsI^O#t3_m;>*8lJ^E%CB7>`b(GyIe? 
zXZE;}>^?7-WliUM{AJD`H7>8NWcjM`9k4Fxhn-vO#%O7rOB0pRZy(`ryTA_X%SegR zlje?^$D@wF#I-5NA+&ZE$2k7M4jgvBJlQB&Hr`VQX4dqp@ae4UDfh;yy1aeT7k)_U z5ATz8Dv)NrDBNrVOZJB#mD!@vBik0m?6Dt1+Gdrqza%$#FfhZg?V#fO%Pl_EVrNT$aT&OYm68kZzSZK#BKA3Fv+eu0%kxFY1U)Y+cC^sy_dHo z`tFw8b24Ad}EDy(r4^I-*nV-->uN1 z?z1Spbt8)mB77l?1wM+H?ph5^bi+FbqB&Nh=<8LK`FdS}s}WChcO|B@CSJ1+GY&oF z)HEtfdKurA6rDwOd^RC!TMz#F)ZEBiNJ^zdqB-Bg4(QQRqI3bLI?T z%!LsD+fM`t+lKUm8p&mJ?~V7-6kb9Fx2Lz$k95p42c&k=%L`RKj9ll~<@v1yj*PXO#*DFzJmdNF5f%UWbe+&nLmffmnKlr6r}Ek4Q0B&5<<9k z_(Uf6>@HIDW!wN1I3VL4NZ@9#?dU_yZ{HsSBgCtnR!wM*9>T7=LP6v*k)os8rxT{NyFOH${`x7m^Onx6M*y41T z(gFR=k$K6yZttn8Fn>)z*uo{f^Yv@j@OPkAJGsI%1u}oJC5V*y>HjV;GS}rSUYy<%BittObU1pm?I+sfDD+wL%b0ksmt)V z$~%qq(J=`4U=`H(2tl8M*2$=Q{0j5;-*^uIF@11oczwRhUvS#-*D`1OSfjwK1%}d5 z_`R97`Sf?Gxf%}5Fo0IU7fLv#T*WiWw0;u3{OT?1b9xKM*g zXX|WvcJrFVl@;rS3V?;RSuT1wbPy^Pi&IN{A(;Y^TAG1OglgNhZ?!6iS+ao8i}I)a zM4Z?^wO`#N-FYJ=`2RD|Ly^`}-8+W}V?vLU(A#n6m_1FUx&_V;dY#{Idz@{GXWGR8 zc5dp~*I~yVx1qrQzRNL_@C@qa8F2b;jVpk_aaM&7?Cg3gG9kpDv$NSnmunT9VlrYF zfg!Ff*CssEO5HqqAE?!?rc*(9>p?qUAUJbADxGbP;yTl{rlB4@9_&LiY)}H!lpxKm z<7vh$ij_s;$k-1b+TRhC8x46+Y-tTiX8x7$1#YC)qCxrMCvspzf#0qfvaDY>xIm*7 zVExgS*GDpe{;?7?(K8TSLcyt!SK)4(15$KM&A8mEKp$XG@D(8AQ~NlO-UL|T)u62y ztzdT9F|P?knqLG?3EZXs$$)9nu`B)TuFH*ozb{tvMfV&84V3#B6XCjrarWCfaTfjd z-Kcu!7lv_>=V0gE0_P}%+tWCON&V@+;jzODKhZSbqQz<9JyJRFcow2s+dEM-7^BWVIeO!o85Eb=X z6fkQaGbe7*d-i3cI`}WhixA)d94YEnP#GiptoQCo6Gwvov(iM$V?`BGT^F9q!&&At z;LM3aQRT(t0I`BEy67Imlhpe^yML&A4Xqq=31_xVYg`+hR!N8s?c@wsx5K)OoiKUf zKP^RA9^-+axv$O0q*q>fRCgK)_+fM&9@DA9en- z>yM;nO{L3f!xb+8bp;!-YivI27T;QJIGoU`G4G&|?z3!1*hu=X096U%11h){W($20 z892qi$w5^px1`)Y0=x_pj%TWA_Nt3tf@1~-<_6Q7-aRzVT*`?uYWOi421F}Hc>oUJ zX7yclsQs0E7aZ+#(bGU>d~b!GdHOH=QK$y@7dIFD7z-XTeF zRg{MiC=T#z2ZE7gvpb1|Md z3}5N3QcBuXyc%1%S3Gbl?J_KSKOsrccjt-Q_Y^rf|CL)A|J?u&56?V{7xjqSJNI%n z`&WyRgyj?s)-#Vp_>7-LGfK0SpY1PjH3P)&_OV~EQTP8;D+V~ts>Dwi2?Qfiea7(% z%B?_9imGMr0X#v)_n(BEu-cY&8wLlR)P!hi{RjtQ={ml->HRG|G-jbsE~|p}@n^ri 
zxNSJ4CHA(v9DR-JLTmg!Hnvzl>-|8Z|MEh?tN*+a>`f;&>d>L~0f?yrF6o>D2yXA2TudR9{Iy4ax11&{+vQJ++53V7W}{;^;FK%n2_Xwvz}*`0l>ra+(>o}gip z;|rb^X+{CU{0#iodCz#(C7xULSR7ym_DL7~?c@UXe4&)yk~0XMq&}YqH1!plQA*Z> z&oz}BcYNBFUO3zmBtAX;THZv#xH-`ZYw-B!G=(LBk^#TazBM*g3C?P^;P{(DHf~jN zpMU-iTo5C}_A84~)?E)&#JEzcF!mkFU}69hL0)IH~fi<-}1F1EvOZ*;V!veOB!o#B%brQ29r~ z^Nx&Jx|rB+(`kX%~O2|HxtDP*@0f0=XBerrs57Owat^L)ow4r#9cy!Y(~sR#?JjNIL?9&1!uMOoitYIa>$IK{Kavc3 zr&Bd6U9HRAaBO2L3MXna6xiUg^MH&9;B4m$RK|SAhq7%y46X@^VSd1&a4+D-4o3cB{TN+G0{dDHESSQ-8w;!({z?K<$4iC(c}NMu>zn`Y zLqg!Ah``_fru;uV3^xA9_rEFs4-bL9YxuwaP5FO#xP0#)-~Xai_=%Ojjue8p40!G0 P|EsR5qf(~y>cjs7y4@6% From 36c3f93ad7fb1aeca641959835efc430f744ba2f Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 13:00:08 +0200 Subject: [PATCH 27/33] feat(cli): add reliable runtime stop --all (#30) * feat(cli): add runtime stop all * test(cli): avoid Metabase secret fixture path collision --------- Co-authored-by: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> --- .../connection-metabase-setup.test.ts | 2 +- packages/cli/src/commands/runtime-commands.ts | 4 +- packages/cli/src/index.test.ts | 27 +- packages/cli/src/index.ts | 5 + .../cli/src/managed-python-daemon.test.ts | 154 +++++++ packages/cli/src/managed-python-daemon.ts | 435 +++++++++++++++++- packages/cli/src/runtime.test.ts | 53 ++- packages/cli/src/runtime.ts | 71 ++- 8 files changed, 734 insertions(+), 17 deletions(-) diff --git a/packages/cli/src/commands/connection-metabase-setup.test.ts b/packages/cli/src/commands/connection-metabase-setup.test.ts index cd94565a..cf7308d7 100644 --- a/packages/cli/src/commands/connection-metabase-setup.test.ts +++ b/packages/cli/src/commands/connection-metabase-setup.test.ts @@ -138,7 +138,7 @@ function makeIo(options: { isTTY?: boolean; stdinIsTTY?: boolean } = {}) { describe('runKtxConnectionMetabaseSetup', () => { const fakeMetabaseCredential = 'mb_example'; const existingMetabaseCredential = 'mb_existing'; - const fakeAdminCredential 
= 'pw'; + const fakeAdminCredential = 'admin-secret-value-123'; let tempDir: string; let projectDir: string; diff --git a/packages/cli/src/commands/runtime-commands.ts b/packages/cli/src/commands/runtime-commands.ts index 8f478658..3ce7d9ba 100644 --- a/packages/cli/src/commands/runtime-commands.ts +++ b/packages/cli/src/commands/runtime-commands.ts @@ -53,10 +53,12 @@ export function registerRuntimeCommands(program: Command, context: KtxCliCommand runtime .command('stop') .description('Stop the KTX-managed Python HTTP daemon') - .action(async () => { + .option('--all', 'Stop all KTX daemon processes recorded or discoverable on this machine', false) + .action(async (options: { all?: boolean }) => { await runRuntimeArgs(context, { command: 'stop', cliVersion: context.packageInfo.version, + all: options.all === true, }); }); diff --git a/packages/cli/src/index.test.ts b/packages/cli/src/index.test.ts index 8bc2a3a6..4a45274b 100644 --- a/packages/cli/src/index.test.ts +++ b/packages/cli/src/index.test.ts @@ -143,6 +143,7 @@ describe('runKtxCli', () => { const installIo = makeIo(); const startIo = makeIo(); const stopIo = makeIo(); + const stopAllIo = makeIo(); const statusIo = makeIo(); const doctorIo = makeIo(); const pruneIo = makeIo(); @@ -156,6 +157,7 @@ describe('runKtxCli', () => { runKtxCli(['runtime', 'start', '--feature', 'local-embeddings', '--force'], startIo.io, { runtime }), ).resolves.toBe(0); await expect(runKtxCli(['runtime', 'stop'], stopIo.io, { runtime })).resolves.toBe(0); + await expect(runKtxCli(['runtime', 'stop', '--all'], stopAllIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['runtime', 'status', '--json'], statusIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['runtime', 'doctor'], doctorIo.io, { runtime })).resolves.toBe(0); await expect(runKtxCli(['runtime', 'prune', '--dry-run'], pruneIo.io, { runtime })).resolves.toBe(0); @@ -185,11 +187,21 @@ describe('runKtxCli', () => { { command: 'stop', cliVersion: 
'0.0.0-private', + all: false, }, stopIo.io, ); expect(runtime).toHaveBeenNthCalledWith( 4, + { + command: 'stop', + cliVersion: '0.0.0-private', + all: true, + }, + stopAllIo.io, + ); + expect(runtime).toHaveBeenNthCalledWith( + 5, { command: 'status', cliVersion: '0.0.0-private', @@ -198,7 +210,7 @@ describe('runKtxCli', () => { statusIo.io, ); expect(runtime).toHaveBeenNthCalledWith( - 5, + 6, { command: 'doctor', cliVersion: '0.0.0-private', @@ -207,7 +219,7 @@ describe('runKtxCli', () => { doctorIo.io, ); expect(runtime).toHaveBeenNthCalledWith( - 6, + 7, { command: 'prune', cliVersion: '0.0.0-private', @@ -218,6 +230,17 @@ describe('runKtxCli', () => { ); }); + it('documents runtime stop all in command help', async () => { + const testIo = makeIo(); + + await expect(runKtxCli(['runtime', 'stop', '--help'], testIo.io)).resolves.toBe(0); + + expect(testIo.stdout()).toContain('--all'); + expect(testIo.stdout()).toContain('Stop all KTX daemon processes recorded or discoverable'); + expect(testIo.stdout()).toContain('on this machine'); + expect(testIo.stderr()).toBe(''); + }); + it('routes sl query managed runtime install policies', async () => { const sl = vi.fn(async () => 0); diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 96fbbeec..de906ece 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -47,13 +47,18 @@ export { runKtxRuntime, type KtxRuntimeArgs, type KtxRuntimeDeps } from './runti export { allocateDaemonPort, readManagedPythonDaemonStatus, + stopAllManagedPythonDaemons, startManagedPythonDaemon, stopManagedPythonDaemon, } from './managed-python-daemon.js'; export type { + ManagedPythonDaemonProcessInfo, ManagedPythonDaemonStartResult, ManagedPythonDaemonState, ManagedPythonDaemonStatus, + ManagedPythonDaemonStopAllEntry, + ManagedPythonDaemonStopAllFailure, + ManagedPythonDaemonStopAllResult, ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; export { diff --git 
a/packages/cli/src/managed-python-daemon.test.ts b/packages/cli/src/managed-python-daemon.test.ts index 4e7af22c..ffa69972 100644 --- a/packages/cli/src/managed-python-daemon.test.ts +++ b/packages/cli/src/managed-python-daemon.test.ts @@ -5,9 +5,11 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { readManagedPythonDaemonStatus, startManagedPythonDaemon, + stopAllManagedPythonDaemons, stopManagedPythonDaemon, type ManagedPythonDaemonChild, type ManagedPythonDaemonFetch, + type ManagedPythonDaemonProcessInfo, type ManagedPythonDaemonSpawn, type ManagedPythonDaemonState, } from './managed-python-daemon.js'; @@ -105,6 +107,24 @@ function runningState(root: string, overrides: Partial }; } +function daemonStatePath(root: string, version: string): string { + return join(root, 'runtime', version, 'daemon.json'); +} + +function runningStateForVersion( + root: string, + version: string, + overrides: Partial = {}, +): ManagedPythonDaemonState { + return { + ...runningState(root), + version, + stdoutLog: join(root, 'runtime', version, 'daemon.stdout.log'), + stderrLog: join(root, 'runtime', version, 'daemon.stderr.log'), + ...overrides, + }; +} + describe('managed Python daemon lifecycle', () => { let tempDir: string; @@ -271,4 +291,138 @@ describe('managed Python daemon lifecycle', () => { expect(killProcess).toHaveBeenCalledWith(4242); await expect(readFile(layout(tempDir).daemonStatePath, 'utf8')).rejects.toThrow(); }); + + it('stops all recorded daemon states across runtime versions and removes state files', async () => { + await mkdir(join(tempDir, 'runtime', '0.1.0'), { recursive: true }); + await mkdir(join(tempDir, 'runtime', '0.2.0'), { recursive: true }); + await writeFile( + daemonStatePath(tempDir, '0.1.0'), + `${JSON.stringify(runningStateForVersion(tempDir, '0.1.0', { pid: 1111, port: 61111 }), null, 2)}\n`, + ); + await writeFile( + daemonStatePath(tempDir, '0.2.0'), + `${JSON.stringify(runningStateForVersion(tempDir, 
'0.2.0', { pid: 2222, port: 62222 }), null, 2)}\n`, + ); + const alive = new Set([1111, 2222]); + const killProcess = vi.fn((pid: number) => { + alive.delete(pid); + }); + + const result = await stopAllManagedPythonDaemons({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + listProcesses: vi.fn(async () => []), + processAlive: vi.fn((pid) => alive.has(pid)), + killProcess, + stopGraceMs: 0, + }); + + expect(result.failed).toHaveLength(0); + expect(result.stopped.map((entry) => entry.pid).sort()).toEqual([1111, 2222]); + expect(killProcess).toHaveBeenCalledWith(1111, 'SIGTERM'); + expect(killProcess).toHaveBeenCalledWith(2222, 'SIGTERM'); + await expect(readFile(daemonStatePath(tempDir, '0.1.0'), 'utf8')).rejects.toThrow(); + await expect(readFile(daemonStatePath(tempDir, '0.2.0'), 'utf8')).rejects.toThrow(); + }); + + it('removes stale state when the recorded daemon process is no longer alive', async () => { + await mkdir(layout(tempDir).versionDir, { recursive: true }); + await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`); + + const result = await stopAllManagedPythonDaemons({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + listProcesses: vi.fn(async () => []), + processAlive: vi.fn(() => false), + killProcess: vi.fn(), + stopGraceMs: 0, + }); + + expect(result.stopped).toHaveLength(0); + expect(result.stale.map((entry) => entry.pid)).toEqual([4242]); + await expect(readFile(layout(tempDir).daemonStatePath, 'utf8')).rejects.toThrow(); + }); + + it('deduplicates a daemon found by state and process scan, preferring state metadata', async () => { + await mkdir(layout(tempDir).versionDir, { recursive: true }); + await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`); + const alive = new Set([4242]); + const killProcess = vi.fn((pid: number) => { + alive.delete(pid); + }); + + const result = await stopAllManagedPythonDaemons({ + 
cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + listProcesses: vi.fn(async (): Promise => [ + { pid: 4242, command: 'uv run ktx-daemon serve-http --host 127.0.0.1 --port 61234' }, + ]), + processAlive: vi.fn((pid) => alive.has(pid)), + killProcess, + stopGraceMs: 0, + }); + + expect(result.stopped).toHaveLength(1); + expect(result.stopped[0]).toMatchObject({ + pid: 4242, + source: 'state', + url: 'http://127.0.0.1:58731', + }); + expect(killProcess).toHaveBeenCalledTimes(1); + }); + + it('stops unrecorded ktx-daemon serve-http processes from process scan results', async () => { + const alive = new Set([3333, 5555]); + const killProcess = vi.fn((pid: number) => { + alive.delete(pid); + }); + + const result = await stopAllManagedPythonDaemons({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + listProcesses: vi.fn(async (): Promise => [ + { pid: 3333, command: 'uv run ktx-daemon serve-http --host 127.0.0.1 --port 8765' }, + { pid: 4444, command: 'node server.js --port 8765' }, + { pid: 5555, command: 'grep ktx-daemon serve-http --port 8765' }, + ]), + processAlive: vi.fn((pid) => alive.has(pid)), + killProcess, + stopGraceMs: 0, + }); + + expect(result.failed).toHaveLength(0); + expect(result.stopped).toEqual([ + expect.objectContaining({ + pid: 3333, + source: 'process', + url: 'http://127.0.0.1:8765', + }), + ]); + expect(killProcess).toHaveBeenCalledWith(3333, 'SIGTERM'); + expect(killProcess).not.toHaveBeenCalledWith(4444, expect.anything()); + expect(killProcess).not.toHaveBeenCalledWith(5555, expect.anything()); + }); + + it('reports a failed stop when TERM and KILL leave a daemon running', async () => { + await mkdir(layout(tempDir).versionDir, { recursive: true }); + await writeFile(layout(tempDir).daemonStatePath, `${JSON.stringify(runningState(tempDir), null, 2)}\n`); + + const result = await stopAllManagedPythonDaemons({ + cliVersion: '0.2.0', + runtimeRoot: join(tempDir, 'runtime'), + listProcesses: vi.fn(async () => []), + 
processAlive: vi.fn(() => true), + killProcess: vi.fn(), + stopGraceMs: 0, + }); + + expect(result.stopped).toHaveLength(0); + expect(result.failed).toEqual([ + expect.objectContaining({ + pid: 4242, + detail: 'Process still running after SIGKILL', + }), + ]); + expect(await readFile(layout(tempDir).daemonStatePath, 'utf8')).toContain('"pid": 4242'); + }); }); diff --git a/packages/cli/src/managed-python-daemon.ts b/packages/cli/src/managed-python-daemon.ts index 2caf9182..b99de581 100644 --- a/packages/cli/src/managed-python-daemon.ts +++ b/packages/cli/src/managed-python-daemon.ts @@ -1,7 +1,9 @@ -import { spawn } from 'node:child_process'; -import { mkdir, open, readFile, rm, writeFile } from 'node:fs/promises'; +import { execFile, spawn } from 'node:child_process'; +import { mkdir, open, readdir, readFile, rm, writeFile } from 'node:fs/promises'; import { createServer } from 'node:net'; +import { join } from 'node:path'; import { setTimeout as delay } from 'node:timers/promises'; +import { promisify } from 'node:util'; import { z } from 'zod'; import { installManagedPythonRuntime, @@ -44,6 +46,35 @@ export interface ManagedPythonDaemonStopResult { state?: ManagedPythonDaemonState; } +export interface ManagedPythonDaemonProcessInfo { + pid: number; + command: string; +} + +export type ManagedPythonDaemonStopAllSource = 'state' | 'process'; + +export interface ManagedPythonDaemonStopAllEntry { + pid: number; + source: ManagedPythonDaemonStopAllSource; + url?: string; + health?: 'healthy' | 'unreachable'; + version?: string; + command?: string; + statePaths: string[]; +} + +export interface ManagedPythonDaemonStopAllFailure extends ManagedPythonDaemonStopAllEntry { + detail: string; +} + +export interface ManagedPythonDaemonStopAllResult { + runtimeRoot: string; + stopped: ManagedPythonDaemonStopAllEntry[]; + stale: ManagedPythonDaemonStopAllEntry[]; + failed: ManagedPythonDaemonStopAllFailure[]; + scanErrors: string[]; +} + export interface 
ManagedPythonDaemonChild { pid?: number; unref(): void; @@ -68,6 +99,8 @@ export type ManagedPythonDaemonFetch = ( text(): Promise; }>; +export type ManagedPythonDaemonKillProcess = (pid: number, signal?: NodeJS.Signals) => void; + export interface ManagedPythonDaemonStartOptions extends ManagedPythonRuntimeLayoutOptions { features: KtxRuntimeFeature[]; force?: boolean; @@ -76,7 +109,7 @@ export interface ManagedPythonDaemonStartOptions extends ManagedPythonRuntimeLay fetch?: ManagedPythonDaemonFetch; allocatePort?: () => Promise; processAlive?: (pid: number) => boolean; - killProcess?: (pid: number) => void; + killProcess?: ManagedPythonDaemonKillProcess; now?: () => Date; startupTimeoutMs?: number; pollIntervalMs?: number; @@ -89,9 +122,20 @@ export interface ManagedPythonDaemonStatusOptions extends ManagedPythonRuntimeLa export interface ManagedPythonDaemonStopOptions extends ManagedPythonRuntimeLayoutOptions { processAlive?: (pid: number) => boolean; - killProcess?: (pid: number) => void; + killProcess?: ManagedPythonDaemonKillProcess; } +export interface ManagedPythonDaemonStopAllOptions extends ManagedPythonRuntimeLayoutOptions { + listProcesses?: () => Promise; + processAlive?: (pid: number) => boolean; + killProcess?: ManagedPythonDaemonKillProcess; + stopGraceMs?: number; + pollIntervalMs?: number; + healthProbeMs?: number; +} + +const execFileAsync = promisify(execFile); + const daemonStateSchema = z.object({ schemaVersion: z.literal(1), pid: z.number().int().positive(), @@ -126,9 +170,9 @@ function defaultProcessAlive(pid: number): boolean { } } -function defaultKillProcess(pid: number): void { +function defaultKillProcess(pid: number, signal: NodeJS.Signals = 'SIGTERM'): void { try { - process.kill(pid, 'SIGTERM'); + process.kill(pid, signal); } catch (error) { const code = (error as { code?: unknown }).code; if (code !== 'ESRCH') { @@ -293,7 +337,7 @@ async function stopRecordedDaemon(input: { layout: ManagedPythonRuntimeLayout; state: 
ManagedPythonDaemonState; processAlive: (pid: number) => boolean; - killProcess: (pid: number) => void; + killProcess: ManagedPythonDaemonKillProcess; }): Promise { if (input.processAlive(input.state.pid)) { input.killProcess(input.state.pid); @@ -301,6 +345,323 @@ async function stopRecordedDaemon(input: { await removeState(input.layout); } +function runtimeRootForStopAll(options: ManagedPythonRuntimeLayoutOptions): string { + return managedPythonRuntimeLayout(options).runtimeRoot; +} + +async function removeStatePaths(paths: string[]): Promise { + await Promise.all([...new Set(paths)].map((path) => rm(path, { force: true }))); +} + +interface ManagedPythonDaemonStopCandidate { + pid: number; + source: ManagedPythonDaemonStopAllSource; + host?: string; + port?: number; + version?: string; + command?: string; + statePaths: string[]; +} + +function candidateUrl(candidate: ManagedPythonDaemonStopCandidate): string | undefined { + if (!candidate.host || !candidate.port) { + return undefined; + } + return `http://${candidate.host}:${candidate.port}`; +} + +function candidateEntry(candidate: ManagedPythonDaemonStopCandidate): ManagedPythonDaemonStopAllEntry { + return { + pid: candidate.pid, + source: candidate.source, + ...(candidateUrl(candidate) ? { url: candidateUrl(candidate) } : {}), + ...(candidate.version ? { version: candidate.version } : {}), + ...(candidate.command ? 
{ command: candidate.command } : {}), + statePaths: [...candidate.statePaths], + }; +} + +async function probeCandidateHealth( + candidate: ManagedPythonDaemonStopCandidate, + timeoutMs: number, +): Promise<'healthy' | 'unreachable' | undefined> { + const url = candidateUrl(candidate); + if (!url) { + return undefined; + } + const controller = new AbortController(); + const timeout = setTimeout(() => { + controller.abort(); + }, timeoutMs); + try { + const response = await fetch(`${url}/health`, { signal: controller.signal }); + if (!response.ok) { + return 'unreachable'; + } + const body = (await response.json()) as unknown; + if (!body || typeof body !== 'object' || Array.isArray(body)) { + return 'unreachable'; + } + return (body as Record).status === 'healthy' ? 'healthy' : 'unreachable'; + } catch { + return 'unreachable'; + } finally { + clearTimeout(timeout); + } +} + +async function readStateCandidates(runtimeRoot: string): Promise { + let entries; + try { + entries = await readdir(runtimeRoot, { withFileTypes: true }); + } catch (error) { + const code = (error as { code?: unknown }).code; + if (code === 'ENOENT') { + return []; + } + throw error; + } + const candidates: ManagedPythonDaemonStopCandidate[] = []; + for (const entry of entries) { + if (!entry.isDirectory()) { + continue; + } + const statePath = join(runtimeRoot, entry.name, 'daemon.json'); + let state: ManagedPythonDaemonState | undefined; + try { + state = await readState(statePath); + } catch { + continue; + } + if (!state) { + continue; + } + candidates.push({ + pid: state.pid, + source: 'state', + host: state.host, + port: state.port, + version: state.version, + statePaths: [statePath], + }); + } + return candidates; +} + +function tokenizeCommand(command: string): string[] { + const tokens: string[] = []; + for (const match of command.matchAll(/"([^"]*)"|'([^']*)'|(\S+)/g)) { + tokens.push(match[1] ?? match[2] ?? match[3] ?? 
''); + } + return tokens; +} + +function executableName(token: string): string { + return token.split(/[\\/]/).at(-1) ?? token; +} + +function isKtxDaemonExecutable(token: string): boolean { + return executableName(token) === 'ktx-daemon' || executableName(token) === 'ktx-daemon.exe'; +} + +function normalizedExecutableName(token: string): string { + return executableName(token).replace(/\.exe$/i, '').toLowerCase(); +} + +function hasUvRunPrefix(tokens: string[], daemonIndex: number): boolean { + return normalizedExecutableName(tokens[0] ?? '') === 'uv' && tokens.slice(1, daemonIndex).includes('run'); +} + +function isPythonExecutable(token: string): boolean { + const name = normalizedExecutableName(token); + return name === 'python' || name === 'python3'; +} + +function hasPythonModulePrefix(tokens: string[], moduleFlagIndex: number): boolean { + if (moduleFlagIndex === 1 && isPythonExecutable(tokens[0] ?? '')) { + return true; + } + return ( + normalizedExecutableName(tokens[0] ?? '') === 'uv' && + tokens.slice(1, moduleFlagIndex).includes('run') && + tokens.some((token, index) => index < moduleFlagIndex && isPythonExecutable(token)) + ); +} + +function isKtxDaemonServeHttp(tokens: string[]): boolean { + for (let index = 0; index < tokens.length; index += 1) { + if ( + isKtxDaemonExecutable(tokens[index] ?? 
'') && + tokens[index + 1] === 'serve-http' && + (index === 0 || hasUvRunPrefix(tokens, index)) + ) { + return true; + } + if ( + tokens[index] === '-m' && + tokens[index + 1] === 'ktx_daemon' && + tokens[index + 2] === 'serve-http' && + hasPythonModulePrefix(tokens, index) + ) { + return true; + } + } + return false; +} + +function parseCommandOption(tokens: string[], option: string): string | undefined { + for (let index = 0; index < tokens.length; index += 1) { + const token = tokens[index]; + if (token === option) { + return tokens[index + 1]; + } + if (token?.startsWith(`${option}=`)) { + return token.slice(option.length + 1); + } + } + return undefined; +} + +function processCandidate(processInfo: ManagedPythonDaemonProcessInfo): ManagedPythonDaemonStopCandidate | undefined { + const tokens = tokenizeCommand(processInfo.command); + if (!isKtxDaemonServeHttp(tokens)) { + return undefined; + } + const host = parseCommandOption(tokens, '--host') ?? '127.0.0.1'; + const rawPort = parseCommandOption(tokens, '--port'); + const parsedPort = rawPort ? Number.parseInt(rawPort, 10) : 8765; + const port = Number.isInteger(parsedPort) && parsedPort >= 1 && parsedPort <= 65535 ? 
parsedPort : 8765; + return { + pid: processInfo.pid, + source: 'process', + host, + port, + command: processInfo.command, + statePaths: [], + }; +} + +function mergeCandidates(candidates: ManagedPythonDaemonStopCandidate[]): ManagedPythonDaemonStopCandidate[] { + const byPid = new Map(); + for (const candidate of candidates) { + const existing = byPid.get(candidate.pid); + if (!existing) { + byPid.set(candidate.pid, { ...candidate, statePaths: [...candidate.statePaths] }); + continue; + } + existing.statePaths.push(...candidate.statePaths); + if (existing.source === 'process' && candidate.source === 'state') { + byPid.set(candidate.pid, { + ...candidate, + statePaths: [...new Set([...existing.statePaths, ...candidate.statePaths])], + }); + } else { + existing.statePaths = [...new Set(existing.statePaths)]; + } + } + return [...byPid.values()].sort((left, right) => left.pid - right.pid); +} + +function parsePosixProcessList(output: string): ManagedPythonDaemonProcessInfo[] { + const processes: ManagedPythonDaemonProcessInfo[] = []; + for (const line of output.split(/\r?\n/)) { + const match = line.match(/^\s*(\d+)\s+(.+)$/); + if (!match) { + continue; + } + processes.push({ pid: Number.parseInt(match[1], 10), command: match[2] }); + } + return processes; +} + +function parseWindowsProcessList(output: string): ManagedPythonDaemonProcessInfo[] { + if (!output.trim()) { + return []; + } + const parsed = JSON.parse(output) as unknown; + const records = Array.isArray(parsed) ? 
parsed : [parsed]; + const processes: ManagedPythonDaemonProcessInfo[] = []; + for (const record of records) { + if (!record || typeof record !== 'object') { + continue; + } + const value = record as Record; + const pid = value.ProcessId; + const command = value.CommandLine; + if (typeof pid === 'number' && typeof command === 'string' && command.length > 0) { + processes.push({ pid, command }); + } + } + return processes; +} + +async function defaultListProcesses(platform: NodeJS.Platform = process.platform): Promise { + if (platform === 'win32') { + const command = [ + 'Get-CimInstance Win32_Process', + '| Where-Object { $_.CommandLine -ne $null }', + '| Select-Object ProcessId,CommandLine', + '| ConvertTo-Json -Compress', + ].join(' '); + const { stdout } = await execFileAsync('powershell.exe', ['-NoProfile', '-Command', command], { + encoding: 'utf8', + maxBuffer: 10 * 1024 * 1024, + }); + return parseWindowsProcessList(stdout); + } + const { stdout } = await execFileAsync('ps', ['-axo', 'pid=,command='], { + encoding: 'utf8', + maxBuffer: 10 * 1024 * 1024, + }); + return parsePosixProcessList(stdout); +} + +async function waitUntilStopped(input: { + pid: number; + processAlive: (pid: number) => boolean; + timeoutMs: number; + pollIntervalMs: number; +}): Promise { + const deadline = Date.now() + input.timeoutMs; + do { + if (!input.processAlive(input.pid)) { + return true; + } + if (Date.now() >= deadline) { + break; + } + await delay(input.pollIntervalMs); + } while (Date.now() <= deadline); + return !input.processAlive(input.pid); +} + +async function discoverStopAllCandidates( + options: ManagedPythonDaemonStopAllOptions, +): Promise<{ + runtimeRoot: string; + candidates: ManagedPythonDaemonStopCandidate[]; + scanErrors: string[]; +}> { + const runtimeRoot = runtimeRootForStopAll(options); + const stateCandidates = await readStateCandidates(runtimeRoot); + const scanErrors: string[] = []; + let processCandidates: ManagedPythonDaemonStopCandidate[] = []; + 
try { + const processes = await (options.listProcesses ?? defaultListProcesses)(); + processCandidates = processes.flatMap((processInfo) => { + const candidate = processCandidate(processInfo); + return candidate ? [candidate] : []; + }); + } catch (error) { + scanErrors.push(error instanceof Error ? error.message : String(error)); + } + return { + runtimeRoot, + candidates: mergeCandidates([...stateCandidates, ...processCandidates]), + scanErrors, + }; +} + export async function startManagedPythonDaemon( options: ManagedPythonDaemonStartOptions, ): Promise { @@ -404,3 +765,63 @@ export async function stopManagedPythonDaemon( }); return { status: 'stopped', layout, state }; } + +export async function stopAllManagedPythonDaemons( + options: ManagedPythonDaemonStopAllOptions, +): Promise { + const processAlive = options.processAlive ?? defaultProcessAlive; + const killProcess = options.killProcess ?? defaultKillProcess; + const stopGraceMs = options.stopGraceMs ?? 500; + const pollIntervalMs = options.pollIntervalMs ?? 50; + const healthProbeMs = options.healthProbeMs ?? 100; + const discovery = await discoverStopAllCandidates(options); + const stopped: ManagedPythonDaemonStopAllEntry[] = []; + const stale: ManagedPythonDaemonStopAllEntry[] = []; + const failed: ManagedPythonDaemonStopAllFailure[] = []; + + for (const candidate of discovery.candidates) { + const health = await probeCandidateHealth(candidate, healthProbeMs); + const entry = { ...candidateEntry(candidate), ...(health ? 
{ health } : {}) }; + if (!processAlive(candidate.pid)) { + await removeStatePaths(candidate.statePaths); + stale.push(entry); + continue; + } + try { + killProcess(candidate.pid, 'SIGTERM'); + if ( + !(await waitUntilStopped({ + pid: candidate.pid, + processAlive, + timeoutMs: stopGraceMs, + pollIntervalMs, + })) + ) { + killProcess(candidate.pid, 'SIGKILL'); + if ( + !(await waitUntilStopped({ + pid: candidate.pid, + processAlive, + timeoutMs: stopGraceMs, + pollIntervalMs, + })) + ) { + failed.push({ ...entry, detail: 'Process still running after SIGKILL' }); + continue; + } + } + await removeStatePaths(candidate.statePaths); + stopped.push(entry); + } catch (error) { + failed.push({ ...entry, detail: error instanceof Error ? error.message : String(error) }); + } + } + + return { + runtimeRoot: discovery.runtimeRoot, + stopped, + stale, + failed, + scanErrors: discovery.scanErrors, + }; +} diff --git a/packages/cli/src/runtime.test.ts b/packages/cli/src/runtime.test.ts index e367d339..46f708b2 100644 --- a/packages/cli/src/runtime.test.ts +++ b/packages/cli/src/runtime.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; import type { + ManagedPythonDaemonStopAllResult, ManagedPythonDaemonStartResult, ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; @@ -199,13 +200,63 @@ describe('runKtxRuntime', () => { })), }; - await expect(runKtxRuntime({ command: 'stop', cliVersion: '0.2.0' }, io.io, deps)).resolves.toBe(0); + await expect(runKtxRuntime({ command: 'stop', cliVersion: '0.2.0', all: false }, io.io, deps)).resolves.toBe(0); expect(deps.stopDaemon).toHaveBeenCalledWith({ cliVersion: '0.2.0' }); expect(io.stdout()).toContain('Stopped KTX Python daemon'); expect(io.stdout()).toContain('pid: 4242'); }); + it('stops all discovered Python daemons and reports the summary', async () => { + const io = makeIo(); + const deps: KtxRuntimeDeps = { + stopAllDaemons: vi.fn(async (): Promise => ({ + runtimeRoot: '/runtime', + stopped: 
[ + { pid: 4242, source: 'state', url: 'http://127.0.0.1:61234', statePaths: ['/runtime/0.2.0/daemon.json'] }, + { pid: 5252, source: 'process', url: 'http://127.0.0.1:8765', statePaths: [] }, + ], + stale: [], + failed: [], + scanErrors: [], + })), + }; + + await expect(runKtxRuntime({ command: 'stop', cliVersion: '0.2.0', all: true }, io.io, deps)).resolves.toBe(0); + + expect(deps.stopAllDaemons).toHaveBeenCalledWith({ cliVersion: '0.2.0' }); + expect(io.stdout()).toContain('Stopped 2 KTX Python daemons'); + expect(io.stdout()).toContain('pid: 4242 source: state url: http://127.0.0.1:61234'); + expect(io.stdout()).toContain('pid: 5252 source: process url: http://127.0.0.1:8765'); + }); + + it('returns failure when stop all cannot stop every daemon', async () => { + const io = makeIo(); + const deps: KtxRuntimeDeps = { + stopAllDaemons: vi.fn(async (): Promise => ({ + runtimeRoot: '/runtime', + stopped: [], + stale: [], + failed: [ + { + pid: 4242, + source: 'state', + url: 'http://127.0.0.1:61234', + statePaths: ['/runtime/0.2.0/daemon.json'], + detail: 'Process still running after SIGKILL', + }, + ], + scanErrors: ['ps failed'], + })), + }; + + await expect(runKtxRuntime({ command: 'stop', cliVersion: '0.2.0', all: true }, io.io, deps)).resolves.toBe(1); + + expect(io.stderr()).toContain('Stopped 0 KTX Python daemons; failed 1'); + expect(io.stderr()).toContain('pid: 4242 source: state url: http://127.0.0.1:61234'); + expect(io.stderr()).toContain('process scan: ps failed'); + }); + it('prints runtime status as JSON', async () => { const io = makeIo(); const deps: KtxRuntimeDeps = { diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index fe2b5f74..e88f2b31 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -1,7 +1,9 @@ import type { KtxCliIo } from './cli-runtime.js'; import { + stopAllManagedPythonDaemons, startManagedPythonDaemon, stopManagedPythonDaemon, + type ManagedPythonDaemonStopAllResult, type 
ManagedPythonDaemonStartResult, type ManagedPythonDaemonStopResult, } from './managed-python-daemon.js'; @@ -22,7 +24,7 @@ import { export type KtxRuntimeArgs = | { command: 'install'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean } | { command: 'start'; cliVersion: string; feature: KtxRuntimeFeature; force: boolean } - | { command: 'stop'; cliVersion: string } + | { command: 'stop'; cliVersion: string; all: boolean } | { command: 'status'; cliVersion: string; json: boolean } | { command: 'doctor'; cliVersion: string; json: boolean } | { command: 'prune'; cliVersion: string; dryRun: boolean; yes: boolean }; @@ -35,6 +37,7 @@ export interface KtxRuntimeDeps { force?: boolean; }) => Promise; stopDaemon?: (options: { cliVersion: string }) => Promise; + stopAllDaemons?: (options: { cliVersion: string }) => Promise; readStatus?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; doctorRuntime?: (options: ManagedPythonRuntimeLayoutOptions) => Promise; pruneRuntime?: (options: { @@ -81,6 +84,58 @@ function writeDaemonStop(io: KtxCliIo, result: ManagedPythonDaemonStopResult): v io.stdout.write(`state: ${result.layout.daemonStatePath}\n`); } +function writeStopAllEntry(io: KtxCliIo, entry: { pid: number; source: string; url?: string; health?: string; detail?: string }): void { + io.stdout.write( + `pid: ${entry.pid} source: ${entry.source}${entry.url ? ` url: ${entry.url}` : ''}${ + entry.health ? ` health: ${entry.health}` : '' + }${ + entry.detail ? 
` detail: ${entry.detail}` : '' + }\n`, + ); +} + +function writeDaemonStopAll(io: KtxCliIo, result: ManagedPythonDaemonStopAllResult): number { + const failed = result.failed.length + result.scanErrors.length; + if ( + result.stopped.length === 0 && + result.stale.length === 0 && + result.failed.length === 0 && + result.scanErrors.length === 0 + ) { + io.stdout.write('No KTX Python daemons found\n'); + return 0; + } + if (failed === 0) { + io.stdout.write(`Stopped ${result.stopped.length} KTX Python daemons\n`); + if (result.stale.length > 0) { + io.stdout.write(`Cleaned ${result.stale.length} stale daemon states\n`); + } + for (const entry of result.stopped) { + writeStopAllEntry(io, entry); + } + for (const entry of result.stale) { + writeStopAllEntry(io, entry); + } + return 0; + } + io.stderr.write( + `Stopped ${result.stopped.length} KTX Python daemons; failed ${result.failed.length}${ + result.stale.length > 0 ? `; cleaned stale ${result.stale.length}` : '' + }\n`, + ); + for (const entry of result.failed) { + io.stderr.write( + `pid: ${entry.pid} source: ${entry.source}${entry.url ? ` url: ${entry.url}` : ''}${ + entry.health ? ` health: ${entry.health}` : '' + } detail: ${entry.detail}\n`, + ); + } + for (const error of result.scanErrors) { + io.stderr.write(`process scan: ${error}\n`); + } + return 1; +} + function writeStatus(io: KtxCliIo, status: ManagedPythonRuntimeStatus): void { io.stdout.write('KTX Python runtime\n'); io.stdout.write(`status: ${status.kind}\n`); @@ -142,10 +197,16 @@ export async function runKtxRuntime( return 0; } if (args.command === 'stop') { - const stopDaemon = deps.stopDaemon ?? stopManagedPythonDaemon; - const result = await stopDaemon({ cliVersion: args.cliVersion }); - writeDaemonStop(io, result); - return 0; + if (args.all) { + const stopAllDaemons = deps.stopAllDaemons ?? 
stopAllManagedPythonDaemons; + const result = await stopAllDaemons({ cliVersion: args.cliVersion }); + return writeDaemonStopAll(io, result); + } else { + const stopDaemon = deps.stopDaemon ?? stopManagedPythonDaemon; + const result = await stopDaemon({ cliVersion: args.cliVersion }); + writeDaemonStop(io, result); + return 0; + } } if (args.command === 'status') { const readStatus = deps.readStatus ?? readManagedPythonRuntimeStatus; From 52400c599ccbcc3faea738f710a43ad156139037 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 13:02:06 +0200 Subject: [PATCH 28/33] chore: standardize pre-commit checks --- .pre-commit-config.yaml | 70 ++++++++++++++++++++++++++++++++ scripts/precommit-check.mjs | 16 +++----- scripts/precommit-check.test.mjs | 15 +++++-- 3 files changed, 87 insertions(+), 14 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..555d7fc0 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,70 @@ +# See https://pre-commit.com for hook documentation. 
+repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-json + - id: check-toml + - id: check-added-large-files + args: ["--maxkb=1000"] + - id: check-merge-conflict + - id: check-case-conflict + - id: mixed-line-ending + + - repo: https://github.com/asottile/pyupgrade + rev: v3.21.2 + hooks: + - id: pyupgrade + name: pyupgrade (python) + files: ^python/ + args: [--py313-plus] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.2 + hooks: + - id: ruff + name: ruff (python) + files: ^python/ + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + name: ruff format (python) + files: ^python/ + + - repo: local + hooks: + - id: ktx-package-checks + name: ktx package checks + entry: node scripts/precommit-check.mjs + language: system + files: ^(packages/|scripts/|python/|package\.json$|pnpm-lock\.yaml$|pnpm-workspace\.yaml$|release-policy\.json$|tsconfig\.base\.json$|pyproject\.toml$|uv\.lock$|uv\.toml$) + + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + exclude: | + (?x)^( + .*\.lock$| + .*pnpm-lock\.yaml$| + .*package-lock\.json$| + .*yarn\.lock$| + .*\.log$| + .*\.dump$| + .*\.sql$| + .*\.csv$| + .*\.db$| + .*\.sqlite$| + .*\.sqlite3$| + .*/node_modules/.*| + .*/\.venv/.*| + .*/dist/.*| + .*/build/.*| + .*/coverage/.*| + .*/htmlcov/.*| + .*\.gen\.ts$| + .*\.gen\.py$| + .*\.generated\.ts$ + )$ diff --git a/scripts/precommit-check.mjs b/scripts/precommit-check.mjs index fdd405bf..299db534 100644 --- a/scripts/precommit-check.mjs +++ b/scripts/precommit-check.mjs @@ -1,12 +1,11 @@ #!/usr/bin/env node import { spawnSync } from 'node:child_process'; import { existsSync, readFileSync } from 'node:fs'; -import { dirname, join, relative, sep } from 'node:path'; +import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; const scriptPath = 
fileURLToPath(import.meta.url); const ktxRoot = dirname(dirname(scriptPath)); -const repoRoot = dirname(ktxRoot); const packageNameByDir = new Map( [ @@ -35,7 +34,8 @@ const pythonPackageTests = new Map([ ]); function normalizeFilePath(filePath) { - return filePath.replaceAll('\\', '/').replace(/^\.\//, ''); + const normalized = filePath.replaceAll('\\', '/').replace(/^\.\//, ''); + return normalized.startsWith('ktx/') ? normalized.slice('ktx/'.length) : normalized; } function stablePush(commands, key, cmd, args) { @@ -68,13 +68,7 @@ export function planChecks(files) { let runAllPythonTests = false; for (const rawFile of files) { - const file = normalizeFilePath(rawFile); - - if (!file.startsWith('ktx/')) { - continue; - } - - const ktxFile = file.slice('ktx/'.length); + const ktxFile = normalizeFilePath(rawFile); if (ktxFile.startsWith('packages/')) { const [, packageDir, ...rest] = ktxFile.split('/'); @@ -189,6 +183,6 @@ export function runChecks(files) { return 0; } -if (process.argv[1] && relative(repoRoot, process.argv[1]).split(sep).join('/') === 'ktx/scripts/precommit-check.mjs') { +if (process.argv[1] && resolve(process.argv[1]) === scriptPath) { process.exitCode = runChecks(process.argv.slice(2)); } diff --git a/scripts/precommit-check.test.mjs b/scripts/precommit-check.test.mjs index 55ef66bb..40bd1716 100644 --- a/scripts/precommit-check.test.mjs +++ b/scripts/precommit-check.test.mjs @@ -12,7 +12,16 @@ describe('precommit-check', () => { assert.deepEqual(commandKeys(['outside-workspace/src/app.ts']), []); }); - it('runs only the touched package checks for package code', () => { + it('runs only the touched package checks for standalone package paths', () => { + assert.deepEqual(commandKeys(['packages/cli/src/index.ts']), [ + 'boundary-check', + 'type-check:@ktx/cli', + 'build:@ktx/cli', + 'test:@ktx/cli', + ]); + }); + + it('accepts legacy subtree-prefixed package paths', () => { assert.deepEqual(commandKeys(['ktx/packages/cli/src/index.ts']), [ 
'boundary-check', 'type-check:@ktx/cli', @@ -22,12 +31,12 @@ describe('precommit-check', () => { }); it('runs the matching script test when a script changes', () => { - assert.deepEqual(commandKeys(['ktx/scripts/check-boundaries.mjs']), [ + assert.deepEqual(commandKeys(['scripts/check-boundaries.mjs']), [ 'script-test:scripts/check-boundaries.test.mjs', ]); }); it('runs the touched python package tests', () => { - assert.deepEqual(commandKeys(['ktx/python/ktx-sl/semantic_layer/parser.py']), ['pytest:ktx-sl']); + assert.deepEqual(commandKeys(['python/ktx-sl/semantic_layer/parser.py']), ['pytest:ktx-sl']); }); }); From 22e1706907eb6497952b36f4ddee901fef8a238d Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 13:37:18 +0200 Subject: [PATCH 29/33] chore: ignore devtools artifacts --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ed14196b..112e7faa 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,4 @@ yarn-error.log* *.swo *~ .vercel +.devtools From e1129dd6a99672dbe04a02cb4e87218df916be3c Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 14:07:02 +0200 Subject: [PATCH 30/33] fix(cli): scale nested scan progress phases (#35) --- packages/cli/src/scan.test.ts | 26 ++++++++++++++++++++++++++ packages/cli/src/scan.ts | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/scan.test.ts b/packages/cli/src/scan.test.ts index 525ae53d..152c8b3a 100644 --- a/packages/cli/src/scan.test.ts +++ b/packages/cli/src/scan.test.ts @@ -573,6 +573,32 @@ describe('runKtxScan', () => { expect(io.stdout()).toContain('\n[90%] Building embeddings 1/4 batches\n'); }); + it('scales nested progress phases by the parent phase weight', async () => { + const io = makeIo({ isTTY: true }); + const previousCi = process.env.CI; + delete process.env.CI; + + try { + const progress = createCliScanProgress(io.io); + await 
progress.update(0.82, 'Enriching schema metadata'); + const enrichmentProgress = progress.startPhase(0.18); + await enrichmentProgress.update(0.05, 'Loaded schema snapshot with 56 tables'); + const descriptionProgress = enrichmentProgress.startPhase(0.45); + await descriptionProgress.update(37 / 56, 'Generating descriptions 37/56 tables', { transient: true }); + await descriptionProgress.update(1, 'Generated descriptions for 56 tables'); + } finally { + if (previousCi === undefined) { + delete process.env.CI; + } else { + process.env.CI = previousCi; + } + } + + expect(io.stdout()).toContain('\r[88%] Generating descriptions 37/56 tables'); + expect(io.stdout()).toContain('\n[91%] Generated descriptions for 56 tables\n'); + expect(io.stdout()).not.toContain('[100%] Generating descriptions 37/56 tables'); + }); + it('flushes transient TTY progress messages before printing scan failures', async () => { await initKtxProject({ projectDir: tempDir, projectName: 'warehouse' }); const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise => { diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index f89a9d18..e3bda577 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -527,7 +527,7 @@ export function createCliScanProgress( io.stdout.write(`${line}\n`); }, startPhase(phaseWeight: number) { - return createCliScanProgress(io, state, state.progress, phaseWeight); + return createCliScanProgress(io, state, state.progress, weight * phaseWeight); }, flush() { if (!shouldWrite || !state.hasPendingTransient) { From f422facf1083e2849d306bd69ad88112ac72233c Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 14:21:57 +0200 Subject: [PATCH 31/33] fix(cli): clarify historic SQL ingest progress (#36) --- AGENTS.md | 13 +++++ packages/cli/src/ingest.test.ts | 97 ++++++++++++++++++++++++++++++++- packages/cli/src/ingest.ts | 49 ++++++++++++++--- 3 files changed, 150 insertions(+), 9 deletions(-) diff --git a/AGENTS.md 
b/AGENTS.md index e8062dcb..2e5a684a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -156,6 +156,19 @@ pnpm run test 2>&1 | tee /tmp/ktx-test-output.log - Do not manually edit generated or built output under `dist/`; edit source and rebuild. +### CLI Standards + +- Use Commander for CLI command trees, arguments, options, help text, custom + parsers, and async action dispatch. Prefer `@commander-js/extra-typings` for + typed command definitions, use `InvalidArgumentError` for parse failures, and + call `parseAsync` when actions await asynchronous work. +- Use `@clack/prompts` for interactive flows. Always handle cancellation with + `isCancel` plus `cancel`, stop active spinners before exiting, and keep prompts + grouped or factored so multi-step setup flows share cancellation behavior. +- Keep command behavior scriptable: prefer flags and config over prompts when + values are supplied, and reserve prompts for interactive missing input or + explicit setup flows. + ### Zod Naming Convention ```typescript diff --git a/packages/cli/src/ingest.test.ts b/packages/cli/src/ingest.test.ts index 59df5e86..0307ca9e 100644 --- a/packages/cli/src/ingest.test.ts +++ b/packages/cli/src/ingest.test.ts @@ -1075,10 +1075,105 @@ describe('runKtxIngest', () => { const stdout = io.stdout(); expect(stdout).toContain('[45%] Planned 2 work units'); expect(stdout).toContain('[55%] Processing 1/2 work units: historic-sql-table-public-orders'); - expect(stdout).toContain('[58%] Processing 1/2 work units: historic-sql-table-public-orders step 7/40'); + expect(stdout).toContain( + '\r[58%] Processing work units: 0/2 complete, 1 active; latest historic-sql-table-public-orders step 7/40\u001b[K', + ); expect(stdout).toContain('[68%] Processed 1/2 work units'); }); + it('renders concurrent WorkUnit step progress as transient aggregate status', async () => { + const projectDir = join(tempDir, 'historic-sql-concurrent-progress-project'); + await mkdir(projectDir, { recursive: true }); + await writeFile( + 
join(projectDir, 'ktx.yaml'), + [ + 'project: historic-sql-concurrent-progress-project', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' historicSql:', + ' enabled: true', + ' dialect: postgres', + ' minExecutions: 2', + 'ingest:', + ' adapters:', + ' - historic-sql', + '', + ].join('\n'), + 'utf-8', + ); + const createdAdapters: SourceAdapter[] = [ + { source: 'historic-sql', skillNames: [], detect: async () => true, chunk: async () => ({ workUnits: [] }) }, + ]; + const workUnitKeys = [ + 'historic-sql-table-public-orders', + 'historic-sql-table-public-customers', + 'historic-sql-table-public-line-items', + 'historic-sql-table-public-payments', + 'historic-sql-table-public-products', + 'historic-sql-table-public-suppliers', + ]; + const runLocal = vi.fn(async (input: RunLocalIngestOptions) => { + input.memoryFlow?.update({ + plannedWorkUnits: workUnitKeys.map((unitKey) => ({ + unitKey, + rawFiles: [`tables/${unitKey}.json`], + peerFileCount: 0, + dependencyCount: 0, + })), + }); + input.memoryFlow?.emit({ + type: 'chunks_planned', + chunkCount: workUnitKeys.length, + workUnitCount: workUnitKeys.length, + evictionCount: 0, + }); + for (const unitKey of workUnitKeys) { + input.memoryFlow?.emit({ + type: 'work_unit_started', + unitKey, + skills: ['historic_sql_table_digest'], + stepBudget: 40, + }); + } + for (const unitKey of workUnitKeys) { + input.memoryFlow?.emit({ type: 'work_unit_step', unitKey, stepIndex: 1, stepBudget: 40 }); + } + input.memoryFlow?.finish('done'); + return completedLocalBundleRun(input, input.jobId ?? 
'historic-concurrent-progress-job'); + }); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxIngest( + { + command: 'run', + projectDir, + connectionId: 'warehouse', + adapter: 'historic-sql', + outputMode: 'plain', + }, + io.io, + { + env: interactiveEnv(), + createAdapters: vi.fn(() => createdAdapters as never), + runLocalIngest: runLocal, + jobIdFactory: () => 'historic-concurrent-progress-job', + }, + ), + ).resolves.toBe(0); + + const stdout = io.stdout(); + expect(stdout).toContain( + '\r[56%] Processing work units: 0/6 complete, 6 active; latest historic-sql-table-public-suppliers step 1/40\u001b[K', + ); + expect(stdout).not.toContain( + '\n[56%] Processing 6/6 work units: historic-sql-table-public-suppliers step 1/40\n', + ); + expect(stdout).toContain('\n[100%] Ingest completed\n'); + }); + it('passes local Looker pull-config options and agent runner into scheduled ingest for Looker scheduled ingest', async () => { const projectDir = join(tempDir, 'project'); await writeWarehouseConfig(projectDir); diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index 39bf21bb..5eadce29 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -176,6 +176,19 @@ function completedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventInd return workUnitEventsThrough(snapshot, eventIndex).filter((event) => event.type === 'work_unit_finished').length; } +function activeWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { + const active = new Set(); + for (const event of workUnitEventsThrough(snapshot, eventIndex)) { + if (event.type === 'work_unit_started') { + active.add(event.unitKey); + } + if (event.type === 'work_unit_finished') { + active.delete(event.unitKey); + } + } + return active.size; +} + function plannedWorkUnitCountThrough(snapshot: MemoryFlowReplayInput, eventIndex: number): number { if (snapshot.plannedWorkUnits.length > 0) { return snapshot.plannedWorkUnits.length; @@ 
-199,7 +212,7 @@ function plainIngestEventProgress( event: MemoryFlowEvent, snapshot: MemoryFlowReplayInput, eventIndex: number, -): { percent: number; message: string } | null { +): { percent: number; message: string; transient?: boolean } | null { switch (event.type) { case 'source_acquired': return { @@ -229,13 +242,14 @@ function plainIngestEventProgress( case 'work_unit_step': { const total = plannedWorkUnitCountThrough(snapshot, eventIndex); const completed = completedWorkUnitCountThrough(snapshot, eventIndex); - const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); + const active = activeWorkUnitCountThrough(snapshot, eventIndex); const stepFraction = event.stepBudget > 0 ? Math.min(1, event.stepIndex / event.stepBudget) : 0; const percent = total > 0 ? 55 + Math.ceil(((completed + stepFraction) / total) * 25) : 55; - const progress = total > 0 ? `${ordinal}/${total} work units: ` : ''; + const latest = `${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`; return { percent, - message: `Processing ${progress}${event.unitKey} step ${event.stepIndex}/${event.stepBudget}`, + message: `Processing work units: ${completed}/${total} complete, ${active} active; latest ${latest}`, + transient: true, }; } case 'work_unit_finished': { @@ -281,15 +295,31 @@ function shouldWritePlainIngestProgress( function createPlainIngestProgressRenderer( args: Extract, io: KtxIngestIo, -): { start(): void; update(snapshot: MemoryFlowReplayInput): void } { +): { start(): void; update(snapshot: MemoryFlowReplayInput): void; flush(): void } { let printedEvents = 0; let lastPercent = 0; let printedCompletion = false; + let hasPendingTransient = false; - const write = (percent: number, message: string) => { + const flush = () => { + if (!hasPendingTransient) { + return; + } + io.stdout.write('\n'); + hasPendingTransient = false; + }; + + const write = (percent: number, message: string, options?: { transient?: boolean }) => { const nextPercent = 
Math.max(lastPercent, Math.max(0, Math.min(100, percent))); lastPercent = nextPercent; - io.stdout.write(`[${nextPercent}%] ${message}\n`); + const line = `[${nextPercent}%] ${message}`; + if (options?.transient === true) { + io.stdout.write(`\r${line}\u001b[K`); + hasPendingTransient = true; + return; + } + flush(); + io.stdout.write(`${line}\n`); }; return { @@ -305,7 +335,7 @@ function createPlainIngestProgressRenderer( } const progress = plainIngestEventProgress(event, snapshot, eventIndex); if (progress) { - write(progress.percent, progress.message); + write(progress.percent, progress.message, progress.transient === true ? { transient: true } : undefined); } } if (!printedCompletion && snapshot.status !== 'running') { @@ -313,6 +343,7 @@ function createPlainIngestProgressRenderer( write(100, snapshot.status === 'done' ? 'Ingest completed' : 'Ingest failed'); } }, + flush, }; } @@ -564,6 +595,7 @@ export async function runKtxIngest( io.stdout.write(formatMemoryFlowFinalSummary(latestMemoryFlowSnapshot)); return reportStatus(result.report) === 'done' ? 0 : 1; } + plainProgress?.flush(); await writeReportRecord(result.report, runOutputMode, io, { interactive: (args.inputMode ?? 'auto') === 'auto', renderStoredMemoryFlow: deps.renderStoredMemoryFlow, @@ -571,6 +603,7 @@ export async function runKtxIngest( }); return reportStatus(result.report) === 'done' ? 
0 : 1; } finally { + plainProgress?.flush(); liveTui?.close(); } } From 4d4441ccd505d32b2cefe3fe52ccb8559758ab8f Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Tue, 12 May 2026 14:34:15 +0200 Subject: [PATCH 32/33] fix(context): avoid saving scan error descriptions (#37) --- .../src/scan/description-generation.test.ts | 68 +++++++++++++++++++ .../src/scan/description-generation.ts | 20 +++--- .../scan/local-enrichment-artifacts.test.ts | 41 +++++++++++ .../src/scan/local-enrichment-artifacts.ts | 12 +++- 4 files changed, 129 insertions(+), 12 deletions(-) diff --git a/packages/context/src/scan/description-generation.test.ts b/packages/context/src/scan/description-generation.test.ts index de69fb27..70117919 100644 --- a/packages/context/src/scan/description-generation.test.ts +++ b/packages/context/src/scan/description-generation.test.ts @@ -51,6 +51,29 @@ function createLlmProvider(text = 'generated description') { } as any; } +function createFailingLlmProvider(message = 'timeout exceeded when trying to connect') { + vi.mocked(generateText).mockRejectedValue(new Error(message) as never); + return { + getModel: vi.fn().mockReturnValue({ modelId: 'claude-sonnet-4-6', provider: 'anthropic' }), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(() => ({ + enabled: false, + systemTtl: '1h', + toolsTtl: '1h', + historyTtl: '5m', + cacheSystem: true, + cacheTools: true, + cacheHistory: true, + vertexFallbackTo5m: false, + })), + activeBackend: vi.fn(() => 'anthropic'), + } as any; +} + function createConnector(): KtxScanConnector { return { id: 'test-connector', @@ -274,6 +297,51 @@ describe('KtxDescriptionGenerator', () => { expect('introspect' in sampler).toBe(false); }); + it('does not turn LLM failures into generated descriptions', async () => { + const cache = createCache(); + const connector = createConnector(); + const generator = 
new KtxDescriptionGenerator({ + llmProvider: createFailingLlmProvider(), + cache, + settings: { + columnMaxWords: 12, + tableMaxWords: 18, + dataSourceMaxWords: 24, + }, + }); + + const columnResult = await generator.generateColumnDescriptions({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + supportsNestedAnalysis: false, + table: { + catalog: null, + db: 'public', + name: 'orders', + columns: [{ name: 'status' }], + }, + }); + + await expect( + generator.generateTableDescription({ + connectionId: 'conn-1', + connector, + context: { runId: 'run-1' }, + dataSourceType: 'POSTGRESQL', + table: { catalog: null, db: 'public', name: 'orders' }, + }), + ).resolves.toBeNull(); + + expect(columnResult).toEqual({ + columnDescriptions: [['status', null]], + processedColumns: [], + skippedColumns: [], + }); + expect(cache.set).not.toHaveBeenCalled(); + }); + it('generates and caches table and data-source descriptions', async () => { const cache = createCache(); const connector = createConnector(); diff --git a/packages/context/src/scan/description-generation.ts b/packages/context/src/scan/description-generation.ts index dc30af04..c719ca65 100644 --- a/packages/context/src/scan/description-generation.ts +++ b/packages/context/src/scan/description-generation.ts @@ -348,7 +348,7 @@ export class KtxDescriptionGenerator { }; } - async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise { + async generateTableDescription(input: KtxGenerateTableDescriptionInput): Promise { const tableRef = toTableRef(input.table); const cacheKey = this.cache?.buildTableKey(tableRef); if (cacheKey) { @@ -386,7 +386,7 @@ export class KtxDescriptionGenerator { this.settings.tableMaxWords, 'ktx-table-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } return description; @@ -396,7 +396,7 @@ export class KtxDescriptionGenerator { } } - async 
generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise { + async generateDataSourceDescription(input: KtxGenerateDataSourceDescriptionInput): Promise { if (input.tables.length === 0) { return 'No tables found in database'; } @@ -451,7 +451,7 @@ export class KtxDescriptionGenerator { this.settings.dataSourceMaxWords, 'ktx-data-source-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } return description; @@ -543,7 +543,7 @@ export class KtxDescriptionGenerator { 'ktx-column-description', ); - if (cacheKey) { + if (cacheKey && description) { await this.cache?.set(cacheKey, description); } @@ -551,20 +551,20 @@ export class KtxDescriptionGenerator { columnName: column.name, description, skipped: false, - processed: true, + processed: description !== null, }; } catch (error) { this.logger?.error(`Error analyzing column '${column.name}': ${errorMessage(error)}`); return { columnName: column.name, - description: `Error generating description: ${errorMessage(error)}`, + description: null, skipped: false, processed: false, }; } } - private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise { + private async generateAiDescription(prompt: string, maxWords: number, _operationName: string): Promise { try { const text = await generateKtxText({ llmProvider: this.llmProvider, @@ -573,10 +573,10 @@ export class KtxDescriptionGenerator { temperature: this.settings.temperature, }); const description = text.trim(); - return description || 'Failed to generate description'; + return description || null; } catch (error) { this.logger?.error(`Error generating AI description: ${errorMessage(error)}`); - return `Error generating description: ${errorMessage(error)}`; + return null; } } } diff --git a/packages/context/src/scan/local-enrichment-artifacts.test.ts b/packages/context/src/scan/local-enrichment-artifacts.test.ts index 0123f086..8e0c25fd 100644 
--- a/packages/context/src/scan/local-enrichment-artifacts.test.ts +++ b/packages/context/src/scan/local-enrichment-artifacts.test.ts @@ -553,6 +553,47 @@ describe('writeLocalScanEnrichmentArtifacts', () => { }); }); + it('does not persist generated error descriptions in manifest shards', async () => { + await writeLocalScanManifestShards({ + project, + connectionId: 'warehouse', + syncId: 'sync-error-description', + driver: 'postgres', + snapshot, + descriptionUpdates: [ + { + table: { catalog: null, db: 'public', name: 'orders' }, + tableDescription: 'Error generating description: timeout exceeded when trying to connect', + columnDescriptions: { + id: 'Error generating description: timeout exceeded when trying to connect', + customer_id: 'AI customer reference', + }, + }, + ], + dryRun: false, + }); + + const shard = YAML.parse( + await readFile(join(tempDir, 'project/semantic-layer/warehouse/_schema/public.yaml'), 'utf8'), + ) as { + tables: { + orders: { + descriptions?: Record; + columns: Array<{ name: string; descriptions?: Record }>; + }; + }; + }; + + expect(shard.tables.orders.descriptions).toEqual({ db: 'DB orders table' }); + expect(shard.tables.orders.columns.find((column) => column.name === 'id')?.descriptions).toEqual({ + db: 'DB order id', + }); + expect(shard.tables.orders.columns.find((column) => column.name === 'customer_id')?.descriptions).toEqual({ + db: 'DB customer id', + ai: 'AI customer reference', + }); + }); + it('writes accepted composite relationships to relationship artifacts and manifest shards', async () => { const compositeSnapshot: KtxSchemaSnapshot = { connectionId: 'warehouse', diff --git a/packages/context/src/scan/local-enrichment-artifacts.ts b/packages/context/src/scan/local-enrichment-artifacts.ts index 101d062e..78f5e36d 100644 --- a/packages/context/src/scan/local-enrichment-artifacts.ts +++ b/packages/context/src/scan/local-enrichment-artifacts.ts @@ -62,6 +62,14 @@ interface ExistingManifestState { type 
LocalDescriptionUpdates = KtxLocalScanEnrichmentResult['descriptionUpdates']; +function isGeneratedErrorDescription(description: string | null | undefined): boolean { + const normalized = description?.trim().toLowerCase(); + return ( + normalized === 'failed to generate description' || + normalized?.startsWith('error generating description:') === true + ); +} + function artifactDir(connectionId: string, syncId: string): string { return `raw-sources/${connectionId}/${LIVE_DATABASE_ADAPTER}/${syncId}/enrichment`; } @@ -79,7 +87,7 @@ function tableDescription( if (table.comment) { descriptions.db = table.comment; } - if (update?.tableDescription) { + if (update?.tableDescription && !isGeneratedErrorDescription(update.tableDescription)) { descriptions.ai = update.tableDescription; } return Object.keys(descriptions).length > 0 ? descriptions : undefined; @@ -96,7 +104,7 @@ function columnDescription( if (column.comment) { descriptions.db = column.comment; } - if (aiDescription) { + if (aiDescription && !isGeneratedErrorDescription(aiDescription)) { descriptions.ai = aiDescription; } return Object.keys(descriptions).length > 0 ? 
descriptions : undefined; From 366933c755be1d11ca112e7da78d6d8b78f2d389 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 14:34:59 +0200 Subject: [PATCH 33/33] perf: parallelize scan description generation --- .../context/src/scan/local-enrichment.test.ts | 67 +++++++++++++++- packages/context/src/scan/local-enrichment.ts | 79 +++++++++++-------- 2 files changed, 109 insertions(+), 37 deletions(-) diff --git a/packages/context/src/scan/local-enrichment.test.ts b/packages/context/src/scan/local-enrichment.test.ts index c25dae61..cbed687d 100644 --- a/packages/context/src/scan/local-enrichment.test.ts +++ b/packages/context/src/scan/local-enrichment.test.ts @@ -427,6 +427,69 @@ describe('local scan enrichment', () => { expect(result.relationships).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 }); }); + it('generates table descriptions with bounded table-level concurrency', async () => { + const concurrentSnapshot: KtxSchemaSnapshot = { + ...snapshot, + tables: Array.from({ length: 8 }, (_, index) => ({ + catalog: null, + db: 'public', + name: `table_${index + 1}`, + kind: 'table' as const, + comment: null, + estimatedRows: 2, + foreignKeys: [], + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number' as const, + nullable: false, + primaryKey: true, + comment: null, + }, + ], + })), + }; + let activeColumnSamples = 0; + let maxActiveColumnSamples = 0; + const scanConnector = { + ...connector(), + introspect: vi.fn(async () => concurrentSnapshot), + sampleColumn: vi.fn(async () => { + activeColumnSamples += 1; + maxActiveColumnSamples = Math.max(maxActiveColumnSamples, activeColumnSamples); + await new Promise((resolve) => setTimeout(resolve, 10)); + activeColumnSamples -= 1; + return { + values: ['1'], + nullCount: 0, + distinctCount: 1, + }; + }), + sampleTable: vi.fn(async () => ({ + headers: ['id'], + rows: [[1]], + totalRows: 1, + 
})), + }; + const settings = { + ...buildDefaultKtxProjectConfig('test').scan.relationships, + enabled: false, + }; + + await runLocalScanEnrichment({ + connectionId: 'warehouse', + mode: 'enriched', + connector: scanConnector, + context: { runId: 'scan-run-concurrent-descriptions' }, + providers: createDeterministicLocalScanEnrichmentProviders({ embeddingDimensions: 3 }), + relationshipSettings: settings, + }); + + expect(maxActiveColumnSamples).toBe(6); + }); + it('reports enrichment progress for countable stages', async () => { const events: Array<{ progress: number; message?: string; transient?: boolean }> = []; const progress = { @@ -713,7 +776,7 @@ describe('local scan enrichment', () => { model: 'provider/embedding-model', dimensions: 1536, batchSize: 8, - openai: { api_key: 'env:OPENAI_API_KEY' }, + openai: { api_key: 'env:OPENAI_API_KEY' }, // pragma: allowlist secret }, }, { @@ -726,7 +789,7 @@ describe('local scan enrichment', () => { { createKtxLlmProvider: createKtxLlmProvider as any, createKtxEmbeddingProvider: createKtxEmbeddingProvider as any, - env: { OPENAI_API_KEY: 'openai-key' }, + env: { OPENAI_API_KEY: 'openai-key' }, // pragma: allowlist secret }, ); diff --git a/packages/context/src/scan/local-enrichment.ts b/packages/context/src/scan/local-enrichment.ts index cefecadb..5d58e189 100644 --- a/packages/context/src/scan/local-enrichment.ts +++ b/packages/context/src/scan/local-enrichment.ts @@ -1,4 +1,5 @@ import type { KtxLlmProvider } from '@ktx/llm'; +import pLimit from 'p-limit'; import { buildDefaultKtxProjectConfig, type KtxScanRelationshipConfig } from '../project/config.js'; import { type KtxDescriptionColumnTable, KtxDescriptionGenerator } from './description-generation.js'; import { buildKtxColumnEmbeddingText } from './embedding-text.js'; @@ -40,6 +41,8 @@ import type { KtxTableRef, } from './types.js'; +const DESCRIPTION_TABLE_CONCURRENCY = 6; + export interface DeterministicLocalScanEnrichmentProviderOptions { embeddingDimensions?: 
number; maxBatchSize?: number; @@ -322,41 +325,47 @@ async function generateDescriptions(input: { await input.progress?.update(1, 'No tables to describe'); return updates; } - for (const [index, table] of input.snapshot.tables.entries()) { - await input.progress?.update( - (index + 1) / totalTables, - `Generating descriptions ${index + 1}/${totalTables} tables`, - { - transient: true, - }, - ); - const tableInput = descriptionTable(table); - const columnResult = await generator.generateColumnDescriptions({ - connectionId: input.snapshot.connectionId, - connector: input.connector, - context: input.context, - dataSourceType: input.snapshot.driver, - supportsNestedAnalysis: input.connector.capabilities.nestedAnalysis, - table: tableInput, - }); - const tableDescription = await generator.generateTableDescription({ - connectionId: input.snapshot.connectionId, - connector: input.connector, - context: input.context, - dataSourceType: input.snapshot.driver, - table: { - catalog: table.catalog, - db: table.db, - name: table.name, - rawDescriptions: table.comment ? 
{ db: table.comment } : {}, - }, - }); - updates.push({ - table: tableRef(table), - tableDescription, - columnDescriptions: Object.fromEntries(columnResult.columnDescriptions), - }); - } + const limitTable = pLimit(DESCRIPTION_TABLE_CONCURRENCY); + const tableUpdates = await Promise.all( + input.snapshot.tables.map((table, index) => + limitTable(async () => { + await input.progress?.update( + (index + 1) / totalTables, + `Generating descriptions ${index + 1}/${totalTables} tables`, + { + transient: true, + }, + ); + const tableInput = descriptionTable(table); + const columnResult = await generator.generateColumnDescriptions({ + connectionId: input.snapshot.connectionId, + connector: input.connector, + context: input.context, + dataSourceType: input.snapshot.driver, + supportsNestedAnalysis: input.connector.capabilities.nestedAnalysis, + table: tableInput, + }); + const tableDescription = await generator.generateTableDescription({ + connectionId: input.snapshot.connectionId, + connector: input.connector, + context: input.context, + dataSourceType: input.snapshot.driver, + table: { + catalog: table.catalog, + db: table.db, + name: table.name, + rawDescriptions: table.comment ? { db: table.comment } : {}, + }, + }); + return { + table: tableRef(table), + tableDescription, + columnDescriptions: Object.fromEntries(columnResult.columnDescriptions), + }; + }), + ), + ); + updates.push(...tableUpdates); await input.progress?.update(1, `Generated descriptions for ${totalTables} tables`); return updates; }