From e05a6d43abb4aa7b6e69376cac675c6a94def809 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov <7889985+andreybavt@users.noreply.github.com> Date: Tue, 12 May 2026 01:07:47 +0200 Subject: [PATCH 1/2] fix(cli): report metabase ingest readiness --- packages/cli/src/setup.test.ts | 57 ++++++++++++++++++++++++++++++++++ packages/cli/src/setup.ts | 42 +++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index c8961e2a..44fc8c7d 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { localFakeBundleReport, persistLocalBundleReport } from './ingest.test-utils.js'; import { contextBuildCommands, writeKtxSetupContextState } from './setup-context.js'; import { readKtxSetupStatus, runKtxSetup } from './setup.js'; @@ -274,6 +275,62 @@ describe('setup status', () => { }); }); + it('reports Vertex LLM and context ready after a successful Metabase ingest report', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: revenue', + 'setup:', + ' database_connection_ids:', + ' - warehouse', + ' completed_steps:', + ' - project', + ' - databases', + ' - sources', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:DATABASE_URL', + ' metabase:', + ' driver: metabase', + ' url: env:METABASE_URL', + ' api_key_ref: env:METABASE_API_KEY', + ' warehouse_connection_id: warehouse', + 'llm:', + ' provider:', + ' backend: vertex', + ' vertex:', + ' project: kaelio-dev', + ' location: us-east5', + ' models:', + ' default: claude-sonnet-4-6', + 'ingest:', + ' embeddings:', + ' backend: deterministic', + ' model: deterministic', + ' dimensions: 8', + '', + ].join('\n'), + 'utf-8', + ); + await persistLocalBundleReport( + tempDir, + 
localFakeBundleReport('metabase-job-1', { + connectionId: 'warehouse', + sourceKey: 'metabase', + }), + ); + + const status = await readKtxSetupStatus(tempDir); + const io = makeIo(); + await expect(runKtxSetup({ command: 'status', projectDir: tempDir, json: false }, io.io)).resolves.toBe(0); + + expect(status.llm).toMatchObject({ backend: 'vertex', ready: true, model: 'claude-sonnet-4-6' }); + expect(status.context).toMatchObject({ ready: true, status: 'completed' }); + expect(io.stdout()).toContain('LLM ready: yes (claude-sonnet-4-6)'); + expect(io.stdout()).toContain('KTX context built: yes'); + }); + it('prints plain and JSON setup status', async () => { const plainIo = makeIo(); const jsonIo = makeIo(); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 89c5dcdc..0b0c400d 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -1,7 +1,8 @@ import { existsSync } from 'node:fs'; import { join, resolve } from 'node:path'; import { cancel, isCancel, select } from '@clack/prompts'; -import { loadKtxProject } from '@ktx/context/project'; +import { getLatestLocalIngestStatus, savedMemoryCountsForReport } from '@ktx/context/ingest'; +import { ktxLocalStateDbPath, loadKtxProject, type KtxLocalProject } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import type { KtxDemoArgs } from './demo.js'; import { defaultDemoProjectDir } from './demo-assets.js'; @@ -152,6 +153,7 @@ export interface KtxSetupDeps { } const SOURCE_DRIVERS = new Set(['dbt', 'metricflow', 'metabase', 'looker', 'lookml', 'notion']); +const READY_LLM_BACKENDS = new Set(['anthropic', 'vertex', 'gateway']); type KtxSetupEntryAction = 'setup' | 'new-project' | 'agents' | 'status' | 'demo' | 'exit'; type KtxSetupFlowStep = 'models' | 'embeddings' | 'databases' | 'sources' | 'context' | 'agents'; @@ -234,7 +236,12 @@ async function runKtxSetupDemoFromEntryMenu( } function llmReady(status: KtxSetupStatus['llm']): boolean { - return 
status.backend === 'anthropic' && typeof status.model === 'string' && status.model.length > 0; + return ( + status.backend !== undefined && + READY_LLM_BACKENDS.has(status.backend) && + typeof status.model === 'string' && + status.model.length > 0 + ); } function embeddingsReady(status: KtxSetupStatus['embeddings']): boolean { @@ -259,6 +266,31 @@ function sourceConnections(config: Awaited>['c .sort((left, right) => left.connectionId.localeCompare(right.connectionId)); } +type LocalIngestStatusReport = NonNullable>>; + +function reportHasSavedContext(report: LocalIngestStatusReport): boolean { + if (report.body.failedWorkUnits.length > 0) { + return false; + } + const counts = savedMemoryCountsForReport(report); + return counts.wikiCount > 0 || counts.slCount > 0; +} + +async function readIngestContextStatus(project: KtxLocalProject): Promise { + if (!existsSync(ktxLocalStateDbPath(project))) { + return null; + } + const report = await getLatestLocalIngestStatus(project); + if (!report || !reportHasSavedContext(report)) { + return null; + } + return { + ready: true, + status: 'completed', + runId: report.runId, + }; +} + export async function readKtxSetupStatus(projectDir: string): Promise { const resolvedProjectDir = resolve(projectDir); if (!existsSync(join(resolvedProjectDir, 'ktx.yaml'))) { @@ -291,6 +323,10 @@ export async function readKtxSetupStatus(projectDir: string): Promise Date: Tue, 12 May 2026 12:26:19 +0200 Subject: [PATCH 2/2] docs: refresh KTX demo readiness guidance --- README.md | 89 ++++++++++++++++++++++- packages/cli/src/demo.test.ts | 25 +++++-- packages/cli/src/standalone-smoke.test.ts | 18 ++--- 3 files changed, 117 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 84592226..696558a5 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ SQLite. 
Install the CLI and run the setup wizard: ```bash +npm install @kaelio/ktx npm install -g @kaelio/ktx ktx setup ``` @@ -70,6 +71,40 @@ KTX context built: yes Agent integration ready: yes (claude-code:project) ``` +Run the packaged demo without installing globally: + +```bash +npx @kaelio/ktx setup demo --no-input +npx @kaelio/ktx setup demo inspect +``` + +The default demo uses packaged sample data and prebuilt context. It does not +require API keys, network access, or an LLM provider. + +Generate SQL from a semantic-layer source: + +```bash +npx @kaelio/ktx sl query --project-dir "$PROJECT_DIR" \ + --connection-id warehouse \ + --measure accounts.account_count \ + --dimension accounts.segment \ + --format sql +``` + +List and test a configured warehouse connection: + +```bash +ktx connection list --project-dir "$PROJECT_DIR" +ktx connection test warehouse --project-dir "$PROJECT_DIR" +``` + +The connection test prints the configured driver and discovered table count: + +```text +Driver: sqlite +Tables: 1 +``` + ## What's in a project ``` @@ -97,6 +132,47 @@ Semantic sources and knowledge pages are committed to git. The `.ktx/` directory holds ephemeral state and is git-ignored — delete it and KTX rebuilds on the next run. +### Scan the demo warehouse + +Scan artifacts are written under +`raw-sources/warehouse/live-database/<run-id>/` in the project directory. + +```bash +SCAN_OUTPUT="$(ktx scan warehouse --project-dir "$PROJECT_DIR")" +printf '%s\n' "$SCAN_OUTPUT" +SCAN_RUN_ID="$(printf '%s\n' "$SCAN_OUTPUT" | awk '/^Run: / { print $2 }')" +ktx scan status --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +ktx scan report --project-dir "$PROJECT_DIR" "$SCAN_RUN_ID" +``` + +For non-SQLite drivers, prefer credential references such as `--url env:NAME` +or `--url file:PATH` over literal credential URLs. + +## Managed Python runtime + +KTX installs its Python runtime only when a Python-backed command needs it.
+The runtime lives outside the npm cache, is versioned by the installed CLI +version, and is managed by `ktx runtime` commands. + +KTX requires `uv` on `PATH` to create the managed runtime. Install `uv` with +your system package manager or the official installer before running +Python-backed KTX commands. KTX doesn't download `uv` automatically; run +`ktx runtime doctor` if runtime installation fails: + +```bash +ktx runtime install --yes +ktx runtime status +ktx runtime doctor +ktx runtime start +ktx runtime stop +ktx runtime prune --dry-run +ktx runtime prune --yes +``` + +The release artifact manifest contains the public npm tarball and the bundled `kaelio-ktx` +runtime wheel. The `python/ktx-sl` and `python/ktx-daemon` directories remain +source packages for development, not public release artifacts. + ## Serve agents KTX integrates with coding agents through CLI skills, an MCP server, or both. @@ -126,6 +202,11 @@ This exposes tools for connections, knowledge search, semantic-layer sources, validation, queries, ingestion, and replay. The `--semantic-compute` flag starts the managed Python runtime for query planning automatically. +The standalone MCP server exposes `connection_list`, `knowledge_search`, +`knowledge_read`, `knowledge_write`, `sl_list_sources`, `sl_read_source`, +`sl_write_source`, `sl_validate`, `sl_query`, `ingest_trigger`, +`ingest_status`, `ingest_report`, and `ingest_replay`. + Supported agents: Claude Code, Codex, Cursor, OpenCode, and any agent that reads `.agents/` skills or MCP configuration. @@ -136,7 +217,13 @@ reads `.agents/` skills or MCP configuration.
| `packages/cli` | CLI entry point | | `packages/context` | Core context engine | | `packages/llm` | LLM and embedding providers | -| `packages/connector-*` | Database connectors (Postgres, Snowflake, BigQuery, ClickHouse, MySQL, SQL Server, SQLite) | +| `packages/connector-bigquery` | BigQuery scan connector | +| `packages/connector-clickhouse` | ClickHouse scan connector | +| `packages/connector-mysql` | MySQL scan connector | +| `packages/connector-postgres` | Postgres scan connector | +| `packages/connector-snowflake` | Snowflake scan connector | +| `packages/connector-sqlite` | SQLite scan connector | +| `packages/connector-sqlserver` | SQL Server scan connector | | `python/ktx-sl` | Semantic-layer query planning | | `python/ktx-daemon` | Portable compute service | diff --git a/packages/cli/src/demo.test.ts b/packages/cli/src/demo.test.ts index 0cedba99..0b053ee6 100644 --- a/packages/cli/src/demo.test.ts +++ b/packages/cli/src/demo.test.ts @@ -11,6 +11,9 @@ import type { renderMemoryFlowTui } from './memory-flow-tui.js'; import { KTX_NEXT_STEP_COMMANDS } from './next-steps.js'; import { resetVizFallbackWarningsForTest } from './viz-fallback.js'; +const SEEDED_DEMO_SEMANTIC_SOURCE_COUNT = 46; +const SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT = 28; + function makeIo(options: { isTTY?: boolean; columns?: number; rawMode?: boolean } = {}) { let stdout = ''; let stderr = ''; @@ -336,8 +339,14 @@ describe('runKtxDemo', () => { notion: { pageCount: 8 }, }, generatedOutputs: { - semanticLayer: { manifestSourceCount: 6, fileCount: 6 }, - knowledge: { manifestPageCount: 10, fileCount: 10 }, + semanticLayer: { + manifestSourceCount: SEEDED_DEMO_SEMANTIC_SOURCE_COUNT, + fileCount: SEEDED_DEMO_SEMANTIC_SOURCE_COUNT, + }, + knowledge: { + manifestPageCount: SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT, + fileCount: SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT, + }, links: { manifestLinkCount: 23, linkCount: 23 }, reports: { primaryPath: 'reports/seeded-demo-report.json', fileCount: 1 }, }, @@ -636,10 
+645,16 @@ describe('runKtxDemo', () => { ).resolves.toBe(0); expect(seededIo.stdout()).toContain('Status: ready'); - expect(seededIo.stdout()).toContain('Semantic-layer sources: 6 manifest, 6 files'); - expect(seededIo.stdout()).toContain('Knowledge pages: 10 manifest, 10 files'); + expect(seededIo.stdout()).toContain( + `Semantic-layer sources: ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} manifest, ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} files`, + ); + expect(seededIo.stdout()).toContain( + `Knowledge pages: ${SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT} manifest, ${SEEDED_DEMO_KNOWLEDGE_PAGE_COUNT} files`, + ); expect(seededIo.stdout()).not.toContain('Status: corrupt'); - expect(seededIo.stdout()).not.toContain('Semantic-layer sources: 6 manifest, 0 files'); + expect(seededIo.stdout()).not.toContain( + `Semantic-layer sources: ${SEEDED_DEMO_SEMANTIC_SOURCE_COUNT} manifest, 0 files`, + ); }); it('fails corrupted demo projects in no-input mode with reset guidance', async () => { diff --git a/packages/cli/src/standalone-smoke.test.ts b/packages/cli/src/standalone-smoke.test.ts index 27f34b92..0b15410c 100644 --- a/packages/cli/src/standalone-smoke.test.ts +++ b/packages/cli/src/standalone-smoke.test.ts @@ -368,9 +368,9 @@ describe('standalone built ktx CLI smoke', () => { const knowledgeSearch = structuredContent<{ results: Array<{ key: string; summary: string; score: number }>; totalFound: number; - }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract', limit: 5 } })); + }>(await client.callTool({ name: 'knowledge_search', arguments: { query: 'ARR contract-first definition', limit: 10 } })); expect(knowledgeSearch.totalFound).toBeGreaterThan(0); - expect(knowledgeSearch.results.map((result) => result.key)).toContain('arr-contract-first'); + expect(knowledgeSearch.results.map((result) => result.key)).toContain('orbit-arr-contract-first-definition'); const knowledgeRead = structuredContent<{ key: string; @@ -378,26 +378,26 @@ describe('standalone built 
ktx CLI smoke', () => { content: string; tags: string[]; slRefs: string[]; - }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'arr-contract-first' } })); - expect(knowledgeRead.key).toBe('arr-contract-first'); + }>(await client.callTool({ name: 'knowledge_read', arguments: { key: 'orbit-arr-contract-first-definition' } })); + expect(knowledgeRead.key).toBe('orbit-arr-contract-first-definition'); expect(knowledgeRead.summary).toContain('ARR'); expect(knowledgeRead.content).toContain('contract'); - expect(knowledgeRead.slRefs).toContain('orbit_demo.contracts'); + expect(knowledgeRead.slRefs).toContain('mart_arr_daily'); const slRead = structuredContent<{ sourceName: string; yaml: string }>( await client.callTool({ name: 'sl_read_source', - arguments: { connectionId: 'orbit_demo', sourceName: 'accounts' }, + arguments: { connectionId: 'dbt-main', sourceName: 'mart_arr_daily' }, }), ); - expect(slRead.sourceName).toBe('accounts'); - expect(slRead.yaml).toContain('name: accounts'); + expect(slRead.sourceName).toBe('mart_arr_daily'); + expect(slRead.yaml).toContain('name: mart_arr_daily'); expect(slRead.yaml).toContain('measures:'); const slValidate = structuredContent<{ success: boolean; errors: string[]; warnings: string[] }>( await client.callTool({ name: 'sl_validate', - arguments: { connectionId: 'orbit_demo', names: ['accounts', 'contracts'] }, + arguments: { connectionId: 'dbt-main', names: ['mart_arr_daily', 'stg_contracts'] }, }), ); expect(slValidate.success).toBe(true);