ktx/packages/cli/test/scan.test.ts

1453 lines
45 KiB
TypeScript
Raw Permalink Normal View History

import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
2026-05-10 23:12:26 +02:00
import { tmpdir } from 'node:os';
import { join } from 'node:path';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import type { SourceAdapter } from '../src/context/ingest/types.js';
import { initKtxProject } from '../src/context/project/project.js';
import type { KtxScanReport } from '../src/context/scan/types.js';
import type { LocalScanRunResult, RunLocalScanOptions } from '../src/context/scan/local-scan.js';
2026-05-10 23:12:26 +02:00
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { createCliScanProgress, runKtxScan, type KtxScanDeps } from '../src/scan.js';
2026-05-10 23:12:26 +02:00
const sqlServerExtractSchema = vi.hoisted(() =>
vi.fn(async (connectionId: string) => ({
connectionId,
extractedAt: '2026-04-29T16:00:00.000Z',
metadata: { database: 'analytics' },
tables: [
{
catalog: 'analytics',
db: 'dbo',
name: 'orders',
columns: [{ name: 'id', type: 'int', nullable: false, primaryKey: true }],
foreignKeys: [],
},
],
})),
);
const createSqlServerLiveDatabaseIntrospection = vi.hoisted(() =>
vi.fn(() => ({ extractSchema: sqlServerExtractSchema })),
);
2026-05-10 23:51:24 +02:00
const isKtxSqlServerConnectionConfig = vi.hoisted(() =>
2026-05-10 23:12:26 +02:00
vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'sqlserver'),
);
2026-05-10 23:51:24 +02:00
const KtxSqlServerScanConnector = vi.hoisted(
2026-05-10 23:12:26 +02:00
() =>
class {
readonly id: string;
readonly driver = 'sqlserver';
constructor(options: { connectionId: string }) {
this.id = `sqlserver:${options.connectionId}`;
}
},
);
const bigQueryExtractSchema = vi.hoisted(() =>
vi.fn(async (connectionId: string) => ({
connectionId,
extractedAt: '2026-04-29T17:00:00.000Z',
metadata: { project_id: 'project-1', datasets: ['analytics'] },
tables: [
{
catalog: 'project-1',
db: 'analytics',
name: 'orders',
columns: [{ name: 'id', type: 'INT64', nullable: false, primaryKey: true }],
foreignKeys: [],
},
],
})),
);
const createBigQueryLiveDatabaseIntrospection = vi.hoisted(() =>
vi.fn(() => ({ extractSchema: bigQueryExtractSchema })),
);
2026-05-10 23:51:24 +02:00
const isKtxBigQueryConnectionConfig = vi.hoisted(() =>
2026-05-10 23:12:26 +02:00
vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'bigquery'),
);
2026-05-10 23:51:24 +02:00
const KtxBigQueryScanConnector = vi.hoisted(
2026-05-10 23:12:26 +02:00
() =>
class {
readonly id: string;
readonly driver = 'bigquery';
constructor(options: { connectionId: string }) {
this.id = `bigquery:${options.connectionId}`;
}
},
);
const snowflakeExtractSchema = vi.hoisted(() =>
vi.fn(async (connectionId: string) => ({
connectionId,
extractedAt: '2026-04-29T18:00:00.000Z',
metadata: { database: 'ANALYTICS', schemas: ['PUBLIC'] },
tables: [
{
catalog: 'ANALYTICS',
db: 'PUBLIC',
name: 'ORDERS',
columns: [{ name: 'ID', type: 'NUMBER', nullable: false, primaryKey: true }],
foreignKeys: [],
},
],
})),
);
const createSnowflakeLiveDatabaseIntrospection = vi.hoisted(() =>
vi.fn(() => ({ extractSchema: snowflakeExtractSchema })),
);
2026-05-10 23:51:24 +02:00
const isKtxSnowflakeConnectionConfig = vi.hoisted(() =>
2026-05-10 23:12:26 +02:00
vi.fn((connection: { driver?: string } | undefined) => connection?.driver === 'snowflake'),
);
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204) * feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure Snowflake setup previously asked for a single schema as free text, then ran a multiselect against the discovered schemas — two schema questions back-to-back, with the first being only a session bootstrap. The SDK's `schema` is optional, so the bootstrap step is unnecessary. - Remove the free-text Snowflake schema prompt; only pass `schema` to snowflake-sdk when one is configured. - When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the user for a comma-separated list, persist it as `schema_names`, and use it as both the table-list filter and the multiselect default. Applies to every driver with a scope-discovery spec, not just Snowflake. - Update docs to lead with `schema_names`; keep `schema_name` as a documented single-schema shorthand. * fix(snowflake): keep introspecting when primary-key discovery is denied The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the connection role may not have. Previously a 'SQL compilation error: Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist or not authorized' aborted the entire introspect — schemas, columns, and row counts were all discarded over a missing nice-to-have. Wrap the constraint query in try/catch, log a one-line warning per schema, and return an empty PK map. Columns end up with primaryKey=false; relationship inference still has FK and profiling to fall back on. * fix(scan): unblock relationship discovery on Snowflake Two adjacent bugs prevented the scan's relationship pipeline from producing any joins on a Snowflake warehouse: - relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table profile query failed with "Unknown function GROUP_CONCAT". Add an explicit Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected). - description-generation.ts destructured `connector.sampleTable` and `connector.sampleColumn` into bare locals, losing the `this` binding when the class-method connectors (Snowflake, Postgres, MySQL) were invoked. Every sample call threw "Cannot read properties of undefined (reading 'assertConnection')" and degraded LLM descriptions to metadata-only prompts. Call the methods through the connector instead. Without these, even after the primary-key probe is allowed to fail softly, the scan ends up with 0 validated relationships and an empty `joins:` block in every shard YAML. * test(scan): cover table-ref helpers * feat(scan): plumb tableScope through live-database introspection port * feat(scan): apply tableScope during metadata fetch * feat(scan): enforce table scope at fetch boundary * feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206) * feat(cli): add RSA key-pair auth option to Snowflake setup wizard Extends the interactive Snowflake setup flow with an authentication-method prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key path (env/file/absolute) and an optional passphrase; the resulting connection config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead of `password`. * feat(scan): pool Snowflake sessions * fix(scan): reuse structural snapshots and cleanup connectors * feat(scan): parallelize relationship profiling * feat(scan): batch table description generation * docs: document Snowflake ingest concurrency knobs * fix(scan): close Snowflake ingest perf verification gaps * fix(scan): keep batched description failure bounded * feat(scan): dispatch query-history probes by connection driver Extract historic-sql dialect resolution into a shared helper so the status-project readiness check and the local ingest factory agree on which connections enable query history and which probe to run. The status command now picks the postgres/snowflake/bigquery probe based on the connection's driver instead of always reporting against postgres, which previously caused snowflake connections with queryHistory.enabled to surface a misleading "driver is snowflake" failure. Also drops a noisy console.warn from Snowflake primary-key discovery — INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only roles and the FK + profiling paths handle the empty PK map already. * fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject The Claude Code agent SDK announces an internal pseudo-tool named StructuredOutput in the system/init message whenever outputFormat is set to { type: 'json_schema' }. The runtime's isolation check built its allowedToolIds set only from MCP tool ids and treated StructuredOutput as an unexpected host-injected tool, so every generateObject call threw "Claude Code runtime isolation failed: tools=StructuredOutput ..." and the table-descriptions and relationship-LLM-proposal enrichment stages recorded null output across the board. Whitelist StructuredOutput specifically in generateObject's allowedToolIds — the check also enforces missing_tools symmetry, so generateText and runAgentLoop, which do not see StructuredOutput, must not require it. generateObject also ran with maxTurns: 1, which the model intermittently breached when it emitted thinking text before the structured response. Raised to 5 to give the schema-bound call enough headroom without allowing unbounded loops. The existing tests now exercise the path with an init message that announces StructuredOutput so the regression cannot slip back in. * chore(scripts): add ktx-reset.sh project-cleanup helper Convenience script for repeatable ingest testing: takes a project directory and prunes everything except ktx.yaml and .ktx/secrets/, so the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
const snowflakeConnectorInstances = vi.hoisted(() => [] as Array<{ cleanup: ReturnType<typeof vi.fn> }>);
2026-05-10 23:51:24 +02:00
const KtxSnowflakeScanConnector = vi.hoisted(
2026-05-10 23:12:26 +02:00
() =>
class {
readonly id: string;
readonly driver = 'snowflake';
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204) * feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure Snowflake setup previously asked for a single schema as free text, then ran a multiselect against the discovered schemas — two schema questions back-to-back, with the first being only a session bootstrap. The SDK's `schema` is optional, so the bootstrap step is unnecessary. - Remove the free-text Snowflake schema prompt; only pass `schema` to snowflake-sdk when one is configured. - When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the user for a comma-separated list, persist it as `schema_names`, and use it as both the table-list filter and the multiselect default. Applies to every driver with a scope-discovery spec, not just Snowflake. - Update docs to lead with `schema_names`; keep `schema_name` as a documented single-schema shorthand. * fix(snowflake): keep introspecting when primary-key discovery is denied The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the connection role may not have. Previously a 'SQL compilation error: Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist or not authorized' aborted the entire introspect — schemas, columns, and row counts were all discarded over a missing nice-to-have. Wrap the constraint query in try/catch, log a one-line warning per schema, and return an empty PK map. Columns end up with primaryKey=false; relationship inference still has FK and profiling to fall back on. * fix(scan): unblock relationship discovery on Snowflake Two adjacent bugs prevented the scan's relationship pipeline from producing any joins on a Snowflake warehouse: - relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table profile query failed with "Unknown function GROUP_CONCAT". Add an explicit Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected). - description-generation.ts destructured `connector.sampleTable` and `connector.sampleColumn` into bare locals, losing the `this` binding when the class-method connectors (Snowflake, Postgres, MySQL) were invoked. Every sample call threw "Cannot read properties of undefined (reading 'assertConnection')" and degraded LLM descriptions to metadata-only prompts. Call the methods through the connector instead. Without these, even after the primary-key probe is allowed to fail softly, the scan ends up with 0 validated relationships and an empty `joins:` block in every shard YAML. * test(scan): cover table-ref helpers * feat(scan): plumb tableScope through live-database introspection port * feat(scan): apply tableScope during metadata fetch * feat(scan): enforce table scope at fetch boundary * feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206) * feat(cli): add RSA key-pair auth option to Snowflake setup wizard Extends the interactive Snowflake setup flow with an authentication-method prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key path (env/file/absolute) and an optional passphrase; the resulting connection config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead of `password`. * feat(scan): pool Snowflake sessions * fix(scan): reuse structural snapshots and cleanup connectors * feat(scan): parallelize relationship profiling * feat(scan): batch table description generation * docs: document Snowflake ingest concurrency knobs * fix(scan): close Snowflake ingest perf verification gaps * fix(scan): keep batched description failure bounded * feat(scan): dispatch query-history probes by connection driver Extract historic-sql dialect resolution into a shared helper so the status-project readiness check and the local ingest factory agree on which connections enable query history and which probe to run. The status command now picks the postgres/snowflake/bigquery probe based on the connection's driver instead of always reporting against postgres, which previously caused snowflake connections with queryHistory.enabled to surface a misleading "driver is snowflake" failure. Also drops a noisy console.warn from Snowflake primary-key discovery — INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only roles and the FK + profiling paths handle the empty PK map already. * fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject The Claude Code agent SDK announces an internal pseudo-tool named StructuredOutput in the system/init message whenever outputFormat is set to { type: 'json_schema' }. The runtime's isolation check built its allowedToolIds set only from MCP tool ids and treated StructuredOutput as an unexpected host-injected tool, so every generateObject call threw "Claude Code runtime isolation failed: tools=StructuredOutput ..." and the table-descriptions and relationship-LLM-proposal enrichment stages recorded null output across the board. Whitelist StructuredOutput specifically in generateObject's allowedToolIds — the check also enforces missing_tools symmetry, so generateText and runAgentLoop, which do not see StructuredOutput, must not require it. generateObject also ran with maxTurns: 1, which the model intermittently breached when it emitted thinking text before the structured response. Raised to 5 to give the schema-bound call enough headroom without allowing unbounded loops. The existing tests now exercise the path with an init message that announces StructuredOutput so the regression cannot slip back in. * chore(scripts): add ktx-reset.sh project-cleanup helper Convenience script for repeatable ingest testing: takes a project directory and prunes everything except ktx.yaml and .ktx/secrets/, so the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
readonly cleanup = vi.fn(async () => undefined);
2026-05-10 23:12:26 +02:00
constructor(options: { connectionId: string }) {
this.id = `snowflake:${options.connectionId}`;
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204) * feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure Snowflake setup previously asked for a single schema as free text, then ran a multiselect against the discovered schemas — two schema questions back-to-back, with the first being only a session bootstrap. The SDK's `schema` is optional, so the bootstrap step is unnecessary. - Remove the free-text Snowflake schema prompt; only pass `schema` to snowflake-sdk when one is configured. - When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the user for a comma-separated list, persist it as `schema_names`, and use it as both the table-list filter and the multiselect default. Applies to every driver with a scope-discovery spec, not just Snowflake. - Update docs to lead with `schema_names`; keep `schema_name` as a documented single-schema shorthand. * fix(snowflake): keep introspecting when primary-key discovery is denied The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the connection role may not have. Previously a 'SQL compilation error: Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist or not authorized' aborted the entire introspect — schemas, columns, and row counts were all discarded over a missing nice-to-have. Wrap the constraint query in try/catch, log a one-line warning per schema, and return an empty PK map. Columns end up with primaryKey=false; relationship inference still has FK and profiling to fall back on. * fix(scan): unblock relationship discovery on Snowflake Two adjacent bugs prevented the scan's relationship pipeline from producing any joins on a Snowflake warehouse: - relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table profile query failed with "Unknown function GROUP_CONCAT". Add an explicit Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected). - description-generation.ts destructured `connector.sampleTable` and `connector.sampleColumn` into bare locals, losing the `this` binding when the class-method connectors (Snowflake, Postgres, MySQL) were invoked. Every sample call threw "Cannot read properties of undefined (reading 'assertConnection')" and degraded LLM descriptions to metadata-only prompts. Call the methods through the connector instead. Without these, even after the primary-key probe is allowed to fail softly, the scan ends up with 0 validated relationships and an empty `joins:` block in every shard YAML. * test(scan): cover table-ref helpers * feat(scan): plumb tableScope through live-database introspection port * feat(scan): apply tableScope during metadata fetch * feat(scan): enforce table scope at fetch boundary * feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206) * feat(cli): add RSA key-pair auth option to Snowflake setup wizard Extends the interactive Snowflake setup flow with an authentication-method prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key path (env/file/absolute) and an optional passphrase; the resulting connection config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead of `password`. * feat(scan): pool Snowflake sessions * fix(scan): reuse structural snapshots and cleanup connectors * feat(scan): parallelize relationship profiling * feat(scan): batch table description generation * docs: document Snowflake ingest concurrency knobs * fix(scan): close Snowflake ingest perf verification gaps * fix(scan): keep batched description failure bounded * feat(scan): dispatch query-history probes by connection driver Extract historic-sql dialect resolution into a shared helper so the status-project readiness check and the local ingest factory agree on which connections enable query history and which probe to run. The status command now picks the postgres/snowflake/bigquery probe based on the connection's driver instead of always reporting against postgres, which previously caused snowflake connections with queryHistory.enabled to surface a misleading "driver is snowflake" failure. Also drops a noisy console.warn from Snowflake primary-key discovery — INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only roles and the FK + profiling paths handle the empty PK map already. * fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject The Claude Code agent SDK announces an internal pseudo-tool named StructuredOutput in the system/init message whenever outputFormat is set to { type: 'json_schema' }. The runtime's isolation check built its allowedToolIds set only from MCP tool ids and treated StructuredOutput as an unexpected host-injected tool, so every generateObject call threw "Claude Code runtime isolation failed: tools=StructuredOutput ..." and the table-descriptions and relationship-LLM-proposal enrichment stages recorded null output across the board. Whitelist StructuredOutput specifically in generateObject's allowedToolIds — the check also enforces missing_tools symmetry, so generateText and runAgentLoop, which do not see StructuredOutput, must not require it. generateObject also ran with maxTurns: 1, which the model intermittently breached when it emitted thinking text before the structured response. Raised to 5 to give the schema-bound call enough headroom without allowing unbounded loops. The existing tests now exercise the path with an init message that announces StructuredOutput so the regression cannot slip back in. * chore(scripts): add ktx-reset.sh project-cleanup helper Convenience script for repeatable ingest testing: takes a project directory and prunes everything except ktx.yaml and .ktx/secrets/, so the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
snowflakeConnectorInstances.push(this);
2026-05-10 23:12:26 +02:00
}
},
);
const postgresExtractSchema = vi.hoisted(() =>
vi.fn(async (connectionId: string) => ({
connectionId,
extractedAt: '2026-04-29T12:00:00.000Z',
metadata: { database: 'analytics' },
tables: [],
})),
);
const createPostgresLiveDatabaseIntrospection = vi.hoisted(() =>
vi.fn(() => ({ extractSchema: postgresExtractSchema })),
);
2026-05-10 23:51:24 +02:00
const isKtxPostgresConnectionConfig = vi.hoisted(() =>
2026-05-10 23:12:26 +02:00
vi.fn((connection: { driver?: string } | undefined) =>
String(connection?.driver ?? '').toLowerCase() === 'postgres',
2026-05-10 23:12:26 +02:00
),
);
2026-05-10 23:51:24 +02:00
const KtxPostgresScanConnector = vi.hoisted(
2026-05-10 23:12:26 +02:00
() =>
class {
readonly id: string;
readonly driver = 'postgres';
constructor(options: { connectionId: string }) {
this.id = `postgres:${options.connectionId}`;
}
},
);
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/sqlserver/connector.js', () => ({
2026-05-10 23:51:24 +02:00
isKtxSqlServerConnectionConfig,
KtxSqlServerScanConnector,
2026-05-10 23:12:26 +02:00
}));
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/sqlserver/live-database-introspection.js', () => ({
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
createSqlServerLiveDatabaseIntrospection,
}));
2026-05-10 23:12:26 +02:00
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/bigquery/connector.js', () => ({
2026-05-10 23:51:24 +02:00
isKtxBigQueryConnectionConfig,
KtxBigQueryScanConnector,
2026-05-10 23:12:26 +02:00
}));
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/bigquery/live-database-introspection.js', () => ({
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
createBigQueryLiveDatabaseIntrospection,
}));
2026-05-10 23:12:26 +02:00
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/snowflake/connector.js', () => ({
2026-05-10 23:51:24 +02:00
isKtxSnowflakeConnectionConfig,
KtxSnowflakeScanConnector,
2026-05-10 23:12:26 +02:00
}));
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/snowflake/live-database-introspection.js', () => ({
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
createSnowflakeLiveDatabaseIntrospection,
}));
2026-05-10 23:12:26 +02:00
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/postgres/connector.js', () => ({
2026-05-10 23:51:24 +02:00
isKtxPostgresConnectionConfig,
KtxPostgresScanConnector,
2026-05-10 23:12:26 +02:00
}));
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
vi.mock('../src/connectors/postgres/live-database-introspection.js', () => ({
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
createPostgresLiveDatabaseIntrospection,
}));
2026-05-10 23:12:26 +02:00
function makeIo(options: { isTTY?: boolean } = {}) {
let stdout = '';
let stderr = '';
return {
io: {
stdout: {
isTTY: options.isTTY,
write: (chunk: string) => {
stdout += chunk;
},
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
},
stdout: () => stdout,
stderr: () => stderr,
};
}
function fakeLiveDatabaseAdapter(
createIntrospection: (options: { connections: unknown }) => {
extractSchema: (connectionId: string) => Promise<unknown>;
},
): SourceAdapter {
return {
source: 'live-database',
skillNames: [],
async detect() {
return true;
},
async fetch(_pullConfig: unknown, stagedDir: string, ctx: { connectionId: string }) {
await mkdir(stagedDir, { recursive: true });
const schema = await createIntrospection({ connections: {} }).extractSchema(ctx.connectionId);
await writeFile(
join(stagedDir, 'connection.json'),
JSON.stringify({ connectionId: ctx.connectionId, schema }, null, 2),
'utf-8',
);
},
async chunk() {
return { workUnits: [] };
},
};
}
2026-05-10 23:51:24 +02:00
const report: KtxScanReport = {
2026-05-10 23:12:26 +02:00
connectionId: 'warehouse',
driver: 'postgres',
syncId: 'sync-1',
runId: 'scan-run-1',
trigger: 'cli',
mode: 'structural',
dryRun: false,
artifactPaths: {
rawSourcesDir: 'raw-sources/warehouse/live-database/sync-1',
reportPath: 'raw-sources/warehouse/live-database/sync-1/scan-report.json',
manifestShards: [],
enrichmentArtifacts: [],
},
diffSummary: {
tablesAdded: 1,
tablesModified: 0,
tablesDeleted: 0,
tablesUnchanged: 0,
columnsAdded: 0,
columnsModified: 0,
columnsDeleted: 0,
},
manifestShardsWritten: 0,
structuralSyncStats: {
tablesCreated: 0,
tablesUpdated: 0,
tablesDeleted: 0,
columnsCreated: 0,
columnsUpdated: 0,
columnsDeleted: 0,
},
enrichment: {
dataDictionary: 'skipped',
tableDescriptions: 'skipped',
columnDescriptions: 'skipped',
embeddings: 'skipped',
deterministicRelationships: 'skipped',
llmRelationshipValidation: 'skipped',
statisticalValidation: 'skipped',
},
capabilityGaps: [],
warnings: [],
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
enrichmentState: {
resumedStages: [],
completedStages: [],
failedStages: [],
},
createdAt: '2026-04-29T09:00:00.000Z',
};
2026-05-10 23:51:24 +02:00
const reportWithAttention: KtxScanReport = {
2026-05-10 23:12:26 +02:00
...report,
mode: 'relationships',
diffSummary: {
tablesAdded: 3,
tablesModified: 2,
tablesDeleted: 0,
tablesUnchanged: 13,
columnsAdded: 18,
columnsModified: 5,
columnsDeleted: 0,
},
capabilityGaps: ['columnStats'],
warnings: [
{
code: 'connector_capability_missing',
2026-05-10 23:51:24 +02:00
message: 'KTX scan connector is missing optional capability: columnStats',
2026-05-10 23:12:26 +02:00
recoverable: true,
metadata: { capability: 'columnStats' },
},
{
code: 'relationship_validation_failed',
message: 'Could not validate relationship orders.customer_id -> customers.id',
table: 'orders',
column: 'customer_id',
recoverable: true,
},
],
relationships: { accepted: 7, review: 3, rejected: 2, skipped: 4 },
enrichmentState: {
resumedStages: ['relationships'],
completedStages: ['descriptions', 'relationships'],
failedStages: [],
},
artifactPaths: {
...report.artifactPaths,
manifestShards: ['raw-sources/warehouse/live-database/sync-1/_schema/shard-000.json'],
enrichmentArtifacts: ['raw-sources/warehouse/live-database/sync-1/_enrichment/relationships.json'],
},
};
2026-05-10 23:51:24 +02:00
describe('runKtxScan', () => {
2026-05-10 23:12:26 +02:00
let tempDir: string;
const noLocalIngestAdapters = () => [];
2026-05-10 23:12:26 +02:00
beforeEach(async () => {
2026-05-10 23:51:24 +02:00
tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-scan-'));
2026-05-10 23:12:26 +02:00
});
afterEach(async () => {
feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205) * feat: add telemetry phase 1 * feat: add node telemetry event catalog * feat: add telemetry event helpers * feat: emit setup and connection telemetry * feat: emit connection and stack telemetry * feat: emit ingest and scan telemetry * feat: emit query telemetry * feat: emit sampled mcp telemetry * docs: expand telemetry event catalog * feat: add telemetry schema sync artifact * feat: pass telemetry project id to semantic daemon * feat: add daemon telemetry foundation * feat: emit semantic daemon telemetry * feat: emit daemon lifecycle telemetry * docs: document full telemetry event catalog * feat(telemetry): dim first-run notice * feat(telemetry): show first-run notice before command output * feat(telemetry): wire ktx PostHog project for live ingestion * docs(telemetry): drop posthog project name and host from storage section * docs(telemetry): trim to general overview and disclaimer * docs(agents): add short telemetry guidelines * feat(telemetry): enable posthog geoip enrichment * docs(telemetry): drop ip-geoip note from public overview * refactor(telemetry): drop no-op groupIdentify, rely on capture groups field * fix(telemetry): respect CI kill switch in python daemon identity * fix(sql): route table-count analysis to existing analyze-batch endpoint * fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander * fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check * fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests * fix(telemetry): unset CI kill switch in cli-program-telemetry tests
2026-05-22 18:18:47 +02:00
vi.unstubAllEnvs();
2026-05-10 23:12:26 +02:00
await rm(tempDir, { recursive: true, force: true });
});
it('runs structural scans and prints a dev-friendly plain summary', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
databaseIntrospectionUrl: 'http://127.0.0.1:8765',
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(
expect.objectContaining({
connectionId: 'warehouse',
mode: 'structural',
databaseIntrospectionUrl: 'http://127.0.0.1:8765',
connector: undefined,
}),
);
2026-05-10 23:51:24 +02:00
expect(io.stdout()).toContain('KTX scan completed\n');
2026-05-10 23:12:26 +02:00
expect(io.stdout()).toContain('Run: scan-run-1');
expect(io.stdout()).toContain('Mode: structural');
expect(io.stdout()).toContain('What changed\n');
expect(io.stdout()).toContain('New tables: 1\n');
expect(io.stdout()).toContain('Changed tables: 0\n');
expect(io.stdout()).toContain('Removed tables: 0\n');
expect(io.stdout()).toContain('Unchanged tables: 0\n');
expect(io.stdout()).toContain('Needs attention\n None\n');
expect(io.stdout()).toContain('Artifacts\n');
expect(io.stdout()).toContain('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json');
expect(io.stdout()).toContain('Next:\n');
expect(io.stdout()).toContain('ktx status --project-dir ');
expect(io.stdout()).not.toContain('ktx admin scan status');
expect(io.stdout()).not.toContain('ktx admin scan report');
2026-05-10 23:12:26 +02:00
expect(io.stdout()).not.toContain('\u001b[');
expect(io.stdout()).not.toContain('✓');
expect(io.stdout()).not.toContain('+1');
expect(io.stdout()).not.toContain('/~');
});
feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205) * feat: add telemetry phase 1 * feat: add node telemetry event catalog * feat: add telemetry event helpers * feat: emit setup and connection telemetry * feat: emit connection and stack telemetry * feat: emit ingest and scan telemetry * feat: emit query telemetry * feat: emit sampled mcp telemetry * docs: expand telemetry event catalog * feat: add telemetry schema sync artifact * feat: pass telemetry project id to semantic daemon * feat: add daemon telemetry foundation * feat: emit semantic daemon telemetry * feat: emit daemon lifecycle telemetry * docs: document full telemetry event catalog * feat(telemetry): dim first-run notice * feat(telemetry): show first-run notice before command output * feat(telemetry): wire ktx PostHog project for live ingestion * docs(telemetry): drop posthog project name and host from storage section * docs(telemetry): trim to general overview and disclaimer * docs(agents): add short telemetry guidelines * feat(telemetry): enable posthog geoip enrichment * docs(telemetry): drop ip-geoip note from public overview * refactor(telemetry): drop no-op groupIdentify, rely on capture groups field * fix(telemetry): respect CI kill switch in python daemon identity * fix(sql): route table-count analysis to existing analyze-batch endpoint * fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander * fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check * fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests * fix(telemetry): unset CI kill switch in cli-program-telemetry tests
2026-05-22 18:18:47 +02:00
it('emits debug telemetry for completed scans without project paths', async () => {
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
await initKtxProject({ projectDir: tempDir });
const runLocalScan = vi.fn(
async (): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo({ isTTY: true });
const code = await runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
databaseIntrospectionUrl: 'http://127.0.0.1:8765',
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
);
expect(code).toBe(0);
expect(io.stderr()).toContain('"event":"scan_completed"');
expect(io.stderr()).toContain('"tableCount"');
expect(io.stderr()).not.toContain(tempDir);
});
refactor(release): drop release-policy.json runtime dep and next branch (#180) * chore: standardize daemon naming on "KTX daemon" Replace inconsistent names ("KTX Python daemon", "KTX local embeddings daemon", "KTX managed daemon", "Python daemon") with the single name "KTX daemon" in CLI output, errors, command descriptions, test assertions, smoke scripts, docs, AGENTS.md, issue templates, and codecov flags. The daemon is a portable compute server with endpoints for SQL analysis, semantic layer, LookML, database introspection, and embeddings; the previous labels misrepresented it as embeddings-only or exposed implementation details ("Python", "managed"). The "KTX Python runtime" concept (installed interpreter + packages) is deliberately left as-is — it is a separate concept from the daemon process. * refactor(release): drop release-policy.json runtime dep and next branch Strips the release-policy.json fallback from release-version.ts so the CLI reads its version straight from packages/cli/package.json. dev → 0.0.0-private, installed @kaelio/ktx → the real semver baked into the published package.json. KtxCliPackageInfo collapses to { name, version, contextPackageName }; /health no longer depends on version files surviving past a CI run. Replaces the dual-branch (main + next) semantic-release model with a single- branch model on main. rcs and stables interleave on the same branch via { name: 'main', prerelease: 'rc', channel: 'next' } / ['main']. Drops @semantic-release/git and @semantic-release/changelog (nothing is committed back to the repo on any channel) and the workflow's "Prepare next prerelease branch" step plus the KTX_PRERELEASE_BRANCH plumbing. The git tag plus the published npm artifact carry the version forward. Updates docs/release.md, removes the two now-unused devDeps, regenerates pnpm-lock.yaml. 611/611 @ktx/cli tests, 173/173 script tests, type-check, biome, knip all clean. * fix(release): don't throw on non-main branches at config-load time knip loads .releaserc.cjs on every PR run, where GITHUB_REF_NAME is the merge ref (e.g. 180/merge). The previous version of releaseBranches threw immediately when the branch wasn't main, which made knip fail to evaluate the config and then mis-flag @semantic-release/exec as an unused dep. semantic-release already refuses to publish when the current branch doesn't match a configured release branch, so the explicit throw was redundant. Drop it (and the unused currentBranch helper) and replace the "rejects releases from non-main" assertion with one that exercises a CI- shaped GITHUB_REF_NAME and confirms the config loads.
2026-05-20 13:53:14 +02:00
it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => {
await initKtxProject({ projectDir: tempDir });
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
const createLocalIngestAdapters = vi.fn(() => []);
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo();
const runtimeIo = makeIo({ isTTY: true });
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
await expect(
runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
io.io,
{ runLocalScan, createLocalIngestAdapters, runtimeIo: runtimeIo.io } as KtxScanDeps & {
runtimeIo: typeof runtimeIo.io;
},
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
),
).resolves.toBe(0);
expect(createLocalIngestAdapters).toHaveBeenCalledWith(expect.objectContaining({ projectDir: tempDir }), {
managedDaemon: {
cliVersion: '0.2.0',
projectDir: tempDir,
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
installPolicy: 'auto',
io: runtimeIo.io,
feat: npm-managed Python runtime for @kaelio/ktx (#7) * docs: add npm managed python runtime design * build: add bundled python runtime wheel builder * build: make local embedding dependencies optional * build: bundle python runtime wheel in cli artifacts * build: track bundled python runtime release artifact * test: verify bundled python runtime wheel * docs: add plan for bundled python runtime wheel * test: cover managed python runtime lifecycle * feat: add managed python runtime installer * feat: add runtime command runner * feat: expose runtime management commands * test: verify managed python runtime commands * docs: add plan for managed python runtime installer * feat: add managed python command helper * feat: use managed runtime for sl query compute * feat: route sl query managed runtime policy * docs: add plan for managed runtime sl query integration * feat: add managed runtime daemon metadata * feat: manage python daemon lifecycle * feat: add runtime daemon start stop commands * fix: verify managed runtime daemon lifecycle * docs: add plan for managed runtime daemon lifecycle * feat: add managed local embeddings config marker * feat: add managed local embeddings daemon helper * feat: use managed runtime for local embedding setup * feat: pass managed runtime policy through setup * docs: add plan for managed local embeddings runtime * feat: read CLI package metadata dynamically * feat: assemble public kaelio ktx npm package * feat: release one public kaelio ktx npm artifact * test: cover public kaelio ktx package invocations * chore: verify public kaelio ktx package artifacts * docs: add plan for public kaelio ktx npm package * test: verify managed runtime in public package smoke * test: finalize managed runtime release smoke * docs: add plan for managed runtime release smoke * test: specify local embeddings release smoke * feat: add local embeddings runtime smoke * chore: register local embeddings smoke * fix: verify local embeddings smoke * fix: restore artifact smoke python env helper * docs: add plan for managed local embeddings release smoke * refactor: share managed runtime install policy parsing * feat: use managed runtime for agent semantic queries * feat: use managed runtime for MCP semantic compute * docs: add plan for managed agent and MCP semantic runtime * feat(cli): add managed daemon HTTP helpers * feat(cli): route local adapters through managed daemon * feat(cli): use managed daemon for ingest helpers * feat(cli): pass managed daemon options to scan * feat(context): pass MCP ingest pull config options * feat(cli): pass managed daemon options to serve ingest * test: verify managed local ingest daemon runtime * docs: add plan for managed local ingest daemon runtime * docs: align managed runtime examples * docs: add plan for managed runtime docs cleanup * test: cover published package runtime smoke commands * test: validate published package smoke outputs * docs: add plan for published package runtime smoke * build: stamp public npm package version * release: add npm public release policy * release: add guarded npm publish script * release: document public npm release handoff * docs: add plan for public npm release handoff * test: cover managed runtime prune in package smoke * docs: document managed runtime prune * docs: add plan for managed runtime prune smoke and docs * chore: encode uv runtime prerequisite policy * fix: clarify missing uv runtime error * docs: document uv runtime prerequisite * docs: add plan for uv runtime prerequisite contract * refactor: limit release artifacts to public package runtime * chore: align release policy with bundled runtime wheel * docs: describe single public runtime artifact surface * test: verify single public runtime artifact contract * docs: add plan for single public runtime artifact cleanup * fix: align local embeddings smoke with public version * docs: add plan for local embeddings smoke public version * release: soft-launch as @kaelio/ktx@0.1.0-rc.0 on next tag Publish target moves to the pre-release version 0.1.0-rc.0 under the next dist-tag so npm install @kaelio/ktx (which resolves to latest) does not pick up the soft-launch build. Users opt in via @kaelio/ktx@next. * Fix release script boundary checks * Remove PostHog from public package bundle
2026-05-11 15:50:34 +02:00
},
});
});
chore(workspace): gate dead-code with knip production mode (#196) * refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
it('uses runtime IO when resolving managed embedding runtime', async () => {
await initKtxProject({ projectDir: tempDir });
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const resolveEmbeddingProvider = vi.fn(async () => ({ kind: 'disabled' as const }));
const io = makeIo();
const runtimeIo = makeIo({ isTTY: true });
await expect(
runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
cliVersion: '0.2.0',
runtimeInstallPolicy: 'auto',
},
io.io,
{
runLocalScan,
createLocalIngestAdapters: noLocalIngestAdapters,
runtimeIo: runtimeIo.io,
resolveEmbeddingProvider,
},
),
).resolves.toBe(0);
expect(resolveEmbeddingProvider).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
installPolicy: 'auto',
io: runtimeIo.io,
}),
);
});
2026-05-10 23:12:26 +02:00
it('explains warnings, capability gaps, and relationships in human scan summaries', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'relationships',
dryRun: false,
syncId: 'sync-1',
report: reportWithAttention,
}),
);
const io = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Semantic layer comparison found 5 changes across 18 tables');
expect(io.stdout()).toContain('New columns: 18');
expect(io.stdout()).toContain('Changed columns: 5');
expect(io.stdout()).toContain('Relationships\n');
expect(io.stdout()).toContain('Accepted: 7');
expect(io.stdout()).toContain('Review: 3');
expect(io.stdout()).toContain('Rejected: 2');
expect(io.stdout()).toContain('Skipped: 4');
expect(io.stdout()).toContain('Needs attention\n');
expect(io.stdout()).toContain('2 warnings');
expect(io.stdout()).toContain('1 capability gap');
expect(io.stdout()).toContain('columnStats is unavailable; relationship confidence may be lower.');
expect(io.stdout()).toContain(
'relationship_validation_failed: orders.customer_id: Could not validate relationship orders.customer_id -> customers.id',
);
expect(io.stdout()).not.toContain('+3');
expect(io.stdout()).not.toContain('~2');
expect(io.stdout()).not.toContain('=13');
});
it('prints review-only relationship summaries and validation capability warnings', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:51:24 +02:00
const reviewOnlyReport: KtxScanReport = {
2026-05-10 23:12:26 +02:00
...reportWithAttention,
capabilityGaps: [],
warnings: [
{
code: 'connector_capability_missing',
2026-05-10 23:51:24 +02:00
message: 'KTX scan connector cannot run read-only SQL relationship validation',
2026-05-10 23:12:26 +02:00
recoverable: true,
metadata: { capability: 'readOnlySql' },
},
],
relationships: { accepted: 0, review: 12, rejected: 44, skipped: 0 },
};
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-review',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'relationships',
dryRun: false,
syncId: 'sync-review',
report: reviewOnlyReport,
}),
);
const io = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(io.stdout()).toContain('Relationships');
expect(io.stdout()).toContain('Accepted: 0');
expect(io.stdout()).toContain('Review: 12');
expect(io.stdout()).toContain('Rejected: 44');
expect(io.stdout()).toContain(
2026-05-10 23:51:24 +02:00
'connector_capability_missing: KTX scan connector cannot run read-only SQL relationship validation',
2026-05-10 23:12:26 +02:00
);
});
it('passes a scan progress port and prints TTY progress messages', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise<LocalScanRunResult> => {
await input.progress?.update(0.15, 'Inspecting database schema');
await input.progress?.update(0.55, 'Semantic layer comparison found 5 changes across 18 tables');
return {
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'relationships',
dryRun: false,
syncId: 'sync-1',
report: reportWithAttention,
};
});
const io = makeIo({ isTTY: true });
const previousCi = process.env.CI;
delete process.env.CI;
try {
2026-05-10 23:51:24 +02:00
const exitCode = await runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
);
expect({ exitCode, stderr: io.stderr() }).toEqual({ exitCode: 0, stderr: '' });
} finally {
if (previousCi === undefined) {
delete process.env.CI;
} else {
process.env.CI = previousCi;
}
}
expect(runLocalScan.mock.calls[0]?.[0].progress).toBeDefined();
expect(io.stdout()).toContain('[15%] Inspecting database schema');
expect(io.stdout()).toContain('[55%] Semantic layer comparison found 5 changes across 18 tables');
});
it('uses injected structured progress without requiring TTY progress output', async () => {
await initKtxProject({ projectDir: tempDir });
const progressEvents: Array<{ progress: number; message?: string; transient?: boolean }> = [];
const structuredProgress = {
async update(progress: number, message?: string, options?: { transient?: boolean }) {
progressEvents.push({
progress,
...(message !== undefined ? { message } : {}),
...(options?.transient !== undefined ? { transient: options.transient } : {}),
});
},
startPhase() {
return structuredProgress;
},
};
const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise<LocalScanRunResult> => {
await input.progress?.update(0.42, 'Generating descriptions 4/10 tables', { transient: true });
return {
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
};
});
const io = makeIo();
await expect(
runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters, progress: structuredProgress },
),
).resolves.toBe(0);
expect(progressEvents).toContainEqual({
progress: 0.42,
message: 'Generating descriptions 4/10 tables',
transient: true,
});
expect(io.stdout()).not.toContain('[42%] Generating descriptions 4/10 tables');
});
2026-05-10 23:12:26 +02:00
it('updates transient TTY progress messages in place', async () => {
const io = makeIo({ isTTY: true });
const previousCi = process.env.CI;
delete process.env.CI;
try {
const progress = createCliScanProgress(io.io);
await progress.update(0.84, 'Generating descriptions 1/35 tables', { transient: true });
await progress.update(0.85, 'Generating descriptions 2/35 tables', { transient: true });
await progress.update(0.9, 'Building embeddings 1/4 batches');
} finally {
if (previousCi === undefined) {
delete process.env.CI;
} else {
process.env.CI = previousCi;
}
}
expect(io.stdout()).toContain('\r[84%] Generating descriptions 1/35 tables');
expect(io.stdout()).toContain('\r[85%] Generating descriptions 2/35 tables');
expect(io.stdout()).toContain('\n[90%] Building embeddings 1/4 batches\n');
});
it('scales nested progress phases by the parent phase weight', async () => {
const io = makeIo({ isTTY: true });
const previousCi = process.env.CI;
delete process.env.CI;
try {
const progress = createCliScanProgress(io.io);
await progress.update(0.82, 'Enriching schema metadata');
const enrichmentProgress = progress.startPhase(0.18);
await enrichmentProgress.update(0.05, 'Loaded schema snapshot with 56 tables');
const descriptionProgress = enrichmentProgress.startPhase(0.45);
await descriptionProgress.update(37 / 56, 'Generating descriptions 37/56 tables', { transient: true });
await descriptionProgress.update(1, 'Generated descriptions for 56 tables');
} finally {
if (previousCi === undefined) {
delete process.env.CI;
} else {
process.env.CI = previousCi;
}
}
expect(io.stdout()).toContain('\r[88%] Generating descriptions 37/56 tables');
expect(io.stdout()).toContain('\n[91%] Generated descriptions for 56 tables\n');
expect(io.stdout()).not.toContain('[100%] Generating descriptions 37/56 tables');
});
2026-05-10 23:12:26 +02:00
it('flushes transient TTY progress messages before printing scan failures', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise<LocalScanRunResult> => {
await input.progress?.update(0.42, 'Generating descriptions 3/35 tables', { transient: true });
throw new Error('scan failed');
});
const io = makeIo({ isTTY: true });
const previousCi = process.env.CI;
delete process.env.CI;
try {
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: () => [] },
),
).resolves.toBe(1);
} finally {
if (previousCi === undefined) {
delete process.env.CI;
} else {
process.env.CI = previousCi;
}
}
expect(io.stdout()).toContain('\r[42%] Generating descriptions 3/35 tables\u001b[K\n');
expect(io.stderr()).toBe('scan failed\n');
});
it('does not print live progress messages for non-TTY output', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(async (input: RunLocalScanOptions): Promise<LocalScanRunResult> => {
await input.progress?.update(0.15, 'Inspecting database schema');
return {
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
};
});
const io = makeIo();
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(io.stdout()).not.toContain('[15%]');
expect(io.stdout()).not.toContain('Inspecting database schema');
});
it('uses terminal-aware visual styling only for TTY output', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo({ isTTY: true });
const previousNoColor = process.env.NO_COLOR;
const previousCi = process.env.CI;
const previousTerm = process.env.TERM;
delete process.env.NO_COLOR;
delete process.env.CI;
process.env.TERM = 'xterm-256color';
try {
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
} finally {
if (previousNoColor === undefined) {
delete process.env.NO_COLOR;
} else {
process.env.NO_COLOR = previousNoColor;
}
if (previousCi === undefined) {
delete process.env.CI;
} else {
process.env.CI = previousCi;
}
if (previousTerm === undefined) {
delete process.env.TERM;
} else {
process.env.TERM = previousTerm;
}
}
expect(io.stdout()).toContain('✓');
2026-05-10 23:51:24 +02:00
expect(io.stdout()).toContain('KTX scan completed');
2026-05-10 23:12:26 +02:00
expect(io.stdout()).toContain('\u001b[');
});
it('honors NO_COLOR for TTY scan summaries', async () => {
await initKtxProject({ projectDir: tempDir });
2026-05-10 23:12:26 +02:00
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
const io = makeIo({ isTTY: true });
const previousNoColor = process.env.NO_COLOR;
process.env.NO_COLOR = '1';
try {
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
} finally {
if (previousNoColor === undefined) {
delete process.env.NO_COLOR;
} else {
process.env.NO_COLOR = previousNoColor;
}
}
2026-05-10 23:51:24 +02:00
expect(io.stdout()).toContain('KTX scan completed');
2026-05-10 23:12:26 +02:00
expect(io.stdout()).not.toContain('\u001b[');
});
it('passes native CLI adapters into local scan runs for mysql configs', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: mysql',
' url: env:MYSQL_URL',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
await rm(tempProject, { recursive: true, force: true });
});
it('creates a native connector for standalone relationship scans', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-relationships-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: sqlite',
' path: warehouse.db',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'relationships',
dryRun: false,
syncId: 'sync-1',
report: { ...report, mode: 'relationships' },
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'relationships',
detectRelationships: true,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(
expect.objectContaining({
mode: 'relationships',
detectRelationships: true,
connector: expect.objectContaining({ driver: 'sqlite' }),
}),
);
await rm(tempProject, { recursive: true, force: true });
});
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204) * feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure Snowflake setup previously asked for a single schema as free text, then ran a multiselect against the discovered schemas — two schema questions back-to-back, with the first being only a session bootstrap. The SDK's `schema` is optional, so the bootstrap step is unnecessary. - Remove the free-text Snowflake schema prompt; only pass `schema` to snowflake-sdk when one is configured. - When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the user for a comma-separated list, persist it as `schema_names`, and use it as both the table-list filter and the multiselect default. Applies to every driver with a scope-discovery spec, not just Snowflake. - Update docs to lead with `schema_names`; keep `schema_name` as a documented single-schema shorthand. * fix(snowflake): keep introspecting when primary-key discovery is denied The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the connection role may not have. Previously a 'SQL compilation error: Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist or not authorized' aborted the entire introspect — schemas, columns, and row counts were all discarded over a missing nice-to-have. Wrap the constraint query in try/catch, log a one-line warning per schema, and return an empty PK map. Columns end up with primaryKey=false; relationship inference still has FK and profiling to fall back on. * fix(scan): unblock relationship discovery on Snowflake Two adjacent bugs prevented the scan's relationship pipeline from producing any joins on a Snowflake warehouse: - relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table profile query failed with "Unknown function GROUP_CONCAT". Add an explicit Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected). - description-generation.ts destructured `connector.sampleTable` and `connector.sampleColumn` into bare locals, losing the `this` binding when the class-method connectors (Snowflake, Postgres, MySQL) were invoked. Every sample call threw "Cannot read properties of undefined (reading 'assertConnection')" and degraded LLM descriptions to metadata-only prompts. Call the methods through the connector instead. Without these, even after the primary-key probe is allowed to fail softly, the scan ends up with 0 validated relationships and an empty `joins:` block in every shard YAML. * test(scan): cover table-ref helpers * feat(scan): plumb tableScope through live-database introspection port * feat(scan): apply tableScope during metadata fetch * feat(scan): enforce table scope at fetch boundary * feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206) * feat(cli): add RSA key-pair auth option to Snowflake setup wizard Extends the interactive Snowflake setup flow with an authentication-method prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key path (env/file/absolute) and an optional passphrase; the resulting connection config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead of `password`. * feat(scan): pool Snowflake sessions * fix(scan): reuse structural snapshots and cleanup connectors * feat(scan): parallelize relationship profiling * feat(scan): batch table description generation * docs: document Snowflake ingest concurrency knobs * fix(scan): close Snowflake ingest perf verification gaps * fix(scan): keep batched description failure bounded * feat(scan): dispatch query-history probes by connection driver Extract historic-sql dialect resolution into a shared helper so the status-project readiness check and the local ingest factory agree on which connections enable query history and which probe to run. The status command now picks the postgres/snowflake/bigquery probe based on the connection's driver instead of always reporting against postgres, which previously caused snowflake connections with queryHistory.enabled to surface a misleading "driver is snowflake" failure. Also drops a noisy console.warn from Snowflake primary-key discovery — INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only roles and the FK + profiling paths handle the empty PK map already. * fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject The Claude Code agent SDK announces an internal pseudo-tool named StructuredOutput in the system/init message whenever outputFormat is set to { type: 'json_schema' }. The runtime's isolation check built its allowedToolIds set only from MCP tool ids and treated StructuredOutput as an unexpected host-injected tool, so every generateObject call threw "Claude Code runtime isolation failed: tools=StructuredOutput ..." and the table-descriptions and relationship-LLM-proposal enrichment stages recorded null output across the board. Whitelist StructuredOutput specifically in generateObject's allowedToolIds — the check also enforces missing_tools symmetry, so generateText and runAgentLoop, which do not see StructuredOutput, must not require it. generateObject also ran with maxTurns: 1, which the model intermittently breached when it emitted thinking text before the structured response. Raised to 5 to give the schema-bound call enough headroom without allowing unbounded loops. The existing tests now exercise the path with an init message that announces StructuredOutput so the regression cannot slip back in. * chore(scripts): add ktx-reset.sh project-cleanup helper Convenience script for repeatable ingest testing: takes a project directory and prunes everything except ktx.yaml and .ktx/secrets/, so the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
it('cleans up a constructed scan connector after an enriched scan succeeds', async () => {
await initKtxProject({ projectDir: tempDir });
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: snowflake',
' account: acct',
' warehouse: WH',
' database: ANALYTICS',
' schema_name: PUBLIC',
' username: reader',
' password: env:SNOWFLAKE_PASSWORD',
'',
].join('\n'),
'utf-8',
);
snowflakeConnectorInstances.length = 0;
const runLocalScan = vi.fn(async (): Promise<LocalScanRunResult> => ({
runId: 'scan-run-cleanup',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'enriched',
dryRun: false,
syncId: 'sync-1',
report: { ...report, mode: 'enriched' },
}));
await expect(
runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'enriched',
detectRelationships: false,
dryRun: false,
},
makeIo().io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
),
).resolves.toBe(0);
expect(snowflakeConnectorInstances[0]?.cleanup).toHaveBeenCalledTimes(1);
});
it('cleans up a constructed scan connector after runLocalScan throws', async () => {
await initKtxProject({ projectDir: tempDir });
await writeFile(
join(tempDir, 'ktx.yaml'),
[
'connections:',
' warehouse:',
' driver: snowflake',
' account: acct',
' warehouse: WH',
' database: ANALYTICS',
' schema_name: PUBLIC',
' username: reader',
' password: env:SNOWFLAKE_PASSWORD',
'',
].join('\n'),
'utf-8',
);
snowflakeConnectorInstances.length = 0;
const runLocalScan = vi.fn(async () => {
throw new Error('scan failed');
});
await expect(
runKtxScan(
{
command: 'run',
projectDir: tempDir,
connectionId: 'warehouse',
mode: 'relationships',
detectRelationships: true,
dryRun: false,
},
makeIo().io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
),
).resolves.toBe(1);
expect(snowflakeConnectorInstances[0]?.cleanup).toHaveBeenCalledTimes(1);
});
2026-05-10 23:12:26 +02:00
it('routes standalone postgres scans through the native connector before daemon fallback', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-postgres-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: postgres',
' host: db.example.test',
' database: analytics',
' username: reader',
' password: env:POSTGRES_PASSWORD',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report,
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{
runLocalScan,
createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createPostgresLiveDatabaseIntrospection)],
},
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
const scanOptions = runLocalScan.mock.calls[0]?.[0];
const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database');
if (!liveDatabase?.fetch) {
throw new Error('Expected scan adapters to include a fetch-capable live-database adapter');
}
const stagedDir = join(tempProject, 'postgres-staged');
await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' });
expect(createPostgresLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) });
expect(postgresExtractSchema).toHaveBeenCalledWith('warehouse');
await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain(
'"connectionId": "warehouse"',
);
await rm(tempProject, { recursive: true, force: true });
});
it('passes native CLI adapters into local scan runs for clickhouse configs', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-clickhouse-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: clickhouse',
' host: env:CLICKHOUSE_HOST',
' database: analytics',
' username: reader',
' password: env:CLICKHOUSE_PASSWORD',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report: { ...report, driver: 'clickhouse' },
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{ runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
await rm(tempProject, { recursive: true, force: true });
});
it('passes native CLI adapters into local scan runs for sqlserver configs', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-sqlserver-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: sqlserver',
' host: env:SQLSERVER_HOST',
' database: analytics',
' username: reader',
' schema: dbo',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report: { ...report, driver: 'sqlserver' },
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{
runLocalScan,
createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createSqlServerLiveDatabaseIntrospection)],
},
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
const scanOptions = runLocalScan.mock.calls[0]?.[0];
const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database');
if (!liveDatabase?.fetch) {
throw new Error('Expected scan adapters to include a fetch-capable live-database adapter');
}
const stagedDir = join(tempProject, 'sqlserver-staged');
await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' });
expect(createSqlServerLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) });
expect(sqlServerExtractSchema).toHaveBeenCalledWith('warehouse');
await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain(
'"connectionId": "warehouse"',
);
await rm(tempProject, { recursive: true, force: true });
});
it('passes native CLI adapters into local scan runs for bigquery configs', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-bigquery-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: bigquery',
' dataset_id: analytics',
' credentials_json: env:BIGQUERY_CREDENTIALS_JSON',
' location: US',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report: { ...report, driver: 'bigquery' },
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{
runLocalScan,
createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createBigQueryLiveDatabaseIntrospection)],
},
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
const scanOptions = runLocalScan.mock.calls[0]?.[0];
const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database');
if (!liveDatabase?.fetch) {
throw new Error('Expected scan adapters to include a fetch-capable live-database adapter');
}
const stagedDir = join(tempProject, 'bigquery-staged');
await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' });
expect(createBigQueryLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) });
expect(bigQueryExtractSchema).toHaveBeenCalledWith('warehouse');
await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain(
'"connectionId": "warehouse"',
);
await rm(tempProject, { recursive: true, force: true });
});
it('passes native CLI adapters into local scan runs for snowflake configs', async () => {
2026-05-10 23:51:24 +02:00
const tempProject = await mkdtemp(join(tmpdir(), 'ktx-scan-cli-native-snowflake-'));
await initKtxProject({ projectDir: tempProject });
2026-05-10 23:12:26 +02:00
await writeFile(
2026-05-10 23:51:24 +02:00
join(tempProject, 'ktx.yaml'),
2026-05-10 23:12:26 +02:00
[
'connections:',
' warehouse:',
' driver: snowflake',
' authMethod: password',
' account: env:SNOWFLAKE_ACCOUNT',
' warehouse: WH',
' database: ANALYTICS',
' schema_name: PUBLIC',
' username: reader',
'',
].join('\n'),
'utf-8',
);
const io = makeIo();
const runLocalScan = vi.fn(
async (_input: RunLocalScanOptions): Promise<LocalScanRunResult> => ({
runId: 'scan-run-1',
status: 'done',
done: true,
connectionId: 'warehouse',
mode: 'structural',
dryRun: false,
syncId: 'sync-1',
report: { ...report, driver: 'snowflake' },
}),
);
await expect(
2026-05-10 23:51:24 +02:00
runKtxScan(
2026-05-10 23:12:26 +02:00
{
command: 'run',
projectDir: tempProject,
connectionId: 'warehouse',
mode: 'structural',
detectRelationships: false,
dryRun: false,
},
io.io,
{
runLocalScan,
createLocalIngestAdapters: () => [fakeLiveDatabaseAdapter(createSnowflakeLiveDatabaseIntrospection)],
},
2026-05-10 23:12:26 +02:00
),
).resolves.toBe(0);
expect(runLocalScan).toHaveBeenCalledWith(expect.objectContaining({ adapters: expect.any(Array) }));
const scanOptions = runLocalScan.mock.calls[0]?.[0];
const liveDatabase = scanOptions?.adapters?.find((adapter) => adapter.source === 'live-database');
if (!liveDatabase?.fetch) {
throw new Error('Expected scan adapters to include a fetch-capable live-database adapter');
}
const stagedDir = join(tempProject, 'snowflake-staged');
await liveDatabase.fetch(undefined, stagedDir, { connectionId: 'warehouse', sourceKey: 'live-database' });
expect(createSnowflakeLiveDatabaseIntrospection).toHaveBeenCalledWith({ connections: expect.any(Object) });
expect(snowflakeExtractSchema).toHaveBeenCalledWith('warehouse');
await expect(readFile(join(stagedDir, 'connection.json'), 'utf-8')).resolves.toContain(
'"connectionId": "warehouse"',
);
await rm(tempProject, { recursive: true, force: true });
});
});