2026-05-22 18:18:47 +02:00
import { mkdtemp , rm } from 'node:fs/promises' ;
import { tmpdir } from 'node:os' ;
import { join } from 'node:path' ;
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { buildDefaultKtxProjectConfig , type KtxProjectConfig } from '../src/context/project/config.js' ;
import { initKtxProject } from '../src/context/project/project.js' ;
2026-05-22 18:18:47 +02:00
import { afterEach , describe , expect , it , vi } from 'vitest' ;
2026-05-14 01:43:06 +02:00
import {
buildPublicIngestPlan ,
type KtxPublicIngestDeps ,
type KtxPublicIngestProject ,
runKtxPublicIngest ,
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
} from '../src/public-ingest.js' ;
import type { ManagedPythonCommandRuntime } from '../src/managed-python-command.js' ;
2026-05-10 23:12:26 +02:00
2026-05-14 01:43:06 +02:00
/**
 * Builds an in-memory IO double for the public ingest command.
 *
 * Everything written to the fake stdout/stderr streams is appended to local
 * strings, which the returned `stdout()` / `stderr()` accessors expose.
 *
 * @param options.isTTY - Value reported as `stdout.isTTY`.
 * @param options.interactive - When truthy, a `stdin` stub with `isTTY: true`
 *   and a mocked `setRawMode` is added; otherwise no `stdin` key is present.
 * @returns `{ io, stdout, stderr }`: the IO double plus accessors for the
 *   text captured so far.
 */
function makeIo(options: { isTTY?: boolean; interactive?: boolean } = {}) {
  let stdout = '';
  let stderr = '';
  return {
    io: {
      // Spread conditionally so non-interactive doubles carry no `stdin` at all.
      ...(options.interactive
        ? {
            stdin: {
              isTTY: true,
              setRawMode: vi.fn(),
            },
          }
        : {}),
      stdout: {
        isTTY: options.isTTY,
        write: (chunk: string) => {
          stdout += chunk;
        },
      },
      stderr: {
        write: (chunk: string) => {
          stderr += chunk;
        },
      },
    },
    stdout: () => stdout,
    stderr: () => stderr,
  };
}
2026-05-10 23:51:24 +02:00
/**
 * Creates a project fixture rooted at `/tmp/project` whose config is the
 * default project config with `connections` replaced by the given map.
 *
 * @param connections - Connection definitions keyed by connection id.
 * @returns A project object with `projectDir` and `config`.
 */
function projectWithConnections(connections: KtxProjectConfig['connections']): KtxPublicIngestProject {
  return {
    projectDir: '/tmp/project',
    config: {
      ...buildDefaultKtxProjectConfig(),
      connections,
    },
  };
}
2026-05-14 01:43:06 +02:00
/**
 * Creates a project fixture rooted at `/tmp/project` that, on top of the
 * default config, sets an LLM gateway provider, a default model, LLM scan
 * enrichment with embedding settings, and the relationships toggle.
 *
 * NOTE(review): these presumably are the settings deep ingest readiness
 * checks for — confirm against the planner.
 *
 * @param connections - Connection definitions keyed by connection id.
 * @param relationshipsEnabled - Value written to `scan.relationships.enabled`
 *   (defaults to `true`).
 * @returns A project object with `projectDir` and `config`.
 */
function deepReadyProject(
  connections: KtxProjectConfig['connections'],
  relationshipsEnabled = true,
): KtxPublicIngestProject {
  const config = buildDefaultKtxProjectConfig();
  return {
    projectDir: '/tmp/project',
    config: {
      ...config,
      connections,
      llm: {
        ...config.llm,
        provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, // pragma: allowlist secret
        models: { default: 'gpt-test' },
      },
      scan: {
        ...config.scan,
        enrichment: {
          mode: 'llm',
          embeddings: {
            backend: 'openai',
            model: 'text-embedding-3-small',
            dimensions: 1536,
          },
        },
        relationships: {
          ...config.scan.relationships,
          enabled: relationshipsEnabled,
        },
      },
    },
  };
}
2026-05-10 23:12:26 +02:00
// Planning-only tests: each case builds a plan from a fixture project and
// asserts on the resulting targets, warnings, or notices. Nothing is executed.
describe('buildPublicIngestPlan', () => {
  it('plans warehouse connections as scan targets and source connections as source ingest targets', () => {
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
      prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
      docs: { driver: 'notion' },
    });

    // The expected order is not the declaration order: the database target
    // comes first, then 'docs' ahead of 'prod_metabase'.
    expect(buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: true })).toEqual({
      projectDir: '/tmp/project',
      targets: [
        {
          connectionId: 'warehouse',
          driver: 'postgres',
          operation: 'database-ingest',
          debugCommand: 'ktx ingest warehouse --debug',
          steps: ['database-schema'],
          databaseDepth: 'fast',
          detectRelationships: false,
          queryHistory: { enabled: false },
        },
        {
          connectionId: 'docs',
          driver: 'notion',
          operation: 'source-ingest',
          adapter: 'notion',
          debugCommand: 'ktx ingest docs --debug',
          steps: ['source-ingest', 'memory-update'],
        },
        {
          connectionId: 'prod_metabase',
          driver: 'metabase',
          operation: 'source-ingest',
          adapter: 'metabase',
          debugCommand: 'ktx ingest prod_metabase --debug',
          steps: ['source-ingest', 'memory-update'],
        },
      ],
      warnings: [],
    });
  });

  it('treats a bare invocation (no connection id, no --all) as all configured connections', () => {
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
      docs: { driver: 'notion' },
    });
    const plan = buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: false });

    // Sorted before comparing so this case does not depend on target ordering.
    expect(plan.targets.map((target) => target.connectionId).sort()).toEqual(['docs', 'warehouse']);
  });

  it('resolves database depth from flags, stored context, and defaults', () => {
    const project = projectWithConnections({
      fast_default: { driver: 'postgres' },
      deep_default: { driver: 'postgres', context: { depth: 'deep' } },
      docs: { driver: 'notion' },
    });

    // No stored depth and no flag: falls back to fast.
    expect(
      buildPublicIngestPlan(project, {
        projectDir: '/tmp/project',
        targetConnectionId: 'fast_default',
        all: false,
        queryHistory: 'default',
      }).targets[0],
    ).toMatchObject({ connectionId: 'fast_default', databaseDepth: 'fast', queryHistory: { enabled: false } });

    // Stored context depth is honored when no flag is given.
    expect(
      buildPublicIngestPlan(project, {
        projectDir: '/tmp/project',
        targetConnectionId: 'deep_default',
        all: false,
        queryHistory: 'default',
      }).targets[0],
    ).toMatchObject({ connectionId: 'deep_default', databaseDepth: 'deep' });

    // A depth flag aimed at a non-database source only produces a warning.
    expect(
      buildPublicIngestPlan(project, {
        projectDir: '/tmp/project',
        targetConnectionId: 'docs',
        all: false,
        depth: 'deep',
        queryHistory: 'default',
      }).warnings,
    ).toEqual(['--deep affects database ingest only; ignoring it for docs.']);
  });

  it('does not infer deep ingest from legacy scanMode values', () => {
    const project = projectWithConnections({
      warehouse: { driver: 'postgres' },
    });
    const plan = buildPublicIngestPlan(project, {
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      scanMode: 'enriched',
    });

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'warehouse',
      databaseDepth: 'fast',
      steps: ['database-schema'],
    });
  });

  it('rejects stale local Looker source driver aliases', () => {
    const project = projectWithConnections({
      // Cast needed because 'local_looker' is deliberately outside the driver type.
      local_looker: { driver: 'local_looker' } as never,
    });

    expect(() => buildPublicIngestPlan(project, { projectDir: '/tmp/project', all: true })).toThrow(
      'unsupported public ingest driver "local_looker"',
    );
  });

  it('upgrades effective depth when query history is explicitly enabled', () => {
    // Stored config disables query history and the flags ask for fast depth;
    // the explicit query-history flag is expected to win and force deep.
    const project = projectWithConnections({
      warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false } } },
    });
    const plan = buildPublicIngestPlan(project, {
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      depth: 'fast',
      queryHistory: 'enabled',
      queryHistoryWindowDays: 30,
    });

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'warehouse',
      databaseDepth: 'deep',
      queryHistory: { enabled: true, windowDays: 30, dialect: 'postgres' },
    });
    expect(plan.warnings).toEqual(['--query-history requires deep ingest; running warehouse with --deep.']);
  });

  it('warns and skips query history for unsupported database drivers', () => {
    const project = projectWithConnections({ local: { driver: 'sqlite' } });
    const plan = buildPublicIngestPlan(project, {
      projectDir: '/tmp/project',
      targetConnectionId: 'local',
      all: false,
      queryHistory: 'enabled',
    });

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'local',
      databaseDepth: 'fast',
      queryHistory: { enabled: false, unsupported: true },
    });
    expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']);
  });

  it('aggregates unsupported query-history warnings for all database targets', () => {
    const plan = buildPublicIngestPlan(
      deepReadyProject({
        local: { driver: 'sqlite' },
        mysql_warehouse: { driver: 'mysql' },
        warehouse: { driver: 'postgres', context: { depth: 'deep' } },
      }),
      {
        projectDir: '/tmp/project',
        all: true,
        depth: 'deep',
        queryHistory: 'enabled',
      },
    );

    expect(plan.targets).toEqual([
      expect.objectContaining({
        connectionId: 'local',
        queryHistory: { enabled: false, unsupported: true },
        steps: ['database-schema'],
      }),
      expect.objectContaining({
        connectionId: 'mysql_warehouse',
        queryHistory: { enabled: false, unsupported: true },
        steps: ['database-schema'],
      }),
      expect.objectContaining({
        connectionId: 'warehouse',
        queryHistory: expect.objectContaining({ enabled: true, dialect: 'postgres' }),
        steps: ['database-schema', 'query-history'],
      }),
    ]);
    // One combined warning is expected rather than one per connection.
    expect(plan.warnings).toEqual([
      '--query-history is not supported for 2 database connections (mysql, sqlite); running schema ingest for those connections.',
    ]);
  });

  it('aggregates stored unsupported query-history config warnings for all database targets', () => {
    // Here query history is requested by stored config, not by a flag.
    const plan = buildPublicIngestPlan(
      projectWithConnections({
        local: { driver: 'sqlite', context: { queryHistory: { enabled: true } } },
        mysql_warehouse: { driver: 'mysql', context: { queryHistory: { enabled: true } } },
      }),
      {
        projectDir: '/tmp/project',
        all: true,
        queryHistory: 'default',
      },
    );

    expect(plan.targets).toEqual([
      expect.objectContaining({
        connectionId: 'local',
        queryHistory: { enabled: false, unsupported: true },
        steps: ['database-schema'],
      }),
      expect.objectContaining({
        connectionId: 'mysql_warehouse',
        queryHistory: { enabled: false, unsupported: true },
        steps: ['database-schema'],
      }),
    ]);
    expect(plan.warnings).toEqual([
      '2 database connections have query history enabled in ktx.yaml, but their drivers do not support it; running schema ingest for those connections.',
    ]);
  });

  it('treats query-history window override as current-run query-history enablement', () => {
    // Stored config has query history off with a 90-day window; passing a
    // window override alone is expected to turn it on with the new window.
    const project = deepReadyProject({
      warehouse: { driver: 'postgres', context: { queryHistory: { enabled: false, windowDays: 90 } } },
    });
    const plan = buildPublicIngestPlan(project, {
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      queryHistory: 'default',
      queryHistoryWindowDays: 30,
    });

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'warehouse',
      databaseDepth: 'deep',
      queryHistory: { enabled: true, dialect: 'postgres', windowDays: 30 },
      steps: ['database-schema', 'query-history'],
    });
  });

  it('adds a schema-first notice when query history is explicitly enabled', () => {
    const project = deepReadyProject({
      warehouse: { driver: 'postgres', context: { depth: 'deep' } },
    });

    expect(
      buildPublicIngestPlan(project, {
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        queryHistory: 'enabled',
      }).notices,
    ).toEqual(['Schema ingest runs before query history for warehouse.']);
  });

  it('warns and skips query-history window override for unsupported database drivers', () => {
    const plan = buildPublicIngestPlan(
      projectWithConnections({
        local: { driver: 'sqlite' },
      }),
      {
        projectDir: '/tmp/project',
        targetConnectionId: 'local',
        all: false,
        queryHistory: 'default',
        queryHistoryWindowDays: 30,
      },
    );

    // The requested window is still recorded on the target even though the
    // driver is flagged as unsupported.
    expect(plan.targets[0]).toMatchObject({
      connectionId: 'local',
      databaseDepth: 'fast',
      queryHistory: { enabled: false, windowDays: 30, unsupported: true },
      steps: ['database-schema'],
    });
    expect(plan.warnings).toEqual(['--query-history is not supported for sqlite; running schema ingest for local.']);
  });

  it('aggregates ignored database-depth warnings for all source targets', () => {
    const plan = buildPublicIngestPlan(
      projectWithConnections({
        warehouse: { driver: 'postgres' },
        docs: { driver: 'notion' },
        dbt: { driver: 'dbt' },
      }),
      {
        projectDir: '/tmp/project',
        all: true,
        depth: 'deep',
        queryHistory: 'default',
      },
    );

    expect(plan.warnings).toEqual(['--deep ignored for 2 non-database sources.']);
  });

  it('records a preflight failure for deep database ingest when readiness config is missing', () => {
    // Uses the plain fixture (not deepReadyProject) so the LLM/enrichment
    // settings are whatever the default config provides.
    const project = projectWithConnections({
      warehouse: { driver: 'postgres', context: { depth: 'deep' } },
    });
    const plan = buildPublicIngestPlan(project, {
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      queryHistory: 'default',
    });

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'warehouse',
      databaseDepth: 'deep',
      preflightFailure:
        'warehouse requires deep ingest readiness: model configuration, scan enrichment mode, scan embeddings. Run ktx setup or rerun with --fast.',
    });
  });

  it('honors scan.relationships.enabled when planning deep database ingest', () => {
    // Second argument `false` disables scan.relationships.enabled in the fixture.
    const plan = buildPublicIngestPlan(
      deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } }, false),
      {
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        queryHistory: 'default',
      },
    );

    expect(plan.targets[0]).toMatchObject({
      connectionId: 'warehouse',
      databaseDepth: 'deep',
      detectRelationships: false,
    });
  });
});
2026-05-10 23:51:24 +02:00
describe ( 'runKtxPublicIngest' , ( ) = > {
2026-05-22 18:18:47 +02:00
// Revert any vi.stubEnv() overrides so env changes do not leak between tests.
afterEach(() => {
  vi.unstubAllEnvs();
});
2026-05-14 01:43:06 +02:00
// Checks how each planned database depth is translated into runScan arguments.
it('maps fast and deep database targets to scan internals', async () => {
  const io = makeIo();
  const project = deepReadyProject({
    fast: { driver: 'postgres' },
    deep: { driver: 'postgres', context: { depth: 'deep' } },
  });
  const runScan = vi.fn(async () => 0);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        all: true,
        json: false,
        inputMode: 'disabled',
        queryHistory: 'default',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan },
    ),
  ).resolves.toBe(0);

  // 'deep' is expected to be scanned first even though 'fast' is declared first.
  expect(runScan).toHaveBeenNthCalledWith(
    1,
    expect.objectContaining({ connectionId: 'deep', mode: 'enriched', detectRelationships: true }),
    expect.anything(),
  );
  expect(runScan).toHaveBeenNthCalledWith(
    2,
    expect.objectContaining({ connectionId: 'fast', mode: 'structural', detectRelationships: false }),
    expect.anything(),
  );
});
2026-05-22 18:18:47 +02:00
// Runs against a real temporary project directory so the test can assert that
// the directory path never appears in the emitted telemetry.
it('emits debug telemetry for ingest targets and project snapshots without project paths', async () => {
  // Env stubs are reverted by the suite's afterEach hook.
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');
  const projectDir = await mkdtemp(join(tmpdir(), 'ktx-public-ingest-telemetry-'));
  try {
    await initKtxProject({ projectDir });
    const io = makeIo({ isTTY: true });
    const project = projectWithConnections({
      warehouse: { driver: 'sqlite', path: join(projectDir, 'warehouse.sqlite') },
    });

    const code = await runKtxPublicIngest(
      {
        command: 'run',
        projectDir,
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan: vi.fn(async () => 0) },
    );

    expect(code).toBe(0);
    expect(io.stderr()).toContain('"event":"ingest_completed"');
    expect(io.stderr()).toContain('"event":"project_stack_snapshot"');
    expect(io.stderr()).not.toContain(projectDir);
  } finally {
    // Always remove the temp directory, including when an assertion fails.
    await rm(projectDir, { recursive: true, force: true });
  }
});
2026-05-14 01:43:06 +02:00
// Verifies that scan and query-history ingest both receive the run options,
// the overridden window, and the injected runtimeIo.
it('runs query history after schema ingest with current-run window override', async () => {
  const io = makeIo();
  const runtimeIo = makeIo({ isTTY: true });
  const project = deepReadyProject({
    warehouse: { driver: 'postgres', context: { queryHistory: { enabled: true, windowDays: 90 } } },
  });
  const runScan = vi.fn(async () => 0);
  const runIngest = vi.fn<NonNullable<KtxPublicIngestDeps['runIngest']>>(async () => 0);
  // The cast adds `runtimeIo` on top of the declared deps type.
  const deps = {
    loadProject: vi.fn(async () => project),
    runScan,
    runIngest,
    runtimeIo: runtimeIo.io,
  } as KtxPublicIngestDeps & { runtimeIo: typeof runtimeIo.io };

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
        cliVersion: '0.0.0-test',
        runtimeInstallPolicy: 'never',
        queryHistory: 'enabled',
        queryHistoryWindowDays: 30,
      },
      io.io,
      deps,
    ),
  ).resolves.toBe(0);

  expect(runScan).toHaveBeenCalledWith(
    expect.objectContaining({ connectionId: 'warehouse', mode: 'enriched' }),
    expect.anything(),
    expect.objectContaining({ runtimeIo: runtimeIo.io }),
  );
  // The 30-day flag value is expected in the override, not the stored 90 days.
  expect(runIngest).toHaveBeenCalledWith(
    expect.objectContaining({
      command: 'run',
      connectionId: 'warehouse',
      adapter: 'historic-sql',
      allowImplicitAdapter: true,
      cliVersion: '0.0.0-test',
      runtimeInstallPolicy: 'never',
      historicSqlPullConfigOverride: expect.objectContaining({ dialect: 'postgres', windowDays: 30 }),
    }),
    expect.anything(),
    expect.objectContaining({ runtimeIo: runtimeIo.io }),
  );
});
// Every stored query-history field except the window should reach the ingest
// override unchanged; `enabled` itself must not be forwarded.
it('preserves configured query-history pull fields while overriding the current-run window', async () => {
  const io = makeIo();
  const project = deepReadyProject({
    warehouse: {
      driver: 'postgres',
      enabled_tables: ['orbit_analytics.int_active_contract_arr'],
      context: {
        queryHistory: {
          enabled: true,
          windowDays: 90,
          minExecutions: 7,
          concurrency: 3,
          staleArchiveAfterDays: 120,
          filters: {
            dropTrivialProbes: true,
            serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
            orchestrators: { mode: 'mark-only' },
            dropFailedBelow: { errorRate: 0.5, executions: 3 },
          },
          redactionPatterns: ['(?i)secret'],
        },
      },
    },
  });
  const runScan = vi.fn(async () => 0);
  const runIngest = vi.fn<NonNullable<KtxPublicIngestDeps['runIngest']>>(async () => 0);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
        queryHistory: 'enabled',
        queryHistoryWindowDays: 30,
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan, runIngest },
    ),
  ).resolves.toBe(0);

  // Narrow the first call's argument to the 'run' variant of the ingest args.
  const ingestArgs = runIngest.mock.calls[0]?.[0] as
    | Extract<Parameters<NonNullable<KtxPublicIngestDeps['runIngest']>>[0], { command: 'run' }>
    | undefined;

  expect(ingestArgs).toMatchObject({
    command: 'run',
    connectionId: 'warehouse',
    adapter: 'historic-sql',
    allowImplicitAdapter: true,
    historicSqlPullConfigOverride: {
      dialect: 'postgres',
      windowDays: 30,
      minExecutions: 7,
      concurrency: 3,
      staleArchiveAfterDays: 120,
      filters: {
        dropTrivialProbes: true,
        serviceAccounts: { patterns: ['^svc_'], mode: 'exclude' },
        orchestrators: { mode: 'mark-only' },
        dropFailedBelow: { errorRate: 0.5, executions: 3 },
      },
      redactionPatterns: ['(?i)secret'],
      enabledTables: ['orbit_analytics.int_active_contract_arr'],
    },
  });
  expect(ingestArgs?.historicSqlPullConfigOverride).not.toHaveProperty('enabled');
});
// The planner's schema-first notice should be written to stdout during a run.
it('prints the schema-first notice for explicit query-history runs', async () => {
  const io = makeIo();
  const project = deepReadyProject({
    warehouse: { driver: 'postgres', context: { depth: 'deep' } },
  });
  const runScan = vi.fn(async () => 0);
  const runIngest = vi.fn(async () => 0);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
        queryHistory: 'enabled',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan, runIngest },
    ),
  ).resolves.toBe(0);

  expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.');
});
// The fake scan writes its own report lines; none of them may reach the
// public stdout, which should only carry the ingest summary.
it('suppresses internal scan output for public database ingest summaries', async () => {
  const io = makeIo();
  const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
  const runScan = vi.fn(async (_args, scanIo) => {
    scanIo.stdout.write('KTX scan completed\n');
    scanIo.stdout.write('Mode: structural\n');
    scanIo.stdout.write('Report: raw-sources/warehouse/live-database/sync-1/scan-report.json\n');
    scanIo.stdout.write('Raw sources: raw-sources/warehouse/live-database/sync-1\n');
    return 0;
  });

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan },
    ),
  ).resolves.toBe(0);

  expect(io.stdout()).toContain('Ingest finished\n');
  expect(io.stdout()).toContain('warehouse');
  expect(io.stdout()).not.toContain('KTX scan completed');
  expect(io.stdout()).not.toContain('Mode: structural');
  expect(io.stdout()).not.toContain('Report: raw-sources');
  expect(io.stdout()).not.toContain('live-database');
});
// A failing scan's internal wording should be rewritten into the public
// failure message, together with a retry hint.
it('sanitizes captured database scan failure details in direct public output', async () => {
  const io = makeIo();
  const project = deepReadyProject({ warehouse: { driver: 'postgres', context: { depth: 'deep' } } });
  const runScan = vi.fn(async (_args, scanIo) => {
    scanIo.stdout.write('KTX scan enrichment failed after structural scan completed: embedding service timed out\n');
    return 1;
  });

  // The scan's non-zero exit code is expected to propagate to the caller.
  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
        depth: 'deep',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan },
    ),
  ).resolves.toBe(1);

  expect(io.stdout()).toContain(
    'warehouse failed: Database enrichment failed after schema context completed: embedding service timed out.',
  );
  expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --deep');
  expect(io.stdout()).not.toContain('KTX scan enrichment failed');
  expect(io.stdout()).not.toContain('structural scan');
});
// The fake source ingest writes report lines; the public stdout should show
// only the summary and stderr should stay empty.
it('suppresses lower-level source report output during direct public source ingest', async () => {
  const io = makeIo();
  const project = projectWithConnections({
    docs: { driver: 'notion' },
  });
  const runIngest = vi.fn(async (_args, ingestIo) => {
    ingestIo.stdout.write('Report: report-docs-1\n');
    ingestIo.stdout.write('Adapter: notion\n');
    ingestIo.stdout.write('Saved memory: 2 wiki, 0 SL\n');
    return 0;
  });

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'docs',
        all: false,
        json: false,
        inputMode: 'disabled',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runIngest },
    ),
  ).resolves.toBe(0);

  expect(io.stdout()).toContain('Ingest finished');
  expect(io.stdout()).toContain('docs');
  expect(io.stdout()).toContain('Source ingest');
  expect(io.stdout()).not.toContain('Report: report-docs-1');
  expect(io.stdout()).not.toContain('Adapter:');
  expect(io.stdout()).not.toContain('notion\n');
  expect(io.stderr()).toBe('');
});
// Same suppression check as for source ingest, but for the query-history
// ingest step that follows a database scan.
it('suppresses historic-sql report output during direct public query-history ingest', async () => {
  const io = makeIo();
  const project = deepReadyProject({
    warehouse: { driver: 'postgres', context: { depth: 'deep' } },
  });
  const runScan = vi.fn(async () => 0);
  const runIngest = vi.fn(async (_args, ingestIo) => {
    ingestIo.stdout.write('Report: report-query-history-1\n');
    ingestIo.stdout.write('Adapter: historic-sql\n');
    ingestIo.stdout.write('Saved memory: 1 wiki, 1 SL\n');
    return 0;
  });

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
        queryHistory: 'enabled',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runScan, runIngest },
    ),
  ).resolves.toBe(0);

  expect(io.stdout()).toContain('Schema ingest runs before query history for warehouse.');
  expect(io.stdout()).toContain('Ingest finished');
  expect(io.stdout()).toContain('warehouse');
  expect(io.stdout()).toContain('done');
  expect(io.stdout()).not.toContain('Report: report-query-history-1');
  expect(io.stdout()).not.toContain('Adapter:');
  expect(io.stdout()).not.toContain('historic-sql');
  expect(io.stderr()).toBe('');
});
// With a TTY stdout, an interactive stdin and inputMode 'auto', the run is
// expected to go through runContextBuild and never call runScan directly.
it('delegates interactive TTY public ingest to the foreground context-build view', async () => {
  const io = makeIo({ isTTY: true, interactive: true });
  const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
  const runContextBuild = vi.fn(async () => ({ exitCode: 0 }));
  const runScan = vi.fn(async () => 0);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'auto',
        depth: 'fast',
        queryHistory: 'default',
      },
      io.io,
      { loadProject: vi.fn(async () => project), runContextBuild, runScan },
    ),
  ).resolves.toBe(0);

  expect(runContextBuild).toHaveBeenCalledWith(
    project,
    expect.objectContaining({
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      entrypoint: 'ingest',
      depth: 'fast',
      queryHistory: 'default',
    }),
    io.io,
  );
  expect(runScan).not.toHaveBeenCalled();
});
2026-05-17 10:27:29 +02:00
// Records the order in which the runtime preflight and the context-build view are
// invoked, and checks that the runtime is ensured first with the 'core' feature.
it('preflights foreground query-history runtime before starting the context-build view', async () => {
  const io = makeIo({ isTTY: true, interactive: true });
  const callOrder: string[] = [];
  const project = projectWithConnections({
    warehouse: { driver: 'postgres', context: { depth: 'deep' } },
  });
  const ensureRuntime = vi.fn(async (): Promise<ManagedPythonCommandRuntime> => {
    callOrder.push('runtime');
    // An empty stand-in is enough; the test only inspects how the stub was called.
    return {} as ManagedPythonCommandRuntime;
  });
  const runContextBuild = vi.fn(async () => {
    callOrder.push('context-build');
    return { exitCode: 0 };
  });
  const loadProject = vi.fn(async () => project);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'auto',
        queryHistory: 'enabled',
        cliVersion: '0.2.0',
        runtimeInstallPolicy: 'prompt',
      },
      io.io,
      { loadProject, ensureRuntime, runContextBuild },
    ),
  ).resolves.toBe(0);

  expect(callOrder).toEqual(['runtime', 'context-build']);
  const expectedRuntimeRequest = expect.objectContaining({
    cliVersion: '0.2.0',
    installPolicy: 'prompt',
    feature: 'core',
  });
  expect(ensureRuntime).toHaveBeenCalledWith(expectedRuntimeRequest);
});
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm
* refactor(workspace): rewrite @ktx/llm imports to relative paths
* refactor(workspace): fold internal packages into cli
* chore(workspace): gate dead-code with knip production mode
Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.
* refactor(cli): delete internal barrel index.ts files
The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).
This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
(the published package entry).
- Rewrites ~270 source/test files to import each name directly from
the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
`create-warehouse-verification-tools.ts` (the function it defined
locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
live-database/extracted-schema, live-database/structural-sync,
relationship-* feedback/review chain) plus their tests and a
cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
(notion-client, connector barrels in scan/local-scan-connectors
tests) to mock the source files instead.
- Points the maintainer benchmark script
(`scripts/relationship-benchmark-report.mjs`) at source files
instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
production entries only for the benchmark code reached via dist by
the maintainer script.
Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.
`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.
* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly
Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.
Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.
* docs: align "agent clients" and "data agents" terminology
Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.
Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.
* refactor(release): single source of truth for package version
Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.
Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.
- update-public-release-version.mjs rewrites both Python pyproject.toml
files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
@semantic-release/git assets so the release commit back to main
carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
replaced with "?? getKtxCliPackageInfo().version", and
createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
always reflects the most recent release; no sentinel pin to
maintain.
Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.
* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime
Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.
* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal
Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.
* fix(cli): use real package metadata in print-command-tree
The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.
* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts
Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.
Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
2026-05-21 15:28:58 +02:00
// A project whose ingest embeddings backend is 'sentence-transformers' should cause the
// runtime preflight to be requested with the 'local-embeddings' feature, after which the
// context-build view still runs.
it('preflights foreground managed embeddings runtime before starting the context-build view', async () => {
  const io = makeIo({ isTTY: true, interactive: true });
  const baseConfig = buildDefaultKtxProjectConfig();
  // Default config plus one postgres connection and a sentence-transformers embedding setup.
  const project: KtxPublicIngestProject = {
    projectDir: '/tmp/project',
    config: {
      ...baseConfig,
      connections: {
        warehouse: { driver: 'postgres' },
      },
      ingest: {
        ...baseConfig.ingest,
        embeddings: {
          backend: 'sentence-transformers',
          model: 'all-MiniLM-L6-v2',
          dimensions: 384,
        },
      },
    },
  };
  const ensureRuntime = vi.fn(
    async (): Promise<ManagedPythonCommandRuntime> => ({}) as ManagedPythonCommandRuntime,
  );
  const runContextBuild = vi.fn(async () => {
    return { exitCode: 0 };
  });
  const loadProject = vi.fn(async () => project);

  await expect(
    runKtxPublicIngest(
      {
        command: 'run',
        projectDir: '/tmp/project',
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'auto',
        queryHistory: 'default',
        cliVersion: '0.2.0',
        runtimeInstallPolicy: 'prompt',
      },
      io.io,
      { loadProject, ensureRuntime, runContextBuild },
    ),
  ).resolves.toBe(0);

  const expectedRuntimeRequest = expect.objectContaining({
    cliVersion: '0.2.0',
    installPolicy: 'prompt',
    feature: 'local-embeddings',
  });
  expect(ensureRuntime).toHaveBeenCalledWith(expectedRuntimeRequest);
  expect(runContextBuild).toHaveBeenCalled();
});
2026-05-10 23:12:26 +02:00
// `all: true` run over two connections: the warehouse scan stub fails (exit 1) while the
// metabase ingest stub succeeds (exit 0). The command is expected to invoke both, resolve
// with exit code 1, and print a partial-failure summary with a retry hint.
it('runs all independent targets and reports partial failures', async () => {
  const io = makeIo();
  const project = projectWithConnections({
    warehouse: { driver: 'postgres' },
    prod_metabase: { driver: 'metabase', api_url: 'https://metabase.example.com' },
  });
  const runScan = vi.fn(async () => 1);
  const runIngest = vi.fn(async () => 0);

  await expect(
    runKtxPublicIngest(
      { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' },
      io.io,
      {
        loadProject: vi.fn(async () => project),
        runScan,
        runIngest,
      },
    ),
  ).resolves.toBe(1);

  // The metabase connection goes through runIngest (partial match on its arguments).
  expect(runIngest).toHaveBeenCalledWith(
    expect.objectContaining({
      command: 'run',
      projectDir: '/tmp/project',
      connectionId: 'prod_metabase',
      adapter: 'metabase',
      allowImplicitAdapter: true,
      outputMode: 'plain',
      inputMode: 'disabled',
    }),
    expect.anything(),
  );
  // The warehouse connection goes through runScan (exact match on its arguments).
  expect(runScan).toHaveBeenCalledWith(
    {
      command: 'run',
      projectDir: '/tmp/project',
      connectionId: 'warehouse',
      mode: 'structural',
      detectRelationships: false,
      dryRun: false,
    },
    expect.anything(),
  );

  expect(io.stdout()).toContain('Ingest finished with partial failures');
  expect(io.stdout()).toContain('warehouse failed at database-schema.');
  expect(io.stdout()).toContain('Retry: ktx ingest warehouse --project-dir /tmp/project --fast');
  expect(io.stdout()).not.toContain('Debug:');
});
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
it ( 'skips the query-history facet but keeps the target green when query-history fails' , async ( ) = > {
2026-05-14 01:43:06 +02:00
const io = makeIo ( ) ;
const project = deepReadyProject ( {
warehouse : { driver : 'postgres' , context : { depth : 'deep' } } ,
} ) ;
const runScan = vi . fn ( async ( ) = > 0 ) ;
2026-05-17 10:27:29 +02:00
const runIngest = vi . fn ( async ( _args , ingestIo ) = > {
ingestIo . stdout . write (
'Error: Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history: application-default credentials expired or require reauthentication (invalid_grant / invalid_rapt). Run `gcloud auth application-default login`, then retry.\n' ,
) ;
return 1 ;
} ) ;
2026-05-14 01:43:06 +02:00
await expect (
runKtxPublicIngest (
{
command : 'run' ,
projectDir : '/tmp/project' ,
targetConnectionId : 'warehouse' ,
all : false ,
json : false ,
inputMode : 'disabled' ,
queryHistory : 'enabled' ,
} ,
io . io ,
{ loadProject : vi.fn ( async ( ) = > project ) , runScan , runIngest } ,
) ,
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
) . resolves . toBe ( 0 ) ;
2026-05-14 01:43:06 +02:00
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
expect ( io . stdout ( ) ) . toContain ( 'Ingest finished with skipped query history' ) ;
expect ( io . stdout ( ) ) . toMatch ( /warehouse\s+done\s+skipped\s+skipped\s+skipped/ ) ;
expect ( io . stdout ( ) ) . toContain ( 'Skipped query history:' ) ;
2026-05-17 10:27:29 +02:00
expect ( io . stdout ( ) ) . toContain (
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
'Query history failed for 60 tasks. First failure: Google Cloud authentication failed while analyzing query history' ,
2026-05-17 10:27:29 +02:00
) ;
expect ( io . stdout ( ) ) . not . toContain ( 'warehouse failed: Error:' ) ;
2026-05-14 01:43:06 +02:00
expect ( io . stdout ( ) ) . toContain ( 'Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history' ) ;
expect ( io . stdout ( ) ) . not . toContain ( 'historic-sql' ) ;
} ) ;
2026-05-17 01:04:44 +02:00
// Scenario: the ingest stub reports missing runtime assets on stderr and exits 1.
// The public command is still expected to resolve to 0, echo the manifest and
// build hints on stdout, print its own retry command, and leave out the stub's
// generic "Then retry ..." sentence.
it('prints the runtime artifact build hint for missing query-history runtime assets', async () => {
  const terminal = makeIo();
  const project = deepReadyProject({
    warehouse: { driver: 'postgres', context: { depth: 'deep' } },
  });
  const runScan = vi.fn(async () => 0);

  // Lines the stubbed ingest emits on stderr, in order, before failing.
  const runtimeDiagnostics = [
    'Missing bundled Python runtime manifest: /repo/packages/cli/assets/python/manifest.json\n',
    'In a source checkout, build the local runtime assets with: pnpm run artifacts:build\n',
    'Then retry the runtime-backed KTX command.\n',
  ];
  const runIngest = vi.fn(async (_request, ingestIo) => {
    for (const line of runtimeDiagnostics) {
      ingestIo.stderr.write(line);
    }
    return 1;
  });
  const loadProject = vi.fn(async () => project);

  const exitCode = await runKtxPublicIngest(
    {
      command: 'run',
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      json: false,
      inputMode: 'disabled',
      queryHistory: 'enabled',
    },
    terminal.io,
    { loadProject, runScan, runIngest },
  );
  expect(exitCode).toBe(0);

  // Fragments that must appear on stdout, checked in the original order.
  for (const fragment of [
    'Ingest finished with skipped query history',
    'Missing bundled Python runtime manifest',
    'In a source checkout, build the local runtime assets with: pnpm run artifacts:build',
    'Retry: ktx ingest warehouse --project-dir /tmp/project --deep --query-history',
  ]) {
    expect(terminal.stdout()).toContain(fragment);
  }
  expect(terminal.stdout()).not.toContain('Then retry the runtime-backed KTX command');
});
2026-05-14 01:43:06 +02:00
// Scenario: an --all run over a deep-context warehouse (built with the plain
// project helper) and a notion source. Expected: exit code 1, no scan started,
// the notion source still ingested, and the readiness failure printed on stdout.
it('fails deep-readiness targets before work starts while continuing independent --all targets', async () => {
  const terminal = makeIo();
  const project = projectWithConnections({
    warehouse: { driver: 'postgres', context: { depth: 'deep' } },
    docs: { driver: 'notion' },
  });
  const runScan = vi.fn(async () => 0);
  const runIngest = vi.fn(async () => 0);
  const loadProject = vi.fn(async () => project);

  const exitCode = await runKtxPublicIngest(
    { command: 'run', projectDir: '/tmp/project', all: true, json: false, inputMode: 'disabled' },
    terminal.io,
    { loadProject, runScan, runIngest },
  );
  expect(exitCode).toBe(1);

  expect(runScan).not.toHaveBeenCalled();

  // The independent docs connection must still have been handed to the ingest runner.
  const docsRequest = expect.objectContaining({ command: 'run', connectionId: 'docs', adapter: 'notion' });
  expect(runIngest).toHaveBeenCalledWith(docsRequest, expect.anything());

  expect(terminal.stdout()).toContain('warehouse requires deep ingest readiness');
});
2026-05-24 16:57:23 +02:00
// Scenario: the caller passes the legacy scanMode/detectRelationships options.
// Expected: the scan runner is still invoked with mode 'structural' and
// detectRelationships false for the warehouse connection.
it('does not infer enriched relationship scans from legacy scanMode values', async () => {
  const terminal = makeIo();
  const project = deepReadyProject({ warehouse: { driver: 'postgres' } });
  const runScan = vi.fn(async () => 0);
  const loadProject = vi.fn(async () => project);

  const exitCode = await runKtxPublicIngest(
    {
      command: 'run',
      projectDir: '/tmp/project',
      all: true,
      json: false,
      inputMode: 'disabled',
      scanMode: 'enriched',
      detectRelationships: true,
    },
    terminal.io,
    { loadProject, runScan },
  );
  expect(exitCode).toBe(0);

  // Exact-match expectation: the first argument is not wrapped in objectContaining.
  const expectedScanRequest = {
    command: 'run',
    projectDir: '/tmp/project',
    connectionId: 'warehouse',
    mode: 'structural',
    detectRelationships: false,
    dryRun: false,
  };
  const expectedScanIo = expect.objectContaining({ capturedOutput: expect.any(Function) });
  expect(runScan).toHaveBeenCalledWith(expectedScanRequest, expectedScanIo);
});
// Scenario: a single-connection run with json output enabled.
// Expected: exit code 0 and stdout that parses as JSON carrying the plan's
// project directory plus one result entry for the warehouse connection.
it('prints stable JSON results', async () => {
  const terminal = makeIo();
  const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
  const loadProject = vi.fn(async () => project);
  const runScan = vi.fn(async () => 0);

  const exitCode = await runKtxPublicIngest(
    {
      command: 'run',
      projectDir: '/tmp/project',
      targetConnectionId: 'warehouse',
      all: false,
      json: true,
      inputMode: 'disabled',
    },
    terminal.io,
    { loadProject, runScan },
  );
  expect(exitCode).toBe(0);

  // stdout must be parseable as one JSON document; only a subset of keys is pinned.
  const report = JSON.parse(terminal.stdout());
  expect(report).toMatchObject({
    plan: { projectDir: '/tmp/project' },
    results: [{ connectionId: 'warehouse', driver: 'postgres' }],
  });
});
2026-05-10 23:51:24 +02:00
// Scenario: a dbt connection whose config declares source_dir.
// Expected: the ingest runner receives that path as sourceDir, with the dbt
// adapter and the connection id, and an io object exposing capturedOutput.
it('passes dbt source_dir from connection config to runKtxIngest', async () => {
  const runIngest = vi.fn(async () => 0);
  const terminal = makeIo();

  // Hand-built minimal project; cast to never because it is not a full project object.
  const loadProject = async () =>
    ({
      projectDir: '/tmp/ktx',
      config: {
        connections: {
          analytics_dbt: {
            driver: 'dbt',
            source_dir: '/repo/dbt',
          },
        },
      },
    }) as never;

  const exitCode = await runKtxPublicIngest(
    {
      command: 'run',
      projectDir: '/tmp/ktx',
      targetConnectionId: 'analytics_dbt',
      all: false,
      json: false,
      inputMode: 'disabled',
    },
    terminal.io,
    { loadProject, runIngest },
  );
  expect(exitCode).toBe(0);

  const expectedRequest = expect.objectContaining({
    command: 'run',
    connectionId: 'analytics_dbt',
    adapter: 'dbt',
    sourceDir: '/repo/dbt',
  });
  const expectedIo = expect.objectContaining({ capturedOutput: expect.any(Function) });
  expect(runIngest).toHaveBeenCalledWith(expectedRequest, expectedIo);
});
2026-05-14 01:43:06 +02:00
// Scenario: a notion connection targeted by id.
// Expected: the ingest runner is called with allowImplicitAdapter set to true
// alongside the notion adapter and the connection id.
it('bypasses adapter allow-lists for connection-centric source ingest', async () => {
  const runIngest = vi.fn(async () => 0);
  const terminal = makeIo();

  // The project is built lazily, only when the command asks for it.
  const loadProject = async () =>
    projectWithConnections({
      docs: { driver: 'notion' },
    });

  const exitCode = await runKtxPublicIngest(
    {
      command: 'run',
      projectDir: '/tmp/ktx',
      targetConnectionId: 'docs',
      all: false,
      json: false,
      inputMode: 'disabled',
    },
    terminal.io,
    { loadProject, runIngest },
  );
  expect(exitCode).toBe(0);

  const expectedRequest = expect.objectContaining({
    command: 'run',
    connectionId: 'docs',
    adapter: 'notion',
    allowImplicitAdapter: true,
  });
  const expectedIo = expect.objectContaining({ capturedOutput: expect.any(Function) });
  expect(runIngest).toHaveBeenCalledWith(expectedRequest, expectedIo);
});
2026-05-14 01:43:06 +02:00
2026-05-10 23:12:26 +02:00
} ) ;