2026-05-10 23:12:26 +02:00
|
|
|
import { mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises';
|
|
|
|
|
import { tmpdir } from 'node:os';
|
|
|
|
|
import { join, resolve } from 'node:path';
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
import { initKtxProject, loadKtxProject } from '../src/context/project/project.js';
|
|
|
|
|
import { parseKtxProjectConfig } from '../src/context/project/config.js';
|
|
|
|
|
import { readKtxSetupState, writeKtxSetupState } from '../src/context/project/setup-config.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
|
|
|
import {
|
2026-06-03 17:19:42 +02:00
|
|
|
managedDaemonOptionsForSetupQueryHistoryPicker,
|
2026-05-10 23:51:24 +02:00
|
|
|
type KtxSetupDatabaseDriver,
|
2026-05-13 18:41:44 -04:00
|
|
|
type KtxSetupDatabasesDeps,
|
2026-05-10 23:51:24 +02:00
|
|
|
type KtxSetupDatabasesPromptAdapter,
|
|
|
|
|
runKtxSetupDatabasesStep,
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
} from '../src/setup-databases.js';
|
|
|
|
|
import type { KtxCliIo } from '../src/cli-runtime.js';
|
2026-05-13 18:41:44 -04:00
|
|
|
import type {
|
|
|
|
|
DatabaseScopePickResult,
|
|
|
|
|
PickDatabaseScopeArgs,
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
} from '../src/database-tree-picker.js';
|
|
|
|
|
import type { KtxSetupPromptOption } from '../src/setup-prompts.js';
|
2026-05-10 23:12:26 +02:00
|
|
|
|
|
|
|
|
/**
 * Builds an in-memory CLI io double for tests.
 *
 * Everything written to `io.stdout` / `io.stderr` is appended to a private
 * string buffer; the returned `stdout()` / `stderr()` accessors expose the
 * accumulated text. `io.stdout.isTTY` is always `true`.
 */
function makeIo() {
  let capturedOut = '';
  let capturedErr = '';

  const io = {
    stdout: {
      isTTY: true,
      write: (chunk: string) => {
        capturedOut += chunk;
      },
    },
    stderr: {
      write: (chunk: string) => {
        capturedErr += chunk;
      },
    },
  };

  return {
    io,
    stdout: () => capturedOut,
    stderr: () => capturedErr,
  };
}
|
|
|
|
|
|
2026-05-13 18:41:44 -04:00
|
|
|
/**
 * One scripted answer for the stubbed scope picker:
 * - `'back'`: the picker resolves with `{ kind: 'back' }`.
 * - `'enable-all'`: every table discovered for the starting schemas is enabled.
 * - object: explicit schemas to list, plus either explicit table ids or
 *   `'back'` to leave after the tables were listed.
 */
type ScopePick = 'back' | 'enable-all' | { schemas: string[]; tables: string[] | 'back' };
|
2026-05-13 18:41:44 -04:00
|
|
|
|
|
|
|
|
/** What `makePickerStubs` hands back: the mocked picker and a log of its calls. */
interface PickerStubs {
  /** Argument object of every `pickDatabaseScope` invocation, in call order. */
  scopeCalls: PickDatabaseScopeArgs[];
  /** Mock typed as the `pickDatabaseScope` member of `KtxSetupDatabasesDeps`. */
  pickDatabaseScope: KtxSetupDatabasesDeps['pickDatabaseScope'];
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a scripted replacement for the database scope picker.
 *
 * Each call records its arguments in `scopeCalls` and consumes the next entry
 * of `options.scopes`. When the script is exhausted the stub behaves as if the
 * entry were `'enable-all'`.
 *
 * @param options.scopes Answers to replay, one per picker invocation.
 * @returns The mocked picker together with the recorded call arguments.
 */
function makePickerStubs(options: { scopes?: ScopePick[] } = {}): PickerStubs {
  // Private copy so shifting never mutates the caller's array.
  const scripted: ScopePick[] = [...(options.scopes ?? [])];
  const scopeCalls: PickDatabaseScopeArgs[] = [];

  async function respond(args: PickDatabaseScopeArgs): Promise<DatabaseScopePickResult> {
    scopeCalls.push(args);
    const answer = scripted.shift();

    if (answer === 'back') {
      return { kind: 'back' };
    }

    if (answer !== undefined && answer !== 'enable-all') {
      // Explicit pick: tables are still listed before the answer is returned,
      // even when the scripted table step is 'back'.
      await args.listTablesForSchemas(answer.schemas);
      if (answer.tables === 'back') {
        return { kind: 'back' };
      }
      return {
        kind: 'selected',
        activeSchemas: args.supportsSchemaScope ? answer.schemas : [],
        enabledTables: answer.tables,
      };
    }

    // 'enable-all' (or no scripted answer left): start from the initial
    // schemas when any were supplied, otherwise from every offered schema.
    const preselected = args.initialSchemas ?? [];
    const startingSchemas = preselected.length > 0 ? [...preselected] : [...args.schemas];
    const tables = await args.listTablesForSchemas(startingSchemas);
    const enabledTables = tables.map((table) => `${table.schema}.${table.name}`);
    const activeSchemas = args.supportsSchemaScope
      ? Array.from(new Set(tables.map((table) => table.schema)))
      : [];
    return { kind: 'selected', activeSchemas, enabledTables };
  }

  return {
    scopeCalls,
    pickDatabaseScope: vi.fn(respond),
  };
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
/**
 * Creates a prompt adapter whose answers are replayed from scripted queues.
 *
 * Every prompt kind pulls from its own queue (copied from `options`, so the
 * caller's arrays are untouched) and falls back to a fixed default once the
 * queue is empty:
 * - `multiselect` -> `['postgres']`
 * - `autocompleteMultiselect` -> the prompt's initial values, else every
 *   offered option value, else `['back']`
 * - `select` -> `'save'` for the "enable all tables / refine" question,
 *   `'fast'` for the context-depth question unless the next scripted value is
 *   `fast`/`deep`/`back`, otherwise the next scripted value or `'finish'`
 * - `text` / `password` -> `''`
 */
function makePromptAdapter(options: {
  multiselectValues?: string[][];
  selectValues?: string[];
  textValues?: (string | undefined)[];
  passwordValues?: (string | undefined)[];
}): KtxSetupDatabasesPromptAdapter {
  const queueOf = <T>(values?: T[]): T[] => [...(values ?? [])];

  const multiselectValues = queueOf(options.multiselectValues);
  const selectValues = queueOf(options.selectValues);
  const textValues = queueOf(options.textValues);
  const passwordValues = queueOf(options.passwordValues);

  return {
    multiselect: vi.fn(async () => multiselectValues.shift() ?? ['postgres']),
    autocompleteMultiselect: vi.fn(async (promptArgs) => {
      // Shares the multiselect queue with `multiselect`.
      if (multiselectValues.length > 0) {
        return multiselectValues.shift() ?? [];
      }
      if (promptArgs.initialValues && promptArgs.initialValues.length > 0) {
        return promptArgs.initialValues;
      }
      if (promptArgs.options.length > 0) {
        return promptArgs.options.map((option: { value: string }) => option.value);
      }
      return ['back'];
    }),
    select: vi.fn(async ({ message }) => {
      const isEnableAllQuestion =
        message.startsWith('Enable all tables in ') && message.includes(', or refine tables?');
      if (isEnableAllQuestion) {
        return 'save';
      }
      if (message.includes('How much database context should ktx build?')) {
        // Only consume a scripted value when it is a valid depth answer;
        // anything else stays queued for a later prompt.
        const upcoming = selectValues[0];
        const isDepthAnswer = upcoming === 'fast' || upcoming === 'deep' || upcoming === 'back';
        if (!isDepthAnswer) {
          return 'fast';
        }
        return selectValues.shift() ?? 'fast';
      }
      return selectValues.shift() ?? 'finish';
    }),
    text: vi.fn(async () => {
      // Length check (not `??`) so a scripted `undefined` is replayed as-is.
      if (textValues.length > 0) {
        return textValues.shift();
      }
      return '';
    }),
    password: vi.fn(async () => {
      if (passwordValues.length > 0) {
        return passwordValues.shift();
      }
      return '';
    }),
    cancel: vi.fn(),
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the two-line "name this connection" prompt text for a database label.
 *
 * @param label Human-readable database name, e.g. `PostgreSQL`.
 */
function connectionNamePrompt(label: string): string {
  const heading = `Name this ${label} connection`;
  const hint = 'ktx will use this short name in commands and config. You can rename it now.';
  return [heading, hint].join('\n');
}
|
|
|
|
|
|
|
|
|
|
/**
 * Formats a prompt message the way a text-input prompt is expected to show it:
 * trailing newlines are dropped, body lines (if any) are prefixed with the
 * `│ ` rail after a blank rail line, and the "Press Escape to go back." hint
 * plus a closing rail are appended.
 *
 * @param message Raw prompt text; the first line is treated as the title.
 */
function textInputPrompt(message: string): string {
  const footer = '│ Press Escape to go back.\n│';
  const trimmed = message.replace(/\n+$/, '');
  const lines = trimmed.split('\n');

  // Title-only messages get no blank rail line between title and hint.
  if (lines.length === 1) {
    return `${trimmed}\n${footer}`;
  }

  const [heading, ...rest] = lines;
  const railedBody = rest.map((line) => `│ ${line}`).join('\n');
  return `${heading}\n│\n${railedBody}\n${footer}`;
}
|
|
|
|
|
|
2026-06-03 17:19:42 +02:00
|
|
|
/**
 * Reads `context.queryHistory` from a loosely-typed connection config.
 *
 * @param connection Parsed connection entry of unknown shape.
 * @returns The `queryHistory` object, or `undefined` when `connection`,
 *   `connection.context`, or `context.queryHistory` is not a non-array object.
 */
function queryHistoryFromConfig(connection: unknown): {
  filters?: { serviceAccounts?: unknown; dropTrivialProbes?: boolean };
} | undefined {
  // Non-null, non-array object check shared by all three levels.
  const isPlainObject = (value: unknown): value is object =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  if (!isPlainObject(connection)) {
    return undefined;
  }

  const context = (connection as { context?: unknown }).context;
  if (!isPlainObject(context)) {
    return undefined;
  }

  const queryHistory = (context as { queryHistory?: unknown }).queryHistory;
  if (!isPlainObject(queryHistory)) {
    return undefined;
  }

  return queryHistory as { filters?: { serviceAccounts?: unknown; dropTrivialProbes?: boolean } };
}
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
describe('setup databases step', () => {
|
|
|
|
|
let tempDir: string;
|
|
|
|
|
|
|
|
|
|
beforeEach(async () => {
|
2026-05-10 23:51:24 +02:00
|
|
|
tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-databases-'));
|
2026-05-14 17:39:31 +02:00
|
|
|
await initKtxProject({ projectDir: tempDir });
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
afterEach(async () => {
|
2026-05-22 18:18:47 +02:00
|
|
|
vi.unstubAllEnvs();
|
2026-05-10 23:12:26 +02:00
|
|
|
await rm(tempDir, { recursive: true, force: true });
|
|
|
|
|
});
|
|
|
|
|
|
2026-06-03 17:19:42 +02:00
|
|
|
it('builds managed daemon options for setup query-history SQL analysis', () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
|
|
|
|
|
expect(
|
|
|
|
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
args: {
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
cliVersion: '0.2.0',
|
|
|
|
|
runtimeInstallPolicy: 'auto',
|
|
|
|
|
},
|
|
|
|
|
io: io.io,
|
|
|
|
|
}),
|
|
|
|
|
).toEqual({
|
|
|
|
|
cliVersion: '0.2.0',
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
installPolicy: 'auto',
|
|
|
|
|
io: io.io,
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('defaults managed daemon setup options when the database step is called directly', () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
|
|
|
|
|
expect(
|
|
|
|
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
args: {
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
},
|
|
|
|
|
io: io.io,
|
|
|
|
|
}),
|
|
|
|
|
).toMatchObject({
|
|
|
|
|
cliVersion: expect.any(String),
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
installPolicy: 'never',
|
|
|
|
|
io: io.io,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(
|
|
|
|
|
managedDaemonOptionsForSetupQueryHistoryPicker({
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
args: {
|
|
|
|
|
inputMode: 'auto',
|
|
|
|
|
},
|
|
|
|
|
io: io.io,
|
|
|
|
|
}),
|
|
|
|
|
).toMatchObject({
|
|
|
|
|
cliVersion: expect.any(String),
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
installPolicy: 'prompt',
|
|
|
|
|
io: io.io,
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('shows every supported database in the interactive checklist', async () => {
|
2026-05-10 23:12:26 +02:00
|
|
|
const prompts = makePromptAdapter({ multiselectValues: [['back']] });
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{ projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
|
|
|
|
|
makeIo().io,
|
|
|
|
|
{ prompts },
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('back');
|
|
|
|
|
expect(prompts.multiselect).toHaveBeenCalledWith({
|
|
|
|
|
message:
|
2026-06-11 13:49:45 +02:00
|
|
|
'Which databases should ktx connect to?\n' +
|
feat(cli): setup progress spinners, Tab-to-select, and banner polish (#296)
* fix(cli): double the height of the setup banner t crossbar
* fix(cli): unify setup multi-select hints and make Tab the select key
The six interactive multi-select surfaces in `ktx setup` documented three
different hint voices, one had no hint at all, and they named two different
select keys (Space vs Tab). Tab is the only key that can toggle selection
without colliding with type-to-search input, so make it the single documented
select key everywhere and compose every hint from one shared fragment
vocabulary in prompt-navigation.ts.
- Register `updateSettings({ aliases: { tab: 'space' } })` so Tab toggles flat
multiselects; the alias applies only to non-text prompts, leaving typed
search input (schema/Notion) untouched.
- Add the missing hint to the agent-targets prompt and drop the stray
"Space to select … Esc …" info line plus the now-dead writeSetupInfo helper.
- Replace the schema-scope ad-hoc hint with the searchable-multiselect voice
and standardize "filter" -> "search" vocabulary.
- Delete DEFAULT_TREE_PICKER_HELP_TEXT and the unused TreePickerChrome.helpText
seam; render the shared tree hint instead.
* refactor(cli): show LLM check progress for every setup backend
Rename runLlmHealthCheckWithProgress to validateModelWithProgress and
wrap the Claude subscription and Codex auth probes in the same spinner
progress as the Anthropic API and Vertex backends, so each backend shows
consistent "Checking <provider> LLM" output during setup.
* feat(cli): add ktx-orange progress spinners to setup steps
Add a shared runWithCliSpinner helper and a TTY-aware createCliSpinner:
an animated clack spinner in a terminal, and a static stderr-only spinner
before raw-mode pickers (the table tree picker and demo tour), where the
animated spinner's stdin grab would otherwise corrupt the next prompt.
Wrap the slow setup waits in progress spinners: managed runtime install,
embedding daemon start + first-run model download, embeddings health
check, the connection-test gate, and source validation / dbt clone /
Metabase discovery. Recolor every spinner frame from clack's magenta to
the ktx mascot orange (#FF8A4C) via the static helper and clack's
styleFrame option.
2026-06-12 16:43:10 +02:00
|
|
|
'Up/Down to move, Tab to select or unselect, Enter to confirm, Escape to go back, Ctrl+C to exit.',
|
2026-05-10 23:12:26 +02:00
|
|
|
options: [
|
|
|
|
|
{ value: 'postgres', label: 'PostgreSQL' },
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
{ value: 'bigquery', label: 'BigQuery' },
|
|
|
|
|
{ value: 'snowflake', label: 'Snowflake' },
|
2026-05-10 23:12:26 +02:00
|
|
|
{ value: 'mysql', label: 'MySQL' },
|
|
|
|
|
{ value: 'clickhouse', label: 'ClickHouse' },
|
|
|
|
|
{ value: 'sqlserver', label: 'SQL Server' },
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
{ value: 'sqlite', label: 'SQLite' },
|
2026-05-10 23:12:26 +02:00
|
|
|
],
|
2026-05-14 14:35:58 +02:00
|
|
|
required: true,
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('lets Back from connection method selection return to database selection when adding a new driver', async () => {
|
2026-05-10 23:12:26 +02:00
|
|
|
const prompts = makePromptAdapter({
|
|
|
|
|
multiselectValues: [['postgres'], ['back']],
|
|
|
|
|
selectValues: ['back'],
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{ projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
|
|
|
|
|
makeIo().io,
|
|
|
|
|
{ prompts },
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('back');
|
|
|
|
|
expect(prompts.select).toHaveBeenCalledWith({
|
|
|
|
|
message: 'How do you want to connect to PostgreSQL?',
|
|
|
|
|
options: [
|
|
|
|
|
{ value: 'url', label: 'Paste a connection URL' },
|
2026-05-12 17:14:35 -07:00
|
|
|
{ value: 'fields', label: 'Enter connection details (host, port, database, user)' },
|
2026-05-10 23:12:26 +02:00
|
|
|
{ value: 'back', label: 'Back' },
|
|
|
|
|
],
|
|
|
|
|
});
|
|
|
|
|
expect(prompts.multiselect).toHaveBeenCalledTimes(2);
|
|
|
|
|
expect(vi.mocked(prompts.multiselect).mock.calls[1]?.[0].message).toBe(
|
2026-06-11 13:49:45 +02:00
|
|
|
'Which databases should ktx connect to?\n' +
|
feat(cli): setup progress spinners, Tab-to-select, and banner polish (#296)
* fix(cli): double the height of the setup banner t crossbar
* fix(cli): unify setup multi-select hints and make Tab the select key
The six interactive multi-select surfaces in `ktx setup` documented three
different hint voices, one had no hint at all, and they named two different
select keys (Space vs Tab). Tab is the only key that can toggle selection
without colliding with type-to-search input, so make it the single documented
select key everywhere and compose every hint from one shared fragment
vocabulary in prompt-navigation.ts.
- Register `updateSettings({ aliases: { tab: 'space' } })` so Tab toggles flat
multiselects; the alias applies only to non-text prompts, leaving typed
search input (schema/Notion) untouched.
- Add the missing hint to the agent-targets prompt and drop the stray
"Space to select … Esc …" info line plus the now-dead writeSetupInfo helper.
- Replace the schema-scope ad-hoc hint with the searchable-multiselect voice
and standardize "filter" -> "search" vocabulary.
- Delete DEFAULT_TREE_PICKER_HELP_TEXT and the unused TreePickerChrome.helpText
seam; render the shared tree hint instead.
* refactor(cli): show LLM check progress for every setup backend
Rename runLlmHealthCheckWithProgress to validateModelWithProgress and
wrap the Claude subscription and Codex auth probes in the same spinner
progress as the Anthropic API and Vertex backends, so each backend shows
consistent "Checking <provider> LLM" output during setup.
* feat(cli): add ktx-orange progress spinners to setup steps
Add a shared runWithCliSpinner helper and a TTY-aware createCliSpinner:
an animated clack spinner in a terminal, and a static stderr-only spinner
before raw-mode pickers (the table tree picker and demo tour), where the
animated spinner's stdin grab would otherwise corrupt the next prompt.
Wrap the slow setup waits in progress spinners: managed runtime install,
embedding daemon start + first-run model download, embeddings health
check, the connection-test gate, and source validation / dbt clone /
Metabase discovery. Recolor every spinner frame from clack's magenta to
the ktx mascot orange (#FF8A4C) via the static helper and clack's
styleFrame option.
2026-06-12 16:43:10 +02:00
|
|
|
'Up/Down to move, Tab to select or unselect, Enter to confirm, Escape to go back, Ctrl+C to exit.',
|
2026-05-10 23:12:26 +02:00
|
|
|
);
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// For each URL-capable driver, answering 'back' at the first select prompt must
// return status 'back', and that prompt must list the URL-paste option first,
// then the field-by-field option, then Back.
it('offers connection URL paste first for URL-capable databases', async () => {
  const urlCapableDrivers: Array<{ driver: KtxSetupDatabaseDriver; label: string }> = [
    { driver: 'postgres', label: 'PostgreSQL' },
    { driver: 'mysql', label: 'MySQL' },
    { driver: 'clickhouse', label: 'ClickHouse' },
    { driver: 'sqlserver', label: 'SQL Server' },
  ];

  // The option list does not depend on the driver; only the prompt message does.
  const expectedOptions = [
    { value: 'url', label: 'Paste a connection URL' },
    { value: 'fields', label: 'Enter connection details (host, port, database, user)' },
    { value: 'back', label: 'Back' },
  ];

  for (const { driver, label } of urlCapableDrivers) {
    const promptAdapter = makePromptAdapter({ selectValues: ['back'] });
    const stepArgs = {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: [driver],
      skipDatabases: false,
      databaseSchemas: [],
    };

    const outcome = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, { prompts: promptAdapter });

    expect(outcome.status).toBe('back');
    expect(promptAdapter.select).toHaveBeenCalledWith({
      message: `How do you want to connect to ${label}?`,
      options: expectedOptions,
    });
  }
});
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
// With the driver supplied up front (databaseDrivers: ['postgres']), answering
// 'back' at the single select prompt ends the step with status 'back' and the
// multiselect prompt is never shown.
it('lets Back leave database setup when the driver came from flags', async () => {
  const promptAdapter = makePromptAdapter({ selectValues: ['back'] });
  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    databaseDrivers: ['postgres'],
    skipDatabases: false,
    databaseSchemas: [],
  };

  const { status } = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, { prompts: promptAdapter });

  expect(status).toBe('back');
  expect(promptAdapter.multiselect).not.toHaveBeenCalled();
  expect(promptAdapter.select).toHaveBeenCalledTimes(1);
});
|
|
|
|
|
|
|
|
|
|
// Seeds ktx.yaml with one postgres connection named "warehouse", then checks that
// the "Configure PostgreSQL" prompt offers keep / edit / add-another / Back
// options whose labels name the database type.
it('labels existing database connections with the database type', async () => {
  // NOTE(review): the leading whitespace inside these YAML literals looks collapsed
  // in this copy of the file — confirm the nesting against the repository version.
  const seededConfigLines = [
    'connections:',
    ' warehouse:',
    ' driver: postgres',
    ' url: env:DATABASE_URL',
    '',
  ];
  await writeFile(join(tempDir, 'ktx.yaml'), seededConfigLines.join('\n'), 'utf-8');

  const promptAdapter = makePromptAdapter({ selectValues: ['back'] });
  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    databaseDrivers: ['postgres'],
    skipDatabases: false,
    databaseSchemas: [],
  };

  const outcome = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, { prompts: promptAdapter });

  const expectedPrompt = {
    message: 'Configure PostgreSQL',
    options: [
      { value: 'existing:warehouse', label: 'Keep existing PostgreSQL connection: warehouse' },
      { value: 'edit:warehouse', label: 'Edit PostgreSQL connection: warehouse' },
      { value: 'new', label: 'Add another PostgreSQL connection' },
      { value: 'back', label: 'Back' },
    ],
  };
  expect(outcome.status).toBe('back');
  expect(promptAdapter.select).toHaveBeenCalledWith(expectedPrompt);
});
|
|
|
|
|
|
|
|
|
|
// Submitting an empty string at the connection-name prompt should fall through to
// the suggested 'postgres-warehouse' name: the test/scan hooks receive it and
// ktx.yaml gains a connection stored under that key.
it('uses a database-specific editable connection name for new interactive connections', async () => {
  const suggestedName = 'postgres-warehouse';
  const io = makeIo();
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');

  // First text prompt: the connection name, pre-filled with the suggestion.
  expect(promptAdapter.text).toHaveBeenNthCalledWith(1, {
    message: textInputPrompt(connectionNamePrompt('PostgreSQL')),
    placeholder: suggestedName,
    initialValue: suggestedName,
  });

  expect(testConnection).toHaveBeenCalledWith(tempDir, suggestedName, expect.anything());
  expect(scanConnection).toHaveBeenCalledWith(tempDir, suggestedName, expect.anything());

  // Read the written project config back and check the stored connection.
  const writtenYaml = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const parsedConfig = parseKtxProjectConfig(writtenYaml);
  expect(parsedConfig.connections[suggestedName]).toEqual({
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  });
});
|
|
|
|
|
|
2026-05-22 18:18:47 +02:00
|
|
|
// With KTX_TELEMETRY_DEBUG set to '1' and the opt-out / CI variables stubbed to
// empty strings, a successful setup run should write a connection_added event to
// stderr without including the project directory path.
it('emits debug telemetry when setup writes a database connection', async () => {
  const stubbedEnv = [
    ['KTX_TELEMETRY_DEBUG', '1'],
    ['KTX_TELEMETRY_DISABLED', ''],
    ['DO_NOT_TRACK', ''],
    ['CI', ''],
  ] as const;
  for (const [name, value] of stubbedEnv) {
    vi.stubEnv(name, value);
  }

  const io = makeIo();
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL'],
  });
  // Scope discovery hooks resolve to empty lists.
  const listSchemas = vi.fn(async () => []);
  const listTables = vi.fn(async () => []);

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts: promptAdapter,
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      listSchemas,
      listTables,
    },
  );

  expect(outcome.status).toBe('ready');

  const expectedFragments = ['"event":"connection_added"', '"driver":"postgres"', '"isDemoConnection":false'];
  for (const fragment of expectedFragments) {
    expect(io.stderr()).toContain(fragment);
  }
  // The temp project path must not appear in the emitted output.
  expect(io.stderr()).not.toContain(tempDir);
});
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
// Both free-text prompts in the URL flow (connection name, then connection URL)
// must have their messages wrapped by textInputPrompt(...).
it('tells users Escape goes back in free-text connection prompts', async () => {
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL'],
  });
  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    databaseDrivers: ['postgres'],
    databaseSchemas: [],
    skipDatabases: false,
  };

  const outcome = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, {
    prompts: promptAdapter,
    testConnection: vi.fn(async () => 0),
    scanConnection: vi.fn(async () => 0),
  });

  expect(outcome.status).toBe('ready');

  // Prompt 1: connection name with its suggested default.
  expect(promptAdapter.text).toHaveBeenNthCalledWith(1, {
    message: textInputPrompt(connectionNamePrompt('PostgreSQL')),
    placeholder: 'postgres-warehouse',
    initialValue: 'postgres-warehouse',
  });
  // Prompt 2: the connection URL, with no placeholder or initial value.
  expect(promptAdapter.text).toHaveBeenNthCalledWith(2, {
    message: textInputPrompt('PostgreSQL connection URL'),
  });
});
|
|
|
|
|
|
|
|
|
|
it('uses clear setup prompts for every new database connection type', async () => {
|
|
|
|
|
const cases: Array<{
|
2026-05-10 23:51:24 +02:00
|
|
|
driver: KtxSetupDatabaseDriver;
|
2026-05-10 23:12:26 +02:00
|
|
|
selectValues?: string[];
|
|
|
|
|
textValues: string[];
|
|
|
|
|
passwordValues?: string[];
|
|
|
|
|
expectedTextPrompts: Array<{ message: string; placeholder?: string; initialValue?: string }>;
|
|
|
|
|
expectedPasswordPrompts?: Array<{ message: string }>;
|
|
|
|
|
}> = [
|
|
|
|
|
{
|
|
|
|
|
driver: 'sqlite',
|
|
|
|
|
textValues: ['', './warehouse.sqlite'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('SQLite'),
|
|
|
|
|
placeholder: 'sqlite-local',
|
|
|
|
|
initialValue: 'sqlite-local',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'SQLite database file\nEnter a relative or absolute path, for example ./warehouse.sqlite.',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'postgres',
|
|
|
|
|
selectValues: ['url'],
|
|
|
|
|
textValues: ['', 'env:DATABASE_URL'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('PostgreSQL'),
|
|
|
|
|
placeholder: 'postgres-warehouse',
|
|
|
|
|
initialValue: 'postgres-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'PostgreSQL connection URL',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'mysql',
|
|
|
|
|
selectValues: ['url'],
|
|
|
|
|
textValues: ['', 'env:MYSQL_DATABASE_URL'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('MySQL'),
|
|
|
|
|
placeholder: 'mysql-warehouse',
|
|
|
|
|
initialValue: 'mysql-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'MySQL connection URL',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'clickhouse',
|
|
|
|
|
selectValues: ['url'],
|
|
|
|
|
textValues: ['', 'env:CLICKHOUSE_URL'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('ClickHouse'),
|
|
|
|
|
placeholder: 'clickhouse-warehouse',
|
|
|
|
|
initialValue: 'clickhouse-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'ClickHouse connection URL',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'sqlserver',
|
|
|
|
|
selectValues: ['url'],
|
|
|
|
|
textValues: ['', 'env:SQLSERVER_DATABASE_URL'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('SQL Server'),
|
|
|
|
|
placeholder: 'sqlserver-warehouse',
|
|
|
|
|
initialValue: 'sqlserver-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'SQL Server connection URL',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'bigquery',
|
|
|
|
|
selectValues: ['no'],
|
2026-05-22 14:22:11 +02:00
|
|
|
textValues: ['', '/path/to/service-account.json', ''],
|
2026-05-10 23:12:26 +02:00
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('BigQuery'),
|
|
|
|
|
placeholder: 'bigquery-warehouse',
|
|
|
|
|
initialValue: 'bigquery-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Path to service account JSON file',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'BigQuery location\nPress Enter for US, or enter a location like EU.',
|
|
|
|
|
placeholder: 'US',
|
|
|
|
|
initialValue: 'US',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
driver: 'snowflake',
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
selectValues: ['password', 'no'],
|
|
|
|
|
textValues: ['', 'env:SNOWFLAKE_ACCOUNT', 'ANALYTICS_WH', 'ANALYTICS', 'env:SNOWFLAKE_USER', ''],
|
2026-05-10 23:12:26 +02:00
|
|
|
passwordValues: ['env:SNOWFLAKE_PASSWORD'],
|
|
|
|
|
expectedTextPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: connectionNamePrompt('Snowflake'),
|
|
|
|
|
placeholder: 'snowflake-warehouse',
|
|
|
|
|
initialValue: 'snowflake-warehouse',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake account identifier',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake warehouse\nFor example ANALYTICS_WH.',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake database name',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake username',
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake role (optional)\nPress Enter to skip.',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
expectedPasswordPrompts: [
|
|
|
|
|
{
|
|
|
|
|
message: 'Snowflake password',
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
},
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
for (const testCase of cases) {
|
|
|
|
|
const prompts = makePromptAdapter({
|
|
|
|
|
selectValues: testCase.selectValues ?? ['new'],
|
|
|
|
|
textValues: testCase.textValues,
|
|
|
|
|
passwordValues: testCase.passwordValues,
|
|
|
|
|
});
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'auto',
|
|
|
|
|
databaseDrivers: [testCase.driver],
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
makeIo().io,
|
|
|
|
|
{
|
|
|
|
|
prompts,
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
|
|
|
|
scanConnection: vi.fn(async () => 0),
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
listSchemas: vi.fn(async () => []),
|
|
|
|
|
listTables: vi.fn(async () => []),
|
2026-05-10 23:12:26 +02:00
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
|
|
|
|
expect(vi.mocked(prompts.text).mock.calls.map(([options]) => options)).toEqual(
|
|
|
|
|
testCase.expectedTextPrompts.map((expectedPrompt) => ({
|
|
|
|
|
...expectedPrompt,
|
|
|
|
|
message: textInputPrompt(expectedPrompt.message),
|
|
|
|
|
})),
|
|
|
|
|
);
|
|
|
|
|
if (testCase.expectedPasswordPrompts) {
|
|
|
|
|
expect(vi.mocked(prompts.password).mock.calls.map(([options]) => options)).toEqual(
|
|
|
|
|
testCase.expectedPasswordPrompts.map((expectedPrompt) => ({
|
|
|
|
|
...expectedPrompt,
|
|
|
|
|
message: textInputPrompt(expectedPrompt.message),
|
|
|
|
|
})),
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('lets Back from connection method selection return to database selection', async () => {
  // Scripted answers: the first checklist picks postgres, the single select
  // answers 'back', and the second checklist answers 'back' as well.
  // The lone empty text answer presumably accepts the default connection
  // name — confirm against the wizard's prompt order.
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['postgres'], ['back']],
    selectValues: ['back'],
    textValues: [''],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const listSchemas = vi.fn(async () => []);
  const listTables = vi.fn(async () => []);

  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    skipDatabases: false,
    databaseSchemas: [],
    disableQueryHistory: true,
  };
  const stepDeps = { prompts: promptAdapter, testConnection, scanConnection, listSchemas, listTables };

  const outcome = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, stepDeps);

  // The step as a whole reports 'back' once both levels have been backed out of.
  expect(outcome.status).toBe('back');

  const connectionMethodPrompt = {
    message: 'How do you want to connect to PostgreSQL?',
    options: [
      { value: 'url', label: 'Paste a connection URL' },
      { value: 'fields', label: 'Enter connection details (host, port, database, user)' },
      { value: 'back', label: 'Back' },
    ],
  };
  expect(promptAdapter.select).toHaveBeenNthCalledWith(1, connectionMethodPrompt);

  // The checklist is shown twice: initially, and again after Back.
  expect(promptAdapter.multiselect).toHaveBeenCalledTimes(2);

  // No connection was ever configured, so nothing may be tested or scanned.
  for (const untouched of [testConnection, scanConnection]) {
    expect(untouched).not.toHaveBeenCalled();
  }
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('shows a configured database menu instead of the type checklist when a database exists', async () => {
  // Seed a project that already has one postgres connection ("warehouse")
  // registered under setup.database_connection_ids.
  // Nested keys are indented beneath their parent so that `driver` and `url`
  // belong to `warehouse` rather than being parsed as its siblings.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  // Record the databases step as already completed.
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });

  // A multiselect answer is scripted, but the test asserts below that the
  // checklist is never shown; only the 'continue' select answer is consumed.
  const prompts = makePromptAdapter({ multiselectValues: [['back']], selectValues: ['continue'] });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      skipDatabases: false,
      databaseSchemas: [],
      disableQueryHistory: true,
    },
    makeIo().io,
    { prompts, testConnection, scanConnection },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  expect(prompts.multiselect).not.toHaveBeenCalled();
  expect(prompts.select).toHaveBeenCalledWith({
    message: 'Databases configured: warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  });
  // Continuing with the existing connection must not test or scan anything.
  expect(testConnection).not.toHaveBeenCalled();
  expect(scanConnection).not.toHaveBeenCalled();
});
|
|
|
|
|
|
2026-05-24 19:29:37 +02:00
|
|
|
it('can skip context sources from the configured database menu', async () => {
  // Seed a project that already has one postgres connection ("warehouse").
  // Nested keys are indented beneath their parent so that `driver` and `url`
  // belong to `warehouse` rather than being parsed as its siblings.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  // Record the databases step as already completed.
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });

  // The only scripted answer picks 'skip-sources' from the select prompt.
  const prompts = makePromptAdapter({ selectValues: ['skip-sources'] });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      skipDatabases: false,
      databaseSchemas: [],
      disableQueryHistory: true,
    },
    makeIo().io,
    { prompts, testConnection, scanConnection },
  );

  // The result carries skipSources: true in addition to the usual fields.
  expect(result).toEqual({
    status: 'ready',
    projectDir: tempDir,
    connectionIds: ['warehouse'],
    skipSources: true,
  });
  expect(testConnection).not.toHaveBeenCalled();
  expect(scanConnection).not.toHaveBeenCalled();
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('preserves existing database ids when adding another database from the configured menu', async () => {
  // Seed a project that already has one postgres connection ("warehouse").
  // Nested keys are indented beneath their parent so that `driver` and `url`
  // belong to `warehouse` rather than being parsed as its siblings.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  // Record the databases step as already completed.
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });

  // Scripted flow: choose 'add' from the menu, tick mysql in the checklist,
  // connect via 'url', then 'continue'. The first (empty) text answer
  // presumably accepts the default connection name — confirm against the
  // wizard; the second supplies the MySQL URL reference.
  const prompts = makePromptAdapter({
    selectValues: ['add', 'url', 'continue'],
    multiselectValues: [['mysql']],
    textValues: ['', 'env:MYSQL_DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      skipDatabases: false,
      databaseSchemas: [],
      disableQueryHistory: true,
    },
    makeIo().io,
    { prompts, testConnection, scanConnection },
  );

  // The pre-existing id stays first; the new connection is appended.
  expect(result).toEqual({
    status: 'ready',
    projectDir: tempDir,
    connectionIds: ['warehouse', 'mysql-warehouse'],
  });
  expect(prompts.multiselect).toHaveBeenCalledTimes(1);
  // The checklist is opened with the already-configured driver as its initial value.
  expect(prompts.multiselect).toHaveBeenCalledWith(expect.objectContaining({
    initialValues: ['postgres'],
    required: true,
  }));
  expect(prompts.select).toHaveBeenCalledWith({
    message: 'Databases configured: warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  });
  // Only the newly added connection is tested.
  expect(testConnection).toHaveBeenCalledTimes(1);
  expect(testConnection).toHaveBeenCalledWith(tempDir, 'mysql-warehouse', expect.anything());

  // The persisted config must list both ids as well.
  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'mysql-warehouse']);
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('lets users add another database after completing the first one', async () => {
  // Scripted flow: tick postgres, connect via 'url', choose 'add' from the
  // configured menu, tick mysql, connect via 'url', then 'continue'.
  // Each empty text answer precedes a URL answer; presumably it accepts the
  // default connection name — confirm against the wizard's prompt order.
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['postgres'], ['mysql']],
    selectValues: ['url', 'add', 'url', 'continue'],
    textValues: ['', 'env:DATABASE_URL', '', 'env:MYSQL_DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const listSchemas = vi.fn(async () => []);
  const listTables = vi.fn(async () => []);

  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    skipDatabases: false,
    databaseSchemas: [],
    disableQueryHistory: true,
  };
  const stepDeps = { prompts: promptAdapter, testConnection, scanConnection, listSchemas, listTables };

  const outcome = await runKtxSetupDatabasesStep(stepArgs, makeIo().io, stepDeps);

  // Both connections are reported, in the order they were added.
  expect(outcome).toEqual({
    status: 'ready',
    projectDir: tempDir,
    connectionIds: ['postgres-warehouse', 'mysql-warehouse'],
  });

  expect(promptAdapter.multiselect).toHaveBeenCalledTimes(2);
  // The second checklist is opened with the first driver as its initial value.
  const secondChecklist = expect.objectContaining({
    initialValues: ['postgres'],
    required: true,
  });
  expect(promptAdapter.multiselect).toHaveBeenNthCalledWith(2, secondChecklist);

  const configuredMenu = {
    message: 'Databases configured: postgres-warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  };
  expect(promptAdapter.select).toHaveBeenCalledWith(configuredMenu);

  // The persisted ktx.yaml must list both connection ids too.
  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const persisted = parseKtxProjectConfig(yamlText);
  expect(persisted.setup?.database_connection_ids).toEqual(['postgres-warehouse', 'mysql-warehouse']);
});
|
|
|
|
|
|
2026-05-14 14:35:58 +02:00
|
|
|
it('returns to configured primary menu when pressing back on driver selection after adding a source', async () => {
  const io = makeIo();
  // Scripted flow: tick postgres, connect via 'url', choose 'add', answer
  // 'back' on the second checklist, then 'continue' from the menu.
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['postgres'], ['back']],
    selectValues: ['url', 'add', 'continue'],
    textValues: ['', 'env:DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const stepArgs = {
    projectDir: tempDir,
    inputMode: 'auto',
    skipDatabases: false,
    databaseSchemas: [],
    disableQueryHistory: true,
  };

  const outcome = await runKtxSetupDatabasesStep(
    stepArgs,
    io.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  // Backing out of the second checklist keeps the first connection intact.
  expect(outcome).toEqual({
    status: 'ready',
    projectDir: tempDir,
    connectionIds: ['postgres-warehouse'],
  });

  expect(promptAdapter.multiselect).toHaveBeenCalledTimes(2);
  const secondChecklist = expect.objectContaining({
    initialValues: ['postgres'],
    required: true,
  });
  expect(promptAdapter.multiselect).toHaveBeenNthCalledWith(2, secondChecklist);

  // The "no database" message must not be printed when one is already configured.
  expect(io.stdout()).not.toContain('ktx cannot work without at least one database');

  // The third select call is the configured-database menu shown after Back.
  const configuredMenu = {
    message: 'Databases configured: postgres-warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  };
  expect(promptAdapter.select).toHaveBeenNthCalledWith(3, configuredMenu);
});
|
|
|
|
|
|
2026-05-14 14:35:58 +02:00
|
|
|
it('returns to configured primary menu when pressing back on driver selection with pre-existing source', async () => {
  // Seed a project that already has one postgres connection ("warehouse").
  // Nested keys are indented beneath their parent so that `driver` and `url`
  // belong to `warehouse` rather than being parsed as its siblings.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  // Record the databases step as already completed.
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });

  const io = makeIo();
  // Scripted flow: choose 'add' from the menu, answer 'back' on the
  // checklist, then 'continue' from the menu.
  const prompts = makePromptAdapter({
    multiselectValues: [['back']],
    selectValues: ['add', 'continue'],
  });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    io.io,
    { prompts },
  );

  // Backing out of the checklist leaves the pre-existing connection as the only id.
  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  expect(prompts.multiselect).toHaveBeenCalledWith(expect.objectContaining({
    initialValues: ['postgres'],
    required: true,
  }));
  // The "no database" message must not be printed when one is already configured.
  expect(io.stdout()).not.toContain('ktx cannot work without at least one database');
  // The second select call is the configured-database menu shown after Back.
  expect(prompts.select).toHaveBeenNthCalledWith(2, {
    message: 'Databases configured: warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  });
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// Scenario: the user opens "edit", picks "back" in the database chooser, and
// is returned to the configured primary menu without any connection test or scan.
it('returns from database edit selection back to the configured source menu', async () => {
  // Seed ktx.yaml with a single Postgres connection. `driver`/`url` must be
  // nested under `warehouse` so the edit menu can label it 'warehouse (PostgreSQL)'.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  // Scripted select answers: primary menu -> 'edit', chooser -> 'back', primary menu -> 'continue'.
  // NOTE(review): presumably consumed in order by the prompt adapter — confirm.
  const prompts = makePromptAdapter({
    selectValues: ['edit', 'back', 'continue'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    { prompts, testConnection, scanConnection },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // Second select: the database chooser, listing the connection plus a Back entry.
  expect(prompts.select).toHaveBeenNthCalledWith(2, {
    message: 'Database to edit',
    options: [
      { value: 'warehouse', label: 'warehouse (PostgreSQL)' },
      { value: 'back', label: 'Back' },
    ],
  });
  // Third select: the configured primary menu again.
  expect(prompts.select).toHaveBeenNthCalledWith(3, {
    message: 'Databases configured: warehouse\nWhat would you like to do?',
    options: [
      { value: 'continue', label: 'Continue to context sources' },
      { value: 'skip-sources', label: 'Skip context sources' },
      { value: 'edit', label: 'Edit an existing database' },
      { value: 'add', label: 'Add another database' },
    ],
  });
  // Nothing was edited, so no connection test or scan may run.
  expect(testConnection).not.toHaveBeenCalled();
  expect(scanConnection).not.toHaveBeenCalled();
});
|
|
|
|
|
|
|
|
|
|
// Scenario: an existing connection scoped to `public` / `public.orders` is
// edited to the `analytics` schema; the saved config must contain only the
// newly picked tables.
it('reruns table selection after editing schema scope so stale enabled tables are removed', async () => {
  // Seed ktx.yaml. Every connection property (including `schemas` and
  // `enabled_tables`) must nest under `warehouse`; flat indentation would turn
  // them into sibling keys and leave the connection without a scope.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      '      - public',
      '    enabled_tables:',
      '      - public.orders',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  const prompts = makePromptAdapter({
    textValues: ['env:DATABASE_URL'],
  });
  // Answer select prompts by message rather than by call order.
  let primaryMenuCount = 0;
  vi.mocked(prompts.select).mockImplementation(async (options) => {
    if (options.message === 'Databases configured: warehouse\nWhat would you like to do?') {
      // First visit starts the edit; every later visit leaves the step.
      primaryMenuCount += 1;
      return primaryMenuCount === 1 ? 'edit' : 'continue';
    }
    if (options.message === 'Database to edit') return 'warehouse';
    if (options.message === 'How do you want to connect to PostgreSQL?') return 'url';
    if (options.message.startsWith('Enable query-history ingest')) return 'no';
    // Any other select prompt is answered with 'back'.
    return 'back';
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const listSchemas = vi.fn(async () => ['analytics', 'public']);
  const listTables = vi.fn(async () => [{ catalog: null, schema: 'analytics', name: 'customers', kind: 'table' as const }]);
  // The scope picker stub returns the new schema/table selection.
  const pickers = makePickerStubs({
    scopes: [{ schemas: ['analytics'], tables: ['analytics.customers'] }],
  });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection,
      scanConnection,
      listSchemas,
      listTables,
      pickDatabaseScope: pickers.pickDatabaseScope,
    },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // The URL prompt is pre-filled with the existing value.
  expect(prompts.text).toHaveBeenCalledWith({
    message: textInputPrompt('PostgreSQL connection URL'),
    placeholder: 'env:DATABASE_URL',
    initialValue: 'env:DATABASE_URL',
  });
  expect(listTables).toHaveBeenCalledWith(tempDir, 'warehouse', ['analytics']);
  expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
  expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
  // Persisted config holds the new scope; the stale `public.orders` entry is gone.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    schemas: ['analytics'],
    enabled_tables: ['analytics.customers'],
  });
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// Scenario: editing a connection that already has enabled tables must hand
// those tables to the scope picker as the existing selection.
it('preselects existing schema and table choices when editing a database', async () => {
  // Seed ktx.yaml. `schemas` and `enabled_tables` must nest under `warehouse`
  // so the step can read them back as the connection's existing scope.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      '      - public',
      '    enabled_tables:',
      '      - public.customers',
      '      - public.orders',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  const prompts = makePromptAdapter({
    textValues: ['env:DATABASE_URL'],
  });
  // Answer select prompts by message rather than by call order.
  let primaryMenuCount = 0;
  vi.mocked(prompts.select).mockImplementation(async (options) => {
    if (options.message === 'Databases configured: warehouse\nWhat would you like to do?') {
      // First visit starts the edit; every later visit leaves the step.
      primaryMenuCount += 1;
      return primaryMenuCount === 1 ? 'edit' : 'continue';
    }
    if (options.message === 'Database to edit') return 'warehouse';
    if (options.message === 'How do you want to connect to PostgreSQL?') return 'url';
    if (options.message.startsWith('Enable query-history ingest')) return 'no';
    // Any other select prompt is answered with 'back'.
    return 'back';
  });
  const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
  const listTables = vi.fn(async () => [
    { catalog: null, schema: 'public', name: 'customers', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'orders', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'products', kind: 'table' as const },
  ]);
  // The picker stub returns the same selection that is already saved.
  const pickers = makePickerStubs({
    scopes: [{ schemas: ['public'], tables: ['public.customers', 'public.orders'] }],
  });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      listSchemas,
      listTables,
      pickDatabaseScope: pickers.pickDatabaseScope,
    },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // The picker ran once and received the saved tables as its existing selection.
  expect(pickers.scopeCalls).toHaveLength(1);
  expect(pickers.scopeCalls[0]).toMatchObject({
    connectionId: 'warehouse',
    schemaNoun: 'schema',
    supportsSchemaScope: true,
    existing: { enabledTables: ['public.customers', 'public.orders'] },
  });
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    schemas: ['public'],
    enabled_tables: ['public.customers', 'public.orders'],
  });
});
|
|
|
|
|
|
|
|
|
|
// Scenario: during an edit the scope picker answers 'back'; the flow must show
// the primary menu a second time, skip the scan, and leave the saved config intact.
it('returns to the configured primary menu when backing out of schema review during edit', async () => {
  // Seed ktx.yaml. Connection properties must nest under `warehouse` so the
  // final assertion on `config.connections.warehouse` can see them.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      '      - public',
      '    enabled_tables:',
      '      - public.orders',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  const prompts = makePromptAdapter({
    textValues: ['env:DATABASE_URL'],
  });
  // Answer select prompts by message rather than by call order.
  let primaryMenuCount = 0;
  vi.mocked(prompts.select).mockImplementation(async (options) => {
    if (options.message === 'Databases configured: warehouse\nWhat would you like to do?') {
      // First visit starts the edit; every later visit leaves the step.
      primaryMenuCount += 1;
      return primaryMenuCount === 1 ? 'edit' : 'continue';
    }
    if (options.message === 'Database to edit') return 'warehouse';
    if (options.message === 'How do you want to connect to PostgreSQL?') return 'url';
    if (options.message.startsWith('Enable query-history ingest')) return 'no';
    // Any other select prompt is answered with 'back'.
    return 'back';
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const listSchemas = vi.fn(async () => ['analytics', 'public']);
  const listTables = vi.fn(async () => [
    { catalog: null, schema: 'analytics', name: 'customers', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'orders', kind: 'table' as const },
  ]);
  // The scope picker stub answers 'back' instead of returning a selection.
  const pickers = makePickerStubs({ scopes: ['back'] });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection,
      scanConnection,
      listSchemas,
      listTables,
      pickDatabaseScope: pickers.pickDatabaseScope,
    },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // Primary menu shown twice: once to start the edit, once after backing out.
  expect(primaryMenuCount).toBe(2);
  expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
  expect(scanConnection).not.toHaveBeenCalled();
  // The saved connection still has its original URL and scope.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    url: 'env:DATABASE_URL',
    schemas: ['public'],
    enabled_tables: ['public.orders'],
  });
});
|
|
|
|
|
|
|
|
|
|
// Scenario: during an edit the scope picker yields schemas but 'back' for the
// tables; the flow must show the primary menu a second time, skip the scan, and
// leave the saved config intact.
it('returns to the configured primary menu when backing out of table review during edit', async () => {
  // Seed ktx.yaml. Connection properties must nest under `warehouse` so the
  // final assertion on `config.connections.warehouse` can see them.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      '      - public',
      '    enabled_tables:',
      '      - public.orders',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  const prompts = makePromptAdapter({ textValues: ['env:DATABASE_URL'] });
  // Answer select prompts by message rather than by call order.
  let primaryMenuCount = 0;
  vi.mocked(prompts.select).mockImplementation(async (options) => {
    if (options.message === 'Databases configured: warehouse\nWhat would you like to do?') {
      // First visit starts the edit; every later visit leaves the step.
      primaryMenuCount += 1;
      return primaryMenuCount === 1 ? 'edit' : 'continue';
    }
    if (options.message === 'Database to edit') return 'warehouse';
    if (options.message === 'How do you want to connect to PostgreSQL?') return 'url';
    if (options.message.startsWith('Enable query-history ingest')) return 'no';
    // Any other select prompt is answered with 'back'.
    return 'back';
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const listSchemas = vi.fn(async () => ['public']);
  const listTables = vi.fn(async () => [
    { catalog: null, schema: 'public', name: 'customers', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'orders', kind: 'table' as const },
  ]);
  // The scope picker stub returns a schema choice but 'back' for the table choice.
  const pickers = makePickerStubs({ scopes: [{ schemas: ['public'], tables: 'back' }] });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection,
      scanConnection,
      listSchemas,
      listTables,
      pickDatabaseScope: pickers.pickDatabaseScope,
    },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // Primary menu shown twice: once to start the edit, once after backing out.
  expect(primaryMenuCount).toBe(2);
  expect(listTables).toHaveBeenCalledWith(tempDir, 'warehouse', ['public']);
  expect(scanConnection).not.toHaveBeenCalled();
  // The saved connection still has its original URL and scope.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    url: 'env:DATABASE_URL',
    schemas: ['public'],
    enabled_tables: ['public.orders'],
  });
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('restores an existing database edit when the follow-up scan fails', async () => {
  // Seed a project whose `warehouse` connection already carries a saved table scope.
  // Nested keys must be indented under their parent, otherwise `driver`, `url`, ...
  // parse as siblings of `warehouse` instead of its children.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      '      - public',
      '    enabled_tables:',
      '      - public.orders',
      'setup:',
      '  database_connection_ids:',
      '    - warehouse',
      '',
    ].join('\n'),
    'utf-8',
  );
  await writeKtxSetupState(tempDir, { completed_steps: ['databases'] });
  const prompts = makePromptAdapter({
    textValues: ['env:DATABASE_URL'],
  });
  // The primary menu is answered by message rather than by position: the first
  // visit chooses "edit", every later visit chooses "continue".
  let primaryMenuCount = 0;
  vi.mocked(prompts.select).mockImplementation(async (options) => {
    if (options.message === 'Databases configured: warehouse\nWhat would you like to do?') {
      primaryMenuCount += 1;
      return primaryMenuCount === 1 ? 'edit' : 'continue';
    }
    if (options.message === 'Database to edit') return 'warehouse';
    if (options.message === 'How do you want to connect to PostgreSQL?') return 'url';
    if (options.message.startsWith('Enable query-history ingest')) return 'no';
    if (options.message === 'Connection setup failed for warehouse') return 'back';
    return 'back';
  });
  const listTables = vi.fn(async () => [
    { catalog: null, schema: 'public', name: 'customers', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'orders', kind: 'table' as const },
  ]);
  const pickers = makePickerStubs({ scopes: ['enable-all'] });

  const result = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection: vi.fn(async () => 0),
      // The connection test passes (0) but the scan that follows resolves 1,
      // which is the "follow-up scan fails" case named in the title.
      scanConnection: vi.fn(async () => 1),
      listTables,
      pickDatabaseScope: pickers.pickDatabaseScope,
    },
  );

  expect(result).toEqual({ status: 'ready', projectDir: tempDir, connectionIds: ['warehouse'] });
  // The picker stub was scripted with 'enable-all', yet the file must still hold
  // the originally saved single-table scope.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    enabled_tables: ['public.orders'],
  });
});
|
|
|
|
|
|
2026-06-03 13:08:46 +02:00
|
|
|
it('recovers from an interactive database edit failure by re-entering details', async () => {
  // Existing project with one configured connection. Nested keys are indented
  // under their parent so `driver`/`url` belong to `analytics`.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: postgres',
      '    url: env:OLD_DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - analytics',
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();
  // Scripted answers, consumed in order: edit -> analytics -> url -> no, then
  // 're-enter' on the failure menu, then url -> no again and finally 'continue'.
  const prompts = makePromptAdapter({
    selectValues: ['edit', 'analytics', 'url', 'no', 're-enter', 'url', 'no', 'continue'],
    textValues: ['env:BAD_DATABASE_URL', 'env:FIXED_DATABASE_URL'],
  });
  let attempts = 0;

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      // Only the first connection test resolves 1; every later attempt resolves 0.
      testConnection: vi.fn(async () => {
        attempts += 1;
        return attempts === 1 ? 1 : 0;
      }),
      scanConnection: vi.fn(async () => 0),
      listSchemas: vi.fn(async () => ['public']),
      listTables: vi.fn(async () => [{ catalog: null, schema: 'public', name: 'orders', kind: 'table' as const }]),
    },
  );

  expect(result.status).toBe('ready');
  // The recovery menu must have been offered with at least these choices.
  expect(vi.mocked(prompts.select)).toHaveBeenCalledWith(
    expect.objectContaining({
      message: 'Connection setup failed for analytics',
      options: expect.arrayContaining([
        { value: 'retry', label: 'Retry connection test' },
        { value: 're-enter', label: 'Re-enter connection details' },
        { value: 'back', label: 'Back' },
      ]),
    }),
  );
  // The second, corrected URL is what ends up persisted.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.analytics).toMatchObject({
    driver: 'postgres',
    url: 'env:FIXED_DATABASE_URL',
  });
});
|
|
|
|
|
|
|
|
|
|
it('re-enters details after an interactive existing database validation failure', async () => {
  // A connection exists in ktx.yaml but no `setup:` section references it.
  // Nested keys are indented so `driver`/`url` belong to `warehouse`.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:OLD_DATABASE_URL',
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();
  // Scripted answers: pick the existing connection, 'no', then 're-enter' on the
  // failure menu followed by url -> 'no' with a corrected URL.
  const prompts = makePromptAdapter({
    selectValues: ['existing:warehouse', 'no', 're-enter', 'url', 'no'],
    textValues: ['env:FIXED_DATABASE_URL'],
  });
  let attempts = 0;

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      // Only the first connection test resolves 1; later attempts resolve 0.
      testConnection: vi.fn(async () => {
        attempts += 1;
        return attempts === 1 ? 1 : 0;
      }),
      scanConnection: vi.fn(async () => 0),
      listSchemas: vi.fn(async () => ['public']),
      listTables: vi.fn(async () => [
        { catalog: null, schema: 'public', name: 'orders', kind: 'table' as const },
      ]),
    },
  );

  expect(result.status).toBe('ready');
  // Re-entering details goes back through the connection-method prompt (exact match).
  expect(vi.mocked(prompts.select)).toHaveBeenCalledWith({
    message: 'How do you want to connect to PostgreSQL?',
    options: [
      { value: 'url', label: 'Paste a connection URL' },
      { value: 'fields', label: 'Enter connection details (host, port, database, user)' },
      { value: 'back', label: 'Back' },
    ],
  });
  // For an existing connection the failure menu also offers 'skip'.
  expect(vi.mocked(prompts.select)).toHaveBeenCalledWith(
    expect.objectContaining({
      message: 'Connection setup failed for warehouse',
      options: expect.arrayContaining([
        { value: 'retry', label: 'Retry connection test' },
        { value: 're-enter', label: 'Re-enter connection details' },
        { value: 'skip', label: 'Skip this connection' },
        { value: 'back', label: 'Back' },
      ]),
    }),
  );
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    driver: 'postgres',
    url: 'env:FIXED_DATABASE_URL',
  });
});
|
|
|
|
|
|
|
|
|
|
it('restores the previous database config when backing out of a failed edit', async () => {
  // Existing project with one configured connection. Nested keys are indented
  // under their parent so `driver`/`url` belong to `analytics`.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: postgres',
      '    url: env:OLD_DATABASE_URL',
      'setup:',
      '  database_connection_ids:',
      '    - analytics',
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();
  // Scripted answers: edit -> analytics -> url -> no, then 'back' on the failure
  // menu and 'continue' once the primary menu is shown again.
  const prompts = makePromptAdapter({
    selectValues: ['edit', 'analytics', 'url', 'no', 'back', 'continue'],
    textValues: ['env:BAD_DATABASE_URL'],
  });

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      // Every connection test resolves 1, so the edited URL never validates.
      testConnection: vi.fn(async () => 1),
      scanConnection: vi.fn(async () => 0),
    },
  );

  expect(result.status).toBe('ready');
  // The bad URL typed during the edit must not survive; the old one is back.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.analytics).toMatchObject({
    driver: 'postgres',
    url: 'env:OLD_DATABASE_URL',
  });
});
|
|
|
|
|
|
|
|
|
|
it('keeps scripted database setup fail-fast without rolling back attempted config', async () => {
  // Existing connection, no `setup:` section. Nested keys are indented so
  // `driver`/`url` belong to `analytics`.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: postgres',
      '    url: env:OLD_DATABASE_URL',
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();

  // No prompt adapter is supplied and input is disabled: the run is fully scripted.
  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseConnectionIds: ['analytics'],
      databaseSchemas: [],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      // The connection test always resolves 1.
      testConnection: vi.fn(async () => 1),
      scanConnection: vi.fn(async () => 0),
    },
  );

  expect(result.status).toBe('failed');
  // The attempted change (query history enabled) stays in ktx.yaml even though
  // the step reported failure.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.analytics).toMatchObject({
    driver: 'postgres',
    url: 'env:OLD_DATABASE_URL',
    context: {
      queryHistory: {
        enabled: true,
      },
    },
  });
});
|
|
|
|
|
|
|
|
|
|
it('keeps scripted database ids fail-fast even when input mode is auto', async () => {
  // Existing connection, no `setup:` section. Nested keys are indented so
  // `driver`/`url` belong to `analytics`.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: postgres',
      '    url: env:OLD_DATABASE_URL',
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();
  const prompts = makePromptAdapter({});
  // Tripwire: opening the recovery menu throws, so the test fails loudly if the
  // scripted path ever becomes interactive. Any other select answers 'finish'.
  vi.mocked(prompts.select).mockImplementation(async ({ message }) => {
    if (message === 'Connection setup failed for analytics') {
      throw new Error('scripted selected-id setup opened the recovery menu');
    }
    return 'finish';
  });

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseConnectionIds: ['analytics'],
      databaseSchemas: [],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      // The connection test always resolves 1.
      testConnection: vi.fn(async () => 1),
      scanConnection: vi.fn(async () => 0),
    },
  );

  expect(result.status).toBe('failed');
  expect(prompts.select).not.toHaveBeenCalledWith(
    expect.objectContaining({ message: 'Connection setup failed for analytics' }),
  );
  // As in the input-disabled variant, the attempted query-history change is kept.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.analytics).toMatchObject({
    driver: 'postgres',
    url: 'env:OLD_DATABASE_URL',
    context: {
      queryHistory: {
        enabled: true,
      },
    },
  });
});
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
it('lets Escape from connection fields return to connection method selection', async () => {
  // The `undefined` text answer stands in for the Escape key press from the title
  // (presumably how the prompt adapter models a cancelled prompt; confirm in makePromptAdapter).
  const promptAdapter = makePromptAdapter({
    selectValues: ['fields', 'url'],
    textValues: ['', undefined, 'env:DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const { io } = makeIo();

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
      disableQueryHistory: true,
    },
    io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');
  // The connection-method question must have been asked twice: once before the
  // field flow and once more after it was abandoned.
  const methodPromptCalls = vi
    .mocked(promptAdapter.select)
    .mock.calls.filter(([options]) => options.message === 'How do you want to connect to PostgreSQL?');
  expect(methodPromptCalls).toHaveLength(2);
  expect(testConnection).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', expect.anything());
});
|
|
|
|
|
|
|
|
|
|
it('explains where Back goes after missing PostgreSQL field input', async () => {
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['postgres'], ['back']],
    selectValues: ['fields', 'back'],
    textValues: ['', 'db.example.com', '5432', ''],
  });
  const passingCommand = () => vi.fn(async () => 0);
  const { io } = makeIo();

  const outcome = await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    io,
    {
      prompts: promptAdapter,
      testConnection: passingCommand(),
      scanConnection: passingCommand(),
    },
  );

  expect(outcome.status).toBe('back');
  // The second select prompt is the "details missing" menu; both its wording and
  // its Back label must name the destination.
  const missingDetailsMenu = {
    message:
      'Some PostgreSQL connection details are missing.\nContinue entering details, or go back to database selection.',
    options: [
      { value: 'retry', label: 'Continue entering PostgreSQL details' },
      { value: 'back', label: 'Back to database selection' },
    ],
  };
  expect(promptAdapter.select).toHaveBeenNthCalledWith(2, missingDetailsMenu);
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('lets Escape from connection name return to database selection', async () => {
  // A single `undefined` text answer stands in for Escape at the connection-name prompt.
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['postgres'], ['back']],
    textValues: [undefined],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  const { io } = makeIo();

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseSchemas: [],
      skipDatabases: false,
    },
    io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('back');
  // Database selection is shown a second time ...
  expect(promptAdapter.multiselect).toHaveBeenCalledTimes(2);
  // ... and nothing further down the flow was reached.
  for (const untouched of [promptAdapter.select, testConnection, scanConnection]) {
    expect(untouched).not.toHaveBeenCalled();
  }
});
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
it('builds a Postgres connection from individual fields and stores password in .ktx/secrets', async () => {
  const harness = makeIo();
  // Two text answers are left empty; the assertions below show the saved config
  // still ends up with the id 'postgres-warehouse' and port 5432.
  const promptAdapter = makePromptAdapter({
    selectValues: ['fields'],
    textValues: ['', 'db.example.com', '', 'analytics', 'readonly'],
    passwordValues: ['s3cret'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    harness.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');

  const savedConfig = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  const saved = savedConfig.connections['postgres-warehouse'];
  expect(saved).toMatchObject({
    driver: 'postgres',
    host: 'db.example.com',
    port: 5432,
    database: 'analytics',
    username: 'readonly',
  });
  // ktx.yaml only holds a file: reference; the password itself lives in the secrets file.
  expect(saved.password).toMatch(/^file:/);

  const passwordFile = join(tempDir, '.ktx/secrets/postgres-warehouse-password');
  await expect(readFile(passwordFile, 'utf-8')).resolves.toBe('s3cret\n');

  // The permission-bit check is skipped on Windows.
  if (process.platform === 'win32') return;
  const { mode } = await stat(passwordFile);
  expect(mode & 0o777).toBe(0o600);
});
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
it('stores credential-bearing pasted URLs in .ktx/secrets automatically', async () => {
  const pastedUrl = 'postgresql://myuser:s3cret@db.example.com:5432/analytics'; // pragma: allowlist secret
  const secretRelativePath = '.ktx/secrets/postgres-warehouse-url';

  const harness = makeIo();
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', pastedUrl],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    harness.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');

  const savedConfig = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  const saved = savedConfig.connections['postgres-warehouse'];
  // The config points at the secrets file by resolved path rather than embedding the URL.
  expect(saved.url).toBe(`file:${resolve(tempDir, secretRelativePath)}`);
  expect(saved.driver).toBe('postgres');

  // The secrets file holds the pasted URL followed by a single newline.
  const storedSecret = await readFile(join(tempDir, secretRelativePath), 'utf-8');
  expect(storedSecret).toBe(`${pastedUrl}\n`);
});
|
|
|
|
|
|
|
|
|
|
it('summarizes connection test and structural scan output during setup', async () => {
  const io = makeIo();
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL'],
  });

  // Raw output the fake connection test writes, one write() per entry.
  const connectionTestOutput = [
    'Connection test passed: postgres-warehouse\n',
    'Driver: postgres\n',
    'Status: ok\n',
  ];
  const testConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => {
    for (const chunk of connectionTestOutput) {
      commandIo.stdout.write(chunk);
    }
    return 0;
  });

  // Raw output the fake scan writes, one write() per entry, in this order.
  const scanOutput = [
    'Scanning postgres-warehouse for context. Large databases can take a while.\n',
    '[5%] Preparing scan\n',
    '[15%] Inspecting database schema\n',
    '[55%] Semantic layer comparison found 2 changes across 2 tables\n',
    '[70%] Writing schema artifacts\n',
    '[100%] Scan completed\n',
    '✓ ktx scan completed\n',
    'Status: done\n',
    'Run: local-moywh3ky\n',
    'Connection: postgres-warehouse\n',
    'Mode: structural\n',
    'Sync: 2026-05-09-221301-local-moywh3ky\n',
    'Dry run: no\n\n',
    'What changed\n',
    ' Semantic layer comparison found 2 changes across 2 tables\n',
    ' New tables: 2\n',
    ' Changed tables: 0\n',
    ' Removed tables: 0\n',
    ' Unchanged tables: 0\n\n',
    'Needs attention\n',
    ' None\n\n',
    'Artifacts\n',
    ' Report: raw-sources/postgres-warehouse/live-database/2026-05-09-221301-local-moywh3ky/scan-report.json\n',
    ' Raw sources: raw-sources/postgres-warehouse/live-database/2026-05-09-221301-local-moywh3ky\n',
    ' Schema shards: 1\n\n',
    'Next:\n',
    ` ktx status --project-dir ${tempDir} local-moywh3ky\n`,
  ];
  const scanConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => {
    for (const chunk of scanOutput) {
      commandIo.stdout.write(chunk);
    }
    return 0;
  });

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');

  // Connection test: condensed to a short block with the driver's display name.
  const connectionSummary = [
    '◇ Testing postgres-warehouse',
    '│ ✓ Connection test passed',
    '│ Driver: PostgreSQL',
    '│',
  ].join('\n');
  expect(io.stdout()).toContain(connectionSummary);
  expect(io.stdout()).not.toContain('Tables: 2');

  // Scan: reported as "schema context" with a one-line change summary.
  expect(io.stdout()).toContain('◇ Building schema context for postgres-warehouse');
  expect(io.stdout()).toContain('│ Running database scan…');
  expect(io.stdout()).toContain('◇ Schema context complete for postgres-warehouse');
  expect(io.stdout()).toContain('│ Changes: 2 new tables');
  expect(io.stdout()).toContain('◇ Database ready');
  expect(io.stdout()).not.toContain(['Primary source', 'ready'].join(' '));
  expect(io.stdout()).toContain('│ postgres-warehouse · PostgreSQL · schema context complete');

  // None of the raw scan report may leak into the setup output.
  const suppressedFragments = [
    'Scanning postgres-warehouse',
    'Scan complete for postgres-warehouse',
    'structural scan complete',
    'Report: raw-sources',
    'live-database',
    '[5%] Preparing scan',
    'What changed',
    'Next:',
  ];
  for (const fragment of suppressedFragments) {
    expect(io.stdout()).not.toContain(fragment);
  }
});
|
|
|
|
|
|
|
|
|
|
it('normalizes $ENV_VAR syntax to env: references in pasted URLs', async () => {
  const harness = makeIo();
  // The URL prompt is answered with shell-style `$DATABASE_URL`.
  const promptAdapter = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', '$DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    harness.io,
    { prompts: promptAdapter, testConnection, scanConnection },
  );

  expect(outcome.status).toBe('ready');

  // What gets persisted is the `env:` reference form, not the `$` form.
  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const { connections } = parseKtxProjectConfig(configText);
  expect(connections['postgres-warehouse']).toMatchObject({
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  });
});
|
|
|
|
|
|
2026-05-22 14:22:11 +02:00
|
|
|
it('offers schema scope discovery for MySQL and writes selected schemas', async () => {
  const promptAdapter = makePromptAdapter({
    multiselectValues: [['mysql']],
    selectValues: ['url', 'continue'],
    textValues: ['mysql-warehouse', 'mysql://reader@localhost/analytics'],
  });
  const listSchemas = vi.fn(async () => ['analytics', 'mart']);
  // One `orders` table per requested schema; no tables when no schemas are passed.
  const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) => {
    const requested = schemas ?? [];
    return requested.map((schema) => ({ catalog: null, schema, name: 'orders', kind: 'table' as const }));
  });
  // The picker stub checks what it is handed, then narrows the scope to `mart`.
  const pickDatabaseScope = vi.fn(async (args: PickDatabaseScopeArgs) => {
    const { schemaSuggestion } = args as PickDatabaseScopeArgs & {
      schemaSuggestion: { suggested: Set<string> };
    };
    expect(args.schemaNoun).toBe('database');
    expect(args.schemas).toEqual(['analytics', 'mart']);
    expect(schemaSuggestion.suggested).toEqual(new Set(['analytics', 'mart']));
    return { kind: 'selected' as const, activeSchemas: ['mart'], enabledTables: ['mart.orders'] };
  });
  const { io } = makeIo();

  await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    io,
    {
      prompts: promptAdapter,
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      listSchemas,
      listTables,
      pickDatabaseScope,
    },
  );

  // The picker's selection is what lands in the saved connection.
  const { config } = await loadKtxProject({ projectDir: tempDir });
  expect(config.connections['mysql-warehouse']).toMatchObject({
    driver: 'mysql',
    schemas: ['mart'],
    enabled_tables: ['mart.orders'],
  });
});
|
|
|
|
|
|
|
|
|
|
it('maps ClickHouse scripted database schema input to databases and preserves database', async () => {
  // Both connection hooks are stubbed to resolve to exit code 0.
  const deps = {
    testConnection: vi.fn(async () => 0),
    scanConnection: vi.fn(async () => 0),
  };

  // All answers are supplied up front through the step arguments; no prompt adapter is passed.
  await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      skipDatabases: false,
      databaseDrivers: ['clickhouse'],
      databaseConnectionId: 'clickhouse-warehouse',
      databaseUrl: 'clickhouse://reader@localhost/analytics',
      databaseSchemas: ['analytics', 'mart'],
    },
    makeIo().io,
    deps,
  );

  // Reload the project from disk and inspect the persisted connection entry.
  const { config } = await loadKtxProject({ projectDir: tempDir });
  const connection = config.connections['clickhouse-warehouse'];

  // The schema input must be stored under `databases`, next to the `database` from the URL.
  expect(connection).toMatchObject({
    driver: 'clickhouse',
    database: 'analytics',
    databases: ['analytics', 'mart'],
  });
  // ...and must not also be written under the `schemas` key.
  expect(connection).not.toHaveProperty('schemas');
});
|
|
|
|
|
|
|
|
|
|
it('does not prompt for a bootstrap BigQuery dataset before scope discovery', async () => {
  // Scripted answers consumed by the prompt adapter during the interactive run.
  const prompts = makePromptAdapter({
    multiselectValues: [['bigquery']],
    selectValues: ['no', 'continue'],
    textValues: ['bigquery-warehouse', '/tmp/service-account.json', 'US'],
  });

  // Scope-discovery stubs: one schema, one catalog-bearing table, and a picker that selects both.
  const schemaLister = vi.fn(async () => ['analytics']);
  const tableLister = vi.fn(async () => [
    { catalog: 'project-1', schema: 'analytics', name: 'orders', kind: 'table' as const },
  ]);
  const scopePicker = vi.fn(async () => ({
    kind: 'selected' as const,
    activeSchemas: ['analytics'],
    enabledTables: ['analytics.orders'],
  }));
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);

  await runKtxSetupDatabasesStep(
    { projectDir: tempDir, inputMode: 'auto', skipDatabases: false, databaseSchemas: [] },
    makeIo().io,
    {
      prompts,
      testConnection,
      scanConnection,
      listSchemas: schemaLister,
      listTables: tableLister,
      pickDatabaseScope: scopePicker,
    },
  );

  // Collect the message of every text prompt that was shown.
  const shownMessages = vi.mocked(prompts.text).mock.calls.map(([options]) => options.message);
  const datasetPrompt = textInputPrompt('BigQuery dataset\nFor example analytics.');

  // The free-text dataset prompt must never appear among them.
  expect(shownMessages).not.toContain(datasetPrompt);
});
|
|
|
|
|
|
2026-05-10 23:13:17 -07:00
|
|
|
it('prompts for discovered Postgres schemas before the first scan', async () => {
  // Reads and parses the ktx.yaml that lives in the given project directory.
  const readProjectConfig = async (dir: string) =>
    parseKtxProjectConfig(await readFile(join(dir, 'ktx.yaml'), 'utf-8'));
  // The schema selection expected to be persisted (used for assertions only).
  const persistedSchemas = { schemas: ['orbit_analytics', 'orbit_raw'] };

  const io = makeIo();
  const prompts = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL'],
  });
  const testConnection = vi.fn(async () => 0);
  // The scan stub checks that the picked schemas are already on disk when the scan starts.
  // NOTE(review): this inner assertion only runs if the step actually invokes the scan; the
  // test does not separately assert that scanConnection was called.
  const scanConnection = vi.fn(async (projectDirAtScan) => {
    const configAtScan = await readProjectConfig(projectDirAtScan);
    expect(configAtScan.connections['postgres-warehouse']).toMatchObject(persistedSchemas);
    return 0;
  });
  const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
  const listTables = vi.fn(async () => [
    { catalog: null, schema: 'orbit_analytics', name: 'events', kind: 'table' as const },
    { catalog: null, schema: 'orbit_raw', name: 'inputs', kind: 'table' as const },
    { catalog: null, schema: 'public', name: 'misc', kind: 'table' as const },
  ]);
  // The picker stub answers with two of the three discovered schemas.
  const pickers = makePickerStubs({
    scopes: [
      {
        schemas: ['orbit_analytics', 'orbit_raw'],
        tables: ['orbit_analytics.events', 'orbit_raw.inputs'],
      },
    ],
  });
  const { pickDatabaseScope } = pickers;

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    { prompts, testConnection, scanConnection, listSchemas, listTables, pickDatabaseScope },
  );

  expect(result.status).toBe('ready');
  expect(listSchemas).toHaveBeenCalledWith(tempDir, 'postgres-warehouse');

  // The picker is consulted exactly once and is offered every discovered schema.
  expect(pickers.scopeCalls).toHaveLength(1);
  const [firstScopeCall] = pickers.scopeCalls;
  expect(firstScopeCall).toMatchObject({
    connectionId: 'postgres-warehouse',
    schemaNoun: 'schema',
    schemaNounPlural: 'schemas',
    schemas: ['orbit_analytics', 'orbit_raw', 'public'],
    schemaSuggestion: { excluded: new Set(), suggested: new Set() },
  });

  // After the step, the persisted config carries only the schemas the picker returned.
  const finalConfig = await readProjectConfig(tempDir);
  expect(finalConfig.connections['postgres-warehouse']).toMatchObject(persistedSchemas);
  expect(io.stdout()).toContain('✓ orbit_analytics, orbit_raw');
});
|
|
|
|
|
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
it('falls back to comma-separated free-text when listSchemas fails interactively', async () => {
  const io = makeIo();
  // The third text answer is the comma-separated schema list typed into the fallback prompt.
  const prompts = makePromptAdapter({
    selectValues: ['url'],
    textValues: ['', 'env:DATABASE_URL', 'orbit_analytics, orbit_raw'],
  });
  const testConnection = vi.fn(async () => 0);
  const scanConnection = vi.fn(async () => 0);
  // Schema discovery always rejects, which is the failure this test exercises.
  const listSchemas = vi.fn(() => Promise.reject(new Error('permission denied to list schemas')));
  // Table discovery yields one `events` table for each schema it is asked about.
  const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) => {
    const requestedSchemas = schemas ?? [];
    return requestedSchemas.map((schema) => ({ catalog: null, schema, name: 'events', kind: 'table' as const }));
  });
  const pickers = makePickerStubs({
    scopes: [
      {
        schemas: ['orbit_analytics', 'orbit_raw'],
        tables: ['orbit_analytics.events', 'orbit_raw.events'],
      },
    ],
  });
  const { pickDatabaseScope } = pickers;

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    { prompts, testConnection, scanConnection, listSchemas, listTables, pickDatabaseScope },
  );

  // The step still finishes, after reporting the discovery failure on stderr.
  expect(result.status).toBe('ready');
  expect(io.stderr()).toContain('Could not discover postgresql schemas');

  // The free-text fallback prompt must be among the text prompts that were shown.
  const shownMessages = vi.mocked(prompts.text).mock.calls.map(([options]) => options.message);
  const fallbackPrompt = textInputPrompt(
    'Enter schemas for postgres-warehouse as a comma-separated list (e.g. SALES, MARKETING).',
  );
  expect(shownMessages).toContain(fallbackPrompt);

  // The typed schemas reach the picker as the offered list, the initial selection and the suggestion.
  const [firstScopeCall] = pickers.scopeCalls;
  expect(firstScopeCall).toMatchObject({
    schemas: ['orbit_analytics', 'orbit_raw'],
    initialSchemas: ['orbit_analytics', 'orbit_raw'],
    schemaSuggestion: { suggested: new Set(['orbit_analytics', 'orbit_raw']) },
  });
});
|
|
|
|
|
|
2026-05-22 14:22:11 +02:00
|
|
|
it('passes schemas and a lazy table callback to the scope picker instead of eager table discovery', async () => {
|
|
|
|
|
const listSchemas = vi.fn(async () => ['analytics', 'raw']);
|
|
|
|
|
const listTables = vi.fn(async (_projectDir: string, _connectionId: string, schemas?: string[]) =>
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
(schemas ?? []).map((schema) => ({ catalog: null, schema, name: 'orders', kind: 'table' as const })),
|
2026-05-22 14:22:11 +02:00
|
|
|
);
|
|
|
|
|
const pickDatabaseScope = vi.fn(async (args: PickDatabaseScopeArgs) => {
|
|
|
|
|
const lazyArgs = args as PickDatabaseScopeArgs & {
|
|
|
|
|
schemas: string[];
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
listTablesForSchemas: (schemas: string[]) => Promise<Array<{ catalog: string | null; schema: string; name: string; kind: 'table' }>>;
|
2026-05-22 14:22:11 +02:00
|
|
|
};
|
|
|
|
|
expect(lazyArgs.schemas).toEqual(['analytics', 'raw']);
|
|
|
|
|
expect(args).not.toHaveProperty('discovered');
|
|
|
|
|
expect(listTables).not.toHaveBeenCalled();
|
|
|
|
|
const tables = await lazyArgs.listTablesForSchemas(['analytics']);
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
expect(tables).toEqual([{ catalog: null, schema: 'analytics', name: 'orders', kind: 'table' }]);
|
2026-05-22 14:22:11 +02:00
|
|
|
return { kind: 'selected' as const, activeSchemas: ['analytics'], enabledTables: ['analytics.orders'] };
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
await runKtxSetupDatabasesStep(
|
|
|
|
|
{ projectDir: tempDir, inputMode: 'auto', databaseDrivers: ['postgres'], skipDatabases: false, databaseSchemas: [] },
|
|
|
|
|
makeIo().io,
|
|
|
|
|
{
|
|
|
|
|
prompts: makePromptAdapter({ selectValues: ['url'], textValues: ['', 'env:DATABASE_URL'] }),
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
|
|
|
|
scanConnection: vi.fn(async () => 0),
|
|
|
|
|
listSchemas,
|
|
|
|
|
listTables,
|
|
|
|
|
pickDatabaseScope,
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(listTables).toHaveBeenCalledTimes(1);
|
|
|
|
|
expect(listTables).toHaveBeenCalledWith(tempDir, 'postgres-warehouse', ['analytics']);
|
|
|
|
|
});
|
|
|
|
|
|
2026-06-10 12:36:53 +02:00
|
|
|
it('fails non-interactive setup when a scope-bearing connection has no schema configured', async () => {
|
2026-05-10 23:13:17 -07:00
|
|
|
const io = makeIo();
|
|
|
|
|
const prompts = makePromptAdapter({});
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
2026-06-10 12:36:53 +02:00
|
|
|
const scanConnection = vi.fn(async () => 0);
|
2026-05-10 23:13:17 -07:00
|
|
|
const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
|
|
|
|
|
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['postgres'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: 'env:DATABASE_URL',
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{ prompts, testConnection, scanConnection, listSchemas },
|
|
|
|
|
);
|
|
|
|
|
|
2026-06-10 12:36:53 +02:00
|
|
|
expect(result.status).toBe('failed');
|
|
|
|
|
expect(listSchemas).not.toHaveBeenCalled();
|
|
|
|
|
expect(scanConnection).not.toHaveBeenCalled();
|
|
|
|
|
expect(io.stderr()).toContain('--database-schema');
|
|
|
|
|
expect(io.stderr()).toContain('connections.warehouse.schemas');
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('preserves existing BigQuery dataset_ids in non-interactive setup without rediscovering', async () => {
|
|
|
|
|
await writeFile(
|
|
|
|
|
join(tempDir, 'ktx.yaml'),
|
|
|
|
|
[
|
|
|
|
|
'connections:',
|
|
|
|
|
' bigquery-warehouse:',
|
|
|
|
|
' driver: bigquery',
|
|
|
|
|
' dataset_ids:',
|
|
|
|
|
" - 'sales'",
|
|
|
|
|
' credentials_json: env:BIGQUERY_CREDENTIALS_JSON',
|
|
|
|
|
'',
|
|
|
|
|
].join('\n'),
|
|
|
|
|
'utf-8',
|
|
|
|
|
);
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
|
|
|
|
const scanConnection = vi.fn(async () => 0);
|
|
|
|
|
const listSchemas = vi.fn(async () => ['sales', 'stripe', 'posthog', 'linear']);
|
|
|
|
|
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseConnectionIds: ['bigquery-warehouse'],
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{ testConnection, scanConnection, listSchemas },
|
|
|
|
|
);
|
|
|
|
|
|
2026-05-10 23:13:17 -07:00
|
|
|
expect(result.status).toBe('ready');
|
2026-06-10 12:36:53 +02:00
|
|
|
expect(listSchemas).not.toHaveBeenCalled();
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledWith(tempDir, 'bigquery-warehouse', expect.anything());
|
|
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
|
|
|
|
expect(config.connections['bigquery-warehouse']).toMatchObject({
|
|
|
|
|
driver: 'bigquery',
|
|
|
|
|
dataset_ids: ['sales'],
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('preserves existing Postgres schemas in non-interactive setup without rediscovering', async () => {
|
|
|
|
|
await writeFile(
|
|
|
|
|
join(tempDir, 'ktx.yaml'),
|
|
|
|
|
[
|
|
|
|
|
'connections:',
|
|
|
|
|
' warehouse:',
|
|
|
|
|
' driver: postgres',
|
|
|
|
|
' url: env:DATABASE_URL',
|
|
|
|
|
' schemas:',
|
|
|
|
|
" - 'analytics'",
|
|
|
|
|
'',
|
|
|
|
|
].join('\n'),
|
|
|
|
|
'utf-8',
|
|
|
|
|
);
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
|
|
|
|
const scanConnection = vi.fn(async () => 0);
|
|
|
|
|
const listSchemas = vi.fn(async () => ['analytics', 'raw', 'public']);
|
|
|
|
|
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseConnectionIds: ['warehouse'],
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{ testConnection, scanConnection, listSchemas },
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
|
|
|
|
expect(listSchemas).not.toHaveBeenCalled();
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
|
2026-05-10 23:13:17 -07:00
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
|
|
|
|
expect(config.connections.warehouse).toMatchObject({
|
2026-06-10 12:36:53 +02:00
|
|
|
driver: 'postgres',
|
|
|
|
|
schemas: ['analytics'],
|
2026-05-10 23:13:17 -07:00
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2026-05-10 23:12:26 +02:00
|
|
|
it('adds one non-interactive Postgres URL connection, tests it, scans it, and marks databases complete', async () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
|
|
|
|
const scanConnection = vi.fn(async () => 0);
|
2026-05-10 23:13:17 -07:00
|
|
|
const listSchemas = vi.fn(async () => ['orbit_analytics', 'orbit_raw', 'public']);
|
2026-05-10 23:12:26 +02:00
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'auto',
|
|
|
|
|
databaseDrivers: ['postgres'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: 'env:DATABASE_URL',
|
|
|
|
|
databaseSchemas: ['public'],
|
|
|
|
|
skipDatabases: false,
|
2026-05-14 01:43:06 +02:00
|
|
|
disableQueryHistory: true,
|
2026-05-10 23:12:26 +02:00
|
|
|
},
|
|
|
|
|
io.io,
|
2026-05-10 23:13:17 -07:00
|
|
|
{ testConnection, scanConnection, listSchemas },
|
2026-05-10 23:12:26 +02:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
2026-05-10 23:13:17 -07:00
|
|
|
expect(listSchemas).not.toHaveBeenCalled();
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
|
2026-05-10 23:51:24 +02:00
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(config.connections.warehouse).toEqual({
|
|
|
|
|
driver: 'postgres',
|
|
|
|
|
url: 'env:DATABASE_URL',
|
|
|
|
|
schemas: ['public'],
|
2026-05-29 17:41:04 +02:00
|
|
|
context: { queryHistory: { enabled: false } },
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
expect(config.setup).toEqual({
|
|
|
|
|
database_connection_ids: ['warehouse'],
|
|
|
|
|
});
|
2026-05-12 16:26:23 -07:00
|
|
|
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases');
|
2026-05-14 01:43:06 +02:00
|
|
|
expect(io.stdout()).toContain('Database ready');
|
|
|
|
|
expect(io.stdout()).not.toContain(['Primary source', 'ready'].join(' '));
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(io.stdout()).not.toContain('DATABASE_URL=');
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('adds one non-interactive SQLite connection from --database-url without prompting', async () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const prompts = makePromptAdapter({});
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
|
|
|
|
const scanConnection = vi.fn(async () => 0);
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['sqlite'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: './warehouse.sqlite',
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{ prompts, testConnection, scanConnection },
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
|
|
|
|
expect(prompts.text).not.toHaveBeenCalled();
|
|
|
|
|
expect(testConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledWith(tempDir, 'warehouse', expect.anything());
|
2026-05-10 23:51:24 +02:00
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(config.connections.warehouse).toEqual({
|
|
|
|
|
driver: 'sqlite',
|
|
|
|
|
path: './warehouse.sqlite',
|
|
|
|
|
});
|
|
|
|
|
expect(config.setup).toEqual({
|
|
|
|
|
database_connection_ids: ['warehouse'],
|
|
|
|
|
});
|
2026-05-12 16:26:23 -07:00
|
|
|
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases');
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('selects multiple existing connections and validates each before recording setup ids', async () => {
|
|
|
|
|
await writeFile(
|
2026-05-10 23:51:24 +02:00
|
|
|
join(tempDir, 'ktx.yaml'),
|
2026-05-10 23:12:26 +02:00
|
|
|
[
|
|
|
|
|
'connections:',
|
|
|
|
|
' warehouse:',
|
|
|
|
|
' driver: postgres',
|
|
|
|
|
' url: env:DATABASE_URL',
|
2026-06-10 12:36:53 +02:00
|
|
|
' schemas:',
|
|
|
|
|
" - 'public'",
|
2026-05-10 23:12:26 +02:00
|
|
|
' analytics:',
|
|
|
|
|
' driver: snowflake',
|
|
|
|
|
' authMethod: password',
|
|
|
|
|
' account: env:SNOWFLAKE_ACCOUNT',
|
|
|
|
|
' warehouse: WH',
|
|
|
|
|
' database: ANALYTICS',
|
|
|
|
|
' schema_name: PUBLIC',
|
|
|
|
|
' username: reader',
|
|
|
|
|
' password: env:SNOWFLAKE_PASSWORD',
|
|
|
|
|
'',
|
|
|
|
|
].join('\n'),
|
|
|
|
|
'utf-8',
|
|
|
|
|
);
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const testConnection = vi.fn(async () => 0);
|
|
|
|
|
const scanConnection = vi.fn(async () => 0);
|
|
|
|
|
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseConnectionIds: ['warehouse', 'analytics'],
|
|
|
|
|
databaseSchemas: [],
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{ testConnection, scanConnection },
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
|
|
|
|
expect(testConnection).toHaveBeenCalledTimes(2);
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledTimes(2);
|
2026-05-10 23:51:24 +02:00
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(config.setup?.database_connection_ids).toEqual(['warehouse', 'analytics']);
|
2026-05-13 13:55:21 +02:00
|
|
|
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
|
2026-05-12 16:26:23 -07:00
|
|
|
expect((await readKtxSetupState(tempDir)).completed_steps).toContain('databases');
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('keeps the connection config but does not mark databases complete when scanning fails', async () => {
|
|
|
|
|
const io = makeIo();
|
2026-05-10 23:51:24 +02:00
|
|
|
const result = await runKtxSetupDatabasesStep(
|
2026-05-10 23:12:26 +02:00
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['postgres'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: 'env:DATABASE_URL',
|
2026-06-10 12:36:53 +02:00
|
|
|
databaseSchemas: ['public'],
|
2026-05-10 23:12:26 +02:00
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
|
|
|
|
scanConnection: vi.fn(async () => 1),
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('failed');
|
2026-05-10 23:51:24 +02:00
|
|
|
const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
|
2026-05-10 23:12:26 +02:00
|
|
|
expect(config.connections.warehouse).toMatchObject({ driver: 'postgres', url: 'env:DATABASE_URL' });
|
2026-05-13 13:55:21 +02:00
|
|
|
expect(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')).not.toContain('completed_steps:');
|
2026-05-29 17:41:04 +02:00
|
|
|
expect(io.stderr()).toContain('Database scan failed for warehouse.');
|
|
|
|
|
expect(io.stderr()).toContain('│ Database scan failed for warehouse.');
|
|
|
|
|
expect(io.stderr()).toContain(`Debug command: ktx ingest warehouse --project-dir ${tempDir} --debug`);
|
2026-05-14 01:43:06 +02:00
|
|
|
expect(io.stderr()).not.toContain('Structural scan failed for warehouse.');
|
2026-05-29 17:41:04 +02:00
|
|
|
expect(io.stderr()).not.toMatch(/^Database scan failed for warehouse\./m);
|
2026-05-13 15:04:50 +02:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('prints the native SQLite rebuild command when scanning hits a Node ABI mismatch', async () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['postgres'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: 'env:DATABASE_URL',
|
2026-06-10 12:36:53 +02:00
|
|
|
databaseSchemas: ['public'],
|
2026-05-13 15:04:50 +02:00
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
2026-05-13 15:55:00 +02:00
|
|
|
rebuildNativeSqlite: vi.fn(async () => 1),
|
2026-05-13 15:04:50 +02:00
|
|
|
scanConnection: vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => {
|
|
|
|
|
commandIo.stderr.write(
|
|
|
|
|
[
|
|
|
|
|
"The module '/workspace/node_modules/better-sqlite3/build/Release/better_sqlite3.node'",
|
|
|
|
|
'was compiled against a different Node.js version using',
|
|
|
|
|
'NODE_MODULE_VERSION 147. This version of Node.js requires',
|
|
|
|
|
'NODE_MODULE_VERSION 137. Please try re-compiling or re-installing',
|
|
|
|
|
'the module (for instance, using `npm rebuild` or `npm install`).',
|
|
|
|
|
'',
|
|
|
|
|
].join('\n'),
|
|
|
|
|
);
|
|
|
|
|
return 1;
|
|
|
|
|
}),
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('failed');
|
|
|
|
|
expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.');
|
|
|
|
|
expect(io.stderr()).toContain('│ Native SQLite is built for a different Node.js ABI.');
|
|
|
|
|
expect(io.stderr()).toContain('Fix: pnpm run native:rebuild');
|
2026-05-29 17:41:04 +02:00
|
|
|
expect(io.stderr()).toContain(`Retry: ktx ingest warehouse --project-dir ${tempDir}`);
|
2026-05-14 01:43:06 +02:00
|
|
|
expect(io.stderr()).not.toContain('ktx scan');
|
2026-05-13 15:04:50 +02:00
|
|
|
expect(io.stderr()).not.toContain('npm rebuild');
|
|
|
|
|
expect(io.stderr()).not.toMatch(/^Native SQLite is built for a different Node.js ABI\./m);
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
|
2026-05-13 15:55:00 +02:00
|
|
|
it('rebuilds native SQLite once and retries setup scanning after a Node ABI mismatch', async () => {
|
|
|
|
|
const io = makeIo();
|
|
|
|
|
const scanConnection = vi.fn(async (_projectDir: string, _connectionId: string, commandIo: KtxCliIo) => {
|
|
|
|
|
if (scanConnection.mock.calls.length === 1) {
|
|
|
|
|
commandIo.stderr.write(
|
|
|
|
|
[
|
|
|
|
|
"The module '/workspace/node_modules/better-sqlite3/build/Release/better_sqlite3.node'",
|
|
|
|
|
'was compiled against a different Node.js version using',
|
|
|
|
|
'NODE_MODULE_VERSION 147. This version of Node.js requires',
|
|
|
|
|
'NODE_MODULE_VERSION 137. Please try re-compiling or re-installing',
|
|
|
|
|
'the module (for instance, using `npm rebuild` or `npm install`).',
|
|
|
|
|
'',
|
|
|
|
|
].join('\n'),
|
|
|
|
|
);
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
commandIo.stdout.write('What changed\n');
|
|
|
|
|
commandIo.stdout.write(' Semantic layer comparison found 0 changes across 56 tables\n');
|
|
|
|
|
commandIo.stdout.write(' New tables: 0\n');
|
|
|
|
|
commandIo.stdout.write(' Changed tables: 0\n');
|
|
|
|
|
commandIo.stdout.write(' Removed tables: 0\n');
|
|
|
|
|
commandIo.stdout.write(' Unchanged tables: 56\n');
|
|
|
|
|
return 0;
|
|
|
|
|
});
|
|
|
|
|
const rebuildNativeSqlite = vi.fn(async () => 0);
|
|
|
|
|
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['postgres'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
|
|
|
|
databaseUrl: 'env:DATABASE_URL',
|
2026-06-10 12:36:53 +02:00
|
|
|
databaseSchemas: ['public'],
|
2026-05-13 15:55:00 +02:00
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
|
|
|
|
scanConnection,
|
|
|
|
|
rebuildNativeSqlite,
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
|
|
|
|
expect(rebuildNativeSqlite).toHaveBeenCalledOnce();
|
|
|
|
|
expect(rebuildNativeSqlite).toHaveBeenCalledWith(expect.anything());
|
|
|
|
|
expect(scanConnection).toHaveBeenCalledTimes(2);
|
|
|
|
|
expect(io.stderr()).toContain('Native SQLite is built for a different Node.js ABI.');
|
|
|
|
|
expect(io.stderr()).toContain('Rebuilding Native SQLite with pnpm run native:rebuild…');
|
2026-05-14 01:43:06 +02:00
|
|
|
expect(io.stdout()).toContain('◇ Schema context complete for warehouse');
|
|
|
|
|
expect(io.stdout()).toContain('│ Changes: 0 changes across 56 tables');
|
2026-05-13 15:55:00 +02:00
|
|
|
});
|
|
|
|
|
|
2026-05-24 19:30:06 +02:00
|
|
|
function fakeHistoricSqlRunner(
|
|
|
|
|
dialect: 'postgres' | 'snowflake' | 'bigquery',
|
|
|
|
|
catalogName: string,
|
|
|
|
|
) {
|
|
|
|
|
return {
|
|
|
|
|
dialect,
|
|
|
|
|
catalogName,
|
|
|
|
|
async run() {
|
|
|
|
|
return { warnings: [], info: [] };
|
|
|
|
|
},
|
|
|
|
|
formatSuccessDetail() {
|
|
|
|
|
return { detail: `${catalogName} ready`, warnings: [] };
|
|
|
|
|
},
|
|
|
|
|
fixAdvice() {
|
|
|
|
|
return {
|
|
|
|
|
failHeadline: `${catalogName} unavailable`,
|
|
|
|
|
remediation: 'Fix query-history grants.',
|
|
|
|
|
};
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// Runs the databases setup step for a Snowflake connection with query history
// enabled, using stubbed connection test/scan callbacks and a stubbed
// readiness probe. Verifies that the probe receives the Snowflake connection,
// the step reports `ready`, and the written ktx.yaml carries the query-history
// settings without any ingest adapter entries.
it('writes query history config for supported Snowflake databases after validation succeeds', async () => {
  const harness = makeIo();
  const snowflakeRunner = fakeHistoricSqlRunner(
    'snowflake',
    'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY',
  );

  // Always reports a successful probe backed by the fake runner.
  const readinessProbe = vi.fn(async () => {
    return {
      ok: true as const,
      dialect: 'snowflake' as const,
      runner: snowflakeRunner,
      result: { warnings: [], info: [] },
    };
  });

  const outcome = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['snowflake'],
      databaseConnectionId: 'snowflake',
      databaseSchemas: ['PUBLIC'],
      enableQueryHistory: true,
      queryHistoryWindowDays: 30,
      queryHistoryServiceAccountPatterns: ['^svc_'],
      queryHistoryRedactionPatterns: ['(?i)secret'],
      skipDatabases: false,
    },
    harness.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe: readinessProbe,
      // NOTE(review): canned prompt answers; presumably consumed in prompt
      // order by the Snowflake wizard — confirm against makePromptAdapter.
      prompts: makePromptAdapter({
        selectValues: ['password'],
        textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''],
        passwordValues: ['env:SNOWFLAKE_PASSWORD'],
      }),
    },
  );

  // The probe must have been handed the project dir and the Snowflake connection.
  const expectedProbeInput = expect.objectContaining({
    projectDir: tempDir,
    connectionId: 'snowflake',
    connection: expect.objectContaining({ driver: 'snowflake' }),
  });
  expect(readinessProbe).toHaveBeenCalledWith(expectedProbeInput);

  expect(outcome.status).toBe('ready');

  // Inspect both the raw YAML text and its parsed form.
  const configPath = join(tempDir, 'ktx.yaml');
  const yamlText = await readFile(configPath, 'utf-8');
  const parsedConfig = parseKtxProjectConfig(yamlText);

  const expectedQueryHistory = {
    enabled: true,
    windowDays: 30,
    filters: {
      dropTrivialProbes: true,
      serviceAccounts: {
        patterns: ['^svc_'],
        mode: 'exclude',
      },
    },
    redactionPatterns: ['(?i)secret'],
  };
  expect(parsedConfig.connections.snowflake).toMatchObject({
    driver: 'snowflake',
    authMethod: 'password',
    context: { queryHistory: expectedQueryHistory },
  });

  // No adapter entries should be written to the project config.
  expect(yamlText).not.toContain('live-database');
  expect(yamlText).not.toContain('historic-sql');
  expect(yamlText).not.toMatch(/^\s+adapters:/m);
  expect(parsedConfig.ingest.adapters).toEqual([]);
});
|
|
|
|
|
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
it('configures Snowflake with RSA key-pair auth via setup wizard', async () => {
  const io = makeIo();
  // Scripted answers handed to the prompt adapter: one select value, six text
  // values, and one password value.
  const prompts = makePromptAdapter({
    selectValues: ['rsa'],
    textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', '~/.ssh/snowflake_rsa_key.p8', ''],
    passwordValues: ['env:SNOWFLAKE_KEY_PASS'],
  });

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['snowflake'],
      databaseConnectionId: 'snowflake',
      databaseSchemas: ['PUBLIC'],
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      prompts,
    },
  );

  expect(result.status).toBe('ready');

  // The written ktx.yaml must carry the RSA settings and no password field.
  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(config.connections.snowflake).toMatchObject({
    driver: 'snowflake',
    authMethod: 'rsa',
    account: 'env:SNOWFLAKE_ACCOUNT',
    warehouse: 'WH',
    database: 'ANALYTICS',
    username: 'reader',
    privateKey: 'file:~/.ssh/snowflake_rsa_key.p8', // pragma: allowlist secret
    passphrase: 'env:SNOWFLAKE_KEY_PASS', // pragma: allowlist secret
  });
  expect(config.connections.snowflake.password).toBeUndefined();
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('writes Postgres query history config with minExecutions and ignores window/redaction output', async () => {
  const io = makeIo();
  // Probe stub that resolves with ok: true for the postgres dialect.
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner: fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      enableQueryHistory: true,
      queryHistoryWindowDays: 30,
      queryHistoryMinExecutions: 12,
      queryHistoryServiceAccountPatterns: ['^svc_'],
      queryHistoryRedactionPatterns: ['(?i)secret'],
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
    },
  );

  expect(result.status).toBe('ready');

  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(config.connections.warehouse).toMatchObject({
    driver: 'postgres',
    url: 'env:DATABASE_URL',
    schemas: ['public'],
    context: {
      queryHistory: {
        enabled: true,
        minExecutions: 12,
        filters: {
          dropTrivialProbes: true,
          serviceAccounts: {
            patterns: ['^svc_'],
            mode: 'exclude',
          },
        },
      },
    },
  });

  // Narrow the context to a plain object so the negative property checks below
  // can index into it; fall back to an empty object otherwise.
  const rawContext = config.connections.warehouse.context;
  const warehouseContext =
    rawContext && typeof rawContext === 'object' && !Array.isArray(rawContext)
      ? (rawContext as Record<string, unknown>)
      : {};
  // The window and redaction inputs passed above must not be written out.
  expect(warehouseContext.queryHistory).not.toHaveProperty('windowDays');
  expect(warehouseContext.queryHistory).not.toHaveProperty('redactionPatterns');

  // No adapter entries may appear in the written file or in the parsed config.
  expect(configText).not.toContain('live-database');
  expect(configText).not.toContain('historic-sql');
  expect(configText).not.toMatch(/^\s+adapters:/m);
  expect(config.ingest.adapters).toEqual([]);
  expect(config.ingest.workUnits.maxConcurrency).toBe(6);

  const stdout = io.stdout();
  expect(stdout).toContain('Query history probe...');
  expect(stdout).not.toContain('Historic SQL probe...');
  expect(stdout).toContain('pg_stat_statements ready');
});
|
|
|
|
|
|
2026-06-03 17:19:42 +02:00
|
|
|
it('auto-applies derived query-history service-account filters in non-interactive setup', async () => {
  const io = makeIo();
  // Picker stub proposing a single excluded role.
  const queryHistoryFilterPicker = vi.fn(async () => ({
    excludedRoles: [
      {
        role: 'svc_loader',
        pattern: '^svc_loader$',
        reason: 'Runs recurring loader traffic against modeled tables.',
      },
    ],
    consideredRoleCount: 2,
    skipped: null,
    warnings: [],
    parseFailedTemplateIds: [],
  }));
  // Probe stub that resolves with ok: true for the postgres dialect.
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner: fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      yes: true,
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
      queryHistoryFilterPicker,
      createQueryHistoryLlmRuntime: vi.fn(() => null),
    },
  );

  expect(result.status).toBe('ready');
  expect(queryHistoryFilterPicker).toHaveBeenCalledTimes(1);

  // The proposed pattern must end up in the written serviceAccounts filter.
  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(config.connections.warehouse).toMatchObject({
    context: {
      queryHistory: {
        filters: {
          dropTrivialProbes: true,
          serviceAccounts: {
            mode: 'exclude',
            patterns: ['^svc_loader$'],
          },
        },
      },
    },
  });

  const stdout = io.stdout();
  expect(stdout).toContain('Proposed query-history service-account filters');
  expect(stdout).toContain('svc_loader');
});
|
|
|
|
|
|
2026-06-04 14:11:08 +02:00
|
|
|
it('collapses query-history parse failures to a count and lists ids only with --debug', async () => {
  const io = makeIo();
  // Picker stub reporting two unparseable template ids and no exclusions.
  const queryHistoryFilterPicker = vi.fn(async () => ({
    excludedRoles: [],
    consideredRoleCount: 1,
    skipped: { reason: 'no-in-scope-history' as const },
    warnings: [],
    parseFailedTemplateIds: ['111', '222'],
  }));
  // Probe stub that resolves with ok: true for the postgres dialect.
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner: fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      debug: true,
      yes: true,
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
      queryHistoryFilterPicker,
      createQueryHistoryLlmRuntime: vi.fn(() => null),
    },
  );

  expect(result.status).toBe('ready');

  // stdout carries only the count; the individual ids are expected on stderr.
  const stdout = io.stdout();
  expect(stdout).toContain('Skipped 2 query templates ktx could not parse');
  expect(stdout).not.toContain('111');
  expect(stdout).not.toContain('222');
  expect(io.stderr()).toContain('could not parse 2 template(s): 111, 222');
});
|
|
|
|
|
|
2026-06-03 17:19:42 +02:00
|
|
|
it('lets interactive setup skip applying derived filters', async () => {
  const io = makeIo();
  // The scripted select answer declines the proposed filters.
  const prompts = makePromptAdapter({ selectValues: ['skip'] });
  // Probe stub that resolves with ok: true for the postgres dialect.
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner: fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));
  // Picker stub proposing a single excluded role.
  const queryHistoryFilterPicker = vi.fn(async () => ({
    excludedRoles: [{ role: 'svc_loader', pattern: '^svc_loader$', reason: 'Loader traffic.' }],
    consideredRoleCount: 2,
    skipped: null,
    warnings: [],
    parseFailedTemplateIds: [],
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      yes: false,
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
      queryHistoryFilterPicker,
      createQueryHistoryLlmRuntime: vi.fn(() => null),
    },
  );

  expect(result.status).toBe('ready');

  // After answering "skip", the filters must hold no serviceAccounts entry.
  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(queryHistoryFromConfig(config.connections.warehouse)?.filters).toEqual({ dropTrivialProbes: true });

  expect(prompts.select).toHaveBeenCalledWith({
    message: 'Apply 1 derived query-history service-account exclusion?',
    options: [
      { value: 'apply', label: 'Apply derived filters (recommended)' },
      { value: 'skip', label: 'Leave query history filters unchanged' },
    ],
  });
});
|
|
|
|
|
|
|
|
|
|
it('does not overwrite an existing serviceAccounts block', async () => {
  // Seed ktx.yaml with a connection that already has a serviceAccounts filter.
  // YAML nesting is expressed purely through indentation, so each line carries
  // the full indent for its depth.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      "      - 'public'",
      '    context:',
      '      queryHistory:',
      '        enabled: true',
      '        filters:',
      '          dropTrivialProbes: true',
      '          serviceAccounts:',
      '            mode: exclude',
      '            patterns:',
      "              - '^existing$'",
      '',
    ].join('\n'),
    'utf-8',
  );

  const io = makeIo();
  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      yes: true,
      databaseConnectionIds: ['warehouse'],
      databaseSchemas: [],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe: vi.fn(async () => {
        const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
        return {
          ok: true as const,
          dialect: 'postgres' as const,
          runner,
          result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
        };
      }),
      // The picker stub proposes an exclusion but also reports that it was
      // skipped because a user-authored block is present.
      queryHistoryFilterPicker: vi.fn(async () => ({
        excludedRoles: [{ role: 'svc_loader', pattern: '^svc_loader$', reason: 'Loader traffic.' }],
        consideredRoleCount: 2,
        skipped: { reason: 'user-block-present' as const },
        warnings: [],
        parseFailedTemplateIds: [],
      })),
      createQueryHistoryLlmRuntime: vi.fn(() => null),
    },
  );

  expect(result.status).toBe('ready');

  // The seeded pattern must survive untouched.
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(queryHistoryFromConfig(config.connections.warehouse)?.filters?.serviceAccounts).toEqual({
    mode: 'exclude',
    patterns: ['^existing$'],
  });
  expect(io.stdout()).toContain('Existing query-history service-account filters left unchanged');
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('asks interactive Postgres setup whether to enable query history', async () => {
  // Seed ktx.yaml with an existing Postgres connection plus llm/scan sections.
  // NOTE(review): the key nesting below was reconstructed from a copy whose
  // indentation had been collapsed; the `connections` subtree follows from the
  // assertions in this test, but the exact nesting of the `llm` and `scan`
  // keys should be confirmed against the project config schema.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    readonly: true',
      'llm:',
      '  provider:',
      '    backend: anthropic',
      '  models:',
      '    default: claude-sonnet-4-6',
      'scan:',
      '  enrichment:',
      '    mode: llm',
      '    embeddings:',
      '      backend: openai',
      '      model: text-embedding-3-small',
      '      dimensions: 1536',
      '',
    ].join('\n'),
    'utf-8',
  );

  const io = makeIo();
  // The scripted select answer accepts query history.
  const prompts = makePromptAdapter({ selectValues: ['yes'] });
  const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner,
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseConnectionIds: ['warehouse'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
    },
  );

  expect(result.status).toBe('ready');
  expect(prompts.select).toHaveBeenCalledWith({
    message: 'Enable query-history ingest for this PostgreSQL connection?',
    options: [
      { value: 'yes', label: 'Enable query history (recommended)' },
      { value: 'no', label: 'Do not enable query history' },
      { value: 'back', label: 'Back' },
    ],
  });
  // The probe must be invoked for the seeded connection and its driver.
  expect(historicSqlReadinessProbe).toHaveBeenCalledWith(
    expect.objectContaining({
      projectDir: tempDir,
      connectionId: 'warehouse',
      connection: expect.objectContaining({ driver: 'postgres' }),
    }),
  );

  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    context: {
      queryHistory: {
        enabled: true,
        minExecutions: 5,
        filters: { dropTrivialProbes: true },
      },
    },
  });
});
|
|
|
|
|
|
|
|
|
|
it('writes query history config for supported existing database connections', async () => {
  // Seed ktx.yaml with an existing BigQuery connection. YAML nesting is
  // expressed through indentation, so each line carries its full indent.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: bigquery',
      '    dataset_id: analytics',
      '    credentials_json: env:BIGQUERY_CREDENTIALS_JSON',
      '',
    ].join('\n'),
    'utf-8',
  );

  const io = makeIo();
  const runner = fakeHistoricSqlRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT');
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'bigquery' as const,
    runner,
    result: { warnings: [], info: [] },
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseConnectionIds: ['analytics'],
      databaseSchemas: [],
      enableQueryHistory: true,
      queryHistoryWindowDays: 45,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
    },
  );

  expect(result.status).toBe('ready');
  // The probe must be invoked for the seeded connection and its driver.
  expect(historicSqlReadinessProbe).toHaveBeenCalledWith(
    expect.objectContaining({
      projectDir: tempDir,
      connectionId: 'analytics',
      connection: expect.objectContaining({ driver: 'bigquery' }),
    }),
  );

  const configText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(configText);
  expect(config.connections.analytics).toMatchObject({
    context: {
      queryHistory: {
        enabled: true,
        windowDays: 45,
        filters: {
          dropTrivialProbes: true,
        },
        redactionPatterns: [],
      },
    },
  });
  // No adapter entries may appear in the written file or in the parsed config.
  expect(configText).not.toContain('live-database');
  expect(configText).not.toContain('historic-sql');
  expect(configText).not.toMatch(/^\s+adapters:/m);
  expect(config.ingest.adapters).toEqual([]);
});
|
|
|
|
|
|
2026-05-24 19:30:06 +02:00
|
|
|
it('prints a non-blocking BigQuery query history probe failure with the grants remediation', async () => {
  // Seed ktx.yaml with an existing BigQuery connection. YAML nesting is
  // expressed through indentation, so each line carries its full indent.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  analytics:',
      '    driver: bigquery',
      '    dataset_id: analytics',
      '    credentials_json: env:BIGQUERY_CREDENTIALS_JSON',
      '',
    ].join('\n'),
    'utf-8',
  );

  const io = makeIo();
  // Fake runner whose fixAdvice is overridden with a fixed headline and
  // remediation text; the assertions below look for both in stdout.
  const runner = {
    ...fakeHistoricSqlRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'),
    fixAdvice: () => ({
      failHeadline: 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT',
      remediation:
        'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
    }),
  };
  const error = new Error('access denied');
  // Probe stub that resolves with ok: false and the error above.
  const historicSqlReadinessProbe = vi.fn(async () => ({
    ok: false as const,
    dialect: 'bigquery' as const,
    runner,
    error,
  }));

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseConnectionIds: ['analytics'],
      databaseSchemas: [],
      enableQueryHistory: true,
      queryHistoryWindowDays: 45,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
    },
  );

  // A failed probe must not change the step status.
  expect(result.status).toBe('ready');
  expect(historicSqlReadinessProbe).toHaveBeenCalledWith(
    expect.objectContaining({
      projectDir: tempDir,
      connectionId: 'analytics',
      connection: expect.objectContaining({ driver: 'bigquery' }),
    }),
  );
  expect(io.stdout()).toContain('Query history probe...');
  expect(io.stdout()).toContain('BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT');
  expect(io.stdout()).toContain('roles/bigquery.resourceViewer');
  expect(io.stdout()).toContain('bigquery.jobs.listAll');
  expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.');
});
|
|
|
|
|
|
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
|
|
|
// Interactive BigQuery flow: the query-history readiness probe reports failure,
// the first scan returns 1, and the user answers the resulting failure prompt
// with "disable-query-history". The step must then finish 'ready' after a
// second scan, with queryHistory.enabled persisted as false in ktx.yaml.
it('lets interactive BigQuery setup disable unavailable query history and retry after scan failure', async () => {
  const io = makeIo();
  const prompts = makePromptAdapter({
    textValues: ['/tmp/service-account.json', 'US'],
  });

  // One entry per "Connection setup failed" prompt: the options it offered.
  const failurePromptOptions: KtxSetupPromptOption[][] = [];
  vi.mocked(prompts.select).mockImplementation(async ({ message, options }) => {
    if (message.startsWith('Enable query-history ingest')) {
      return 'yes';
    }
    if (message.includes('How much database context should ktx build?')) {
      return 'fast';
    }
    if (!message.startsWith('Connection setup failed for analytics')) {
      throw new Error(`unexpected select prompt: ${message}`);
    }

    failurePromptOptions.push(options);
    // Only the first failure prompt is expected; a second one means the retry
    // ran without query history having been disabled.
    if (failurePromptOptions.length > 1) {
      throw new Error('setup did not disable query history before retrying');
    }
    return 'disable-query-history';
  });

  const bigQueryRunner = {
    ...fakeHistoricSqlRunner('bigquery', 'INFORMATION_SCHEMA.JOBS_BY_PROJECT'),
    fixAdvice() {
      return {
        failHeadline: 'BigQuery principal cannot read INFORMATION_SCHEMA.JOBS_BY_PROJECT',
        remediation:
          'Grant roles/bigquery.resourceViewer on the BigQuery project, or grant a custom role containing bigquery.jobs.listAll.',
      };
    },
  };
  const historicSqlReadinessProbe = vi.fn(async () => {
    return {
      ok: false as const,
      dialect: 'bigquery' as const,
      runner: bigQueryRunner,
      error: new Error('access denied'),
    };
  });

  // Returns 1 for the first scan and 0 for every later one
  // (presumably non-zero means the scan failed; confirm against the step's contract).
  let scansStarted = 0;
  const scanConnection = vi.fn(async () => {
    scansStarted += 1;
    if (scansStarted === 1) {
      return 1;
    }
    return 0;
  });

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'auto',
      databaseDrivers: ['bigquery'],
      databaseConnectionId: 'analytics',
      databaseSchemas: [],
      skipDatabases: false,
    },
    io.io,
    {
      prompts,
      testConnection: vi.fn(async () => 0),
      scanConnection,
      historicSqlReadinessProbe,
      listSchemas: vi.fn(async () => ['analytics']),
      listTables: vi.fn(async () => [{ catalog: null, schema: 'analytics', name: 'orders', kind: 'table' as const }]),
    },
  );

  expect(result.status).toBe('ready');
  expect(scanConnection).toHaveBeenCalledTimes(2);
  expect(historicSqlReadinessProbe).toHaveBeenCalledTimes(1);
  expect(failurePromptOptions[0]).toContainEqual({
    value: 'disable-query-history',
    label: 'Disable query history and retry',
  });

  const writtenYaml = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(writtenYaml);
  expect(config.connections.analytics).toMatchObject({
    context: {
      queryHistory: {
        enabled: false,
      },
    },
  });
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// Non-interactive run against a ktx.yaml that already declares a Postgres
// connection: with enableQueryHistory set and a passing readiness probe, the
// step must persist context.queryHistory (enabled, minExecutions, default
// filters) and must not write a `historicSql` key on the connection.
it('enables query history on an existing Postgres connection', async () => {
  // The fixture is nested YAML: `warehouse` sits under `connections`, and the
  // connection fields sit under `warehouse`. The indentation is significant;
  // flattening it would make these keys siblings of `warehouse` instead.
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'connections:',
      '  warehouse:',
      '    driver: postgres',
      '    url: env:DATABASE_URL',
      '    schemas:',
      "      - 'public'",
      '',
    ].join('\n'),
    'utf-8',
  );
  const io = makeIo();

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseConnectionIds: ['warehouse'],
      databaseSchemas: [],
      enableQueryHistory: true,
      queryHistoryMinExecutions: 8,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      // Probe stub that reports query history as available on this connection.
      historicSqlReadinessProbe: vi.fn(async () => {
        const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements');
        return {
          ok: true as const,
          dialect: 'postgres' as const,
          runner,
          result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
        };
      }),
    },
  );

  expect(result.status).toBe('ready');
  const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8'));
  expect(config.connections.warehouse).toMatchObject({
    context: {
      queryHistory: {
        enabled: true,
        minExecutions: 8,
        filters: {
          dropTrivialProbes: true,
        },
      },
    },
  });
  expect(config.connections.warehouse.historicSql).toBeUndefined();
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
// A failing Postgres query-history probe must not block setup: the step still
// ends 'ready', the probe is invoked with the new connection, and stdout
// carries the runner's fix-advice headline plus the "skipped until fixed" notice.
it('prints a non-blocking Postgres query history probe failure after connection test succeeds', async () => {
  const io = makeIo();

  const postgresRunner = {
    ...fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    fixAdvice() {
      return {
        failHeadline: 'pg_stat_statements extension is not installed in the connection database',
        remediation: 'Run (against this database): CREATE EXTENSION pg_stat_statements;',
      };
    },
  };
  const historicSqlReadinessProbe = vi.fn(async () => {
    return {
      ok: false as const,
      dialect: 'postgres' as const,
      runner: postgresRunner,
      error: new Error('missing extension'),
    };
  });

  const result = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    io.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe,
    },
  );

  expect(result.status).toBe('ready');

  const expectedProbeInput = expect.objectContaining({
    projectDir: tempDir,
    connectionId: 'warehouse',
    connection: expect.objectContaining({ driver: 'postgres' }),
  });
  expect(historicSqlReadinessProbe).toHaveBeenCalledWith(expectedProbeInput);

  // The user-facing wording is "Query history"; the "Historic SQL" label must not appear.
  expect(io.stdout()).toContain('Query history probe...');
  expect(io.stdout()).not.toContain('Historic SQL probe...');
  expect(io.stdout()).toContain('pg_stat_statements extension is not installed');
  expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.');
});
|
|
|
|
|
|
|
|
|
|
it('prints a non-blocking Snowflake query history probe failure with the grants remediation', async () => {
|
|
|
|
|
const io = makeIo();
|
2026-05-24 19:30:06 +02:00
|
|
|
const runner = {
|
|
|
|
|
...fakeHistoricSqlRunner('snowflake', 'SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'),
|
|
|
|
|
fixAdvice: () => ({
|
|
|
|
|
failHeadline: 'Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY',
|
|
|
|
|
remediation:
|
|
|
|
|
'Run (as ACCOUNTADMIN): GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE TO ROLE <connection role>;',
|
|
|
|
|
}),
|
|
|
|
|
};
|
|
|
|
|
const historicSqlReadinessProbe = vi.fn(async () => ({
|
|
|
|
|
ok: false as const,
|
|
|
|
|
dialect: 'snowflake' as const,
|
|
|
|
|
runner,
|
|
|
|
|
error: new Error('role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY'),
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
const result = await runKtxSetupDatabasesStep(
|
|
|
|
|
{
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
inputMode: 'disabled',
|
|
|
|
|
databaseDrivers: ['snowflake'],
|
|
|
|
|
databaseConnectionId: 'warehouse',
|
2026-06-10 12:36:53 +02:00
|
|
|
databaseSchemas: ['PUBLIC'],
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
enableQueryHistory: true,
|
|
|
|
|
skipDatabases: false,
|
|
|
|
|
},
|
|
|
|
|
io.io,
|
|
|
|
|
{
|
|
|
|
|
testConnection: vi.fn(async () => 0),
|
|
|
|
|
scanConnection: vi.fn(async () => 0),
|
2026-05-24 19:30:06 +02:00
|
|
|
historicSqlReadinessProbe,
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
prompts: makePromptAdapter({
|
|
|
|
|
textValues: ['env:SNOWFLAKE_ACCOUNT', 'WH', 'ANALYTICS', 'reader', ''],
|
|
|
|
|
passwordValues: ['env:SNOWFLAKE_PASSWORD'],
|
|
|
|
|
}),
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
expect(result.status).toBe('ready');
|
2026-05-24 19:30:06 +02:00
|
|
|
expect(historicSqlReadinessProbe).toHaveBeenCalledWith(
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
expect.objectContaining({
|
|
|
|
|
projectDir: tempDir,
|
|
|
|
|
connectionId: 'warehouse',
|
2026-05-24 19:30:06 +02:00
|
|
|
connection: expect.objectContaining({ driver: 'snowflake' }),
|
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)
* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure
Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.
- Remove the free-text Snowflake schema prompt; only pass `schema` to
snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
user for a comma-separated list, persist it as `schema_names`, and use
it as both the table-list filter and the multiselect default. Applies
to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
documented single-schema shorthand.
* fix(snowflake): keep introspecting when primary-key discovery is denied
The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.
Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.
* fix(scan): unblock relationship discovery on Snowflake
Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:
- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
(Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
`connector.sampleColumn` into bare locals, losing the `this` binding when
the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
Every sample call threw "Cannot read properties of undefined (reading
'assertConnection')" and degraded LLM descriptions to metadata-only
prompts. Call the methods through the connector instead.
Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.
* test(scan): cover table-ref helpers
* feat(scan): plumb tableScope through live-database introspection port
* feat(scan): apply tableScope during metadata fetch
* feat(scan): enforce table scope at fetch boundary
* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)
* feat(cli): add RSA key-pair auth option to Snowflake setup wizard
Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.
* feat(scan): pool Snowflake sessions
* fix(scan): reuse structural snapshots and cleanup connectors
* feat(scan): parallelize relationship profiling
* feat(scan): batch table description generation
* docs: document Snowflake ingest concurrency knobs
* fix(scan): close Snowflake ingest perf verification gaps
* fix(scan): keep batched description failure bounded
* feat(scan): dispatch query-history probes by connection driver
Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.
Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.
* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject
The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.
Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.
generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.
* chore(scripts): add ktx-reset.sh project-cleanup helper
Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
|
|
|
}),
|
|
|
|
|
);
|
|
|
|
|
expect(io.stdout()).toContain('Query history probe...');
|
|
|
|
|
expect(io.stdout()).toContain('Snowflake role cannot read SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY');
|
|
|
|
|
expect(io.stdout()).toContain('GRANT IMPORTED PRIVILEGES ON DATABASE SNOWFLAKE');
|
|
|
|
|
expect(io.stdout()).toContain('Setup written; query history will be skipped until fixed.');
|
2026-05-10 23:12:26 +02:00
|
|
|
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('does not run the query history probe when the regular connection test fails', async () => {
  // testConnection resolves to 1 below, so the step is expected to report
  // 'failed' without ever invoking the readiness probe.
  const harness = makeIo();
  const readinessProbe = vi.fn(async () => ({
    ok: true as const,
    dialect: 'postgres' as const,
    runner: fakeHistoricSqlRunner('postgres', 'pg_stat_statements'),
    result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] },
  }));

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: [],
      enableQueryHistory: true,
      skipDatabases: false,
    },
    harness.io,
    {
      testConnection: vi.fn(async () => 1),
      scanConnection: vi.fn(async () => 0),
      historicSqlReadinessProbe: readinessProbe,
    },
  );

  expect(status).toBe('failed');
  expect(readinessProbe).not.toHaveBeenCalled();
});
|
|
|
|
|
|
|
|
|
|
it('returns missing input when non-interactive database flags are incomplete', async () => {
  // No databaseConnectionId is passed and prompting is disabled, so the step
  // is expected to stop with 'missing-input' and explain why on stderr.
  const harness = makeIo();

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseSchemas: [],
      skipDatabases: false,
    },
    harness.io,
  );

  expect(status).toBe('missing-input');
  expect(harness.stderr()).toContain('Missing database connection id');
});
|
|
|
|
|
|
2026-06-03 13:08:46 +02:00
|
|
|
it('returns missing input when a non-interactive new connection is missing required details', async () => {
  // A connection id is supplied but no databaseUrl, and prompting is disabled,
  // so the step is expected to report the missing connection details.
  const harness = makeIo();

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'warehouse',
      databaseSchemas: [],
      skipDatabases: false,
    },
    harness.io,
  );

  expect(status).toBe('missing-input');
  expect(harness.stderr()).toContain('Missing connection details');
});
|
|
|
|
|
|
2026-05-14 01:43:06 +02:00
|
|
|
it('accepts former ingest subcommand names as non-interactive database connection ids', async () => {
  // 'replay' is used as the connection id; the step should still finish as
  // 'ready' and persist the connection under that key in ktx.yaml.
  const harness = makeIo();

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseDrivers: ['postgres'],
      databaseConnectionId: 'replay',
      databaseUrl: 'env:DATABASE_URL',
      databaseSchemas: ['public'],
      skipDatabases: false,
    },
    harness.io,
    {
      testConnection: vi.fn(async () => 0),
      scanConnection: vi.fn(async () => 0),
    },
  );

  expect(status).toBe('ready');

  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  const { connections } = parseKtxProjectConfig(yamlText);
  expect(connections.replay).toMatchObject({
    driver: 'postgres',
    url: 'env:DATABASE_URL',
  });
});
|
|
|
|
|
|
|
|
|
|
it('leaves setup incomplete when databases are skipped', async () => {
  // With skipDatabases set, the step should report 'skipped', warn the user,
  // and leave ktx.yaml without any completed_steps entry.
  const harness = makeIo();

  const { status } = await runKtxSetupDatabasesStep(
    {
      projectDir: tempDir,
      inputMode: 'disabled',
      databaseSchemas: [],
      skipDatabases: true,
    },
    harness.io,
  );

  expect(status).toBe('skipped');
  expect(harness.stdout()).toContain('ktx cannot work until you add a database.');

  const yamlText = await readFile(join(tempDir, 'ktx.yaml'), 'utf-8');
  expect(yamlText).not.toContain('completed_steps:');
});
|
|
|
|
|
});
|