ktx/packages/cli/test/connectors/sqlserver/connector.test.ts

505 lines
18 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { describe, expect, it, vi } from 'vitest';
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
import { createSqlServerLiveDatabaseIntrospection } from '../../../src/connectors/sqlserver/live-database-introspection.js';
import { isKtxSqlServerConnectionConfig, KtxSqlServerScanConnector, prepareSqlServerReadOnlyQuery, sqlServerConnectionPoolConfigFromConfig, type KtxSqlServerConnectionConfig, type KtxSqlServerPoolFactory, type KtxSqlServerQueryResult } from '../../../src/connectors/sqlserver/connector.js';
import { tableRefSet } from '../../../src/context/scan/table-ref.js';
2026-05-10 23:12:26 +02:00
function recordset<T extends Record<string, unknown>>(
rows: T[],
columnNames: string[],
): T[] & { columns: Record<string, { type: { declaration: string } }> } {
const withColumns = rows as T[] & { columns: Record<string, { type: { declaration: string } }> };
withColumns.columns = Object.fromEntries(columnNames.map((name) => [name, { type: { declaration: 'nvarchar' } }]));
return withColumns;
}
2026-05-10 23:51:24 +02:00
function result<T extends Record<string, unknown>>(rows: T[], columnNames: string[]): KtxSqlServerQueryResult {
2026-05-10 23:12:26 +02:00
return { recordset: recordset(rows, columnNames) };
}
function fakePoolFactory(options: { primaryKeyError?: Error; foreignKeyError?: Error } = {}): KtxSqlServerPoolFactory {
2026-05-10 23:51:24 +02:00
const query = vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
2026-05-10 23:12:26 +02:00
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result(
[
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
{ schema_name: 'dbo', table_name: 'customers', table_type: 'BASE TABLE' },
{ schema_name: 'dbo', table_name: 'orders', table_type: 'BASE TABLE' },
{ schema_name: 'dbo', table_name: 'order_summary', table_type: 'VIEW' },
2026-05-10 23:12:26 +02:00
],
['table_name', 'table_type'],
);
}
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = 0')) {
return result([{ table_name: 'customers', table_comment: 'Customer table' }], [
'table_name',
'table_comment',
]);
}
if (sql.includes("ep.name = 'MS_Description'") && sql.includes('ep.minor_id = c.column_id')) {
return result([{ table_name: 'customers', column_name: 'id', column_comment: 'PK' }], [
'table_name',
'column_name',
'column_comment',
]);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[
{ table_name: 'customers', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'customers', column_name: 'name', data_type: 'nvarchar', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'customer_id', data_type: 'int', is_nullable: 'NO' },
{ table_name: 'orders', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
{ table_name: 'order_summary', column_name: 'status', data_type: 'nvarchar', is_nullable: 'YES' },
],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
if (sql.includes("CONSTRAINT_TYPE = 'PRIMARY KEY'")) {
if (options.primaryKeyError) {
throw options.primaryKeyError;
}
2026-05-10 23:12:26 +02:00
return result(
[
{ table_name: 'customers', column_name: 'id' },
{ table_name: 'orders', column_name: 'id' },
],
['table_name', 'column_name'],
);
}
if (sql.includes('REFERENTIAL_CONSTRAINTS')) {
if (options.foreignKeyError) {
throw options.foreignKeyError;
}
2026-05-10 23:12:26 +02:00
return result(
[
{
table_name: 'orders',
column_name: 'customer_id',
referenced_table_schema: 'dbo',
referenced_table_name: 'customers',
referenced_column_name: 'id',
constraint_name: 'orders_customer_id_fk',
},
],
[
'table_name',
'column_name',
'referenced_table_schema',
'referenced_table_name',
'referenced_column_name',
'constraint_name',
],
);
}
if (sql.includes('sys.partitions') && sql.includes('GROUP BY t.name')) {
return result(
[
{ table_name: 'customers', row_count: 2 },
{ table_name: 'orders', row_count: 2 },
],
['table_name', 'row_count'],
);
}
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
if (sql.includes('SELECT TOP 1 [id], [status] FROM [analytics].[dbo].[orders]')) {
2026-05-10 23:12:26 +02:00
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
2026-05-10 23:51:24 +02:00
if (sql.includes('SELECT TOP 1 * FROM (select id, status from dbo.orders) AS ktx_query_result')) {
2026-05-10 23:12:26 +02:00
return result([{ id: 10, status: 'paid' }], ['id', 'status']);
}
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
if (sql.includes('SELECT TOP 5 [status] FROM [analytics].[dbo].[orders]')) {
2026-05-10 23:12:26 +02:00
return result([{ status: 'paid' }, { status: 'open' }], ['status']);
}
if (sql.includes('COUNT(DISTINCT val)')) {
return result([{ cardinality: 2 }], ['cardinality']);
}
if (sql.includes('SELECT TOP 10 val')) {
return result([{ val: 'open' }, { val: 'paid' }], ['val']);
}
if (sql.includes('SUM(p.rows) AS row_count') && sql.includes('t.name = @tableName')) {
return result([{ row_count: 2 }], ['row_count']);
}
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
if (sql.includes('FROM sys.objects o')) {
return result(
[
{ schema_name: 'dbo', table_name: 'customers', table_type: 'USER_TABLE' },
{ schema_name: 'dbo', table_name: 'order_summary', table_type: 'VIEW' },
{ schema_name: 'dbo', table_name: 'orders', table_type: 'USER_TABLE' },
],
['schema_name', 'table_name', 'table_type'],
);
}
2026-05-10 23:12:26 +02:00
if (sql.includes('SELECT s.name AS schema_name')) {
return result([{ schema_name: 'dbo' }, { schema_name: 'sales' }], ['schema_name']);
}
if (sql.trim() === 'SELECT 1') {
return result([{ ok: 1 }], ['ok']);
}
throw new Error(`Unexpected SQL: ${sql}`);
});
const request: { input(name: string, value: unknown): typeof request; query: typeof query } = {
input: vi.fn((_key: string, _value: unknown) => request),
query,
};
const close = vi.fn(async () => undefined);
return {
createPool: vi.fn(async () => ({
request: () => request,
close,
})),
};
}
2026-05-10 23:51:24 +02:00
describe('KtxSqlServerScanConnector', () => {
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
it('prepares read-only SQL parameters with SQL Server named placeholders', () => {
expect(
prepareSqlServerReadOnlyQuery('select * from events where id = :id and name = :name', {
id: 10,
name: 'signup',
}),
).toEqual({
sql: 'select * from events where id = @id and name = @name',
params: { id: 10, name: 'signup' },
});
expect(prepareSqlServerReadOnlyQuery('select 1')).toEqual({ sql: 'select 1', params: undefined });
});
2026-05-10 23:12:26 +02:00
it('resolves SQL Server connection configuration safely', () => {
expect(
2026-05-10 23:51:24 +02:00
isKtxSqlServerConnectionConfig({
2026-05-10 23:12:26 +02:00
driver: 'sqlserver',
host: 'localhost',
database: 'analytics',
}),
).toBe(true);
2026-05-10 23:51:24 +02:00
expect(isKtxSqlServerConnectionConfig({ driver: 'mysql', host: 'localhost', database: 'analytics' })).toBe(false);
2026-05-10 23:12:26 +02:00
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
port: 14330,
database: 'analytics',
username: 'reader',
trustServerCertificate: false,
},
}),
).toMatchObject({
server: 'db.example.test',
port: 14330,
database: 'analytics',
user: 'reader',
options: { encrypt: true, trustServerCertificate: false },
});
});
it('defaults and validates SQL Server maxConnections', () => {
const baseConnection: KtxSqlServerConnectionConfig = {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
};
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: baseConnection,
}),
).toMatchObject({ pool: { max: 10 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: 15 },
}),
).toMatchObject({ pool: { max: 15 } });
expect(
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections: '12' as never },
}),
).toMatchObject({ pool: { max: 12 } });
for (const maxConnections of [0, -1, 1.5, Number.NaN, 'abc' as never]) {
expect(() =>
sqlServerConnectionPoolConfigFromConfig({
connectionId: 'warehouse',
connection: { ...baseConnection, maxConnections },
}),
).toThrow('connections.warehouse.maxConnections must be a positive integer');
}
});
2026-05-10 23:12:26 +02:00
it('introspects schema, primary keys, comments, row counts, views, and foreign keys', async () => {
2026-05-10 23:51:24 +02:00
const connector = new KtxSqlServerScanConnector({
2026-05-10 23:12:26 +02:00
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver' },
{ runId: 'scan-run-1' },
);
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
driver: 'sqlserver',
extractedAt: '2026-04-29T16:00:00.000Z',
scope: { catalogs: ['analytics'], schemas: ['dbo'] },
metadata: {
database: 'analytics',
host: 'db.example.test',
schemas: ['dbo'],
table_count: 3,
total_columns: 6,
},
});
expect(snapshot.tables.map((table) => [table.name, table.kind, table.estimatedRows, table.comment])).toEqual([
['customers', 'table', 2, 'Customer table'],
['orders', 'table', 2, null],
['order_summary', 'view', null, null],
]);
expect(snapshot.tables.find((table) => table.name === 'customers')?.columns[0]).toMatchObject({
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
});
expect(snapshot.tables.find((table) => table.name === 'orders')?.foreignKeys).toEqual([
{
fromColumn: 'customer_id',
toCatalog: 'analytics',
toDb: 'dbo',
toTable: 'customers',
toColumn: 'id',
constraintName: 'orders_customer_id_fk',
},
]);
});
it('soft-fails denied SQL Server constraint discovery with scan warnings', async () => {
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory: fakePoolFactory({
primaryKeyError: Object.assign(new Error('SELECT permission denied'), { number: 229 }),
foreignKeyError: Object.assign(new Error('EXECUTE permission denied'), { number: 230 }),
}),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver' },
{ runId: 'scan-run-sqlserver-denied-constraints' },
);
expect(snapshot.warnings).toEqual([
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped primary-key discovery in dbo (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'dbo', kind: 'primary_key' },
},
{
code: 'constraint_discovery_unauthorized',
message: 'Skipped foreign-key discovery in dbo (insufficient grants on system catalogs)',
recoverable: true,
metadata: { schema: 'dbo', kind: 'foreign_key' },
},
]);
expect(snapshot.tables.every((table) => table.columns.every((column) => column.primaryKey === false))).toBe(true);
expect(snapshot.tables.every((table) => table.foreignKeys.length === 0)).toBe(true);
});
2026-05-10 23:12:26 +02:00
it('runs samples, distinct values, read-only SQL, row count, schema list, and cleanup', async () => {
const poolFactory = fakePoolFactory();
2026-05-10 23:51:24 +02:00
const connector = new KtxSqlServerScanConnector({
2026-05-10 23:12:26 +02:00
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory,
});
await expect(
connector.sampleTable(
{
connectionId: 'warehouse',
table: { catalog: 'analytics', db: 'dbo', name: 'orders' },
columns: ['id', 'status'],
limit: 1,
},
{ runId: 'scan-run-1' },
),
).resolves.toEqual({
headers: ['id', 'status'],
headerTypes: ['nvarchar', 'nvarchar'],
rows: [[10, 'paid']],
totalRows: 1,
});
await expect(
connector.sampleColumn(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status', limit: 5 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ values: ['paid', 'open'], nullCount: null, distinctCount: null });
await expect(
connector.getColumnDistinctValues(
{ catalog: 'analytics', db: 'dbo', name: 'orders' },
'status',
{ maxCardinality: 5, limit: 10, sampleSize: 100 },
),
).resolves.toEqual({ values: ['open', 'paid'], cardinality: 2 });
await expect(
connector.executeReadOnly(
{ connectionId: 'warehouse', sql: 'select id, status from dbo.orders', maxRows: 1 },
{ runId: 'scan-run-1' },
),
).resolves.toMatchObject({ headers: ['id', 'status'], rows: [[10, 'paid']], totalRows: 1, rowCount: 1 });
await expect(
connector.executeReadOnly({ connectionId: 'warehouse', sql: 'delete from orders' }, { runId: 'scan-run-1' }),
).rejects.toThrow('Only read-only SELECT/WITH queries can be executed locally');
await expect(connector.getTableRowCount('orders')).resolves.toBe(2);
await expect(connector.listSchemas()).resolves.toEqual(['dbo', 'sales']);
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. * fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
await expect(connector.listTables(['dbo'])).resolves.toEqual([
{ catalog: 'analytics', schema: 'dbo', name: 'customers', kind: 'table' },
{ catalog: 'analytics', schema: 'dbo', name: 'order_summary', kind: 'view' },
{ catalog: 'analytics', schema: 'dbo', name: 'orders', kind: 'table' },
]);
2026-05-10 23:12:26 +02:00
await expect(
connector.columnStats(
{ connectionId: 'warehouse', table: { catalog: 'analytics', db: 'dbo', name: 'orders' }, column: 'status' },
{ runId: 'scan-run-1' },
),
).resolves.toBeNull();
await connector.cleanup();
});
fix(snowflake): unblock multi-schema ingest and relationship discovery (#204) * feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure Snowflake setup previously asked for a single schema as free text, then ran a multiselect against the discovered schemas — two schema questions back-to-back, with the first being only a session bootstrap. The SDK's `schema` is optional, so the bootstrap step is unnecessary. - Remove the free-text Snowflake schema prompt; only pass `schema` to snowflake-sdk when one is configured. - When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the user for a comma-separated list, persist it as `schema_names`, and use it as both the table-list filter and the multiselect default. Applies to every driver with a scope-discovery spec, not just Snowflake. - Update docs to lead with `schema_names`; keep `schema_name` as a documented single-schema shorthand. * fix(snowflake): keep introspecting when primary-key discovery is denied The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the connection role may not have. Previously a 'SQL compilation error: Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist or not authorized' aborted the entire introspect — schemas, columns, and row counts were all discarded over a missing nice-to-have. Wrap the constraint query in try/catch, log a one-line warning per schema, and return an empty PK map. Columns end up with primaryKey=false; relationship inference still has FK and profiling to fall back on. * fix(scan): unblock relationship discovery on Snowflake Two adjacent bugs prevented the scan's relationship pipeline from producing any joins on a Snowflake warehouse: - relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table profile query failed with "Unknown function GROUP_CONCAT". Add an explicit Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected). - description-generation.ts destructured `connector.sampleTable` and `connector.sampleColumn` into bare locals, losing the `this` binding when the class-method connectors (Snowflake, Postgres, MySQL) were invoked. Every sample call threw "Cannot read properties of undefined (reading 'assertConnection')" and degraded LLM descriptions to metadata-only prompts. Call the methods through the connector instead. Without these, even after the primary-key probe is allowed to fail softly, the scan ends up with 0 validated relationships and an empty `joins:` block in every shard YAML. * test(scan): cover table-ref helpers * feat(scan): plumb tableScope through live-database introspection port * feat(scan): apply tableScope during metadata fetch * feat(scan): enforce table scope at fetch boundary * feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206) * feat(cli): add RSA key-pair auth option to Snowflake setup wizard Extends the interactive Snowflake setup flow with an authentication-method prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key path (env/file/absolute) and an optional passphrase; the resulting connection config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead of `password`. * feat(scan): pool Snowflake sessions * fix(scan): reuse structural snapshots and cleanup connectors * feat(scan): parallelize relationship profiling * feat(scan): batch table description generation * docs: document Snowflake ingest concurrency knobs * fix(scan): close Snowflake ingest perf verification gaps * fix(scan): keep batched description failure bounded * feat(scan): dispatch query-history probes by connection driver Extract historic-sql dialect resolution into a shared helper so the status-project readiness check and the local ingest factory agree on which connections enable query history and which probe to run. The status command now picks the postgres/snowflake/bigquery probe based on the connection's driver instead of always reporting against postgres, which previously caused snowflake connections with queryHistory.enabled to surface a misleading "driver is snowflake" failure. Also drops a noisy console.warn from Snowflake primary-key discovery — INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only roles and the FK + profiling paths handle the empty PK map already. * fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject The Claude Code agent SDK announces an internal pseudo-tool named StructuredOutput in the system/init message whenever outputFormat is set to { type: 'json_schema' }. The runtime's isolation check built its allowedToolIds set only from MCP tool ids and treated StructuredOutput as an unexpected host-injected tool, so every generateObject call threw "Claude Code runtime isolation failed: tools=StructuredOutput ..." and the table-descriptions and relationship-LLM-proposal enrichment stages recorded null output across the board. Whitelist StructuredOutput specifically in generateObject's allowedToolIds — the check also enforces missing_tools symmetry, so generateText and runAgentLoop, which do not see StructuredOutput, must not require it. generateObject also ran with maxTurns: 1, which the model intermittently breached when it emitted thinking text before the structured response. Raised to 5 to give the schema-bound call enough headroom without allowing unbounded loops. The existing tests now exercise the path with an init message that announces StructuredOutput so the regression cannot slip back in. * chore(scripts): add ktx-reset.sh project-cleanup helper Convenience script for repeatable ingest testing: takes a project directory and prunes everything except ktx.yaml and .ktx/secrets/, so the next ktx setup or ktx ingest run starts from a known-clean state.
2026-05-23 10:41:30 +02:00
it('limits introspection to tables in tableScope', async () => {
const queries: string[] = [];
const inputs: Array<{ name: string; value: unknown }> = [];
const request = {
input: vi.fn((name: string, value: unknown) => {
inputs.push({ name, value });
return request;
}),
query: vi.fn(async (sql: string): Promise<KtxSqlServerQueryResult> => {
queries.push(sql);
if (sql.includes('INFORMATION_SCHEMA.TABLES')) {
return result([{ table_name: 'orders', table_type: 'BASE TABLE' }], ['table_name', 'table_type']);
}
if (sql.includes('INFORMATION_SCHEMA.COLUMNS')) {
return result(
[{ table_name: 'orders', column_name: 'id', data_type: 'int', is_nullable: 'NO' }],
['table_name', 'column_name', 'data_type', 'is_nullable'],
);
}
return result([], []);
}),
};
const poolFactory: KtxSqlServerPoolFactory = {
createPool: vi.fn(async () => ({
request: () => request,
close: vi.fn(async () => undefined),
})),
};
const connector = new KtxSqlServerScanConnector({
connectionId: 'warehouse',
connection: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
poolFactory,
});
const scope = tableRefSet([{ catalog: 'analytics', db: 'dbo', name: 'orders' }]);
const snapshot = await connector.introspect(
{ connectionId: 'warehouse', driver: 'sqlserver', tableScope: scope },
{ runId: 'scope-test' },
);
expect(snapshot.tables.map((table) => table.name)).toEqual(['orders']);
expect(queries.find((query) => query.includes('INFORMATION_SCHEMA.TABLES'))).toMatch(/TABLE_NAME IN \(@table_0\)/);
expect(inputs).toEqual(expect.arrayContaining([{ name: 'table_0', value: 'orders' }]));
});
2026-05-10 23:12:26 +02:00
it('adapts native SQL Server snapshots to live-database introspection for local ingest', async () => {
const introspection = createSqlServerLiveDatabaseIntrospection({
connections: {
warehouse: {
driver: 'sqlserver',
host: 'db.example.test',
database: 'analytics',
username: 'reader',
schema: 'dbo',
},
},
poolFactory: fakePoolFactory(),
now: () => new Date('2026-04-29T16:00:00.000Z'),
});
const snapshot = await introspection.extractSchema('warehouse');
expect(snapshot).toMatchObject({
connectionId: 'warehouse',
extractedAt: '2026-04-29T16:00:00.000Z',
});
expect(snapshot.tables.find((table) => table.name === 'customers')).toMatchObject({
name: 'customers',
catalog: 'analytics',
db: 'dbo',
columns: [
{
name: 'id',
nativeType: 'int',
normalizedType: 'int',
dimensionType: 'number',
nullable: false,
primaryKey: true,
comment: 'PK',
},
{
name: 'name',
nativeType: 'nvarchar',
normalizedType: 'nvarchar',
dimensionType: 'string',
nullable: false,
primaryKey: false,
comment: null,
},
],
foreignKeys: [],
});
});
});