ktx/packages/cli/test/ingest-query-executor.test.ts

import type { KtxLocalProject } from '../src/context/project/project.js';
import { createKtxConnectorCapabilities, type KtxScanConnector } from '../src/context/scan/types.js';
import { describe, expect, it, vi } from 'vitest';
import { createKtxCliIngestQueryExecutor } from '../src/ingest-query-executor.js';

/**
 * Builds the local-project fixture used by the tests in this file.
 *
 * Every call returns a fresh object holding only `projectDir` and a `config`
 * with a project name and a single `warehouse` Postgres connection.
 *
 * @returns The fixture, cast to `KtxLocalProject`.
 */
function project(): KtxLocalProject {
  const warehouse = { driver: 'postgres', url: 'postgresql://readonly@example.test/db' };
  const config = {
    project: 'warehouse',
    connections: { warehouse },
  };
  const fixture = { projectDir: '/tmp/ktx-query-project', config };

  // NOTE(review): the double cast suggests the real type requires more fields
  // than this stand-in provides — confirm against KtxLocalProject.
  return fixture as unknown as KtxLocalProject;
}

/**
 * Creates a stub scan connector for the executor tests.
 *
 * Defaults: id `warehouse`, driver `postgres`, capabilities built with
 * `readOnlySql: true`, an `introspect` that always throws, an
 * `executeReadOnly` mock resolving to a single-row result, and a no-op
 * `cleanup` mock.
 *
 * @param overrides Properties that replace the defaults. They are spread
 *   after the defaults, so an explicit `undefined` (for example
 *   `executeReadOnly: undefined`) removes the default value.
 * @returns The stub connector. `listSchemas` and `listTables` are always
 *   functions: the override when one is given, otherwise a mock resolving to
 *   an empty array.
 */
function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {
  const { listSchemas, listTables } = overrides;

  return {
    id: 'warehouse',
    driver: 'postgres',
    capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),
    // Fail loudly if the code under test ever reaches introspection.
    introspect: async () => {
      throw new Error('introspect is not used by this test');
    },
    executeReadOnly: vi.fn(async () => {
      return {
        headers: ['answer'],
        rows: [[1]],
        totalRows: 1,
        rowCount: 1,
      };
    }),
    cleanup: vi.fn(async () => {}),
    ...overrides,
    // Resolved after the spread so a missing or undefined override still
    // ends up with a callable mock.
    listSchemas: listSchemas ?? vi.fn(async () => []),
    listTables: listTables ?? vi.fn(async () => []),
  };
}

// Covers the CLI ingest query executor against a stubbed scan connector:
// one successful read-only query and one connector lacking read-only SQL.
describe('createKtxCliIngestQueryExecutor', () => {
  it('executes read-only SQL through the scan connector and cleans it up', async () => {
    const stub = connector();
    const factory = vi.fn(async () => stub);
    const executor = createKtxCliIngestQueryExecutor(project(), { createConnector: factory });

    const result = await executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },
      projectDir: '/tmp/ktx-query-project',
      sql: 'select 1',
      maxRows: 5,
    });

    // The stub's result has no `command` field, so the `command` expectation
    // checks a value that must come from the executor itself.
    expect(result).toMatchObject({
      headers: ['answer'],
      rows: [[1]],
      totalRows: 1,
      command: 'SELECT',
      rowCount: 1,
    });

    // The connector is created from the project and connection id, ...
    expect(factory).toHaveBeenCalledWith(project(), 'warehouse');
    // ... receives the query with the fixed ingest run id, ...
    expect(stub.executeReadOnly).toHaveBeenCalledWith(
      { connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },
      { runId: 'ingest-sql-execution' },
    );
    // ... and is cleaned up exactly once.
    expect(stub.cleanup).toHaveBeenCalledTimes(1);
  });

  it('rejects connectors without read-only SQL support', async () => {
    // A connector that advertises no read-only SQL and has no executeReadOnly.
    const stub = connector({
      capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),
      executeReadOnly: undefined,
    });
    const factory = vi.fn(async () => stub);
    const executor = createKtxCliIngestQueryExecutor(project(), { createConnector: factory });

    const pending = executor.execute({
      connectionId: 'warehouse',
      connection: { driver: 'postgres' },
      projectDir: '/tmp/ktx-query-project',
      sql: 'select 1',
    });

    await expect(pending).rejects.toThrow(
      'Connection "warehouse" driver "postgres" does not support read-only SQL execution.',
    );
    // Cleanup must still happen once when execution is refused.
    expect(stub.cleanup).toHaveBeenCalledTimes(1);
  });
});
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`import type { KtxLocalProject } from '../src/context/project/project.js';`
			`import { createKtxConnectorCapabilities, type KtxScanConnector } from '../src/context/scan/types.js';`
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references 2026-05-13 13:43:23 +02:00			`import { describe, expect, it, vi } from 'vitest';`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`import { createKtxCliIngestQueryExecutor } from '../src/ingest-query-executor.js';`
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references 2026-05-13 13:43:23 +02:00
			`function project(): KtxLocalProject {`
			`return {`
			`projectDir: '/tmp/ktx-query-project',`
			`config: {`
			`project: 'warehouse',`
			`connections: {`
			`warehouse: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },`
			`},`
			`},`
			`} as unknown as KtxLocalProject;`
			`}`

			`function connector(overrides: Partial<KtxScanConnector> = {}): KtxScanConnector {`
			`return {`
			`id: 'warehouse',`
			`driver: 'postgres',`
			`capabilities: createKtxConnectorCapabilities({ readOnlySql: true }),`
			`async introspect() {`
			`throw new Error('introspect is not used by this test');`
			`},`
			`executeReadOnly: vi.fn(async () => ({`
			`headers: ['answer'],`
			`rows: [[1]],`
			`totalRows: 1,`
			`rowCount: 1,`
			`})),`
			`cleanup: vi.fn(async () => {}),`
			`...overrides,`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`listSchemas: overrides.listSchemas ?? vi.fn(async () => []),`
			`listTables: overrides.listTables ?? vi.fn(async () => []),`
feat(context): add warehouse verification tools (#46) * feat(context): add warehouse dialect dispatch * feat(context): read warehouse scan catalog * feat(context): add entity details verification tool * feat(context): add ingest SQL verification tool * feat(context): add raw warehouse discovery tool * feat(context): expose warehouse verification tools to ingest * docs(context): add ingest identifier verification protocol * test(context): guard ingest identifier verification prompts * chore(context): verify warehouse verification tools * docs: add warehouse verification tools plan and spec * fix(context): expose target warehouses to Notion ingest * fix(context): update ingest prompts for warehouse verification tools * fix(context): scope raw schema discovery to allowed connections * fix(context): verify warehouse column display targets * docs: add notion warehouse verification gap closure plan * fix(context): include raw discovery connection names * fix(context): expose warehouse targets for LookML and MetricFlow * fix(context): pass connection config to ingest query executors * fix(cli): enable read-only SQL probes for local ingest * docs: add warehouse verification final v1 closure plan * fix(context): align warehouse sql probe prompt shape * docs: add warehouse verification prompt shape closure plan * test(context): catch connectionless sql execution prompt examples * fix(context): include connection name in sl capture sql example * docs: add warehouse verification sql example closure plan * fix(context): report structured entity detail misses * docs: add warehouse verification structured target miss closure plan * fix: report untracked squash merge conflicts * feat: require ingest verification ledger * fix: stabilize ingest wiki references 2026-05-13 13:43:23 +02:00			`};`
			`}`

			`describe('createKtxCliIngestQueryExecutor', () => {`
			`it('executes read-only SQL through the scan connector and cleans it up', async () => {`
			`const scanConnector = connector();`
			`const createConnector = vi.fn(async () => scanConnector);`
			`const executor = createKtxCliIngestQueryExecutor(project(), { createConnector });`

			`await expect(`
			`executor.execute({`
			`connectionId: 'warehouse',`
			`connection: { driver: 'postgres', url: 'postgresql://readonly@example.test/db' },`
			`projectDir: '/tmp/ktx-query-project',`
			`sql: 'select 1',`
			`maxRows: 5,`
			`}),`
			`).resolves.toMatchObject({`
			`headers: ['answer'],`
			`rows: [[1]],`
			`totalRows: 1,`
			`command: 'SELECT',`
			`rowCount: 1,`
			`});`

			`expect(createConnector).toHaveBeenCalledWith(project(), 'warehouse');`
			`expect(scanConnector.executeReadOnly).toHaveBeenCalledWith(`
			`{ connectionId: 'warehouse', sql: 'select 1', maxRows: 5 },`
			`{ runId: 'ingest-sql-execution' },`
			`);`
			`expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);`
			`});`

			`it('rejects connectors without read-only SQL support', async () => {`
			`const scanConnector = connector({`
			`capabilities: createKtxConnectorCapabilities({ readOnlySql: false }),`
			`executeReadOnly: undefined,`
			`});`
			`const executor = createKtxCliIngestQueryExecutor(project(), {`
			`createConnector: vi.fn(async () => scanConnector),`
			`});`

			`await expect(`
			`executor.execute({`
			`connectionId: 'warehouse',`
			`connection: { driver: 'postgres' },`
			`projectDir: '/tmp/ktx-query-project',`
			`sql: 'select 1',`
			`}),`
			`).rejects.toThrow('Connection "warehouse" driver "postgres" does not support read-only SQL execution.');`
			`expect(scanConnector.cleanup).toHaveBeenCalledTimes(1);`
			`});`
			`});`