ktx/packages/cli/test/context/scan/relationship-composite-candidates.test.ts

import Database from 'better-sqlite3';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { describe, expect, it } from 'vitest';
import { getDialectForDriver } from '../../../src/context/connections/dialects.js';
import { snapshotToKtxEnrichedSchema } from '../../../src/context/scan/local-enrichment.js';
import { loadKtxRelationshipBenchmarkFixture, maskKtxRelationshipBenchmarkSnapshot } from '../../../src/context/scan/relationship-benchmarks.js';
import { discoverKtxCompositeRelationships } from '../../../src/context/scan/relationship-composite-candidates.js';
import { profileKtxRelationshipSchema, type KtxRelationshipReadOnlyExecutor } from '../../../src/context/scan/relationship-profiling.js';
import type { KtxQueryResult, KtxReadOnlyQueryInput, KtxScanContext } from '../../../src/context/scan/types.js';

/**
 * Test-only read-only executor backed by a local SQLite database file.
 *
 * Runs the SQL it is handed directly against the file and reshapes the result
 * into the `headers` + positional `rows` form of `KtxQueryResult`.
 */
class TestSqliteExecutor implements KtxRelationshipReadOnlyExecutor {
  private readonly db: Database.Database;

  /**
   * @param dataPath Path to the SQLite database file. The file is opened with
   *   `readonly: true` and `fileMustExist: true`.
   */
  constructor(dataPath: string) {
    this.db = new Database(dataPath, { readonly: true, fileMustExist: true });
  }

  /**
   * Executes `input.sql` and returns every row.
   *
   * Headers are read from the prepared statement's column metadata rather than
   * from the keys of the first row, so they are still reported when the query
   * returns no rows. Rows are fetched in raw (array) mode so that result
   * columns sharing a name are not collapsed into a single object key and each
   * row stays positionally aligned with `headers`.
   *
   * @param input Query input; only `input.sql` is used.
   * @param _ctx Scan context; unused by this test executor.
   * @returns Headers, positional rows, and the row count (reported as both
   *   `totalRows` and `rowCount`).
   */
  async executeReadOnly(input: KtxReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {
    const statement = this.db.prepare(input.sql);
    const headers = statement.columns().map((column) => column.name);
    const rows = statement.raw(true).all() as unknown[][];
    return {
      headers,
      rows,
      totalRows: rows.length,
      rowCount: rows.length,
    };
  }

  /** Closes the underlying database handle. */
  close(): void {
    this.db.close();
  }
}

// End-to-end check of composite key / composite relationship discovery against the
// `composite_keys_no_declared_constraints` benchmark fixture, using real rows from
// the fixture's SQLite data file.
describe('composite relationship discovery detector', () => {
  it('infers composite primary keys and validates composite foreign keys from row evidence', async () => {
    // Convert the file URL with fileURLToPath instead of reading URL#pathname:
    // `pathname` stays percent-encoded (e.g. spaces become %20) and is not a valid
    // native path on Windows.
    const fixtureRoot = fileURLToPath(new URL('../../fixtures/relationship-benchmarks', import.meta.url));
    const fixture = await loadKtxRelationshipBenchmarkFixture(
      join(fixtureRoot, 'composite_keys_no_declared_constraints'),
    );
    // NOTE(review): the mask name suggests declared PKs and FKs are stripped from the
    // snapshot so they must be inferred from data — confirm against relationship-benchmarks.
    const snapshot = maskKtxRelationshipBenchmarkSnapshot(fixture.snapshot, 'declared_pks_and_declared_fks_removed');
    const schema = snapshotToKtxEnrichedSchema(snapshot, new Map());
    const dialect = getDialectForDriver(snapshot.driver);
    const executor = new TestSqliteExecutor(fixture.dataPath ?? '');

    try {
      const profiles = await profileKtxRelationshipSchema({
        connectionId: snapshot.connectionId,
        dialect,
        schema,
        executor,
        ctx: { runId: 'test:composite-profile' },
      });

      const result = await discoverKtxCompositeRelationships({
        connectionId: snapshot.connectionId,
        dialect,
        schema,
        profiles,
        executor,
        ctx: { runId: 'test:composite-detect' },
      });

      // Render each inferred key as `table.(col,col)` so the expectation reads as a flat list.
      expect(result.primaryKeys.map((item) => `${item.table.name}.(${item.columns.join(',')})`)).toEqual([
        'order_line_allocations.(order_id,line_number,warehouse_code)',
        'order_lines.(order_id,line_number)',
      ]);
      // Render each relationship as `from.(cols)->to.(cols)`.
      expect(
        result.relationships.map(
          (item) =>
            `${item.from.table.name}.(${item.from.columns.join(',')})->${item.to.table.name}.(${item.to.columns.join(',')})`,
        ),
      ).toEqual(['order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)']);
      expect(result.relationships[0]).toMatchObject({
        relationshipType: 'many_to_one',
        status: 'accepted',
        confidence: 0.95,
        validation: {
          targetUniqueness: 1,
          sourceCoverage: 1,
          violationCount: 0,
          violationRatio: 0,
          reasons: ['composite_validation_passed'],
        },
      });
      expect(result.queryCount).toBeGreaterThan(0);
    } finally {
      // Always release the SQLite handle, including when profiling, discovery, or an
      // assertion throws.
      executor.close();
    }
  });
});
Initial open-source release 2026-05-10 23:12:26 +02:00			`import Database from 'better-sqlite3';`
			`import { join } from 'node:path';`
			`import { describe, expect, it } from 'vitest';`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`import { getDialectForDriver } from '../../../src/context/connections/dialects.js';`
			`import { snapshotToKtxEnrichedSchema } from '../../../src/context/scan/local-enrichment.js';`
			`import { loadKtxRelationshipBenchmarkFixture, maskKtxRelationshipBenchmarkSnapshot } from '../../../src/context/scan/relationship-benchmarks.js';`
			`import { discoverKtxCompositeRelationships } from '../../../src/context/scan/relationship-composite-candidates.js';`
			`import { profileKtxRelationshipSchema, type KtxRelationshipReadOnlyExecutor } from '../../../src/context/scan/relationship-profiling.js';`
			`import type { KtxQueryResult, KtxReadOnlyQueryInput, KtxScanContext } from '../../../src/context/scan/types.js';`
Initial open-source release 2026-05-10 23:12:26 +02:00
rename klo to ktx 2026-05-10 23:51:24 +02:00			`class TestSqliteExecutor implements KtxRelationshipReadOnlyExecutor {`
Initial open-source release 2026-05-10 23:12:26 +02:00			`private readonly db: Database.Database;`

			`constructor(dataPath: string) {`
			`this.db = new Database(dataPath, { readonly: true, fileMustExist: true });`
			`}`

rename klo to ktx 2026-05-10 23:51:24 +02:00			`async executeReadOnly(input: KtxReadOnlyQueryInput, _ctx: KtxScanContext): Promise<KtxQueryResult> {`
Initial open-source release 2026-05-10 23:12:26 +02:00			`const rows = this.db.prepare(input.sql).all() as Record<string, unknown>[];`
			`const headers = Object.keys(rows[0] ?? {});`
			`return {`
			`headers,`
			`rows: rows.map((row) => headers.map((header) => row[header])),`
			`totalRows: rows.length,`
			`rowCount: rows.length,`
			`};`
			`}`

			`close(): void {`
			`this.db.close();`
			`}`
			`}`

			`describe('composite relationship discovery detector', () => {`
			`it('infers composite primary keys and validates composite foreign keys from row evidence', async () => {`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`const fixtureRoot = new URL('../../fixtures/relationship-benchmarks', import.meta.url);`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const fixture = await loadKtxRelationshipBenchmarkFixture(`
Initial open-source release 2026-05-10 23:12:26 +02:00			`join(fixtureRoot.pathname, 'composite_keys_no_declared_constraints'),`
			`);`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const snapshot = maskKtxRelationshipBenchmarkSnapshot(fixture.snapshot, 'declared_pks_and_declared_fks_removed');`
			`const schema = snapshotToKtxEnrichedSchema(snapshot, new Map());`
Initial open-source release 2026-05-10 23:12:26 +02:00			`const executor = new TestSqliteExecutor(fixture.dataPath ?? '');`
rename klo to ktx 2026-05-10 23:51:24 +02:00			`const profiles = await profileKtxRelationshipSchema({`
Initial open-source release 2026-05-10 23:12:26 +02:00			`connectionId: snapshot.connectionId,`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`dialect: getDialectForDriver(snapshot.driver),`
Initial open-source release 2026-05-10 23:12:26 +02:00			`schema,`
			`executor,`
			`ctx: { runId: 'test:composite-profile' },`
			`});`

rename klo to ktx 2026-05-10 23:51:24 +02:00			`const result = await discoverKtxCompositeRelationships({`
Initial open-source release 2026-05-10 23:12:26 +02:00			`connectionId: snapshot.connectionId,`
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId\|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string \| null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env 2026-05-26 08:49:05 +02:00			`dialect: getDialectForDriver(snapshot.driver),`
Initial open-source release 2026-05-10 23:12:26 +02:00			`schema,`
			`profiles,`
			`executor,`
			`ctx: { runId: 'test:composite-detect' },`
			`});`
			`executor.close();`

			expect(result.primaryKeys.map((item) => `${item.table.name}.(${item.columns.join(',')})`)).toEqual([
			`'order_line_allocations.(order_id,line_number,warehouse_code)',`
			`'order_lines.(order_id,line_number)',`
			`]);`
			`expect(`
			`result.relationships.map(`
			`(item) =>`
			`${item.from.table.name}.(${item.from.columns.join(',')})->${item.to.table.name}.(${item.to.columns.join(',')})`,
			`),`
			`).toEqual(['order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)']);`
			`expect(result.relationships[0]).toMatchObject({`
			`relationshipType: 'many_to_one',`
			`status: 'accepted',`
			`confidence: 0.95,`
			`validation: {`
			`targetUniqueness: 1,`
			`sourceCoverage: 1,`
			`violationCount: 0,`
			`violationRatio: 0,`
			`reasons: ['composite_validation_passed'],`
			`},`
			`});`
			`expect(result.queryCount).toBeGreaterThan(0);`
			`});`
			`});`