ktx/packages/cli/test/context/scan/relationship-locality.test.ts
Andrey Avtomonov 56985b7e09
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00

151 lines
5.5 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import type { KtxEnrichedColumn, KtxEnrichedTable } from '../../../src/context/scan/enrichment-types.js';
import { localCandidateTables } from '../../../src/context/scan/relationship-locality.js';
/**
 * Builds an enriched-column fixture for the locality tests.
 *
 * `id`, `tableId` and `name` always come from the positional arguments; every
 * other field falls back to an integer-column default unless `options`
 * supplies a non-nullish value for it.
 *
 * @param tableId - Id of the owning table; also used as the table name in the default `tableRef`.
 * @param id - Column id.
 * @param name - Column name.
 * @param options - Optional per-field overrides.
 * @returns A fully populated column object.
 */
function column(
  tableId: string,
  id: string,
  name: string,
  options: Partial<KtxEnrichedColumn> = {},
): KtxEnrichedColumn {
  const {
    tableRef,
    nativeType,
    normalizedType,
    dimensionType,
    nullable,
    primaryKey,
    parentColumnId,
    descriptions,
    embedding,
    sampleValues,
    cardinality,
  } = options;

  // `??` (not `||`) so that explicit falsy overrides such as `nullable: false` survive.
  return {
    id,
    tableId,
    tableRef: tableRef ?? { catalog: null, db: 'public', name: tableId },
    name,
    nativeType: nativeType ?? 'INTEGER',
    normalizedType: normalizedType ?? 'integer',
    dimensionType: dimensionType ?? 'number',
    nullable: nullable ?? true,
    primaryKey: primaryKey ?? false,
    parentColumnId: parentColumnId ?? null,
    descriptions: descriptions ?? {},
    embedding: embedding ?? null,
    sampleValues: sampleValues ?? null,
    cardinality: cardinality ?? null,
  };
}
/**
 * Builds an enabled enriched-table fixture in the `public` schema with no catalog.
 *
 * Each supplied column is shallow-copied and re-pointed at this table: its
 * `tableId` becomes `id` and its `tableRef` becomes the table's own `ref`
 * object. The input columns are not mutated.
 *
 * @param id - Table id.
 * @param name - Table name, used in the table ref.
 * @param columns - Columns to attach to the table.
 * @returns The table object with its re-homed column copies.
 */
function table(id: string, name: string, columns: KtxEnrichedColumn[]): KtxEnrichedTable {
  const ref = { catalog: null, db: 'public', name };
  const adopt = (item: KtxEnrichedColumn) => ({ ...item, tableId: id, tableRef: ref });
  const ownedColumns = columns.map((item) => adopt(item));

  return {
    id,
    ref,
    enabled: true,
    descriptions: {},
    columns: ownedColumns,
  };
}
describe('relationship locality', () => {
  /** Projects a ranking onto the candidate table names, keeping rank order. */
  const rankedNames = (ranking: ReturnType<typeof localCandidateTables>) =>
    ranking.map((candidate) => candidate.table.ref.name);

  /** Overrides that turn the default integer column fixture into a text column. */
  const textColumn: Partial<KtxEnrichedColumn> = {
    nativeType: 'TEXT',
    normalizedType: 'text',
    dimensionType: 'string',
  };

  it('ranks the referenced parent table ahead of the child table for id-like source columns', () => {
    // Album.ArtistId is the source column; with a cap of one only Artist should be returned.
    const artistTable = table('artist-id', 'Artist', [column('artist-id', 'artist-pk', 'ArtistId')]);
    const albumTable = table('album-id', 'Album', [
      column('album-id', 'album-pk', 'AlbumId'),
      column('album-id', 'artist-fk', 'ArtistId'),
    ]);
    const invoiceTable = table('invoice-id', 'Invoice', [column('invoice-id', 'invoice-pk', 'InvoiceId')]);
    const artistFk = albumTable.columns[1]!;

    const result = localCandidateTables({
      childTable: albumTable,
      childColumn: artistFk,
      parentTables: [albumTable, invoiceTable, artistTable],
      maxParentTables: 1,
    });

    expect(rankedNames(result)).toEqual(['Artist']);
    const best = result[0];
    expect(best).toMatchObject({
      score: expect.any(Number),
      tokenScore: expect.any(Number),
      embeddingScore: 0,
      reasons: expect.arrayContaining(['column_table_token_overlap']),
    });
  });

  it('uses singular and plural variants so plan_code can rank stg_plans', () => {
    // The source column says "plan" while the expected table name says "plans".
    const planTable = table('plans-id', 'stg_plans', [column('plans-id', 'plan-code', 'plan_code')]);
    const segmentTable = table('segments-id', 'mart_account_segments', [
      column('segments-id', 'current-plan-code', 'current_plan_code', { ...textColumn }),
    ]);
    const accountTable = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);
    const currentPlanCode = segmentTable.columns[0]!;

    const result = localCandidateTables({
      childTable: segmentTable,
      childColumn: currentPlanCode,
      parentTables: [accountTable, segmentTable, planTable],
      maxParentTables: 1,
    });

    expect(rankedNames(result)).toEqual(['stg_plans']);
    expect(result[0]?.tokenScore).toBeGreaterThan(0);
  });

  it('returns all tables when the schema is smaller than the default locality cap', () => {
    const accountTable = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);
    const invoiceTable = table('invoices-id', 'invoices', [
      column('invoices-id', 'invoice-id', 'id'),
      column('invoices-id', 'account-id', 'account_id'),
    ]);
    const accountFk = invoiceTable.columns[1]!;

    // No maxParentTables: the default cap applies.
    const result = localCandidateTables({
      childTable: invoiceTable,
      childColumn: accountFk,
      parentTables: [invoiceTable, accountTable],
    });

    // Sorted so the assertion checks membership only, not rank order.
    const sortedNames = rankedNames(result).sort();
    expect(sortedNames).toEqual(['accounts', 'invoices']);
  });

  it('supports an explicit zero cap for deterministic tests', () => {
    const accountTable = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);
    const invoiceTable = table('invoices-id', 'invoices', [
      column('invoices-id', 'invoice-id', 'id'),
      column('invoices-id', 'account-id', 'account_id'),
    ]);
    const accountFk = invoiceTable.columns[1]!;

    const result = localCandidateTables({
      childTable: invoiceTable,
      childColumn: accountFk,
      parentTables: [invoiceTable, accountTable],
      maxParentTables: 0,
    });

    expect(result).toEqual([]);
  });

  it('uses parent-column embeddings when token locality is weak', () => {
    // buyer_ref's embedding is nearly parallel to customers.id's embedding.
    const customerTable = table('customers-id', 'customers', [
      column('customers-id', 'customers-id-col', 'id', { embedding: [1, 0, 0] }),
      column('customers-id', 'customers-name-col', 'name', { ...textColumn, embedding: [0, 1, 0] }),
    ]);
    const orderTable = table('orders-id', 'orders', [
      column('orders-id', 'orders-id-col', 'id', { embedding: [0, 0, 1] }),
      column('orders-id', 'buyer-ref-col', 'buyer_ref', { embedding: [0.995, 0.005, 0] }),
    ]);
    const invoiceTable = table('invoices-id', 'invoices', [column('invoices-id', 'invoice-id', 'id')]);
    const buyerRef = orderTable.columns[1]!;

    const result = localCandidateTables({
      childTable: orderTable,
      childColumn: buyerRef,
      parentTables: [invoiceTable, customerTable],
      maxParentTables: 1,
    });

    expect(rankedNames(result)).toEqual(['customers']);
    expect(result[0]).toMatchObject({
      embeddingScore: expect.any(Number),
      reasons: expect.arrayContaining(['embedding_similarity']),
    });
    expect(result[0]!.embeddingScore).toBeGreaterThan(0.99);
  });
});