Mirror of https://github.com/Kaelio/ktx.git (synced 2026-06-10 08:05:14 +02:00).
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
151 lines · 5.5 KiB · TypeScript
import { describe, expect, it } from 'vitest';
|
|
import type { KtxEnrichedColumn, KtxEnrichedTable } from '../../../src/context/scan/enrichment-types.js';
|
|
import { localCandidateTables } from '../../../src/context/scan/relationship-locality.js';
|
|
|
|
/**
 * Builds a `KtxEnrichedColumn` fixture for the locality tests.
 *
 * Only the identifying fields are required; every other field falls back to a
 * default (nullable INTEGER/number column, no embedding, no samples) unless
 * `options` supplies a non-nullish value for it.
 *
 * @param tableId - Id of the owning table; also used as the table name in the
 *   default `tableRef`.
 * @param id - Column id.
 * @param name - Column name.
 * @param options - Overrides for the defaulted fields. `id`, `tableId` and
 *   `name` in `options` are ignored in favour of the positional arguments.
 * @returns A fully populated column object.
 */
function column(
  tableId: string,
  id: string,
  name: string,
  options: Partial<KtxEnrichedColumn> = {},
): KtxEnrichedColumn {
  // Default ref places the column in the `public` schema with no catalog.
  const tableRef = options.tableRef ?? { catalog: null, db: 'public', name: tableId };

  // `??` (not spread) so that an explicit `undefined`/`null` override still
  // resolves to the default value.
  return {
    id,
    tableId,
    tableRef,
    name,
    nativeType: options.nativeType ?? 'INTEGER',
    normalizedType: options.normalizedType ?? 'integer',
    dimensionType: options.dimensionType ?? 'number',
    nullable: options.nullable ?? true,
    primaryKey: options.primaryKey ?? false,
    parentColumnId: options.parentColumnId ?? null,
    descriptions: options.descriptions ?? {},
    embedding: options.embedding ?? null,
    sampleValues: options.sampleValues ?? null,
    cardinality: options.cardinality ?? null,
  };
}
|
|
|
|
/**
 * Builds an enabled `KtxEnrichedTable` fixture in the `public` schema.
 *
 * Each supplied column is shallow-copied and re-parented: its `tableId` and
 * `tableRef` are overwritten with this table's `id` and ref, so whatever
 * table id the column was created with does not leak into the fixture. The
 * input column objects are not mutated.
 *
 * @param id - Table id.
 * @param name - Table name used in the table ref.
 * @param columns - Columns to attach to the table.
 * @returns The table fixture with empty descriptions and `enabled: true`.
 */
function table(id: string, name: string, columns: KtxEnrichedColumn[]): KtxEnrichedTable {
  const ref = { catalog: null, db: 'public', name };

  return {
    id,
    ref,
    enabled: true,
    descriptions: {},
    // All columns share the same `ref` object as the table itself.
    columns: columns.map((item) => ({ ...item, tableId: id, tableRef: ref })),
  };
}
|
|
|
|
// Exercises `localCandidateTables`: given a child column and a list of
// candidate parent tables, it returns the ranked candidates, optionally capped
// by `maxParentTables`. Fixtures come from the `table` / `column` helpers.
describe('relationship locality', () => {
  it('ranks the referenced parent table ahead of the child table for id-like source columns', () => {
    const artists = table('artist-id', 'Artist', [column('artist-id', 'artist-pk', 'ArtistId')]);
    const albums = table('album-id', 'Album', [
      column('album-id', 'album-pk', 'AlbumId'),
      column('album-id', 'artist-fk', 'ArtistId'),
    ]);
    const unrelated = table('invoice-id', 'Invoice', [column('invoice-id', 'invoice-pk', 'InvoiceId')]);

    // The child table itself is among the candidates; with a cap of 1 only
    // the best-ranked table may survive.
    const ranked = localCandidateTables({
      childTable: albums,
      childColumn: albums.columns[1]!,
      parentTables: [albums, unrelated, artists],
      maxParentTables: 1,
    });

    expect(ranked.map((item) => item.table.ref.name)).toEqual(['Artist']);
    // None of the fixture columns carries an embedding, so the embedding
    // score is expected to be exactly 0.
    expect(ranked[0]).toMatchObject({
      score: expect.any(Number),
      tokenScore: expect.any(Number),
      embeddingScore: 0,
      reasons: expect.arrayContaining(['column_table_token_overlap']),
    });
  });

  it('uses singular and plural variants so plan_code can rank stg_plans', () => {
    const plans = table('plans-id', 'stg_plans', [column('plans-id', 'plan-code', 'plan_code')]);
    const segments = table('segments-id', 'mart_account_segments', [
      column('segments-id', 'current-plan-code', 'current_plan_code', {
        nativeType: 'TEXT',
        normalizedType: 'text',
        dimensionType: 'string',
      }),
    ]);
    const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);

    const ranked = localCandidateTables({
      childTable: segments,
      childColumn: segments.columns[0]!,
      parentTables: [accounts, segments, plans],
      maxParentTables: 1,
    });

    expect(ranked.map((item) => item.table.ref.name)).toEqual(['stg_plans']);
    expect(ranked[0]?.tokenScore).toBeGreaterThan(0);
  });

  it('returns all tables when the schema is smaller than the default locality cap', () => {
    const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);
    const invoices = table('invoices-id', 'invoices', [
      column('invoices-id', 'invoice-id', 'id'),
      column('invoices-id', 'account-id', 'account_id'),
    ]);

    // No `maxParentTables` here: the default cap applies.
    const ranked = localCandidateTables({
      childTable: invoices,
      childColumn: invoices.columns[1]!,
      parentTables: [invoices, accounts],
    });

    // Sorted before comparing because only membership matters in this test,
    // not rank order.
    expect(ranked.map((item) => item.table.ref.name).sort()).toEqual(['accounts', 'invoices']);
  });

  it('supports an explicit zero cap for deterministic tests', () => {
    const accounts = table('accounts-id', 'accounts', [column('accounts-id', 'account-id', 'id')]);
    const invoices = table('invoices-id', 'invoices', [
      column('invoices-id', 'invoice-id', 'id'),
      column('invoices-id', 'account-id', 'account_id'),
    ]);

    const ranked = localCandidateTables({
      childTable: invoices,
      childColumn: invoices.columns[1]!,
      parentTables: [invoices, accounts],
      maxParentTables: 0,
    });

    expect(ranked).toEqual([]);
  });

  it('uses parent-column embeddings when token locality is weak', () => {
    const customers = table('customers-id', 'customers', [
      column('customers-id', 'customers-id-col', 'id', { embedding: [1, 0, 0] }),
      column('customers-id', 'customers-name-col', 'name', {
        nativeType: 'TEXT',
        normalizedType: 'text',
        dimensionType: 'string',
        embedding: [0, 1, 0],
      }),
    ]);
    // `buyer_ref` has no name overlap with `customers`; its embedding is
    // almost parallel to the embedding of `customers.id`.
    const orders = table('orders-id', 'orders', [
      column('orders-id', 'orders-id-col', 'id', { embedding: [0, 0, 1] }),
      column('orders-id', 'buyer-ref-col', 'buyer_ref', { embedding: [0.995, 0.005, 0] }),
    ]);
    const invoices = table('invoices-id', 'invoices', [column('invoices-id', 'invoice-id', 'id')]);

    const ranked = localCandidateTables({
      childTable: orders,
      childColumn: orders.columns[1]!,
      parentTables: [invoices, customers],
      maxParentTables: 1,
    });

    expect(ranked.map((item) => item.table.ref.name)).toEqual(['customers']);
    expect(ranked[0]).toMatchObject({
      embeddingScore: expect.any(Number),
      reasons: expect.arrayContaining(['embedding_similarity']),
    });
    // Optional chaining (as in the tokenScore assertion in this suite) so an
    // empty result fails as an assertion rather than a TypeError.
    expect(ranked[0]?.embeddingScore).toBeGreaterThan(0.99);
  });
});
|