ktx/packages/cli/test/context/sl/dictionary-search.test.ts
Andrey Avtomonov 56985b7e09
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00

228 lines
6.5 KiB
TypeScript

import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { initKtxProject, type KtxLocalProject } from '../../../src/context/project/project.js';
import { createKtxDictionarySearchService } from '../../../src/context/sl/dictionary-search.js';
describe('createKtxDictionarySearchService', () => {
let tempDir: string;
let project: KtxLocalProject;
beforeEach(async () => {
tempDir = await mkdtemp(join(tmpdir(), 'ktx-dictionary-search-'));
project = await initKtxProject({ projectDir: join(tempDir, 'project') });
project.config.connections.warehouse = { driver: 'postgres', url: 'env:DATABASE_URL' };
project.config.connections.billing = { driver: 'postgres', url: 'env:BILLING_DATABASE_URL' };
});
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
async function seedProfile(input: {
connectionId: string;
syncId: string;
columns: Record<string, unknown>;
}): Promise<void> {
await project.fileStore.writeFile(
`raw-sources/${input.connectionId}/live-database/${input.syncId}/enrichment/relationship-profile.json`,
`${JSON.stringify(
{
connectionId: input.connectionId,
driver: 'postgres',
sqlAvailable: true,
queryCount: 4,
tables: [],
columns: input.columns,
warnings: [],
},
null,
2,
)}\n`,
'ktx',
'ktx@example.com',
'Seed relationship profile',
);
}
it('returns matches and non-authoritative misses across configured connections', async () => {
await seedProfile({
connectionId: 'warehouse',
syncId: 'sync-1',
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 3,
sampleValues: ['paid', 'refunded', 'pending'],
},
},
});
await seedProfile({
connectionId: 'billing',
syncId: 'sync-2',
columns: {
'customers.name': {
table: { catalog: null, db: 'public', name: 'customers' },
column: 'name',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 4,
sampleValues: ['Acme Corp', 'Globex'],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ values: ['PAID', 'missing'] })).resolves.toEqual({
searched: [
{
connectionId: 'billing',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 1,
syncId: 'sync-2',
profiledAt: null,
},
status: 'ready',
},
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 1,
syncId: 'sync-1',
profiledAt: null,
},
status: 'ready',
},
],
results: [
{
value: 'PAID',
matches: [
{
connectionId: 'warehouse',
sourceName: 'orders',
columnName: 'status',
matchedValue: 'paid',
cardinality: 3,
},
],
misses: [{ connectionId: 'billing', reason: 'value_not_in_sample' }],
},
{
value: 'missing',
matches: [],
misses: [
{ connectionId: 'billing', reason: 'value_not_in_sample' },
{ connectionId: 'warehouse', reason: 'value_not_in_sample' },
],
},
],
});
});
it('distinguishes missing profile artifacts from profiles with no candidate columns', async () => {
await seedProfile({
connectionId: 'billing',
syncId: 'sync-empty',
columns: {
'events.id': {
table: { catalog: null, db: 'public', name: 'events' },
column: 'id',
nativeType: 'integer',
normalizedType: 'integer',
distinctCount: 100,
sampleValues: [1, 2, 3],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ values: ['Acme'] })).resolves.toEqual({
searched: [
{
connectionId: 'billing',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: 'sync-empty',
profiledAt: null,
},
status: 'no_candidate_columns',
},
{
connectionId: 'warehouse',
coverage: {
sampledRows: null,
valuesPerColumn: null,
profiledColumns: 0,
syncId: null,
profiledAt: null,
},
status: 'no_profile_artifact',
},
],
results: [
{
value: 'Acme',
matches: [],
misses: [
{ connectionId: 'billing', reason: 'no_candidate_columns' },
{ connectionId: 'warehouse', reason: 'no_profile_artifact' },
],
},
],
});
});
it('scopes search to the requested connection', async () => {
await seedProfile({
connectionId: 'warehouse',
syncId: 'sync-1',
columns: {
'orders.status': {
table: { catalog: null, db: 'public', name: 'orders' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 3,
sampleValues: ['paid'],
},
},
});
await seedProfile({
connectionId: 'billing',
syncId: 'sync-2',
columns: {
'invoices.status': {
table: { catalog: null, db: 'public', name: 'invoices' },
column: 'status',
nativeType: 'text',
normalizedType: 'string',
distinctCount: 2,
sampleValues: ['paid'],
},
},
});
const service = createKtxDictionarySearchService(project);
await expect(service.search({ connectionId: 'billing', values: ['paid'] })).resolves.toMatchObject({
searched: [{ connectionId: 'billing', status: 'ready' }],
results: [
{
value: 'paid',
matches: [{ connectionId: 'billing', sourceName: 'invoices', columnName: 'status', matchedValue: 'paid' }],
misses: [],
},
],
});
});
});