ktx/packages/cli/test/context/scan/relationship-name-similarity.test.ts

82 lines
3.2 KiB
TypeScript
Raw Permalink Normal View History

2026-05-10 23:12:26 +02:00
import { describe, expect, it } from 'vitest';
import {
2026-05-10 23:51:24 +02:00
normalizeKtxRelationshipName,
pluralizeKtxRelationshipToken,
singularizeKtxRelationshipToken,
2026-05-10 23:12:26 +02:00
tokenSimilarity,
2026-05-10 23:51:24 +02:00
tokenizeKtxRelationshipName,
test: split cli tests from source tree (#216) * feat(cli): define full warehouse dialect contract * test(cli): keep dialect edge tests focused * fix(cli): stabilize dialect contract foundation * refactor(connectors): own read-only query preparation * refactor(connectors): resolve dialects through registry * refactor(connectors): keep concrete dialect classes internal * chore(workspace): enforce dialect import boundary * refactor(cli): resolve relationship dialect at scan boundary * refactor(cli): use dialect display parsing for entity details * refactor(cli): use dialect display parsing for warehouse catalog * refactor(cli): use dialect SQL in relationship workflows * test(cli): verify solid dialect scan workflow closure * test: split cli tests from source tree * refactor(cli): standardize BigQuery scope listing * feat(sqlite): implement connector scope listing * test(connectors): cover required table listing * feat(cli): add warehouse driver registry * refactor(setup): route scope discovery through driver registry * refactor(cli): route local query execution through driver registry * refactor(historic-sql): route dialect support through driver registry * refactor(cli): test warehouse connections through driver registry * fix(cli): close driver registry type export gaps * Improve setup daemon diagnostics * refactor(setup): centralize rail-prefixed diagnostics + query-history fallback Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput into clack.ts so the setup wizard, managed daemons, and embedding/agent steps share one rail-formatted writer. setup-databases.ts also adds a "disable query history and retry" option when the schema-context build fails and query history is the likely culprit, surfaced via a new failed-query-history-unavailable status. 
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match The setup picker's KtxTableListEntry was a 2-level { schema, name }, so qualifiedTableId always wrote db.name into enabled_tables. When BigQuery, Snowflake, or SQL Server later ran fast ingest, their introspect step filtered the scope set with scopedTableNames(scope, { catalog: projectId|database, db }) — catalog was non-null on the introspect side but null in the scope refs, so every entry was rejected, the live-database adapter staged zero table files, and detect() failed with 'Adapter "live-database" did not recognize fetched source output'. Align the picker boundary with the canonical 3-level KtxTableRef: - Add catalog: string | null to KtxTableListEntry. - BigQuery/Snowflake/SQL Server listTables populate catalog from the resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null. - qualifiedTableId emits catalog.schema.name when catalog is non-null (resolveEnabledTables already accepts the 3-part shape) and schemasFromEnabledTables now goes through parseDottedTableEntry so it recovers the schema correctly from both 2-part and 3-part entries. - Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker reuse. Update listTables expectations in all seven connector tests and the setup / picker test fixtures. Add a picker regression test that covers the catalog-bearing round-trip (save + refine). * fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00
} from '../../../src/context/scan/relationship-name-similarity.js';
2026-05-10 23:12:26 +02:00
/**
 * Unit tests for the relationship-name helpers: name normalization,
 * tokenization, singular/plural token forms, and token-based similarity.
 */
describe('relationship name similarity', () => {
  // PascalCase, camelCase with an upper-case acronym, dotted qualified names,
  // SCREAMING_SNAKE_CASE and kebab-case are all expected to come out as
  // lower-case snake_case with one token per word.
  it('tokenizes common warehouse naming styles', () => {
    expect(normalizeKtxRelationshipName('AlbumId')).toMatchObject({
      normalized: 'album_id',
      singular: 'album_id',
      plural: 'album_ids',
      tokens: ['album', 'id'],
    });
    expect(normalizeKtxRelationshipName('artistID')).toMatchObject({
      normalized: 'artist_id',
      tokens: ['artist', 'id'],
    });
    expect(normalizeKtxRelationshipName('SalesLT.CustomerID')).toMatchObject({
      normalized: 'sales_lt_customer_id',
      singular: 'sales_lt_customer_id',
      tokens: ['sales', 'lt', 'customer', 'id'],
    });
    expect(normalizeKtxRelationshipName('SCREAMING_CUSTOMER_UUID')).toMatchObject({
      normalized: 'screaming_customer_uuid',
      tokens: ['screaming', 'customer', 'uuid'],
    });
    expect(normalizeKtxRelationshipName('billing-account-key')).toMatchObject({
      normalized: 'billing_account_key',
      tokens: ['billing', 'account', 'key'],
    });
  });

  // A layer prefix such as `mart__` or `dim_` is dropped only when it leads the
  // name; the same word in the middle of a name (`customer_dim_id`) is kept.
  it('removes only leading warehouse layer prefixes', () => {
    expect(normalizeKtxRelationshipName('mart__Sales_Accounts')).toMatchObject({
      normalized: 'sales_accounts',
      singular: 'sales_account',
      plural: 'sales_accounts',
      tokens: ['sales', 'accounts'],
    });
    expect(normalizeKtxRelationshipName('dim_users')).toMatchObject({
      normalized: 'users',
      singular: 'user',
      plural: 'users',
      tokens: ['users'],
    });
    expect(normalizeKtxRelationshipName('customer_dim_id')).toMatchObject({
      normalized: 'customer_dim_id',
      tokens: ['customer', 'dim', 'id'],
    });
  });

  // Accented characters fold to their base letter, and words that merely end
  // in "s" (`address`, `status`) must not be treated as plurals.
  it('folds accents and preserves non-suffix trailing s words', () => {
    expect(normalizeKtxRelationshipName('KundénID')).toMatchObject({
      normalized: 'kunden_id',
      tokens: ['kunden', 'id'],
    });
    expect(singularizeKtxRelationshipToken('address')).toBe('address');
    expect(singularizeKtxRelationshipToken('addresses')).toBe('address');
    expect(singularizeKtxRelationshipToken('status')).toBe('status');
    expect(pluralizeKtxRelationshipToken('address')).toBe('addresses');
    expect(pluralizeKtxRelationshipToken('company')).toBe('companies');
  });

  // Acronym runs (`HTTP`) stay together as one token and embedded digits are
  // split out into their own token.
  it('returns deterministic tokens for direct tokenization calls', () => {
    expect(tokenizeKtxRelationshipName('HTTPResponseCode')).toEqual(['http', 'response', 'code']);
    expect(tokenizeKtxRelationshipName('customer2AddressID')).toEqual([
      'customer',
      '2',
      'address',
      'id',
    ]);
  });

  // Identical names score exactly 1, an empty name scores 0, and a name that
  // shares a longer token suffix must outrank one that only shares `id`.
  it('scores token overlap and ordered suffix similarity', () => {
    expect(tokenSimilarity('artist_id', 'artist_id')).toBe(1);
    expect(tokenSimilarity('Album.ArtistId', 'ArtistID')).toBeGreaterThanOrEqual(0.74);
    expect(tokenSimilarity('customer_account_id', 'account_id')).toBeGreaterThan(
      tokenSimilarity('customer_account_id', 'invoice_id'),
    );
    expect(tokenSimilarity('', 'artist')).toBe(0);
  });
});