ktx/packages/cli/test/context/scan/relationship-diagnostics.test.ts
Andrey Avtomonov 56985b7e09
test: split cli tests from source tree (#216)
* feat(cli): define full warehouse dialect contract

* test(cli): keep dialect edge tests focused

* fix(cli): stabilize dialect contract foundation

* refactor(connectors): own read-only query preparation

* refactor(connectors): resolve dialects through registry

* refactor(connectors): keep concrete dialect classes internal

* chore(workspace): enforce dialect import boundary

* refactor(cli): resolve relationship dialect at scan boundary

* refactor(cli): use dialect display parsing for entity details

* refactor(cli): use dialect display parsing for warehouse catalog

* refactor(cli): use dialect SQL in relationship workflows

* test(cli): verify solid dialect scan workflow closure

* test: split cli tests from source tree

* refactor(cli): standardize BigQuery scope listing

* feat(sqlite): implement connector scope listing

* test(connectors): cover required table listing

* feat(cli): add warehouse driver registry

* refactor(setup): route scope discovery through driver registry

* refactor(cli): route local query execution through driver registry

* refactor(historic-sql): route dialect support through driver registry

* refactor(cli): test warehouse connections through driver registry

* fix(cli): close driver registry type export gaps

* Improve setup daemon diagnostics

* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback

Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.

* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match

The setup picker's KtxTableListEntry was a 2-level { schema, name } shape, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.

Align the picker boundary with the canonical 3-level KtxTableRef:

- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
  resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
  (resolveEnabledTables already accepts the 3-part shape) and
  schemasFromEnabledTables now goes through parseDottedTableEntry so it
  recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
  reuse.

Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).

* fix(cli): allow debug telemetry under opt-out env
2026-05-26 08:49:05 +02:00

373 lines
13 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import type { KtxEnrichedRelationship, KtxRelationshipEndpoint } from '../../../src/context/scan/enrichment-types.js';
import type { KtxResolvedRelationshipDiscoveryCandidate } from '../../../src/context/scan/relationship-graph-resolver.js';
import {
buildKtxRelationshipArtifacts,
buildKtxRelationshipDiagnostics,
emptyKtxRelationshipProfileArtifact,
} from '../../../src/context/scan/relationship-diagnostics.js';
/**
 * Builds a single-column relationship endpoint fixture.
 *
 * The table name is used as the table id, the column id is `<table>.<column>`,
 * and the table reference carries a null catalog and db.
 *
 * @param table - Table name, also used as `tableId`.
 * @param column - Column name on that table.
 * @returns The endpoint fixture for `table.column`.
 */
function endpoint(table: string, column: string): KtxRelationshipEndpoint {
  const qualifiedColumnId = `${table}.${column}`;
  const tableRef = { catalog: null, db: null, name: table };
  return { tableId: table, columnIds: [qualifiedColumnId], table: tableRef, columns: [column] };
}
/**
 * Builds an inferred, many-to-one enriched relationship fixture between two
 * single-column endpoints.
 *
 * @param input - Relationship id plus the from/to table and column names.
 *   `confidence` falls back to 0.92 when omitted.
 * @returns The enriched relationship fixture, flagged as a primary-key reference.
 */
function enrichedRelationship(input: {
  id: string;
  fromTable: string;
  fromColumn: string;
  toTable: string;
  toColumn: string;
  confidence?: number;
}): KtxEnrichedRelationship {
  const { id, fromTable, fromColumn, toTable, toColumn, confidence } = input;
  const fromEndpoint = endpoint(fromTable, fromColumn);
  const toEndpoint = endpoint(toTable, toColumn);

  return {
    id,
    source: 'inferred',
    from: fromEndpoint,
    to: toEndpoint,
    relationshipType: 'many_to_one',
    confidence: confidence ?? 0.92,
    isPrimaryKeyReference: true,
  };
}
/**
 * Builds a graph-resolved `orders.customer_id -> customers.id` candidate fixture.
 *
 * - `source` defaults to `'normalized_table_match'`; passing `'llm_proposal'`
 *   selects the evidence variant that carries `llmConfidence` / `llmRationale`.
 * - A `'rejected'` status selects the failing validation numbers
 *   (coverage 0.2, 8 violations, ratio 0.8, overlap 2); any other status gets
 *   the clean numbers.
 * - `pkScore` (default 0.97) is also reported as `graph.targetPkScore`;
 *   `fkScore` defaults to 0.94.
 *
 * @param input - Candidate id and status, plus optional overrides for source,
 *   scores, and the validation / graph reason lists.
 * @returns The resolved relationship discovery candidate fixture.
 */
function resolvedRelationship(input: {
  id: string;
  status: 'accepted' | 'review' | 'rejected';
  source?: 'normalized_table_match' | 'exact_column_match' | 'inflection' | 'self_reference' | 'llm_proposal';
  fkScore?: number;
  pkScore?: number;
  validationReasons?: string[];
  graphReasons?: string[];
}): KtxResolvedRelationshipDiscoveryCandidate {
  // Derived switches and defaults, computed once and reused below.
  const isRejected = input.status === 'rejected';
  const isLlmProposal = input.source === 'llm_proposal';
  const primaryKeyScore = input.pkScore ?? 0.97;
  const foreignKeyScore = input.fkScore ?? 0.94;

  // The literal stays inside the return so it is checked against the declared return type.
  return {
    id: input.id,
    from: endpoint('orders', 'customer_id'),
    to: endpoint('customers', 'id'),
    relationshipType: 'many_to_one',
    confidence: 0.88,
    source: input.source ?? 'normalized_table_match',
    status: input.status,
    evidence: isLlmProposal
      ? {
          sourceColumnBase: 'buyer',
          targetTableBase: 'customer',
          targetColumnBase: 'id',
          targetKeyScore: 0.88,
          nameScore: 0.45,
          reasons: ['llm_proposal', 'llm_pk_proposal'],
          llmConfidence: 0.89,
          llmRationale: 'Buyer reference values align with customer identifiers.',
        }
      : {
          sourceColumnBase: 'customer',
          targetTableBase: 'customer',
          targetColumnBase: 'id',
          targetKeyScore: 0.9,
          nameScore: 0.85,
          reasons: ['table_name_matches_source_column'],
        },
    score: 0.91,
    validation: {
      targetUniqueness: 1,
      sourceCoverage: isRejected ? 0.2 : 1,
      violationCount: isRejected ? 8 : 0,
      violationRatio: isRejected ? 0.8 : 0,
      sourceNullRate: 0,
      targetNullRate: 0,
      childDistinct: 10,
      parentDistinct: 10,
      overlap: isRejected ? 2 : 10,
      checkedValues: 10,
      reasons: input.validationReasons ?? ['validation_passed'],
    },
    pkScore: primaryKeyScore,
    fkScore: foreignKeyScore,
    graph: {
      targetPkScore: primaryKeyScore,
      incomingCandidateCount: 1,
      conflictRank: 1,
      reasons: input.graphReasons ?? ['target_pk_score_passed', 'fk_score_passed'],
    },
  };
}
// Exercises buildKtxRelationshipArtifacts and buildKtxRelationshipDiagnostics
// against hand-built relationship fixtures.
describe('relationship diagnostics artifacts', () => {
  // Fixture values shared by the cases below.
  const connectionId = 'warehouse';
  const generatedAt = '2026-05-07T12:00:00.000Z';

  it('groups graph-resolved relationships and preserves evidence reasons', () => {
    // One candidate per status bucket.
    const acceptedEdge = resolvedRelationship({ id: 'accepted-edge', status: 'accepted', source: 'llm_proposal' });
    const reviewEdge = resolvedRelationship({
      id: 'review-edge',
      status: 'review',
      validationReasons: ['validation_unavailable'],
      graphReasons: ['validation_unavailable_review_only', 'fk_score_review'],
    });
    const rejectedEdge = resolvedRelationship({
      id: 'rejected-edge',
      status: 'rejected',
      validationReasons: ['low_source_coverage'],
      graphReasons: ['fk_score_rejected'],
    });

    const { accepted, review, rejected } = buildKtxRelationshipArtifacts({
      connectionId,
      resolvedRelationships: [acceptedEdge, reviewEdge, rejectedEdge],
    });

    expect(accepted).toHaveLength(1);
    expect(accepted[0]).toMatchObject({
      source: 'llm_proposal',
      evidence: {
        llmConfidence: 0.89,
        llmRationale: 'Buyer reference values align with customer identifiers.',
      },
      reasons: expect.arrayContaining(['llm_proposal', 'llm_pk_proposal']),
    });
    expect(review).toHaveLength(1);
    expect(rejected).toHaveLength(1);
    expect(review[0]).toMatchObject({
      id: 'review-edge',
      status: 'review',
      source: 'normalized_table_match',
      fkScore: 0.94,
      reasons: expect.arrayContaining(['validation_unavailable', 'validation_unavailable_review_only']),
    });
    expect(rejected[0]?.reasons).toEqual(
      expect.arrayContaining(['table_name_matches_source_column', 'low_source_coverage', 'fk_score_rejected']),
    );
  });

  it('adapts relationship updates into the artifact shape', () => {
    const acceptedUpdate = enrichedRelationship({
      id: 'orders-customer',
      fromTable: 'orders',
      fromColumn: 'customer_id',
      toTable: 'customers',
      toColumn: 'id',
    });
    const rejectedUpdate = enrichedRelationship({
      id: 'orders-account',
      fromTable: 'orders',
      fromColumn: 'account_id',
      toTable: 'accounts',
      toColumn: 'id',
      confidence: 0.4,
    });

    const { accepted, rejected, skipped } = buildKtxRelationshipArtifacts({
      connectionId,
      relationshipUpdate: {
        connectionId,
        accepted: [acceptedUpdate],
        rejected: [rejectedUpdate],
        skipped: [{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }],
      },
    });

    expect(accepted[0]).toMatchObject({
      id: 'orders-customer',
      status: 'accepted',
      source: 'inferred',
      reasons: ['accepted_relationship_update'],
    });
    expect(rejected[0]).toMatchObject({
      id: 'orders-account',
      status: 'rejected',
      reasons: ['rejected_relationship_update'],
    });
    expect(skipped).toEqual([{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }]);
  });

  it('deduplicates resolved and formal relationship update artifacts by edge id', () => {
    // The same edge id is supplied through both inputs.
    const edgeId = 'orders:orders.account_id->accounts:accounts.id';

    const { accepted } = buildKtxRelationshipArtifacts({
      connectionId,
      resolvedRelationships: [
        {
          id: edgeId,
          from: endpoint('orders', 'account_id'),
          to: endpoint('accounts', 'id'),
          relationshipType: 'many_to_one',
          source: 'normalized_table_match',
          status: 'accepted',
          confidence: 0.92,
          score: 0.9,
          pkScore: 0.92,
          fkScore: 0.9,
          evidence: {
            sourceColumnBase: 'account',
            targetTableBase: 'account',
            targetColumnBase: 'id',
            targetKeyScore: 0.92,
            nameScore: 0.92,
            reasons: ['foreign_key_suffix'],
          },
          validation: {
            targetUniqueness: 1,
            sourceCoverage: 1,
            violationCount: 0,
            violationRatio: 0,
            sourceNullRate: 0,
            targetNullRate: 0,
            childDistinct: 2,
            parentDistinct: 2,
            overlap: 2,
            checkedValues: 2,
            reasons: ['validation_passed'],
          },
          graph: {
            targetPkScore: 0.92,
            incomingCandidateCount: 1,
            conflictRank: 1,
            reasons: ['fk_score_passed'],
          },
        },
      ],
      relationshipUpdate: {
        connectionId,
        accepted: [
          {
            id: edgeId,
            source: 'formal',
            from: endpoint('orders', 'account_id'),
            to: endpoint('accounts', 'id'),
            relationshipType: 'many_to_one',
            confidence: 1,
            isPrimaryKeyReference: true,
          },
        ],
        rejected: [],
        skipped: [],
      },
    });

    // Only one artifact is expected, carrying the resolved candidate's source and reasons.
    expect(accepted).toHaveLength(1);
    expect(accepted[0]).toMatchObject({
      id: edgeId,
      source: 'normalized_table_match',
      reasons: expect.arrayContaining(['foreign_key_suffix', 'validation_passed', 'fk_score_passed']),
    });
  });

  it('explains validation-unavailable review candidates', () => {
    const reviewEdge = resolvedRelationship({
      id: 'review-edge',
      status: 'review',
      validationReasons: ['validation_unavailable'],
      graphReasons: ['validation_unavailable_review_only'],
    });
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId,
      resolvedRelationships: [reviewEdge],
    });
    const profile = emptyKtxRelationshipProfileArtifact({
      connectionId,
      driver: 'sqlite',
      reason: 'read_only_sql_unavailable',
    });

    const diagnostics = buildKtxRelationshipDiagnostics({
      connectionId,
      generatedAt,
      artifacts,
      profile,
      warnings: [
        {
          code: 'connector_capability_missing',
          message: 'KTX scan connector cannot run standalone statistical relationship validation',
          recoverable: true,
          metadata: { capability: 'readOnlySql' },
        },
      ],
      thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 },
    });

    expect(diagnostics.summary).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 });
    expect(diagnostics.noAcceptedReason).toBe('validation unavailable; review candidates written');
    expect(diagnostics.candidateCountsBySource).toEqual({ normalized_table_match: 1 });
    expect(diagnostics.validation).toEqual({
      available: false,
      sqlAvailable: false,
      queryCount: 0,
    });
    expect(diagnostics.profileWarnings).toEqual(['read_only_sql_unavailable']);
    expect(diagnostics.warnings[0]).toMatchObject({ code: 'connector_capability_missing' });
  });

  it('explains empty relationship output as a no-candidate outcome', () => {
    // No relationships of any kind are supplied.
    const artifacts = buildKtxRelationshipArtifacts({ connectionId });
    const profile = emptyKtxRelationshipProfileArtifact({
      connectionId,
      driver: 'sqlite',
      reason: 'relationship_profiling_not_run',
    });

    const diagnostics = buildKtxRelationshipDiagnostics({
      connectionId,
      generatedAt,
      artifacts,
      profile,
    });

    expect(diagnostics.summary).toEqual({ accepted: 0, review: 0, rejected: 0, skipped: 0 });
    expect(diagnostics.noAcceptedReason).toBe('no candidate pairs passed type compatibility');
    expect(diagnostics.candidateCountsBySource).toEqual({});
  });

  it('records composite relationship endpoints in relationship artifacts', () => {
    const compositeId = 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)';

    const { accepted } = buildKtxRelationshipArtifacts({
      connectionId,
      compositeRelationships: [
        {
          id: compositeId,
          source: 'composite_profile_match',
          status: 'accepted',
          from: {
            tableId: 'order_line_allocations',
            columnIds: ['order_line_allocations.order_id', 'order_line_allocations.line_number'],
            table: { catalog: null, db: null, name: 'order_line_allocations' },
            columns: ['order_id', 'line_number'],
          },
          to: {
            tableId: 'order_lines',
            columnIds: ['order_lines.order_id', 'order_lines.line_number'],
            table: { catalog: null, db: null, name: 'order_lines' },
            columns: ['order_id', 'line_number'],
          },
          relationshipType: 'many_to_one',
          confidence: 0.95,
          validation: {
            targetUniqueness: 1,
            sourceCoverage: 1,
            violationCount: 0,
            violationRatio: 0,
            childDistinct: 2,
            parentDistinct: 2,
            overlap: 2,
            reasons: ['composite_validation_passed'],
          },
        },
      ],
    });

    // Both column ids and column names of each two-column endpoint must be present.
    expect(accepted).toEqual([
      expect.objectContaining({
        id: compositeId,
        source: 'composite_profile_match',
        from: expect.objectContaining({
          columnIds: ['order_line_allocations.order_id', 'order_line_allocations.line_number'],
          columns: ['order_id', 'line_number'],
        }),
        to: expect.objectContaining({
          columnIds: ['order_lines.order_id', 'order_lines.line_number'],
          columns: ['order_id', 'line_number'],
        }),
        reasons: ['composite_validation_passed'],
        validation: expect.objectContaining({ sourceCoverage: 1 }),
      }),
    ]);
  });
});