mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
* feat(cli): define full warehouse dialect contract
* test(cli): keep dialect edge tests focused
* fix(cli): stabilize dialect contract foundation
* refactor(connectors): own read-only query preparation
* refactor(connectors): resolve dialects through registry
* refactor(connectors): keep concrete dialect classes internal
* chore(workspace): enforce dialect import boundary
* refactor(cli): resolve relationship dialect at scan boundary
* refactor(cli): use dialect display parsing for entity details
* refactor(cli): use dialect display parsing for warehouse catalog
* refactor(cli): use dialect SQL in relationship workflows
* test(cli): verify solid dialect scan workflow closure
* test: split cli tests from source tree
* refactor(cli): standardize BigQuery scope listing
* feat(sqlite): implement connector scope listing
* test(connectors): cover required table listing
* feat(cli): add warehouse driver registry
* refactor(setup): route scope discovery through driver registry
* refactor(cli): route local query execution through driver registry
* refactor(historic-sql): route dialect support through driver registry
* refactor(cli): test warehouse connections through driver registry
* fix(cli): close driver registry type export gaps
* Improve setup daemon diagnostics
* refactor(setup): centralize rail-prefixed diagnostics + query-history fallback
Extract errorMessage, writePrefixedLines, and flushPrefixedBufferedCommandOutput
into clack.ts so the setup wizard, managed daemons, and embedding/agent steps
share one rail-formatted writer. setup-databases.ts also adds a
"disable query history and retry" option when the schema-context build fails
and query history is the likely culprit, surfaced via a new
failed-query-history-unavailable status.
* fix(cli): carry catalog through the picker so BigQuery/Snowflake/SQL Server scope filters match
The setup picker's KtxTableListEntry was a 2-level { schema, name }, so
qualifiedTableId always wrote db.name into enabled_tables. When BigQuery,
Snowflake, or SQL Server later ran fast ingest, their introspect step filtered
the scope set with scopedTableNames(scope, { catalog: projectId|database, db })
— catalog was non-null on the introspect side but null in the scope refs, so
every entry was rejected, the live-database adapter staged zero table files,
and detect() failed with 'Adapter "live-database" did not recognize fetched
source output'.
Align the picker boundary with the canonical 3-level KtxTableRef:
- Add catalog: string | null to KtxTableListEntry.
- BigQuery/Snowflake/SQL Server listTables populate catalog from the
resolved projectId / database; Postgres/MySQL/ClickHouse/SQLite set null.
- qualifiedTableId emits catalog.schema.name when catalog is non-null
(resolveEnabledTables already accepts the 3-part shape) and
schemasFromEnabledTables now goes through parseDottedTableEntry so it
recovers the schema correctly from both 2-part and 3-part entries.
- Export parseDottedTableEntry from enabled-tables.ts (@internal) for picker
reuse.
Update listTables expectations in all seven connector tests and the setup /
picker test fixtures. Add a picker regression test that covers the
catalog-bearing round-trip (save + refine).
* fix(cli): allow debug telemetry under opt-out env
373 lines
13 KiB
TypeScript
373 lines
13 KiB
TypeScript
import { describe, expect, it } from 'vitest';
|
|
import type { KtxEnrichedRelationship, KtxRelationshipEndpoint } from '../../../src/context/scan/enrichment-types.js';
|
|
import type { KtxResolvedRelationshipDiscoveryCandidate } from '../../../src/context/scan/relationship-graph-resolver.js';
|
|
import {
|
|
buildKtxRelationshipArtifacts,
|
|
buildKtxRelationshipDiagnostics,
|
|
emptyKtxRelationshipProfileArtifact,
|
|
} from '../../../src/context/scan/relationship-diagnostics.js';
|
|
|
|
/**
 * Builds a single-column relationship endpoint fixture.
 *
 * The table reference is unqualified: `catalog` and `db` are both `null`, and
 * the table name doubles as the `tableId`.
 *
 * @param table - Table name, used for `tableId` and `table.name`.
 * @param column - Column name; the column id is rendered as `<table>.<column>`.
 * @returns The endpoint fixture describing exactly one column.
 */
function endpoint(table: string, column: string): KtxRelationshipEndpoint {
  return {
    tableId: table,
    columnIds: [`${table}.${column}`],
    table: { catalog: null, db: null, name: table },
    columns: [column],
  };
}
|
|
|
|
/**
 * Builds an enriched-relationship fixture between two single-column endpoints.
 *
 * Every fixture is an `'inferred'`, `'many_to_one'` relationship flagged as a
 * primary-key reference; only the id, the endpoints, and the confidence vary.
 *
 * @param input - Relationship id plus the from/to table and column names.
 *   `confidence` defaults to `0.92` when omitted.
 * @returns The enriched relationship fixture.
 */
function enrichedRelationship(input: {
  id: string;
  fromTable: string;
  fromColumn: string;
  toTable: string;
  toColumn: string;
  confidence?: number;
}): KtxEnrichedRelationship {
  return {
    id: input.id,
    source: 'inferred',
    from: endpoint(input.fromTable, input.fromColumn),
    to: endpoint(input.toTable, input.toColumn),
    relationshipType: 'many_to_one',
    confidence: input.confidence ?? 0.92,
    isPrimaryKeyReference: true,
  };
}
|
|
|
|
/**
 * Builds a graph-resolved relationship candidate fixture for
 * `orders.customer_id -> customers.id`.
 *
 * - The evidence payload depends on `input.source`: `'llm_proposal'` yields
 *   LLM evidence (confidence and rationale); any other source, including an
 *   omitted one, yields name-match evidence.
 * - The validation numbers depend on `input.status`: `'rejected'` yields low
 *   source coverage with violations; every other status yields a clean pass.
 *
 * @param input - Candidate id and status, plus optional overrides.
 *   `source` defaults to `'normalized_table_match'`, `pkScore` to `0.97`,
 *   `fkScore` to `0.94`, `validationReasons` to `['validation_passed']`, and
 *   `graphReasons` to `['target_pk_score_passed', 'fk_score_passed']`.
 * @returns The resolved discovery candidate fixture.
 */
function resolvedRelationship(input: {
  id: string;
  status: 'accepted' | 'review' | 'rejected';
  source?: 'normalized_table_match' | 'exact_column_match' | 'inflection' | 'self_reference' | 'llm_proposal';
  fkScore?: number;
  pkScore?: number;
  validationReasons?: string[];
  graphReasons?: string[];
}): KtxResolvedRelationshipDiscoveryCandidate {
  // Computed once so the four status-dependent validation numbers stay in sync.
  const isRejected = input.status === 'rejected';
  // Shared by the top-level pkScore and graph.targetPkScore, which must agree.
  const pkScore = input.pkScore ?? 0.97;

  return {
    id: input.id,
    from: endpoint('orders', 'customer_id'),
    to: endpoint('customers', 'id'),
    relationshipType: 'many_to_one',
    confidence: 0.88,
    source: input.source ?? 'normalized_table_match',
    status: input.status,
    // Checks the raw input.source, so an omitted source takes the name-match branch.
    evidence:
      input.source === 'llm_proposal'
        ? {
            sourceColumnBase: 'buyer',
            targetTableBase: 'customer',
            targetColumnBase: 'id',
            targetKeyScore: 0.88,
            nameScore: 0.45,
            reasons: ['llm_proposal', 'llm_pk_proposal'],
            llmConfidence: 0.89,
            llmRationale: 'Buyer reference values align with customer identifiers.',
          }
        : {
            sourceColumnBase: 'customer',
            targetTableBase: 'customer',
            targetColumnBase: 'id',
            targetKeyScore: 0.9,
            nameScore: 0.85,
            reasons: ['table_name_matches_source_column'],
          },
    score: 0.91,
    validation: {
      targetUniqueness: 1,
      sourceCoverage: isRejected ? 0.2 : 1,
      violationCount: isRejected ? 8 : 0,
      violationRatio: isRejected ? 0.8 : 0,
      sourceNullRate: 0,
      targetNullRate: 0,
      childDistinct: 10,
      parentDistinct: 10,
      overlap: isRejected ? 2 : 10,
      checkedValues: 10,
      reasons: input.validationReasons ?? ['validation_passed'],
    },
    pkScore,
    fkScore: input.fkScore ?? 0.94,
    graph: {
      targetPkScore: pkScore,
      incomingCandidateCount: 1,
      conflictRank: 1,
      reasons: input.graphReasons ?? ['target_pk_score_passed', 'fk_score_passed'],
    },
  };
}
|
|
|
|
// Covers buildKtxRelationshipArtifacts (grouping, adapting, deduplicating, and
// composite endpoints) and buildKtxRelationshipDiagnostics (summary and
// no-accepted explanations).
describe('relationship diagnostics artifacts', () => {
  it('groups graph-resolved relationships and preserves evidence reasons', () => {
    // One candidate per status; the accepted one uses the LLM-proposal source.
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId: 'warehouse',
      resolvedRelationships: [
        resolvedRelationship({ id: 'accepted-edge', status: 'accepted', source: 'llm_proposal' }),
        resolvedRelationship({
          id: 'review-edge',
          status: 'review',
          validationReasons: ['validation_unavailable'],
          graphReasons: ['validation_unavailable_review_only', 'fk_score_review'],
        }),
        resolvedRelationship({
          id: 'rejected-edge',
          status: 'rejected',
          validationReasons: ['low_source_coverage'],
          graphReasons: ['fk_score_rejected'],
        }),
      ],
    });

    // The accepted artifact keeps the LLM evidence fields and their reasons.
    expect(artifacts.accepted).toHaveLength(1);
    expect(artifacts.accepted[0]).toMatchObject({
      source: 'llm_proposal',
      evidence: {
        llmConfidence: 0.89,
        llmRationale: 'Buyer reference values align with customer identifiers.',
      },
      reasons: expect.arrayContaining(['llm_proposal', 'llm_pk_proposal']),
    });
    expect(artifacts.review).toHaveLength(1);
    expect(artifacts.rejected).toHaveLength(1);
    // Review reasons include both the validation and the graph reasons.
    expect(artifacts.review[0]).toMatchObject({
      id: 'review-edge',
      status: 'review',
      source: 'normalized_table_match',
      fkScore: 0.94,
      reasons: expect.arrayContaining(['validation_unavailable', 'validation_unavailable_review_only']),
    });
    // Rejected reasons include evidence, validation, and graph reasons.
    expect(artifacts.rejected[0]?.reasons).toEqual(
      expect.arrayContaining(['table_name_matches_source_column', 'low_source_coverage', 'fk_score_rejected']),
    );
  });

  it('adapts relationship updates into the artifact shape', () => {
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId: 'warehouse',
      relationshipUpdate: {
        connectionId: 'warehouse',
        accepted: [
          enrichedRelationship({
            id: 'orders-customer',
            fromTable: 'orders',
            fromColumn: 'customer_id',
            toTable: 'customers',
            toColumn: 'id',
          }),
        ],
        rejected: [
          enrichedRelationship({
            id: 'orders-account',
            fromTable: 'orders',
            fromColumn: 'account_id',
            toTable: 'accounts',
            toColumn: 'id',
            confidence: 0.4,
          }),
        ],
        skipped: [{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }],
      },
    });

    // Update entries are tagged with a status-specific synthetic reason.
    expect(artifacts.accepted[0]).toMatchObject({
      id: 'orders-customer',
      status: 'accepted',
      source: 'inferred',
      reasons: ['accepted_relationship_update'],
    });
    expect(artifacts.rejected[0]).toMatchObject({
      id: 'orders-account',
      status: 'rejected',
      reasons: ['rejected_relationship_update'],
    });
    // Skipped entries come back with the same id and reason.
    expect(artifacts.skipped).toEqual([{ relationshipId: 'orders-region', reason: 'validation_port_unavailable' }]);
  });

  it('deduplicates resolved and formal relationship update artifacts by edge id', () => {
    // The same edge id is supplied twice: once as a resolved candidate and
    // once as a formal relationship update.
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId: 'warehouse',
      resolvedRelationships: [
        {
          id: 'orders:orders.account_id->accounts:accounts.id',
          from: endpoint('orders', 'account_id'),
          to: endpoint('accounts', 'id'),
          relationshipType: 'many_to_one',
          source: 'normalized_table_match',
          status: 'accepted',
          confidence: 0.92,
          score: 0.9,
          pkScore: 0.92,
          fkScore: 0.9,
          evidence: {
            sourceColumnBase: 'account',
            targetTableBase: 'account',
            targetColumnBase: 'id',
            targetKeyScore: 0.92,
            nameScore: 0.92,
            reasons: ['foreign_key_suffix'],
          },
          validation: {
            targetUniqueness: 1,
            sourceCoverage: 1,
            violationCount: 0,
            violationRatio: 0,
            sourceNullRate: 0,
            targetNullRate: 0,
            childDistinct: 2,
            parentDistinct: 2,
            overlap: 2,
            checkedValues: 2,
            reasons: ['validation_passed'],
          },
          graph: {
            targetPkScore: 0.92,
            incomingCandidateCount: 1,
            conflictRank: 1,
            reasons: ['fk_score_passed'],
          },
        },
      ],
      relationshipUpdate: {
        connectionId: 'warehouse',
        accepted: [
          {
            id: 'orders:orders.account_id->accounts:accounts.id',
            source: 'formal',
            from: endpoint('orders', 'account_id'),
            to: endpoint('accounts', 'id'),
            relationshipType: 'many_to_one',
            confidence: 1,
            isPrimaryKeyReference: true,
          },
        ],
        rejected: [],
        skipped: [],
      },
    });

    // Only one artifact survives, carrying the resolved candidate's source and reasons.
    expect(artifacts.accepted).toHaveLength(1);
    expect(artifacts.accepted[0]).toMatchObject({
      id: 'orders:orders.account_id->accounts:accounts.id',
      source: 'normalized_table_match',
      reasons: expect.arrayContaining(['foreign_key_suffix', 'validation_passed', 'fk_score_passed']),
    });
  });

  it('explains validation-unavailable review candidates', () => {
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId: 'warehouse',
      resolvedRelationships: [
        resolvedRelationship({
          id: 'review-edge',
          status: 'review',
          validationReasons: ['validation_unavailable'],
          graphReasons: ['validation_unavailable_review_only'],
        }),
      ],
    });
    // Empty profile built with the reason that read-only SQL is unavailable.
    const profile = emptyKtxRelationshipProfileArtifact({
      connectionId: 'warehouse',
      driver: 'sqlite',
      reason: 'read_only_sql_unavailable',
    });

    const diagnostics = buildKtxRelationshipDiagnostics({
      connectionId: 'warehouse',
      generatedAt: '2026-05-07T12:00:00.000Z',
      artifacts,
      profile,
      warnings: [
        {
          code: 'connector_capability_missing',
          message: 'KTX scan connector cannot run standalone statistical relationship validation',
          recoverable: true,
          metadata: { capability: 'readOnlySql' },
        },
      ],
      thresholds: { acceptThreshold: 0.85, reviewThreshold: 0.55 },
    });

    expect(diagnostics.summary).toEqual({ accepted: 0, review: 1, rejected: 0, skipped: 0 });
    expect(diagnostics.noAcceptedReason).toBe('validation unavailable; review candidates written');
    expect(diagnostics.candidateCountsBySource).toEqual({ normalized_table_match: 1 });
    expect(diagnostics.validation).toEqual({
      available: false,
      sqlAvailable: false,
      queryCount: 0,
    });
    // The profile reason is reported as a profile warning; caller warnings are kept.
    expect(diagnostics.profileWarnings).toEqual(['read_only_sql_unavailable']);
    expect(diagnostics.warnings[0]).toMatchObject({ code: 'connector_capability_missing' });
  });

  it('explains empty relationship output as a no-candidate outcome', () => {
    // No resolved relationships, updates, or composites are supplied.
    const artifacts = buildKtxRelationshipArtifacts({ connectionId: 'warehouse' });
    const diagnostics = buildKtxRelationshipDiagnostics({
      connectionId: 'warehouse',
      generatedAt: '2026-05-07T12:00:00.000Z',
      artifacts,
      profile: emptyKtxRelationshipProfileArtifact({
        connectionId: 'warehouse',
        driver: 'sqlite',
        reason: 'relationship_profiling_not_run',
      }),
    });

    expect(diagnostics.summary).toEqual({ accepted: 0, review: 0, rejected: 0, skipped: 0 });
    expect(diagnostics.noAcceptedReason).toBe('no candidate pairs passed type compatibility');
    expect(diagnostics.candidateCountsBySource).toEqual({});
  });

  it('records composite relationship endpoints in relationship artifacts', () => {
    // A two-column (order_id, line_number) relationship on both endpoints.
    const artifacts = buildKtxRelationshipArtifacts({
      connectionId: 'warehouse',
      compositeRelationships: [
        {
          id: 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)',
          source: 'composite_profile_match',
          status: 'accepted',
          from: {
            tableId: 'order_line_allocations',
            columnIds: ['order_line_allocations.order_id', 'order_line_allocations.line_number'],
            table: { catalog: null, db: null, name: 'order_line_allocations' },
            columns: ['order_id', 'line_number'],
          },
          to: {
            tableId: 'order_lines',
            columnIds: ['order_lines.order_id', 'order_lines.line_number'],
            table: { catalog: null, db: null, name: 'order_lines' },
            columns: ['order_id', 'line_number'],
          },
          relationshipType: 'many_to_one',
          confidence: 0.95,
          validation: {
            targetUniqueness: 1,
            sourceCoverage: 1,
            violationCount: 0,
            violationRatio: 0,
            childDistinct: 2,
            parentDistinct: 2,
            overlap: 2,
            reasons: ['composite_validation_passed'],
          },
        },
      ],
    });

    // Both column lists keep their order; the reasons come from the validation block.
    expect(artifacts.accepted).toEqual([
      expect.objectContaining({
        id: 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)',
        source: 'composite_profile_match',
        from: expect.objectContaining({
          columnIds: ['order_line_allocations.order_id', 'order_line_allocations.line_number'],
          columns: ['order_id', 'line_number'],
        }),
        to: expect.objectContaining({
          columnIds: ['order_lines.order_id', 'order_lines.line_number'],
          columns: ['order_id', 'line_number'],
        }),
        reasons: ['composite_validation_passed'],
        validation: expect.objectContaining({ sourceCoverage: 1 }),
      }),
    ]);
  });
});
|