mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-16 08:25:14 +02:00
Align the tree with AGENTS.md/CLAUDE.md conventions: - Rewrite user-facing strings, docs, and tests to lowercase `ktx` (no bare uppercase `KTX` tokens remain outside literal identifiers). - Drop the legacy `historicSql` migration path and its now-unused helpers, per the no-backward-compat rule. - Remove `as unknown as` / `any` casts: narrow `BaseTool` generics to `z.ZodObject`, add a typed `createLookerClient`, and delete the dead `getParametersSchema`/`toAnthropicFormat` pre-AI-SDK helpers. - Use `InvalidArgumentError` for Commander parse failures. - Finish the adapter→connector prose conversion in the `ktx.yaml` docs while keeping the literal `adapters` config key.
451 lines
16 KiB
TypeScript
451 lines
16 KiB
TypeScript
import { describe, expect, it } from 'vitest';
|
|
import {
|
|
buildKtxRelationshipBenchmarkReport,
|
|
formatKtxRelationshipBenchmarkReportMarkdown,
|
|
} from '../../../src/context/scan/relationship-benchmark-report.js';
|
|
import type {
|
|
KtxRelationshipBenchmarkCaseResult,
|
|
KtxRelationshipBenchmarkFixture,
|
|
KtxRelationshipBenchmarkSuiteResult,
|
|
} from '../../../src/context/scan/relationship-benchmarks.js';
|
|
|
|
/**
 * Overrides accepted by the `caseResult` test helper: every top-level field of a
 * benchmark case result is optional, and `metrics` may itself be supplied partially.
 */
type CaseResultOverrides = Partial<Omit<KtxRelationshipBenchmarkCaseResult, 'metrics'>> & {
  metrics?: Partial<KtxRelationshipBenchmarkCaseResult['metrics']>;
};
|
|
|
|
/**
 * Builds a benchmark case result for the tests below.
 *
 * Top-level fields fall back to a fixed demo case when the override is nullish;
 * `metrics` overrides are merged key-by-key on top of the baseline metrics.
 *
 * @param overrides - fields (and individual metrics) to replace in the default case
 * @returns a fully populated case result
 */
function caseResult(overrides: CaseResultOverrides = {}): KtxRelationshipBenchmarkCaseResult {
  const {
    fixtureId,
    mode,
    metrics,
    expected,
    predicted,
    falsePositives,
    falseNegatives,
    skippedComposite,
    validationBlocked,
  } = overrides;

  // Baseline numbers; any metric present in `overrides.metrics` wins over these.
  const baselineMetrics: KtxRelationshipBenchmarkCaseResult['metrics'] = {
    pkPrecision: 1,
    pkRecall: 0.5,
    pkF1: 0.6666666666666666,
    fkPrecision: 1,
    fkRecall: 1,
    fkF1: 1,
    acceptedFalsePositiveCount: 0,
    reviewRecall: 0,
    acceptedOrReviewRecall: 1,
    runtimeSeconds: 0.012345,
    sqlQueries: 14,
    llmCalls: 0,
  };

  return {
    fixtureId: fixtureId ?? 'demo_b2b_no_declared_constraints',
    mode: mode ?? 'declared_pks_and_declared_fks_removed',
    metrics: { ...baselineMetrics, ...metrics },
    expected: expected ?? {
      pk: ['accounts.(id)', 'users.(id)'],
      fk: ['users.(account_id)->accounts.(id)'],
    },
    predicted: predicted ?? {
      pk: ['accounts.(id)'],
      fk: ['users.(account_id)->accounts.(id)'],
      acceptedFk: ['users.(account_id)->accounts.(id)'],
      reviewFk: [],
    },
    falsePositives: falsePositives ?? { pk: [], fk: [] },
    falseNegatives: falseNegatives ?? { pk: ['users.(id)'], fk: [] },
    skippedComposite: skippedComposite ?? { pk: [], fk: [] },
    validationBlocked: validationBlocked ?? false,
  };
}
|
|
|
|
/**
 * Builds a benchmark fixture for the tests below.
 *
 * Nullish overrides fall back to a smoke-tier synthetic sqlite demo fixture.
 * `thresholdEligible` and `validationBudget` have no default and are passed
 * through exactly as given (including `undefined`).
 *
 * @param overrides - fixture fields to replace
 * @returns a fully populated fixture
 */
function fixture(overrides: Partial<KtxRelationshipBenchmarkFixture> = {}): KtxRelationshipBenchmarkFixture {
  const {
    id,
    name,
    tier,
    origin,
    thresholdEligible,
    validationBudget,
    snapshot,
    expected,
    defaultModes,
    dataPath,
    columnEmbeddings,
  } = overrides;

  return {
    id: id ?? 'demo_b2b_no_declared_constraints',
    name: name ?? 'Packaged B2B demo with declared PK and FK metadata masked',
    tier: tier ?? 'smoke',
    origin: origin ?? 'synthetic',
    thresholdEligible,
    validationBudget,
    snapshot: snapshot ?? {
      connectionId: 'demo_b2b',
      driver: 'sqlite',
      extractedAt: '2026-05-07T00:00:00.000Z',
      scope: {},
      metadata: {},
      tables: [],
    },
    expected: expected ?? { expectedPks: [], expectedLinks: [] },
    defaultModes: defaultModes ?? ['declared_pks_and_declared_fks_removed', 'validation_disabled'],
    dataPath: dataPath ?? '/tmp/demo.sqlite',
    columnEmbeddings: columnEmbeddings ?? {},
  };
}
|
|
|
|
describe('relationship benchmark report', () => {
|
|
it('classifies run, validation-blocked, and not-run benchmark cases', () => {
  // One case that ran normally and one that the suite reports as validation-blocked.
  const ranCase = caseResult();
  const blockedCase = caseResult({
    mode: 'validation_disabled',
    validationBlocked: true,
    metrics: { fkRecall: 0, acceptedOrReviewRecall: 1, sqlQueries: 0 },
    predicted: {
      pk: ['accounts.(id)'],
      fk: ['users.(account_id)->accounts.(id)'],
      acceptedFk: [],
      reviewFk: ['users.(account_id)->accounts.(id)'],
    },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [ranCase, blockedCase],
    validationBlockedCases: ['demo_b2b_no_declared_constraints:validation_disabled'],
    aggregate: {
      caseCount: 2,
      headlineCaseCount: 1,
      headlinePkRecall: 0.5,
      headlineFkRecall: 1,
      headlineAcceptedOrReviewRecall: 1,
      meanPkRecall: 0.5,
      meanFkRecall: 0.5,
      meanAcceptedOrReviewRecall: 1,
    },
  };

  // A third mode is requested that has no case in the suite.
  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [fixture()],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed', 'validation_disabled', 'profiling_disabled'],
  });

  expect(built.headline).toEqual({
    caseCount: 2,
    headlineCaseCount: 1,
    headlinePkRecall: 0.5,
    headlineFkRecall: 1,
    headlineAcceptedOrReviewRecall: 1,
    acceptedFalsePositiveCount: 0,
    validationBlockedCount: 1,
  });

  const statusKeys = built.cases.map((row) => `${row.fixtureId}:${row.mode}:${row.status}`);
  expect(statusKeys).toEqual([
    'demo_b2b_no_declared_constraints:declared_pks_and_declared_fks_removed:run',
    'demo_b2b_no_declared_constraints:validation_disabled:validation_blocked',
    'demo_b2b_no_declared_constraints:profiling_disabled:not_run',
  ]);

  const notRunRow = built.cases[2];
  expect(notRunRow?.reason).toBe('mode not selected by fixture defaultModes');
});
|
|
|
|
it('surfaces validation budget review candidates in the report reason', () => {
  // Two predicted FKs: one accepted, one left as a review candidate.
  const budgetedCase = caseResult({
    fixtureId: 'scale_stress_no_declared_constraints',
    metrics: { fkRecall: 0.5, acceptedOrReviewRecall: 1 },
    predicted: {
      pk: ['dim_entity_00.(entity_00_key)'],
      fk: [
        'fact_activity_000.(entity_00_key)->dim_entity_00.(entity_00_key)',
        'fact_activity_001.(entity_00_key)->dim_entity_00.(entity_00_key)',
      ],
      acceptedFk: ['fact_activity_000.(entity_00_key)->dim_entity_00.(entity_00_key)'],
      reviewFk: ['fact_activity_001.(entity_00_key)->dim_entity_00.(entity_00_key)'],
    },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [budgetedCase],
    validationBlockedCases: [],
    aggregate: {
      caseCount: 1,
      headlineCaseCount: 0,
      headlinePkRecall: 1,
      headlineFkRecall: 0.5,
      headlineAcceptedOrReviewRecall: 1,
      meanPkRecall: 1,
      meanFkRecall: 0.5,
      meanAcceptedOrReviewRecall: 1,
    },
  };

  const stressFixture = fixture({
    id: 'scale_stress_no_declared_constraints',
    name: 'Scale stress fixture',
    tier: 'row_bearing',
    validationBudget: 800,
    defaultModes: ['declared_pks_and_declared_fks_removed'],
  });

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [stressFixture],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed'],
  });

  // The reason must appear both on the structured row and in the rendered Markdown.
  const firstRow = built.cases[0];
  expect(firstRow?.reason).toBe('review candidate validation reasons: validation_unattempted (1)');

  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);
  expect(rendered).toContain('validation_unattempted');
});
|
|
|
|
it('uses benchmark suite eligibility for product and smoke report rows', () => {
  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [
      caseResult({ fixtureId: 'product_curated' }),
      caseResult({
        fixtureId: 'product_curated',
        mode: 'validation_disabled',
        validationBlocked: true,
        metrics: { fkRecall: 0, acceptedOrReviewRecall: 1, sqlQueries: 0 },
      }),
      caseResult({ fixtureId: 'smoke_even_if_marked' }),
    ],
    validationBlockedCases: ['product_curated:validation_disabled'],
    aggregate: {
      caseCount: 3,
      headlineCaseCount: 1,
      headlinePkRecall: 0.5,
      headlineFkRecall: 1,
      headlineAcceptedOrReviewRecall: 1,
      meanPkRecall: 0.5,
      meanFkRecall: 0.6666666666666666,
      meanAcceptedOrReviewRecall: 1,
    },
  };

  // Both fixtures are flagged thresholdEligible; they differ in tier.
  const productFixture = fixture({
    id: 'product_curated',
    name: 'Curated product fixture',
    tier: 'product',
    thresholdEligible: true,
    defaultModes: ['declared_pks_and_declared_fks_removed', 'validation_disabled'],
  });
  const smokeFixture = fixture({
    id: 'smoke_even_if_marked',
    name: 'Marked smoke fixture',
    tier: 'smoke',
    thresholdEligible: true,
    defaultModes: ['declared_pks_and_declared_fks_removed'],
  });

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [productFixture, smokeFixture],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed', 'validation_disabled'],
  });

  const eligibilityKeys = built.cases.map((row) => `${row.fixtureId}:${row.mode}:${row.tuningEligible}`);
  expect(eligibilityKeys).toEqual([
    'product_curated:declared_pks_and_declared_fks_removed:true',
    'product_curated:validation_disabled:false',
    'smoke_even_if_marked:declared_pks_and_declared_fks_removed:false',
    'smoke_even_if_marked:validation_disabled:false',
  ]);

  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);
  expect(rendered).toContain('| product_curated | product | declared_pks_and_declared_fks_removed | run | yes |');
});
|
|
|
|
it('formats a compact Markdown report with false negatives and blocked modes', () => {
  // A single case that misses one PK and one FK.
  const missingCase = caseResult({
    metrics: { fkRecall: 0, acceptedOrReviewRecall: 0 },
    falseNegatives: { pk: ['users.(id)'], fk: ['users.(account_id)->accounts.(id)'] },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [missingCase],
    validationBlockedCases: [],
    aggregate: {
      caseCount: 1,
      headlineCaseCount: 1,
      headlinePkRecall: 0.5,
      headlineFkRecall: 0,
      headlineAcceptedOrReviewRecall: 0,
      meanPkRecall: 0.5,
      meanFkRecall: 0,
      meanAcceptedOrReviewRecall: 0,
    },
  };

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [fixture()],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed'],
  });
  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);

  // Title, the summary table row, then one bullet per false negative.
  expect(rendered).toContain('# ktx Relationship Discovery Benchmark Evidence');
  expect(rendered).toContain(
    '| demo_b2b_no_declared_constraints | smoke | declared_pks_and_declared_fks_removed | run | no | 0.500 | 0.000 | 0.000 | 0 |',
  );
  expect(rendered).toContain(
    '- `demo_b2b_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: users.(id)',
  );
  expect(rendered).toContain(
    '- `demo_b2b_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: users.(account_id)->accounts.(id)',
  );
});
|
|
|
|
it('keeps headline failures separate from non-headline failure details', () => {
  // The default-mode case has no misses; the embeddings_disabled case misses a PK and an FK.
  const cleanCase = caseResult({
    fixtureId: 'product_curated',
    falseNegatives: { pk: [], fk: [] },
    metrics: { pkRecall: 1, fkRecall: 1, acceptedOrReviewRecall: 1 },
  });
  const degradedCase = caseResult({
    fixtureId: 'product_curated',
    mode: 'embeddings_disabled',
    falseNegatives: {
      pk: ['customers.(id)'],
      fk: ['orders.(buyer_ref)->customers.(id)'],
    },
    metrics: { pkRecall: 0.5, fkRecall: 0, acceptedOrReviewRecall: 0 },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [cleanCase, degradedCase],
    validationBlockedCases: [],
    aggregate: {
      caseCount: 2,
      headlineCaseCount: 1,
      headlinePkRecall: 1,
      headlineFkRecall: 1,
      headlineAcceptedOrReviewRecall: 1,
      meanPkRecall: 0.75,
      meanFkRecall: 0.5,
      meanAcceptedOrReviewRecall: 0.5,
    },
  };

  const productFixture = fixture({
    id: 'product_curated',
    name: 'Curated product fixture',
    tier: 'product',
    thresholdEligible: true,
    defaultModes: ['declared_pks_and_declared_fks_removed', 'embeddings_disabled'],
  });

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [productFixture],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed', 'embeddings_disabled'],
  });
  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);

  expect(rendered).toContain('## Failure Details');
  // The headline FK section stays empty while the misses are still listed elsewhere.
  expect(rendered).toContain('### Headline False Negative FKs\n\n- none');
  expect(rendered).toContain(
    '- `product_curated` / `embeddings_disabled` / `run`: orders.(buyer_ref)->customers.(id)',
  );
  expect(rendered).toContain('- `product_curated` / `embeddings_disabled` / `run`: customers.(id)');
});
|
|
|
|
it('formats headline failure context from remaining headline false negatives', () => {
  // One case with exactly one missed PK and one missed FK.
  const headlineCase = caseResult({
    fixtureId: 'public_headline_fixture',
    metrics: { pkRecall: 0.5, fkRecall: 0, acceptedOrReviewRecall: 0 },
    falseNegatives: {
      pk: ['parent_table.(opaque_key)'],
      fk: ['child_table.(parent_table_id)->parent_table.(opaque_key)'],
    },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [headlineCase],
    validationBlockedCases: [],
    aggregate: {
      caseCount: 1,
      headlineCaseCount: 1,
      headlinePkRecall: 0.5,
      headlineFkRecall: 0,
      headlineAcceptedOrReviewRecall: 0,
      meanPkRecall: 0.5,
      meanFkRecall: 0,
      meanAcceptedOrReviewRecall: 0,
    },
  };

  const headlineFixture = fixture({
    id: 'public_headline_fixture',
    name: 'Public headline fixture',
    tier: 'row_bearing',
    thresholdEligible: true,
    defaultModes: ['declared_pks_and_declared_fks_removed'],
  });

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [headlineFixture],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed'],
  });
  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);

  // Section heading and the two counters.
  expect(rendered).toContain('## Headline Failure Context');
  expect(rendered).toContain('- Remaining headline false-negative PKs: 1');
  expect(rendered).toContain('- Remaining headline false-negative FKs: 1');

  // One bullet for each missed key.
  expect(rendered).toContain(
    '- `public_headline_fixture` / `declared_pks_and_declared_fks_removed` / `run`: parent_table.(opaque_key)',
  );
  expect(rendered).toContain(
    '- `public_headline_fixture` / `declared_pks_and_declared_fks_removed` / `run`: child_table.(parent_table_id)->parent_table.(opaque_key)',
  );
});
|
|
|
|
it('formats skipped composite ground truth separately from false-negative details', () => {
  const compositePk = 'order_lines.(order_id,line_number)';
  const compositeFk = 'order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)';

  // Nothing is predicted; the composite keys are both false negatives and skipped composites.
  const compositeCase = caseResult({
    fixtureId: 'composite_keys_no_declared_constraints',
    metrics: { pkRecall: 0, fkRecall: 0, acceptedOrReviewRecall: 0 },
    expected: { pk: [compositePk], fk: [compositeFk] },
    predicted: { pk: [], fk: [], acceptedFk: [], reviewFk: [] },
    falseNegatives: { pk: [compositePk], fk: [compositeFk] },
    skippedComposite: { pk: [compositePk], fk: [compositeFk] },
  });

  const suiteResult: KtxRelationshipBenchmarkSuiteResult = {
    cases: [compositeCase],
    validationBlockedCases: [],
    aggregate: {
      caseCount: 1,
      headlineCaseCount: 1,
      headlinePkRecall: 0,
      headlineFkRecall: 0,
      headlineAcceptedOrReviewRecall: 0,
      meanPkRecall: 0,
      meanFkRecall: 0,
      meanAcceptedOrReviewRecall: 0,
    },
  };

  const compositeFixture = fixture({
    id: 'composite_keys_no_declared_constraints',
    name: 'Composite key fixture with no declared constraints',
    tier: 'row_bearing',
    defaultModes: ['declared_pks_and_declared_fks_removed'],
  });

  const built = buildKtxRelationshipBenchmarkReport({
    fixtures: [compositeFixture],
    suite: suiteResult,
    modes: ['declared_pks_and_declared_fks_removed'],
  });

  // The structured row carries the skipped composites through unchanged.
  const firstRow = built.cases[0];
  expect(firstRow?.skippedComposite).toEqual({
    pk: [compositePk],
    fk: [compositeFk],
  });

  // The Markdown lists them in their own section and also under headline false negatives.
  const rendered = formatKtxRelationshipBenchmarkReportMarkdown(built);
  expect(rendered).toContain('## Composite Ground Truth Skips');
  expect(rendered).toContain(
    '### Skipped Composite PKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_lines.(order_id,line_number)',
  );
  expect(rendered).toContain(
    '### Skipped Composite FKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)',
  );
  expect(rendered).toContain(
    '### Headline False Negative FKs\n\n- `composite_keys_no_declared_constraints` / `declared_pks_and_declared_fks_removed` / `run`: order_line_allocations.(order_id,line_number)->order_lines.(order_id,line_number)',
  );
});
|
|
});
|