feat(setup): verify context readiness by database depth

This commit is contained in:
Andrey Avtomonov 2026-05-13 18:27:44 +02:00
parent 471fae60b3
commit 3fc75c1dbe
2 changed files with 117 additions and 26 deletions

View file

@ -45,7 +45,7 @@ async function writeReadyProject(projectDir: string, overrides: Partial<KtxProje
...defaults,
setup: { database_connection_ids: ['warehouse'] },
connections: {
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', context: { depth: 'deep' } },
docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' },
},
llm: {
@ -117,6 +117,8 @@ async function writeScanReport(
columnDescriptions: string;
embeddings: string;
manifestShards?: string[];
completedStages?: string[];
relationships?: { accepted: number; review: number; rejected: number; skipped: number };
},
) {
const reportDir = join(projectDir, 'raw-sources', 'warehouse', 'live-database', syncId);
@ -139,9 +141,11 @@ async function writeScanReport(
tableDescriptions: report.tableDescriptions,
columnDescriptions: report.columnDescriptions,
embeddings: report.embeddings,
...(report.relationships ? { relationships: report.relationships } : {}),
},
enrichmentState: {
completedStages: report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : [],
completedStages:
report.completedStages ?? (report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : []),
failedStages: report.tableDescriptions === 'failed' ? ['descriptions'] : [],
},
createdAt: syncId,
@ -152,12 +156,19 @@ async function writeScanReport(
);
}
async function writeReadyEnrichedScanReport(projectDir: string, syncId = '2026-05-09T10:00:00.000Z') {
/**
 * Test fixture helper: writes an enriched scan report through `writeScanReport`
 * in which descriptions, embeddings and relationships are all marked completed.
 *
 * @param projectDir - Project directory handed straight to `writeScanReport`.
 * @param syncId - Sync identifier for the report; defaults to a fixed timestamp.
 * @param overrides - Report fields that replace the "everything completed" defaults.
 */
async function writeReadyEnrichedScanReport(
  projectDir: string,
  syncId = '2026-05-09T10:00:00.000Z',
  overrides: Partial<Parameters<typeof writeScanReport>[2]> = {},
) {
  const fullyCompleted: Parameters<typeof writeScanReport>[2] = {
    mode: 'enriched',
    tableDescriptions: 'completed',
    columnDescriptions: 'completed',
    embeddings: 'completed',
    completedStages: ['descriptions', 'embeddings', 'relationships'],
    relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
  };
  // Caller-supplied fields win over the defaults.
  await writeScanReport(projectDir, syncId, { ...fullyCompleted, ...overrides });
}
@ -504,6 +515,65 @@ describe('setup context build state', () => {
expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' });
});
it('requires completed relationships for deep context when relationship discovery is enabled', async () => {
  // Deep-depth warehouse connection with relationship discovery switched on.
  await writeReadyProject(tempDir, {
    connections: {
      warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } },
    },
    scan: { relationships: { enabled: true } },
  });

  // One semantic-layer YAML file on disk.
  const semanticLayerDir = join(tempDir, 'semantic-layer', 'dbt-main');
  await mkdir(semanticLayerDir, { recursive: true });
  await writeFile(join(semanticLayerDir, 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n');

  const zeroRelationships = () => ({ accepted: 0, review: 0, rejected: 0, skipped: 0 });

  // Existing report: its completed stages do not include 'relationships'.
  await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', {
    completedStages: ['descriptions', 'embeddings'],
    relationships: zeroRelationships(),
  });

  const { io } = makeIo();
  // The mocked build writes a newer report whose completed stages include 'relationships'.
  const runContextBuildMock = vi.fn(async () => {
    await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:01:00.000Z', {
      completedStages: ['descriptions', 'embeddings', 'relationships'],
      relationships: zeroRelationships(),
    });
    return { exitCode: 0 };
  });

  const stepResult = runKtxSetupContextStep({ projectDir: tempDir, inputMode: 'disabled' }, io, {
    runContextBuild: runContextBuildMock,
  });

  await expect(stepResult).resolves.toMatchObject({ status: 'ready' });
  // The build must have run exactly once before the step reported ready.
  expect(runContextBuildMock).toHaveBeenCalledOnce();
});
it('does not require relationships for deep context when relationship discovery is disabled', async () => {
  // Deep-depth warehouse connection, relationship discovery switched off.
  await writeReadyProject(tempDir, {
    connections: {
      warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } },
    },
    scan: { relationships: { enabled: false } },
  });

  // One semantic-layer YAML file on disk.
  const semanticLayerDir = join(tempDir, 'semantic-layer', 'dbt-main');
  await mkdir(semanticLayerDir, { recursive: true });
  await writeFile(join(semanticLayerDir, 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n');

  // The report's completed stages list descriptions and embeddings only.
  await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', {
    completedStages: ['descriptions', 'embeddings'],
  });

  const { io } = makeIo();
  const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 }));

  const stepResult = runKtxSetupContextStep({ projectDir: tempDir, inputMode: 'disabled' }, io, {
    runContextBuild: runContextBuildMock,
  });

  await expect(stepResult).resolves.toMatchObject({ status: 'ready' });
  // The step reports ready without invoking the build.
  expect(runContextBuildMock).not.toHaveBeenCalled();
});
it('refuses empty setup context builds', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),

View file

@ -446,7 +446,21 @@ async function readLatestScanReport(projectDir: string, connectionId: string): P
return reports.at(-1)?.report ?? null;
}
function scanReportHasCompletedDescriptionEnrichment(report: unknown, connectionId: string): boolean {
/**
 * Reports whether a scan report carries at least one schema manifest shard.
 *
 * The report is rejected when it is not a record, when its `connectionId`
 * differs from the one requested, or when it is flagged `dryRun: true`.
 *
 * @param report - Untrusted scan report value.
 * @param connectionId - Connection the report must belong to.
 * @returns `true` when `artifactPaths.manifestShards` yields a non-empty list.
 */
function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean {
  if (!isRecord(report) || report.connectionId !== connectionId || report.dryRun === true) {
    return false;
  }
  // A non-record `artifactPaths` is handed to stringArrayValue as undefined.
  const shardSource = isRecord(report.artifactPaths) ? report.artifactPaths.manifestShards : undefined;
  const manifestShards = stringArrayValue(shardSource);
  return manifestShards.length > 0;
}
function scanReportHasCompletedDeepEnrichment(
report: unknown,
connectionId: string,
relationshipsRequired: boolean,
): boolean {
if (!isRecord(report)) {
return false;
}
@ -463,39 +477,39 @@ function scanReportHasCompletedDescriptionEnrichment(report: unknown, connection
report.enrichment.embeddings === 'completed' &&
completedStages.includes('descriptions') &&
completedStages.includes('embeddings') &&
(!relationshipsRequired || completedStages.includes('relationships')) &&
stringArrayValue(report.artifactPaths.manifestShards).length > 0
);
}
function scanReportHasCompletedSchemaManifest(report: unknown, connectionId: string): boolean {
if (!isRecord(report)) {
return false;
function scanReportSatisfiesDepth(input: {
report: unknown;
connectionId: string;
depth: KtxDatabaseContextDepth;
relationshipsRequired: boolean;
}): boolean {
if (input.depth === 'fast') {
return scanReportHasSchemaManifest(input.report, input.connectionId);
}
if (report.connectionId !== connectionId || report.dryRun === true) {
return false;
}
if (!isRecord(report.artifactPaths)) {
return false;
}
return stringArrayValue(report.artifactPaths.manifestShards).length > 0;
return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired);
}
async function verifyPrimarySourceScans(
project: KtxLocalProject,
projectDir: string,
connectionIds: string[],
): Promise<{ ready: boolean; details: string[] }> {
const details: string[] = [];
const relationshipsRequired = project.config.scan.relationships.enabled;
for (const connectionId of connectionIds) {
const connection = project.config.connections[connectionId];
const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast';
const report = await readLatestScanReport(projectDir, connectionId);
const ready =
depth === 'fast'
? scanReportHasCompletedSchemaManifest(report, connectionId)
: scanReportHasCompletedDescriptionEnrichment(report, connectionId);
if (!ready) {
details.push(`${connectionId}: enriched database scan with AI descriptions has not completed.`);
const report = await readLatestScanReport(project.projectDir, connectionId);
if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) {
details.push(
depth === 'fast'
? `${connectionId}: schema context has not completed.`
: `${connectionId}: deep database context has not completed.`,
);
}
}
return { ready: details.length === 0, details };
@ -504,7 +518,7 @@ async function verifyPrimarySourceScans(
async function defaultVerifyContextReady(projectDir: string): Promise<KtxSetupContextReadiness> {
const project = await loadKtxProject({ projectDir });
const targets = listContextTargets(project);
const primarySourceScans = await verifyPrimarySourceScans(project, projectDir, targets.primarySourceConnectionIds);
const primarySourceScans = await verifyPrimarySourceScans(project, targets.primarySourceConnectionIds);
const semanticLayerContextReady = await hasFileWithExtension(
join(projectDir, 'semantic-layer'),
new Set(['.yaml', '.yml']),
@ -560,14 +574,21 @@ function writeSkippedContext(projectDir: string, io: KtxCliIo): void {
io.stdout.write(`Check status:\n ktx status --project-dir ${resolve(projectDir)}\n`);
}
function writeSuccess(readiness: KtxSetupContextReadiness, targets: KtxSetupContextTargets, io: KtxCliIo): void {
function writeSuccess(
project: KtxLocalProject,
readiness: KtxSetupContextReadiness,
targets: KtxSetupContextTargets,
io: KtxCliIo,
): void {
io.stdout.write('\nKTX context is ready for agents.\n\n');
io.stdout.write('Primary sources:\n');
if (targets.primarySourceConnectionIds.length === 0) {
io.stdout.write(' none\n');
} else {
for (const connectionId of targets.primarySourceConnectionIds) {
io.stdout.write(` ${connectionId}: enriched scan complete\n`);
const connection = project.config.connections[connectionId];
const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast';
io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 'deep context complete' : 'schema context complete'}\n`);
}
}
io.stdout.write('\nContext sources:\n');
@ -727,7 +748,7 @@ async function runBuild(
retryableFailedTargets: [],
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
});
writeSuccess(readiness, targets, io);
writeSuccess(project, readiness, targets, io);
return { status: 'ready', projectDir: args.projectDir, runId };
}