mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(setup): verify context readiness by database depth
This commit is contained in:
parent
471fae60b3
commit
3fc75c1dbe
2 changed files with 117 additions and 26 deletions
|
|
@ -45,7 +45,7 @@ async function writeReadyProject(projectDir: string, overrides: Partial<KtxProje
|
|||
...defaults,
|
||||
setup: { database_connection_ids: ['warehouse'] },
|
||||
connections: {
|
||||
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' },
|
||||
warehouse: { driver: 'postgres', url: 'env:DATABASE_URL', context: { depth: 'deep' } },
|
||||
docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' },
|
||||
},
|
||||
llm: {
|
||||
|
|
@ -117,6 +117,8 @@ async function writeScanReport(
|
|||
columnDescriptions: string;
|
||||
embeddings: string;
|
||||
manifestShards?: string[];
|
||||
completedStages?: string[];
|
||||
relationships?: { accepted: number; review: number; rejected: number; skipped: number };
|
||||
},
|
||||
) {
|
||||
const reportDir = join(projectDir, 'raw-sources', 'warehouse', 'live-database', syncId);
|
||||
|
|
@ -139,9 +141,11 @@ async function writeScanReport(
|
|||
tableDescriptions: report.tableDescriptions,
|
||||
columnDescriptions: report.columnDescriptions,
|
||||
embeddings: report.embeddings,
|
||||
...(report.relationships ? { relationships: report.relationships } : {}),
|
||||
},
|
||||
enrichmentState: {
|
||||
completedStages: report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : [],
|
||||
completedStages:
|
||||
report.completedStages ?? (report.tableDescriptions === 'completed' ? ['descriptions', 'embeddings'] : []),
|
||||
failedStages: report.tableDescriptions === 'failed' ? ['descriptions'] : [],
|
||||
},
|
||||
createdAt: syncId,
|
||||
|
|
@ -152,12 +156,19 @@ async function writeScanReport(
|
|||
);
|
||||
}
|
||||
|
||||
async function writeReadyEnrichedScanReport(projectDir: string, syncId = '2026-05-09T10:00:00.000Z') {
|
||||
/**
 * Test helper that writes a fully "ready" enriched scan report through `writeScanReport`.
 *
 * The default report uses mode 'enriched', marks table descriptions, column
 * descriptions and embeddings as 'completed', lists 'descriptions',
 * 'embeddings' and 'relationships' as completed stages, and carries all-zero
 * relationship counts.
 *
 * @param projectDir - Forwarded to `writeScanReport` as its first argument.
 * @param syncId - Forwarded to `writeScanReport` as its second argument; defaults to '2026-05-09T10:00:00.000Z'.
 * @param overrides - Partial report fields merged over the defaults described above.
 */
async function writeReadyEnrichedScanReport(
|
||||
projectDir: string,
|
||||
syncId = '2026-05-09T10:00:00.000Z',
|
||||
overrides: Partial<Parameters<typeof writeScanReport>[2]> = {},
|
||||
) {
|
||||
await writeScanReport(projectDir, syncId, {
|
||||
mode: 'enriched',
|
||||
tableDescriptions: 'completed',
|
||||
columnDescriptions: 'completed',
|
||||
embeddings: 'completed',
|
||||
completedStages: ['descriptions', 'embeddings', 'relationships'],
|
||||
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
|
||||
// Spread last so caller-supplied fields replace the defaults listed above.
...overrides,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -504,6 +515,65 @@ describe('setup context build state', () => {
|
|||
expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' });
|
||||
});
|
||||
|
||||
// Deep-depth connection + relationship discovery enabled: a latest scan report
// whose completedStages lacks 'relationships' must trigger exactly one context
// build, after which the step resolves as 'ready'.
it('requires completed relationships for deep context when relationship discovery is enabled', async () => {
|
||||
await writeReadyProject(tempDir, {
|
||||
connections: {
|
||||
warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } },
|
||||
},
|
||||
scan: { relationships: { enabled: true } },
|
||||
});
|
||||
// Create a semantic-layer YAML file — presumably so the readiness check that
// looks for .yaml/.yml files under semantic-layer/ passes; confirm against
// the default readiness verifier.
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });
|
||||
await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n');
|
||||
// Initial report (10:00): completedStages omits 'relationships'.
await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', {
|
||||
completedStages: ['descriptions', 'embeddings'],
|
||||
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
|
||||
});
|
||||
const io = makeIo();
|
||||
// The mocked build writes a second report under a later sync id (10:01) whose
// completedStages includes 'relationships', then reports success.
const runContextBuildMock = vi.fn(async () => {
|
||||
await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:01:00.000Z', {
|
||||
completedStages: ['descriptions', 'embeddings', 'relationships'],
|
||||
relationships: { accepted: 0, review: 0, rejected: 0, skipped: 0 },
|
||||
});
|
||||
return { exitCode: 0 };
|
||||
});
|
||||
|
||||
await expect(
|
||||
runKtxSetupContextStep(
|
||||
{ projectDir: tempDir, inputMode: 'disabled' },
|
||||
io.io,
|
||||
{ runContextBuild: runContextBuildMock },
|
||||
),
|
||||
).resolves.toMatchObject({ status: 'ready' });
|
||||
|
||||
// The incomplete initial report must have caused exactly one build.
expect(runContextBuildMock).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
// Deep-depth connection + relationship discovery disabled: a report whose
// completedStages lacks 'relationships' is already sufficient, so the step
// resolves as 'ready' without invoking the context build.
it('does not require relationships for deep context when relationship discovery is disabled', async () => {
|
||||
await writeReadyProject(tempDir, {
|
||||
connections: {
|
||||
warehouse: { driver: 'postgres', readonly: true, context: { depth: 'deep' } },
|
||||
},
|
||||
scan: { relationships: { enabled: false } },
|
||||
});
|
||||
// Create a semantic-layer YAML file — presumably so the readiness check that
// looks for .yaml/.yml files under semantic-layer/ passes; confirm against
// the default readiness verifier.
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });
|
||||
await writeFile(join(tempDir, 'semantic-layer', 'dbt-main', 'mart_revenue_daily.yaml'), 'name: mart_revenue_daily\n');
|
||||
// Only completedStages is overridden; it omits 'relationships'. The other
// report fields keep the helper's defaults.
await writeReadyEnrichedScanReport(tempDir, '2026-05-09T10:00:00.000Z', {
|
||||
completedStages: ['descriptions', 'embeddings'],
|
||||
});
|
||||
const io = makeIo();
|
||||
const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 }));
|
||||
|
||||
await expect(
|
||||
runKtxSetupContextStep(
|
||||
{ projectDir: tempDir, inputMode: 'disabled' },
|
||||
io.io,
|
||||
{ runContextBuild: runContextBuildMock },
|
||||
),
|
||||
).resolves.toMatchObject({ status: 'ready' });
|
||||
|
||||
// No build should have been needed.
expect(runContextBuildMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('refuses empty setup context builds', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
|
|
|
|||
|
|
@ -446,7 +446,21 @@ async function readLatestScanReport(projectDir: string, connectionId: string): P
|
|||
return reports.at(-1)?.report ?? null;
|
||||
}
|
||||
|
||||
function scanReportHasCompletedDescriptionEnrichment(report: unknown, connectionId: string): boolean {
|
||||
/**
 * Reports whether a scan report lists at least one manifest shard for the given connection.
 *
 * @param report - Untyped scan report value; anything that is not a record is rejected.
 * @param connectionId - Connection id the report's `connectionId` must equal.
 * @returns `false` when `report` is not a record, belongs to a different
 *   connection, or has `dryRun === true`; otherwise whether
 *   `stringArrayValue` of `artifactPaths.manifestShards` is non-empty.
 */
function scanReportHasSchemaManifest(report: unknown, connectionId: string): boolean {
|
||||
if (!isRecord(report)) {
|
||||
return false;
|
||||
}
|
||||
if (report.connectionId !== connectionId || report.dryRun === true) {
|
||||
return false;
|
||||
}
|
||||
// When artifactPaths is not a record, `undefined` is handed to stringArrayValue
// (presumably yielding an empty list — confirm against that helper).
return stringArrayValue(isRecord(report.artifactPaths) ? report.artifactPaths.manifestShards : undefined).length > 0;
|
||||
}
|
||||
|
||||
function scanReportHasCompletedDeepEnrichment(
|
||||
report: unknown,
|
||||
connectionId: string,
|
||||
relationshipsRequired: boolean,
|
||||
): boolean {
|
||||
if (!isRecord(report)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -463,39 +477,39 @@ function scanReportHasCompletedDescriptionEnrichment(report: unknown, connection
|
|||
report.enrichment.embeddings === 'completed' &&
|
||||
completedStages.includes('descriptions') &&
|
||||
completedStages.includes('embeddings') &&
|
||||
(!relationshipsRequired || completedStages.includes('relationships')) &&
|
||||
stringArrayValue(report.artifactPaths.manifestShards).length > 0
|
||||
);
|
||||
}
|
||||
|
||||
function scanReportHasCompletedSchemaManifest(report: unknown, connectionId: string): boolean {
|
||||
if (!isRecord(report)) {
|
||||
return false;
|
||||
/**
 * Checks a scan report against the readiness requirement for a context depth.
 *
 * For depth 'fast' the result is `scanReportHasSchemaManifest(report, connectionId)`.
 * The final statement of the body delegates to
 * `scanReportHasCompletedDeepEnrichment(report, connectionId, relationshipsRequired)`.
 *
 * NOTE(review): as rendered here, the statements between the 'fast' branch and
 * that final return use bare `report` / `connectionId`, which this function
 * does not declare (its only parameter is `input`). They look like removed
 * lines of the previous implementation interleaved by the diff view — confirm
 * against the real file before relying on this text.
 */
function scanReportSatisfiesDepth(input: {
|
||||
report: unknown;
|
||||
connectionId: string;
|
||||
depth: KtxDatabaseContextDepth;
|
||||
relationshipsRequired: boolean;
|
||||
}): boolean {
|
||||
if (input.depth === 'fast') {
|
||||
return scanReportHasSchemaManifest(input.report, input.connectionId);
|
||||
}
|
||||
if (report.connectionId !== connectionId || report.dryRun === true) {
|
||||
return false;
|
||||
}
|
||||
if (!isRecord(report.artifactPaths)) {
|
||||
return false;
|
||||
}
|
||||
return stringArrayValue(report.artifactPaths.manifestShards).length > 0;
|
||||
return scanReportHasCompletedDeepEnrichment(input.report, input.connectionId, input.relationshipsRequired);
|
||||
}
|
||||
|
||||
async function verifyPrimarySourceScans(
|
||||
project: KtxLocalProject,
|
||||
projectDir: string,
|
||||
connectionIds: string[],
|
||||
): Promise<{ ready: boolean; details: string[] }> {
|
||||
const details: string[] = [];
|
||||
const relationshipsRequired = project.config.scan.relationships.enabled;
|
||||
for (const connectionId of connectionIds) {
|
||||
const connection = project.config.connections[connectionId];
|
||||
const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast';
|
||||
const report = await readLatestScanReport(projectDir, connectionId);
|
||||
const ready =
|
||||
depth === 'fast'
|
||||
? scanReportHasCompletedSchemaManifest(report, connectionId)
|
||||
: scanReportHasCompletedDescriptionEnrichment(report, connectionId);
|
||||
if (!ready) {
|
||||
details.push(`${connectionId}: enriched database scan with AI descriptions has not completed.`);
|
||||
const report = await readLatestScanReport(project.projectDir, connectionId);
|
||||
if (!scanReportSatisfiesDepth({ report, connectionId, depth, relationshipsRequired })) {
|
||||
details.push(
|
||||
depth === 'fast'
|
||||
? `${connectionId}: schema context has not completed.`
|
||||
: `${connectionId}: deep database context has not completed.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
return { ready: details.length === 0, details };
|
||||
|
|
@ -504,7 +518,7 @@ async function verifyPrimarySourceScans(
|
|||
async function defaultVerifyContextReady(projectDir: string): Promise<KtxSetupContextReadiness> {
|
||||
const project = await loadKtxProject({ projectDir });
|
||||
const targets = listContextTargets(project);
|
||||
const primarySourceScans = await verifyPrimarySourceScans(project, projectDir, targets.primarySourceConnectionIds);
|
||||
const primarySourceScans = await verifyPrimarySourceScans(project, targets.primarySourceConnectionIds);
|
||||
const semanticLayerContextReady = await hasFileWithExtension(
|
||||
join(projectDir, 'semantic-layer'),
|
||||
new Set(['.yaml', '.yml']),
|
||||
|
|
@ -560,14 +574,21 @@ function writeSkippedContext(projectDir: string, io: KtxCliIo): void {
|
|||
io.stdout.write(`Check status:\n ktx status --project-dir ${resolve(projectDir)}\n`);
|
||||
}
|
||||
|
||||
function writeSuccess(readiness: KtxSetupContextReadiness, targets: KtxSetupContextTargets, io: KtxCliIo): void {
|
||||
function writeSuccess(
|
||||
project: KtxLocalProject,
|
||||
readiness: KtxSetupContextReadiness,
|
||||
targets: KtxSetupContextTargets,
|
||||
io: KtxCliIo,
|
||||
): void {
|
||||
io.stdout.write('\nKTX context is ready for agents.\n\n');
|
||||
io.stdout.write('Primary sources:\n');
|
||||
if (targets.primarySourceConnectionIds.length === 0) {
|
||||
io.stdout.write(' none\n');
|
||||
} else {
|
||||
for (const connectionId of targets.primarySourceConnectionIds) {
|
||||
io.stdout.write(` ${connectionId}: enriched scan complete\n`);
|
||||
const connection = project.config.connections[connectionId];
|
||||
const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast';
|
||||
io.stdout.write(` ${connectionId}: ${depth === 'deep' ? 'deep context complete' : 'schema context complete'}\n`);
|
||||
}
|
||||
}
|
||||
io.stdout.write('\nContext sources:\n');
|
||||
|
|
@ -727,7 +748,7 @@ async function runBuild(
|
|||
retryableFailedTargets: [],
|
||||
...(lastSourceProgress ? { sourceProgress: lastSourceProgress } : {}),
|
||||
});
|
||||
writeSuccess(readiness, targets, io);
|
||||
writeSuccess(project, readiness, targets, io);
|
||||
return { status: 'ready', projectDir: args.projectDir, runId };
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue