From 471fae60b37d4b886ccc42f5a4856274501138f3 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Wed, 13 May 2026 18:26:44 +0200 Subject: [PATCH] feat(setup): store database context depth --- packages/cli/src/setup-context.test.ts | 232 +++++++++++++++++++------ packages/cli/src/setup-context.ts | 141 ++++++++++++--- 2 files changed, 296 insertions(+), 77 deletions(-) diff --git a/packages/cli/src/setup-context.test.ts b/packages/cli/src/setup-context.test.ts index a5e45189..103027a3 100644 --- a/packages/cli/src/setup-context.test.ts +++ b/packages/cli/src/setup-context.test.ts @@ -1,7 +1,14 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { readKtxSetupState, writeKtxSetupState } from '@ktx/context/project'; +import { + buildDefaultKtxProjectConfig, + parseKtxProjectConfig, + readKtxSetupState, + serializeKtxProjectConfig, + type KtxProjectConfig, + writeKtxSetupState, +} from '@ktx/context/project'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { @@ -32,39 +39,70 @@ function makeIo() { }; } -async function writeReadyProject(projectDir: string) { - await writeFile( - join(projectDir, 'ktx.yaml'), - [ - 'project: revenue', - 'setup:', - ' database_connection_ids:', - ' - warehouse', - 'connections:', - ' warehouse:', - ' driver: postgres', - ' url: env:DATABASE_URL', - ' docs:', - ' driver: notion', - ' auth_token_ref: env:NOTION_TOKEN', - ' crawl_mode: all_accessible', - 'llm:', - ' provider:', - ' backend: anthropic', - ' models:', - ' default: claude-sonnet-4-6', - 'ingest:', - ' embeddings:', - ' backend: openai', - ' model: text-embedding-3-small', - ' dimensions: 1536', - 'scan:', - ' enrichment:', - ' mode: llm', - '', - ].join('\n'), - 'utf-8', - ); +async function writeReadyProject(projectDir: string, overrides: Partial = {}) { + const defaults = buildDefaultKtxProjectConfig('revenue'); + const readyConfig: KtxProjectConfig = { + ...defaults, + setup: { database_connection_ids: ['warehouse'] }, + connections: { + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, + docs: { driver: 'notion', auth_token_ref: 'env:NOTION_TOKEN', crawl_mode: 'all_accessible' }, + }, + llm: { + provider: { backend: 'anthropic' }, + models: { default: 'claude-sonnet-4-6' }, + }, + ingest: { + ...defaults.ingest, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + scan: { + ...defaults.scan, + enrichment: { + mode: 'llm', + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + }, + }, + }, + }; + const nextConfig: KtxProjectConfig = { + ...readyConfig, + ...overrides, + setup: overrides.setup ?? readyConfig.setup, + connections: overrides.connections ?? readyConfig.connections, + llm: { + ...readyConfig.llm, + ...overrides.llm, + provider: overrides.llm?.provider ?? readyConfig.llm.provider, + models: overrides.llm?.models ?? readyConfig.llm.models, + }, + ingest: { + ...readyConfig.ingest, + ...overrides.ingest, + embeddings: overrides.ingest?.embeddings ?? readyConfig.ingest.embeddings, + workUnits: overrides.ingest?.workUnits ?? readyConfig.ingest.workUnits, + }, + scan: { + ...readyConfig.scan, + ...overrides.scan, + enrichment: { + ...readyConfig.scan.enrichment, + ...(overrides.scan?.enrichment ?? {}), + }, + relationships: { + ...readyConfig.scan.relationships, + ...(overrides.scan?.relationships ?? {}), + }, + }, + }; + await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig(nextConfig), 'utf-8'); await writeKtxSetupState(projectDir, { completed_steps: ['project', 'llm', 'embeddings', 'databases', 'sources'], }); @@ -73,7 +111,13 @@ async function writeReadyProject(projectDir: string) { async function writeScanReport( projectDir: string, syncId: string, - report: { mode: string; tableDescriptions: string; columnDescriptions: string; embeddings: string }, + report: { + mode: string; + tableDescriptions: string; + columnDescriptions: string; + embeddings: string; + manifestShards?: string[]; + }, ) { const reportDir = join(projectDir, 'raw-sources', 'warehouse', 'live-database', syncId); await mkdir(reportDir, { recursive: true }); @@ -85,7 +129,7 @@ async function writeScanReport( mode: report.mode, dryRun: false, artifactPaths: { - manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], + manifestShards: report.manifestShards ?? ['semantic-layer/warehouse/_schema/public.yaml'], enrichmentArtifacts: report.mode === 'enriched' ? [`raw-sources/warehouse/live-database/${syncId}/enrichment/descriptions.json`] @@ -214,8 +258,6 @@ describe('setup context build state', () => { expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled', - scanMode: 'enriched', - detectRelationships: true, }), io.io, expect.objectContaining({ onDetach: expect.any(Function) }), @@ -346,32 +388,120 @@ describe('setup context build state', () => { expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); }); - it('does not treat schema-only scan shards as completed setup context', async () => { - await writeReadyProject(tempDir); + it('treats fast database context as ready from schema manifest shards without AI artifacts', async () => { + await writeReadyProject(tempDir, { + connections: { + warehouse: { driver: 'postgres', readonly: true, context: { depth: 'fast' } }, + }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); await mkdir(join(tempDir, 'semantic-layer', 'warehouse', '_schema'), { recursive: true }); await writeFile(join(tempDir, 'semantic-layer', 'warehouse', '_schema', 'public.yaml'), 'tables: {}\n'); - const io = makeIo(); - const runContextBuildMock = vi.fn(async () => { - await mkdir(join(tempDir, 'wiki', 'global'), { recursive: true }); - await writeFile(join(tempDir, 'wiki', 'global', 'metrics.md'), '# Metrics\n'); - await writeReadyEnrichedScanReport(tempDir); - return { exitCode: 0, detached: false }; + await writeScanReport(tempDir, '2026-05-09T10:00:00.000Z', { + mode: 'structural', + tableDescriptions: 'skipped', + columnDescriptions: 'skipped', + embeddings: 'skipped', + manifestShards: ['semantic-layer/warehouse/_schema/public.yaml'], }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); await expect( runKtxSetupContextStep( { projectDir: tempDir, inputMode: 'disabled' }, io.io, { - runIdFactory: () => 'setup-context-local-schema-only', - now: () => new Date('2026-05-09T10:00:00.000Z'), runContextBuild: runContextBuildMock, }, ), - ).resolves.toEqual({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-schema-only' }); + ).resolves.toMatchObject({ status: 'ready' }); - expect(runContextBuildMock).toHaveBeenCalledOnce(); - expect(io.stdout()).not.toContain('Existing context artifacts were found from setup ingest.'); + expect(runContextBuildMock).not.toHaveBeenCalled(); + expect(io.stdout()).toContain('Existing context artifacts were found from setup ingest.'); + }); + + it('stores fast context depth non-interactively when deep readiness is missing', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { provider: { backend: 'none' }, models: {} }, + scan: { enrichment: { mode: 'none' } }, + }); + const io = makeIo(); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'disabled' }, + io.io, + { runContextBuild: runContextBuildMock, verifyContextReady }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'fast' }); + expect(runContextBuildMock).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ projectDir: tempDir, inputMode: 'disabled' }), + expect.anything(), + expect.anything(), + ); + expect(runContextBuildMock.mock.calls[0]?.[1]).not.toMatchObject({ + scanMode: 'enriched', + detectRelationships: true, + }); + }); + + it('prompts for database context depth after final readiness is known', async () => { + await writeReadyProject(tempDir, { + connections: { warehouse: { driver: 'postgres', readonly: true } }, + llm: { + provider: { backend: 'gateway', gateway: { api_key: 'env:KTX_GATEWAY_API_KEY' } }, + models: { default: 'gpt-test' }, + }, + scan: { + enrichment: { + mode: 'llm', + embeddings: { backend: 'openai', model: 'text-embedding-3-small', dimensions: 1536 }, + }, + }, + }); + const io = makeIo(); + const select = vi.fn(async () => 'deep'); + const runContextBuildMock = vi.fn(async () => ({ exitCode: 0 })); + const verifyContextReady = vi.fn(async () => ({ + ready: true, + agentContextReady: true, + semanticSearchReady: true, + details: ['ready'], + })); + + await expect( + runKtxSetupContextStep( + { projectDir: tempDir, inputMode: 'auto' }, + io.io, + { + prompts: { select, cancel: vi.fn() }, + runContextBuild: runContextBuildMock, + verifyContextReady, + }, + ), + ).resolves.toMatchObject({ status: 'ready' }); + + expect(select).toHaveBeenCalledWith( + expect.objectContaining({ + message: expect.stringContaining('How much database context should KTX build?'), + }), + ); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + expect(config.connections.warehouse.context).toMatchObject({ depth: 'deep' }); }); it('refuses empty setup context builds', async () => { diff --git a/packages/cli/src/setup-context.ts b/packages/cli/src/setup-context.ts index c58557a7..ce709a9f 100644 --- a/packages/cli/src/setup-context.ts +++ b/packages/cli/src/setup-context.ts @@ -10,6 +10,15 @@ import { } from '@ktx/context/project'; import type { KtxCliIo } from './cli-runtime.js'; import { buildPublicIngestPlan } from './public-ingest.js'; +import { + type KtxDatabaseContextDepth, + databaseContextDepth, + deepReadinessGaps, + isDatabaseDriver, + normalizeConnectionDriver, + recommendedDatabaseContextDepth, + withDatabaseContextDepth, +} from './ingest-depth.js'; import { type ContextBuildSourceProgressUpdate, createRepainter, @@ -297,25 +306,75 @@ function listContextTargets(project: KtxLocalProject): KtxSetupContextTargets { }; } -function missingCapabilities(project: KtxLocalProject): string[] { - const missing: string[] = []; - const llm = project.config.llm; - if (llm.provider.backend === 'none' || !llm.models.default) { - missing.push('Models are not ready.'); +function databaseConnectionsNeedingDepth(project: KtxLocalProject): string[] { + return Object.entries(project.config.connections) + .filter(([, connection]) => isDatabaseDriver(normalizeConnectionDriver(connection))) + .filter(([, connection]) => databaseContextDepth(connection) === undefined) + .map(([connectionId]) => connectionId) + .sort((left, right) => left.localeCompare(right)); +} + +async function writeDatabaseContextDepths( + project: KtxLocalProject, + connectionIds: string[], + depth: KtxDatabaseContextDepth, +): Promise { + if (connectionIds.length === 0) { + return project; } - const embeddings = project.config.ingest.embeddings; - if ( - embeddings.backend === 'none' || - embeddings.backend === 'deterministic' || - !embeddings.model || - embeddings.dimensions <= 0 - ) { - missing.push('Embeddings are not ready.'); + const nextConnections = { ...project.config.connections }; + for (const connectionId of connectionIds) { + const connection = nextConnections[connectionId]; + if (connection) { + nextConnections[connectionId] = withDatabaseContextDepth(connection, depth); + } } - if (project.config.scan.enrichment.mode === 'none') { - missing.push('Scan enrichment is not configured.'); + const nextConfig = { ...project.config, connections: nextConnections }; + await writeFile(project.configPath, serializeKtxProjectConfig(nextConfig), 'utf-8'); + return await loadKtxProject({ projectDir: project.projectDir }); +} + +async function ensureSetupDatabaseContextDepths(input: { + project: KtxLocalProject; + args: KtxSetupContextStepArgs; + prompts: KtxSetupContextPromptAdapter; +}): Promise { + const missingDepthConnectionIds = databaseConnectionsNeedingDepth(input.project); + if (missingDepthConnectionIds.length === 0) { + return input.project; } - return missing; + + const recommended = recommendedDatabaseContextDepth(input.project.config); + if (input.args.inputMode === 'disabled') { + return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, recommended); + } + + const deepReady = deepReadinessGaps(input.project.config).length === 0; + const options = + recommended === 'deep' + ? [ + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'back', label: 'Back' }, + ] + : [ + { value: 'fast', label: 'Fast: schema only, no AI, quickest' }, + { value: 'deep', label: 'Deep: AI descriptions, embeddings, relationships, slower' }, + { value: 'back', label: 'Back' }, + ]; + + const choice = await input.prompts.select({ + message: + 'How much database context should KTX build?\n\n' + + (deepReady + ? 'Deep is available because model, embedding, and scan enrichment are configured.' + : 'Fast is recommended because model, embedding, or scan enrichment is not configured.'), + options, + }); + if (choice === 'back') { + return 'back'; + } + return await writeDatabaseContextDepths(input.project, missingDepthConnectionIds, choice as KtxDatabaseContextDepth); } async function hasFileWithExtension( @@ -408,14 +467,34 @@ function scanReportHasCompletedDescriptionEnrichment(report: unknown, connection ); } +function scanReportHasCompletedSchemaManifest(report: unknown, connectionId: string): boolean { + if (!isRecord(report)) { + return false; + } + if (report.connectionId !== connectionId || report.dryRun === true) { + return false; + } + if (!isRecord(report.artifactPaths)) { + return false; + } + return stringArrayValue(report.artifactPaths.manifestShards).length > 0; +} + async function verifyPrimarySourceScans( + project: KtxLocalProject, projectDir: string, connectionIds: string[], ): Promise<{ ready: boolean; details: string[] }> { const details: string[] = []; for (const connectionId of connectionIds) { + const connection = project.config.connections[connectionId]; + const depth = connection ? (databaseContextDepth(connection) ?? 'fast') : 'fast'; const report = await readLatestScanReport(projectDir, connectionId); - if (!scanReportHasCompletedDescriptionEnrichment(report, connectionId)) { + const ready = + depth === 'fast' + ? scanReportHasCompletedSchemaManifest(report, connectionId) + : scanReportHasCompletedDescriptionEnrichment(report, connectionId); + if (!ready) { details.push(`${connectionId}: enriched database scan with AI descriptions has not completed.`); } } @@ -425,7 +504,7 @@ async function verifyPrimarySourceScans( async function defaultVerifyContextReady(projectDir: string): Promise { const project = await loadKtxProject({ projectDir }); const targets = listContextTargets(project); - const primarySourceScans = await verifyPrimarySourceScans(projectDir, targets.primarySourceConnectionIds); + const primarySourceScans = await verifyPrimarySourceScans(project, projectDir, targets.primarySourceConnectionIds); const semanticLayerContextReady = await hasFileWithExtension( join(projectDir, 'semantic-layer'), new Set(['.yaml', '.yml']), @@ -556,8 +635,6 @@ async function runBuild( { projectDir: args.projectDir, inputMode: args.inputMode, - scanMode: 'enriched', - detectRelationships: true, }, io, { @@ -692,7 +769,17 @@ export async function runKtxSetupContextStep( deps: KtxSetupContextDeps = {}, ): Promise { try { - const project = await loadKtxProject({ projectDir: args.projectDir }); + let project = await loadKtxProject({ projectDir: args.projectDir }); + const prompts = deps.prompts ?? createPromptAdapter(); + const depthProject = await ensureSetupDatabaseContextDepths({ + project, + args, + prompts, + }); + if (depthProject === 'back') { + return { status: 'back', projectDir: args.projectDir }; + } + project = depthProject; const existingState = await readKtxSetupContextState(args.projectDir); const completedSteps = (await readKtxSetupState(args.projectDir)).completed_steps; if (completedSteps.includes('context') && existingState.status === 'completed') { @@ -716,7 +803,6 @@ export async function runKtxSetupContextStep( ); return setupResultFromWatchedState(args.projectDir, watched.state); } - const prompts = deps.prompts ?? createPromptAdapter(); const choice = await prompts.select({ message: 'A context build is running in the background.\n\n' + @@ -761,12 +847,15 @@ export async function runKtxSetupContextStep( return { status: 'failed', projectDir: args.projectDir }; } - const missing = missingCapabilities(project); - if (missing.length > 0) { + const preflightPlan = buildPublicIngestPlan(project, { projectDir: project.projectDir, all: true }); + const preflightFailures = preflightPlan.targets.flatMap((target) => + target.preflightFailure ? [`${target.connectionId}: ${target.preflightFailure}`] : [], + ); + if (preflightFailures.length > 0) { if (args.allowEmpty === true) { return { status: 'skipped', projectDir: args.projectDir }; } - writeMissingCapabilities(missing, io); + writeMissingCapabilities(preflightFailures, io); return { status: 'missing-input', projectDir: args.projectDir }; } @@ -778,7 +867,7 @@ export async function runKtxSetupContextStep( } if (args.inputMode !== 'disabled' && args.prompt !== false) { - const choice = await promptForBuild(deps.prompts ?? createPromptAdapter()); + const choice = await promptForBuild(prompts); if (choice === 'back') { return { status: 'back', projectDir: args.projectDir }; }