From b3be54e3fae53693adae5e75c7487acde9ec0e7a Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Thu, 14 May 2026 15:36:35 +0200 Subject: [PATCH] refactor(context): validate ktx.yaml with Zod and surface issues in status (#91) * refactor(context): validate ktx.yaml with Zod and surface issues in status - Replace hand-rolled ktx.yaml parsing with a strict Zod schema and derive KtxProjectConfig types from it. - Add validateKtxProjectConfig returning structured KtxConfigIssue[] with migration hints for deprecated keys (ingest.llm, scan.enrichment.backend, etc.). - Wire ktx status/doctor to run validation, render schema issues in plain and JSON output, and add a Config row to project status. - Update the orbit example to camelCase scan.relationships keys to match the schema. * fix(context): tolerate legacy setup.completed_steps and optional driver - Accept and drop the legacy setup.completed_steps field so existing ktx.yaml files migrated from older versions still load. - Make connections.<id>.driver optional in the schema; runtime code already produces a clear "no driver" error at use time. * feat(cli): add ktx status --validate to run only ktx.yaml schema validation - New --validate flag dispatches a focused runKtxDoctor 'validate' branch that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM, connection, embedding, and query-history checks. - Plain output prints a single Config row; JSON output emits {ok: true} on success or the existing invalid_config / missing_project shapes on failure. 
--- .../orbit-relationship-verification/ktx.yaml | 18 +- packages/cli/src/commands/status-commands.ts | 64 +- packages/cli/src/doctor.test.ts | 258 +++++++ packages/cli/src/doctor.ts | 114 ++- packages/cli/src/status-project.ts | 30 + packages/context/src/project/config.test.ts | 122 ++- packages/context/src/project/config.ts | 715 ++++++------------ packages/context/src/project/index.ts | 3 + scripts/examples-docs.test.mjs | 4 +- 9 files changed, 783 insertions(+), 545 deletions(-) diff --git a/examples/orbit-relationship-verification/ktx.yaml b/examples/orbit-relationship-verification/ktx.yaml index bcfad298..1251d5a1 100644 --- a/examples/orbit-relationship-verification/ktx.yaml +++ b/examples/orbit-relationship-verification/ktx.yaml @@ -13,14 +13,14 @@ ingest: adapters: [] scan: enrichment: - backend: none + mode: none relationships: enabled: true - llm_proposals: false - validation_required_for_manifest: true - accept_threshold: 0.85 - review_threshold: 0.55 - max_llm_tables_per_batch: 40 - max_candidates_per_column: 25 - profile_sample_rows: 10000 - validation_concurrency: 4 + llmProposals: false + validationRequiredForManifest: true + acceptThreshold: 0.85 + reviewThreshold: 0.55 + maxLlmTablesPerBatch: 40 + maxCandidatesPerColumn: 25 + profileSampleRows: 10000 + validationConcurrency: 4 diff --git a/packages/cli/src/commands/status-commands.ts b/packages/cli/src/commands/status-commands.ts index 52032e59..62c857f9 100644 --- a/packages/cli/src/commands/status-commands.ts +++ b/packages/cli/src/commands/status-commands.ts @@ -17,16 +17,51 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC .description('Check current KTX setup and project readiness') .option('--json', 'Print JSON output', false) .option('-v, --verbose', 'Show every check, including passing ones', false) + .option('--validate', 'Only validate the ktx.yaml schema; skip readiness checks', false) .option('--no-input', 'Disable interactive terminal input') - 
.action(async (options: { json?: boolean; verbose?: boolean; input?: boolean }, command) => { - const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor; - const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command); - const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd()); - if (!explicitOrEnvProjectDir && !nearestProjectDir) { + .action( + async ( + options: { json?: boolean; verbose?: boolean; validate?: boolean; input?: boolean }, + command, + ) => { + const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor; + const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command); + const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd()); + + if (options.validate === true) { + context.setExitCode( + await runner( + { + command: 'validate', + projectDir: resolveCommandProjectDir(command), + outputMode: outputMode(options), + ...inputMode(options), + }, + context.io, + ), + ); + return; + } + + if (!explicitOrEnvProjectDir && !nearestProjectDir) { + context.setExitCode( + await runner( + { + command: 'setup', + outputMode: outputMode(options), + verbose: options.verbose === true, + ...inputMode(options), + }, + context.io, + ), + ); + return; + } context.setExitCode( await runner( { - command: 'setup', + command: 'project', + projectDir: resolveCommandProjectDir(command), outputMode: outputMode(options), verbose: options.verbose === true, ...inputMode(options), @@ -34,19 +69,6 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC context.io, ), ); - return; - } - context.setExitCode( - await runner( - { - command: 'project', - projectDir: resolveCommandProjectDir(command), - outputMode: outputMode(options), - verbose: options.verbose === true, - ...inputMode(options), - }, - context.io, - ), - ); - }); + }, + ); } diff --git a/packages/cli/src/doctor.test.ts 
b/packages/cli/src/doctor.test.ts index 22c6878d..5a9a3fdd 100644 --- a/packages/cli/src/doctor.test.ts +++ b/packages/cli/src/doctor.test.ts @@ -324,6 +324,95 @@ describe('runKtxDoctor', () => { expect(parsed.projectDir).toBe(tempDir); }); + it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'storrage:', + ' state: sqlite', + 'ingest:', + ' llm:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(1); + + const out = testIo.stdout(); + expect(out).toContain('KTX status'); + expect(out).toContain('Config'); + expect(out).toContain('Unsupported storrage: unknown field'); + expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider'); + expect(out).toContain('ktx.yaml'); + }); + + it('emits structured JSON when ktx.yaml fails Zod validation', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + ['project: warehouse', 'storrage: {}', ''].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(1); + + const parsed = JSON.parse(testIo.stdout()) as { + error: string; + projectDir: string; + issues: Array<{ path: string; message: string }>; + }; + expect(parsed.error).toBe('invalid_config'); + expect(parsed.projectDir).toBe(tempDir); + expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true); + }); + + it('shows a Config row labelled "ktx.yaml schema valid" on the happy path', async () => { + process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 
'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.db', + 'llm:', + ' provider:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(0); + + expect(testIo.stdout()).toContain('ktx.yaml schema valid'); + delete process.env.ANTHROPIC_API_KEY; + }); + it('runs project checks against a valid ktx.yaml', async () => { process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret await writeFile( @@ -565,4 +654,173 @@ describe('runKtxDoctor', () => { expect(testIo.stdout()).toContain('semantic search degraded'); delete process.env.ANTHROPIC_API_KEY; }); + + describe('command: validate', () => { + it('prints a success line and exits 0 when ktx.yaml is schema-valid', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.db', + 'llm:', + ' provider:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(0); + + const out = testIo.stdout(); + expect(out).toContain('KTX status'); + expect(out).toContain('Config'); + expect(out).toContain('ktx.yaml schema valid'); + expect(out).not.toContain('LLM'); + expect(out).not.toContain('Connections'); + expect(out).not.toContain('Pipeline'); + }); + + it('emits {ok: true} JSON when ktx.yaml is schema-valid', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: sqlite', + ' path: ./warehouse.db', + 'llm:', + ' provider:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = 
makeIo(); + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(0); + + expect(JSON.parse(testIo.stdout())).toEqual({ ok: true, projectDir: tempDir }); + }); + + it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'storrage:', + ' state: sqlite', + 'ingest:', + ' llm:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(1); + + const out = testIo.stdout(); + expect(out).toContain('Unsupported storrage: unknown field'); + expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider'); + }); + + it('emits structured JSON issues when validation fails', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + ['project: warehouse', 'storrage: {}', ''].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(1); + + const parsed = JSON.parse(testIo.stdout()) as { error: string; issues: Array<{ path: string }> }; + expect(parsed.error).toBe('invalid_config'); + expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true); + }); + + it('prints the missing-project message and exits 1 when ktx.yaml is absent', async () => { + const testIo = makeIo(); + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + {}, + ), + ).resolves.toBe(1); + + expect(testIo.stdout()).toContain('No KTX project here yet.'); + }); + + it('does not invoke the Postgres query-history probe in 
validate mode', async () => { + await writeFile( + join(tempDir, 'ktx.yaml'), + [ + 'project: warehouse', + 'connections:', + ' warehouse:', + ' driver: postgres', + ' url: env:WAREHOUSE_DATABASE_URL', + ' context:', + ' queryHistory:', + ' enabled: true', + 'llm:', + ' provider:', + ' backend: anthropic', + '', + ].join('\n'), + 'utf-8', + ); + const testIo = makeIo(); + let probeCalls = 0; + + await expect( + runKtxDoctor( + { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' }, + testIo.io, + { + postgresQueryHistoryProbe: async () => { + probeCalls += 1; + return { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }; + }, + }, + ), + ).resolves.toBe(0); + + expect(probeCalls).toBe(0); + expect(testIo.stdout()).toContain('ktx.yaml schema valid'); + }); + }); }); diff --git a/packages/cli/src/doctor.ts b/packages/cli/src/doctor.ts index c0ff7ba7..b1845ae3 100644 --- a/packages/cli/src/doctor.ts +++ b/packages/cli/src/doctor.ts @@ -1,9 +1,10 @@ import { execFile } from 'node:child_process'; import { constants as fsConstants } from 'node:fs'; -import { access } from 'node:fs/promises'; +import { access, readFile } from 'node:fs/promises'; import { join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { promisify } from 'node:util'; +import type { KtxConfigIssue } from '@ktx/context/project'; import type { BuildProjectStatusOptions } from './status-project.js'; const execFileAsync = promisify(execFile); @@ -40,6 +41,12 @@ export type KtxDoctorArgs = outputMode: KtxDoctorOutputMode; inputMode?: KtxDoctorInputMode; verbose?: boolean; + } + | { + command: 'validate'; + projectDir: string; + outputMode: KtxDoctorOutputMode; + inputMode?: KtxDoctorInputMode; }; interface KtxDoctorIo { @@ -450,6 +457,84 @@ function writeReport(report: DoctorReport, outputMode: KtxDoctorOutputMode, io: io.stdout.write(renderPlainReport(report, options)); } +export function renderInvalidConfigMessage( + projectDir: 
string, + issues: KtxConfigIssue[], + outputMode: KtxDoctorOutputMode, + io: KtxDoctorIo, +): void { + if (outputMode === 'json') { + io.stdout.write( + `${JSON.stringify( + { + error: 'invalid_config', + projectDir, + issues, + }, + null, + 2, + )}\n`, + ); + return; + } + + const useColor = shouldUseColor(io); + const dim = (text: string) => styleDim(useColor, text); + const bold = (text: string) => styleBold(useColor, text); + const status = (s: DoctorStatus, text: string) => styleStatus(useColor, s, text); + const abbreviated = abbreviateHome(projectDir) ?? projectDir; + + const lines: string[] = []; + lines.push(`${bold('KTX status')} ${dim('·')} ${abbreviated}`); + lines.push(''); + lines.push(` ${status('fail', '✗')} ${bold('Config')} ktx.yaml has ${issues.length} schema issue${issues.length === 1 ? '' : 's'}`); + for (const issue of issues) { + lines.push(` ${status('fail', '✗')} ${issue.message}`); + if (issue.fix) { + lines.push(` ${dim(`→ ${issue.fix}`)}`); + } + } + lines.push(''); + lines.push(` ${dim('Fix the issues in')} ${join(abbreviated, 'ktx.yaml')} ${dim('and rerun')} ${bold('ktx status')}.`); + lines.push(''); + + io.stdout.write(lines.join('\n')); +} + +export function renderValidConfigMessage( + projectDir: string, + outputMode: KtxDoctorOutputMode, + io: KtxDoctorIo, +): void { + if (outputMode === 'json') { + io.stdout.write( + `${JSON.stringify( + { + ok: true, + projectDir, + }, + null, + 2, + )}\n`, + ); + return; + } + + const useColor = shouldUseColor(io); + const dim = (text: string) => styleDim(useColor, text); + const bold = (text: string) => styleBold(useColor, text); + const status = (s: DoctorStatus, text: string) => styleStatus(useColor, s, text); + const abbreviated = abbreviateHome(projectDir) ?? 
projectDir; + + const lines: string[] = []; + lines.push(`${bold('KTX status')} ${dim('·')} ${abbreviated}`); + lines.push(''); + lines.push(` ${status('pass', '✓')} ${bold('Config')} ${dim('ktx.yaml schema valid')}`); + lines.push(''); + + io.stdout.write(lines.join('\n')); +} + export function renderMissingProjectMessage( projectDir: string, outputMode: KtxDoctorOutputMode, @@ -501,16 +586,39 @@ export async function runKtxDoctor( try { const runSetupChecks = deps.runSetupChecks ?? (() => runSetupDoctorChecks()); + if (args.command === 'validate') { + const configPath = join(args.projectDir, 'ktx.yaml'); + if (!(await defaultPathExists(configPath))) { + renderMissingProjectMessage(args.projectDir, args.outputMode, io); + return 1; + } + const { validateKtxProjectConfig } = await import('@ktx/context/project'); + const rawConfig = await readFile(configPath, 'utf-8'); + const validation = validateKtxProjectConfig(rawConfig); + if (!validation.ok) { + renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io); + return 1; + } + renderValidConfigMessage(args.projectDir, args.outputMode, io); + return 0; + } + if (args.command === 'project') { const configPath = join(args.projectDir, 'ktx.yaml'); if (!(await defaultPathExists(configPath))) { renderMissingProjectMessage(args.projectDir, args.outputMode, io); return 1; } - const { loadKtxProject } = await import('@ktx/context/project'); + const { loadKtxProject, validateKtxProjectConfig } = await import('@ktx/context/project'); const { buildProjectStatus, renderProjectStatus } = await import('./status-project.js'); + const rawConfig = await readFile(configPath, 'utf-8'); + const validation = validateKtxProjectConfig(rawConfig); + if (!validation.ok) { + renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io); + return 1; + } const project = await loadKtxProject({ projectDir: args.projectDir }); - const projectStatus = await buildProjectStatus(project, deps); + const 
projectStatus = await buildProjectStatus(project, { ...deps, configIssues: validation.issues }); const verbose = args.verbose ?? false; const toolchainChecks = verbose ? await runSetupChecks() : undefined; if (args.outputMode === 'json') { diff --git a/packages/cli/src/status-project.ts b/packages/cli/src/status-project.ts index 08686355..6c6cc4e3 100644 --- a/packages/cli/src/status-project.ts +++ b/packages/cli/src/status-project.ts @@ -1,4 +1,5 @@ import type { + KtxConfigIssue, KtxLocalProject, KtxProjectConfig, KtxProjectConnectionConfig, @@ -56,6 +57,12 @@ interface StorageStatus { gitAuthor: string; } +interface ConfigStatus { + status: ProjectStatusLevel; + detail: string; + issues: KtxConfigIssue[]; +} + interface WarningItem { message: string; fix?: string; @@ -72,6 +79,7 @@ function hasOwnField(value: Record, key: string): boolean { export interface ProjectStatus { projectName: string; projectDir: string; + config: ConfigStatus; llm: LlmStatus; embeddings: EmbeddingsStatus; storage: StorageStatus; @@ -610,12 +618,26 @@ function buildVerdict( export interface BuildProjectStatusOptions { env?: NodeJS.ProcessEnv; postgresQueryHistoryProbe?: PostgresQueryHistoryProbe; + configIssues?: KtxConfigIssue[]; +} + +function buildConfigStatus(issues: KtxConfigIssue[] | undefined): ConfigStatus { + const list = issues ?? []; + if (list.length === 0) { + return { status: 'ok', detail: 'ktx.yaml schema valid', issues: [] }; + } + return { + status: 'warn', + detail: `${list.length} issue${list.length === 1 ? '' : 's'} in ktx.yaml`, + issues: list, + }; } export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise { const env = options.env ?? 
process.env; const config = project.config; + const configStatus = buildConfigStatus(options.configIssues); const llm = buildLlmStatus(config.llm, env); const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env); const storage = buildStorageStatus(config); @@ -630,6 +652,7 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil return { projectName: config.project, projectDir: project.projectDir, + config: configStatus, llm, embeddings, storage, @@ -719,6 +742,13 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec lines.push(` ${label('Embeddings')} ${embedDetail} ${sym(status.embeddings.status)} ${dim(status.embeddings.detail)}`); lines.push(` ${label('Storage')} ${dim(`${status.storage.state} (state) · ${status.storage.search} (search)`)}`); + lines.push(` ${label('Config')} ${sym(status.config.status)} ${dim(status.config.detail)}`); + if (status.config.issues.length > 0) { + for (const issue of status.config.issues) { + lines.push(` ${color('warn', SYMBOL.warn)} ${issue.message}`); + if (issue.fix) lines.push(` ${dim(`→ ${issue.fix}`)}`); + } + } lines.push(''); // Connections diff --git a/packages/context/src/project/config.test.ts b/packages/context/src/project/config.test.ts index b81132d9..92428c56 100644 --- a/packages/context/src/project/config.test.ts +++ b/packages/context/src/project/config.test.ts @@ -1,5 +1,10 @@ import { describe, expect, it } from 'vitest'; -import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js'; +import { + buildDefaultKtxProjectConfig, + parseKtxProjectConfig, + serializeKtxProjectConfig, + validateKtxProjectConfig, +} from './config.js'; describe('KTX project config', () => { it.each(['status', 'replay', 'run', 'watch'])('accepts former ingest subcommand name "%s" as a connection id', (connectionId) => { @@ -277,8 +282,8 @@ scan: expect(serializeKtxProjectConfig(config)).toContain('validationBudget: 
all'); }); - it('falls back to safe scan relationship defaults for invalid numeric settings', () => { - const config = parseKtxProjectConfig(` + it('rejects out-of-range scan relationship numeric settings', () => { + const yaml = ` project: demo scan: relationships: @@ -289,28 +294,33 @@ scan: profileSampleRows: 0 validationConcurrency: 0 validationBudget: 1.5 -`); +`; + expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.acceptThreshold/); - expect(config.scan.relationships).toMatchObject({ - acceptThreshold: 0.85, - reviewThreshold: 0.55, - maxLlmTablesPerBatch: 40, - maxCandidatesPerColumn: 25, - profileSampleRows: 10000, - validationConcurrency: 4, - }); - expect(config.scan.relationships).not.toHaveProperty('validationBudget'); + const validation = validateKtxProjectConfig(yaml); + expect(validation.ok).toBe(false); + const paths = validation.issues.map((issue) => issue.path); + expect(paths).toEqual( + expect.arrayContaining([ + 'scan.relationships.acceptThreshold', + 'scan.relationships.reviewThreshold', + 'scan.relationships.maxLlmTablesPerBatch', + 'scan.relationships.maxCandidatesPerColumn', + 'scan.relationships.profileSampleRows', + 'scan.relationships.validationConcurrency', + 'scan.relationships.validationBudget', + ]), + ); }); - it('falls back for invalid scan relationship validation budget strings', () => { - const config = parseKtxProjectConfig(` + it('rejects invalid scan relationship validation budget strings', () => { + const yaml = ` project: demo scan: relationships: validationBudget: infinite -`); - - expect(config.scan.relationships).not.toHaveProperty('validationBudget'); +`; + expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.validationBudget/); }); it('rejects unsupported local LLM and embedding fields', () => { @@ -398,4 +408,80 @@ scan: it('rejects configs with a missing project name', () => { expect(() => parseKtxProjectConfig('connections: {}\n')).toThrow('ktx.yaml field "project" is 
required'); }); + + it('rejects unknown top-level fields under strict mode', () => { + expect(() => + parseKtxProjectConfig(` +project: demo +storrage: + state: sqlite +`), + ).toThrow(/Unsupported storrage/); + }); +}); + +describe('validateKtxProjectConfig', () => { + it('returns ok: true with no issues for a valid config', () => { + const result = validateKtxProjectConfig('project: warehouse\n'); + expect(result).toEqual({ ok: true, issues: [] }); + }); + + it('collects every schema issue without throwing', () => { + const result = validateKtxProjectConfig(` +project: "" +storage: + search: not-a-real-backend +scan: + relationships: + acceptThreshold: 1.7 +`); + + expect(result.ok).toBe(false); + const paths = result.issues.map((issue) => issue.path); + expect(paths).toEqual( + expect.arrayContaining([ + 'project', + 'storage.search', + 'scan.relationships.acceptThreshold', + ]), + ); + }); + + it('attaches migration hints for known deprecated keys', () => { + const result = validateKtxProjectConfig(` +project: demo +ingest: + llm: + backend: anthropic +scan: + enrichment: + backend: none +`); + + expect(result.ok).toBe(false); + const findIssue = (path: string) => result.issues.find((issue) => issue.path === path); + expect(findIssue('ingest.llm')).toMatchObject({ + message: 'Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits', + fix: 'use top-level llm.provider, llm.models, and ingest.workUnits', + }); + expect(findIssue('scan.enrichment.backend')).toMatchObject({ + message: 'Unsupported scan.enrichment.backend: use scan.enrichment.mode', + fix: 'use scan.enrichment.mode', + }); + }); + + it('reports YAML parse errors as a root-level issue', () => { + const result = validateKtxProjectConfig(': not valid yaml :\n'); + expect(result.ok).toBe(false); + expect(result.issues[0]?.path).toBe(''); + expect(result.issues[0]?.message).toMatch(/ktx\.yaml parse error/); + }); + + it('reports a YAML scalar root as a single issue', () => { 
+ const result = validateKtxProjectConfig('- nope\n'); + expect(result).toEqual({ + ok: false, + issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }], + }); + }); }); diff --git a/packages/context/src/project/config.ts b/packages/context/src/project/config.ts index 6cd4d8fe..3d34ce2c 100644 --- a/packages/context/src/project/config.ts +++ b/packages/context/src/project/config.ts @@ -1,435 +1,233 @@ -import type { KtxEmbeddingBackend, KtxLlmBackend, KtxModelRole, KtxPromptCacheTtl } from '@ktx/llm'; +import { KTX_MODEL_ROLES } from '@ktx/llm'; import YAML from 'yaml'; +import * as z from 'zod'; -export type KtxStorageState = 'postgres' | 'sqlite'; -export type KtxSearchBackend = 'postgres-hybrid' | 'sqlite-fts5'; -type KtxLocalLlmBackend = KtxLlmBackend | 'none'; -type KtxLocalEmbeddingBackend = KtxEmbeddingBackend | 'none'; -type KtxScanEnrichmentMode = 'none' | 'deterministic' | 'llm'; +const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const; +const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const; +const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const; +const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const; +const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const; +const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const; +const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const; -interface KtxProjectPromptCachingConfig { - enabled?: boolean; - systemTtl?: KtxPromptCacheTtl; - toolsTtl?: KtxPromptCacheTtl; - historyTtl?: KtxPromptCacheTtl; - vertexFallbackTo5m?: boolean; +const DEPRECATED_KEY_HINTS: Record = { + 'llm.provider.provider': 'use llm.provider.backend', + 'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits', + 'ingest.embeddings.provider': 'use ingest.embeddings.backend', + 'scan.enrichment.backend': 'use scan.enrichment.mode', + 'scan.enrichment.llm': 'use top-level llm.provider and llm.models', + 
'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend', +}; + +const apiCredentialsSchema = z.strictObject({ + api_key: z.string().min(1).optional(), + base_url: z.string().min(1).optional(), +}); + +const vertexProviderSchema = z.strictObject({ + project: z.string().min(1).optional(), + location: z.string().default(''), +}); + +const sentenceTransformersSchema = z.strictObject({ + base_url: z.string().default(''), + pathPrefix: z.string().optional(), +}); + +const llmProviderSchema = z.strictObject({ + backend: z.enum(KTX_LLM_BACKENDS).default('none'), + vertex: vertexProviderSchema.optional(), + anthropic: apiCredentialsSchema.optional(), + gateway: apiCredentialsSchema.optional(), +}); + +const promptCachingSchema = z.strictObject({ + enabled: z.boolean().optional(), + systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(), + toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(), + historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(), + vertexFallbackTo5m: z.boolean().optional(), +}); + +const llmSchema = z.strictObject({ + provider: llmProviderSchema.prefault({}), + models: z.partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1)).default({}), + promptCaching: promptCachingSchema.optional(), +}); + +const embeddingSchema = z.strictObject({ + backend: z.enum(KTX_EMBEDDING_BACKENDS).default('deterministic'), + model: z.string().min(1).optional(), + dimensions: z.int().positive().default(8), + openai: apiCredentialsSchema.optional(), + sentenceTransformers: sentenceTransformersSchema.optional(), + batchSize: z.int().positive().optional(), +}); + +const workUnitsSchema = z.strictObject({ + stepBudget: z.int().positive().default(40), + maxConcurrency: z.int().positive().default(1), + failureMode: z.enum(KTX_WORK_UNIT_FAILURE_MODES).default('continue'), +}); + +const ingestSchema = z.strictObject({ + adapters: z.array(z.string().min(1)).default([]), + embeddings: embeddingSchema.prefault({ backend: 'deterministic', model: 'deterministic' }), + 
workUnits: workUnitsSchema.prefault({}), +}); + +const scanEnrichmentSchema = z.strictObject({ + mode: z.enum(KTX_ENRICHMENT_MODES).default('none'), + embeddings: embeddingSchema.optional(), +}); + +const scanRelationshipsSchema = z.strictObject({ + enabled: z.boolean().default(true), + llmProposals: z.boolean().default(true), + validationRequiredForManifest: z.boolean().default(true), + acceptThreshold: z.number().min(0).max(1).default(0.85), + reviewThreshold: z.number().min(0).max(1).default(0.55), + maxLlmTablesPerBatch: z.int().positive().default(40), + maxCandidatesPerColumn: z.int().positive().default(25), + profileSampleRows: z.int().positive().default(10000), + validationConcurrency: z.int().positive().default(4), + validationBudget: z.union([z.literal('all'), z.int().nonnegative()]).optional(), +}); + +const scanSchema = z.strictObject({ + enrichment: scanEnrichmentSchema.prefault({}), + relationships: scanRelationshipsSchema.prefault({}), +}); + +const setupSchema = z + .strictObject({ + database_connection_ids: z.array(z.string().min(1)).default([]), + completed_steps: z.unknown().optional(), + }) + .transform(({ database_connection_ids }) => ({ database_connection_ids })); + +const storageGitSchema = z.strictObject({ + auto_commit: z.boolean().default(true), + author: z.string().min(1).default('ktx '), +}); + +const storageSchema = z.strictObject({ + state: z.enum(KTX_STORAGE_STATES).default('sqlite'), + search: z.enum(KTX_SEARCH_BACKENDS).default('sqlite-fts5'), + git: storageGitSchema.prefault({}), +}); + +const connectionSchema = z.looseObject({ + driver: z.string().min(1).optional(), + url: z.string().optional(), +}); + +const agentSchema = z.strictObject({ + run_research: z + .strictObject({ + enabled: z.boolean().default(false), + max_iterations: z.number().int().nonnegative().default(20), + default_toolset: z.array(z.string().min(1)).default(['sl_query', 'wiki_search', 'sl_read_source']), + }) + .prefault({}), +}); + +const memorySchema = 
z.strictObject({ + auto_commit: z.boolean().default(true), +}); + +const ktxProjectConfigSchema = z.strictObject({ + project: z + .string({ error: 'ktx.yaml field "project" is required' }) + .trim() + .min(1, 'ktx.yaml field "project" is required'), + setup: setupSchema.optional(), + connections: z.record(z.string(), connectionSchema).default({}), + storage: storageSchema.prefault({}), + llm: llmSchema.prefault({}), + ingest: ingestSchema.prefault({}), + agent: agentSchema.prefault({}), + memory: memorySchema.prefault({}), + scan: scanSchema.prefault({}), +}); + +export type KtxProjectConfig = z.infer; +export type KtxProjectLlmConfig = z.infer; +export type KtxProjectLlmProviderConfig = z.infer; +export type KtxProjectEmbeddingConfig = z.infer; +export type KtxScanEnrichmentConfig = z.infer; +export type KtxIngestWorkUnitsConfig = z.infer; +export type KtxScanRelationshipConfig = z.infer; +export type KtxProjectScanConfig = z.infer; +export type KtxProjectConnectionConfig = z.infer; +export type KtxProjectSetupConfig = z.infer; +export type KtxStorageState = z.infer['state']; +export type KtxSearchBackend = z.infer['search']; + +export interface KtxConfigIssue { + path: string; + message: string; + fix?: string; } -export interface KtxProjectLlmProviderConfig { - backend: KtxLocalLlmBackend; - vertex?: { project?: string; location: string }; - anthropic?: { api_key?: string; base_url?: string }; - gateway?: { api_key?: string; base_url?: string }; -} - -export interface KtxProjectLlmConfig { - provider: KtxProjectLlmProviderConfig; - models: Partial> & { default?: string }; - promptCaching?: KtxProjectPromptCachingConfig; -} - -export interface KtxProjectEmbeddingConfig { - backend: KtxLocalEmbeddingBackend; - model?: string; - dimensions: number; - openai?: { api_key?: string; base_url?: string }; - sentenceTransformers?: { base_url: string; pathPrefix?: string }; - batchSize?: number; -} - -export interface KtxScanEnrichmentConfig { - mode: 
KtxScanEnrichmentMode; - embeddings?: KtxProjectEmbeddingConfig; -} - -export interface KtxIngestWorkUnitsConfig { - stepBudget: number; - maxConcurrency: number; - failureMode: 'abort' | 'continue'; -} - -export interface KtxScanRelationshipConfig { - enabled: boolean; - llmProposals: boolean; - validationRequiredForManifest: boolean; - acceptThreshold: number; - reviewThreshold: number; - maxLlmTablesPerBatch: number; - maxCandidatesPerColumn: number; - profileSampleRows: number; - validationConcurrency: number; - validationBudget?: number | 'all'; -} - -export interface KtxProjectScanConfig { - enrichment: KtxScanEnrichmentConfig; - relationships: KtxScanRelationshipConfig; -} - -export interface KtxProjectConnectionConfig { - driver: string; - url?: string; - [key: string]: unknown; -} - -export interface KtxProjectSetupConfig { - database_connection_ids: string[]; -} - -export interface KtxProjectConfig { - project: string; - setup?: KtxProjectSetupConfig; - connections: Record; - storage: { - state: KtxStorageState; - search: KtxSearchBackend; - git: { - auto_commit: boolean; - author: string; - }; - }; - llm: KtxProjectLlmConfig; - ingest: { - adapters: string[]; - embeddings: KtxProjectEmbeddingConfig; - workUnits: KtxIngestWorkUnitsConfig; - }; - agent: { - run_research: { - enabled: boolean; - max_iterations: number; - default_toolset: string[]; - }; - }; - memory: { - auto_commit: boolean; - }; - scan: KtxProjectScanConfig; +export interface KtxConfigValidation { + ok: boolean; + issues: KtxConfigIssue[]; } function isRecord(value: unknown): value is Record { return typeof value === 'object' && value !== null && !Array.isArray(value); } -function stringArray(value: unknown, fallback: string[]): string[] { - if (!Array.isArray(value)) { - return fallback; - } - return value.filter((item): item is string => typeof item === 'string' && item.length > 0); +function dottedPath(path: ReadonlyArray): string { + return path.map((segment) => 
String(segment)).join('.'); } -function booleanValue(value: unknown, fallback: boolean): boolean { - return typeof value === 'boolean' ? value : fallback; +function valueAtPath(root: unknown, path: ReadonlyArray): unknown { + let cursor: unknown = root; + for (const segment of path) { + if (cursor === null || typeof cursor !== 'object') return undefined; + cursor = (cursor as Record)[segment]; + } + return cursor; } -function numberValue(value: unknown, fallback: number): number { - return typeof value === 'number' && Number.isFinite(value) ? value : fallback; -} +function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] { + const basePath = dottedPath(issue.path); -function stringValue(value: unknown, fallback: string): string { - return typeof value === 'string' && value.trim().length > 0 ? value : fallback; -} - -function optionalNonEmptyString(value: unknown): string | undefined { - if (typeof value !== 'string') { - return undefined; - } - - const trimmed = value.trim(); - return trimmed.length > 0 ? 
trimmed : undefined; -} - -function positiveIntegerConfigValue(value: unknown, fallback: number): number { - if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) { - return fallback; - } - - return value; -} - -function validationBudgetConfigValue(value: unknown, fallback: number | 'all' | undefined): number | 'all' | undefined { - if (value === 'all') { - return value; - } - if (typeof value === 'number' && Number.isInteger(value) && value >= 0) { - return value; - } - return fallback; -} - -function ratioConfigValue(value: unknown, fallback: number): number { - if (typeof value !== 'number' || !Number.isFinite(value) || value < 0 || value > 1) { - return fallback; - } - - return value; -} - -function localLlmBackend(value: unknown, fallback: KtxLocalLlmBackend, section = 'llm.provider'): KtxLocalLlmBackend { - if (value == null) { - return fallback; - } - - if (value === 'none' || value === 'anthropic' || value === 'vertex' || value === 'gateway') { - return value; - } - - throw new Error(`Unsupported ${section}.backend: ${String(value)}`); -} - -function localEmbeddingBackend( - value: unknown, - fallback: KtxLocalEmbeddingBackend, - section = 'ingest.embeddings', -): KtxLocalEmbeddingBackend { - if (value == null) { - return fallback; - } - - if ( - value === 'none' || - value === 'deterministic' || - value === 'openai' || - value === 'sentence-transformers' - ) { - return value; - } - - throw new Error(`Unsupported ${section}.backend: ${String(value)}`); -} - -function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): KtxScanEnrichmentMode { - if (value == null) { - return fallback; - } - - if (value === 'none' || value === 'deterministic' || value === 'llm') { - return value; - } - - throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`); -} - -function rejectUnsupportedProvider(section: string, value: unknown): void { - if (value !== undefined) { - throw new Error(`Unsupported ${section}.provider: use 
${section}.backend`); - } -} - -function optionalStringRecord(value: unknown): Record { - return isRecord(value) ? value : {}; -} - -function optionalProviderConfig(value: unknown): { api_key?: string; base_url?: string } | undefined { - if (!isRecord(value)) { - return undefined; - } - - const apiKey = optionalNonEmptyString(value.api_key); - const baseUrl = optionalNonEmptyString(value.base_url); - if (!apiKey && !baseUrl) { - return undefined; - } - - return { - ...(apiKey ? { api_key: apiKey } : {}), - ...(baseUrl ? { base_url: baseUrl } : {}), - }; -} - -function parseModels(value: unknown): KtxProjectLlmConfig['models'] { - if (!isRecord(value)) { - return {}; - } - - const models: KtxProjectLlmConfig['models'] = {}; - for (const [role, model] of Object.entries(value)) { - const modelName = optionalNonEmptyString(model); - if (modelName) { - models[role as KtxModelRole] = modelName; - } - } - return models; -} - -function promptCacheTtl(value: unknown): KtxPromptCacheTtl | undefined { - return value === '5m' || value === '1h' ? value : undefined; -} - -function parsePromptCaching(value: unknown): KtxProjectPromptCachingConfig | undefined { - if (!isRecord(value)) { - return undefined; - } - - return { - ...(typeof value.enabled === 'boolean' ? { enabled: value.enabled } : {}), - ...(promptCacheTtl(value.systemTtl) ? { systemTtl: promptCacheTtl(value.systemTtl) } : {}), - ...(promptCacheTtl(value.toolsTtl) ? { toolsTtl: promptCacheTtl(value.toolsTtl) } : {}), - ...(promptCacheTtl(value.historyTtl) ? { historyTtl: promptCacheTtl(value.historyTtl) } : {}), - ...(typeof value.vertexFallbackTo5m === 'boolean' ? { vertexFallbackTo5m: value.vertexFallbackTo5m } : {}), - }; -} - -function parseProjectLlmProviderConfig( - raw: Record, - defaults: KtxProjectLlmProviderConfig, - section: string, -): KtxProjectLlmProviderConfig { - rejectUnsupportedProvider(section, raw.provider); - - const vertex = isRecord(raw.vertex) - ? 
{ - ...(optionalNonEmptyString(raw.vertex.project) ? { project: optionalNonEmptyString(raw.vertex.project) } : {}), - location: stringValue(raw.vertex.location, ''), + if (issue.code === 'unrecognized_keys') { + const keys = (issue as { keys?: readonly string[] }).keys ?? []; + return keys.map((key) => { + const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key; + const hint = DEPRECATED_KEY_HINTS[fullPath]; + if (hint !== undefined) { + return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint }; } - : undefined; - const anthropic = optionalProviderConfig(raw.anthropic); - const gateway = optionalProviderConfig(raw.gateway); + return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` }; + }); + } - return { - backend: localLlmBackend(raw.backend, defaults.backend, section), - ...(vertex ? { vertex } : {}), - ...(anthropic ? { anthropic } : {}), - ...(gateway ? { gateway } : {}), - }; + const lastSegment = issue.path[issue.path.length - 1]; + if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) { + const value = valueAtPath(input, issue.path); + return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }]; + } + + return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }]; } -function parseProjectLlmConfig(raw: Record, defaults: KtxProjectLlmConfig): KtxProjectLlmConfig { - const provider = isRecord(raw.provider) ? raw.provider : {}; - return { - provider: parseProjectLlmProviderConfig(provider, defaults.provider, 'llm.provider'), - models: parseModels(raw.models ?? defaults.models), - ...(parsePromptCaching(raw.promptCaching) ? 
{ promptCaching: parsePromptCaching(raw.promptCaching) } : {}), - }; +function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] { + return error.issues.flatMap((issue) => formatIssue(issue, input)); } -function parseProjectEmbeddingConfig( - raw: Record, - defaults: KtxProjectEmbeddingConfig, - section: string, -): KtxProjectEmbeddingConfig { - rejectUnsupportedProvider(section, raw.provider); - - const openai = optionalProviderConfig(raw.openai); - const sentenceTransformers = isRecord(raw.sentenceTransformers) - ? { - base_url: stringValue(raw.sentenceTransformers.base_url, ''), - ...(typeof raw.sentenceTransformers.pathPrefix === 'string' - ? { pathPrefix: raw.sentenceTransformers.pathPrefix } - : {}), - } - : undefined; - - const backend = localEmbeddingBackend(raw.backend, defaults.backend, section); - const model = - optionalNonEmptyString(raw.model) ?? (raw.backend == null && backend !== 'none' ? defaults.model : undefined); - const batchSize = positiveIntegerConfigValue(raw.batchSize, 0); - return { - backend, - ...(model ? { model } : {}), - dimensions: positiveIntegerConfigValue(raw.dimensions, defaults.dimensions), - ...(openai ? { openai } : {}), - ...(sentenceTransformers ? { sentenceTransformers } : {}), - ...(batchSize > 0 ? 
{ batchSize } : {}), - }; -} - -function parseScanRelationshipConfig( - raw: Record, - defaults: KtxScanRelationshipConfig, -): KtxScanRelationshipConfig { - const validationBudget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget); - - return { - enabled: booleanValue(raw.enabled, defaults.enabled), - llmProposals: booleanValue(raw.llmProposals, defaults.llmProposals), - validationRequiredForManifest: booleanValue( - raw.validationRequiredForManifest, - defaults.validationRequiredForManifest, - ), - acceptThreshold: ratioConfigValue(raw.acceptThreshold, defaults.acceptThreshold), - reviewThreshold: ratioConfigValue(raw.reviewThreshold, defaults.reviewThreshold), - maxLlmTablesPerBatch: positiveIntegerConfigValue(raw.maxLlmTablesPerBatch, defaults.maxLlmTablesPerBatch), - maxCandidatesPerColumn: positiveIntegerConfigValue(raw.maxCandidatesPerColumn, defaults.maxCandidatesPerColumn), - profileSampleRows: positiveIntegerConfigValue(raw.profileSampleRows, defaults.profileSampleRows), - validationConcurrency: positiveIntegerConfigValue(raw.validationConcurrency, defaults.validationConcurrency), - ...(validationBudget !== undefined ? { validationBudget } : {}), - }; -} - -function workUnitFailureMode(value: unknown, fallback: 'abort' | 'continue'): 'abort' | 'continue' { - return value === 'abort' || value === 'continue' ? 
value : fallback; -} - -function parseIngestWorkUnitsConfig( - raw: Record, - defaults: KtxIngestWorkUnitsConfig, -): KtxIngestWorkUnitsConfig { - return { - stepBudget: positiveIntegerConfigValue(raw.stepBudget, defaults.stepBudget), - maxConcurrency: positiveIntegerConfigValue(raw.maxConcurrency, defaults.maxConcurrency), - failureMode: workUnitFailureMode(raw.failureMode, defaults.failureMode), - }; +function formatZodError(error: z.ZodError, input: unknown): string { + return collectIssues(error, input) + .map((issue) => issue.message) + .join('\n'); } export function buildDefaultKtxProjectConfig(projectName = 'ktx-project'): KtxProjectConfig { - return { - project: projectName, - connections: {}, - storage: { - state: 'sqlite', - search: 'sqlite-fts5', - git: { - auto_commit: true, - author: 'ktx ', - }, - }, - llm: { - provider: { - backend: 'none', - }, - models: {}, - }, - ingest: { - adapters: [], - embeddings: { - backend: 'deterministic', - model: 'deterministic', - dimensions: 8, - }, - workUnits: { - stepBudget: 40, - maxConcurrency: 1, - failureMode: 'continue', - }, - }, - agent: { - run_research: { - enabled: false, - max_iterations: 20, - default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'], - }, - }, - memory: { - auto_commit: true, - }, - scan: { - enrichment: { - mode: 'none', - }, - relationships: { - enabled: true, - llmProposals: true, - validationRequiredForManifest: true, - acceptThreshold: 0.85, - reviewThreshold: 0.55, - maxLlmTablesPerBatch: 40, - maxCandidatesPerColumn: 25, - profileSampleRows: 10000, - validationConcurrency: 4, - }, - }, - }; + return ktxProjectConfigSchema.parse({ project: projectName }); } export function parseKtxProjectConfig(raw: string): KtxProjectConfig { @@ -437,96 +235,29 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig { if (!isRecord(parsed)) { throw new Error('ktx.yaml must contain a YAML object'); } - - const project = parsed.project; - if (typeof project !== 'string' || 
project.trim().length === 0) { - throw new Error('ktx.yaml field "project" is required'); + const result = ktxProjectConfigSchema.safeParse(parsed); + if (!result.success) { + throw new Error(formatZodError(result.error, parsed)); } + return result.data; +} - const defaults = buildDefaultKtxProjectConfig(project.trim()); - const llm = isRecord(parsed.llm) ? parsed.llm : {}; - const storage = isRecord(parsed.storage) ? parsed.storage : {}; - const storageGit = isRecord(storage.git) ? storage.git : {}; - const setup = isRecord(parsed.setup) ? parsed.setup : undefined; - const ingest = isRecord(parsed.ingest) ? parsed.ingest : {}; - const ingestEmbeddings = isRecord(ingest.embeddings) ? ingest.embeddings : {}; - const ingestWorkUnits = isRecord(ingest.workUnits) ? ingest.workUnits : {}; - const agent = isRecord(parsed.agent) ? parsed.agent : {}; - const runResearch = isRecord(agent.run_research) ? agent.run_research : {}; - const memory = isRecord(parsed.memory) ? parsed.memory : {}; - const scan = isRecord(parsed.scan) ? parsed.scan : {}; - const scanEnrichment = isRecord(scan.enrichment) ? scan.enrichment : {}; - const scanRelationships = isRecord(scan.relationships) ? scan.relationships : {}; - if (isRecord(ingest.llm)) { - throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits'); +export function validateKtxProjectConfig(raw: string): KtxConfigValidation { + let parsed: unknown; + try { + parsed = YAML.parse(raw); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${message}` }] }; } - if (scanEnrichment.backend !== undefined) { - throw new Error('Unsupported scan.enrichment.backend: use scan.enrichment.mode'); + if (!isRecord(parsed)) { + return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] }; } - if (isRecord(scanEnrichment.llm)) { - throw new Error('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models'); + const result = ktxProjectConfigSchema.safeParse(parsed); + if (result.success) { + return { ok: true, issues: [] }; } - - const parsedLlm = parseProjectLlmConfig(llm, defaults.llm); - const parsedIngestEmbeddings = parseProjectEmbeddingConfig( - ingestEmbeddings, - defaults.ingest.embeddings, - 'ingest.embeddings', - ); - const parsedIngestWorkUnits = parseIngestWorkUnitsConfig(ingestWorkUnits, defaults.ingest.workUnits); - const scanEmbeddings = parseProjectEmbeddingConfig( - optionalStringRecord(scanEnrichment.embeddings), - defaults.ingest.embeddings, - 'scan.enrichment.embeddings', - ); - const parsedScanEnrichment: KtxScanEnrichmentConfig = { - mode: scanEnrichmentMode(scanEnrichment.mode, defaults.scan.enrichment.mode), - ...(isRecord(scanEnrichment.embeddings) ? { embeddings: scanEmbeddings } : {}), - }; - const parsedScanRelationships = parseScanRelationshipConfig(scanRelationships, defaults.scan.relationships); - const parsedConnections = isRecord(parsed.connections) - ? (parsed.connections as Record) - : defaults.connections; - - return { - project: project.trim(), - ...(setup - ? { - setup: { - database_connection_ids: stringArray(setup.database_connection_ids, []), - }, - } - : {}), - connections: parsedConnections, - storage: { - state: storage.state === 'sqlite' ? 'sqlite' : defaults.storage.state, - search: storage.search === 'sqlite-fts5' ? 
'sqlite-fts5' : defaults.storage.search, - git: { - auto_commit: booleanValue(storageGit.auto_commit, defaults.storage.git.auto_commit), - author: stringValue(storageGit.author, defaults.storage.git.author), - }, - }, - llm: parsedLlm, - ingest: { - adapters: stringArray(ingest.adapters, defaults.ingest.adapters), - embeddings: parsedIngestEmbeddings, - workUnits: parsedIngestWorkUnits, - }, - agent: { - run_research: { - enabled: booleanValue(runResearch.enabled, defaults.agent.run_research.enabled), - max_iterations: numberValue(runResearch.max_iterations, defaults.agent.run_research.max_iterations), - default_toolset: stringArray(runResearch.default_toolset, defaults.agent.run_research.default_toolset), - }, - }, - memory: { - auto_commit: booleanValue(memory.auto_commit, defaults.memory.auto_commit), - }, - scan: { - enrichment: parsedScanEnrichment, - relationships: parsedScanRelationships, - }, - }; + return { ok: false, issues: collectIssues(result.error, parsed) }; } export function serializeKtxProjectConfig(config: KtxProjectConfig): string { diff --git a/packages/context/src/project/index.ts b/packages/context/src/project/index.ts index aaec44ed..aaaf13d2 100644 --- a/packages/context/src/project/index.ts +++ b/packages/context/src/project/index.ts @@ -1,4 +1,6 @@ export type { + KtxConfigIssue, + KtxConfigValidation, KtxProjectConfig, KtxProjectConnectionConfig, KtxProjectEmbeddingConfig, @@ -10,6 +12,7 @@ export { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig, + validateKtxProjectConfig, } from './config.js'; export type { LocalGitFileStoreDeps } from './local-git-file-store.js'; export { LocalGitFileStore } from './local-git-file-store.js'; diff --git a/scripts/examples-docs.test.mjs b/scripts/examples-docs.test.mjs index b5653ee7..0ae3371a 100644 --- a/scripts/examples-docs.test.mjs +++ b/scripts/examples-docs.test.mjs @@ -50,8 +50,8 @@ describe('standalone example docs', () => { config, /path: 
\.\.\/\.\.\/packages\/context\/test\/fixtures\/relationship-benchmarks\/orbit_style_product_no_declared_constraints\/data\.sqlite/, ); - assert.match(config, /llm_proposals: false/); - assert.match(config, /validation_required_for_manifest: true/); + assert.match(config, /llmProposals: false/); + assert.match(config, /validationRequiredForManifest: true/); }); it('documents the Postgres historic SQL smoke example', async () => {