refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
{ok: true, projectDir} on success or the existing invalid_config /
missing_project shapes on failure.
This commit is contained in:
Andrey Avtomonov 2026-05-14 15:36:35 +02:00 committed by GitHub
parent 49f1e2720e
commit b3be54e3fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 783 additions and 545 deletions

View file

@ -13,14 +13,14 @@ ingest:
adapters: []
scan:
enrichment:
backend: none
mode: none
relationships:
enabled: true
llm_proposals: false
validation_required_for_manifest: true
accept_threshold: 0.85
review_threshold: 0.55
max_llm_tables_per_batch: 40
max_candidates_per_column: 25
profile_sample_rows: 10000
validation_concurrency: 4
llmProposals: false
validationRequiredForManifest: true
acceptThreshold: 0.85
reviewThreshold: 0.55
maxLlmTablesPerBatch: 40
maxCandidatesPerColumn: 25
profileSampleRows: 10000
validationConcurrency: 4

View file

@ -17,16 +17,51 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
.description('Check current KTX setup and project readiness')
.option('--json', 'Print JSON output', false)
.option('-v, --verbose', 'Show every check, including passing ones', false)
.option('--validate', 'Only validate the ktx.yaml schema; skip readiness checks', false)
.option('--no-input', 'Disable interactive terminal input')
.action(async (options: { json?: boolean; verbose?: boolean; input?: boolean }, command) => {
const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor;
const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command);
const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd());
if (!explicitOrEnvProjectDir && !nearestProjectDir) {
.action(
async (
options: { json?: boolean; verbose?: boolean; validate?: boolean; input?: boolean },
command,
) => {
const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor;
const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command);
const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd());
if (options.validate === true) {
context.setExitCode(
await runner(
{
command: 'validate',
projectDir: resolveCommandProjectDir(command),
outputMode: outputMode(options),
...inputMode(options),
},
context.io,
),
);
return;
}
if (!explicitOrEnvProjectDir && !nearestProjectDir) {
context.setExitCode(
await runner(
{
command: 'setup',
outputMode: outputMode(options),
verbose: options.verbose === true,
...inputMode(options),
},
context.io,
),
);
return;
}
context.setExitCode(
await runner(
{
command: 'setup',
command: 'project',
projectDir: resolveCommandProjectDir(command),
outputMode: outputMode(options),
verbose: options.verbose === true,
...inputMode(options),
@ -34,19 +69,6 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
context.io,
),
);
return;
}
context.setExitCode(
await runner(
{
command: 'project',
projectDir: resolveCommandProjectDir(command),
outputMode: outputMode(options),
verbose: options.verbose === true,
...inputMode(options),
},
context.io,
),
);
});
},
);
}

View file

@ -324,6 +324,95 @@ describe('runKtxDoctor', () => {
expect(parsed.projectDir).toBe(tempDir);
});
// A misspelled top-level key (`storrage`) and a deprecated nested key (`ingest.llm`)
// must both be reported in plain output, and the run must exit 1.
it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => {
  await writeFile(
    join(tempDir, 'ktx.yaml'),
    [
      'project: warehouse',
      'storrage:',
      // Nested keys need deeper indentation than their parent, otherwise YAML
      // parses them as siblings and the fixture no longer exercises `ingest.llm.backend`.
      '  state: sqlite',
      'ingest:',
      '  llm:',
      '    backend: anthropic',
      '',
    ].join('\n'),
    'utf-8',
  );
  const testIo = makeIo();
  await expect(
    runKtxDoctor(
      { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
      testIo.io,
      {},
    ),
  ).resolves.toBe(1);
  const out = testIo.stdout();
  expect(out).toContain('KTX status');
  expect(out).toContain('Config');
  expect(out).toContain('Unsupported storrage: unknown field');
  expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider');
  expect(out).toContain('ktx.yaml');
});
// JSON mode: an unknown top-level key must produce the `invalid_config` error shape,
// echoing the project directory and listing an issue whose path is the offending key.
it('emits structured JSON when ktx.yaml fails Zod validation', async () => {
await writeFile(
join(tempDir, 'ktx.yaml'),
['project: warehouse', 'storrage: {}', ''].join('\n'),
'utf-8',
);
const testIo = makeIo();
await expect(
runKtxDoctor(
{ command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
testIo.io,
{},
),
).resolves.toBe(1);
// Only the fields asserted below are typed; the payload may carry more.
const parsed = JSON.parse(testIo.stdout()) as {
error: string;
projectDir: string;
issues: Array<{ path: string; message: string }>;
};
expect(parsed.error).toBe('invalid_config');
expect(parsed.projectDir).toBe(tempDir);
expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true);
});
// Happy path for the project command: a schema-valid ktx.yaml must exit 0 and
// render the Config row with the "ktx.yaml schema valid" detail.
it('shows a Config row labelled "ktx.yaml schema valid" on the happy path', async () => {
  // Remember any pre-existing key so the test leaves the environment as it found it,
  // even when an assertion below throws.
  const previousApiKey = process.env.ANTHROPIC_API_KEY;
  process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
  try {
    await writeFile(
      join(tempDir, 'ktx.yaml'),
      [
        'project: warehouse',
        'connections:',
        // `driver`/`path` belong to the `warehouse` connection, so they must be
        // indented deeper than it; at the same level they become separate connections.
        '  warehouse:',
        '    driver: sqlite',
        '    path: ./warehouse.db',
        'llm:',
        '  provider:',
        '    backend: anthropic',
        '',
      ].join('\n'),
      'utf-8',
    );
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(0);
    expect(testIo.stdout()).toContain('ktx.yaml schema valid');
  } finally {
    if (previousApiKey === undefined) {
      delete process.env.ANTHROPIC_API_KEY;
    } else {
      process.env.ANTHROPIC_API_KEY = previousApiKey;
    }
  }
});
it('runs project checks against a valid ktx.yaml', async () => {
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
await writeFile(
@ -565,4 +654,173 @@ describe('runKtxDoctor', () => {
expect(testIo.stdout()).toContain('semantic search degraded');
delete process.env.ANTHROPIC_API_KEY;
});
// Covers the focused `validate` command: it only checks the ktx.yaml schema and
// must not run LLM, connection, or query-history readiness checks.
describe('command: validate', () => {
  // Minimal schema-valid project shared by the success-path tests: one sqlite
  // connection plus an LLM provider. Nested keys are indented under their parent.
  const VALID_KTX_YAML = [
    'project: warehouse',
    'connections:',
    '  warehouse:',
    '    driver: sqlite',
    '    path: ./warehouse.db',
    'llm:',
    '  provider:',
    '    backend: anthropic',
    '',
  ].join('\n');

  // Writes ktx.yaml into the current test's temp project directory.
  const writeKtxYaml = (contents: string) => writeFile(join(tempDir, 'ktx.yaml'), contents, 'utf-8');

  it('prints a success line and exits 0 when ktx.yaml is schema-valid', async () => {
    await writeKtxYaml(VALID_KTX_YAML);
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(0);
    const out = testIo.stdout();
    expect(out).toContain('KTX status');
    expect(out).toContain('Config');
    expect(out).toContain('ktx.yaml schema valid');
    // Readiness sections must be absent in validate mode.
    expect(out).not.toContain('LLM');
    expect(out).not.toContain('Connections');
    expect(out).not.toContain('Pipeline');
  });

  it('emits {ok: true} JSON when ktx.yaml is schema-valid', async () => {
    await writeKtxYaml(VALID_KTX_YAML);
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(0);
    expect(JSON.parse(testIo.stdout())).toEqual({ ok: true, projectDir: tempDir });
  });

  it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => {
    await writeKtxYaml(
      [
        'project: warehouse',
        'storrage:',
        '  state: sqlite',
        'ingest:',
        '  llm:',
        '    backend: anthropic',
        '',
      ].join('\n'),
    );
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(1);
    const out = testIo.stdout();
    expect(out).toContain('Unsupported storrage: unknown field');
    expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider');
  });

  it('emits structured JSON issues when validation fails', async () => {
    await writeKtxYaml(['project: warehouse', 'storrage: {}', ''].join('\n'));
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(1);
    const parsed = JSON.parse(testIo.stdout()) as { error: string; issues: Array<{ path: string }> };
    expect(parsed.error).toBe('invalid_config');
    expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true);
  });

  it('prints the missing-project message and exits 1 when ktx.yaml is absent', async () => {
    // No ktx.yaml is written here on purpose.
    const testIo = makeIo();
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {},
      ),
    ).resolves.toBe(1);
    expect(testIo.stdout()).toContain('No KTX project here yet.');
  });

  it('does not invoke the Postgres query-history probe in validate mode', async () => {
    // A postgres connection with query history enabled would trigger the probe
    // in the full project check; validate mode must leave it untouched.
    await writeKtxYaml(
      [
        'project: warehouse',
        'connections:',
        '  warehouse:',
        '    driver: postgres',
        '    url: env:WAREHOUSE_DATABASE_URL',
        '    context:',
        '      queryHistory:',
        '        enabled: true',
        'llm:',
        '  provider:',
        '    backend: anthropic',
        '',
      ].join('\n'),
    );
    const testIo = makeIo();
    let probeCalls = 0;
    await expect(
      runKtxDoctor(
        { command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
        testIo.io,
        {
          postgresQueryHistoryProbe: async () => {
            probeCalls += 1;
            return { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] };
          },
        },
      ),
    ).resolves.toBe(0);
    expect(probeCalls).toBe(0);
    expect(testIo.stdout()).toContain('ktx.yaml schema valid');
  });
});
});

View file

@ -1,9 +1,10 @@
import { execFile } from 'node:child_process';
import { constants as fsConstants } from 'node:fs';
import { access } from 'node:fs/promises';
import { access, readFile } from 'node:fs/promises';
import { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { promisify } from 'node:util';
import type { KtxConfigIssue } from '@ktx/context/project';
import type { BuildProjectStatusOptions } from './status-project.js';
const execFileAsync = promisify(execFile);
@ -40,6 +41,12 @@ export type KtxDoctorArgs =
outputMode: KtxDoctorOutputMode;
inputMode?: KtxDoctorInputMode;
verbose?: boolean;
}
| {
command: 'validate';
projectDir: string;
outputMode: KtxDoctorOutputMode;
inputMode?: KtxDoctorInputMode;
};
interface KtxDoctorIo {
@ -450,6 +457,84 @@ function writeReport(report: DoctorReport, outputMode: KtxDoctorOutputMode, io:
io.stdout.write(renderPlainReport(report, options));
}
/**
 * Writes the "ktx.yaml failed schema validation" report to `io.stdout`.
 *
 * In `json` mode a single pretty-printed object
 * `{ error: 'invalid_config', projectDir, issues }` is emitted, followed by a newline.
 * In any other mode a plain-text report is written: a header naming the project
 * directory (as returned by `abbreviateHome`, falling back to the raw path), a
 * failing Config row with the issue count, one row per issue followed by its fix
 * hint when present, and a closing line pointing at the ktx.yaml to edit.
 *
 * @param projectDir - Directory that contains the ktx.yaml being reported on.
 * @param issues - Schema issues to report, rendered in the given order.
 * @param outputMode - `'json'` for machine-readable output; otherwise plain text.
 * @param io - Doctor IO whose stdout receives the report.
 */
export function renderInvalidConfigMessage(
  projectDir: string,
  issues: KtxConfigIssue[],
  outputMode: KtxDoctorOutputMode,
  io: KtxDoctorIo,
): void {
  if (outputMode === 'json') {
    const payload = { error: 'invalid_config', projectDir, issues };
    io.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
    return;
  }

  const colorEnabled = shouldUseColor(io);
  const faint = (text: string) => styleDim(colorEnabled, text);
  const strong = (text: string) => styleBold(colorEnabled, text);
  const failMark = () => styleStatus(colorEnabled, 'fail', '✗');
  const shownDir = abbreviateHome(projectDir) ?? projectDir;
  const pluralSuffix = issues.length === 1 ? '' : 's';

  // One row per issue, immediately followed by its fix hint when one exists.
  const issueRows = issues.flatMap((issue) => {
    const row = ` ${failMark()} ${issue.message}`;
    return issue.fix ? [row, ` ${faint(`${issue.fix}`)}`] : [row];
  });

  const report = [
    `${strong('KTX status')} ${faint('·')} ${shownDir}`,
    '',
    ` ${failMark()} ${strong('Config')} ktx.yaml has ${issues.length} schema issue${pluralSuffix}`,
    ...issueRows,
    '',
    ` ${faint('Fix the issues in')} ${join(shownDir, 'ktx.yaml')} ${faint('and rerun')} ${strong('ktx status')}.`,
    // Trailing empty entry so the joined output ends with a newline.
    '',
  ];
  io.stdout.write(report.join('\n'));
}
/**
 * Writes the "ktx.yaml is schema-valid" report to `io.stdout`.
 *
 * In `json` mode a pretty-printed `{ ok: true, projectDir }` object is emitted,
 * followed by a newline. In any other mode a short plain-text report is written:
 * a header naming the project directory (as returned by `abbreviateHome`, falling
 * back to the raw path) and a single passing Config row.
 *
 * @param projectDir - Directory that contains the validated ktx.yaml.
 * @param outputMode - `'json'` for machine-readable output; otherwise plain text.
 * @param io - Doctor IO whose stdout receives the report.
 */
export function renderValidConfigMessage(
  projectDir: string,
  outputMode: KtxDoctorOutputMode,
  io: KtxDoctorIo,
): void {
  if (outputMode === 'json') {
    const payload = { ok: true, projectDir };
    io.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
    return;
  }

  const colorEnabled = shouldUseColor(io);
  const faint = (text: string) => styleDim(colorEnabled, text);
  const strong = (text: string) => styleBold(colorEnabled, text);
  const passMark = styleStatus(colorEnabled, 'pass', '✓');
  const shownDir = abbreviateHome(projectDir) ?? projectDir;

  const report = [
    `${strong('KTX status')} ${faint('·')} ${shownDir}`,
    '',
    ` ${passMark} ${strong('Config')} ${faint('ktx.yaml schema valid')}`,
    // Trailing empty entry so the joined output ends with a newline.
    '',
  ];
  io.stdout.write(report.join('\n'));
}
export function renderMissingProjectMessage(
projectDir: string,
outputMode: KtxDoctorOutputMode,
@ -501,16 +586,39 @@ export async function runKtxDoctor(
try {
const runSetupChecks = deps.runSetupChecks ?? (() => runSetupDoctorChecks());
if (args.command === 'validate') {
const configPath = join(args.projectDir, 'ktx.yaml');
if (!(await defaultPathExists(configPath))) {
renderMissingProjectMessage(args.projectDir, args.outputMode, io);
return 1;
}
const { validateKtxProjectConfig } = await import('@ktx/context/project');
const rawConfig = await readFile(configPath, 'utf-8');
const validation = validateKtxProjectConfig(rawConfig);
if (!validation.ok) {
renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io);
return 1;
}
renderValidConfigMessage(args.projectDir, args.outputMode, io);
return 0;
}
if (args.command === 'project') {
const configPath = join(args.projectDir, 'ktx.yaml');
if (!(await defaultPathExists(configPath))) {
renderMissingProjectMessage(args.projectDir, args.outputMode, io);
return 1;
}
const { loadKtxProject } = await import('@ktx/context/project');
const { loadKtxProject, validateKtxProjectConfig } = await import('@ktx/context/project');
const { buildProjectStatus, renderProjectStatus } = await import('./status-project.js');
const rawConfig = await readFile(configPath, 'utf-8');
const validation = validateKtxProjectConfig(rawConfig);
if (!validation.ok) {
renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io);
return 1;
}
const project = await loadKtxProject({ projectDir: args.projectDir });
const projectStatus = await buildProjectStatus(project, deps);
const projectStatus = await buildProjectStatus(project, { ...deps, configIssues: validation.issues });
const verbose = args.verbose ?? false;
const toolchainChecks = verbose ? await runSetupChecks() : undefined;
if (args.outputMode === 'json') {

View file

@ -1,4 +1,5 @@
import type {
KtxConfigIssue,
KtxLocalProject,
KtxProjectConfig,
KtxProjectConnectionConfig,
@ -56,6 +57,12 @@ interface StorageStatus {
gitAuthor: string;
}
/** Result of the ktx.yaml schema check as shown in the project status "Config" row. */
interface ConfigStatus {
/** Severity level used to pick the row's status symbol. */
status: ProjectStatusLevel;
/** One-line summary rendered next to the symbol. */
detail: string;
/** Individual schema issues listed under the row; empty when the config is valid. */
issues: KtxConfigIssue[];
}
interface WarningItem {
message: string;
fix?: string;
@ -72,6 +79,7 @@ function hasOwnField(value: Record<string, unknown>, key: string): boolean {
export interface ProjectStatus {
projectName: string;
projectDir: string;
config: ConfigStatus;
llm: LlmStatus;
embeddings: EmbeddingsStatus;
storage: StorageStatus;
@ -610,12 +618,26 @@ function buildVerdict(
export interface BuildProjectStatusOptions {
env?: NodeJS.ProcessEnv;
postgresQueryHistoryProbe?: PostgresQueryHistoryProbe;
configIssues?: KtxConfigIssue[];
}
/**
 * Summarises ktx.yaml schema issues for the project status "Config" row.
 *
 * @param issues - Issues found by config validation; `undefined` is treated as none.
 * @returns An `ok` status with a fresh empty issue list when there is nothing to
 *   report, otherwise a `warn` status whose detail counts the issues and whose
 *   `issues` is the array that was passed in.
 */
function buildConfigStatus(issues: KtxConfigIssue[] | undefined): ConfigStatus {
  if (issues == null || issues.length === 0) {
    return { status: 'ok', detail: 'ktx.yaml schema valid', issues: [] };
  }
  const noun = issues.length === 1 ? 'issue' : 'issues';
  return {
    status: 'warn',
    detail: `${issues.length} ${noun} in ktx.yaml`,
    issues,
  };
}
export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise<ProjectStatus> {
const env = options.env ?? process.env;
const config = project.config;
const configStatus = buildConfigStatus(options.configIssues);
const llm = buildLlmStatus(config.llm, env);
const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env);
const storage = buildStorageStatus(config);
@ -630,6 +652,7 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil
return {
projectName: config.project,
projectDir: project.projectDir,
config: configStatus,
llm,
embeddings,
storage,
@ -719,6 +742,13 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec
lines.push(` ${label('Embeddings')} ${embedDetail} ${sym(status.embeddings.status)} ${dim(status.embeddings.detail)}`);
lines.push(` ${label('Storage')} ${dim(`${status.storage.state} (state) · ${status.storage.search} (search)`)}`);
lines.push(` ${label('Config')} ${sym(status.config.status)} ${dim(status.config.detail)}`);
if (status.config.issues.length > 0) {
for (const issue of status.config.issues) {
lines.push(` ${color('warn', SYMBOL.warn)} ${issue.message}`);
if (issue.fix) lines.push(` ${dim(`${issue.fix}`)}`);
}
}
lines.push('');
// Connections

View file

@ -1,5 +1,10 @@
import { describe, expect, it } from 'vitest';
import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js';
import {
buildDefaultKtxProjectConfig,
parseKtxProjectConfig,
serializeKtxProjectConfig,
validateKtxProjectConfig,
} from './config.js';
describe('KTX project config', () => {
it.each(['status', 'replay', 'run', 'watch'])('accepts former ingest subcommand name "%s" as a connection id', (connectionId) => {
@ -277,8 +282,8 @@ scan:
expect(serializeKtxProjectConfig(config)).toContain('validationBudget: all');
});
it('falls back to safe scan relationship defaults for invalid numeric settings', () => {
const config = parseKtxProjectConfig(`
it('rejects out-of-range scan relationship numeric settings', () => {
const yaml = `
project: demo
scan:
relationships:
@ -289,28 +294,33 @@ scan:
profileSampleRows: 0
validationConcurrency: 0
validationBudget: 1.5
`);
`;
expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.acceptThreshold/);
expect(config.scan.relationships).toMatchObject({
acceptThreshold: 0.85,
reviewThreshold: 0.55,
maxLlmTablesPerBatch: 40,
maxCandidatesPerColumn: 25,
profileSampleRows: 10000,
validationConcurrency: 4,
});
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
const validation = validateKtxProjectConfig(yaml);
expect(validation.ok).toBe(false);
const paths = validation.issues.map((issue) => issue.path);
expect(paths).toEqual(
expect.arrayContaining([
'scan.relationships.acceptThreshold',
'scan.relationships.reviewThreshold',
'scan.relationships.maxLlmTablesPerBatch',
'scan.relationships.maxCandidatesPerColumn',
'scan.relationships.profileSampleRows',
'scan.relationships.validationConcurrency',
'scan.relationships.validationBudget',
]),
);
});
it('falls back for invalid scan relationship validation budget strings', () => {
const config = parseKtxProjectConfig(`
it('rejects invalid scan relationship validation budget strings', () => {
const yaml = `
project: demo
scan:
relationships:
validationBudget: infinite
`);
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
`;
expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.validationBudget/);
});
it('rejects unsupported local LLM and embedding fields', () => {
@ -398,4 +408,80 @@ scan:
it('rejects configs with a missing project name', () => {
expect(() => parseKtxProjectConfig('connections: {}\n')).toThrow('ktx.yaml field "project" is required');
});
// The root schema is strict: a misspelled top-level section must be rejected
// rather than silently ignored.
it('rejects unknown top-level fields under strict mode', () => {
  // The template content starts at column 0 on purpose; `state` is indented so
  // it stays nested under the misspelled `storrage` key.
  expect(() =>
    parseKtxProjectConfig(`
project: demo
storrage:
  state: sqlite
`),
  ).toThrow(/Unsupported storrage/);
});
});
// validateKtxProjectConfig never throws: it returns `{ ok, issues }` so callers
// can render every problem at once. YAML in the template literals starts at
// column 0 and uses indentation for nesting, matching the dotted paths asserted below.
describe('validateKtxProjectConfig', () => {
  it('returns ok: true with no issues for a valid config', () => {
    const result = validateKtxProjectConfig('project: warehouse\n');
    expect(result).toEqual({ ok: true, issues: [] });
  });

  it('collects every schema issue without throwing', () => {
    const result = validateKtxProjectConfig(`
project: ""
storage:
  search: not-a-real-backend
scan:
  relationships:
    acceptThreshold: 1.7
`);
    expect(result.ok).toBe(false);
    const paths = result.issues.map((issue) => issue.path);
    expect(paths).toEqual(
      expect.arrayContaining([
        'project',
        'storage.search',
        'scan.relationships.acceptThreshold',
      ]),
    );
  });

  it('attaches migration hints for known deprecated keys', () => {
    const result = validateKtxProjectConfig(`
project: demo
ingest:
  llm:
    backend: anthropic
scan:
  enrichment:
    backend: none
`);
    expect(result.ok).toBe(false);
    const findIssue = (path: string) => result.issues.find((issue) => issue.path === path);
    expect(findIssue('ingest.llm')).toMatchObject({
      message: 'Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits',
      fix: 'use top-level llm.provider, llm.models, and ingest.workUnits',
    });
    expect(findIssue('scan.enrichment.backend')).toMatchObject({
      message: 'Unsupported scan.enrichment.backend: use scan.enrichment.mode',
      fix: 'use scan.enrichment.mode',
    });
  });

  it('reports YAML parse errors as a root-level issue', () => {
    const result = validateKtxProjectConfig(': not valid yaml :\n');
    expect(result.ok).toBe(false);
    // Root-level issues carry an empty path.
    expect(result.issues[0]?.path).toBe('');
    expect(result.issues[0]?.message).toMatch(/ktx\.yaml parse error/);
  });

  // `- nope` is a YAML sequence, not a scalar; the check under test is that any
  // non-object root is rejected with a single root-level issue.
  it('reports a non-object YAML root as a single issue', () => {
    const result = validateKtxProjectConfig('- nope\n');
    expect(result).toEqual({
      ok: false,
      issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }],
    });
  });
});

View file

@ -1,435 +1,233 @@
import type { KtxEmbeddingBackend, KtxLlmBackend, KtxModelRole, KtxPromptCacheTtl } from '@ktx/llm';
import { KTX_MODEL_ROLES } from '@ktx/llm';
import YAML from 'yaml';
import * as z from 'zod';
export type KtxStorageState = 'postgres' | 'sqlite';
export type KtxSearchBackend = 'postgres-hybrid' | 'sqlite-fts5';
type KtxLocalLlmBackend = KtxLlmBackend | 'none';
type KtxLocalEmbeddingBackend = KtxEmbeddingBackend | 'none';
type KtxScanEnrichmentMode = 'none' | 'deterministic' | 'llm';
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
interface KtxProjectPromptCachingConfig {
enabled?: boolean;
systemTtl?: KtxPromptCacheTtl;
toolsTtl?: KtxPromptCacheTtl;
historyTtl?: KtxPromptCacheTtl;
vertexFallbackTo5m?: boolean;
// Migration hints for ktx.yaml keys that are no longer supported, keyed by the
// dotted path of the old key. Each value names the replacement key(s).
// NOTE(review): the consumer is not visible in this view — presumably the issue
// formatter uses the hint as both the message suffix and the `fix` text; confirm.
const DEPRECATED_KEY_HINTS: Record<string, string> = {
'llm.provider.provider': 'use llm.provider.backend',
'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',
'ingest.embeddings.provider': 'use ingest.embeddings.backend',
'scan.enrichment.backend': 'use scan.enrichment.mode',
'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
};
const apiCredentialsSchema = z.strictObject({
api_key: z.string().min(1).optional(),
base_url: z.string().min(1).optional(),
});
const vertexProviderSchema = z.strictObject({
project: z.string().min(1).optional(),
location: z.string().default(''),
});
const sentenceTransformersSchema = z.strictObject({
base_url: z.string().default(''),
pathPrefix: z.string().optional(),
});
const llmProviderSchema = z.strictObject({
backend: z.enum(KTX_LLM_BACKENDS).default('none'),
vertex: vertexProviderSchema.optional(),
anthropic: apiCredentialsSchema.optional(),
gateway: apiCredentialsSchema.optional(),
});
const promptCachingSchema = z.strictObject({
enabled: z.boolean().optional(),
systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
vertexFallbackTo5m: z.boolean().optional(),
});
const llmSchema = z.strictObject({
provider: llmProviderSchema.prefault({}),
models: z.partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1)).default({}),
promptCaching: promptCachingSchema.optional(),
});
const embeddingSchema = z.strictObject({
backend: z.enum(KTX_EMBEDDING_BACKENDS).default('deterministic'),
model: z.string().min(1).optional(),
dimensions: z.int().positive().default(8),
openai: apiCredentialsSchema.optional(),
sentenceTransformers: sentenceTransformersSchema.optional(),
batchSize: z.int().positive().optional(),
});
const workUnitsSchema = z.strictObject({
stepBudget: z.int().positive().default(40),
maxConcurrency: z.int().positive().default(1),
failureMode: z.enum(KTX_WORK_UNIT_FAILURE_MODES).default('continue'),
});
const ingestSchema = z.strictObject({
adapters: z.array(z.string().min(1)).default([]),
embeddings: embeddingSchema.prefault({ backend: 'deterministic', model: 'deterministic' }),
workUnits: workUnitsSchema.prefault({}),
});
const scanEnrichmentSchema = z.strictObject({
mode: z.enum(KTX_ENRICHMENT_MODES).default('none'),
embeddings: embeddingSchema.optional(),
});
// Schema for `scan.relationships`. Every field except `validationBudget` has a
// default, so the section may be partially specified. Values outside the declared
// ranges fail validation instead of falling back to the defaults.
const scanRelationshipsSchema = z.strictObject({
enabled: z.boolean().default(true),
llmProposals: z.boolean().default(true),
validationRequiredForManifest: z.boolean().default(true),
// Thresholds are ratios and must lie within [0, 1].
acceptThreshold: z.number().min(0).max(1).default(0.85),
reviewThreshold: z.number().min(0).max(1).default(0.55),
// Count-like settings must be positive integers.
maxLlmTablesPerBatch: z.int().positive().default(40),
maxCandidatesPerColumn: z.int().positive().default(25),
profileSampleRows: z.int().positive().default(10000),
validationConcurrency: z.int().positive().default(4),
// Either the literal 'all' or a non-negative integer; omitted when not configured.
validationBudget: z.union([z.literal('all'), z.int().nonnegative()]).optional(),
});
const scanSchema = z.strictObject({
enrichment: scanEnrichmentSchema.prefault({}),
relationships: scanRelationshipsSchema.prefault({}),
});
// Schema for the optional top-level `setup` section.
// `completed_steps` is accepted with any value so the strict object does not
// reject files that still carry it, but the transform below keeps only
// `database_connection_ids`, so the field never appears in the parsed config.
const setupSchema = z
.strictObject({
database_connection_ids: z.array(z.string().min(1)).default([]),
completed_steps: z.unknown().optional(),
})
.transform(({ database_connection_ids }) => ({ database_connection_ids }));
const storageGitSchema = z.strictObject({
auto_commit: z.boolean().default(true),
author: z.string().min(1).default('ktx <ktx@example.com>'),
});
const storageSchema = z.strictObject({
state: z.enum(KTX_STORAGE_STATES).default('sqlite'),
search: z.enum(KTX_SEARCH_BACKENDS).default('sqlite-fts5'),
git: storageGitSchema.prefault({}),
});
// Schema for one entry under `connections`. Unlike the other sections this is a
// loose object, so driver-specific keys beyond `driver` and `url` are allowed.
// `driver` is optional here; when present it must be a non-empty string.
const connectionSchema = z.looseObject({
driver: z.string().min(1).optional(),
url: z.string().optional(),
});
const agentSchema = z.strictObject({
run_research: z
.strictObject({
enabled: z.boolean().default(false),
max_iterations: z.number().int().nonnegative().default(20),
default_toolset: z.array(z.string().min(1)).default(['sl_query', 'wiki_search', 'sl_read_source']),
})
.prefault({}),
});
const memorySchema = z.strictObject({
auto_commit: z.boolean().default(true),
});
// Root schema for ktx.yaml. It is strict, so unknown top-level keys are reported
// as issues. `project` is the only required field; the other sections are either
// optional (`setup`) or declare a default / prefault value for when they are omitted.
const ktxProjectConfigSchema = z.strictObject({
// Missing, non-string, and blank (after trimming) values all yield the same message.
project: z
.string({ error: 'ktx.yaml field "project" is required' })
.trim()
.min(1, 'ktx.yaml field "project" is required'),
setup: setupSchema.optional(),
connections: z.record(z.string(), connectionSchema).default({}),
storage: storageSchema.prefault({}),
llm: llmSchema.prefault({}),
ingest: ingestSchema.prefault({}),
agent: agentSchema.prefault({}),
memory: memorySchema.prefault({}),
scan: scanSchema.prefault({}),
});
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
export type KtxProjectLlmProviderConfig = z.infer<typeof llmProviderSchema>;
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
export type KtxIngestWorkUnitsConfig = z.infer<typeof workUnitsSchema>;
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
export type KtxProjectScanConfig = z.infer<typeof scanSchema>;
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
export type KtxProjectSetupConfig = z.infer<typeof setupSchema>;
export type KtxStorageState = z.infer<typeof storageSchema>['state'];
export type KtxSearchBackend = z.infer<typeof storageSchema>['search'];
/** One problem found while validating ktx.yaml. */
export interface KtxConfigIssue {
/** Dotted path of the offending key (e.g. `scan.relationships.acceptThreshold`); empty for root-level issues. */
path: string;
/** Human-readable description of the problem. */
message: string;
/** Optional hint describing how to fix the issue, e.g. the replacement for a deprecated key. */
fix?: string;
}
export interface KtxProjectLlmProviderConfig {
backend: KtxLocalLlmBackend;
vertex?: { project?: string; location: string };
anthropic?: { api_key?: string; base_url?: string };
gateway?: { api_key?: string; base_url?: string };
}
export interface KtxProjectLlmConfig {
provider: KtxProjectLlmProviderConfig;
models: Partial<Record<KtxModelRole, string>> & { default?: string };
promptCaching?: KtxProjectPromptCachingConfig;
}
export interface KtxProjectEmbeddingConfig {
backend: KtxLocalEmbeddingBackend;
model?: string;
dimensions: number;
openai?: { api_key?: string; base_url?: string };
sentenceTransformers?: { base_url: string; pathPrefix?: string };
batchSize?: number;
}
export interface KtxScanEnrichmentConfig {
mode: KtxScanEnrichmentMode;
embeddings?: KtxProjectEmbeddingConfig;
}
export interface KtxIngestWorkUnitsConfig {
stepBudget: number;
maxConcurrency: number;
failureMode: 'abort' | 'continue';
}
export interface KtxScanRelationshipConfig {
enabled: boolean;
llmProposals: boolean;
validationRequiredForManifest: boolean;
acceptThreshold: number;
reviewThreshold: number;
maxLlmTablesPerBatch: number;
maxCandidatesPerColumn: number;
profileSampleRows: number;
validationConcurrency: number;
validationBudget?: number | 'all';
}
export interface KtxProjectScanConfig {
enrichment: KtxScanEnrichmentConfig;
relationships: KtxScanRelationshipConfig;
}
export interface KtxProjectConnectionConfig {
driver: string;
url?: string;
[key: string]: unknown;
}
export interface KtxProjectSetupConfig {
database_connection_ids: string[];
}
export interface KtxProjectConfig {
project: string;
setup?: KtxProjectSetupConfig;
connections: Record<string, KtxProjectConnectionConfig>;
storage: {
state: KtxStorageState;
search: KtxSearchBackend;
git: {
auto_commit: boolean;
author: string;
};
};
llm: KtxProjectLlmConfig;
ingest: {
adapters: string[];
embeddings: KtxProjectEmbeddingConfig;
workUnits: KtxIngestWorkUnitsConfig;
};
agent: {
run_research: {
enabled: boolean;
max_iterations: number;
default_toolset: string[];
};
};
memory: {
auto_commit: boolean;
};
scan: KtxProjectScanConfig;
/** Outcome of validating ktx.yaml without throwing. */
export interface KtxConfigValidation {
/** `true` when the config passed validation. */
ok: boolean;
/** Issues found during validation; empty for a valid config. */
issues: KtxConfigIssue[];
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null && !Array.isArray(value);
}
function stringArray(value: unknown, fallback: string[]): string[] {
if (!Array.isArray(value)) {
return fallback;
}
return value.filter((item): item is string => typeof item === 'string' && item.length > 0);
/**
 * Renders a property path as a dot-separated string.
 *
 * @param path - Path segments (strings, numbers, or symbols).
 * @returns Each segment converted with `String()` and joined with `.`; an empty
 *   path yields the empty string.
 */
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
  const parts: string[] = [];
  for (const key of path) {
    parts.push(String(key));
  }
  return parts.join('.');
}
/**
 * Returns `value` when it is a boolean, otherwise `fallback`.
 */
function booleanValue(value: unknown, fallback: boolean): boolean {
return typeof value === 'boolean' ? value : fallback;
/**
 * Follows `path` segment by segment starting at `root` and returns the value
 * found there. An empty path returns `root` itself. As soon as a step has to
 * index into something that is `null` or not an object, the result is
 * `undefined`.
 */
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
  return path.reduce<unknown>((current, segment) => {
    const indexable = current !== null && typeof current === 'object';
    // Once a step is not indexable, every later step is undefined as well.
    return indexable ? (current as Record<PropertyKey, unknown>)[segment] : undefined;
  }, root);
}
/**
 * Returns `value` when it is a finite number; otherwise `fallback`.
 * NaN and +/-Infinity count as invalid.
 */
function numberValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number') {
    return fallback;
  }
  return Number.isFinite(value) ? value : fallback;
}
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
const basePath = dottedPath(issue.path);
/**
 * Returns `value` unchanged (not trimmed) when it is a string containing at
 * least one non-whitespace character; otherwise `fallback`.
 */
function stringValue(value: unknown, fallback: string): string {
  if (typeof value !== 'string') {
    return fallback;
  }
  const isBlank = value.trim().length === 0;
  return isBlank ? fallback : value;
}
/**
 * Returns the trimmed form of `value` when it is a string with non-whitespace
 * content; returns `undefined` for blank strings and non-string inputs.
 */
function optionalNonEmptyString(value: unknown): string | undefined {
  const trimmed = typeof value === 'string' ? value.trim() : '';
  return trimmed.length === 0 ? undefined : trimmed;
}
/**
 * Returns `value` when it is an integer strictly greater than zero;
 * otherwise `fallback`.
 */
function positiveIntegerConfigValue(value: unknown, fallback: number): number {
  if (typeof value === 'number' && Number.isInteger(value) && value > 0) {
    return value;
  }
  return fallback;
}
/**
 * Accepts the literal `'all'` or a non-negative integer and returns it as-is;
 * any other input yields `fallback` (which may itself be `undefined`).
 */
function validationBudgetConfigValue(value: unknown, fallback: number | 'all' | undefined): number | 'all' | undefined {
  if (typeof value === 'number') {
    const isNonNegativeInteger = Number.isInteger(value) && value >= 0;
    return isNonNegativeInteger ? value : fallback;
  }
  return value === 'all' ? 'all' : fallback;
}
/**
 * Returns `value` when it is a finite number within the closed interval
 * [0, 1]; otherwise `fallback`.
 */
function ratioConfigValue(value: unknown, fallback: number): number {
  if (typeof value === 'number' && Number.isFinite(value) && value >= 0 && value <= 1) {
    return value;
  }
  return fallback;
}
/**
 * Resolves an LLM backend name.
 *
 * @param value - Raw configured value; `null`/`undefined` selects `fallback`.
 * @param fallback - Backend returned when no value is configured.
 * @param section - Config path used as the prefix of the error message.
 * @returns One of `'none'`, `'anthropic'`, `'vertex'`, `'gateway'`, or `fallback`.
 * @throws Error when `value` is present but not one of the supported names.
 */
function localLlmBackend(value: unknown, fallback: KtxLocalLlmBackend, section = 'llm.provider'): KtxLocalLlmBackend {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (value !== 'none' && value !== 'anthropic' && value !== 'vertex' && value !== 'gateway') {
    throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
  }
  return value;
}
/**
 * Resolves an embedding backend name.
 *
 * @param value - Raw configured value; `null`/`undefined` selects `fallback`.
 * @param fallback - Backend returned when no value is configured.
 * @param section - Config path used as the prefix of the error message.
 * @returns One of `'none'`, `'deterministic'`, `'openai'`,
 *   `'sentence-transformers'`, or `fallback`.
 * @throws Error when `value` is present but not one of the supported names.
 */
function localEmbeddingBackend(
  value: unknown,
  fallback: KtxLocalEmbeddingBackend,
  section = 'ingest.embeddings',
): KtxLocalEmbeddingBackend {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (
    value !== 'none' &&
    value !== 'deterministic' &&
    value !== 'openai' &&
    value !== 'sentence-transformers'
  ) {
    throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
  }
  return value;
}
/**
 * Resolves the scan enrichment mode.
 *
 * @param value - Raw configured value; `null`/`undefined` selects `fallback`.
 * @param fallback - Mode returned when no value is configured.
 * @returns One of `'none'`, `'deterministic'`, `'llm'`, or `fallback`.
 * @throws Error when `value` is present but not one of the supported modes.
 */
function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): KtxScanEnrichmentMode {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (value !== 'none' && value !== 'deterministic' && value !== 'llm') {
    throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`);
  }
  return value;
}
/**
 * Guards against a `provider` key in a config section.
 *
 * @param section - Config path used in the error message.
 * @param value - The raw `provider` value; only `undefined` is accepted.
 * @throws Error pointing the user to `<section>.backend` whenever `value`
 *   is anything other than `undefined` (including `null`).
 */
function rejectUnsupportedProvider(section: string, value: unknown): void {
  if (value === undefined) {
    return;
  }
  throw new Error(`Unsupported ${section}.provider: use ${section}.backend`);
}
/**
 * Returns `value` itself when `isRecord` accepts it; otherwise a fresh empty
 * object.
 */
function optionalStringRecord(value: unknown): Record<string, unknown> {
  if (!isRecord(value)) {
    return {};
  }
  return value;
}
/**
 * Extracts the `api_key` / `base_url` pair from a provider section.
 *
 * @param value - Raw section value.
 * @returns An object holding whichever of the two keys has a non-blank string
 *   value (trimmed), or `undefined` when `value` is not a record or neither
 *   key is usable.
 */
function optionalProviderConfig(value: unknown): { api_key?: string; base_url?: string } | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const config: { api_key?: string; base_url?: string } = {};
  const apiKey = optionalNonEmptyString(value.api_key);
  if (apiKey) {
    config.api_key = apiKey;
  }
  const baseUrl = optionalNonEmptyString(value.base_url);
  if (baseUrl) {
    config.base_url = baseUrl;
  }

  // An entry with neither field set is treated the same as no entry at all.
  return apiKey || baseUrl ? config : undefined;
}
/**
 * Builds the role-to-model map from a raw `models` section.
 *
 * Non-record input yields an empty map. Entries whose value is not a
 * non-blank string are skipped; kept values are trimmed. Role names are
 * taken as-is from the input keys.
 */
function parseModels(value: unknown): KtxProjectLlmConfig['models'] {
  const models: KtxProjectLlmConfig['models'] = {};
  if (isRecord(value)) {
    for (const role of Object.keys(value)) {
      const modelName = optionalNonEmptyString(value[role]);
      if (modelName === undefined) {
        continue;
      }
      models[role as KtxModelRole] = modelName;
    }
  }
  return models;
}
/**
 * Returns `value` when it is exactly `'5m'` or `'1h'`; otherwise `undefined`.
 */
function promptCacheTtl(value: unknown): KtxPromptCacheTtl | undefined {
  if (value === '5m' || value === '1h') {
    return value;
  }
  return undefined;
}
/**
 * Parses a raw `promptCaching` section.
 *
 * @param value - Raw section value.
 * @returns `undefined` when `value` is not a record; otherwise an object that
 *   contains only the keys whose raw value is valid: boolean `enabled` and
 *   `vertexFallbackTo5m`, and TTLs accepted by `promptCacheTtl`.
 */
function parsePromptCaching(value: unknown): KtxProjectPromptCachingConfig | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const { enabled, vertexFallbackTo5m } = value;
  const systemTtl = promptCacheTtl(value.systemTtl);
  const toolsTtl = promptCacheTtl(value.toolsTtl);
  const historyTtl = promptCacheTtl(value.historyTtl);

  return {
    ...(typeof enabled === 'boolean' ? { enabled } : {}),
    ...(systemTtl !== undefined ? { systemTtl } : {}),
    ...(toolsTtl !== undefined ? { toolsTtl } : {}),
    ...(historyTtl !== undefined ? { historyTtl } : {}),
    ...(typeof vertexFallbackTo5m === 'boolean' ? { vertexFallbackTo5m } : {}),
  };
}
function parseProjectLlmProviderConfig(
raw: Record<string, unknown>,
defaults: KtxProjectLlmProviderConfig,
section: string,
): KtxProjectLlmProviderConfig {
rejectUnsupportedProvider(section, raw.provider);
const vertex = isRecord(raw.vertex)
? {
...(optionalNonEmptyString(raw.vertex.project) ? { project: optionalNonEmptyString(raw.vertex.project) } : {}),
location: stringValue(raw.vertex.location, ''),
if (issue.code === 'unrecognized_keys') {
const keys = (issue as { keys?: readonly string[] }).keys ?? [];
return keys.map((key) => {
const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
const hint = DEPRECATED_KEY_HINTS[fullPath];
if (hint !== undefined) {
return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
}
: undefined;
const anthropic = optionalProviderConfig(raw.anthropic);
const gateway = optionalProviderConfig(raw.gateway);
return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
});
}
return {
backend: localLlmBackend(raw.backend, defaults.backend, section),
...(vertex ? { vertex } : {}),
...(anthropic ? { anthropic } : {}),
...(gateway ? { gateway } : {}),
};
const lastSegment = issue.path[issue.path.length - 1];
if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
const value = valueAtPath(input, issue.path);
return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
}
return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
}
/**
 * Parses the raw `llm` section into a KtxProjectLlmConfig.
 *
 * The provider sub-section is parsed under the 'llm.provider' path (a
 * non-record `provider` is treated as empty), models fall back to
 * `defaults.models` when `raw.models` is null/undefined, and `promptCaching`
 * is included only when parsing it yields a value.
 */
function parseProjectLlmConfig(raw: Record<string, unknown>, defaults: KtxProjectLlmConfig): KtxProjectLlmConfig {
const provider = isRecord(raw.provider) ? raw.provider : {};
return {
provider: parseProjectLlmProviderConfig(provider, defaults.provider, 'llm.provider'),
models: parseModels(raw.models ?? defaults.models),
...(parsePromptCaching(raw.promptCaching) ? { promptCaching: parsePromptCaching(raw.promptCaching) } : {}),
};
/**
 * Converts every issue of a Zod error into config issues via `formatIssue`
 * and returns them as one flat list, in the order Zod reported them.
 */
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
  const collected: KtxConfigIssue[] = [];
  for (const issue of error.issues) {
    // One Zod issue may expand into several config issues.
    collected.push(...formatIssue(issue, input));
  }
  return collected;
}
/**
 * Parses a raw embeddings section into a KtxProjectEmbeddingConfig.
 *
 * @param raw - Raw section contents.
 * @param defaults - Values used when a field is absent or invalid.
 * @param section - Config path used in error messages.
 * @returns The parsed config. `model`, `openai`, `sentenceTransformers` and
 *   `batchSize` appear only when they resolve to a usable value.
 * @throws Error when `raw.provider` is set or `raw.backend` is unsupported.
 */
function parseProjectEmbeddingConfig(
  raw: Record<string, unknown>,
  defaults: KtxProjectEmbeddingConfig,
  section: string,
): KtxProjectEmbeddingConfig {
  rejectUnsupportedProvider(section, raw.provider);

  const openai = optionalProviderConfig(raw.openai);

  let sentenceTransformers: { base_url: string; pathPrefix?: string } | undefined;
  const rawSentenceTransformers = raw.sentenceTransformers;
  if (isRecord(rawSentenceTransformers)) {
    sentenceTransformers = { base_url: stringValue(rawSentenceTransformers.base_url, '') };
    if (typeof rawSentenceTransformers.pathPrefix === 'string') {
      sentenceTransformers.pathPrefix = rawSentenceTransformers.pathPrefix;
    }
  }

  const backend = localEmbeddingBackend(raw.backend, defaults.backend, section);

  // The default model is inherited only when the backend itself was inherited
  // and that backend is not 'none'.
  let model = optionalNonEmptyString(raw.model);
  if (model === undefined && raw.backend == null && backend !== 'none') {
    model = defaults.model;
  }

  // A fallback of 0 marks "no valid batch size configured".
  const batchSize = positiveIntegerConfigValue(raw.batchSize, 0);
  const dimensions = positiveIntegerConfigValue(raw.dimensions, defaults.dimensions);

  return {
    backend,
    ...(model ? { model } : {}),
    dimensions,
    ...(openai ? { openai } : {}),
    ...(sentenceTransformers ? { sentenceTransformers } : {}),
    ...(batchSize > 0 ? { batchSize } : {}),
  };
}
/**
 * Parses the raw `scan.relationships` section.
 *
 * Each field is taken from `raw` when valid for its kind (boolean, ratio in
 * [0, 1], positive integer) and from `defaults` otherwise. `validationBudget`
 * is present in the result only when it resolves to a defined value.
 */
function parseScanRelationshipConfig(
  raw: Record<string, unknown>,
  defaults: KtxScanRelationshipConfig,
): KtxScanRelationshipConfig {
  const flag = (key: 'enabled' | 'llmProposals' | 'validationRequiredForManifest'): boolean =>
    booleanValue(raw[key], defaults[key]);
  const ratio = (key: 'acceptThreshold' | 'reviewThreshold'): number => ratioConfigValue(raw[key], defaults[key]);
  const count = (
    key: 'maxLlmTablesPerBatch' | 'maxCandidatesPerColumn' | 'profileSampleRows' | 'validationConcurrency',
  ): number => positiveIntegerConfigValue(raw[key], defaults[key]);

  const parsed: KtxScanRelationshipConfig = {
    enabled: flag('enabled'),
    llmProposals: flag('llmProposals'),
    validationRequiredForManifest: flag('validationRequiredForManifest'),
    acceptThreshold: ratio('acceptThreshold'),
    reviewThreshold: ratio('reviewThreshold'),
    maxLlmTablesPerBatch: count('maxLlmTablesPerBatch'),
    maxCandidatesPerColumn: count('maxCandidatesPerColumn'),
    profileSampleRows: count('profileSampleRows'),
    validationConcurrency: count('validationConcurrency'),
  };

  const budget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget);
  return budget === undefined ? parsed : { ...parsed, validationBudget: budget };
}
/**
 * Returns `value` when it is exactly `'abort'` or `'continue'`; otherwise
 * `fallback`.
 */
function workUnitFailureMode(value: unknown, fallback: 'abort' | 'continue'): 'abort' | 'continue' {
  if (value === 'abort' || value === 'continue') {
    return value;
  }
  return fallback;
}
/**
 * Parses the raw `ingest.workUnits` section. `stepBudget` and
 * `maxConcurrency` must be positive integers and `failureMode` must be
 * 'abort' or 'continue'; any field that is missing or invalid takes its value
 * from `defaults`.
 */
function parseIngestWorkUnitsConfig(
raw: Record<string, unknown>,
defaults: KtxIngestWorkUnitsConfig,
): KtxIngestWorkUnitsConfig {
return {
stepBudget: positiveIntegerConfigValue(raw.stepBudget, defaults.stepBudget),
maxConcurrency: positiveIntegerConfigValue(raw.maxConcurrency, defaults.maxConcurrency),
failureMode: workUnitFailureMode(raw.failureMode, defaults.failureMode),
};
/**
 * Renders a Zod error as plain text: the message of every collected config
 * issue, one per line.
 */
function formatZodError(error: z.ZodError, input: unknown): string {
  const messages = collectIssues(error, input).map(({ message }) => message);
  return messages.join('\n');
}
/**
 * Builds the default project configuration for the given project name.
 *
 * @param projectName - Value for the `project` field; defaults to 'ktx-project'.
 * @returns A complete KtxProjectConfig populated with default values.
 */
export function buildDefaultKtxProjectConfig(projectName = 'ktx-project'): KtxProjectConfig {
return {
project: projectName,
connections: {},
storage: {
state: 'sqlite',
search: 'sqlite-fts5',
git: {
auto_commit: true,
author: 'ktx <ktx@example.com>',
},
},
llm: {
provider: {
backend: 'none',
},
models: {},
},
ingest: {
adapters: [],
embeddings: {
backend: 'deterministic',
model: 'deterministic',
dimensions: 8,
},
workUnits: {
stepBudget: 40,
maxConcurrency: 1,
failureMode: 'continue',
},
},
agent: {
run_research: {
enabled: false,
max_iterations: 20,
default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'],
},
},
memory: {
auto_commit: true,
},
scan: {
enrichment: {
mode: 'none',
},
relationships: {
enabled: true,
llmProposals: true,
validationRequiredForManifest: true,
acceptThreshold: 0.85,
reviewThreshold: 0.55,
maxLlmTablesPerBatch: 40,
maxCandidatesPerColumn: 25,
profileSampleRows: 10000,
validationConcurrency: 4,
},
},
};
// NOTE(review): this statement follows an unconditional return above and so never
// runs as written; it looks like the schema-derived replacement for the literal
// above — confirm which of the two bodies is meant to remain.
return ktxProjectConfigSchema.parse({ project: projectName });
}
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
@ -437,96 +235,29 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
if (!isRecord(parsed)) {
throw new Error('ktx.yaml must contain a YAML object');
}
const project = parsed.project;
if (typeof project !== 'string' || project.trim().length === 0) {
throw new Error('ktx.yaml field "project" is required');
const result = ktxProjectConfigSchema.safeParse(parsed);
if (!result.success) {
throw new Error(formatZodError(result.error, parsed));
}
return result.data;
}
const defaults = buildDefaultKtxProjectConfig(project.trim());
const llm = isRecord(parsed.llm) ? parsed.llm : {};
const storage = isRecord(parsed.storage) ? parsed.storage : {};
const storageGit = isRecord(storage.git) ? storage.git : {};
const setup = isRecord(parsed.setup) ? parsed.setup : undefined;
const ingest = isRecord(parsed.ingest) ? parsed.ingest : {};
const ingestEmbeddings = isRecord(ingest.embeddings) ? ingest.embeddings : {};
const ingestWorkUnits = isRecord(ingest.workUnits) ? ingest.workUnits : {};
const agent = isRecord(parsed.agent) ? parsed.agent : {};
const runResearch = isRecord(agent.run_research) ? agent.run_research : {};
const memory = isRecord(parsed.memory) ? parsed.memory : {};
const scan = isRecord(parsed.scan) ? parsed.scan : {};
const scanEnrichment = isRecord(scan.enrichment) ? scan.enrichment : {};
const scanRelationships = isRecord(scan.relationships) ? scan.relationships : {};
if (isRecord(ingest.llm)) {
throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits');
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
let parsed: unknown;
try {
parsed = YAML.parse(raw);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${message}` }] };
}
if (scanEnrichment.backend !== undefined) {
throw new Error('Unsupported scan.enrichment.backend: use scan.enrichment.mode');
if (!isRecord(parsed)) {
return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
}
if (isRecord(scanEnrichment.llm)) {
throw new Error('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models');
const result = ktxProjectConfigSchema.safeParse(parsed);
if (result.success) {
return { ok: true, issues: [] };
}
const parsedLlm = parseProjectLlmConfig(llm, defaults.llm);
const parsedIngestEmbeddings = parseProjectEmbeddingConfig(
ingestEmbeddings,
defaults.ingest.embeddings,
'ingest.embeddings',
);
const parsedIngestWorkUnits = parseIngestWorkUnitsConfig(ingestWorkUnits, defaults.ingest.workUnits);
const scanEmbeddings = parseProjectEmbeddingConfig(
optionalStringRecord(scanEnrichment.embeddings),
defaults.ingest.embeddings,
'scan.enrichment.embeddings',
);
const parsedScanEnrichment: KtxScanEnrichmentConfig = {
mode: scanEnrichmentMode(scanEnrichment.mode, defaults.scan.enrichment.mode),
...(isRecord(scanEnrichment.embeddings) ? { embeddings: scanEmbeddings } : {}),
};
const parsedScanRelationships = parseScanRelationshipConfig(scanRelationships, defaults.scan.relationships);
const parsedConnections = isRecord(parsed.connections)
? (parsed.connections as Record<string, KtxProjectConnectionConfig>)
: defaults.connections;
return {
project: project.trim(),
...(setup
? {
setup: {
database_connection_ids: stringArray(setup.database_connection_ids, []),
},
}
: {}),
connections: parsedConnections,
storage: {
state: storage.state === 'sqlite' ? 'sqlite' : defaults.storage.state,
search: storage.search === 'sqlite-fts5' ? 'sqlite-fts5' : defaults.storage.search,
git: {
auto_commit: booleanValue(storageGit.auto_commit, defaults.storage.git.auto_commit),
author: stringValue(storageGit.author, defaults.storage.git.author),
},
},
llm: parsedLlm,
ingest: {
adapters: stringArray(ingest.adapters, defaults.ingest.adapters),
embeddings: parsedIngestEmbeddings,
workUnits: parsedIngestWorkUnits,
},
agent: {
run_research: {
enabled: booleanValue(runResearch.enabled, defaults.agent.run_research.enabled),
max_iterations: numberValue(runResearch.max_iterations, defaults.agent.run_research.max_iterations),
default_toolset: stringArray(runResearch.default_toolset, defaults.agent.run_research.default_toolset),
},
},
memory: {
auto_commit: booleanValue(memory.auto_commit, defaults.memory.auto_commit),
},
scan: {
enrichment: parsedScanEnrichment,
relationships: parsedScanRelationships,
},
};
return { ok: false, issues: collectIssues(result.error, parsed) };
}
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {

View file

@ -1,4 +1,6 @@
export type {
KtxConfigIssue,
KtxConfigValidation,
KtxProjectConfig,
KtxProjectConnectionConfig,
KtxProjectEmbeddingConfig,
@ -10,6 +12,7 @@ export {
buildDefaultKtxProjectConfig,
parseKtxProjectConfig,
serializeKtxProjectConfig,
validateKtxProjectConfig,
} from './config.js';
export type { LocalGitFileStoreDeps } from './local-git-file-store.js';
export { LocalGitFileStore } from './local-git-file-store.js';

View file

@ -50,8 +50,8 @@ describe('standalone example docs', () => {
config,
/path: \.\.\/\.\.\/packages\/context\/test\/fixtures\/relationship-benchmarks\/orbit_style_product_no_declared_constraints\/data\.sqlite/,
);
assert.match(config, /llm_proposals: false/);
assert.match(config, /validation_required_for_manifest: true/);
assert.match(config, /llmProposals: false/);
assert.match(config, /validationRequiredForManifest: true/);
});
it('documents the Postgres historic SQL smoke example', async () => {