mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-10 08:05:14 +02:00
refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)
* refactor(context): validate ktx.yaml with Zod and surface issues in status
- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning { ok, issues }, where issues is a
structured KtxConfigIssue[] with migration hints for deprecated keys
(ingest.llm, scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
match the schema.
* fix(context): tolerate legacy setup.completed_steps and optional driver
- Accept and drop the legacy setup.completed_steps field so existing
ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
already produces a clear "no driver" error at use time.
* feat(cli): add ktx status --validate to run only ktx.yaml schema validation
- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
{ok: true, projectDir} on success or the existing invalid_config /
missing_project shapes on failure.
This commit is contained in:
parent
49f1e2720e
commit
b3be54e3fa
9 changed files with 783 additions and 545 deletions
|
|
@ -13,14 +13,14 @@ ingest:
|
|||
adapters: []
|
||||
scan:
|
||||
enrichment:
|
||||
backend: none
|
||||
mode: none
|
||||
relationships:
|
||||
enabled: true
|
||||
llm_proposals: false
|
||||
validation_required_for_manifest: true
|
||||
accept_threshold: 0.85
|
||||
review_threshold: 0.55
|
||||
max_llm_tables_per_batch: 40
|
||||
max_candidates_per_column: 25
|
||||
profile_sample_rows: 10000
|
||||
validation_concurrency: 4
|
||||
llmProposals: false
|
||||
validationRequiredForManifest: true
|
||||
acceptThreshold: 0.85
|
||||
reviewThreshold: 0.55
|
||||
maxLlmTablesPerBatch: 40
|
||||
maxCandidatesPerColumn: 25
|
||||
profileSampleRows: 10000
|
||||
validationConcurrency: 4
|
||||
|
|
|
|||
|
|
@ -17,16 +17,51 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
|
|||
.description('Check current KTX setup and project readiness')
|
||||
.option('--json', 'Print JSON output', false)
|
||||
.option('-v, --verbose', 'Show every check, including passing ones', false)
|
||||
.option('--validate', 'Only validate the ktx.yaml schema; skip readiness checks', false)
|
||||
.option('--no-input', 'Disable interactive terminal input')
|
||||
.action(async (options: { json?: boolean; verbose?: boolean; input?: boolean }, command) => {
|
||||
const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor;
|
||||
const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command);
|
||||
const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd());
|
||||
if (!explicitOrEnvProjectDir && !nearestProjectDir) {
|
||||
.action(
|
||||
async (
|
||||
options: { json?: boolean; verbose?: boolean; validate?: boolean; input?: boolean },
|
||||
command,
|
||||
) => {
|
||||
const runner = context.deps.doctor ?? (await import('../doctor.js')).runKtxDoctor;
|
||||
const explicitOrEnvProjectDir = resolveCommandProjectDirOverride(command);
|
||||
const nearestProjectDir = explicitOrEnvProjectDir ? undefined : findNearestKtxProjectDir(process.cwd());
|
||||
|
||||
if (options.validate === true) {
|
||||
context.setExitCode(
|
||||
await runner(
|
||||
{
|
||||
command: 'validate',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
outputMode: outputMode(options),
|
||||
...inputMode(options),
|
||||
},
|
||||
context.io,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!explicitOrEnvProjectDir && !nearestProjectDir) {
|
||||
context.setExitCode(
|
||||
await runner(
|
||||
{
|
||||
command: 'setup',
|
||||
outputMode: outputMode(options),
|
||||
verbose: options.verbose === true,
|
||||
...inputMode(options),
|
||||
},
|
||||
context.io,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
context.setExitCode(
|
||||
await runner(
|
||||
{
|
||||
command: 'setup',
|
||||
command: 'project',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
outputMode: outputMode(options),
|
||||
verbose: options.verbose === true,
|
||||
...inputMode(options),
|
||||
|
|
@ -34,19 +69,6 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC
|
|||
context.io,
|
||||
),
|
||||
);
|
||||
return;
|
||||
}
|
||||
context.setExitCode(
|
||||
await runner(
|
||||
{
|
||||
command: 'project',
|
||||
projectDir: resolveCommandProjectDir(command),
|
||||
outputMode: outputMode(options),
|
||||
verbose: options.verbose === true,
|
||||
...inputMode(options),
|
||||
},
|
||||
context.io,
|
||||
),
|
||||
);
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -324,6 +324,95 @@ describe('runKtxDoctor', () => {
|
|||
expect(parsed.projectDir).toBe(tempDir);
|
||||
});
|
||||
|
||||
it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'storrage:',
|
||||
' state: sqlite',
|
||||
'ingest:',
|
||||
' llm:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
const out = testIo.stdout();
|
||||
expect(out).toContain('KTX status');
|
||||
expect(out).toContain('Config');
|
||||
expect(out).toContain('Unsupported storrage: unknown field');
|
||||
expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider');
|
||||
expect(out).toContain('ktx.yaml');
|
||||
});
|
||||
|
||||
it('emits structured JSON when ktx.yaml fails Zod validation', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
['project: warehouse', 'storrage: {}', ''].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'project', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
const parsed = JSON.parse(testIo.stdout()) as {
|
||||
error: string;
|
||||
projectDir: string;
|
||||
issues: Array<{ path: string; message: string }>;
|
||||
};
|
||||
expect(parsed.error).toBe('invalid_config');
|
||||
expect(parsed.projectDir).toBe(tempDir);
|
||||
expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true);
|
||||
});
|
||||
|
||||
it('shows a Config row labelled "ktx.yaml schema valid" on the happy path', async () => {
|
||||
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: sqlite',
|
||||
' path: ./warehouse.db',
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'project', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(testIo.stdout()).toContain('ktx.yaml schema valid');
|
||||
delete process.env.ANTHROPIC_API_KEY;
|
||||
});
|
||||
|
||||
it('runs project checks against a valid ktx.yaml', async () => {
|
||||
process.env.ANTHROPIC_API_KEY = 'test-key'; // pragma: allowlist secret
|
||||
await writeFile(
|
||||
|
|
@ -565,4 +654,173 @@ describe('runKtxDoctor', () => {
|
|||
expect(testIo.stdout()).toContain('semantic search degraded');
|
||||
delete process.env.ANTHROPIC_API_KEY;
|
||||
});
|
||||
|
||||
describe('command: validate', () => {
|
||||
it('prints a success line and exits 0 when ktx.yaml is schema-valid', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: sqlite',
|
||||
' path: ./warehouse.db',
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
const out = testIo.stdout();
|
||||
expect(out).toContain('KTX status');
|
||||
expect(out).toContain('Config');
|
||||
expect(out).toContain('ktx.yaml schema valid');
|
||||
expect(out).not.toContain('LLM');
|
||||
expect(out).not.toContain('Connections');
|
||||
expect(out).not.toContain('Pipeline');
|
||||
});
|
||||
|
||||
it('emits {ok: true} JSON when ktx.yaml is schema-valid', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: sqlite',
|
||||
' path: ./warehouse.db',
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(JSON.parse(testIo.stdout())).toEqual({ ok: true, projectDir: tempDir });
|
||||
});
|
||||
|
||||
it('prints schema issues and exits 1 when ktx.yaml fails Zod validation', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'storrage:',
|
||||
' state: sqlite',
|
||||
'ingest:',
|
||||
' llm:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
const out = testIo.stdout();
|
||||
expect(out).toContain('Unsupported storrage: unknown field');
|
||||
expect(out).toContain('Unsupported ingest.llm: use top-level llm.provider');
|
||||
});
|
||||
|
||||
it('emits structured JSON issues when validation fails', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
['project: warehouse', 'storrage: {}', ''].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'json', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
const parsed = JSON.parse(testIo.stdout()) as { error: string; issues: Array<{ path: string }> };
|
||||
expect(parsed.error).toBe('invalid_config');
|
||||
expect(parsed.issues.some((issue) => issue.path === 'storrage')).toBe(true);
|
||||
});
|
||||
|
||||
it('prints the missing-project message and exits 1 when ktx.yaml is absent', async () => {
|
||||
const testIo = makeIo();
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{},
|
||||
),
|
||||
).resolves.toBe(1);
|
||||
|
||||
expect(testIo.stdout()).toContain('No KTX project here yet.');
|
||||
});
|
||||
|
||||
it('does not invoke the Postgres query-history probe in validate mode', async () => {
|
||||
await writeFile(
|
||||
join(tempDir, 'ktx.yaml'),
|
||||
[
|
||||
'project: warehouse',
|
||||
'connections:',
|
||||
' warehouse:',
|
||||
' driver: postgres',
|
||||
' url: env:WAREHOUSE_DATABASE_URL',
|
||||
' context:',
|
||||
' queryHistory:',
|
||||
' enabled: true',
|
||||
'llm:',
|
||||
' provider:',
|
||||
' backend: anthropic',
|
||||
'',
|
||||
].join('\n'),
|
||||
'utf-8',
|
||||
);
|
||||
const testIo = makeIo();
|
||||
let probeCalls = 0;
|
||||
|
||||
await expect(
|
||||
runKtxDoctor(
|
||||
{ command: 'validate', projectDir: tempDir, outputMode: 'plain', inputMode: 'disabled' },
|
||||
testIo.io,
|
||||
{
|
||||
postgresQueryHistoryProbe: async () => {
|
||||
probeCalls += 1;
|
||||
return { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] };
|
||||
},
|
||||
},
|
||||
),
|
||||
).resolves.toBe(0);
|
||||
|
||||
expect(probeCalls).toBe(0);
|
||||
expect(testIo.stdout()).toContain('ktx.yaml schema valid');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
import { execFile } from 'node:child_process';
|
||||
import { constants as fsConstants } from 'node:fs';
|
||||
import { access } from 'node:fs/promises';
|
||||
import { access, readFile } from 'node:fs/promises';
|
||||
import { join, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { promisify } from 'node:util';
|
||||
import type { KtxConfigIssue } from '@ktx/context/project';
|
||||
import type { BuildProjectStatusOptions } from './status-project.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
|
@ -40,6 +41,12 @@ export type KtxDoctorArgs =
|
|||
outputMode: KtxDoctorOutputMode;
|
||||
inputMode?: KtxDoctorInputMode;
|
||||
verbose?: boolean;
|
||||
}
|
||||
| {
|
||||
command: 'validate';
|
||||
projectDir: string;
|
||||
outputMode: KtxDoctorOutputMode;
|
||||
inputMode?: KtxDoctorInputMode;
|
||||
};
|
||||
|
||||
interface KtxDoctorIo {
|
||||
|
|
@ -450,6 +457,84 @@ function writeReport(report: DoctorReport, outputMode: KtxDoctorOutputMode, io:
|
|||
io.stdout.write(renderPlainReport(report, options));
|
||||
}
|
||||
|
||||
export function renderInvalidConfigMessage(
|
||||
projectDir: string,
|
||||
issues: KtxConfigIssue[],
|
||||
outputMode: KtxDoctorOutputMode,
|
||||
io: KtxDoctorIo,
|
||||
): void {
|
||||
if (outputMode === 'json') {
|
||||
io.stdout.write(
|
||||
`${JSON.stringify(
|
||||
{
|
||||
error: 'invalid_config',
|
||||
projectDir,
|
||||
issues,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const useColor = shouldUseColor(io);
|
||||
const dim = (text: string) => styleDim(useColor, text);
|
||||
const bold = (text: string) => styleBold(useColor, text);
|
||||
const status = (s: DoctorStatus, text: string) => styleStatus(useColor, s, text);
|
||||
const abbreviated = abbreviateHome(projectDir) ?? projectDir;
|
||||
|
||||
const lines: string[] = [];
|
||||
lines.push(`${bold('KTX status')} ${dim('·')} ${abbreviated}`);
|
||||
lines.push('');
|
||||
lines.push(` ${status('fail', '✗')} ${bold('Config')} ktx.yaml has ${issues.length} schema issue${issues.length === 1 ? '' : 's'}`);
|
||||
for (const issue of issues) {
|
||||
lines.push(` ${status('fail', '✗')} ${issue.message}`);
|
||||
if (issue.fix) {
|
||||
lines.push(` ${dim(`→ ${issue.fix}`)}`);
|
||||
}
|
||||
}
|
||||
lines.push('');
|
||||
lines.push(` ${dim('Fix the issues in')} ${join(abbreviated, 'ktx.yaml')} ${dim('and rerun')} ${bold('ktx status')}.`);
|
||||
lines.push('');
|
||||
|
||||
io.stdout.write(lines.join('\n'));
|
||||
}
|
||||
|
||||
export function renderValidConfigMessage(
|
||||
projectDir: string,
|
||||
outputMode: KtxDoctorOutputMode,
|
||||
io: KtxDoctorIo,
|
||||
): void {
|
||||
if (outputMode === 'json') {
|
||||
io.stdout.write(
|
||||
`${JSON.stringify(
|
||||
{
|
||||
ok: true,
|
||||
projectDir,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const useColor = shouldUseColor(io);
|
||||
const dim = (text: string) => styleDim(useColor, text);
|
||||
const bold = (text: string) => styleBold(useColor, text);
|
||||
const status = (s: DoctorStatus, text: string) => styleStatus(useColor, s, text);
|
||||
const abbreviated = abbreviateHome(projectDir) ?? projectDir;
|
||||
|
||||
const lines: string[] = [];
|
||||
lines.push(`${bold('KTX status')} ${dim('·')} ${abbreviated}`);
|
||||
lines.push('');
|
||||
lines.push(` ${status('pass', '✓')} ${bold('Config')} ${dim('ktx.yaml schema valid')}`);
|
||||
lines.push('');
|
||||
|
||||
io.stdout.write(lines.join('\n'));
|
||||
}
|
||||
|
||||
export function renderMissingProjectMessage(
|
||||
projectDir: string,
|
||||
outputMode: KtxDoctorOutputMode,
|
||||
|
|
@ -501,16 +586,39 @@ export async function runKtxDoctor(
|
|||
try {
|
||||
const runSetupChecks = deps.runSetupChecks ?? (() => runSetupDoctorChecks());
|
||||
|
||||
if (args.command === 'validate') {
|
||||
const configPath = join(args.projectDir, 'ktx.yaml');
|
||||
if (!(await defaultPathExists(configPath))) {
|
||||
renderMissingProjectMessage(args.projectDir, args.outputMode, io);
|
||||
return 1;
|
||||
}
|
||||
const { validateKtxProjectConfig } = await import('@ktx/context/project');
|
||||
const rawConfig = await readFile(configPath, 'utf-8');
|
||||
const validation = validateKtxProjectConfig(rawConfig);
|
||||
if (!validation.ok) {
|
||||
renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io);
|
||||
return 1;
|
||||
}
|
||||
renderValidConfigMessage(args.projectDir, args.outputMode, io);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (args.command === 'project') {
|
||||
const configPath = join(args.projectDir, 'ktx.yaml');
|
||||
if (!(await defaultPathExists(configPath))) {
|
||||
renderMissingProjectMessage(args.projectDir, args.outputMode, io);
|
||||
return 1;
|
||||
}
|
||||
const { loadKtxProject } = await import('@ktx/context/project');
|
||||
const { loadKtxProject, validateKtxProjectConfig } = await import('@ktx/context/project');
|
||||
const { buildProjectStatus, renderProjectStatus } = await import('./status-project.js');
|
||||
const rawConfig = await readFile(configPath, 'utf-8');
|
||||
const validation = validateKtxProjectConfig(rawConfig);
|
||||
if (!validation.ok) {
|
||||
renderInvalidConfigMessage(args.projectDir, validation.issues, args.outputMode, io);
|
||||
return 1;
|
||||
}
|
||||
const project = await loadKtxProject({ projectDir: args.projectDir });
|
||||
const projectStatus = await buildProjectStatus(project, deps);
|
||||
const projectStatus = await buildProjectStatus(project, { ...deps, configIssues: validation.issues });
|
||||
const verbose = args.verbose ?? false;
|
||||
const toolchainChecks = verbose ? await runSetupChecks() : undefined;
|
||||
if (args.outputMode === 'json') {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type {
|
||||
KtxConfigIssue,
|
||||
KtxLocalProject,
|
||||
KtxProjectConfig,
|
||||
KtxProjectConnectionConfig,
|
||||
|
|
@ -56,6 +57,12 @@ interface StorageStatus {
|
|||
gitAuthor: string;
|
||||
}
|
||||
|
||||
interface ConfigStatus {
|
||||
status: ProjectStatusLevel;
|
||||
detail: string;
|
||||
issues: KtxConfigIssue[];
|
||||
}
|
||||
|
||||
interface WarningItem {
|
||||
message: string;
|
||||
fix?: string;
|
||||
|
|
@ -72,6 +79,7 @@ function hasOwnField(value: Record<string, unknown>, key: string): boolean {
|
|||
export interface ProjectStatus {
|
||||
projectName: string;
|
||||
projectDir: string;
|
||||
config: ConfigStatus;
|
||||
llm: LlmStatus;
|
||||
embeddings: EmbeddingsStatus;
|
||||
storage: StorageStatus;
|
||||
|
|
@ -610,12 +618,26 @@ function buildVerdict(
|
|||
export interface BuildProjectStatusOptions {
|
||||
env?: NodeJS.ProcessEnv;
|
||||
postgresQueryHistoryProbe?: PostgresQueryHistoryProbe;
|
||||
configIssues?: KtxConfigIssue[];
|
||||
}
|
||||
|
||||
function buildConfigStatus(issues: KtxConfigIssue[] | undefined): ConfigStatus {
|
||||
const list = issues ?? [];
|
||||
if (list.length === 0) {
|
||||
return { status: 'ok', detail: 'ktx.yaml schema valid', issues: [] };
|
||||
}
|
||||
return {
|
||||
status: 'warn',
|
||||
detail: `${list.length} issue${list.length === 1 ? '' : 's'} in ktx.yaml`,
|
||||
issues: list,
|
||||
};
|
||||
}
|
||||
|
||||
export async function buildProjectStatus(project: KtxLocalProject, options: BuildProjectStatusOptions = {}): Promise<ProjectStatus> {
|
||||
const env = options.env ?? process.env;
|
||||
const config = project.config;
|
||||
|
||||
const configStatus = buildConfigStatus(options.configIssues);
|
||||
const llm = buildLlmStatus(config.llm, env);
|
||||
const embeddings = buildEmbeddingsStatus(config.ingest.embeddings, env);
|
||||
const storage = buildStorageStatus(config);
|
||||
|
|
@ -630,6 +652,7 @@ export async function buildProjectStatus(project: KtxLocalProject, options: Buil
|
|||
return {
|
||||
projectName: config.project,
|
||||
projectDir: project.projectDir,
|
||||
config: configStatus,
|
||||
llm,
|
||||
embeddings,
|
||||
storage,
|
||||
|
|
@ -719,6 +742,13 @@ export function renderProjectStatus(status: ProjectStatus, options: RenderProjec
|
|||
lines.push(` ${label('Embeddings')} ${embedDetail} ${sym(status.embeddings.status)} ${dim(status.embeddings.detail)}`);
|
||||
|
||||
lines.push(` ${label('Storage')} ${dim(`${status.storage.state} (state) · ${status.storage.search} (search)`)}`);
|
||||
lines.push(` ${label('Config')} ${sym(status.config.status)} ${dim(status.config.detail)}`);
|
||||
if (status.config.issues.length > 0) {
|
||||
for (const issue of status.config.issues) {
|
||||
lines.push(` ${color('warn', SYMBOL.warn)} ${issue.message}`);
|
||||
if (issue.fix) lines.push(` ${dim(`→ ${issue.fix}`)}`);
|
||||
}
|
||||
}
|
||||
lines.push('');
|
||||
|
||||
// Connections
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js';
|
||||
import {
|
||||
buildDefaultKtxProjectConfig,
|
||||
parseKtxProjectConfig,
|
||||
serializeKtxProjectConfig,
|
||||
validateKtxProjectConfig,
|
||||
} from './config.js';
|
||||
|
||||
describe('KTX project config', () => {
|
||||
it.each(['status', 'replay', 'run', 'watch'])('accepts former ingest subcommand name "%s" as a connection id', (connectionId) => {
|
||||
|
|
@ -277,8 +282,8 @@ scan:
|
|||
expect(serializeKtxProjectConfig(config)).toContain('validationBudget: all');
|
||||
});
|
||||
|
||||
it('falls back to safe scan relationship defaults for invalid numeric settings', () => {
|
||||
const config = parseKtxProjectConfig(`
|
||||
it('rejects out-of-range scan relationship numeric settings', () => {
|
||||
const yaml = `
|
||||
project: demo
|
||||
scan:
|
||||
relationships:
|
||||
|
|
@ -289,28 +294,33 @@ scan:
|
|||
profileSampleRows: 0
|
||||
validationConcurrency: 0
|
||||
validationBudget: 1.5
|
||||
`);
|
||||
`;
|
||||
expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.acceptThreshold/);
|
||||
|
||||
expect(config.scan.relationships).toMatchObject({
|
||||
acceptThreshold: 0.85,
|
||||
reviewThreshold: 0.55,
|
||||
maxLlmTablesPerBatch: 40,
|
||||
maxCandidatesPerColumn: 25,
|
||||
profileSampleRows: 10000,
|
||||
validationConcurrency: 4,
|
||||
});
|
||||
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
|
||||
const validation = validateKtxProjectConfig(yaml);
|
||||
expect(validation.ok).toBe(false);
|
||||
const paths = validation.issues.map((issue) => issue.path);
|
||||
expect(paths).toEqual(
|
||||
expect.arrayContaining([
|
||||
'scan.relationships.acceptThreshold',
|
||||
'scan.relationships.reviewThreshold',
|
||||
'scan.relationships.maxLlmTablesPerBatch',
|
||||
'scan.relationships.maxCandidatesPerColumn',
|
||||
'scan.relationships.profileSampleRows',
|
||||
'scan.relationships.validationConcurrency',
|
||||
'scan.relationships.validationBudget',
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('falls back for invalid scan relationship validation budget strings', () => {
|
||||
const config = parseKtxProjectConfig(`
|
||||
it('rejects invalid scan relationship validation budget strings', () => {
|
||||
const yaml = `
|
||||
project: demo
|
||||
scan:
|
||||
relationships:
|
||||
validationBudget: infinite
|
||||
`);
|
||||
|
||||
expect(config.scan.relationships).not.toHaveProperty('validationBudget');
|
||||
`;
|
||||
expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.validationBudget/);
|
||||
});
|
||||
|
||||
it('rejects unsupported local LLM and embedding fields', () => {
|
||||
|
|
@ -398,4 +408,80 @@ scan:
|
|||
it('rejects configs with a missing project name', () => {
|
||||
expect(() => parseKtxProjectConfig('connections: {}\n')).toThrow('ktx.yaml field "project" is required');
|
||||
});
|
||||
|
||||
it('rejects unknown top-level fields under strict mode', () => {
|
||||
expect(() =>
|
||||
parseKtxProjectConfig(`
|
||||
project: demo
|
||||
storrage:
|
||||
state: sqlite
|
||||
`),
|
||||
).toThrow(/Unsupported storrage/);
|
||||
});
|
||||
});
|
||||
|
||||
describe('validateKtxProjectConfig', () => {
|
||||
it('returns ok: true with no issues for a valid config', () => {
|
||||
const result = validateKtxProjectConfig('project: warehouse\n');
|
||||
expect(result).toEqual({ ok: true, issues: [] });
|
||||
});
|
||||
|
||||
it('collects every schema issue without throwing', () => {
|
||||
const result = validateKtxProjectConfig(`
|
||||
project: ""
|
||||
storage:
|
||||
search: not-a-real-backend
|
||||
scan:
|
||||
relationships:
|
||||
acceptThreshold: 1.7
|
||||
`);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
const paths = result.issues.map((issue) => issue.path);
|
||||
expect(paths).toEqual(
|
||||
expect.arrayContaining([
|
||||
'project',
|
||||
'storage.search',
|
||||
'scan.relationships.acceptThreshold',
|
||||
]),
|
||||
);
|
||||
});
|
||||
|
||||
it('attaches migration hints for known deprecated keys', () => {
|
||||
const result = validateKtxProjectConfig(`
|
||||
project: demo
|
||||
ingest:
|
||||
llm:
|
||||
backend: anthropic
|
||||
scan:
|
||||
enrichment:
|
||||
backend: none
|
||||
`);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
const findIssue = (path: string) => result.issues.find((issue) => issue.path === path);
|
||||
expect(findIssue('ingest.llm')).toMatchObject({
|
||||
message: 'Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits',
|
||||
fix: 'use top-level llm.provider, llm.models, and ingest.workUnits',
|
||||
});
|
||||
expect(findIssue('scan.enrichment.backend')).toMatchObject({
|
||||
message: 'Unsupported scan.enrichment.backend: use scan.enrichment.mode',
|
||||
fix: 'use scan.enrichment.mode',
|
||||
});
|
||||
});
|
||||
|
||||
it('reports YAML parse errors as a root-level issue', () => {
|
||||
const result = validateKtxProjectConfig(': not valid yaml :\n');
|
||||
expect(result.ok).toBe(false);
|
||||
expect(result.issues[0]?.path).toBe('');
|
||||
expect(result.issues[0]?.message).toMatch(/ktx\.yaml parse error/);
|
||||
});
|
||||
|
||||
it('reports a YAML scalar root as a single issue', () => {
|
||||
const result = validateKtxProjectConfig('- nope\n');
|
||||
expect(result).toEqual({
|
||||
ok: false,
|
||||
issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,435 +1,233 @@
|
|||
import type { KtxEmbeddingBackend, KtxLlmBackend, KtxModelRole, KtxPromptCacheTtl } from '@ktx/llm';
|
||||
import { KTX_MODEL_ROLES } from '@ktx/llm';
|
||||
import YAML from 'yaml';
|
||||
import * as z from 'zod';
|
||||
|
||||
export type KtxStorageState = 'postgres' | 'sqlite';
|
||||
export type KtxSearchBackend = 'postgres-hybrid' | 'sqlite-fts5';
|
||||
type KtxLocalLlmBackend = KtxLlmBackend | 'none';
|
||||
type KtxLocalEmbeddingBackend = KtxEmbeddingBackend | 'none';
|
||||
type KtxScanEnrichmentMode = 'none' | 'deterministic' | 'llm';
|
||||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
|
||||
const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
|
||||
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
|
||||
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
|
||||
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
|
||||
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
|
||||
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
|
||||
|
||||
interface KtxProjectPromptCachingConfig {
|
||||
enabled?: boolean;
|
||||
systemTtl?: KtxPromptCacheTtl;
|
||||
toolsTtl?: KtxPromptCacheTtl;
|
||||
historyTtl?: KtxPromptCacheTtl;
|
||||
vertexFallbackTo5m?: boolean;
|
||||
const DEPRECATED_KEY_HINTS: Record<string, string> = {
|
||||
'llm.provider.provider': 'use llm.provider.backend',
|
||||
'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',
|
||||
'ingest.embeddings.provider': 'use ingest.embeddings.backend',
|
||||
'scan.enrichment.backend': 'use scan.enrichment.mode',
|
||||
'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
|
||||
'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
|
||||
};
|
||||
|
||||
const apiCredentialsSchema = z.strictObject({
|
||||
api_key: z.string().min(1).optional(),
|
||||
base_url: z.string().min(1).optional(),
|
||||
});
|
||||
|
||||
const vertexProviderSchema = z.strictObject({
|
||||
project: z.string().min(1).optional(),
|
||||
location: z.string().default(''),
|
||||
});
|
||||
|
||||
const sentenceTransformersSchema = z.strictObject({
|
||||
base_url: z.string().default(''),
|
||||
pathPrefix: z.string().optional(),
|
||||
});
|
||||
|
||||
const llmProviderSchema = z.strictObject({
|
||||
backend: z.enum(KTX_LLM_BACKENDS).default('none'),
|
||||
vertex: vertexProviderSchema.optional(),
|
||||
anthropic: apiCredentialsSchema.optional(),
|
||||
gateway: apiCredentialsSchema.optional(),
|
||||
});
|
||||
|
||||
const promptCachingSchema = z.strictObject({
|
||||
enabled: z.boolean().optional(),
|
||||
systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
|
||||
toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
|
||||
historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional(),
|
||||
vertexFallbackTo5m: z.boolean().optional(),
|
||||
});
|
||||
|
||||
const llmSchema = z.strictObject({
|
||||
provider: llmProviderSchema.prefault({}),
|
||||
models: z.partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1)).default({}),
|
||||
promptCaching: promptCachingSchema.optional(),
|
||||
});
|
||||
|
||||
const embeddingSchema = z.strictObject({
|
||||
backend: z.enum(KTX_EMBEDDING_BACKENDS).default('deterministic'),
|
||||
model: z.string().min(1).optional(),
|
||||
dimensions: z.int().positive().default(8),
|
||||
openai: apiCredentialsSchema.optional(),
|
||||
sentenceTransformers: sentenceTransformersSchema.optional(),
|
||||
batchSize: z.int().positive().optional(),
|
||||
});
|
||||
|
||||
const workUnitsSchema = z.strictObject({
|
||||
stepBudget: z.int().positive().default(40),
|
||||
maxConcurrency: z.int().positive().default(1),
|
||||
failureMode: z.enum(KTX_WORK_UNIT_FAILURE_MODES).default('continue'),
|
||||
});
|
||||
|
||||
const ingestSchema = z.strictObject({
|
||||
adapters: z.array(z.string().min(1)).default([]),
|
||||
embeddings: embeddingSchema.prefault({ backend: 'deterministic', model: 'deterministic' }),
|
||||
workUnits: workUnitsSchema.prefault({}),
|
||||
});
|
||||
|
||||
const scanEnrichmentSchema = z.strictObject({
|
||||
mode: z.enum(KTX_ENRICHMENT_MODES).default('none'),
|
||||
embeddings: embeddingSchema.optional(),
|
||||
});
|
||||
|
||||
/**
 * Schema for `scan.relationships`. Every field except `validationBudget` has
 * a default, so an empty object parses to a fully populated configuration.
 */
const scanRelationshipsSchema = z.strictObject({
  enabled: z.boolean().default(true),
  llmProposals: z.boolean().default(true),
  validationRequiredForManifest: z.boolean().default(true),
  // Thresholds are constrained to the closed interval [0, 1].
  acceptThreshold: z.number().min(0).max(1).default(0.85),
  reviewThreshold: z.number().min(0).max(1).default(0.55),
  maxLlmTablesPerBatch: z.int().positive().default(40),
  maxCandidatesPerColumn: z.int().positive().default(25),
  profileSampleRows: z.int().positive().default(10000),
  validationConcurrency: z.int().positive().default(4),
  // Either the literal 'all' or a non-negative integer; no default.
  validationBudget: z.union([z.literal('all'), z.int().nonnegative()]).optional(),
});
|
||||
|
||||
/**
 * Schema for the top-level `scan` section. Each sub-section falls back to an
 * empty object that is parsed through its own schema, so nested defaults apply.
 */
const scanSchema = z.strictObject({
  enrichment: scanEnrichmentSchema.prefault({}),
  relationships: scanRelationshipsSchema.prefault({}),
});
|
||||
|
||||
/**
 * Schema for the `setup` section. `completed_steps` is accepted with any value
 * so that it passes strict validation, but the transform drops it: the parsed
 * result only carries `database_connection_ids`.
 */
const setupSchema = z
  .strictObject({
    database_connection_ids: z.array(z.string().min(1)).default([]),
    completed_steps: z.unknown().optional(),
  })
  .transform(({ database_connection_ids }) => ({ database_connection_ids }));
|
||||
|
||||
/** Schema for `storage.git`: auto-commit flag and commit author string, both defaulted. */
const storageGitSchema = z.strictObject({
  auto_commit: z.boolean().default(true),
  author: z.string().min(1).default('ktx <ktx@example.com>'),
});
|
||||
|
||||
/**
 * Schema for the top-level `storage` section: state backend, search backend,
 * and git settings.
 */
const storageSchema = z.strictObject({
  state: z.enum(KTX_STORAGE_STATES).default('sqlite'),
  search: z.enum(KTX_SEARCH_BACKENDS).default('sqlite-fts5'),
  git: storageGitSchema.prefault({}),
});
|
||||
|
||||
/**
 * Schema for one entry under `connections`. Declared loose, so keys other than
 * `driver` and `url` are allowed. `driver` is optional here but must be
 * non-empty when present.
 */
const connectionSchema = z.looseObject({
  driver: z.string().min(1).optional(),
  url: z.string().optional(),
});
|
||||
|
||||
/**
 * Schema for the top-level `agent` section. `run_research` falls back to an
 * empty object that is parsed through the nested schema, so its defaults apply.
 */
const agentSchema = z.strictObject({
  run_research: z
    .strictObject({
      enabled: z.boolean().default(false),
      // Zero is accepted (non-negative integer).
      max_iterations: z.number().int().nonnegative().default(20),
      default_toolset: z.array(z.string().min(1)).default(['sl_query', 'wiki_search', 'sl_read_source']),
    })
    .prefault({}),
});
|
||||
|
||||
/** Schema for the top-level `memory` section; `auto_commit` defaults to true. */
const memorySchema = z.strictObject({
  auto_commit: z.boolean().default(true),
});
|
||||
|
||||
/**
 * Root schema for ktx.yaml. Only `project` is required; `setup` is optional
 * and every other section is defaulted. Strict: unknown top-level keys are
 * rejected.
 */
const ktxProjectConfigSchema = z.strictObject({
  // Same message for a missing/non-string value and for a value that is empty
  // after trimming.
  project: z
    .string({ error: 'ktx.yaml field "project" is required' })
    .trim()
    .min(1, 'ktx.yaml field "project" is required'),
  setup: setupSchema.optional(),
  connections: z.record(z.string(), connectionSchema).default({}),
  storage: storageSchema.prefault({}),
  llm: llmSchema.prefault({}),
  ingest: ingestSchema.prefault({}),
  agent: agentSchema.prefault({}),
  memory: memorySchema.prefault({}),
  scan: scanSchema.prefault({}),
});
|
||||
|
||||
// Public config types, derived from the Zod schemas so the static types and
// the runtime validation cannot drift apart.
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
export type KtxProjectLlmProviderConfig = z.infer<typeof llmProviderSchema>;
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
export type KtxIngestWorkUnitsConfig = z.infer<typeof workUnitsSchema>;
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
export type KtxProjectScanConfig = z.infer<typeof scanSchema>;
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
export type KtxProjectSetupConfig = z.infer<typeof setupSchema>;
export type KtxStorageState = z.infer<typeof storageSchema>['state'];
export type KtxSearchBackend = z.infer<typeof storageSchema>['search'];
|
||||
|
||||
/** One problem found while validating ktx.yaml. */
export interface KtxConfigIssue {
  /** Dotted path of the offending field; empty for document-level problems. */
  path: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Optional hint describing how to fix the issue. */
  fix?: string;
}
|
||||
|
||||
/** LLM provider selection plus optional per-backend settings. */
export interface KtxProjectLlmProviderConfig {
  backend: KtxLocalLlmBackend;
  vertex?: { project?: string; location: string };
  anthropic?: { api_key?: string; base_url?: string };
  gateway?: { api_key?: string; base_url?: string };
}
|
||||
|
||||
/** Top-level LLM configuration: provider, per-role model names, prompt caching. */
export interface KtxProjectLlmConfig {
  provider: KtxProjectLlmProviderConfig;
  /** Model name per role; a `default` entry is also permitted. */
  models: Partial<Record<KtxModelRole, string>> & { default?: string };
  promptCaching?: KtxProjectPromptCachingConfig;
}
|
||||
|
||||
/** Embeddings configuration: backend, optional model, vector size, backend settings. */
export interface KtxProjectEmbeddingConfig {
  backend: KtxLocalEmbeddingBackend;
  model?: string;
  dimensions: number;
  openai?: { api_key?: string; base_url?: string };
  sentenceTransformers?: { base_url: string; pathPrefix?: string };
  batchSize?: number;
}
|
||||
|
||||
/** Scan enrichment settings: the enrichment mode and optional embeddings. */
export interface KtxScanEnrichmentConfig {
  mode: KtxScanEnrichmentMode;
  embeddings?: KtxProjectEmbeddingConfig;
}
|
||||
|
||||
/** Limits and failure policy for ingest work units. */
export interface KtxIngestWorkUnitsConfig {
  stepBudget: number;
  maxConcurrency: number;
  failureMode: 'abort' | 'continue';
}
|
||||
|
||||
/** Settings for relationship detection during a scan. */
export interface KtxScanRelationshipConfig {
  enabled: boolean;
  llmProposals: boolean;
  validationRequiredForManifest: boolean;
  acceptThreshold: number;
  reviewThreshold: number;
  maxLlmTablesPerBatch: number;
  maxCandidatesPerColumn: number;
  profileSampleRows: number;
  validationConcurrency: number;
  /** A count, or the literal 'all'. */
  validationBudget?: number | 'all';
}
|
||||
|
||||
/** The `scan` section: enrichment settings and relationship-detection settings. */
export interface KtxProjectScanConfig {
  enrichment: KtxScanEnrichmentConfig;
  relationships: KtxScanRelationshipConfig;
}
|
||||
|
||||
/** One connection entry: a driver name, an optional URL, and arbitrary extra keys. */
export interface KtxProjectConnectionConfig {
  driver: string;
  url?: string;
  // Additional driver-specific keys are allowed with unknown values.
  [key: string]: unknown;
}
|
||||
|
||||
/** The `setup` section as exposed to callers. */
export interface KtxProjectSetupConfig {
  database_connection_ids: string[];
}
|
||||
|
||||
export interface KtxProjectConfig {
|
||||
project: string;
|
||||
setup?: KtxProjectSetupConfig;
|
||||
connections: Record<string, KtxProjectConnectionConfig>;
|
||||
storage: {
|
||||
state: KtxStorageState;
|
||||
search: KtxSearchBackend;
|
||||
git: {
|
||||
auto_commit: boolean;
|
||||
author: string;
|
||||
};
|
||||
};
|
||||
llm: KtxProjectLlmConfig;
|
||||
ingest: {
|
||||
adapters: string[];
|
||||
embeddings: KtxProjectEmbeddingConfig;
|
||||
workUnits: KtxIngestWorkUnitsConfig;
|
||||
};
|
||||
agent: {
|
||||
run_research: {
|
||||
enabled: boolean;
|
||||
max_iterations: number;
|
||||
default_toolset: string[];
|
||||
};
|
||||
};
|
||||
memory: {
|
||||
auto_commit: boolean;
|
||||
};
|
||||
scan: KtxProjectScanConfig;
|
||||
/** Result of validating ktx.yaml: an overall flag plus the issues found. */
export interface KtxConfigValidation {
  ok: boolean;
  issues: KtxConfigIssue[];
}
|
||||
|
||||
/**
 * Type guard for key/value containers.
 *
 * @param value - Any value.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
|
||||
|
||||
function stringArray(value: unknown, fallback: string[]): string[] {
|
||||
if (!Array.isArray(value)) {
|
||||
return fallback;
|
||||
}
|
||||
return value.filter((item): item is string => typeof item === 'string' && item.length > 0);
|
||||
/**
 * Renders a path of property keys as a dot-separated string.
 *
 * @param path - Sequence of object keys and/or array indexes.
 * @returns The segments stringified and joined with '.'; '' for an empty path.
 */
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
  return path.map((segment) => String(segment)).join('.');
}
|
||||
|
||||
function booleanValue(value: unknown, fallback: boolean): boolean {
|
||||
return typeof value === 'boolean' ? value : fallback;
|
||||
/**
 * Walks `root` along `path` and returns whatever is found there.
 *
 * @param root - Value to start from.
 * @param path - Keys/indexes to follow, outermost first.
 * @returns The value at the end of the path, or `undefined` as soon as a
 *   non-object (or null) is reached before the path is exhausted.
 */
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
  let cursor: unknown = root;
  for (const segment of path) {
    // Cannot descend into primitives or null.
    if (cursor === null || typeof cursor !== 'object') return undefined;
    cursor = (cursor as Record<PropertyKey, unknown>)[segment];
  }
  return cursor;
}
|
||||
|
||||
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 * NaN and +/-Infinity count as invalid.
 */
function numberValue(value: unknown, fallback: number): number {
  return typeof value === 'number' && Number.isFinite(value) ? value : fallback;
}
|
||||
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
|
||||
const basePath = dottedPath(issue.path);
|
||||
|
||||
/**
 * Returns `value` when it is a string with at least one non-whitespace
 * character, otherwise `fallback`. The accepted string is returned as given
 * (it is not trimmed).
 */
function stringValue(value: unknown, fallback: string): string {
  return typeof value === 'string' && value.trim().length > 0 ? value : fallback;
}
|
||||
|
||||
/**
 * Normalizes an optional string setting.
 *
 * @returns The trimmed string, or `undefined` when `value` is not a string or
 *   is empty after trimming.
 */
function optionalNonEmptyString(value: unknown): string | undefined {
  if (typeof value !== 'string') {
    return undefined;
  }

  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
|
||||
|
||||
/**
 * Returns `value` when it is an integer greater than zero, otherwise
 * `fallback`. Non-numbers, fractions, zero and negatives all fall back.
 */
function positiveIntegerConfigValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) {
    return fallback;
  }

  return value;
}
|
||||
|
||||
/**
 * Normalizes a validation-budget setting.
 *
 * @returns `value` when it is the literal 'all' or a non-negative integer
 *   (zero included); otherwise `fallback`, which may itself be `undefined`.
 */
function validationBudgetConfigValue(value: unknown, fallback: number | 'all' | undefined): number | 'all' | undefined {
  if (value === 'all') {
    return value;
  }
  if (typeof value === 'number' && Number.isInteger(value) && value >= 0) {
    return value;
  }
  return fallback;
}
|
||||
|
||||
/**
 * Returns `value` when it is a finite number in the closed interval [0, 1],
 * otherwise `fallback`.
 */
function ratioConfigValue(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isFinite(value) || value < 0 || value > 1) {
    return fallback;
  }

  return value;
}
|
||||
|
||||
/**
 * Resolves an LLM backend setting.
 *
 * @param value - Raw configured value.
 * @param fallback - Returned when `value` is null or undefined.
 * @param section - Config path used as the prefix in the error message.
 * @returns One of 'none', 'anthropic', 'vertex', 'gateway'.
 * @throws Error when `value` is set but is not a supported backend.
 */
function localLlmBackend(value: unknown, fallback: KtxLocalLlmBackend, section = 'llm.provider'): KtxLocalLlmBackend {
  if (value == null) {
    return fallback;
  }

  if (value === 'none' || value === 'anthropic' || value === 'vertex' || value === 'gateway') {
    return value;
  }

  throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
}
|
||||
|
||||
/**
 * Resolves an embedding backend setting.
 *
 * @param value - Raw configured value.
 * @param fallback - Returned when `value` is null or undefined.
 * @param section - Config path used as the prefix in the error message.
 * @returns One of 'none', 'deterministic', 'openai', 'sentence-transformers'.
 * @throws Error when `value` is set but is not a supported backend.
 */
function localEmbeddingBackend(
  value: unknown,
  fallback: KtxLocalEmbeddingBackend,
  section = 'ingest.embeddings',
): KtxLocalEmbeddingBackend {
  if (value == null) {
    return fallback;
  }

  if (
    value === 'none' ||
    value === 'deterministic' ||
    value === 'openai' ||
    value === 'sentence-transformers'
  ) {
    return value;
  }

  throw new Error(`Unsupported ${section}.backend: ${String(value)}`);
}
|
||||
|
||||
/**
 * Resolves the scan enrichment mode.
 *
 * @param value - Raw configured value.
 * @param fallback - Returned when `value` is null or undefined.
 * @returns One of 'none', 'deterministic', 'llm'.
 * @throws Error when `value` is set but is not a supported mode.
 */
function scanEnrichmentMode(value: unknown, fallback: KtxScanEnrichmentMode): KtxScanEnrichmentMode {
  if (value == null) {
    return fallback;
  }

  if (value === 'none' || value === 'deterministic' || value === 'llm') {
    return value;
  }

  throw new Error(`Unsupported scan.enrichment.mode: ${String(value)}`);
}
|
||||
|
||||
/**
 * Rejects a `provider` key inside `section`, pointing the user at `backend`.
 *
 * @param section - Config path of the section being checked.
 * @param value - Value found under `<section>.provider`.
 * @throws Error whenever `value` is anything other than `undefined`
 *   (an explicit `null` is also rejected).
 */
function rejectUnsupportedProvider(section: string, value: unknown): void {
  if (value !== undefined) {
    throw new Error(`Unsupported ${section}.provider: use ${section}.backend`);
  }
}
|
||||
|
||||
/** Returns `value` when it passes `isRecord`, otherwise a fresh empty object. */
function optionalStringRecord(value: unknown): Record<string, unknown> {
  return isRecord(value) ? value : {};
}
|
||||
|
||||
/**
 * Extracts `api_key` / `base_url` from a raw provider block.
 *
 * @returns An object holding only the settings that are non-empty strings
 *   (trimmed), or `undefined` when `value` is not a record or neither setting
 *   is usable.
 */
function optionalProviderConfig(value: unknown): { api_key?: string; base_url?: string } | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  const apiKey = optionalNonEmptyString(value.api_key);
  const baseUrl = optionalNonEmptyString(value.base_url);
  if (!apiKey && !baseUrl) {
    return undefined;
  }

  return {
    ...(apiKey ? { api_key: apiKey } : {}),
    ...(baseUrl ? { base_url: baseUrl } : {}),
  };
}
|
||||
|
||||
/**
 * Builds the role-to-model map from a raw `models` value.
 *
 * Entries whose value is not a non-empty string are dropped; kept model names
 * are trimmed. Role keys are not checked against the known roles here.
 *
 * @returns The filtered map, or an empty object when `value` is not a record.
 */
function parseModels(value: unknown): KtxProjectLlmConfig['models'] {
  if (!isRecord(value)) {
    return {};
  }

  const models: KtxProjectLlmConfig['models'] = {};
  for (const [role, model] of Object.entries(value)) {
    const modelName = optionalNonEmptyString(model);
    if (modelName) {
      models[role as KtxModelRole] = modelName;
    }
  }
  return models;
}
|
||||
|
||||
/** Returns `value` when it is exactly '5m' or '1h'; otherwise `undefined`. */
function promptCacheTtl(value: unknown): KtxPromptCacheTtl | undefined {
  return value === '5m' || value === '1h' ? value : undefined;
}
|
||||
|
||||
/**
 * Builds the prompt-caching config from a raw value.
 *
 * Only fields with a usable value are copied: booleans for `enabled` and
 * `vertexFallbackTo5m`, recognized TTLs for the three `*Ttl` fields.
 *
 * @returns The filtered object (possibly empty), or `undefined` when `value`
 *   is not a record.
 */
function parsePromptCaching(value: unknown): KtxProjectPromptCachingConfig | undefined {
  if (!isRecord(value)) {
    return undefined;
  }

  // Resolve each TTL once rather than once for the check and once for the value.
  const systemTtl = promptCacheTtl(value.systemTtl);
  const toolsTtl = promptCacheTtl(value.toolsTtl);
  const historyTtl = promptCacheTtl(value.historyTtl);

  return {
    ...(typeof value.enabled === 'boolean' ? { enabled: value.enabled } : {}),
    ...(systemTtl ? { systemTtl } : {}),
    ...(toolsTtl ? { toolsTtl } : {}),
    ...(historyTtl ? { historyTtl } : {}),
    ...(typeof value.vertexFallbackTo5m === 'boolean' ? { vertexFallbackTo5m: value.vertexFallbackTo5m } : {}),
  };
}
|
||||
|
||||
function parseProjectLlmProviderConfig(
|
||||
raw: Record<string, unknown>,
|
||||
defaults: KtxProjectLlmProviderConfig,
|
||||
section: string,
|
||||
): KtxProjectLlmProviderConfig {
|
||||
rejectUnsupportedProvider(section, raw.provider);
|
||||
|
||||
const vertex = isRecord(raw.vertex)
|
||||
? {
|
||||
...(optionalNonEmptyString(raw.vertex.project) ? { project: optionalNonEmptyString(raw.vertex.project) } : {}),
|
||||
location: stringValue(raw.vertex.location, ''),
|
||||
if (issue.code === 'unrecognized_keys') {
|
||||
const keys = (issue as { keys?: readonly string[] }).keys ?? [];
|
||||
return keys.map((key) => {
|
||||
const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
|
||||
const hint = DEPRECATED_KEY_HINTS[fullPath];
|
||||
if (hint !== undefined) {
|
||||
return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
|
||||
}
|
||||
: undefined;
|
||||
const anthropic = optionalProviderConfig(raw.anthropic);
|
||||
const gateway = optionalProviderConfig(raw.gateway);
|
||||
return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
backend: localLlmBackend(raw.backend, defaults.backend, section),
|
||||
...(vertex ? { vertex } : {}),
|
||||
...(anthropic ? { anthropic } : {}),
|
||||
...(gateway ? { gateway } : {}),
|
||||
};
|
||||
const lastSegment = issue.path[issue.path.length - 1];
|
||||
if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
|
||||
const value = valueAtPath(input, issue.path);
|
||||
return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
|
||||
}
|
||||
|
||||
return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
|
||||
}
|
||||
|
||||
function parseProjectLlmConfig(raw: Record<string, unknown>, defaults: KtxProjectLlmConfig): KtxProjectLlmConfig {
|
||||
const provider = isRecord(raw.provider) ? raw.provider : {};
|
||||
return {
|
||||
provider: parseProjectLlmProviderConfig(provider, defaults.provider, 'llm.provider'),
|
||||
models: parseModels(raw.models ?? defaults.models),
|
||||
...(parsePromptCaching(raw.promptCaching) ? { promptCaching: parsePromptCaching(raw.promptCaching) } : {}),
|
||||
};
|
||||
/**
 * Converts every issue in a Zod error into config issues.
 *
 * @param error - Error produced by schema validation.
 * @param input - The value that was validated; forwarded to `formatIssue`.
 * @returns The flattened results of `formatIssue` for each Zod issue, in order.
 */
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
  return error.issues.flatMap((issue) => formatIssue(issue, input));
}
|
||||
|
||||
/**
 * Builds an embeddings config from a raw record, falling back to `defaults`.
 *
 * @param raw - Raw embeddings block.
 * @param defaults - Values used when a setting is missing or invalid.
 * @param section - Config path used in error messages.
 * @returns The normalized embeddings config; optional fields are only present
 *   when they have a usable value.
 * @throws Error when `raw.provider` is set or `raw.backend` is unsupported.
 */
function parseProjectEmbeddingConfig(
  raw: Record<string, unknown>,
  defaults: KtxProjectEmbeddingConfig,
  section: string,
): KtxProjectEmbeddingConfig {
  rejectUnsupportedProvider(section, raw.provider);

  const openai = optionalProviderConfig(raw.openai);
  const sentenceTransformers = isRecord(raw.sentenceTransformers)
    ? {
        base_url: stringValue(raw.sentenceTransformers.base_url, ''),
        ...(typeof raw.sentenceTransformers.pathPrefix === 'string'
          ? { pathPrefix: raw.sentenceTransformers.pathPrefix }
          : {}),
      }
    : undefined;

  const backend = localEmbeddingBackend(raw.backend, defaults.backend, section);
  // The default model is only inherited when the backend itself was inherited
  // and that backend is not 'none'.
  const model =
    optionalNonEmptyString(raw.model) ?? (raw.backend == null && backend !== 'none' ? defaults.model : undefined);
  // 0 is a sentinel for "no valid batch size configured".
  const batchSize = positiveIntegerConfigValue(raw.batchSize, 0);
  return {
    backend,
    ...(model ? { model } : {}),
    dimensions: positiveIntegerConfigValue(raw.dimensions, defaults.dimensions),
    ...(openai ? { openai } : {}),
    ...(sentenceTransformers ? { sentenceTransformers } : {}),
    ...(batchSize > 0 ? { batchSize } : {}),
  };
}
|
||||
|
||||
/**
 * Builds the relationship-scan config from a raw record. Each field is taken
 * from `raw` when valid for its kind and from `defaults` otherwise.
 *
 * @param raw - Raw `scan.relationships` block.
 * @param defaults - Fallback values.
 * @returns The normalized config; `validationBudget` is only present when a
 *   value (from `raw` or `defaults`) is available.
 */
function parseScanRelationshipConfig(
  raw: Record<string, unknown>,
  defaults: KtxScanRelationshipConfig,
): KtxScanRelationshipConfig {
  const validationBudget = validationBudgetConfigValue(raw.validationBudget, defaults.validationBudget);

  return {
    enabled: booleanValue(raw.enabled, defaults.enabled),
    llmProposals: booleanValue(raw.llmProposals, defaults.llmProposals),
    validationRequiredForManifest: booleanValue(
      raw.validationRequiredForManifest,
      defaults.validationRequiredForManifest,
    ),
    acceptThreshold: ratioConfigValue(raw.acceptThreshold, defaults.acceptThreshold),
    reviewThreshold: ratioConfigValue(raw.reviewThreshold, defaults.reviewThreshold),
    maxLlmTablesPerBatch: positiveIntegerConfigValue(raw.maxLlmTablesPerBatch, defaults.maxLlmTablesPerBatch),
    maxCandidatesPerColumn: positiveIntegerConfigValue(raw.maxCandidatesPerColumn, defaults.maxCandidatesPerColumn),
    profileSampleRows: positiveIntegerConfigValue(raw.profileSampleRows, defaults.profileSampleRows),
    validationConcurrency: positiveIntegerConfigValue(raw.validationConcurrency, defaults.validationConcurrency),
    ...(validationBudget !== undefined ? { validationBudget } : {}),
  };
}
|
||||
|
||||
/** Returns `value` when it is exactly 'abort' or 'continue'; otherwise `fallback`. */
function workUnitFailureMode(value: unknown, fallback: 'abort' | 'continue'): 'abort' | 'continue' {
  return value === 'abort' || value === 'continue' ? value : fallback;
}
|
||||
|
||||
function parseIngestWorkUnitsConfig(
|
||||
raw: Record<string, unknown>,
|
||||
defaults: KtxIngestWorkUnitsConfig,
|
||||
): KtxIngestWorkUnitsConfig {
|
||||
return {
|
||||
stepBudget: positiveIntegerConfigValue(raw.stepBudget, defaults.stepBudget),
|
||||
maxConcurrency: positiveIntegerConfigValue(raw.maxConcurrency, defaults.maxConcurrency),
|
||||
failureMode: workUnitFailureMode(raw.failureMode, defaults.failureMode),
|
||||
};
|
||||
/**
 * Renders a Zod error as a single string.
 *
 * @param error - Error produced by schema validation.
 * @param input - The value that was validated; forwarded to `collectIssues`.
 * @returns The message of every collected issue, one per line.
 */
function formatZodError(error: z.ZodError, input: unknown): string {
  return collectIssues(error, input)
    .map((issue) => issue.message)
    .join('\n');
}
|
||||
|
||||
export function buildDefaultKtxProjectConfig(projectName = 'ktx-project'): KtxProjectConfig {
|
||||
return {
|
||||
project: projectName,
|
||||
connections: {},
|
||||
storage: {
|
||||
state: 'sqlite',
|
||||
search: 'sqlite-fts5',
|
||||
git: {
|
||||
auto_commit: true,
|
||||
author: 'ktx <ktx@example.com>',
|
||||
},
|
||||
},
|
||||
llm: {
|
||||
provider: {
|
||||
backend: 'none',
|
||||
},
|
||||
models: {},
|
||||
},
|
||||
ingest: {
|
||||
adapters: [],
|
||||
embeddings: {
|
||||
backend: 'deterministic',
|
||||
model: 'deterministic',
|
||||
dimensions: 8,
|
||||
},
|
||||
workUnits: {
|
||||
stepBudget: 40,
|
||||
maxConcurrency: 1,
|
||||
failureMode: 'continue',
|
||||
},
|
||||
},
|
||||
agent: {
|
||||
run_research: {
|
||||
enabled: false,
|
||||
max_iterations: 20,
|
||||
default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'],
|
||||
},
|
||||
},
|
||||
memory: {
|
||||
auto_commit: true,
|
||||
},
|
||||
scan: {
|
||||
enrichment: {
|
||||
mode: 'none',
|
||||
},
|
||||
relationships: {
|
||||
enabled: true,
|
||||
llmProposals: true,
|
||||
validationRequiredForManifest: true,
|
||||
acceptThreshold: 0.85,
|
||||
reviewThreshold: 0.55,
|
||||
maxLlmTablesPerBatch: 40,
|
||||
maxCandidatesPerColumn: 25,
|
||||
profileSampleRows: 10000,
|
||||
validationConcurrency: 4,
|
||||
},
|
||||
},
|
||||
};
|
||||
return ktxProjectConfigSchema.parse({ project: projectName });
|
||||
}
|
||||
|
||||
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
|
||||
|
|
@ -437,96 +235,29 @@ export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
|
|||
if (!isRecord(parsed)) {
|
||||
throw new Error('ktx.yaml must contain a YAML object');
|
||||
}
|
||||
|
||||
const project = parsed.project;
|
||||
if (typeof project !== 'string' || project.trim().length === 0) {
|
||||
throw new Error('ktx.yaml field "project" is required');
|
||||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||||
if (!result.success) {
|
||||
throw new Error(formatZodError(result.error, parsed));
|
||||
}
|
||||
return result.data;
|
||||
}
|
||||
|
||||
const defaults = buildDefaultKtxProjectConfig(project.trim());
|
||||
const llm = isRecord(parsed.llm) ? parsed.llm : {};
|
||||
const storage = isRecord(parsed.storage) ? parsed.storage : {};
|
||||
const storageGit = isRecord(storage.git) ? storage.git : {};
|
||||
const setup = isRecord(parsed.setup) ? parsed.setup : undefined;
|
||||
const ingest = isRecord(parsed.ingest) ? parsed.ingest : {};
|
||||
const ingestEmbeddings = isRecord(ingest.embeddings) ? ingest.embeddings : {};
|
||||
const ingestWorkUnits = isRecord(ingest.workUnits) ? ingest.workUnits : {};
|
||||
const agent = isRecord(parsed.agent) ? parsed.agent : {};
|
||||
const runResearch = isRecord(agent.run_research) ? agent.run_research : {};
|
||||
const memory = isRecord(parsed.memory) ? parsed.memory : {};
|
||||
const scan = isRecord(parsed.scan) ? parsed.scan : {};
|
||||
const scanEnrichment = isRecord(scan.enrichment) ? scan.enrichment : {};
|
||||
const scanRelationships = isRecord(scan.relationships) ? scan.relationships : {};
|
||||
if (isRecord(ingest.llm)) {
|
||||
throw new Error('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits');
|
||||
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = YAML.parse(raw);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${message}` }] };
|
||||
}
|
||||
if (scanEnrichment.backend !== undefined) {
|
||||
throw new Error('Unsupported scan.enrichment.backend: use scan.enrichment.mode');
|
||||
if (!isRecord(parsed)) {
|
||||
return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
|
||||
}
|
||||
if (isRecord(scanEnrichment.llm)) {
|
||||
throw new Error('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models');
|
||||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||||
if (result.success) {
|
||||
return { ok: true, issues: [] };
|
||||
}
|
||||
|
||||
const parsedLlm = parseProjectLlmConfig(llm, defaults.llm);
|
||||
const parsedIngestEmbeddings = parseProjectEmbeddingConfig(
|
||||
ingestEmbeddings,
|
||||
defaults.ingest.embeddings,
|
||||
'ingest.embeddings',
|
||||
);
|
||||
const parsedIngestWorkUnits = parseIngestWorkUnitsConfig(ingestWorkUnits, defaults.ingest.workUnits);
|
||||
const scanEmbeddings = parseProjectEmbeddingConfig(
|
||||
optionalStringRecord(scanEnrichment.embeddings),
|
||||
defaults.ingest.embeddings,
|
||||
'scan.enrichment.embeddings',
|
||||
);
|
||||
const parsedScanEnrichment: KtxScanEnrichmentConfig = {
|
||||
mode: scanEnrichmentMode(scanEnrichment.mode, defaults.scan.enrichment.mode),
|
||||
...(isRecord(scanEnrichment.embeddings) ? { embeddings: scanEmbeddings } : {}),
|
||||
};
|
||||
const parsedScanRelationships = parseScanRelationshipConfig(scanRelationships, defaults.scan.relationships);
|
||||
const parsedConnections = isRecord(parsed.connections)
|
||||
? (parsed.connections as Record<string, KtxProjectConnectionConfig>)
|
||||
: defaults.connections;
|
||||
|
||||
return {
|
||||
project: project.trim(),
|
||||
...(setup
|
||||
? {
|
||||
setup: {
|
||||
database_connection_ids: stringArray(setup.database_connection_ids, []),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
connections: parsedConnections,
|
||||
storage: {
|
||||
state: storage.state === 'sqlite' ? 'sqlite' : defaults.storage.state,
|
||||
search: storage.search === 'sqlite-fts5' ? 'sqlite-fts5' : defaults.storage.search,
|
||||
git: {
|
||||
auto_commit: booleanValue(storageGit.auto_commit, defaults.storage.git.auto_commit),
|
||||
author: stringValue(storageGit.author, defaults.storage.git.author),
|
||||
},
|
||||
},
|
||||
llm: parsedLlm,
|
||||
ingest: {
|
||||
adapters: stringArray(ingest.adapters, defaults.ingest.adapters),
|
||||
embeddings: parsedIngestEmbeddings,
|
||||
workUnits: parsedIngestWorkUnits,
|
||||
},
|
||||
agent: {
|
||||
run_research: {
|
||||
enabled: booleanValue(runResearch.enabled, defaults.agent.run_research.enabled),
|
||||
max_iterations: numberValue(runResearch.max_iterations, defaults.agent.run_research.max_iterations),
|
||||
default_toolset: stringArray(runResearch.default_toolset, defaults.agent.run_research.default_toolset),
|
||||
},
|
||||
},
|
||||
memory: {
|
||||
auto_commit: booleanValue(memory.auto_commit, defaults.memory.auto_commit),
|
||||
},
|
||||
scan: {
|
||||
enrichment: parsedScanEnrichment,
|
||||
relationships: parsedScanRelationships,
|
||||
},
|
||||
};
|
||||
return { ok: false, issues: collectIssues(result.error, parsed) };
|
||||
}
|
||||
|
||||
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
export type {
|
||||
KtxConfigIssue,
|
||||
KtxConfigValidation,
|
||||
KtxProjectConfig,
|
||||
KtxProjectConnectionConfig,
|
||||
KtxProjectEmbeddingConfig,
|
||||
|
|
@ -10,6 +12,7 @@ export {
|
|||
buildDefaultKtxProjectConfig,
|
||||
parseKtxProjectConfig,
|
||||
serializeKtxProjectConfig,
|
||||
validateKtxProjectConfig,
|
||||
} from './config.js';
|
||||
export type { LocalGitFileStoreDeps } from './local-git-file-store.js';
|
||||
export { LocalGitFileStore } from './local-git-file-store.js';
|
||||
|
|
|
|||
|
|
@ -50,8 +50,8 @@ describe('standalone example docs', () => {
|
|||
config,
|
||||
/path: \.\.\/\.\.\/packages\/context\/test\/fixtures\/relationship-benchmarks\/orbit_style_product_no_declared_constraints\/data\.sqlite/,
|
||||
);
|
||||
assert.match(config, /llm_proposals: false/);
|
||||
assert.match(config, /validation_required_for_manifest: true/);
|
||||
assert.match(config, /llmProposals: false/);
|
||||
assert.match(config, /validationRequiredForManifest: true/);
|
||||
});
|
||||
|
||||
it('documents the Postgres historic SQL smoke example', async () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue