mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-01 08:59:39 +02:00
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. - Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. * refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
393 lines
18 KiB
TypeScript
393 lines
18 KiB
TypeScript
import { KTX_MODEL_ROLES } from '../../llm/types.js';
|
||
import YAML from 'yaml';
|
||
import * as z from 'zod';
|
||
import { connectionConfigSchema } from './driver-schemas.js';
|
||
|
||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code'] as const;
|
||
const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'] as const;
|
||
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
|
||
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
|
||
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
|
||
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
|
||
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
|
||
|
||
const DEPRECATED_KEY_HINTS: Record<string, string> = {
|
||
'llm.provider.provider': 'use llm.provider.backend',
|
||
'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',
|
||
'ingest.embeddings.provider': 'use ingest.embeddings.backend',
|
||
'scan.enrichment.backend': 'use scan.enrichment.mode',
|
||
'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
|
||
'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
|
||
};
|
||
|
||
const apiCredentialsSchema = z
|
||
.strictObject({
|
||
api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
|
||
base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
|
||
})
|
||
.describe('API credentials block: optional key and base URL for an LLM or embedding provider.');
|
||
|
||
const vertexProviderSchema = z
|
||
.strictObject({
|
||
project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
|
||
location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
|
||
})
|
||
.describe('Google Vertex AI provider configuration.');
|
||
|
||
const sentenceTransformersSchema = z
|
||
.strictObject({
|
||
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
|
||
pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
|
||
})
|
||
.describe('Sentence-transformers embedding server configuration.');
|
||
|
||
const llmProviderSchema = z
|
||
.strictObject({
|
||
backend: z
|
||
.enum(KTX_LLM_BACKENDS)
|
||
.default('none')
|
||
.describe(
|
||
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session.',
|
||
),
|
||
vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
|
||
anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
|
||
gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
|
||
})
|
||
.describe('LLM provider selection and credentials.');
|
||
|
||
const promptCachingSchema = z
|
||
.strictObject({
|
||
enabled: z.boolean().optional().describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
|
||
systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
|
||
toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
|
||
historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
|
||
vertexFallbackTo5m: z.boolean().optional().describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
|
||
})
|
||
.describe('Prompt-caching tunables for Anthropic-compatible providers.');
|
||
|
||
const llmSchema = z
|
||
.strictObject({
|
||
provider: llmProviderSchema.prefault({}).describe('LLM provider backend and credentials.'),
|
||
models: z
|
||
.partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
|
||
.default({})
|
||
.describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
|
||
promptCaching: promptCachingSchema.optional().describe('Optional prompt-caching tunables.'),
|
||
})
|
||
.describe('LLM provider, per-role model overrides, and prompt-caching tunables.');
|
||
|
||
const embeddingSchema = z
|
||
.strictObject({
|
||
backend: z
|
||
.enum(KTX_EMBEDDING_BACKENDS)
|
||
.default('none')
|
||
.describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
|
||
model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
|
||
dimensions: z
|
||
.int()
|
||
.positive()
|
||
.default(8)
|
||
.describe(
|
||
'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
|
||
'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
|
||
'(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
|
||
),
|
||
openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
|
||
sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
|
||
batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
|
||
})
|
||
.describe('Embedding backend, model, and provider credentials.');
|
||
|
||
const workUnitsSchema = z
|
||
.strictObject({
|
||
stepBudget: z.int().positive().default(40).describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
|
||
maxConcurrency: z.int().positive().default(1).describe('Maximum number of work units run concurrently during ingest.'),
|
||
failureMode: z
|
||
.enum(KTX_WORK_UNIT_FAILURE_MODES)
|
||
.default('continue')
|
||
.describe('Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.'),
|
||
})
|
||
.describe('Concurrency and failure handling for ingest work units.');
|
||
|
||
const ingestSchema = z
|
||
.strictObject({
|
||
adapters: z
|
||
.array(z.string().min(1))
|
||
.default([])
|
||
.describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
|
||
embeddings: embeddingSchema
|
||
.prefault({ backend: 'none' })
|
||
.describe('Embedding configuration used when ingest adapters need to embed documents.'),
|
||
workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
|
||
})
|
||
.describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');
|
||
|
||
const scanEnrichmentSchema = z
|
||
.strictObject({
|
||
mode: z
|
||
.enum(KTX_ENRICHMENT_MODES)
|
||
.default('none')
|
||
.describe('Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.'),
|
||
embeddings: embeddingSchema.optional().describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
|
||
})
|
||
.describe('Schema-scan enrichment: how columns and tables are described.');
|
||
|
||
const scanRelationshipsSchema = z
|
||
.strictObject({
|
||
enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
|
||
llmProposals: z.boolean().default(true).describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
|
||
validationRequiredForManifest: z
|
||
.boolean()
|
||
.default(true)
|
||
.describe('When true, only relationships that pass database-side validation are written to the manifest.'),
|
||
acceptThreshold: z
|
||
.number()
|
||
.min(0)
|
||
.max(1)
|
||
.default(0.85)
|
||
.describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
|
||
reviewThreshold: z
|
||
.number()
|
||
.min(0)
|
||
.max(1)
|
||
.default(0.55)
|
||
.describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),
|
||
maxLlmTablesPerBatch: z
|
||
.int()
|
||
.positive()
|
||
.default(40)
|
||
.describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
|
||
maxCandidatesPerColumn: z
|
||
.int()
|
||
.positive()
|
||
.default(25)
|
||
.describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
|
||
profileSampleRows: z.int().positive().default(10000).describe('Number of rows sampled per table when profiling values for relationship inference.'),
|
||
validationConcurrency: z.int().positive().default(4).describe('Number of relationship validation queries run in parallel against the database.'),
|
||
validationBudget: z
|
||
.union([z.literal('all'), z.int().nonnegative()])
|
||
.optional()
|
||
.describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
|
||
})
|
||
.describe('Schema-scan relationship discovery and validation tunables.');
|
||
|
||
const scanSchema = z
|
||
.strictObject({
|
||
enrichment: scanEnrichmentSchema.prefault({}).describe('Column/table enrichment configuration.'),
|
||
relationships: scanRelationshipsSchema.prefault({}).describe('Relationship discovery and validation configuration.'),
|
||
})
|
||
.describe('Schema-scan configuration: enrichment and relationship discovery.');
|
||
|
||
const setupSchema = z
|
||
.strictObject({
|
||
database_connection_ids: z
|
||
.array(z.string().min(1))
|
||
.default([])
|
||
.describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
|
||
completed_steps: z
|
||
.unknown()
|
||
.optional()
|
||
.describe('Deprecated. Accepted for backward compatibility but ignored; KTX no longer tracks setup progress here.'),
|
||
})
|
||
.transform(({ database_connection_ids }) => ({ database_connection_ids }))
|
||
.describe('Setup-wizard state captured during `ktx setup`.');
|
||
|
||
const storageGitSchema = z
|
||
.strictObject({
|
||
auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
|
||
author: z
|
||
.string()
|
||
.min(1)
|
||
.default('ktx <ktx@example.com>')
|
||
.describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
|
||
})
|
||
.describe('Git-backed storage commit policy.');
|
||
|
||
const storageSchema = z
|
||
.strictObject({
|
||
state: z
|
||
.enum(KTX_STORAGE_STATES)
|
||
.default('sqlite')
|
||
.describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
|
||
search: z
|
||
.enum(KTX_SEARCH_BACKENDS)
|
||
.default('sqlite-fts5')
|
||
.describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
|
||
git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
|
||
})
|
||
.describe('Storage backends and commit policy for KTX state and search indexes.');
|
||
|
||
const connectionSchema = connectionConfigSchema;
|
||
|
||
const agentSchema = z
|
||
.strictObject({
|
||
run_research: z
|
||
.strictObject({
|
||
enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
|
||
max_iterations: z
|
||
.number()
|
||
.int()
|
||
.nonnegative()
|
||
.default(20)
|
||
.describe('Maximum number of tool-call iterations the research agent may take per run.'),
|
||
default_toolset: z
|
||
.array(z.string().min(1))
|
||
.default(['sl_query', 'wiki_search', 'sl_read_source'])
|
||
.describe('Default list of tool identifiers exposed to the research agent.'),
|
||
})
|
||
.prefault({})
|
||
.describe('Research-agent configuration.'),
|
||
})
|
||
.describe('Agent feature configuration.');
|
||
|
||
const memorySchema = z
|
||
.strictObject({
|
||
auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
|
||
})
|
||
.describe('Memory subsystem configuration.');
|
||
|
||
const ktxProjectConfigSchema = z
|
||
.strictObject({
|
||
setup: setupSchema.optional().describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
|
||
connections: z
|
||
.record(z.string(), connectionSchema)
|
||
.default({})
|
||
.describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
|
||
storage: storageSchema.prefault({}).describe('Storage backends and commit policy for KTX state and search indexes.'),
|
||
llm: llmSchema.prefault({}).describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
|
||
ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
|
||
agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
|
||
memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
|
||
scan: scanSchema.prefault({}).describe('Schema-scan configuration: enrichment and relationship discovery.'),
|
||
})
|
||
.describe('Configuration schema for KTX project files (ktx.yaml).');
|
||
|
||
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
|
||
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
|
||
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
|
||
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
|
||
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
|
||
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
|
||
|
||
export interface KtxConfigIssue {
|
||
path: string;
|
||
message: string;
|
||
fix?: string;
|
||
}
|
||
|
||
export interface KtxConfigValidation {
|
||
ok: boolean;
|
||
issues: KtxConfigIssue[];
|
||
}
|
||
|
||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||
}
|
||
|
||
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
|
||
return path.map((segment) => String(segment)).join('.');
|
||
}
|
||
|
||
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
|
||
let cursor: unknown = root;
|
||
for (const segment of path) {
|
||
if (cursor === null || typeof cursor !== 'object') return undefined;
|
||
cursor = (cursor as Record<PropertyKey, unknown>)[segment];
|
||
}
|
||
return cursor;
|
||
}
|
||
|
||
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
|
||
const basePath = dottedPath(issue.path);
|
||
|
||
if (issue.code === 'unrecognized_keys') {
|
||
const keys = (issue as { keys?: readonly string[] }).keys ?? [];
|
||
return keys.map((key) => {
|
||
const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
|
||
const hint = DEPRECATED_KEY_HINTS[fullPath];
|
||
if (hint !== undefined) {
|
||
return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
|
||
}
|
||
return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
|
||
});
|
||
}
|
||
|
||
const lastSegment = issue.path[issue.path.length - 1];
|
||
if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
|
||
const value = valueAtPath(input, issue.path);
|
||
return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
|
||
}
|
||
|
||
return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
|
||
}
|
||
|
||
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
|
||
return error.issues.flatMap((issue) => formatIssue(issue, input));
|
||
}
|
||
|
||
function formatZodError(error: z.ZodError, input: unknown): string {
|
||
return collectIssues(error, input)
|
||
.map((issue) => issue.message)
|
||
.join('\n');
|
||
}
|
||
|
||
export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
|
||
return ktxProjectConfigSchema.parse({});
|
||
}
|
||
|
||
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
|
||
const parsed = YAML.parse(raw) as unknown;
|
||
if (!isRecord(parsed)) {
|
||
throw new Error('ktx.yaml must contain a YAML object');
|
||
}
|
||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||
if (!result.success) {
|
||
throw new Error(formatZodError(result.error, parsed));
|
||
}
|
||
return result.data;
|
||
}
|
||
|
||
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
|
||
let parsed: unknown;
|
||
try {
|
||
parsed = YAML.parse(raw);
|
||
} catch (error) {
|
||
const message = error instanceof Error ? error.message : String(error);
|
||
return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${message}` }] };
|
||
}
|
||
if (!isRecord(parsed)) {
|
||
return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
|
||
}
|
||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||
if (result.success) {
|
||
return { ok: true, issues: [] };
|
||
}
|
||
return { ok: false, issues: collectIssues(result.error, parsed) };
|
||
}
|
||
|
||
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
|
||
const schema = z.toJSONSchema(ktxProjectConfigSchema, {
|
||
target: 'draft-7',
|
||
io: 'input',
|
||
}) as Record<string, unknown>;
|
||
return {
|
||
$schema: 'http://json-schema.org/draft-07/schema#',
|
||
$id: 'https://ktx.dev/schemas/ktx-project-config.json',
|
||
title: 'ktx.yaml',
|
||
...schema,
|
||
};
|
||
}
|
||
|
||
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
|
||
const serializedConfig =
|
||
config.ingest.adapters.length === 0
|
||
? {
|
||
...config,
|
||
ingest: {
|
||
embeddings: config.ingest.embeddings,
|
||
workUnits: config.ingest.workUnits,
|
||
},
|
||
}
|
||
: config;
|
||
return `${YAML.stringify(serializedConfig, { indent: 2, lineWidth: 0 }).trimEnd()}\n`;
|
||
}
|