mirror of
https://github.com/Kaelio/ktx.git
synced 2026-07-04 10:52:13 +02:00
chore(workspace): gate dead-code with knip production mode (#196)
* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm * refactor(workspace): rewrite @ktx/llm imports to relative paths * refactor(workspace): fold internal packages into cli * chore(workspace): gate dead-code with knip production mode Turn on production-mode knip plus an autofix run in pre-commit and the `pnpm dead-code` script, document the `/** @internal */` convention for test-only exports in AGENTS.md, annotate test-only exports across the CLI with that JSDoc, and drop dead exports/wrappers the new gate surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`, `createLocalScanEnrichmentProvidersFromConfig`, `PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports). Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit production entries so cross-package barrel leaks are caught. * refactor(cli): delete internal barrel index.ts files The 34 `index.ts` re-export barrels inside `packages/cli/src/` were holdovers from the pre-fold multi-workspace structure. Post-fold-in they served no production purpose: external consumers go through the single package main entry, and in-repo callers mostly imported through them only because the path was short. Internally, knip flagged most barrel re-exports as production-dead (only reached via tests). This change: - Deletes every internal barrel except `packages/cli/src/index.ts` (the published package entry). - Rewrites ~270 source/test files to import each name directly from the file that defines it. - Moves `tools/warehouse-verification/index.ts` to `create-warehouse-verification-tools.ts` (the function it defined locally) and updates its single consumer. - Renames `search/backend-conformance.ts` → `.test-utils.ts` to match the existing test-helper file convention. 
- Deletes 13 dead test-only chains (dbt-descriptions/*, live-database/extracted-schema, live-database/structural-sync, relationship-* feedback/review chain) plus their tests and a cascading orphan integration test. - Updates test mocks that pointed at deleted barrel paths (notion-client, connector barrels in scan/local-scan-connectors tests) to mock the source files instead. - Points the maintainer benchmark script (`scripts/relationship-benchmark-report.mjs`) at source files instead of `dist/context/scan/index.js`. - Drops the barrel `!` entries from `knip.json`; adds explicit production entries only for the benchmark code reached via dist by the maintainer script. Net: 413 files changed, ~1.2k insertions, ~9.4k deletions. `pnpm run dead-code` (Biome + knip default + knip production) and `pnpm run type-check` are clean; 2277 tests pass. * refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly Promote the CLI workspace package to the public name `@kaelio/ktx` and drop the separate `scripts/build-public-npm-package.mjs` wrapper. The CLI package is now publishable in place (`publishConfig.access: public`, `provenance: true`), so artifact packing uses `pnpm pack` against `packages/cli/` instead of assembling a parallel package tree. Updates all workspace filter invocations, docs, tests, and release readiness checks to reference the new package name, and folds the tarball-name helper into `scripts/public-npm-release-metadata.mjs`. * docs: align "agent clients" and "data agents" terminology Replace "client agents" with "agent clients" and "database agents" with "data agents" across AGENTS.md, README.md, the docs-site copy, and the matching setup-agents test description, matching the canonical vocabulary in docs/terminology.md. Also moves packages/cli/tsconfig.json's tsBuildInfoFile from node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive node_modules reinstalls. 
* refactor(release): single source of truth for package version Make packages/cli/package.json the single source of truth for the @kaelio/ktx version. publicNpmPackageVersion() now reads it directly, so artifact filenames, release-readiness checks, and the Python wheel version all derive from one field. The duplicate release-policy.json.publicNpmPackageVersion is removed. Previously the two fields could drift: tarballs were named kaelio-ktx-0.4.1.tgz while internally containing @kaelio/ktx@0.0.0-private. - update-public-release-version.mjs rewrites both Python pyproject.toml files (ktx-daemon, ktx-sl) alongside the npm package.jsons, normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2). - semantic-release-config.cjs adds the two pyproject.toml files to @semantic-release/git assets so the release commit back to main carries every version source in lockstep. - The six "?? '0.0.0-private'" fallback literals across the CLI are replaced with "?? getKtxCliPackageInfo().version", and createDefaultKtxMcpServer makes its version arg required. - docs/release.md describes the actual commit-back model: the dev tree always reflects the most recent release; no sentinel pin to maintain. Verified: pnpm run artifacts:build now produces kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with @kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and 2287 vitests + 173 script tests pass. * refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and scan command entrypoints so tests can stub them, and teach resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime feature when ktx.yaml selects sentence-transformers. * chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal Both symbols are consumed only by status-project.test.ts. 
Annotating with /** @internal */ keeps knip's production-mode check clean without changing runtime behavior. * fix(cli): use real package metadata in print-command-tree The stubbed package name embedded a forbidden product identifier that tripped the boundary check in CI. Read the metadata from package.json instead — keeps the rendered tree unchanged and removes a duplicate source of truth. * feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer source counts, computed with `SUM(embedding_json IS NOT NULL)` over `knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to "Wiki" (canonical per `docs/terminology.md`) and rename the matching `localStats.knowledgePages` field to `localStats.wikiPages`. Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those duplicated the per-surface rows above. Disk now reports only actual byte usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` / `semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry` helpers, and the `filter` arg on `summarizeDir` are removed.
This commit is contained in:
parent
a1cfb03d73
commit
2366b00301
1002 changed files with 2286 additions and 12051 deletions
561
packages/cli/src/context/project/config.test.ts
Normal file
561
packages/cli/src/context/project/config.test.ts
Normal file
|
|
@ -0,0 +1,561 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
buildDefaultKtxProjectConfig,
|
||||
generateKtxProjectConfigJsonSchema,
|
||||
parseKtxProjectConfig,
|
||||
serializeKtxProjectConfig,
|
||||
validateKtxProjectConfig,
|
||||
} from './config.js';
|
||||
|
||||
// Parsing, defaulting, serialization, and rejection behavior of ktx.yaml project configs.
// YAML fixtures are written at column 0 inside template literals so that their
// nesting is exactly what the parser sees.
describe('KTX project config', () => {
  it.each(['status', 'replay', 'run', 'watch'])(
    'accepts former ingest subcommand name "%s" as a connection id',
    (connectionId) => {
      expect(
        parseKtxProjectConfig(`
connections:
  ${connectionId}:
    driver: postgres
`),
      ).toMatchObject({
        connections: {
          [connectionId]: { driver: 'postgres' },
        },
      });
    },
  );

  it('builds the default standalone project config', () => {
    expect(buildDefaultKtxProjectConfig()).toEqual({
      connections: {},
      storage: {
        state: 'sqlite',
        search: 'sqlite-fts5',
        git: {
          auto_commit: true,
          author: 'ktx <ktx@example.com>',
        },
      },
      llm: {
        provider: {
          backend: 'none',
        },
        models: {},
      },
      ingest: {
        adapters: [],
        embeddings: {
          backend: 'none',
          dimensions: 8,
        },
        workUnits: {
          stepBudget: 40,
          maxConcurrency: 1,
          failureMode: 'continue',
        },
      },
      agent: {
        run_research: {
          enabled: false,
          max_iterations: 20,
          default_toolset: ['sl_query', 'wiki_search', 'sl_read_source'],
        },
      },
      memory: {
        auto_commit: true,
      },
      scan: {
        enrichment: {
          mode: 'none',
        },
        relationships: {
          enabled: true,
          llmProposals: true,
          validationRequiredForManifest: true,
          acceptThreshold: 0.85,
          reviewThreshold: 0.55,
          maxLlmTablesPerBatch: 40,
          maxCandidatesPerColumn: 25,
          profileSampleRows: 10000,
          validationConcurrency: 4,
        },
      },
    });
  });

  it('round-trips through YAML with stable defaults', () => {
    const serialized = serializeKtxProjectConfig(buildDefaultKtxProjectConfig());
    const parsed = parseKtxProjectConfig(serialized);

    expect(serialized).not.toContain('project:');
    expect(serialized).not.toContain('live-database');
    // `embeddings` is nested under `ingest`, so it is indented one level and its fields two.
    expect(serialized).toContain('  embeddings:\n    backend: none\n    dimensions: 8');
    expect(parsed.ingest.adapters).toEqual([]);
    expect(parsed.ingest.embeddings).toEqual({
      backend: 'none',
      dimensions: 8,
    });
  });

  it('parses and serializes setup warehouse metadata without setup progress', () => {
    const config = parseKtxProjectConfig(`
setup:
  database_connection_ids:
    - warehouse
    - analytics
connections:
  warehouse:
    driver: postgres
    url: env:WAREHOUSE_URL
`);

    expect(config.setup).toEqual({
      database_connection_ids: ['warehouse', 'analytics'],
    });

    const serialized = serializeKtxProjectConfig(config);
    expect(serialized).toContain('setup:');
    expect(serialized).toContain('database_connection_ids:');
    expect(serialized).not.toContain('completed_steps:');
  });

  it('parses global direct Anthropic LLM config', () => {
    const config = parseKtxProjectConfig(`
llm:
  provider:
    backend: anthropic
    anthropic:
      api_key: env:ANTHROPIC_API_KEY
  models:
    default: claude-sonnet-4-6
    triage: claude-haiku-4-5
    repair: claude-opus-4-7
  promptCaching:
    enabled: false
ingest:
  workUnits:
    stepBudget: 30
    maxConcurrency: 2
    failureMode: abort
`);

    expect(config.llm).toMatchObject({
      provider: {
        backend: 'anthropic',
        anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret
      },
      models: {
        default: 'claude-sonnet-4-6',
        triage: 'claude-haiku-4-5',
        repair: 'claude-opus-4-7',
      },
      promptCaching: { enabled: false },
    });
    expect(config.ingest.workUnits).toEqual({
      stepBudget: 30,
      maxConcurrency: 2,
      failureMode: 'abort',
    });
  });

  it('parses global Vertex LLM config', () => {
    const config = parseKtxProjectConfig(`
llm:
  provider:
    backend: vertex
    vertex:
      project: local-gcp-project
      location: us-east5
  models:
    default: claude-sonnet-4-6
    triage: claude-haiku-4-5
`);

    expect(config.llm.provider.backend).toBe('vertex');
    expect(config.llm.provider.vertex).toEqual({ project: 'local-gcp-project', location: 'us-east5' });
    expect(config.llm.models).toEqual({
      default: 'claude-sonnet-4-6',
      triage: 'claude-haiku-4-5',
    });
  });

  it('requires a non-empty Vertex location when the Vertex provider block is present', () => {
    const yaml = `
llm:
  provider:
    backend: vertex
    vertex:
      project: local-gcp-project
`;

    expect(() => parseKtxProjectConfig(yaml)).toThrow(/llm\.provider\.vertex\.location/);

    const validation = validateKtxProjectConfig(yaml);
    expect(validation.ok).toBe(false);
    expect(validation.issues).toEqual(
      expect.arrayContaining([
        expect.objectContaining({
          path: 'llm.provider.vertex.location',
        }),
      ]),
    );
  });

  it('parses Claude Code as a first-class LLM backend', () => {
    const config = parseKtxProjectConfig(`
llm:
  provider:
    backend: claude-code
  models:
    default: sonnet
    triage: haiku
    candidateExtraction: sonnet
    curator: sonnet
    reconcile: sonnet
    repair: opus
`);

    expect(config.llm.provider.backend).toBe('claude-code');
    expect(config.llm.models).toEqual({
      default: 'sonnet',
      triage: 'haiku',
      candidateExtraction: 'sonnet',
      curator: 'sonnet',
      reconcile: 'sonnet',
      repair: 'opus',
    });
  });

  it('parses gateway LLM, OpenAI scan embeddings, and sentence-transformers ingest embeddings', () => {
    const config = parseKtxProjectConfig(`
llm:
  provider:
    backend: gateway
    gateway:
      api_key: env:AI_GATEWAY_API_KEY
      base_url: https://gateway.example/v1
  models:
    default: anthropic/claude-sonnet-4-6
ingest:
  embeddings:
    backend: sentence-transformers
    model: all-MiniLM-L6-v2
    dimensions: 384
    sentenceTransformers:
      base_url: http://127.0.0.1:18081
      pathPrefix: ""
    batchSize: 16
scan:
  enrichment:
    mode: llm
    embeddings:
      backend: openai
      model: text-embedding-3-small
      dimensions: 1536
      openai:
        api_key: env:OPENAI_API_KEY
      batchSize: 32
`);

    expect(config.ingest.embeddings).toMatchObject({
      backend: 'sentence-transformers',
      model: 'all-MiniLM-L6-v2',
      dimensions: 384,
      sentenceTransformers: { base_url: 'http://127.0.0.1:18081', pathPrefix: '' },
      batchSize: 16,
    });
    expect(config.llm.models.default).toBe('anthropic/claude-sonnet-4-6');
    expect(config.scan.enrichment.mode).toBe('llm');
    expect(config.scan.enrichment.embeddings?.dimensions).toBe(1536);
  });

  it('parses scan relationship settings', () => {
    const config = parseKtxProjectConfig(`
scan:
  relationships:
    enabled: false
    llmProposals: false
    validationRequiredForManifest: true
    acceptThreshold: 0.91
    reviewThreshold: 0.61
    maxLlmTablesPerBatch: 12
    maxCandidatesPerColumn: 7
    profileSampleRows: 500
    validationConcurrency: 2
    validationBudget: 0
`);

    expect(config.scan.relationships).toEqual({
      enabled: false,
      llmProposals: false,
      validationRequiredForManifest: true,
      acceptThreshold: 0.91,
      reviewThreshold: 0.61,
      maxLlmTablesPerBatch: 12,
      maxCandidatesPerColumn: 7,
      profileSampleRows: 500,
      validationConcurrency: 2,
      validationBudget: 0,
    });

    // Serialize once; every assertion below inspects the same output.
    const serialized = serializeKtxProjectConfig(config);
    expect(serialized).toContain('enabled: false');
    expect(serialized).toContain('llmProposals: false');
    expect(serialized).toContain('validationRequiredForManifest: true');
    expect(serialized).toContain('acceptThreshold: 0.91');
    expect(serialized).toContain('reviewThreshold: 0.61');
    expect(serialized).toContain('maxLlmTablesPerBatch: 12');
    expect(serialized).toContain('maxCandidatesPerColumn: 7');
    expect(serialized).toContain('profileSampleRows: 500');
    expect(serialized).toContain('validationConcurrency: 2');
    expect(serialized).toContain('validationBudget: 0');
  });

  it('parses the scan relationship validation budget sentinel', () => {
    const config = parseKtxProjectConfig(`
scan:
  relationships:
    validationBudget: all
`);

    expect(config.scan.relationships.validationBudget).toBe('all');
    expect(serializeKtxProjectConfig(config)).toContain('validationBudget: all');
  });

  it('rejects out-of-range scan relationship numeric settings', () => {
    const yaml = `
scan:
  relationships:
    acceptThreshold: 2
    reviewThreshold: -1
    maxLlmTablesPerBatch: 0
    maxCandidatesPerColumn: -4
    profileSampleRows: 0
    validationConcurrency: 0
    validationBudget: 1.5
`;
    expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.acceptThreshold/);

    // The non-throwing validator must report every offending field.
    const validation = validateKtxProjectConfig(yaml);
    expect(validation.ok).toBe(false);
    const paths = validation.issues.map((issue) => issue.path);
    expect(paths).toEqual(
      expect.arrayContaining([
        'scan.relationships.acceptThreshold',
        'scan.relationships.reviewThreshold',
        'scan.relationships.maxLlmTablesPerBatch',
        'scan.relationships.maxCandidatesPerColumn',
        'scan.relationships.profileSampleRows',
        'scan.relationships.validationConcurrency',
        'scan.relationships.validationBudget',
      ]),
    );
  });

  it('rejects invalid scan relationship validation budget strings', () => {
    const yaml = `
scan:
  relationships:
    validationBudget: infinite
`;
    expect(() => parseKtxProjectConfig(yaml)).toThrow(/scan\.relationships\.validationBudget/);
  });

  it('rejects unsupported local LLM and embedding fields', () => {
    expect(() =>
      parseKtxProjectConfig(`
ingest:
  llm:
    backend: anthropic
`),
    ).toThrow('Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits');

    expect(() =>
      parseKtxProjectConfig(`
scan:
  enrichment:
    backend: gateway
`),
    ).toThrow('Unsupported scan.enrichment.backend: use scan.enrichment.mode');

    expect(() =>
      parseKtxProjectConfig(`
scan:
  enrichment:
    mode: llm
    llm:
      backend: gateway
`),
    ).toThrow('Unsupported scan.enrichment.llm: use top-level llm.provider and llm.models');

    expect(() =>
      parseKtxProjectConfig(`
ingest:
  embeddings:
    provider: gateway
    max_batch_size: 32
`),
    ).toThrow('Unsupported ingest.embeddings.provider');
  });

  it('rejects gateway embedding configs', () => {
    expect(() =>
      parseKtxProjectConfig(`
ingest:
  embeddings:
    backend: gateway
    model: provider/text-embedding
    dimensions: 1536
`),
    ).toThrow('Unsupported ingest.embeddings.backend: gateway');

    expect(() =>
      parseKtxProjectConfig(`
scan:
  enrichment:
    mode: llm
    embeddings:
      backend: gateway
      model: provider/text-embedding
      dimensions: 1536
`),
    ).toThrow('Unsupported scan.enrichment.embeddings.backend: gateway');
  });

  it('fills optional sections when a minimal config is loaded', () => {
    const config = parseKtxProjectConfig('{}\n');

    expect(config).toEqual(buildDefaultKtxProjectConfig());
    expect(config.ingest.embeddings).toEqual({
      backend: 'none',
      dimensions: 8,
    });
  });

  it('rejects configs without an object root', () => {
    expect(() => parseKtxProjectConfig('- nope\n')).toThrow('ktx.yaml must contain a YAML object');
  });

  it('accepts configs without a project name', () => {
    expect(parseKtxProjectConfig('connections: {}\n')).toMatchObject({
      connections: {},
    });
  });

  it('rejects unknown top-level fields under strict mode', () => {
    // "storrage" is a deliberate misspelling of "storage".
    expect(() =>
      parseKtxProjectConfig(`
storrage:
  state: sqlite
`),
    ).toThrow(/Unsupported storrage/);
  });
});
|
||||
|
||||
// The non-throwing validator: returns `{ ok, issues }` instead of raising, so callers
// can surface every problem in a ktx.yaml document at once.
describe('validateKtxProjectConfig', () => {
  it('returns ok: true with no issues for a valid config', () => {
    const result = validateKtxProjectConfig('connections: {}\n');
    expect(result).toEqual({ ok: true, issues: [] });
  });

  it('collects every schema issue without throwing', () => {
    const result = validateKtxProjectConfig(`
storage:
  search: not-a-real-backend
scan:
  relationships:
    acceptThreshold: 1.7
`);

    expect(result.ok).toBe(false);
    const paths = result.issues.map((issue) => issue.path);
    expect(paths).toEqual(
      expect.arrayContaining([
        'storage.search',
        'scan.relationships.acceptThreshold',
      ]),
    );
  });

  it('attaches migration hints for known deprecated keys', () => {
    const result = validateKtxProjectConfig(`
ingest:
  llm:
    backend: anthropic
scan:
  enrichment:
    backend: none
`);

    expect(result.ok).toBe(false);
    const findIssue = (path: string) => result.issues.find((issue) => issue.path === path);
    expect(findIssue('ingest.llm')).toMatchObject({
      message: 'Unsupported ingest.llm: use top-level llm.provider, llm.models, and ingest.workUnits',
      fix: 'use top-level llm.provider, llm.models, and ingest.workUnits',
    });
    expect(findIssue('scan.enrichment.backend')).toMatchObject({
      message: 'Unsupported scan.enrichment.backend: use scan.enrichment.mode',
      fix: 'use scan.enrichment.mode',
    });
  });

  it('reports YAML parse errors as a root-level issue', () => {
    const result = validateKtxProjectConfig(': not valid yaml :\n');
    expect(result.ok).toBe(false);
    // A root-level issue is reported with an empty path.
    expect(result.issues[0]?.path).toBe('');
    expect(result.issues[0]?.message).toMatch(/ktx\.yaml parse error/);
  });

  // The fixture is a YAML sequence: well-formed YAML whose root is not a mapping.
  it('reports a non-object YAML root as a single issue', () => {
    const result = validateKtxProjectConfig('- nope\n');
    expect(result).toEqual({
      ok: false,
      issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }],
    });
  });
});
|
||||
|
||||
// Shape checks on the JSON Schema generated for ktx.yaml.
describe('generateKtxProjectConfigJsonSchema', () => {
  // Minimal structural views of a JSON Schema node, used only to navigate the output.
  type ObjectNode = { properties?: Record<string, unknown> };
  type EnumNode = { enum?: readonly string[] };
  type DescribedNode = { description?: string };

  const schema = generateKtxProjectConfigJsonSchema();

  /** Looks up one top-level section of the generated schema by its ktx.yaml key. */
  const topLevelSection = (key: string): ObjectNode | undefined =>
    (schema.properties as Record<string, ObjectNode>)[key];

  it('emits draft-07 metadata', () => {
    expect(schema.$schema).toBe('http://json-schema.org/draft-07/schema#');
    expect(schema.$id).toBe('https://ktx.dev/schemas/ktx-project-config.json');
    expect(schema.title).toBe('ktx.yaml');
    expect(schema.type).toBe('object');
  });

  it('exposes every top-level ktx.yaml section under properties', () => {
    const expectedSections = ['agent', 'connections', 'ingest', 'llm', 'memory', 'scan', 'setup', 'storage'];
    const actualSections = Object.keys(schema.properties as Record<string, unknown>);
    expect(actualSections.sort()).toEqual(expectedSections.sort());
  });

  it('does not require any top-level fields', () => {
    expect(schema.required).toBeUndefined();
  });

  it('carries .describe() text on top-level fields', () => {
    const described = schema.properties as Record<string, DescribedNode>;
    expect(described.llm?.description).toMatch(/LLM/);
    expect(described.scan?.description).toMatch(/Schema-scan/);
  });

  it('propagates enum values through to nested fields', () => {
    const providerNode = topLevelSection('llm')?.properties?.provider as ObjectNode;
    const backendNode = providerNode?.properties?.backend as EnumNode;
    expect(backendNode?.enum).toEqual(['none', 'anthropic', 'vertex', 'gateway', 'claude-code']);

    const stateNode = topLevelSection('storage')?.properties?.state as EnumNode;
    expect(stateNode?.enum).toEqual(['sqlite', 'postgres']);
  });

  it('carries descriptions on deeply nested leaves', () => {
    const relationshipsNode = topLevelSection('scan')?.properties?.relationships as {
      properties?: Record<string, DescribedNode>;
    };
    expect(relationshipsNode?.properties?.acceptThreshold?.description).toMatch(/auto-accepted/);
  });

  it('emits the mappings shapes under connections', () => {
    const schemaJson = JSON.stringify(schema);
    for (const key of ['databaseMappings', 'connectionMappings', 'expectedLookerConnectionName']) {
      expect(schemaJson).toContain(key);
    }
  });
});
|
||||
393
packages/cli/src/context/project/config.ts
Normal file
393
packages/cli/src/context/project/config.ts
Normal file
|
|
@ -0,0 +1,393 @@
|
|||
import { KTX_MODEL_ROLES } from '../../llm/types.js';
|
||||
import YAML from 'yaml';
|
||||
import * as z from 'zod';
|
||||
import { connectionConfigSchema } from './driver-schemas.js';
|
||||
|
||||
/** Allowed values for `llm.provider.backend`. */
const KTX_LLM_BACKENDS = [
  'none',
  'anthropic',
  'vertex',
  'gateway',
  'claude-code',
] as const;

/** Allowed values for an embedding block's `backend`. */
const KTX_EMBEDDING_BACKENDS = [
  'none',
  'openai',
  'sentence-transformers',
] as const;

/** Allowed prompt-cache TTL values. */
const KTX_PROMPT_CACHE_TTLS = [
  '5m',
  '1h',
] as const;

/** Allowed values for `scan.enrichment.mode`. */
const KTX_ENRICHMENT_MODES = [
  'none',
  'deterministic',
  'llm',
] as const;

/** Allowed values for the ingest work-unit `failureMode`. */
const KTX_WORK_UNIT_FAILURE_MODES = [
  'abort',
  'continue',
] as const;

// NOTE(review): presumably the allowed values for `storage.state` — confirm against the storage schema.
const KTX_STORAGE_STATES = [
  'sqlite',
  'postgres',
] as const;

// NOTE(review): presumably the allowed values for `storage.search` — confirm against the storage schema.
const KTX_SEARCH_BACKENDS = [
  'sqlite-fts5',
  'postgres-hybrid',
] as const;
|
||||
|
||||
/**
 * Migration hints keyed by the dotted path of a no-longer-supported ktx.yaml key.
 * Each value names the replacement key(s) to use instead.
 */
const DEPRECATED_KEY_HINTS: Record<string, string> = {
  // LLM settings
  'llm.provider.provider': 'use llm.provider.backend',
  'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',

  // Ingest embeddings
  'ingest.embeddings.provider': 'use ingest.embeddings.backend',

  // Scan enrichment
  'scan.enrichment.backend': 'use scan.enrichment.mode',
  'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
  'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
};
|
||||
|
||||
/**
 * Credentials shape shared by API-key based providers.
 * Both fields are optional; unknown keys are rejected because the object is strict.
 */
const apiCredentialsSchema = z
  .strictObject({
    api_key: z
      .string()
      .min(1)
      .optional()
      .describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
    base_url: z
      .string()
      .min(1)
      .optional()
      .describe("Override the provider's default API base URL (e.g. a proxy or self-hosted gateway)."),
  })
  .describe('API credentials block: optional key and base URL for an LLM or embedding provider.');
|
||||
|
||||
/**
 * Vertex AI provider block.
 * `location` has no `.optional()`, so it must be supplied whenever this block appears;
 * `project` may be omitted.
 */
const vertexProviderSchema = z
  .strictObject({
    project: z
      .string()
      .min(1)
      .optional()
      .describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
    location: z
      .string()
      .min(1)
      .describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
  })
  .describe('Google Vertex AI provider configuration.');
|
||||
|
||||
/**
 * Connection settings for a sentence-transformers embedding server.
 * `base_url` defaults to the empty string when omitted.
 */
const sentenceTransformersSchema = z
  .strictObject({
    base_url: z
      .string()
      .default('')
      .describe(
        'Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.',
      ),
    pathPrefix: z
      .string()
      .optional()
      .describe('Optional URL path prefix prepended to embedding requests.'),
  })
  .describe('Sentence-transformers embedding server configuration.');
|
||||
|
||||
/**
 * `llm.provider`: which LLM backend to use, plus one optional credentials block per backend.
 * `backend` defaults to "none".
 */
const llmProviderSchema = z
  .strictObject({
    backend: z
      .enum(KTX_LLM_BACKENDS)
      .default('none')
      .describe(
        'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session.',
      ),

    // Backend-specific blocks; each is optional at the schema level.
    vertex: vertexProviderSchema
      .optional()
      .describe('Vertex AI credentials, used when backend is "vertex".'),
    anthropic: apiCredentialsSchema
      .optional()
      .describe('Anthropic API credentials, used when backend is "anthropic".'),
    gateway: apiCredentialsSchema
      .optional()
      .describe('AI Gateway credentials, used when backend is "gateway".'),
  })
  .describe('LLM provider selection and credentials.');
|
||||
|
||||
/**
 * `llm.promptCaching`: optional prompt-cache switches and per-segment TTLs.
 * Every field is optional and none carries a schema-level default.
 */
const promptCachingSchema = z
  .strictObject({
    enabled: z
      .boolean()
      .optional()
      .describe("Master switch for Anthropic-style prompt caching. When omitted, the backend's default applies."),
    systemTtl: z
      .enum(KTX_PROMPT_CACHE_TTLS)
      .optional()
      .describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
    toolsTtl: z
      .enum(KTX_PROMPT_CACHE_TTLS)
      .optional()
      .describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
    historyTtl: z
      .enum(KTX_PROMPT_CACHE_TTLS)
      .optional()
      .describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
    vertexFallbackTo5m: z
      .boolean()
      .optional()
      .describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
  })
  .describe('Prompt-caching tunables for Anthropic-compatible providers.');
|
||||
|
||||
/**
 * Top-level `llm` section: provider, per-role model overrides, and prompt caching.
 * `provider` is prefaulted with `{}` so an omitted block is still parsed by
 * `llmProviderSchema` and receives its defaults; `models` defaults to an empty record.
 */
const llmSchema = z
  .strictObject({
    provider: llmProviderSchema
      .prefault({})
      .describe('LLM provider backend and credentials.'),
    models: z
      .partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
      .default({})
      .describe(
        'Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.',
      ),
    promptCaching: promptCachingSchema
      .optional()
      .describe('Optional prompt-caching tunables.'),
  })
  .describe('LLM provider, per-role model overrides, and prompt-caching tunables.');
|
||||
|
||||
/**
 * Embedding configuration block, reused for ingest and for the scan-enrichment override.
 * `backend` defaults to "none" and `dimensions` to 8; the remaining fields are optional.
 */
const embeddingSchema = z
  .strictObject({
    backend: z
      .enum(KTX_EMBEDDING_BACKENDS)
      .default('none')
      .describe(
        'Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.',
      ),
    model: z
      .string()
      .min(1)
      .optional()
      .describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
    dimensions: z
      .int()
      .positive()
      .default(8)
      .describe(
        // Joined with no separator: each fragment carries its own trailing space.
        [
          'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ',
          'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ',
          '(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
        ].join(''),
      ),
    openai: apiCredentialsSchema
      .optional()
      .describe('OpenAI credentials, used when backend is "openai".'),
    sentenceTransformers: sentenceTransformersSchema
      .optional()
      .describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
    batchSize: z
      .int()
      .positive()
      .optional()
      .describe('Number of texts per embedding API call. Omit to use the backend default.'),
  })
  .describe('Embedding backend, model, and provider credentials.');
|
||||
|
||||
/**
 * `ingest.workUnits` section: per-unit step budget, parallelism, and what to do
 * when a unit fails. All three fields have defaults (40 steps, concurrency 1,
 * failure mode 'continue').
 */
const workUnitsSchema = z
  .strictObject({
    stepBudget: z
      .int()
      .positive()
      .default(40)
      .describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
    maxConcurrency: z
      .int()
      .positive()
      .default(1)
      .describe('Maximum number of work units run concurrently during ingest.'),
    failureMode: z
      .enum(KTX_WORK_UNIT_FAILURE_MODES)
      .default('continue')
      .describe(
        'Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.',
      ),
  })
  .describe('Concurrency and failure handling for ingest work units.');
|
||||
|
||||
/**
 * `ingest` section of ktx.yaml: which adapters run, how documents are embedded,
 * and the work-unit policy. `adapters` defaults to an empty list; the two
 * nested sections fall back to their own defaults when omitted.
 */
const ingestSchema = z
  .strictObject({
    adapters: z
      .array(z.string().min(1))
      .default([])
      .describe(
        'Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.',
      ),
    embeddings: embeddingSchema
      .prefault({ backend: 'none' })
      .describe('Embedding configuration used when ingest adapters need to embed documents.'),
    workUnits: workUnitsSchema
      .prefault({})
      .describe('Concurrency and failure handling for ingest work units.'),
  })
  .describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');
|
||||
|
||||
/**
 * `scan.enrichment` section: selects the enrichment mode (default 'none') and
 * optionally carries its own embedding configuration.
 */
const scanEnrichmentSchema = z
  .strictObject({
    mode: z
      .enum(KTX_ENRICHMENT_MODES)
      .default('none')
      .describe(
        'Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.',
      ),
    embeddings: embeddingSchema
      .optional()
      .describe(
        'Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.',
      ),
  })
  .describe('Schema-scan enrichment: how columns and tables are described.');
|
||||
|
||||
/** Builds a fresh number schema constrained to the closed interval [0, 1]. */
const relationshipConfidenceScore = () => z.number().min(0).max(1);

/**
 * `scan.relationships` section: switches, confidence thresholds and resource
 * limits for relationship discovery.
 *
 * Every field except `validationBudget` has a default. The schema validates
 * each threshold independently; it does not enforce any ordering between
 * `acceptThreshold` and `reviewThreshold`.
 */
const scanRelationshipsSchema = z
  .strictObject({
    enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
    llmProposals: z
      .boolean()
      .default(true)
      .describe(
        'When true, propose relationships using the configured LLM in addition to deterministic candidates.',
      ),
    validationRequiredForManifest: z
      .boolean()
      .default(true)
      .describe('When true, only relationships that pass database-side validation are written to the manifest.'),
    acceptThreshold: relationshipConfidenceScore()
      .default(0.85)
      .describe(
        'Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.',
      ),
    reviewThreshold: relationshipConfidenceScore()
      .default(0.55)
      .describe(
        'Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).',
      ),
    maxLlmTablesPerBatch: z
      .int()
      .positive()
      .default(40)
      .describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
    maxCandidatesPerColumn: z
      .int()
      .positive()
      .default(25)
      .describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
    profileSampleRows: z
      .int()
      .positive()
      .default(10000)
      .describe('Number of rows sampled per table when profiling values for relationship inference.'),
    validationConcurrency: z
      .int()
      .positive()
      .default(4)
      .describe('Number of relationship validation queries run in parallel against the database.'),
    // Either the literal string 'all' or a non-negative integer cap; may be omitted entirely.
    validationBudget: z
      .union([z.literal('all'), z.int().nonnegative()])
      .optional()
      .describe(
        'Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.',
      ),
  })
  .describe('Schema-scan relationship discovery and validation tunables.');
|
||||
|
||||
/**
 * `scan` section of ktx.yaml, composed of the enrichment and relationship
 * sub-sections. Both are prefaulted with `{}` so an omitted sub-section is
 * parsed from an empty object and picks up its field defaults.
 */
const scanSchema = z
  .strictObject({
    enrichment: scanEnrichmentSchema
      .prefault({})
      .describe('Column/table enrichment configuration.'),
    relationships: scanRelationshipsSchema
      .prefault({})
      .describe('Relationship discovery and validation configuration.'),
  })
  .describe('Schema-scan configuration: enrichment and relationship discovery.');
|
||||
|
||||
/**
 * `setup` section of ktx.yaml.
 *
 * `completed_steps` is accepted with any value so older files still validate,
 * but the trailing transform keeps only `database_connection_ids`, so the
 * parsed config never contains it.
 */
const setupSchema = z
  .strictObject({
    database_connection_ids: z
      .array(z.string().min(1))
      .default([])
      .describe(
        'Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.',
      ),
    completed_steps: z
      .unknown()
      .optional()
      .describe(
        'Deprecated. Accepted for backward compatibility but ignored; KTX no longer tracks setup progress here.',
      ),
  })
  .transform((setup) => ({ database_connection_ids: setup.database_connection_ids }))
  .describe('Setup-wizard state captured during `ktx setup`.');
|
||||
|
||||
/**
 * `storage.git` section: whether state changes are auto-committed (default
 * true) and the author string used for those commits. The author is only
 * checked for being non-empty; its "Name <email>" shape is not validated here.
 */
const storageGitSchema = z
  .strictObject({
    auto_commit: z
      .boolean()
      .default(true)
      .describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
    author: z
      .string()
      .min(1)
      .default('ktx <ktx@example.com>')
      .describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
  })
  .describe('Git-backed storage commit policy.');
|
||||
|
||||
/**
 * `storage` section of ktx.yaml: state backend (default 'sqlite'), search
 * backend (default 'sqlite-fts5') and the Git commit policy.
 */
const storageSchema = z
  .strictObject({
    state: z
      .enum(KTX_STORAGE_STATES)
      .default('sqlite')
      .describe(
        'Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.',
      ),
    search: z
      .enum(KTX_SEARCH_BACKENDS)
      .default('sqlite-fts5')
      .describe(
        'Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.',
      ),
    git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
  })
  .describe('Storage backends and commit policy for KTX state and search indexes.');
|
||||
|
||||
/** Schema for one entry of the top-level `connections` map; alias of the driver-discriminated union. */
const connectionSchema = connectionConfigSchema;
|
||||
|
||||
/**
 * `agent.run_research` block: disabled by default, with an iteration cap
 * (default 20, zero allowed) and a default list of tool identifiers.
 */
const agentRunResearchSchema = z
  .strictObject({
    enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
    max_iterations: z
      .number()
      .int()
      .nonnegative()
      .default(20)
      .describe('Maximum number of tool-call iterations the research agent may take per run.'),
    default_toolset: z
      .array(z.string().min(1))
      .default(['sl_query', 'wiki_search', 'sl_read_source'])
      .describe('Default list of tool identifiers exposed to the research agent.'),
  })
  .prefault({})
  .describe('Research-agent configuration.');

/** `agent` section of ktx.yaml; currently holds only the research-agent block. */
const agentSchema = z
  .strictObject({
    run_research: agentRunResearchSchema,
  })
  .describe('Agent feature configuration.');
|
||||
|
||||
/** `memory` section of ktx.yaml: a single auto-commit switch that defaults to true. */
const memorySchema = z
  .strictObject({
    auto_commit: z
      .boolean()
      .default(true)
      .describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
  })
  .describe('Memory subsystem configuration.');
|
||||
|
||||
/**
 * Root schema for ktx.yaml.
 *
 * `setup` is optional and `connections` defaults to an empty map. Every other
 * section is prefaulted with `{}`, so parsing an empty document yields a fully
 * populated config. Unknown top-level keys are rejected (`strictObject`).
 */
const ktxProjectConfigSchema = z
  .strictObject({
    setup: setupSchema
      .optional()
      .describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
    connections: z
      .record(z.string(), connectionSchema)
      .default({})
      .describe(
        'Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.',
      ),
    storage: storageSchema
      .prefault({})
      .describe('Storage backends and commit policy for KTX state and search indexes.'),
    llm: llmSchema
      .prefault({})
      .describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
    ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
    agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
    memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
    scan: scanSchema
      .prefault({})
      .describe('Schema-scan configuration: enrichment and relationship discovery.'),
  })
  .describe('Configuration schema for KTX project files (ktx.yaml).');
|
||||
|
||||
/** Fully parsed ktx.yaml (output type of the root schema, defaults applied). */
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;

/** Parsed `llm` section. */
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;

/** Parsed embedding configuration block. */
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;

/** Parsed `scan.enrichment` section. */
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;

/** Parsed `scan.relationships` section. */
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;

/** Parsed entry of the `connections` map (union over all supported drivers). */
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
|
||||
|
||||
/** One problem found while validating ktx.yaml. */
export interface KtxConfigIssue {
  /** Dot-joined location of the offending value; empty string for document-level problems. */
  path: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Suggested remedy; set only when a deprecation hint exists for the offending key. */
  fix?: string;
}
|
||||
|
||||
/** Outcome of validating ktx.yaml without throwing. */
export interface KtxConfigValidation {
  /** True when the document parsed and matched the schema. */
  ok: boolean;
  /** Problems found; empty when `ok` is true. */
  issues: KtxConfigIssue[];
}
|
||||
|
||||
/**
 * Type guard for plain-object-like values.
 *
 * @param value - Anything, typically the result of parsing YAML.
 * @returns True for non-null, non-array values whose `typeof` is 'object'.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
||||
/**
 * Joins path segments with '.' for display.
 *
 * @param path - Property keys (strings, numbers or symbols).
 * @returns The dot-joined path; an empty path yields ''.
 */
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
  const parts: string[] = [];
  for (const segment of path) {
    // String() rather than a template literal so symbol segments do not throw.
    parts.push(String(segment));
  }
  return parts.join('.');
}
|
||||
|
||||
/**
 * Looks up the value reached by following `path` from `root`.
 *
 * @param root - Value to start from.
 * @param path - Property keys to follow in order.
 * @returns The value at the path, `root` itself for an empty path, or
 *   `undefined` as soon as a non-object (or null) is encountered on the way.
 */
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
  return path.reduce<unknown>((cursor, segment) => {
    const canDescend = cursor !== null && typeof cursor === 'object';
    // Once the walk hits a non-object, every later step stays undefined.
    return canDescend ? (cursor as Record<PropertyKey, unknown>)[segment] : undefined;
  }, root);
}
|
||||
|
||||
/**
 * Renders an offending config value for inclusion in an error message.
 * Primitives (and null/undefined) use `String()`; objects and arrays are
 * JSON-encoded so they do not print as "[object Object]".
 */
function describeConfigValue(value: unknown): string {
  if (value === null || typeof value !== 'object') {
    return String(value);
  }
  try {
    return JSON.stringify(value) ?? String(value);
  } catch {
    // Circular or otherwise non-serializable values fall back to the default rendering.
    return String(value);
  }
}

/**
 * Converts one Zod issue into zero or more user-facing config issues.
 *
 * - `unrecognized_keys` expands to one issue per unknown key; when the full
 *   dotted key has an entry in DEPRECATED_KEY_HINTS, the hint is used both in
 *   the message and as the `fix`.
 * - An invalid value or type on a field named `backend` reports the offending
 *   value read back from `input`.
 * - Anything else is reported as "<path>: <zod message>" (or just the message
 *   at the document root).
 *
 * @param issue - Issue reported by Zod.
 * @param input - The raw value that was validated; used to echo bad values.
 * @returns The formatted issues (empty only if an unrecognized-keys issue lists no keys).
 */
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
  const basePath = dottedPath(issue.path);

  if (issue.code === 'unrecognized_keys') {
    const keys = (issue as { keys?: readonly string[] }).keys ?? [];
    return keys.map((key) => {
      const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
      const hint = DEPRECATED_KEY_HINTS[fullPath];
      if (hint !== undefined) {
        return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
      }
      return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
    });
  }

  const lastSegment = issue.path[issue.path.length - 1];
  if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
    const value = valueAtPath(input, issue.path);
    // Strings and other primitives render as before; only objects/arrays change.
    return [{ path: basePath, message: `Unsupported ${basePath}: ${describeConfigValue(value)}` }];
  }

  return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
}
|
||||
|
||||
/**
 * Flattens every issue in a Zod error into user-facing config issues.
 *
 * @param error - Validation error returned by the schema.
 * @param input - The raw value that was validated.
 * @returns Formatted issues in the order Zod reported them.
 */
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
  const collected: KtxConfigIssue[] = [];
  for (const issue of error.issues) {
    collected.push(...formatIssue(issue, input));
  }
  return collected;
}
|
||||
|
||||
/**
 * Builds a single multi-line error string from a Zod error.
 *
 * @param error - Validation error returned by the schema.
 * @param input - The raw value that was validated.
 * @returns One formatted message per line, joined with '\n'.
 */
function formatZodError(error: z.ZodError, input: unknown): string {
  const messages = collectIssues(error, input).map(({ message }) => message);
  return messages.join('\n');
}
|
||||
|
||||
/**
 * Produces the configuration that results from an empty ktx.yaml.
 *
 * @returns The root schema's parse of `{}`, i.e. every default applied.
 */
export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
  const emptyDocument = {};
  return ktxProjectConfigSchema.parse(emptyDocument);
}
|
||||
|
||||
/**
 * Parses and validates the text of a ktx.yaml file.
 *
 * @param raw - YAML source text.
 * @returns The validated configuration with defaults applied.
 * @throws Whatever `YAML.parse` throws on malformed YAML (not wrapped here).
 * @throws Error when the document is not a YAML mapping, or when schema
 *   validation fails (message lists one formatted issue per line).
 */
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
  const yamlDocument = YAML.parse(raw) as unknown;
  if (!isRecord(yamlDocument)) {
    throw new Error('ktx.yaml must contain a YAML object');
  }

  const outcome = ktxProjectConfigSchema.safeParse(yamlDocument);
  if (outcome.success) {
    return outcome.data;
  }
  throw new Error(formatZodError(outcome.error, yamlDocument));
}
|
||||
|
||||
/**
 * Validates the text of a ktx.yaml file without throwing.
 *
 * YAML syntax errors, a non-mapping document, and schema violations are all
 * reported through the returned `issues` list.
 *
 * @param raw - YAML source text.
 * @returns `{ ok: true, issues: [] }` on success, otherwise `ok: false` with
 *   at least one issue for the two document-level failures, or the formatted
 *   schema issues.
 */
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
  let yamlDocument: unknown;
  try {
    yamlDocument = YAML.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${reason}` }] };
  }

  if (!isRecord(yamlDocument)) {
    return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
  }

  const outcome = ktxProjectConfigSchema.safeParse(yamlDocument);
  return outcome.success
    ? { ok: true, issues: [] }
    : { ok: false, issues: collectIssues(outcome.error, yamlDocument) };
}
|
||||
|
||||
/**
 * Generates a draft-07 JSON Schema describing the accepted ktx.yaml input.
 *
 * The schema is generated in 'input' mode. Because the generated object is
 * spread last, any `$schema`, `$id` or `title` it contains takes precedence
 * over the envelope values set here.
 *
 * @returns The JSON Schema as a plain object.
 */
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
  const generated = z.toJSONSchema(ktxProjectConfigSchema, {
    target: 'draft-7',
    io: 'input',
  }) as Record<string, unknown>;

  const envelope: Record<string, unknown> = {
    $schema: 'http://json-schema.org/draft-07/schema#',
    $id: 'https://ktx.dev/schemas/ktx-project-config.json',
    title: 'ktx.yaml',
  };

  return { ...envelope, ...generated };
}
|
||||
|
||||
/**
 * Serializes a configuration back to ktx.yaml text.
 *
 * When `ingest.adapters` is empty the `adapters` key is left out of the
 * output; the `ingest` section then contains only `embeddings` and
 * `workUnits`.
 *
 * @param config - Configuration to serialize.
 * @returns YAML text (2-space indent, no line folding) ending in exactly one newline.
 */
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
  let serializable: unknown = config;
  if (config.ingest.adapters.length === 0) {
    const { embeddings, workUnits } = config.ingest;
    serializable = { ...config, ingest: { embeddings, workUnits } };
  }

  const yamlText = YAML.stringify(serializable, { indent: 2, lineWidth: 0 });
  return `${yamlText.trimEnd()}\n`;
}
|
||||
140
packages/cli/src/context/project/driver-schemas.test.ts
Normal file
140
packages/cli/src/context/project/driver-schemas.test.ts
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import { connectionConfigSchema } from './driver-schemas.js';
|
||||
|
||||
// Warehouse drivers: every supported driver parses, extra fields survive, unknown drivers fail.
describe('connectionConfigSchema (driver discriminated union)', () => {
  const warehouseCases: [string, string][] = [
    ['postgres', 'postgres://user:pass@host:5432/db'], // pragma: allowlist secret
    ['postgresql', 'postgresql://user:pass@host:5432/db'], // pragma: allowlist secret
    ['mysql', 'mysql://user:pass@host:3306/db'], // pragma: allowlist secret
    ['snowflake', 'snowflake://account/db'],
    ['bigquery', 'bigquery://project/dataset'],
    ['sqlite', 'sqlite:///tmp/db.sqlite'],
    ['clickhouse', 'clickhouse://host:8123/db'],
    ['sqlserver', 'sqlserver://host:1433;database=db'],
  ];

  it.each(warehouseCases)('parses %s warehouse connection', (driver, url) => {
    const parsed = connectionConfigSchema.parse({ driver, url });
    expect(parsed).toMatchObject({ driver, url });
  });

  it('preserves unknown warehouse fields via looseObject passthrough', () => {
    const passthroughFields = {
      historicSql: { enabled: true },
      context: { queryHistory: { enabled: false } },
    };

    const parsed = connectionConfigSchema.parse({
      driver: 'postgres',
      url: 'postgres://x',
      ...passthroughFields,
    });

    expect(parsed).toMatchObject({ driver: 'postgres', ...passthroughFields });
  });

  it('rejects an unknown driver', () => {
    const parseUnknownDriver = () => connectionConfigSchema.parse({ driver: 'nope', url: 'x' });
    expect(parseUnknownDriver).toThrow();
  });
});
|
||||
|
||||
// Context-source drivers whose connection carries a `mappings` block.
describe('connectionConfigSchema - context source drivers with mappings', () => {
  it('parses a metabase connection with mappings', () => {
    const metabaseConnection = {
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      api_key_ref: 'env:METABASE_API_KEY', // pragma: allowlist secret
      mappings: {
        databaseMappings: { '3': 'prod-warehouse' },
        syncEnabled: { '3': true },
        syncMode: 'ONLY',
      },
    };

    const parsed = connectionConfigSchema.parse(metabaseConnection);

    expect(parsed).toMatchObject({
      driver: 'metabase',
      api_url: 'https://metabase.example.com',
      mappings: {
        databaseMappings: { '3': 'prod-warehouse' },
        syncMode: 'ONLY',
      },
    });
  });

  it('parses a looker connection with connectionMappings', () => {
    const mappings = { connectionMappings: { bigquery_prod: 'wh' } };

    const parsed = connectionConfigSchema.parse({
      driver: 'looker',
      base_url: 'https://looker.example.com',
      client_id: 'abc',
      client_secret_ref: 'env:LOOKER_CLIENT_SECRET', // pragma: allowlist secret
      mappings,
    });

    expect(parsed.mappings).toEqual({ connectionMappings: { bigquery_prod: 'wh' } });
  });

  it('parses a lookml connection with expectedLookerConnectionName', () => {
    const mappings = { expectedLookerConnectionName: 'bigquery_prod' };

    const parsed = connectionConfigSchema.parse({
      driver: 'lookml',
      repoUrl: 'https://github.com/acme/looker.git',
      branch: 'main',
      mappings,
    });

    expect(parsed.mappings).toEqual({ expectedLookerConnectionName: 'bigquery_prod' });
  });

  it('rejects metabase mapping with non-integer database key', () => {
    const parseBadMapping = () =>
      connectionConfigSchema.parse({
        driver: 'metabase',
        api_url: 'https://x',
        mappings: { databaseMappings: { abc: 'wh' } },
      });

    expect(parseBadMapping).toThrow();
  });
});
|
||||
|
||||
// Remaining context-source drivers: notion, dbt and metricflow.
describe('connectionConfigSchema - notion / dbt / metricflow', () => {
  it('parses a notion connection with selected_roots crawl', () => {
    const crawlSettings = {
      crawl_mode: 'selected_roots',
      root_page_ids: ['abc', 'def'],
      max_pages_per_run: 500,
    };

    const parsed = connectionConfigSchema.parse({
      driver: 'notion',
      auth_token_ref: 'env:NOTION_TOKEN',
      ...crawlSettings,
    });

    expect(parsed).toMatchObject({ driver: 'notion', ...crawlSettings });
  });

  it('rejects notion with unknown crawl_mode', () => {
    const parseBadCrawlMode = () =>
      connectionConfigSchema.parse({
        driver: 'notion',
        auth_token_ref: 'env:NOTION_TOKEN',
        crawl_mode: 'everything',
      });

    expect(parseBadCrawlMode).toThrow();
  });

  it('parses a dbt connection from a local source_dir', () => {
    const dbtConnection = { driver: 'dbt', source_dir: '/tmp/dbt-project', target: 'dev' };

    const parsed = connectionConfigSchema.parse(dbtConnection);

    expect(parsed).toMatchObject({ driver: 'dbt', source_dir: '/tmp/dbt-project', target: 'dev' });
  });

  it('parses a metricflow connection with nested config', () => {
    const repoUrl = 'https://github.com/acme/sl.git';

    const parsed = connectionConfigSchema.parse({
      driver: 'metricflow',
      metricflow: { repoUrl, branch: 'main' },
    });

    expect(parsed).toMatchObject({
      driver: 'metricflow',
      metricflow: { repoUrl: 'https://github.com/acme/sl.git' },
    });
  });
});
|
||||
209
packages/cli/src/context/project/driver-schemas.ts
Normal file
209
packages/cli/src/context/project/driver-schemas.ts
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
import * as z from 'zod';
|
||||
import {
|
||||
lookerMappingsSchema,
|
||||
lookmlMappingsSchema,
|
||||
metabaseMappingsSchema,
|
||||
} from './mappings-yaml-schema.js';
|
||||
|
||||
/** Driver names accepted for warehouse connections. */
const warehouseDrivers = [
  'postgres',
  'postgresql',
  'mysql',
  'snowflake',
  'bigquery',
  'sqlite',
  'clickhouse',
  'sqlserver',
] as const;

/** Union of the literal driver names listed in `warehouseDrivers`. */
type WarehouseDriver = (typeof warehouseDrivers)[number];
|
||||
|
||||
/**
 * Builds the connection schema for a single warehouse driver.
 *
 * The result is a loose object: `driver` must equal the given literal, `url`
 * and `enabled_tables` are optional, and any additional keys are accepted and
 * kept in the parsed value.
 *
 * @param driver - Literal driver name used as the discriminator value.
 * @returns A Zod schema for that driver's connection entry.
 */
function warehouseConnectionSchema<const Driver extends WarehouseDriver>(driver: Driver) {
  const url = z
    .string()
    .min(1)
    .optional()
    .describe(
      'Warehouse connection URL or DSN; may contain environment-variable references like env:DATABASE_URL.',
    );

  const enabledTables = z
    .array(z.string().min(1))
    .optional()
    .describe(
      'Optional allowlist of fully-qualified table names ("schema.table") to ingest. When set, live-database ingest discards any table whose schema-qualified name is not in this list. Useful for smoke-testing deep ingest on a single table.',
    );

  const description = `${driver} warehouse connection. Additional driver-tunable fields (e.g. historicSql, context.queryHistory) are accepted and passed through.`;

  return z
    .looseObject({
      driver: z.literal(driver),
      url,
      enabled_tables: enabledTables,
    })
    .describe(description);
}
|
||||
|
||||
/**
 * One schema per warehouse driver, kept as a tuple (`as const`) so each
 * member retains its own literal `driver` type when spread into the
 * discriminated union.
 */
const warehouseConnectionSchemas = [
  warehouseConnectionSchema('postgres'),
  warehouseConnectionSchema('postgresql'),
  warehouseConnectionSchema('mysql'),
  warehouseConnectionSchema('snowflake'),
  warehouseConnectionSchema('bigquery'),
  warehouseConnectionSchema('sqlite'),
  warehouseConnectionSchema('clickhouse'),
  warehouseConnectionSchema('sqlserver'),
] as const;
|
||||
|
||||
/**
 * Error text for a mapping whose keys are not positive-integer strings.
 *
 * @param field - Name of the mapping field being validated.
 * @returns The message shown for each offending key.
 */
const positiveIntKeyMessage = (field: string): string => {
  return `${field} keys must be positive-integer strings (e.g. "1", "42")`;
};

/** Matches decimal strings of a positive integer with no leading zero. */
const positiveIntKeyRegex = /^[1-9]\d*$/;
|
||||
|
||||
/**
 * Metabase mappings with an extra key check: every key of `databaseMappings`
 * and `syncEnabled` must be a positive-integer string. Each offending key
 * yields its own custom issue at `[field, key]`; `databaseMappings` keys are
 * reported before `syncEnabled` keys.
 */
const metabaseMappingsStrictSchema = metabaseMappingsSchema.superRefine((value, ctx) => {
  for (const field of ['databaseMappings', 'syncEnabled'] as const) {
    const invalidKeys = Object.keys(value[field] ?? {}).filter((key) => !positiveIntKeyRegex.test(key));
    for (const key of invalidKeys) {
      ctx.addIssue({
        code: 'custom',
        path: [field, key],
        message: positiveIntKeyMessage(field),
      });
    }
  }
});
|
||||
|
||||
/**
 * Connection entry for `driver: metabase`.
 *
 * `api_url` is the only required field besides `driver`. The proxy setting is
 * accepted under both `network_proxy` and `networkProxy`, each as a
 * free-form object. Extra keys are passed through (loose object).
 */
const metabaseConnectionSchema = z
  .looseObject({
    driver: z.literal('metabase'),
    api_url: z
      .string()
      .url()
      .describe('Metabase instance API URL (e.g. https://metabase.example.com).'),
    api_key: z
      .string()
      .min(1)
      .optional()
      .describe('Literal Metabase API key. Prefer api_key_ref for safety.'),
    api_key_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Metabase API key (e.g. env:METABASE_API_KEY or file:/path).'),
    network_proxy: z
      .looseObject({})
      .optional()
      .describe('Optional network proxy configuration (snake_case form).'),
    networkProxy: z
      .looseObject({})
      .optional()
      .describe('Optional network proxy configuration (camelCase form).'),
    mappings: metabaseMappingsStrictSchema
      .optional()
      .describe('Metabase database-to-warehouse mappings and sync configuration.'),
  })
  .describe('Metabase context-source connection.');
|
||||
|
||||
/**
 * Connection entry for `driver: looker`.
 *
 * `base_url` and `client_id` are required; the client secret may be given
 * literally or by reference, and both are optional at the schema level.
 * Extra keys are passed through (loose object).
 */
const lookerConnectionSchema = z
  .looseObject({
    driver: z.literal('looker'),
    base_url: z
      .string()
      .url()
      .describe('Looker instance base URL (e.g. https://looker.example.com).'),
    client_id: z.string().min(1).describe('Looker OAuth client ID.'),
    client_secret: z
      .string()
      .min(1)
      .optional()
      .describe('Literal Looker OAuth client secret. Prefer client_secret_ref.'),
    client_secret_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Looker OAuth client secret (e.g. env:LOOKER_CLIENT_SECRET).'),
    mappings: lookerMappingsSchema
      .optional()
      .describe('Looker connection-name to KTX warehouse mappings.'),
  })
  .describe('Looker context-source connection.');
|
||||
|
||||
/**
 * Connection entry for `driver: lookml`.
 *
 * Only `repoUrl` is required besides `driver`. Extra keys are passed through
 * (loose object).
 */
const lookmlConnectionSchema = z
  .looseObject({
    driver: z.literal('lookml'),
    repoUrl: z
      .string()
      .min(1)
      .describe('Git URL of the LookML project (https, ssh, or file:). Field is camelCase by convention.'),
    branch: z.string().min(1).optional().describe('Git branch (default "main" downstream).'),
    path: z
      .string()
      .optional()
      .describe('Subdirectory within the repo when the LookML project lives in a monorepo.'),
    auth_token_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Git auth token for private repos (e.g. env:GITHUB_TOKEN).'),
    mappings: lookmlMappingsSchema
      .optional()
      .describe('LookML expected-connection mapping for ingest gating.'),
  })
  .describe('LookML context-source connection.');
|
||||
|
||||
/** Builds a fresh optional list-of-non-empty-strings schema for Notion root IDs. */
const notionRootIdList = () => z.array(z.string().min(1)).optional();

/**
 * Connection entry for `driver: notion`.
 *
 * Every field other than `driver` is optional. The per-run limits are
 * integers bounded to 1–10000 (pages), 0–25 (creates) and 0–100 (updates).
 * This schema does not itself enforce that `selected_roots` comes with at
 * least one root list. Extra keys are passed through (loose object).
 */
const notionConnectionSchema = z
  .looseObject({
    driver: z.literal('notion'),
    auth_token: z
      .string()
      .min(1)
      .optional()
      .describe('Literal Notion integration token. Prefer auth_token_ref.'),
    auth_token_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Notion integration token (e.g. env:NOTION_TOKEN).'),
    crawl_mode: z
      .enum(['selected_roots', 'all_accessible'])
      .optional()
      .describe(
        'Crawl scope. "selected_roots" requires at least one of root_page_ids, root_database_ids, root_data_source_ids.',
      ),
    root_page_ids: notionRootIdList().describe('Notion page IDs to crawl when crawl_mode is selected_roots.'),
    root_database_ids: notionRootIdList().describe(
      'Notion database IDs to crawl when crawl_mode is selected_roots.',
    ),
    root_data_source_ids: notionRootIdList().describe(
      'Notion data source IDs to crawl when crawl_mode is selected_roots.',
    ),
    max_pages_per_run: z
      .number()
      .int()
      .min(1)
      .max(10000)
      .optional()
      .describe('Maximum Notion pages fetched in a single ingest run.'),
    max_knowledge_creates_per_run: z
      .number()
      .int()
      .min(0)
      .max(25)
      .optional()
      .describe('Maximum new wiki pages created per run.'),
    max_knowledge_updates_per_run: z
      .number()
      .int()
      .min(0)
      .max(100)
      .optional()
      .describe('Maximum existing wiki pages updated per run.'),
  })
  .describe('Notion context-source connection.');
|
||||
|
||||
/**
 * Connection entry for `driver: dbt`.
 *
 * Every field other than `driver` is optional at the schema level, including
 * both project locations (`source_dir` and `repo_url`). Extra keys are passed
 * through (loose object).
 */
const dbtConnectionSchema = z
  .looseObject({
    driver: z.literal('dbt'),
    source_dir: z
      .string()
      .min(1)
      .optional()
      .describe('Absolute or project-relative path to a local dbt project.'),
    repo_url: z
      .string()
      .min(1)
      .optional()
      .describe('Git URL of the dbt project (https, ssh, or file:).'),
    branch: z.string().min(1).optional().describe('Git branch when using repo_url.'),
    path: z
      .string()
      .optional()
      .describe('Subdirectory within the repo when the dbt project lives in a monorepo.'),
    auth_token_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Git auth token for private repos.'),
    profiles_path: z.string().optional().describe('Override path to dbt profiles.yml.'),
    target: z.string().min(1).optional().describe('dbt target name (e.g. dev, prod).'),
    project_name: z
      .string()
      .min(1)
      .optional()
      .describe('Override auto-detected dbt project name.'),
  })
  .describe('dbt context-source connection.');
|
||||
|
||||
/** Nested `metricflow` block: repository location and optional Git settings. */
const metricflowSettingsSchema = z
  .looseObject({
    repoUrl: z.string().min(1).describe('Git URL of the MetricFlow / SL project.'),
    branch: z.string().min(1).optional().describe('Git branch (default "main").'),
    path: z
      .string()
      .optional()
      .describe('Subdirectory within the repo when the SL config lives in a monorepo.'),
    auth_token_ref: z
      .string()
      .min(1)
      .optional()
      .describe('Reference to Git auth token for private repos.'),
  })
  .describe('Nested MetricFlow configuration block.');

/**
 * Connection entry for `driver: metricflow`. The `metricflow` block is
 * required; extra keys are passed through at both levels (loose objects).
 */
const metricflowConnectionSchema = z
  .looseObject({
    driver: z.literal('metricflow'),
    metricflow: metricflowSettingsSchema,
  })
  .describe('MetricFlow / SL context-source connection.');
|
||||
|
||||
/**
 * Schema for one connection entry, discriminated on `driver`.
 *
 * Covers the warehouse drivers plus the metabase, looker, lookml, notion, dbt
 * and metricflow context sources. A `driver` value outside this set fails
 * validation.
 */
export const connectionConfigSchema = z.discriminatedUnion('driver', [
  // Warehouses
  ...warehouseConnectionSchemas,
  // Context sources
  metabaseConnectionSchema,
  lookerConnectionSchema,
  lookmlConnectionSchema,
  notionConnectionSchema,
  dbtConnectionSchema,
  metricflowConnectionSchema,
]);
|
||||
102
packages/cli/src/context/project/local-git-file-store.test.ts
Normal file
102
packages/cli/src/context/project/local-git-file-store.test.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { GitService } from '../../context/core/git.service.js';
|
||||
import type { KtxCoreConfig } from '../../context/core/config.js';
|
||||
import { LocalGitFileStore } from './local-git-file-store.js';
|
||||
|
||||
// Exercises LocalGitFileStore against a real Git repository in a temp directory.
describe('LocalGitFileStore', () => {
  const author = 'Agent';
  const authorEmail = 'agent@example.com';
  const fullCommitHash = /^[0-9a-f]{40}$/;

  let tempDir: string;
  let store: LocalGitFileStore;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-store-'));

    const coreConfig: KtxCoreConfig = {
      storage: { configDir: tempDir, homeDir: tempDir },
      git: {
        userName: 'ktx',
        userEmail: 'ktx@example.com',
        bootstrapMessage: 'Initialize test project',
        bootstrapAuthor: 'ktx',
        bootstrapAuthorEmail: 'ktx@example.com',
      },
    };

    const git = new GitService(coreConfig);
    await git.onModuleInit();
    store = new LocalGitFileStore({ rootDir: tempDir, git });
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('writes, commits, and reads a project file', async () => {
    const filePath = 'wiki/global/revenue.md';
    const content = '# Revenue\n';

    const write = await store.writeFile(filePath, content, author, authorEmail, 'Add revenue page');

    expect(write.commitHash).toMatch(fullCommitHash);
    await expect(readFile(join(tempDir, filePath), 'utf-8')).resolves.toBe('# Revenue\n');
    await expect(store.readFile(filePath)).resolves.toMatchObject({
      content: '# Revenue\n',
    });
  });

  it('lists files recursively and can strip the requested prefix', async () => {
    await store.writeFile('wiki/global/a.md', 'a', author, authorEmail, 'Add a');
    await store.writeFile('wiki/global/nested/b.md', 'b', author, authorEmail, 'Add b');

    const fromWikiRoot = store.listFiles('wiki');
    await expect(fromWikiRoot).resolves.toEqual({
      files: ['wiki/global/a.md', 'wiki/global/nested/b.md'],
    });

    const prefixStripped = store.listFiles('wiki/global', true);
    await expect(prefixStripped).resolves.toEqual({
      files: ['a.md', 'nested/b.md'],
    });
  });

  it('deletes and commits an existing file', async () => {
    const filePath = 'semantic-layer/conn/orders.yaml';
    await store.writeFile(filePath, 'name: orders\n', author, authorEmail, 'Add SL');

    const deleted = await store.deleteFile(filePath, author, authorEmail, 'Delete SL');

    expect(deleted?.commitHash).toMatch(fullCommitHash);
    await expect(stat(join(tempDir, filePath))).rejects.toThrow();
  });

  it('returns null when deleting a missing file', async () => {
    const deletion = store.deleteFile('missing.md', author, authorEmail, 'Delete missing');
    await expect(deletion).resolves.toBeNull();
  });

  it('exposes Git history for a file', async () => {
    const filePath = 'wiki/global/history.md';
    await store.writeFile(filePath, 'v1', author, authorEmail, 'Add history');
    await store.writeFile(filePath, 'v2', author, authorEmail, 'Update history');

    const history = await store.getFileHistory(filePath);

    expect(Array.isArray(history)).toBe(true);
    // Newest commit first.
    expect(history[0]).toMatchObject({ message: 'Update history' });
    expect(history[1]).toMatchObject({ message: 'Add history' });
  });

  it('rejects absolute paths and parent-directory traversal', async () => {
    const absoluteWrite = store.writeFile('/tmp/outside.md', 'bad', author, authorEmail, 'Bad write');
    await expect(absoluteWrite).rejects.toThrow('Path must be relative');

    const traversalRead = store.readFile('../outside.md');
    await expect(traversalRead).rejects.toThrow('Path escapes the project directory');
  });

  it('rejects direct .git access', async () => {
    const gitInternalsRead = store.readFile('.git/config');
    await expect(gitInternalsRead).rejects.toThrow('Path cannot access .git');
  });
});
|
||||
183
packages/cli/src/context/project/local-git-file-store.ts
Normal file
183
packages/cli/src/context/project/local-git-file-store.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
import { promises as fs } from 'node:fs';
|
||||
import { dirname, isAbsolute, join, relative, resolve, sep } from 'node:path';
|
||||
import type { GitCommitInfo, GitService } from '../../context/core/git.service.js';
|
||||
import type { KtxFileHistoryEntry, KtxFileListResult, KtxFileReadResult, KtxFileStorePort, KtxFileWriteResult } from '../../context/core/file-store.js';
|
||||
|
||||
/** Constructor dependencies for {@link LocalGitFileStore}. */
export interface LocalGitFileStoreDeps {
  /** Directory the store operates in; the constructor resolves it to an absolute path. */
  rootDir: string;
  /** Git service used to commit writes and deletes and to read file history. */
  git: GitService;
}
|
||||
|
||||
/**
 * Rewrites a path to forward-slash form and removes a leading `./` prefix
 * (including any extra slashes directly after it).
 */
function normalizeRelativePath(filePath: string): string {
  const forwardSlashed = filePath.split('\\').join('/');
  return forwardSlashed.replace(/^\.\/+/, '');
}
|
||||
|
||||
/**
 * Maps commit metadata from the Git service onto the successful write-result shape.
 * The commit `message` is exposed as `commitMessage`; all other fields keep their names.
 */
function gitInfoToWriteResult(info: GitCommitInfo): KtxFileWriteResult {
  const { commitHash, message, author, authorEmail, timestamp, created } = info;
  return {
    success: true,
    commitHash,
    commitMessage: message,
    author,
    authorEmail,
    timestamp,
    created,
  };
}
|
||||
|
||||
/**
 * File store rooted at a local directory whose writes and deletes are committed through a
 * {@link GitService}.
 *
 * Every caller-supplied path goes through `safeRelativePath` before it touches the filesystem:
 * it must be relative, must resolve inside `rootDir`, and must not contain a `.git` segment.
 */
export class LocalGitFileStore implements KtxFileStorePort<LocalGitFileStore> {
  private readonly rootDir: string;
  private readonly git: GitService;

  constructor(deps: LocalGitFileStoreDeps) {
    this.rootDir = resolve(deps.rootDir);
    this.git = deps.git;
  }

  /** Returns a store rooted at `workdir` that uses the Git service's handle for that worktree. */
  forWorktree(workdir: string): LocalGitFileStore {
    return new LocalGitFileStore({ rootDir: workdir, git: this.git.forWorktree(workdir) });
  }

  /**
   * Writes `content` to `path` (creating parent directories) and commits the file.
   *
   * @param options.skipLock When true the file is written but no commit is made; the result
   *   then carries `commitHash: null`.
   * @throws Error when `path` is absolute, escapes the root, or targets `.git`.
   */
  async writeFile(
    path: string,
    content: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult> {
    const relativePath = this.safeRelativePath(path);
    const absolutePath = this.absolutePath(relativePath);
    await fs.mkdir(dirname(absolutePath), { recursive: true });
    await fs.writeFile(absolutePath, content, 'utf-8');

    if (options?.skipLock) {
      return { success: true, commitHash: null, path: relativePath, operation: 'write' };
    }

    const info = await this.git.commitFile(relativePath, commitMessage, author, authorEmail);
    return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'write' };
  }

  /**
   * Reads a UTF-8 file and returns its content together with size and modification time.
   *
   * @throws Error when `path` fails validation; filesystem errors (e.g. ENOENT) propagate.
   */
  async readFile(path: string): Promise<KtxFileReadResult> {
    const relativePath = this.safeRelativePath(path);
    const absolutePath = this.absolutePath(relativePath);
    const content = await fs.readFile(absolutePath, 'utf-8');
    const stats = await fs.stat(absolutePath);
    return {
      path: relativePath,
      content,
      size: stats.size,
      modifiedAt: stats.mtime.toISOString(),
    };
  }

  /**
   * Removes `path` from disk and commits the deletion.
   *
   * @param options.skipLock When true the file is unlinked but no commit is made.
   * @returns `null` when the file is not accessible (e.g. it does not exist), otherwise the
   *   delete result.
   */
  async deleteFile(
    path: string,
    author: string,
    authorEmail: string,
    commitMessage: string,
    options?: { skipLock?: boolean },
  ): Promise<KtxFileWriteResult | null> {
    const relativePath = this.safeRelativePath(path);
    const absolutePath = this.absolutePath(relativePath);
    try {
      await fs.access(absolutePath);
    } catch {
      // A missing/inaccessible file is reported as "nothing to delete" rather than an error.
      return null;
    }

    await fs.unlink(absolutePath);

    if (options?.skipLock) {
      return { success: true, commitHash: null, path: relativePath, operation: 'delete' };
    }

    const info = await this.git.deleteFile(relativePath, commitMessage, author, authorEmail);
    return { ...gitInfoToWriteResult(info), path: relativePath, operation: 'delete' };
  }

  /**
   * Recursively lists files below `path` (or the whole root when `path` is empty), sorted,
   * as forward-slash paths relative to the root. `.git` contents and `.ktx/cache/` are hidden.
   *
   * @param stripPrefix When true, the `path/` prefix is removed from each returned entry.
   * @returns An empty list when the directory does not exist.
   */
  async listFiles(path = '', stripPrefix = false): Promise<KtxFileListResult> {
    const relativePath = path ? this.safeRelativePath(path) : '';
    const searchRoot = relativePath ? this.absolutePath(relativePath) : this.rootDir;
    let files: string[];

    try {
      files = await this.walk(searchRoot);
    } catch (error) {
      if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
        return { files: [] };
      }
      throw error;
    }

    const prefix = relativePath ? `${relativePath}/` : '';
    const relativeFiles = files
      .map((file) => normalizeRelativePath(relative(this.rootDir, file)))
      .filter((file) => !file.startsWith('.git/') && !file.includes('/.git/'))
      .filter((file) => !file.startsWith('.ktx/cache/'))
      .map((file) => (stripPrefix && prefix && file.startsWith(prefix) ? file.slice(prefix.length) : file))
      .sort();

    return { files: relativeFiles };
  }

  /** Returns the Git history entries for `path` as reported by the Git service. */
  async getFileHistory(path: string): Promise<KtxFileHistoryEntry[]> {
    const relativePath = this.safeRelativePath(path);
    const history = await this.git.getFileHistory(relativePath);
    return history.map((entry) => ({
      sha: entry.commitHash,
      commitHash: entry.commitHash,
      shortHash: entry.shortHash,
      message: entry.message,
      author: entry.author,
      authorEmail: entry.authorEmail,
      timestamp: entry.timestamp,
      committedDate: entry.committedDate,
      created: entry.created,
      enhancedMessage: entry.enhancedMessage,
    }));
  }

  /**
   * Validates a caller-supplied path and returns it in canonical, forward-slash, root-relative
   * form (`a/../b//c/` becomes `b/c`). An empty input is returned unchanged.
   *
   * Validation runs on the *resolved* path, so `..` segments cannot be used to slip past the
   * checks (e.g. `docs/../.git/config`), and a `.git` segment is rejected at any depth, which
   * matches what `listFiles`/`walk` hide.
   *
   * @throws Error `Path must be relative` for absolute input.
   * @throws Error `Path escapes the project directory` when the path resolves outside the root.
   * @throws Error `Path cannot access .git` when any resolved segment is `.git`.
   */
  private safeRelativePath(path: string): string {
    if (path.length === 0) {
      return '';
    }
    if (isAbsolute(path)) {
      throw new Error('Path must be relative');
    }

    // Backslashes are treated as separators on every platform, as before.
    const absolute = resolve(this.rootDir, path.split('\\').join('/'));
    const fromRoot = relative(this.rootDir, absolute);
    // `relative` yields a `..`-prefixed (or, across Windows drives, absolute) path when the
    // target lies outside the root; this also works when the root is the filesystem root.
    if (fromRoot === '..' || fromRoot.startsWith(`..${sep}`) || isAbsolute(fromRoot)) {
      throw new Error('Path escapes the project directory');
    }

    const segments = fromRoot.split(sep).filter((segment) => segment.length > 0);
    if (segments.includes('.git')) {
      throw new Error('Path cannot access .git');
    }

    return segments.join('/');
  }

  /** Joins a validated relative path onto the root; an empty path maps to the root itself. */
  private absolutePath(path: string): string {
    return path ? join(this.rootDir, path) : this.rootDir;
  }

  /**
   * Depth-first collection of absolute file paths under `dir`. Directories named `.git` are
   * not descended into; entries that are neither regular files nor directories are ignored.
   */
  private async walk(dir: string): Promise<string[]> {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const files: string[] = [];

    for (const entry of entries) {
      const absolute = join(dir, entry.name);
      if (entry.isDirectory()) {
        if (entry.name !== '.git') {
          files.push(...(await this.walk(absolute)));
        }
      } else if (entry.isFile()) {
        files.push(absolute);
      }
    }

    return files;
  }
}
|
||||
6
packages/cli/src/context/project/local-state-db.ts
Normal file
6
packages/cli/src/context/project/local-state-db.ts
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
import { join } from 'node:path';
|
||||
import type { KtxLocalProject } from './project.js';
|
||||
|
||||
/** Returns the path of the `db.sqlite` file inside the project's `.ktx` directory. */
export function ktxLocalStateDbPath(project: Pick<KtxLocalProject, 'projectDir'>): string {
  const segments = [project.projectDir, '.ktx', 'db.sqlite'];
  return join(...segments);
}
|
||||
101
packages/cli/src/context/project/mappings-yaml-schema.test.ts
Normal file
101
packages/cli/src/context/project/mappings-yaml-schema.test.ts
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
lookerMappingsSchema,
|
||||
lookmlMappingsSchema,
|
||||
metabaseMappingsSchema,
|
||||
parseConnectionMappingBootstrap,
|
||||
parseLookmlMappingBootstrap,
|
||||
parseLookerMappingBootstrap,
|
||||
parseMetabaseMappingBootstrap,
|
||||
} from './mappings-yaml-schema.js';
|
||||
|
||||
// Covers the per-adapter mapping parsers, the driver dispatcher, and the exported zod shapes.
describe('ktx.yaml mapping bootstrap schema', () => {
  it('parses Metabase mapping intent with CLI syncMode default ALL', () => {
    const connection = {
      driver: 'metabase',
      mappings: {
        databaseMappings: { '1': 'prod-warehouse', '2': null },
        syncEnabled: { '1': true, '2': false },
        selections: { collections: [12], items: [345] },
        defaultTagNames: ['ktx', 'prod'],
      },
    };

    const bootstrap = parseMetabaseMappingBootstrap('prod-metabase', connection);

    // syncMode is absent from the input, so the schema default must show up here.
    expect(bootstrap).toEqual({
      adapter: 'metabase',
      connectionId: 'prod-metabase',
      databaseMappings: { '1': 'prod-warehouse', '2': null },
      syncEnabled: { '1': true, '2': false },
      syncMode: 'ALL',
      selections: { collections: [12], items: [345] },
      defaultTagNames: ['ktx', 'prod'],
    });
  });

  it('rejects Metabase non-integer mapping keys', () => {
    const parseWithBadKey = () =>
      parseMetabaseMappingBootstrap('prod-metabase', {
        driver: 'metabase',
        mappings: { databaseMappings: { abc: 'warehouse' } },
      });

    expect(parseWithBadKey).toThrow(/databaseMappings key "abc" must be a positive integer string/);
  });

  it('parses Looker connection mapping intent', () => {
    const connection = {
      driver: 'looker',
      mappings: {
        connectionMappings: {
          bigquery_prod: 'prod-warehouse',
          snowflake_dev: null,
        },
      },
    };

    const bootstrap = parseLookerMappingBootstrap('prod-looker', connection);

    expect(bootstrap).toEqual({
      adapter: 'looker',
      connectionId: 'prod-looker',
      connectionMappings: {
        bigquery_prod: 'prod-warehouse',
        snowflake_dev: null,
      },
    });
  });

  it('parses LookML expected connection from mappings block', () => {
    const bootstrap = parseLookmlMappingBootstrap('prod-lookml', {
      driver: 'lookml',
      repo_url: 'https://github.com/acme/looker.git',
      mappings: { expectedLookerConnectionName: 'bigquery_prod' },
    });

    expect(bootstrap).toEqual({
      adapter: 'lookml',
      connectionId: 'prod-lookml',
      expectedLookerConnectionName: 'bigquery_prod',
    });
  });

  it('dispatches by flat driver and returns null for connections with no mappings block', () => {
    const withoutMappings = parseConnectionMappingBootstrap('warehouse', {
      driver: 'postgres',
      url: 'env:DATABASE_URL',
    });
    expect(withoutMappings).toBeNull();

    const looker = parseConnectionMappingBootstrap('prod-looker', {
      driver: 'looker',
      mappings: { connectionMappings: { analytics: 'prod-warehouse' } },
    });
    expect(looker).toMatchObject({ adapter: 'looker', connectionId: 'prod-looker' });
  });

  it('exports mapping shapes that parse documented examples', () => {
    const metabase = metabaseMappingsSchema.parse({ databaseMappings: { '1': 'wh' } });
    expect(metabase).toMatchObject({
      databaseMappings: { '1': 'wh' },
      syncMode: 'ALL',
    });

    const looker = lookerMappingsSchema.parse({ connectionMappings: { x: 'wh' } });
    expect(looker).toEqual({
      connectionMappings: { x: 'wh' },
    });

    const lookml = lookmlMappingsSchema.parse({ expectedLookerConnectionName: 'x' });
    expect(lookml).toEqual({
      expectedLookerConnectionName: 'x',
    });
  });
});
|
||||
166
packages/cli/src/context/project/mappings-yaml-schema.ts
Normal file
166
packages/cli/src/context/project/mappings-yaml-schema.ts
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
import * as z from 'zod';
|
||||
|
||||
/** Allowed values for the Metabase `syncMode` setting. */
const metabaseSyncModeSchema = z.enum(['ALL', 'ONLY', 'EXCEPT']);

/** A strictly positive integer (used for Metabase collection and item IDs). */
const positiveIntegerValueSchema = z.number().int().positive();

/** A mapping target: a non-empty string, or `null`. */
const stringTargetSchema = z.string().min(1).nullable();
|
||||
|
||||
/** Metabase `selections` block: lists of collection and item IDs, each defaulting to empty. */
const metabaseSelectionsSchema = z.object({
  collections: z.array(positiveIntegerValueSchema).default([]),
  items: z.array(positiveIntegerValueSchema).default([]),
});
|
||||
|
||||
/**
 * Schema for the `mappings` block of a Metabase connection. Every field has a default, so an
 * empty object parses successfully.
 */
export const metabaseMappingsSchema = z
  .object({
    databaseMappings: z
      .record(z.string(), stringTargetSchema)
      .default({})
      .describe(
        'Map of Metabase database ID (positive integer string) to KTX connection ID. Use null to explicitly unmap.',
      ),

    syncEnabled: z
      .record(z.string(), z.boolean())
      .default({})
      .describe('Per-Metabase-database sync toggle, keyed by Metabase database ID string.'),

    syncMode: metabaseSyncModeSchema
      .default('ALL')
      .describe(
        'Sync scope: ALL ingests every mapped DB; ONLY restricts to syncEnabled=true; EXCEPT excludes syncEnabled=true.',
      ),

    selections: metabaseSelectionsSchema
      .default({ collections: [], items: [] })
      .describe('Optional Metabase collection and item IDs to scope ingest.'),

    defaultTagNames: z
      .array(z.string().min(1))
      .default([])
      .describe('Default tag names applied to ingested Metabase artifacts.'),
  })
  .describe('Metabase database-to-warehouse mapping and sync configuration.');
|
||||
|
||||
/** Schema for the `mappings` block of a Looker connection; `connectionMappings` defaults to `{}`. */
export const lookerMappingsSchema = z
  .object({
    connectionMappings: z
      .record(z.string().min(1), stringTargetSchema)
      .default({})
      .describe('Map of Looker connection name to KTX connection ID. Use null to explicitly unmap.'),
  })
  .describe('Looker connection-to-warehouse mapping configuration.');
|
||||
|
||||
/**
 * Schema for the `mappings` block of a LookML connection; `expectedLookerConnectionName`
 * defaults to `null` when omitted.
 */
export const lookmlMappingsSchema = z
  .object({
    expectedLookerConnectionName: z
      .string()
      .min(1)
      .nullable()
      .default(null)
      .describe(
        'Looker connection name that LookML models must declare; mismatches block sl_write_source at ingest time.',
      ),
  })
  .describe('LookML connection-name expectation for ingest gating.');
|
||||
|
||||
/** Parsed Metabase mapping settings for one connection, as returned by `parseMetabaseMappingBootstrap`. */
export type MetabaseMappingBootstrap = {
  adapter: 'metabase';
  /** ID of the connection the mappings were read from. */
  connectionId: string;
  /** Metabase database ID (as a string key) to target; `null` marks an explicit unmap. */
  databaseMappings: Record<string, string | null>;
  /** Per-database sync toggle, keyed by Metabase database ID string. */
  syncEnabled: Record<string, boolean>;
  syncMode: z.infer<typeof metabaseSyncModeSchema>;
  selections: { collections: number[]; items: number[] };
  defaultTagNames: string[];
};
|
||||
|
||||
/** Parsed Looker mapping settings for one connection, as returned by `parseLookerMappingBootstrap`. */
export type LookerMappingBootstrap = {
  adapter: 'looker';
  /** ID of the connection the mappings were read from. */
  connectionId: string;
  /** Looker connection name to target; `null` marks an explicit unmap. */
  connectionMappings: Record<string, string | null>;
};
|
||||
|
||||
/**
 * Parsed LookML mapping settings for one connection, as returned by `parseLookmlMappingBootstrap`.
 *
 * @internal
 */
export type LookmlMappingBootstrap = {
  adapter: 'lookml';
  /** ID of the connection the mappings were read from. */
  connectionId: string;
  /** Expected Looker connection name, or `null` when none was configured. */
  expectedLookerConnectionName: string | null;
};
|
||||
|
||||
/** Union of the per-adapter mapping bootstrap shapes; the `adapter` field tells the variants apart. */
export type ConnectionMappingBootstrap = MetabaseMappingBootstrap | LookerMappingBootstrap | LookmlMappingBootstrap;
|
||||
|
||||
/**
 * Loosely typed connection entry handed to the mapping parsers. Only `driver` and `mappings`
 * are inspected here, and both are validated at runtime.
 */
type MappingConnectionInput = Record<string, unknown> & {
  driver?: unknown;
  mappings?: unknown;
};
|
||||
|
||||
/**
 * Returns `value` itself when it is a non-null, non-array object; otherwise a fresh empty object.
 */
function recordValue(value: unknown): Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Ensures every key of `record` is a decimal positive integer without leading zeros.
 *
 * @param field Name used in the error message.
 * @throws Error naming the first offending key.
 */
function assertPositiveIntegerKeys(field: string, record: Record<string, unknown>): void {
  const invalidKey = Object.keys(record).find((key) => !/^[1-9]\d*$/.test(key));
  if (invalidKey !== undefined) {
    throw new Error(`${field} key "${invalidKey}" must be a positive integer string`);
  }
}
|
||||
|
||||
/** Returns the connection's `driver` as a lowercase string; a missing or null driver yields `''`. */
function driverOf(connection: MappingConnectionInput): string {
  const { driver } = connection;
  const raw = driver === null || driver === undefined ? '' : driver;
  return String(raw).toLowerCase();
}
|
||||
|
||||
/**
 * Parses the `mappings` block of a Metabase connection.
 *
 * The keys of `databaseMappings` and `syncEnabled` are checked first (they must be positive
 * integer strings), then the whole block is validated against `metabaseMappingsSchema`.
 * A missing or non-object `mappings` value is treated as an empty block.
 *
 * @throws Error when a key is not a positive integer string, or when schema validation fails.
 */
export function parseMetabaseMappingBootstrap(
  connectionId: string,
  connection: MappingConnectionInput,
): MetabaseMappingBootstrap {
  const mappings = recordValue(connection.mappings);

  for (const field of ['databaseMappings', 'syncEnabled'] as const) {
    assertPositiveIntegerKeys(field, recordValue(mappings[field]));
  }

  const { databaseMappings, syncEnabled, syncMode, selections, defaultTagNames } =
    metabaseMappingsSchema.parse(mappings);

  return {
    adapter: 'metabase',
    connectionId,
    databaseMappings,
    syncEnabled,
    syncMode,
    selections,
    defaultTagNames,
  };
}
|
||||
|
||||
/**
 * Parses the `mappings` block of a Looker connection against `lookerMappingsSchema`.
 * A missing or non-object `mappings` value is treated as an empty block.
 *
 * @internal
 */
export function parseLookerMappingBootstrap(
  connectionId: string,
  connection: MappingConnectionInput,
): LookerMappingBootstrap {
  const rawMappings = recordValue(connection.mappings);
  const { connectionMappings } = lookerMappingsSchema.parse(rawMappings);
  return { adapter: 'looker', connectionId, connectionMappings };
}
|
||||
|
||||
/**
 * Parses the `mappings` block of a LookML connection against `lookmlMappingsSchema`.
 * A missing or non-object `mappings` value is treated as an empty block.
 *
 * @internal
 */
export function parseLookmlMappingBootstrap(
  connectionId: string,
  connection: MappingConnectionInput,
): LookmlMappingBootstrap {
  const rawMappings = recordValue(connection.mappings);
  const { expectedLookerConnectionName } = lookmlMappingsSchema.parse(rawMappings);
  return { adapter: 'lookml', connectionId, expectedLookerConnectionName };
}
|
||||
|
||||
/**
 * Dispatches to the adapter-specific mapping parser based on the connection's `driver`.
 *
 * @returns `null` when the connection has no object-valued `mappings` block or when the driver
 *   is not `metabase`, `looker` or `lookml` (compared case-insensitively).
 */
export function parseConnectionMappingBootstrap(
  connectionId: string,
  connection: MappingConnectionInput,
): ConnectionMappingBootstrap | null {
  const { mappings } = connection;
  const hasMappingsBlock = typeof mappings === 'object' && mappings !== null && !Array.isArray(mappings);
  if (!hasMappingsBlock) {
    return null;
  }

  switch (driverOf(connection)) {
    case 'metabase':
      return parseMetabaseMappingBootstrap(connectionId, connection);
    case 'looker':
      return parseLookerMappingBootstrap(connectionId, connection);
    case 'lookml':
      return parseLookmlMappingBootstrap(connectionId, connection);
    default:
      return null;
  }
}
|
||||
73
packages/cli/src/context/project/project.test.ts
Normal file
73
packages/cli/src/context/project/project.test.ts
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
import { mkdtemp, readFile, rm, stat } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { initKtxProject, loadKtxProject } from './project.js';
|
||||
|
||||
// Exercises project initialization, loading, and the re-initialization guard in a temp directory.
describe('KTX local project runtime', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-project-runtime-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('initializes the standalone project layout and commits it', async () => {
    const projectDir = join(tempDir, 'warehouse');
    const inProject = (relativePath: string): string => join(projectDir, relativePath);

    const result = await initKtxProject({
      projectDir,
      authorName: 'Agent',
      authorEmail: 'agent@example.com',
    });

    expect(result.projectDir).toBe(projectDir);
    expect(result.commitHash).toMatch(/^[0-9a-f]{40}$/);
    await expect(readFile(inProject('ktx.yaml'), 'utf-8')).resolves.not.toContain('project:');

    const gitignore = await readFile(inProject('.ktx/.gitignore'), 'utf-8');
    const ignoredEntries = [
      'cache/',
      'db.sqlite',
      'db.sqlite-*',
      'ingest-transcripts/',
      'secrets/',
      'setup/',
      'agents/',
    ];
    for (const entry of ignoredEntries) {
      expect(gitignore).toContain(entry);
    }

    await expect(stat(inProject('wiki/global/.gitkeep'))).resolves.toBeDefined();
    await expect(stat(inProject('semantic-layer/.gitkeep'))).resolves.toBeDefined();
    await expect(stat(inProject('_schema/.gitkeep'))).rejects.toMatchObject({ code: 'ENOENT' });
    await expect(stat(inProject('raw-sources/.gitkeep'))).resolves.toBeDefined();
    await expect(stat(inProject('.git'))).resolves.toBeDefined();
  });

  it('loads an initialized project with a working file store', async () => {
    const projectDir = join(tempDir, 'warehouse');
    await initKtxProject({ projectDir });

    const { fileStore } = await loadKtxProject({ projectDir });
    await fileStore.writeFile(
      'wiki/global/revenue.md',
      '# Revenue\n',
      'Agent',
      'agent@example.com',
      'Add revenue page',
    );

    await expect(fileStore.readFile('wiki/global/revenue.md')).resolves.toMatchObject({
      content: '# Revenue\n',
    });
  });

  it('rejects reinitializing an existing project unless force is set', async () => {
    const projectDir = join(tempDir, 'warehouse');
    await initKtxProject({ projectDir });

    await expect(initKtxProject({ projectDir })).rejects.toThrow('Project already contains ktx.yaml');

    const forced = initKtxProject({ projectDir, force: true });
    await expect(forced).resolves.toMatchObject({
      configPath: join(projectDir, 'ktx.yaml'),
    });
  });
});
|
||||
146
packages/cli/src/context/project/project.ts
Normal file
146
packages/cli/src/context/project/project.ts
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
import { promises as fs } from 'node:fs';
|
||||
import { basename, dirname, join, resolve } from 'node:path';
|
||||
import { GitService } from '../../context/core/git.service.js';
|
||||
import { type KtxCoreConfig, type KtxLogger, noopLogger } from '../../context/core/config.js';
|
||||
import type { KtxProjectConfig } from './config.js';
|
||||
import { buildDefaultKtxProjectConfig, parseKtxProjectConfig, serializeKtxProjectConfig } from './config.js';
|
||||
import { LocalGitFileStore } from './local-git-file-store.js';
|
||||
|
||||
/** Options accepted by `initKtxProject`. */
export interface InitKtxProjectOptions {
  /** Directory to initialize; it is resolved to an absolute path and created if missing. */
  projectDir: string;
  /** When true, initialization proceeds even if `ktx.yaml` already exists. */
  force?: boolean;
  /** Commit author name; defaults to `'ktx'`. */
  authorName?: string;
  /** Commit author email; defaults to `'ktx@example.com'`. */
  authorEmail?: string;
  /** Logger handed to the Git service; defaults to `noopLogger`. */
  logger?: KtxLogger;
}
|
||||
|
||||
/** Options accepted by `loadKtxProject`. */
export interface LoadKtxProjectOptions {
  /** Directory containing `ktx.yaml`; it is resolved to an absolute path. */
  projectDir: string;
  /** Git author name; defaults to `'ktx'`. */
  authorName?: string;
  /** Git author email; defaults to `'ktx@example.com'`. */
  authorEmail?: string;
  /** Logger handed to the Git service; defaults to `noopLogger`. */
  logger?: KtxLogger;
}
|
||||
|
||||
/** Runtime handle for a local KTX project, produced by `initKtxProject` and `loadKtxProject`. */
export interface KtxLocalProject {
  /** Absolute project directory. */
  projectDir: string;
  /** Path of the project's `ktx.yaml`. */
  configPath: string;
  /** Project configuration (defaults on init, parsed from `ktx.yaml` on load). */
  config: KtxProjectConfig;
  /** Storage and Git settings derived from the project directory and author identity. */
  coreConfig: KtxCoreConfig;
  /** Git service constructed for this project. */
  git: GitService;
  /** File store rooted at `projectDir` and backed by `git`. */
  fileStore: LocalGitFileStore;
}
|
||||
|
||||
/** Result of `initKtxProject`: the project runtime plus the hash of the scaffold commit. */
export interface InitKtxProjectResult extends KtxLocalProject {
  /** Hash reported by the scaffold commit; typed as nullable. */
  commitHash: string | null;
}
|
||||
|
||||
/**
 * Files written and committed by `initKtxProject` (in addition to `ktx.yaml`): the `.ktx`
 * gitignore plus empty `.gitkeep` placeholders for the scaffold directories.
 */
const TRACKED_SCAFFOLD_FILES: Array<{ path: string; content: string }> = [
  {
    path: '.ktx/.gitignore',
    content: 'cache/\ndb.sqlite\ndb.sqlite-*\ningest-transcripts/\nsecrets/\nsetup/\nagents/\n',
  },
  ...[
    '.ktx/prompts/.gitkeep',
    '.ktx/skills/.gitkeep',
    'wiki/global/.gitkeep',
    'semantic-layer/.gitkeep',
    'raw-sources/.gitkeep',
  ].map((path) => ({ path, content: '' })),
];
|
||||
|
||||
/**
 * Builds the core configuration for a project directory: storage paths derived from
 * `projectDir`, and Git identity/bootstrap settings derived from the author.
 */
function createCoreConfig(projectDir: string, authorName: string, authorEmail: string): KtxCoreConfig {
  const storage: KtxCoreConfig['storage'] = {
    configDir: projectDir,
    homeDir: dirname(projectDir),
    worktreesDir: join(projectDir, '.ktx/worktrees'),
  };

  // The same identity is used for regular commits and for the bootstrap commit.
  const git: KtxCoreConfig['git'] = {
    userName: authorName,
    userEmail: authorEmail,
    bootstrapMessage: 'Initialize ktx project repository',
    bootstrapAuthor: authorName,
    bootstrapAuthorEmail: authorEmail,
  };

  return { storage, git };
}
|
||||
|
||||
/** Resolves to true when `fs.access` succeeds for `path`, and to false on any access error. */
async function fileExists(path: string): Promise<boolean> {
  return fs.access(path).then(
    () => true,
    () => false,
  );
}
|
||||
|
||||
/**
 * Writes `content` as UTF-8 to `relativePath` inside `projectDir`, creating any missing
 * parent directories first.
 */
async function writeProjectFile(projectDir: string, relativePath: string, content: string): Promise<void> {
  const target = join(projectDir, relativePath);
  const parentDir = dirname(target);
  await fs.mkdir(parentDir, { recursive: true });
  await fs.writeFile(target, content, 'utf-8');
}
|
||||
|
||||
/**
 * Assembles the runtime objects for a project: core config, an initialized Git service, and a
 * file store rooted at `projectDir`.
 */
async function createRuntime(
  projectDir: string,
  config: KtxProjectConfig,
  authorName: string,
  authorEmail: string,
  logger: KtxLogger,
): Promise<KtxLocalProject> {
  const coreConfig = createCoreConfig(projectDir, authorName, authorEmail);

  const git = new GitService(coreConfig, logger);
  // The service's init hook is awaited before the service is handed to the file store.
  await git.onModuleInit();

  const fileStore = new LocalGitFileStore({ rootDir: projectDir, git });
  const configPath = join(projectDir, 'ktx.yaml');

  return { projectDir, configPath, config, coreConfig, git, fileStore };
}
|
||||
|
||||
/**
 * Initializes a KTX project in `options.projectDir`: creates the directory, writes a default
 * `ktx.yaml` and the scaffold files, and commits them in one commit.
 *
 * @throws Error when `ktx.yaml` already exists and `options.force` is not set.
 * @returns The project runtime together with the scaffold commit hash.
 */
export async function initKtxProject(options: InitKtxProjectOptions): Promise<InitKtxProjectResult> {
  const projectDir = resolve(options.projectDir);
  const configPath = join(projectDir, 'ktx.yaml');
  const projectName = basename(projectDir) || 'ktx-project';
  const authorName = options.authorName ?? 'ktx';
  const authorEmail = options.authorEmail ?? 'ktx@example.com';
  const logger = options.logger ?? noopLogger;

  await fs.mkdir(projectDir, { recursive: true });
  if (!options.force) {
    const alreadyInitialized = await fileExists(configPath);
    if (alreadyInitialized) {
      throw new Error(`Project already contains ktx.yaml: ${configPath}`);
    }
  }

  const config = buildDefaultKtxProjectConfig();
  const runtime = await createRuntime(projectDir, config, authorName, authorEmail, logger);

  await writeProjectFile(projectDir, 'ktx.yaml', serializeKtxProjectConfig(config));
  // The cache directory is created on disk but is not among the committed paths.
  await fs.mkdir(join(projectDir, '.ktx/cache'), { recursive: true });
  for (const { path, content } of TRACKED_SCAFFOLD_FILES) {
    await writeProjectFile(projectDir, path, content);
  }

  const trackedPaths = ['ktx.yaml', ...TRACKED_SCAFFOLD_FILES.map((file) => file.path)];
  const commit = await runtime.git.commitFiles(
    trackedPaths,
    `Initialize KTX project: ${projectName}`,
    authorName,
    authorEmail,
  );

  return { ...runtime, commitHash: commit.commitHash };
}
|
||||
|
||||
/**
 * Loads an existing project: reads and parses `ktx.yaml` from `options.projectDir` and builds
 * the runtime around it. Read and parse errors propagate to the caller.
 */
export async function loadKtxProject(options: LoadKtxProjectOptions): Promise<KtxLocalProject> {
  const projectDir = resolve(options.projectDir);
  const authorName = options.authorName ?? 'ktx';
  const authorEmail = options.authorEmail ?? 'ktx@example.com';
  const logger = options.logger ?? noopLogger;

  const rawConfig = await fs.readFile(join(projectDir, 'ktx.yaml'), 'utf-8');
  const config = parseKtxProjectConfig(rawConfig);

  return createRuntime(projectDir, config, authorName, authorEmail, logger);
}
|
||||
58
packages/cli/src/context/project/setup-config.test.ts
Normal file
58
packages/cli/src/context/project/setup-config.test.ts
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
import { buildDefaultKtxProjectConfig } from './config.js';
|
||||
import {
|
||||
markKtxSetupStateStepComplete,
|
||||
mergeKtxSetupGitignoreEntries,
|
||||
readKtxSetupState,
|
||||
setKtxSetupDatabaseConnectionIds,
|
||||
} from './setup-config.js';
|
||||
|
||||
// Covers the setup-state file helpers, connection-id de-duplication, and gitignore merging.
describe('KTX setup config helpers', () => {
  let tempDir: string;

  beforeEach(async () => {
    tempDir = await mkdtemp(join(tmpdir(), 'ktx-setup-state-'));
  });

  afterEach(async () => {
    await rm(tempDir, { recursive: true, force: true });
  });

  it('marks setup steps complete in local state without duplicating existing state', async () => {
    // 'project' is marked twice on purpose to prove it is recorded only once.
    const markedInOrder = ['project', 'project', 'llm', 'runtime', 'context'] as const;
    for (const step of markedInOrder) {
      await markKtxSetupStateStepComplete(tempDir, step);
    }

    expect(await readKtxSetupState(tempDir)).toEqual({
      completed_steps: ['project', 'llm', 'runtime', 'context'],
    });

    const stateFile = join(tempDir, '.ktx', 'setup', 'state.json');
    await expect(readFile(stateFile, 'utf-8')).resolves.toBe(
      `${JSON.stringify({ completed_steps: ['project', 'llm', 'runtime', 'context'] }, null, 2)}\n`,
    );
  });

  it('sets setup database connection ids without duplicates', () => {
    const config = buildDefaultKtxProjectConfig();

    const withDatabases = setKtxSetupDatabaseConnectionIds(config, ['warehouse', 'analytics', 'warehouse']);

    expect(withDatabases.setup).toEqual({
      database_connection_ids: ['warehouse', 'analytics'],
    });
    // The input config must not be mutated.
    expect(config.setup).toBeUndefined();
  });

  it('merges setup-local gitignore entries without removing existing lines', () => {
    const mergedFromPrefix = mergeKtxSetupGitignoreEntries('cache/\ndb.sqlite\n');
    expect(mergedFromPrefix).toBe(
      ['cache/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'secrets/', 'setup/', 'agents/', ''].join('\n'),
    );

    const mergedOutOfOrder = mergeKtxSetupGitignoreEntries('cache/\nsecrets/\n');
    expect(mergedOutOfOrder).toBe(
      ['cache/', 'secrets/', 'db.sqlite', 'db.sqlite-*', 'ingest-transcripts/', 'setup/', 'agents/', ''].join('\n'),
    );
  });
});
|
||||
104
packages/cli/src/context/project/setup-config.ts
Normal file
104
packages/cli/src/context/project/setup-config.ts
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import type { KtxProjectConfig } from './config.js';
|
||||
|
||||
/** All recognized setup step names; `KtxSetupStep` is derived from this tuple. */
const KTX_SETUP_STEPS = [
  'project',
  'llm',
  'embeddings',
  'databases',
  'sources',
  'runtime',
  'context',
  'agents',
] as const;
|
||||
|
||||
/** Name of a single setup step: one of the string literals listed in `KTX_SETUP_STEPS`. */
export type KtxSetupStep = (typeof KTX_SETUP_STEPS)[number];
|
||||
|
||||
/** Setup progress as stored in `.ktx/setup/state.json`. */
export interface KtxSetupState {
  /** Steps recorded as complete. */
  completed_steps: KtxSetupStep[];
}
|
||||
|
||||
/** Gitignore lines that `mergeKtxSetupGitignoreEntries` makes sure are present. */
const SETUP_GITIGNORE_ENTRIES = [
  'cache/',
  'db.sqlite',
  'db.sqlite-*',
  'ingest-transcripts/',
  'secrets/',
  'setup/',
  'agents/',
] as const;
|
||||
|
||||
/** Type guard: true when `value` is a string listed in `KTX_SETUP_STEPS`. */
function isKtxSetupStep(value: unknown): value is KtxSetupStep {
  if (typeof value !== 'string') {
    return false;
  }
  const knownSteps: readonly string[] = KTX_SETUP_STEPS;
  return knownSteps.includes(value);
}
|
||||
|
||||
/**
 * Extracts the valid setup steps from an untrusted value, dropping duplicates while keeping
 * first-occurrence order. Anything that is not an array yields an empty list.
 */
function uniqueSetupSteps(steps: unknown): KtxSetupStep[] {
  const ordered = new Set<KtxSetupStep>();
  if (Array.isArray(steps)) {
    for (const candidate of steps) {
      if (isKtxSetupStep(candidate)) {
        ordered.add(candidate);
      }
    }
  }
  return [...ordered];
}
|
||||
|
||||
/** Returns the path of the setup state file, `.ktx/setup/state.json`, under `projectDir`. */
function ktxSetupStatePath(projectDir: string): string {
  const segments = [projectDir, '.ktx', 'setup', 'state.json'];
  return join(...segments);
}
|
||||
|
||||
/**
 * Reads the setup state file for `projectDir`.
 *
 * A missing file yields an empty state. Malformed shapes are tolerated: a JSON root that is not
 * an object (including `null`) or a `completed_steps` value that is not an array of known
 * steps degrades to an empty or filtered list instead of throwing.
 *
 * @throws SyntaxError when the file is not valid JSON; other read errors are rethrown as-is.
 */
export async function readKtxSetupState(projectDir: string): Promise<KtxSetupState> {
  try {
    const parsed: unknown = JSON.parse(await readFile(ktxSetupStatePath(projectDir), 'utf-8'));
    // `JSON.parse` may legitimately return null or a primitive; only read the property off a
    // real object so a file containing `null` does not raise a TypeError.
    const completedSteps =
      typeof parsed === 'object' && parsed !== null
        ? (parsed as Record<string, unknown>).completed_steps
        : undefined;
    return { completed_steps: uniqueSetupSteps(completedSteps) };
  } catch (error) {
    if (error && typeof error === 'object' && 'code' in error && error.code === 'ENOENT') {
      return { completed_steps: [] };
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Persists the setup state as pretty-printed JSON (2-space indent, trailing newline), creating
 * `.ktx/setup` if needed. Steps are filtered and de-duplicated before writing.
 *
 * @internal
 */
export async function writeKtxSetupState(projectDir: string, state: KtxSetupState): Promise<void> {
  const setupDir = join(projectDir, '.ktx', 'setup');
  await mkdir(setupDir, { recursive: true });

  const payload = { completed_steps: uniqueSetupSteps(state.completed_steps) };
  const serialized = `${JSON.stringify(payload, null, 2)}\n`;
  await writeFile(ktxSetupStatePath(projectDir), serialized, 'utf-8');
}
|
||||
|
||||
/**
 * Records `step` as complete in the project's setup state file and returns the resulting
 * state. A step that is already recorded is not added again; the file is rewritten either way.
 */
export async function markKtxSetupStateStepComplete(projectDir: string, step: KtxSetupStep): Promise<KtxSetupState> {
  const { completed_steps: alreadyDone } = await readKtxSetupState(projectDir);
  const nextState = {
    completed_steps: alreadyDone.includes(step) ? alreadyDone : alreadyDone.concat(step),
  };
  await writeKtxSetupState(projectDir, nextState);
  return nextState;
}
|
||||
|
||||
/**
 * Returns a copy of `config` whose `setup` block holds the given connection IDs.
 * Blank (empty or whitespace-only) IDs are dropped and duplicates are removed, keeping
 * first-occurrence order. The input config is not modified.
 */
export function setKtxSetupDatabaseConnectionIds(
  config: KtxProjectConfig,
  connectionIds: string[],
): KtxProjectConfig {
  const kept = new Set<string>();
  for (const connectionId of connectionIds) {
    if (connectionId.trim().length > 0) {
      kept.add(connectionId);
    }
  }

  const setup = { database_connection_ids: [...kept] };
  return { ...config, setup };
}
|
||||
|
||||
/**
 * Appends any missing `SETUP_GITIGNORE_ENTRIES` to existing gitignore content.
 *
 * Existing lines keep their order with trailing whitespace trimmed; a single trailing empty
 * line is dropped before appending. The result is joined with `\n` and ends with a newline.
 */
export function mergeKtxSetupGitignoreEntries(content: string): string {
  const lines = content.split(/\r?\n/).map((line) => line.trimEnd());
  // Only the final element is removed when empty; blank lines elsewhere are preserved.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  const present = new Set<string>(lines);
  const missing = SETUP_GITIGNORE_ENTRIES.filter((entry) => !present.has(entry));

  return `${[...lines, ...missing].join('\n')}\n`;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue