ktx/packages/cli/src/context/project/config.ts

import { KTX_MODEL_ROLES } from '../../llm/types.js';
import YAML from 'yaml';
import * as z from 'zod';
import { connectionConfigSchema } from './driver-schemas.js';

// Allowed literal values for the enum-typed config fields defined below.
// Each tuple is declared `as const` so it can be handed to `z.enum(...)`.

// Accepted values for the `backend` field of `llmProviderSchema`.
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code', 'codex'] as const;
// Accepted values for the `backend` field of `embeddingSchema`.
const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'] as const;
// Accepted TTL values for the `systemTtl` / `toolsTtl` / `historyTtl` fields of `promptCachingSchema`.
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
// Accepted values for the `mode` field of `scanEnrichmentSchema`.
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
// Accepted values for the `failureMode` field of `workUnitsSchema`.
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
// Accepted values for the `state` field of `storageSchema`.
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
// Accepted values for the `search` field of `storageSchema`.
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;

/**
 * Shared credentials shape reused for the `anthropic` and `gateway` blocks of
 * `llmProviderSchema` and the `openai` block of `embeddingSchema`.
 *
 * Both fields are optional, but when present they must be non-empty strings.
 * Declared with `strictObject`, so unknown keys are reported as issues.
 */
const apiCredentialsSchema = z
  .strictObject({
    api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
    base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
  })
  .describe('API credentials block: optional key and base URL for an LLM or embedding provider.');

/**
 * Vertex AI provider block.
 *
 * `location` is mandatory (non-empty) whenever the block is present;
 * `project` may be omitted.
 */
const vertexProviderSchema = z
  .strictObject({
    project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
    location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
  })
  .describe('Google Vertex AI provider configuration.');

/**
 * Sentence-transformers server block used by `embeddingSchema`.
 *
 * `base_url` defaults to the empty string (no `min(1)` constraint), and
 * `pathPrefix` is optional.
 *
 * NOTE(review): `pathPrefix` is camelCase while `base_url` is snake_case; both
 * names are part of the accepted ktx.yaml surface, so they are left as-is.
 */
const sentenceTransformersSchema = z
  .strictObject({
    base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
    pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
  })
  .describe('Sentence-transformers embedding server configuration.');

/**
 * LLM provider selection plus the per-provider credential blocks.
 *
 * `backend` defaults to `'none'`. The `vertex`, `anthropic`, and `gateway`
 * blocks are each optional at the schema level.
 *
 * NOTE(review): this schema has no cross-field check tying `backend` to the
 * presence of its matching credentials block; any such requirement must be
 * enforced by the code that consumes the parsed config.
 */
const llmProviderSchema = z
  .strictObject({
    backend: z
      .enum(KTX_LLM_BACKENDS)
      .default('none')
      .describe(
        'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session; "codex" uses the local Codex session.',
      ),
    vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
    anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
    gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
  })
  .describe('LLM provider selection and credentials.');

/**
 * Prompt-caching tunables. Every field is optional and none carries a schema
 * default, so omitted fields stay `undefined` in the parsed config.
 */
const promptCachingSchema = z
  .strictObject({
    enabled: z.boolean().optional().describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
    systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
    toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
    historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
    vertexFallbackTo5m: z.boolean().optional().describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
  })
  .describe('Prompt-caching tunables for Anthropic-compatible providers.');

/**
 * Top-level `llm` section.
 *
 * - `provider` uses `.prefault({})`, so an omitted provider is parsed from an
 *   empty object and picks up `llmProviderSchema`'s own defaults.
 * - `models` is a partial record keyed by `KTX_MODEL_ROLES`; it defaults to `{}`
 *   and each value must be a non-empty string.
 * - `promptCaching` is optional.
 */
const llmSchema = z
  .strictObject({
    provider: llmProviderSchema.prefault({}).describe('LLM provider backend and credentials.'),
    models: z
      .partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
      .default({})
      .describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
    promptCaching: promptCachingSchema.optional().describe('Optional prompt-caching tunables.'),
  })
  .describe('LLM provider, per-role model overrides, and prompt-caching tunables.');

/**
 * Embedding configuration, used both for `ingest.embeddings` and as the
 * optional override in `scanEnrichmentSchema`.
 *
 * `backend` defaults to `'none'` and `dimensions` defaults to `8`; the
 * remaining fields are optional.
 *
 * NOTE(review): the schema does not reject the placeholder `dimensions: 8`
 * when an active backend is selected; the field description asks users to set
 * it explicitly in that case.
 */
const embeddingSchema = z
  .strictObject({
    backend: z
      .enum(KTX_EMBEDDING_BACKENDS)
      .default('none')
      .describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
    model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
    dimensions: z
      .int()
      .positive()
      .default(8)
      .describe(
        'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
          'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
          '(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
      ),
    openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
    sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
    batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
  })
  .describe('Embedding backend, model, and provider credentials.');

/**
 * Work-unit policy for ingest. All three fields have defaults
 * (`stepBudget: 40`, `maxConcurrency: 1`, `failureMode: 'continue'`), and the
 * numeric fields must be positive integers.
 */
const workUnitsSchema = z
  .strictObject({
    stepBudget: z.int().positive().default(40).describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
    maxConcurrency: z.int().positive().default(1).describe('Maximum number of work units run concurrently during ingest.'),
    failureMode: z
      .enum(KTX_WORK_UNIT_FAILURE_MODES)
      .default('continue')
      .describe('Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.'),
  })
  .describe('Concurrency and failure handling for ingest work units.');

/**
 * Top-level `ingest` section.
 *
 * - `adapters` defaults to an empty list.
 * - `embeddings` is prefaulted with `{ backend: 'none' }`, so it is always
 *   present in the parsed config.
 * - `workUnits` is prefaulted with `{}` and therefore filled from
 *   `workUnitsSchema`'s defaults.
 * - `profile` accepts a boolean or the literal `'json'` and defaults to `false`.
 */
const ingestSchema = z
  .strictObject({
    adapters: z
      .array(z.string().min(1))
      .default([])
      .describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
    embeddings: embeddingSchema
      .prefault({ backend: 'none' })
      .describe('Embedding configuration used when ingest adapters need to embed documents.'),
    workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
    profile: z
      .union([z.boolean(), z.literal('json')])
      .default(false)
      .describe(
        'Print a timing breakdown to stderr at the end of each ingest run. `true` prints a human table; `"json"` prints the raw structured profile for coding agents; `false` disables it. Equivalent to the KTX_PROFILE_INGEST environment variable (`1`/`true`/`json`).',
      ),
  })
  .describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');

/**
 * `scan.enrichment` section. `mode` defaults to `'none'`; `embeddings` is an
 * optional override with the same shape as `ingest.embeddings`.
 */
const scanEnrichmentSchema = z
  .strictObject({
    mode: z
      .enum(KTX_ENRICHMENT_MODES)
      .default('none')
      .describe('Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.'),
    embeddings: embeddingSchema.optional().describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
  })
  .describe('Schema-scan enrichment: how columns and tables are described.');

/**
 * `scan.relationships` section: switches, confidence thresholds, batch sizes,
 * and concurrency limits for relationship discovery.
 *
 * Every field except `validationBudget` has a default. Thresholds are numbers
 * in the closed range [0, 1]; size and concurrency fields are positive integers.
 *
 * NOTE(review): the schema does not enforce `reviewThreshold <= acceptThreshold`;
 * the two are validated independently.
 */
const scanRelationshipsSchema = z
  .strictObject({
    enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
    llmProposals: z.boolean().default(true).describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
    validationRequiredForManifest: z
      .boolean()
      .default(true)
      .describe('When true, only relationships that pass database-side validation are written to the manifest.'),
    acceptThreshold: z
      .number()
      .min(0)
      .max(1)
      .default(0.85)
      .describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
    reviewThreshold: z
      .number()
      .min(0)
      .max(1)
      .default(0.55)
      .describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),
    maxLlmTablesPerBatch: z
      .int()
      .positive()
      .default(40)
      .describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
    maxCandidatesPerColumn: z
      .int()
      .positive()
      .default(25)
      .describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
    profileSampleRows: z.int().positive().default(10000).describe('Number of rows sampled per table when profiling values for relationship inference.'),
    profileConcurrency: z
      .int()
      .positive()
      .default(4)
      .describe('Parallel relationship-profile queries run against the database during scan.'),
    validationConcurrency: z.int().positive().default(4).describe('Number of relationship validation queries run in parallel against the database.'),
    // Either the literal 'all' or a non-negative integer (0 is allowed); no schema default.
    validationBudget: z
      .union([z.literal('all'), z.int().nonnegative()])
      .optional()
      .describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
  })
  .describe('Schema-scan relationship discovery and validation tunables.');

/**
 * Top-level `scan` section. Both sub-sections are prefaulted with `{}`, so
 * they are always present in the parsed config with their own defaults applied.
 */
const scanSchema = z
  .strictObject({
    enrichment: scanEnrichmentSchema.prefault({}).describe('Column/table enrichment configuration.'),
    relationships: scanRelationshipsSchema.prefault({}).describe('Relationship discovery and validation configuration.'),
  })
  .describe('Schema-scan configuration: enrichment and relationship discovery.');

/**
 * Optional top-level `setup` section. Its only accepted key is
 * `database_connection_ids` (defaults to an empty list); because the object is
 * strict, any other key under `setup` is reported as unknown.
 */
const setupSchema = z
  .strictObject({
    database_connection_ids: z
      .array(z.string().min(1))
      .default([])
      .describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
  })
  .describe('Setup-wizard state captured during `ktx setup`.');

/**
 * `storage.git` section. `auto_commit` defaults to `true` and `author`
 * defaults to `'ktx <ktx@example.com>'`; `author` must be non-empty, but its
 * "Name <email>" format is not validated by the schema.
 */
const storageGitSchema = z
  .strictObject({
    auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
    author: z
      .string()
      .min(1)
      .default('ktx <ktx@example.com>')
      .describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
  })
  .describe('Git-backed storage commit policy.');

/**
 * Top-level `storage` section. `state` defaults to `'sqlite'`, `search`
 * defaults to `'sqlite-fts5'`, and `git` is prefaulted with `{}`.
 *
 * NOTE(review): `state` and `search` are validated independently; the schema
 * does not restrict which combinations are allowed.
 */
const storageSchema = z
  .strictObject({
    state: z
      .enum(KTX_STORAGE_STATES)
      .default('sqlite')
      .describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
    search: z
      .enum(KTX_SEARCH_BACKENDS)
      .default('sqlite-fts5')
      .describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
    git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
  })
  .describe('Storage backends and commit policy for KTX state and search indexes.');

// Local alias for the connection schema imported from ./driver-schemas.js; used as the
// value schema of the top-level `connections` record and for `KtxProjectConnectionConfig`.
const connectionSchema = connectionConfigSchema;

/**
 * Top-level `agent` section. It currently holds a single `run_research` block,
 * prefaulted with `{}` so it is always present with its defaults
 * (`enabled: false`, `max_iterations: 20`, and a three-tool `default_toolset`).
 *
 * NOTE(review): `max_iterations` is declared with `z.number().int().nonnegative()`
 * (so 0 is accepted), whereas other integer fields in this file use `z.int()`.
 */
const agentSchema = z
  .strictObject({
    run_research: z
      .strictObject({
        enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
        max_iterations: z
          .number()
          .int()
          .nonnegative()
          .default(20)
          .describe('Maximum number of tool-call iterations the research agent may take per run.'),
        default_toolset: z
          .array(z.string().min(1))
          .default(['sl_query', 'wiki_search', 'sl_read_source'])
          .describe('Default list of tool identifiers exposed to the research agent.'),
      })
      .prefault({})
      .describe('Research-agent configuration.'),
  })
  .describe('Agent feature configuration.');

/** Top-level `memory` section; its single field `auto_commit` defaults to `true`. */
const memorySchema = z
  .strictObject({
    auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
  })
  .describe('Memory subsystem configuration.');

/**
 * Root schema for ktx.yaml.
 *
 * `setup` is the only optional section. `connections` defaults to `{}`, and
 * every other section is prefaulted with `{}`, so parsing an empty object
 * yields a fully populated config (see `buildDefaultKtxProjectConfig`).
 * The root is a strict object: unknown top-level keys are reported as issues.
 */
const ktxProjectConfigSchema = z
  .strictObject({
    setup: setupSchema.optional().describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
    connections: z
      .record(z.string(), connectionSchema)
      .default({})
      .describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
    storage: storageSchema.prefault({}).describe('Storage backends and commit policy for KTX state and search indexes.'),
    llm: llmSchema.prefault({}).describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
    ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
    agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
    memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
    scan: scanSchema.prefault({}).describe('Schema-scan configuration: enrichment and relationship discovery.'),
  })
  .describe('Configuration schema for KTX project files (ktx.yaml).');

// Public config types, derived from the schemas above with `z.infer` so they
// describe the parsed shape (i.e. after defaults have been applied).

/** Parsed shape of a whole ktx.yaml document. */
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
/** Parsed shape of the top-level `llm` section. */
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
/** Parsed shape of an embedding configuration block. */
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
/** Parsed shape of the `scan.enrichment` section. */
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
/** Parsed shape of the `scan.relationships` section. */
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
/** Parsed shape of a single entry in the `connections` map. */
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;

/** One problem found while validating ktx.yaml. */
export interface KtxConfigIssue {
  /** Dot-joined location of the offending value; the empty string for document-level problems. */
  path: string;
  /** Human-readable description of the problem. */
  message: string;
  /**
   * Optional remediation hint.
   * NOTE(review): not populated by the code visible in this file — confirm whether any caller sets it.
   */
  fix?: string;
}

/** Result of `validateKtxProjectConfig`. */
export interface KtxConfigValidation {
  /** `true` when the document parsed and matched the schema. */
  ok: boolean;
  /** Problems found; empty when `ok` is `true`. */
  issues: KtxConfigIssue[];
}

/**
 * Type guard for "plain container" values: any non-null object that is not an
 * array. Used to reject scalar, null, and list YAML documents.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}

/**
 * Joins path segments into a dot-separated string (e.g. `llm.provider.backend`).
 * Segments are stringified with `String(...)`, so numeric indices and symbols
 * are rendered rather than throwing. An empty path yields the empty string.
 */
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
  const renderedSegments = path.map(String);
  return renderedSegments.join('.');
}

/**
 * Walks `path` through `root` and returns the value found at the end.
 *
 * Returns `undefined` as soon as a step would have to descend into something
 * that is not a non-null object. An empty path returns `root` itself.
 */
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
  return path.reduce<unknown>((current, segment) => {
    // Once the walk falls off the structure the result stays undefined.
    if (typeof current !== 'object' || current === null) {
      return undefined;
    }
    return (current as Record<PropertyKey, unknown>)[segment];
  }, root);
}

/**
 * Renders an offending config value for inclusion in an issue message.
 * Primitives (and `null`) use `String(...)`; objects and arrays are rendered as
 * JSON so the message does not degrade to "[object Object]". Falls back to
 * `String(...)` if the value cannot be serialized (e.g. a cyclic structure).
 */
function describeOffendingValue(value: unknown): string {
  if (typeof value !== 'object' || value === null) return String(value);
  try {
    return JSON.stringify(value) ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Converts one Zod issue into one or more `KtxConfigIssue` entries.
 *
 * - `unrecognized_keys` issues are expanded into one entry per unknown key,
 *   each with the key appended to the issue path.
 * - Invalid values/types on a field named `backend` get a message that echoes
 *   the offending value read from `input`.
 * - Everything else is reported as `<path>: <zod message>` (or just the Zod
 *   message for root-level issues).
 *
 * @param issue The Zod issue to format.
 * @param input The raw document that was validated; used to look up offending values.
 * @returns The formatted issues (never empty unless an `unrecognized_keys` issue lists no keys).
 */
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
  const basePath = dottedPath(issue.path);

  if (issue.code === 'unrecognized_keys') {
    const keys = (issue as { keys?: readonly string[] }).keys ?? [];
    return keys.map((key) => {
      const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
      return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
    });
  }

  const lastSegment = issue.path[issue.path.length - 1];
  if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
    const value = valueAtPath(input, issue.path);
    return [{ path: basePath, message: `Unsupported ${basePath}: ${describeOffendingValue(value)}` }];
  }

  return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
}

/**
 * Flattens every issue carried by a Zod error into `KtxConfigIssue` entries,
 * preserving the order in which Zod reported them.
 */
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
  const collected: KtxConfigIssue[] = [];
  for (const zodIssue of error.issues) {
    collected.push(...formatIssue(zodIssue, input));
  }
  return collected;
}

/**
 * Builds a single multi-line error string from a Zod error: one formatted
 * issue message per line.
 */
function formatZodError(error: z.ZodError, input: unknown): string {
  const lines: string[] = [];
  for (const { message } of collectIssues(error, input)) {
    lines.push(message);
  }
  return lines.join('\n');
}

/**
 * Produces the default project configuration by parsing an empty object, so
 * every value comes from the schema's own defaults.
 */
export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
  const emptyDocument = {};
  const defaults = ktxProjectConfigSchema.parse(emptyDocument);
  return defaults;
}

/**
 * Parses ktx.yaml text into a validated `KtxProjectConfig`.
 *
 * @param raw The YAML source text.
 * @returns The parsed config with schema defaults applied.
 * @throws Error when the document is not a YAML object, or when it fails
 *   schema validation (the message lists one formatted issue per line).
 *   Errors raised by `YAML.parse` itself propagate unchanged.
 */
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
  const document: unknown = YAML.parse(raw);
  if (!isRecord(document)) {
    throw new Error('ktx.yaml must contain a YAML object');
  }

  const outcome = ktxProjectConfigSchema.safeParse(document);
  if (outcome.success) {
    return outcome.data;
  }
  throw new Error(formatZodError(outcome.error, document));
}

/**
 * Validates ktx.yaml text without throwing.
 *
 * YAML syntax errors, non-object documents, and schema violations are all
 * reported through the returned `issues` list; document-level problems use an
 * empty `path`.
 *
 * @param raw The YAML source text.
 * @returns `{ ok: true, issues: [] }` on success, otherwise `ok: false` with the issues found.
 */
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
  const failWith = (issues: KtxConfigIssue[]): KtxConfigValidation => ({ ok: false, issues });

  let document: unknown;
  try {
    document = YAML.parse(raw);
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    return failWith([{ path: '', message: `ktx.yaml parse error: ${detail}` }]);
  }

  if (!isRecord(document)) {
    return failWith([{ path: '', message: 'ktx.yaml must contain a YAML object' }]);
  }

  const outcome = ktxProjectConfigSchema.safeParse(document);
  return outcome.success ? { ok: true, issues: [] } : failWith(collectIssues(outcome.error, document));
}

/**
 * Generates a draft-07 JSON Schema for ktx.yaml from the Zod schema, using the
 * input (pre-default) shape.
 *
 * The fixed `$schema`, `$id`, and `title` entries come first; any key of the
 * same name in the generated schema takes precedence because it is spread last.
 */
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
  const generated = z.toJSONSchema(ktxProjectConfigSchema, {
    target: 'draft-7',
    io: 'input',
  }) as Record<string, unknown>;

  const header: Record<string, unknown> = {
    $schema: 'http://json-schema.org/draft-07/schema#',
    $id: 'https://ktx.dev/schemas/ktx-project-config.json',
    title: 'ktx.yaml',
  };

  return { ...header, ...generated };
}

/**
 * Serializes a project config to YAML text (2-space indent, no line wrapping,
 * exactly one trailing newline).
 *
 * When `ingest.adapters` is empty the `adapters` key is left out of the output.
 * Only that key is omitted: every other `ingest` field (including `profile`)
 * is preserved, so a config round-trips through `parseKtxProjectConfig`
 * without losing user-set values.
 *
 * @param config The config to serialize.
 * @returns The YAML document text.
 */
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
  // Split off `adapters` so the remaining ingest fields are carried over wholesale
  // rather than being re-listed by hand (which previously dropped `profile`).
  const { adapters, ...ingestWithoutAdapters } = config.ingest;
  const serializedConfig = adapters.length === 0 ? { ...config, ingest: ingestWithoutAdapters } : config;
  return `${YAML.stringify(serializedConfig, { indent: 2, lineWidth: 0 }).trimEnd()}\n`;
}
-												chore(workspace): gate dead-code with knip production mode (#196)

* refactor(workspace): relocate @ktx/llm source into packages/cli/src/llm

* refactor(workspace): rewrite @ktx/llm imports to relative paths

* refactor(workspace): fold internal packages into cli

* chore(workspace): gate dead-code with knip production mode

Turn on production-mode knip plus an autofix run in pre-commit and the
`pnpm dead-code` script, document the `/** @internal */` convention for
test-only exports in AGENTS.md, annotate test-only exports across the
CLI with that JSDoc, and drop dead exports/wrappers the new gate
surfaced (e.g. `cli-project.ts`, `lookerRuntimeSourceToFileAdapterSource`,
`createLocalScanEnrichmentProvidersFromConfig`,
`PGLITE_OWNER_PROCESS_BACKEND_CAPABILITIES`, stale type re-exports).
Replace the loose `ignoreIssues` allowlist in `knip.json` with explicit
production entries so cross-package barrel leaks are caught.

* refactor(cli): delete internal barrel index.ts files

The 34 `index.ts` re-export barrels inside `packages/cli/src/` were
holdovers from the pre-fold multi-workspace structure. Post-fold-in they
served no production purpose: external consumers go through the single
package main entry, and in-repo callers mostly imported through them
only because the path was short. Internally, knip flagged most barrel
re-exports as production-dead (only reached via tests).

This change:
- Deletes every internal barrel except `packages/cli/src/index.ts`
  (the published package entry).
- Rewrites ~270 source/test files to import each name directly from
  the file that defines it.
- Moves `tools/warehouse-verification/index.ts` to
  `create-warehouse-verification-tools.ts` (the function it defined
  locally) and updates its single consumer.
- Renames `search/backend-conformance.ts` → `.test-utils.ts` to match
  the existing test-helper file convention.
- Deletes 13 dead test-only chains (dbt-descriptions/*,
  live-database/extracted-schema, live-database/structural-sync,
  relationship-* feedback/review chain) plus their tests and a
  cascading orphan integration test.
- Updates test mocks that pointed at deleted barrel paths
  (notion-client, connector barrels in scan/local-scan-connectors
  tests) to mock the source files instead.
- Points the maintainer benchmark script
  (`scripts/relationship-benchmark-report.mjs`) at source files
  instead of `dist/context/scan/index.js`.
- Drops the barrel `!` entries from `knip.json`; adds explicit
  production entries only for the benchmark code reached via dist by
  the maintainer script.

Net: 413 files changed, ~1.2k insertions, ~9.4k deletions.

`pnpm run dead-code` (Biome + knip default + knip production) and
`pnpm run type-check` are clean; 2277 tests pass.

* refactor(workspace): rename @ktx/cli to @kaelio/ktx and pack it directly

Promote the CLI workspace package to the public name `@kaelio/ktx` and
drop the separate `scripts/build-public-npm-package.mjs` wrapper. The
CLI package is now publishable in place (`publishConfig.access: public`,
`provenance: true`), so artifact packing uses `pnpm pack` against
`packages/cli/` instead of assembling a parallel package tree.

Updates all workspace filter invocations, docs, tests, and release
readiness checks to reference the new package name, and folds the
tarball-name helper into `scripts/public-npm-release-metadata.mjs`.

* docs: align "agent clients" and "data agents" terminology

Replace "client agents" with "agent clients" and "database agents" with
"data agents" across AGENTS.md, README.md, the docs-site copy, and the
matching setup-agents test description, matching the canonical
vocabulary in docs/terminology.md.

Also moves packages/cli/tsconfig.json's tsBuildInfoFile from
node_modules/.cache/ to dist/.tsbuildinfo so incremental builds survive
node_modules reinstalls.

* refactor(release): single source of truth for package version

Make packages/cli/package.json the single source of truth for the
@kaelio/ktx version. publicNpmPackageVersion() now reads it directly,
so artifact filenames, release-readiness checks, and the Python wheel
version all derive from one field. The duplicate
release-policy.json.publicNpmPackageVersion is removed.

Previously the two fields could drift: tarballs were named
kaelio-ktx-0.4.1.tgz while internally containing
@kaelio/ktx@0.0.0-private.

- update-public-release-version.mjs rewrites both Python pyproject.toml
  files (ktx-daemon, ktx-sl) alongside the npm package.jsons,
  normalizing the version for PEP 440 (e.g. 0.1.0-rc.2 -> 0.1.0rc2).
- semantic-release-config.cjs adds the two pyproject.toml files to
  @semantic-release/git assets so the release commit back to main
  carries every version source in lockstep.
- The six "?? '0.0.0-private'" fallback literals across the CLI are
  replaced with "?? getKtxCliPackageInfo().version", and
  createDefaultKtxMcpServer makes its version arg required.
- docs/release.md describes the actual commit-back model: the dev tree
  always reflects the most recent release; no sentinel pin to
  maintain.

Verified: pnpm run artifacts:build now produces
kaelio-ktx-0.4.1.tgz and kaelio_ktx-0.4.1-py3-none-any.whl with
@kaelio/ktx@0.4.1 inside. Full type-check, dead-code, and
2287 vitests + 173 script tests pass.

* refactor(cli): inject embedding provider resolution and detect sentence-transformers runtime

Make resolveProjectEmbeddingProvider and runtimeIo injectable in ingest and
scan command entrypoints so tests can stub them, and teach
resolvePublicIngestRuntimeRequirements to flag the local-embeddings runtime
feature when ktx.yaml selects sentence-transformers.

* chore(cli): mark buildLocalStatsStatus and LocalStatsStatus as @internal

Both symbols are consumed only by status-project.test.ts. Annotating with
/** @internal */ keeps knip's production-mode check clean without changing
runtime behavior.

* fix(cli): use real package metadata in print-command-tree

The stubbed package name embedded a forbidden product identifier that
tripped the boundary check in CI. Read the metadata from package.json
instead — keeps the rendered tree unchanged and removes a duplicate
source of truth.

* feat(cli): show embedding coverage in `ktx status`, drop duplicate disk counts

Inline `(N embedded)` next to the Wiki scope counts and Semantic-layer
source counts, computed with `SUM(embedding_json IS NOT NULL)` over
`knowledge_pages` and `local_sl_sources`. Rename the "Knowledge" label to
"Wiki" (canonical per `docs/terminology.md`) and rename the matching
`localStats.knowledgePages` field to `localStats.wikiPages`.

Drop `wiki=N md` and `semantic-layer=N yaml` from the Disk row — those
duplicated the per-surface rows above. Disk now reports only actual byte
usage (db, cache, raw-sources). The unused `wikiGlobalMarkdownCount` /
`semanticLayerYamlCount` fields, the `isMarkdownEntry` / `isYamlEntry`
helpers, and the `filter` arg on `summarizeDir` are removed.
											
										
										
											2026-05-21 15:28:58 +02:00
+								import { KTX_MODEL_ROLES } from '../../llm/types.js';
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								import YAML from 'yaml';
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								import * as z from 'zod';
-												feat(context): add driver-discriminated connection schemas (#96)

* refactor(context): export and describe mapping shape schemas

* feat(context): add driver-schemas module with warehouse drivers

* feat(context): add metabase, looker, lookml driver schemas with mappings

* feat(context): add notion, dbt, metricflow driver schemas

* refactor(context): make connectionSchema a driver-discriminated union

* chore(context): re-export KtxConnectionConfig from project package

* docs(context): add connection driver schema plan

* chore(secrets): allowlist example credentials in driver-schemas fixtures

* test(cli): align metabase fixtures with required api_url field

The driver-discriminated union added in this branch now requires api_url
for metabase connections and a known driver for warehouses. Update slow
CLI test fixtures and assertions so they exercise the new schema:
- ingest.test-utils.ts: add api_url to the prod-metabase fixture.
- setup.test.ts: switch metabase fixture from 'url' to 'api_url'.
- local-scan-connectors.test.ts: invalid-driver/missing-driver tests now
  expect the schema error from loadKtxProject (parse-time rejection).
											
										
										
											2026-05-15 00:08:11 +02:00
+								import { connectionConfigSchema } from './driver-schemas.js';
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
-												feat: add codex llm backend

											
										
										
											2026-06-01 17:22:24 +02:00
+								const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code', 'codex'] as const;
-												fix: remove deterministic embedding backend (#146)

* fix: remove deterministic embedding backend

* test: update slow tests for disabled embeddings
											
										
										
											2026-05-19 16:40:01 +02:00
+								const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'] as const;
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
 								const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
 								const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
 								const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
 								const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								const apiCredentialsSchema = z // Shared credentials shape; reused by the `anthropic`, `gateway`, and `openai` provider keys in this file.
 								  .strictObject({
 								    // Both keys are optional, but a value that is present must be a non-empty string (min(1)).
 								    api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
 								    base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
 								  })
 								  .describe('API credentials block: optional key and base URL for an LLM or embedding provider.');
 								const vertexProviderSchema = z
 								  .strictObject({
 								    project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
-												fix: surface silent failures in SL, wiki, and embedding wiring (#195)

* fix: surface silent failures in SL, wiki, and embedding wiring

- require non-empty `vertex.location` in the project schema instead of defaulting
  to an empty string with a description that promised SDK fallback the resolver
  never honored
- log YAML parse failures from `SemanticLayerService.loadSource` and
  `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated
  as "does not exist" by ingest/agent tools
- push directory-listing errors in `loadAllSources` and `listPageKeys` into the
  load-error / log path instead of returning empty success
- accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the
  resolved CLI provider through `mcp-server-factory`; warn in both the memory
  and bundle runtimes when they fall back to `NoopEmbeddingPort` while the
  project config requests an active embedding backend
- clarify `embeddings.dimensions` description as a placeholder valid only with
  `backend: none`, and tighten the sentence-transformers `base_url` description
  to call out that managed-daemon resolution is CLI-only

* test: improve PR coverage
											
										
										
											2026-05-21 10:38:23 +02:00
+								    location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								  })
 								  .describe('Google Vertex AI provider configuration.');
 								const sentenceTransformersSchema = z
 								  .strictObject({
-												fix: surface silent failures in SL, wiki, and embedding wiring (#195)

* fix: surface silent failures in SL, wiki, and embedding wiring

- require non-empty `vertex.location` in the project schema instead of defaulting
  to an empty string with a description that promised SDK fallback the resolver
  never honored
- log YAML parse failures from `SemanticLayerService.loadSource` and
  `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated
  as "does not exist" by ingest/agent tools
- push directory-listing errors in `loadAllSources` and `listPageKeys` into the
  load-error / log path instead of returning empty success
- accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the
  resolved CLI provider through `mcp-server-factory`; warn in both the memory
  and bundle runtimes when they fall back to `NoopEmbeddingPort` while the
  project config requests an active embedding backend
- clarify `embeddings.dimensions` description as a placeholder valid only with
  `backend: none`, and tighten the sentence-transformers `base_url` description
  to call out that managed-daemon resolution is CLI-only

* test: improve PR coverage
											
										
										
											2026-05-21 10:38:23 +02:00
+								    base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								    pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
 								  })
 								  .describe('Sentence-transformers embedding server configuration.');
 								const llmProviderSchema = z
 								  .strictObject({
 								    backend: z
 								      .enum(KTX_LLM_BACKENDS)
 								      .default('none')
-												feat: add claude-code llm backend with runtime port (#115)

* docs: revise claude-code ingest backend spec

* docs: keep claude-code spec focused on ingest

* docs: expand claude-code spec to full llm parity

* Refine claude-code backend spec after adversarial review iteration 1

* Refine claude-code backend spec after adversarial review iteration 2

* Refine claude-code backend spec after adversarial review iteration 3

* feat: recognize claude-code llm backend

* feat: add ktx llm runtime port

* feat: add claude-code llm runtime

* feat: route non-agent llm calls through runtime

* feat: run ingest agents through llm runtime

* feat: support claude-code setup and status

* test: verify claude-code backend runtime

* docs: add claude-code backend v1 runtime plan

* fix: close claude-code runtime isolation checks

* fix: warn on claude-code prompt caching during setup

* chore: verify claude-code v1 closure

* docs: add claude-code backend v1 isolation closure plan

* fix: update claude-code ingest setup guidance

* docs: add claude-code backend v1 ingest guidance closure plan

* docs: align claude-code isolation spec with sdk metadata

* test: cover claude-code host discovery metadata

* fix: tolerate claude-code host discovery metadata

* docs: clarify claude-code host discovery metadata

* docs: add claude-code auth-probe isolation fix plan

* chore: prepare kaelio ktx rc1 release

* chore: add semantic release workflow

* fix: unblock ci checks

* chore(release): 0.1.0-rc.1

* feat: add Claude Code model selection to setup

* fix: keep git maintenance attached in local repos
											
										
										
											2026-05-16 12:06:34 +02:00
+								      .describe(
-												feat: add codex llm backend

											
										
										
											2026-06-01 17:22:24 +02:00
+								        'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session; "codex" uses the local Codex session.',
-												feat: add claude-code llm backend with runtime port (#115)

* docs: revise claude-code ingest backend spec

* docs: keep claude-code spec focused on ingest

* docs: expand claude-code spec to full llm parity

* Refine claude-code backend spec after adversarial review iteration 1

* Refine claude-code backend spec after adversarial review iteration 2

* Refine claude-code backend spec after adversarial review iteration 3

* feat: recognize claude-code llm backend

* feat: add ktx llm runtime port

* feat: add claude-code llm runtime

* feat: route non-agent llm calls through runtime

* feat: run ingest agents through llm runtime

* feat: support claude-code setup and status

* test: verify claude-code backend runtime

* docs: add claude-code backend v1 runtime plan

* fix: close claude-code runtime isolation checks

* fix: warn on claude-code prompt caching during setup

* chore: verify claude-code v1 closure

* docs: add claude-code backend v1 isolation closure plan

* fix: update claude-code ingest setup guidance

* docs: add claude-code backend v1 ingest guidance closure plan

* docs: align claude-code isolation spec with sdk metadata

* test: cover claude-code host discovery metadata

* fix: tolerate claude-code host discovery metadata

* docs: clarify claude-code host discovery metadata

* docs: add claude-code auth-probe isolation fix plan

* chore: prepare kaelio ktx rc1 release

* chore: add semantic release workflow

* fix: unblock ci checks

* chore(release): 0.1.0-rc.1

* feat: add Claude Code model selection to setup

* fix: keep git maintenance attached in local repos
											
										
										
											2026-05-16 12:06:34 +02:00
+								      ),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								    vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
 								    anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
 								    gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
 								  })
 								  .describe('LLM provider selection and credentials.');
 								/**
 								 * Schema for the prompt-caching block: an on/off switch, three per-segment
 								 * TTLs restricted to KTX_PROMPT_CACHE_TTLS, and a Vertex downgrade flag.
 								 * Every key is optional.
 								 */
 								const promptCachingSchema = z
 								  .strictObject({
 								    enabled: z
 								      .boolean()
 								      .optional()
 								      .describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
 								    systemTtl: z
 								      .enum(KTX_PROMPT_CACHE_TTLS)
 								      .optional()
 								      .describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
 								    toolsTtl: z
 								      .enum(KTX_PROMPT_CACHE_TTLS)
 								      .optional()
 								      .describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
 								    historyTtl: z
 								      .enum(KTX_PROMPT_CACHE_TTLS)
 								      .optional()
 								      .describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
 								    vertexFallbackTo5m: z
 								      .boolean()
 								      .optional()
 								      .describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
 								  })
 								  .describe('Prompt-caching tunables for Anthropic-compatible providers.');
 								/**
 								 * Schema for the LLM block. Combines the provider selection
 								 * (llmProviderSchema), a partial map of model role -> model identifier, and
 								 * the optional prompt-caching tunables (promptCachingSchema).
 								 */
 								const llmSchema = z
 								  .strictObject({
 								    // prefault({}) feeds an empty object through llmProviderSchema when the key is omitted.
 								    provider: llmProviderSchema
 								      .prefault({})
 								      .describe('LLM provider backend and credentials.'),
 								    // Keys are limited to KTX_MODEL_ROLES; values must be non-empty strings.
 								    models: z
 								      .partialRecord(
 								        z.enum(KTX_MODEL_ROLES),
 								        z.string().min(1),
 								      )
 								      .default({})
 								      .describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
 								    promptCaching: promptCachingSchema
 								      .optional()
 								      .describe('Optional prompt-caching tunables.'),
 								  })
 								  .describe('LLM provider, per-role model overrides, and prompt-caching tunables.');
 								const embeddingSchema = z
 								  .strictObject({
 								    backend: z
 								      .enum(KTX_EMBEDDING_BACKENDS)
-												fix: remove deterministic embedding backend (#146)

* fix: remove deterministic embedding backend

* test: update slow tests for disabled embeddings
											
										
										
											2026-05-19 16:40:01 +02:00
+								      .default('none')
 								      .describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
 								    model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
-												fix: surface silent failures in SL, wiki, and embedding wiring (#195)

* fix: surface silent failures in SL, wiki, and embedding wiring

- require non-empty `vertex.location` in the project schema instead of defaulting
  to an empty string with a description that promised SDK fallback the resolver
  never honored
- log YAML parse failures from `SemanticLayerService.loadSource` and
  `KnowledgeWikiService.readPage` so corrupted overlays aren't silently treated
  as "does not exist" by ingest/agent tools
- push directory-listing errors in `loadAllSources` and `listPageKeys` into the
  load-error / log path instead of returning empty success
- accept an `embeddingProvider` in `createLocalProjectMemoryIngest` and plumb the
  resolved CLI provider through `mcp-server-factory`; warn in both the memory
  and bundle runtimes when they fall back to `NoopEmbeddingPort` while the
  project config requests an active embedding backend
- clarify `embeddings.dimensions` description as a placeholder valid only with
  `backend: none`, and tighten the sentence-transformers `base_url` description
  to call out that managed-daemon resolution is CLI-only

* test: improve PR coverage
											
										
										
											2026-05-21 10:38:23 +02:00
+								    dimensions: z
 								      .int()
 								      .positive()
 								      .default(8)
 								      .describe(
 								        'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
 								          'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
 								          '(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
 								      ),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								    openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
 								    sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
 								    batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
 								  })
 								  .describe('Embedding backend, model, and provider credentials.');
 								/**
 								 * Schema for ingest work-unit policy: a per-unit step budget, a concurrency
 								 * cap, and the failure mode. All three keys carry defaults
 								 * (40, 1, and 'continue' respectively).
 								 */
 								const workUnitsSchema = z
 								  .strictObject({
 								    stepBudget: z
 								      .int()
 								      .positive()
 								      .default(40)
 								      .describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
 								    maxConcurrency: z
 								      .int()
 								      .positive()
 								      .default(1)
 								      .describe('Maximum number of work units run concurrently during ingest.'),
 								    // Restricted to KTX_WORK_UNIT_FAILURE_MODES.
 								    failureMode: z
 								      .enum(KTX_WORK_UNIT_FAILURE_MODES)
 								      .default('continue')
 								      .describe(
 								        'Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.',
 								      ),
 								  })
 								  .describe('Concurrency and failure handling for ingest work units.');
 								const ingestSchema = z
 								  .strictObject({
 								    adapters: z
 								      .array(z.string().min(1))
 								      .default([])
 								      .describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
 								    embeddings: embeddingSchema
-												fix: remove deterministic embedding backend (#146)

* fix: remove deterministic embedding backend

* test: update slow tests for disabled embeddings
											
										
										
											2026-05-19 16:40:01 +02:00
+								      .prefault({ backend: 'none' })
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								      .describe('Embedding configuration used when ingest adapters need to embed documents.'),
 								    workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
-												feat(cli): profile ingest runs and split model vs tool time (#249)

* feat(cli): profile ingest runs to find where wall-clock time goes

Add opt-in profiling for `ktx ingest`. Each timed phase, work unit, and
agent loop now records durationMs / step count / token usage in the
trace, and a post-run aggregator rolls them up into a "where did the
time go" report printed to stderr.

Enable per run with KTX_PROFILE_INGEST (1/true -> human table, json ->
raw structured profile) or persistently via `ingest.profile` in
ktx.yaml. The json form emits raw milliseconds, token counts, and a
summary.headline one-line diagnosis so coding agents can parse it
directly; json wins when both env and config request profiling.

- runtime-port: RunLoopMetrics (totalMs, usage, stepCount,
  stepBoundariesMs) plus onMetrics callbacks on text/object generation
- ai-sdk + claude-code runtimes: capture per-loop timing and token usage
- work-unit-executor and stages 3/4: thread metrics into trace events
- ingest-bundle.runner: time worktree / triage / clustering / index /
  reconcile / squash phases and emit the profile in a finally block
  (best-effort; never affects the run outcome)
- ingest-profile: new trace+transcript aggregator with table/json formatters
- config: ingest.profile flag; docs: profiling section in ktx-ingest.mdx

* fix(cli): flush tool-call logs before reading ingest profile

Tool transcripts are appended fire-and-forget so the agent hot path never
blocks on logging. The ingest profiler read them before the writes settled,
so per-work-unit toolMs (and the model-vs-tool split derived from it) could
be incomplete. Track in-flight appends and expose flushToolCallLogs() —
bounded by a timeout so it can never hang — and flush before the profiler
reads the transcript.
											
										
										
											2026-06-01 15:49:17 +02:00
+								    profile: z
 								      .union([z.boolean(), z.literal('json')])
 								      .default(false)
 								      .describe(
 								        'Print a timing breakdown to stderr at the end of each ingest run. `true` prints a human table; `"json"` prints the raw structured profile for coding agents; `false` disables it. Equivalent to the KTX_PROFILE_INGEST environment variable (`1`/`true`/`json`).',
 								      ),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								  })
 								  .describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');
 								/**
 								 * Schema for the scan enrichment block: the enrichment mode (defaults to
 								 * 'none') plus an optional embedding configuration validated by
 								 * embeddingSchema.
 								 */
 								const scanEnrichmentSchema = z
 								  .strictObject({
 								    // Restricted to KTX_ENRICHMENT_MODES.
 								    mode: z
 								      .enum(KTX_ENRICHMENT_MODES)
 								      .default('none')
 								      .describe(
 								        'Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.',
 								      ),
 								    embeddings: embeddingSchema
 								      .optional()
 								      .describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
 								  })
 								  .describe('Schema-scan enrichment: how columns and tables are described.');
 								const scanRelationshipsSchema = z
 								  .strictObject({
 								    enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
 								    llmProposals: z.boolean().default(true).describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
 								    validationRequiredForManifest: z
 								      .boolean()
 								      .default(true)
 								      .describe('When true, only relationships that pass database-side validation are written to the manifest.'),
 								    acceptThreshold: z
 								      .number()
 								      .min(0)
 								      .max(1)
 								      .default(0.85)
 								      .describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
 								    reviewThreshold: z
 								      .number()
 								      .min(0)
 								      .max(1)
 								      .default(0.55)
 								      .describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),
 								    maxLlmTablesPerBatch: z
 								      .int()
 								      .positive()
 								      .default(40)
 								      .describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
 								    maxCandidatesPerColumn: z
 								      .int()
 								      .positive()
 								      .default(25)
 								      .describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
 								    profileSampleRows: z.int().positive().default(10000).describe('Number of rows sampled per table when profiling values for relationship inference.'),
-												fix(snowflake): unblock multi-schema ingest and relationship discovery (#204)

* feat(setup): drop redundant Snowflake schema prompt; fall back to free-text on listSchemas failure

Snowflake setup previously asked for a single schema as free text, then
ran a multiselect against the discovered schemas — two schema questions
back-to-back, with the first being only a session bootstrap. The SDK's
`schema` is optional, so the bootstrap step is unnecessary.

- Remove the free-text Snowflake schema prompt; only pass `schema` to
  snowflake-sdk when one is configured.
- When `listSchemas()` fails (e.g. role lacks SHOW SCHEMAS), prompt the
  user for a comma-separated list, persist it as `schema_names`, and use
  it as both the table-list filter and the multiselect default. Applies
  to every driver with a scope-discovery spec, not just Snowflake.
- Update docs to lead with `schema_names`; keep `schema_name` as a
  documented single-schema shorthand.

* fix(snowflake): keep introspecting when primary-key discovery is denied

The PK query joins INFORMATION_SCHEMA.TABLE_CONSTRAINTS and
INFORMATION_SCHEMA.KEY_COLUMN_USAGE, which require grants the
connection role may not have. Previously a 'SQL compilation error:
Object ANALYTICS.INFORMATION_SCHEMA.KEY_COLUMN_USAGE does not exist
or not authorized' aborted the entire introspect — schemas, columns,
and row counts were all discarded over a missing nice-to-have.

Wrap the constraint query in try/catch, log a one-line warning per
schema, and return an empty PK map. Columns end up with
primaryKey=false; relationship inference still has FK and profiling
to fall back on.

* fix(scan): unblock relationship discovery on Snowflake

Two adjacent bugs prevented the scan's relationship pipeline from producing
any joins on a Snowflake warehouse:

- relationship-profiling.ts fell through to a default `GROUP_CONCAT` branch
  for unknown drivers. Snowflake has no GROUP_CONCAT, so every per-table
  profile query failed with "Unknown function GROUP_CONCAT". Add an explicit
  Snowflake branch that uses LISTAGG with a literal '\x1f' delimiter
  (Snowflake requires the delimiter to be a constant, so CHR(31) is rejected).
- description-generation.ts destructured `connector.sampleTable` and
  `connector.sampleColumn` into bare locals, losing the `this` binding when
  the class-method connectors (Snowflake, Postgres, MySQL) were invoked.
  Every sample call threw "Cannot read properties of undefined (reading
  'assertConnection')" and degraded LLM descriptions to metadata-only
  prompts. Call the methods through the connector instead.

Without these, even after the primary-key probe is allowed to fail softly,
the scan ends up with 0 validated relationships and an empty `joins:` block
in every shard YAML.

* test(scan): cover table-ref helpers

* feat(scan): plumb tableScope through live-database introspection port

* feat(scan): apply tableScope during metadata fetch

* feat(scan): enforce table scope at fetch boundary

* feat(scan): pool Snowflake sessions and batch enrichment for faster ingest (#206)

* feat(cli): add RSA key-pair auth option to Snowflake setup wizard

Extends the interactive Snowflake setup flow with an authentication-method
prompt (password vs RSA/JWT key-pair). The RSA branch collects a private-key
path (env/file/absolute) and an optional passphrase; the resulting connection
config records `authMethod: 'rsa'` with `privateKey` and `passphrase` instead
of `password`.

* feat(scan): pool Snowflake sessions

* fix(scan): reuse structural snapshots and cleanup connectors

* feat(scan): parallelize relationship profiling

* feat(scan): batch table description generation

* docs: document Snowflake ingest concurrency knobs

* fix(scan): close Snowflake ingest perf verification gaps

* fix(scan): keep batched description failure bounded

* feat(scan): dispatch query-history probes by connection driver

Extract historic-sql dialect resolution into a shared helper so the
status-project readiness check and the local ingest factory agree on
which connections enable query history and which probe to run. The
status command now picks the postgres/snowflake/bigquery probe based on
the connection's driver instead of always reporting against postgres,
which previously caused snowflake connections with queryHistory.enabled
to surface a misleading "driver is snowflake" failure.

Also drops a noisy console.warn from Snowflake primary-key discovery —
INFORMATION_SCHEMA.KEY_COLUMN_USAGE is commonly ungranted for read-only
roles and the FK + profiling paths handle the empty PK map already.

* fix(llm): allow StructuredOutput tool and raise maxTurns for generateObject

The Claude Code agent SDK announces an internal pseudo-tool named
StructuredOutput in the system/init message whenever outputFormat is set
to { type: 'json_schema' }. The runtime's isolation check built its
allowedToolIds set only from MCP tool ids and treated StructuredOutput
as an unexpected host-injected tool, so every generateObject call threw
"Claude Code runtime isolation failed: tools=StructuredOutput ..." and
the table-descriptions and relationship-LLM-proposal enrichment stages
recorded null output across the board.

Whitelist StructuredOutput specifically in generateObject's
allowedToolIds — the check also enforces missing_tools symmetry, so
generateText and runAgentLoop, which do not see StructuredOutput, must
not require it.

generateObject also ran with maxTurns: 1, which the model intermittently
breached when it emitted thinking text before the structured response.
Raised to 5 to give the schema-bound call enough headroom without
allowing unbounded loops. The existing tests now exercise the path with
an init message that announces StructuredOutput so the regression cannot
slip back in.

* chore(scripts): add ktx-reset.sh project-cleanup helper

Convenience script for repeatable ingest testing: takes a project
directory and prunes everything except ktx.yaml and .ktx/secrets/, so
the next ktx setup or ktx ingest run starts from a known-clean state.
											
										
										
											2026-05-23 10:41:30 +02:00
+								    profileConcurrency: z
 								      .int()
 								      .positive()
 								      .default(4)
 								      .describe('Parallel relationship-profile queries run against the database during scan.'),
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								    validationConcurrency: z.int().positive().default(4).describe('Number of relationship validation queries run in parallel against the database.'),
 								    validationBudget: z
 								      .union([z.literal('all'), z.int().nonnegative()])
 								      .optional()
 								      .describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
 								  })
 								  .describe('Schema-scan relationship discovery and validation tunables.');
 								/**
 								 * Schema for the scan block, composed of the enrichment and relationships
 								 * sub-schemas. Each sub-block is prefaulted with `{}` so it is parsed from
 								 * an empty object when omitted.
 								 */
 								const scanSchema = z
 								  .strictObject({
 								    enrichment: scanEnrichmentSchema
 								      .prefault({})
 								      .describe('Column/table enrichment configuration.'),
 								    relationships: scanRelationshipsSchema
 								      .prefault({})
 								      .describe('Relationship discovery and validation configuration.'),
 								  })
 								  .describe('Schema-scan configuration: enrichment and relationship discovery.');
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
 								const setupSchema = z
 								  .strictObject({
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								    database_connection_ids: z
 								      .array(z.string().min(1))
 								      .default([])
 								      .describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
 								  })
 								  .describe('Setup-wizard state captured during `ktx setup`.');
 								/**
 								 * Schema for the Git commit policy of the storage block: whether to
 								 * auto-commit (default true) and the author string used for those commits.
 								 */
 								const storageGitSchema = z
 								  .strictObject({
 								    auto_commit: z
 								      .boolean()
 								      .default(true)
 								      .describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
 								    // Only non-emptiness is enforced here; the "Name <email>" form is not validated by this schema.
 								    author: z
 								      .string()
 								      .min(1)
 								      .default('ktx <ktx@example.com>')
 								      .describe(
 								        'Git author identity used for auto-commits, in standard "Name <email>" form.',
 								      ),
 								  })
 								  .describe('Git-backed storage commit policy.');
 								/**
 								 * Schema for the `storage` section of ktx.yaml.
 								 *
 								 * `state` and `search` are restricted to the values listed in
 								 * KTX_STORAGE_STATES and KTX_SEARCH_BACKENDS and default to the SQLite
 								 * variants. Unknown keys are rejected because the object is strict.
 								 */
 								const storageSchema = z
 								  .strictObject({
 								    state: z
 								      .enum(KTX_STORAGE_STATES)
 								      .default('sqlite')
 								      .describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
 								    search: z
 								      .enum(KTX_SEARCH_BACKENDS)
 								      .default('sqlite-fts5')
 								      .describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
 								    // prefault({}) parses an empty object when `git` is omitted, so the
 								    // nested field defaults of storageGitSchema are applied.
 								    git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
 								  })
 								  .describe('Storage backends and commit policy for KTX state and search indexes.');
-												feat(context): add driver-discriminated connection schemas (#96)

* refactor(context): export and describe mapping shape schemas

* feat(context): add driver-schemas module with warehouse drivers

* feat(context): add metabase, looker, lookml driver schemas with mappings

* feat(context): add notion, dbt, metricflow driver schemas

* refactor(context): make connectionSchema a driver-discriminated union

* chore(context): re-export KtxConnectionConfig from project package

* docs(context): add connection driver schema plan

* chore(secrets): allowlist example credentials in driver-schemas fixtures

* test(cli): align metabase fixtures with required api_url field

The driver-discriminated union added in this branch now requires api_url
for metabase connections and a known driver for warehouses. Update slow
CLI test fixtures and assertions so they exercise the new schema:
- ingest.test-utils.ts: add api_url to the prod-metabase fixture.
- setup.test.ts: switch metabase fixture from 'url' to 'api_url'.
- local-scan-connectors.test.ts: invalid-driver/missing-driver tests now
  expect the schema error from loadKtxProject (parse-time rejection).
											
										
										
											2026-05-15 00:08:11 +02:00
+								const connectionSchema = connectionConfigSchema;
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
 								/**
 								 * Schema for the `agent` section of ktx.yaml.
 								 *
 								 * Currently holds a single nested block, `run_research`, whose fields all
 								 * have defaults. Both the outer and the nested object are strict, so
 								 * unknown keys at either level are rejected.
 								 */
 								const agentSchema = z
 								  .strictObject({
 								    run_research: z
 								      .strictObject({
 								        enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
 								        // Integer >= 0; note that nonnegative() accepts 0.
 								        max_iterations: z
 								          .number()
 								          .int()
 								          .nonnegative()
 								          .default(20)
 								          .describe('Maximum number of tool-call iterations the research agent may take per run.'),
 								        default_toolset: z
 								          .array(z.string().min(1))
 								          .default(['sl_query', 'wiki_search', 'sl_read_source'])
 								          .describe('Default list of tool identifiers exposed to the research agent.'),
 								      })
 								      // An omitted run_research block is parsed from {} so the defaults above apply.
 								      .prefault({})
 								      .describe('Research-agent configuration.'),
 								  })
 								  .describe('Agent feature configuration.');
 								/**
 								 * Schema for the `memory` section of ktx.yaml.
 								 *
 								 * A strict object with one boolean flag, `auto_commit`, defaulting to true.
 								 */
 								const memorySchema = z
 								  .strictObject({
 								    auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
 								  })
 								  .describe('Memory subsystem configuration.');
 								/**
 								 * Root schema for ktx.yaml.
 								 *
 								 * The object is strict, so unknown top-level keys are rejected. `setup` is
 								 * the only section that may be absent from the parsed result; `connections`
 								 * defaults to an empty map and every other section is parsed from `{}` when
 								 * omitted, so an empty document yields a fully populated config.
 								 */
 								const ktxProjectConfigSchema = z
 								  .strictObject({
 								    setup: setupSchema.optional().describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
 								    // Keys are arbitrary strings; each value is validated by connectionSchema.
 								    connections: z
 								      .record(z.string(), connectionSchema)
 								      .default({})
 								      .describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
 								    storage: storageSchema.prefault({}).describe('Storage backends and commit policy for KTX state and search indexes.'),
 								    llm: llmSchema.prefault({}).describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
 								    ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
 								    agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
 								    memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
 								    scan: scanSchema.prefault({}).describe('Schema-scan configuration: enrichment and relationship discovery.'),
 								  })
 								  .describe('Configuration schema for KTX project files (ktx.yaml).');
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
 								// Static types inferred from the Zod schemas, so the compile-time shapes
 								// follow the runtime validation rules instead of being declared twice.
 								/** Parsed shape of a whole ktx.yaml document. */
 								export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
 								/** Parsed shape of the `llm` section. */
 								export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
 								/** Parsed shape produced by `embeddingSchema`. */
 								export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
 								/** Parsed shape produced by `scanEnrichmentSchema`. */
 								export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
 								/** Parsed shape produced by `scanRelationshipsSchema`. */
 								export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
 								/** Parsed shape of a single entry of the `connections` map. */
 								export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
 								/** A single problem found while validating ktx.yaml. */
 								export interface KtxConfigIssue {
 								  /** Dot-joined location of the offending value; empty for document-level problems. */
 								  path: string;
 								  /** Human-readable description of the problem. */
 								  message: string;
 								  /** Optional remediation hint. NOTE(review): not set by the helpers visible in this part of the file — confirm where it is populated. */
 								  fix?: string;
 								}
 								/** Outcome of validating a ktx.yaml document without throwing. */
 								export interface KtxConfigValidation {
 								  /** False when at least one issue prevented the document from being accepted. */
 								  ok: boolean;
 								  /** Problems found during validation, in the order they were reported. */
 								  issues: KtxConfigIssue[];
 								}
 								/**
 								 * Type guard for values that can be treated as a string-keyed object.
 								 *
 								 * @param value - Any value, typically the result of parsing YAML.
 								 * @returns True for non-null, non-array values whose `typeof` is `'object'`.
 								 */
 								function isRecord(value: unknown): value is Record<string, unknown> {
 								  // null and arrays both report typeof 'object', so rule them out first.
 								  if (value === null || Array.isArray(value)) {
 								    return false;
 								  }
 								  return typeof value === 'object';
 								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function dottedPath(path: ReadonlyArray<PropertyKey>): string {
 								  return path.map((segment) => String(segment)).join('.');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
 								  let cursor: unknown = root;
 								  for (const segment of path) {
 								    if (cursor === null || typeof cursor !== 'object') return undefined;
 								    cursor = (cursor as Record<PropertyKey, unknown>)[segment];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return cursor;
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
 								  const basePath = dottedPath(issue.path);
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  if (issue.code === 'unrecognized_keys') {
 								    const keys = (issue as { keys?: readonly string[] }).keys ?? [];
 								    return keys.map((key) => {
 								      const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
 								      return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
 								    });
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  const lastSegment = issue.path[issue.path.length - 1];
 								  if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
 								    const value = valueAtPath(input, issue.path);
 								    return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
 								  return error.issues.flatMap((issue) => formatIssue(issue, input));
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function formatZodError(error: z.ZodError, input: unknown): string {
 								  return collectIssues(error, input)
 								    .map((issue) => issue.message)
 								    .join('\n');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												fix: remove project from ktx config (#95)
											
										
										
											2026-05-14 17:39:31 +02:00
+								export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
 								  return ktxProjectConfigSchema.parse({});
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
+								export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  const parsed = YAML.parse(raw) as unknown;
 								  if (!isRecord(parsed)) {
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
+								    throw new Error('ktx.yaml must contain a YAML object');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  const result = ktxProjectConfigSchema.safeParse(parsed);
 								  if (!result.success) {
 								    throw new Error(formatZodError(result.error, parsed));
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return result.data;
 								}
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
/**
 * Validates the raw text of a ktx.yaml file against `ktxProjectConfigSchema`.
 *
 * Validation proceeds in three stages, each of which short-circuits on failure:
 *  1. the text is parsed as YAML;
 *  2. the parsed document must satisfy `isRecord`;
 *  3. the document is checked with the schema's `safeParse`.
 *
 * This function does not throw on YAML parse errors; they are reported as a
 * single issue with an empty `path`.
 *
 * @param raw - The unparsed contents of ktx.yaml.
 * @returns `{ ok: true, issues: [] }` when the document passes the schema,
 *   otherwise `{ ok: false, issues }` describing what went wrong.
 */
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
  let document: unknown;
  try {
    document = YAML.parse(raw);
  } catch (cause) {
    // Non-Error throwables are stringified so the message is always populated.
    const detail = cause instanceof Error ? cause.message : String(cause);
    return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${detail}` }] };
  }

  // Reject documents that are not records before handing them to the schema.
  if (!isRecord(document)) {
    return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
  }

  const outcome = ktxProjectConfigSchema.safeParse(document);
  return outcome.success
    ? { ok: true, issues: [] }
    : { ok: false, issues: collectIssues(outcome.error, document) };
}
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
/**
 * Builds a JSON Schema document for ktx.yaml from `ktxProjectConfigSchema`.
 *
 * The schema is produced by `z.toJSONSchema` with the `draft-7` target and
 * `io: 'input'`, then merged over a small envelope carrying `$schema`, `$id`
 * and `title`. Because the generated schema is spread last, any key it shares
 * with the envelope takes precedence over the envelope's value.
 *
 * @returns The JSON Schema as a plain object.
 */
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
  const envelope = {
    $schema: 'http://json-schema.org/draft-07/schema#',
    $id: 'https://ktx.dev/schemas/ktx-project-config.json',
    title: 'ktx.yaml',
  };
  const generated = z.toJSONSchema(ktxProjectConfigSchema, {
    target: 'draft-7',
    io: 'input',
  }) as Record<string, unknown>;
  return { ...envelope, ...generated };
}
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
/**
 * Serializes a project config to YAML text ending in exactly one newline.
 *
 * When `config.ingest.adapters` is empty, the `ingest` section is written with
 * only its `embeddings` and `workUnits` entries, so the `adapters` key (and any
 * other `ingest` key) is left out of the output. Otherwise the config is
 * serialized as-is.
 *
 * @param config - The project config to serialize.
 * @returns YAML text stringified with `indent: 2` and `lineWidth: 0`, with
 *   trailing whitespace trimmed and a single `\n` appended.
 */
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
  const { ingest } = config;
  const hasAdapters = ingest.adapters.length !== 0;
  const payload = hasAdapters
    ? config
    : {
        ...config,
        ingest: {
          embeddings: ingest.embeddings,
          workUnits: ingest.workUnits,
        },
      };
  const yamlText = YAML.stringify(payload, { indent: 2, lineWidth: 0 });
  return `${yamlText.trimEnd()}\n`;
}