// ktx/packages/context/src/project/config.ts

import { KTX_MODEL_ROLES } from '@ktx/llm';
import YAML from 'yaml';
import * as z from 'zod';

// Allowed literal values for the enum-typed fields of ktx.yaml. Each tuple is
// declared `as const` so the `z.enum(...)` calls below validate against, and
// infer, the exact string literals rather than plain `string`.

// Values accepted by llm.provider.backend.
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
// Values accepted by the `backend` field of an embeddings block.
const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
// Values accepted by the prompt-caching TTL fields.
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
// Values accepted by scan.enrichment.mode.
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
// Values accepted by ingest.workUnits.failureMode.
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
// Values accepted by storage.state.
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
// Values accepted by storage.search.
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;

/**
 * Migration hints for keys that are no longer part of the schema.
 *
 * Keys are full dotted paths of the rejected field (e.g. `ingest.llm`); values
 * are the replacement advice shown to the user. `formatIssue` consults this map
 * when the schema reports an unrecognized key.
 *
 * NOTE: this is a plain object, so an index lookup with an arbitrary key also
 * sees inherited `Object.prototype` members; check own-property membership when
 * looking up user-supplied keys.
 */
const DEPRECATED_KEY_HINTS: Record<string, string> = {
  'llm.provider.provider': 'use llm.provider.backend',
  'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',
  'ingest.embeddings.provider': 'use ingest.embeddings.backend',
  'scan.enrichment.backend': 'use scan.enrichment.mode',
  'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
  'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
};

/**
 * Credentials shape shared by the Anthropic, AI Gateway, and OpenAI blocks below.
 * Both fields may be omitted, but when present they must be non-empty strings.
 * Being a strict object, any other key is reported as unrecognized.
 */
const apiCredentialsSchema = z
  .strictObject({
    api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
    base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
  })
  .describe('API credentials block: optional key and base URL for an LLM or embedding provider.');

/**
 * Vertex AI settings nested under llm.provider.vertex.
 * `project` may be omitted; `location` is always present after parsing because
 * it defaults to the empty string.
 */
const vertexProviderSchema = z
  .strictObject({
    project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
    location: z.string().default('').describe('Vertex AI region (e.g. "us-east5"). Empty string falls back to the SDK default.'),
  })
  .describe('Google Vertex AI provider configuration.');

/**
 * Settings for the sentence-transformers embedding backend.
 * Note the mixed key casing (`base_url` vs `pathPrefix`): both spellings are
 * exactly what the schema accepts, so they are part of the file format.
 */
const sentenceTransformersSchema = z
  .strictObject({
    base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Empty string uses the managed local runtime.'),
    pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
  })
  .describe('Sentence-transformers embedding server configuration.');

/**
 * Shape of llm.provider: the selected backend plus one optional settings block
 * per backend.
 *
 * All three nested blocks are optional here and there is no cross-field check,
 * so this schema does not itself require the block matching `backend` to be
 * present; any such requirement would have to be enforced by the consumer.
 */
const llmProviderSchema = z
  .strictObject({
    backend: z
      .enum(KTX_LLM_BACKENDS)
      .default('none')
      .describe('LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block.'),
    vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
    anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
    gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
  })
  .describe('LLM provider selection and credentials.');

/**
 * Shape of llm.promptCaching. Every field is optional and none carries a
 * default, so an omitted field stays `undefined` after parsing.
 */
const promptCachingSchema = z
  .strictObject({
    enabled: z.boolean().optional().describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
    systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
    toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
    historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
    vertexFallbackTo5m: z.boolean().optional().describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
  })
  .describe('Prompt-caching tunables for Anthropic-compatible providers.');

/**
 * Shape of the top-level `llm` section.
 *
 * `provider` uses `prefault({})`: when the key is omitted, an empty object is
 * fed through `llmProviderSchema`, so its field defaults are applied.
 * `models` is a partial record, so only roles listed in `KTX_MODEL_ROLES` are
 * valid keys and none of them is required.
 */
const llmSchema = z
  .strictObject({
    provider: llmProviderSchema.prefault({}).describe('LLM provider backend and credentials.'),
    models: z
      .partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
      .default({})
      .describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
    promptCaching: promptCachingSchema.optional().describe('Optional prompt-caching tunables.'),
  })
  .describe('LLM provider, per-role model overrides, and prompt-caching tunables.');

/**
 * Shape of an embeddings block, reused by ingest.embeddings and
 * scan.enrichment.embeddings.
 *
 * `backend` defaults to 'deterministic' and `dimensions` to 8; all other fields
 * are optional with no default. There is no cross-field check tying the nested
 * provider blocks to the selected `backend`.
 */
const embeddingSchema = z
  .strictObject({
    backend: z
      .enum(KTX_EMBEDDING_BACKENDS)
      .default('deterministic')
      .describe('Embedding backend. "deterministic" is a built-in hash-based vector for offline use; "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
    model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small"). Ignored by the "deterministic" backend.'),
    dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'),
    openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
    sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
    batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
  })
  .describe('Embedding backend, model, and provider credentials.');

/**
 * Shape of ingest.workUnits. All three fields have defaults (40 steps,
 * concurrency 1, failure mode 'continue'), so the parsed value is always fully
 * populated. Both numeric fields must be positive integers.
 */
const workUnitsSchema = z
  .strictObject({
    stepBudget: z.int().positive().default(40).describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
    maxConcurrency: z.int().positive().default(1).describe('Maximum number of work units run concurrently during ingest.'),
    failureMode: z
      .enum(KTX_WORK_UNIT_FAILURE_MODES)
      .default('continue')
      .describe('Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.'),
  })
  .describe('Concurrency and failure handling for ingest work units.');

/**
 * Shape of the top-level `ingest` section.
 *
 * When `embeddings` is omitted, the prefault object below is parsed in its
 * place, which yields `model: 'deterministic'`. A block the user writes without
 * `model` leaves `model` undefined instead, since `embeddingSchema` gives that
 * field no default.
 */
const ingestSchema = z
  .strictObject({
    adapters: z
      .array(z.string().min(1))
      .default([])
      .describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
    embeddings: embeddingSchema
      .prefault({ backend: 'deterministic', model: 'deterministic' })
      .describe('Embedding configuration used when ingest adapters need to embed documents.'),
    workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
  })
  .describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');

/**
 * Shape of scan.enrichment. `mode` defaults to 'none'.
 *
 * `embeddings` is optional with no default, so it stays `undefined` when
 * omitted; the fallback to ingest.embeddings mentioned in the description is
 * not performed by this schema.
 */
const scanEnrichmentSchema = z
  .strictObject({
    mode: z
      .enum(KTX_ENRICHMENT_MODES)
      .default('none')
      .describe('Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.'),
    embeddings: embeddingSchema.optional().describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
  })
  .describe('Schema-scan enrichment: how columns and tables are described.');

/**
 * Shape of scan.relationships. Every field except `validationBudget` has a
 * default, so the parsed value is fully populated apart from that one field.
 *
 * The two thresholds are validated independently (each within 0..1); nothing
 * here checks that `reviewThreshold` is less than or equal to `acceptThreshold`.
 */
const scanRelationshipsSchema = z
  .strictObject({
    enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
    llmProposals: z.boolean().default(true).describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
    validationRequiredForManifest: z
      .boolean()
      .default(true)
      .describe('When true, only relationships that pass database-side validation are written to the manifest.'),
    acceptThreshold: z
      .number()
      .min(0)
      .max(1)
      .default(0.85)
      .describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
    reviewThreshold: z
      .number()
      .min(0)
      .max(1)
      .default(0.55)
      .describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),
    maxLlmTablesPerBatch: z
      .int()
      .positive()
      .default(40)
      .describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
    maxCandidatesPerColumn: z
      .int()
      .positive()
      .default(25)
      .describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
    profileSampleRows: z.int().positive().default(10000).describe('Number of rows sampled per table when profiling values for relationship inference.'),
    validationConcurrency: z.int().positive().default(4).describe('Number of relationship validation queries run in parallel against the database.'),
    // Either the literal string 'all' or a non-negative integer (0 is allowed).
    validationBudget: z
      .union([z.literal('all'), z.int().nonnegative()])
      .optional()
      .describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
  })
  .describe('Schema-scan relationship discovery and validation tunables.');

/**
 * Shape of the top-level `scan` section. Both sub-sections use `prefault({})`,
 * so omitting either one still produces its defaults.
 */
const scanSchema = z
  .strictObject({
    enrichment: scanEnrichmentSchema.prefault({}).describe('Column/table enrichment configuration.'),
    relationships: scanRelationshipsSchema.prefault({}).describe('Relationship discovery and validation configuration.'),
  })
  .describe('Schema-scan configuration: enrichment and relationship discovery.');

/**
 * Shape of the top-level `setup` section.
 *
 * `completed_steps` is accepted with any value so files that still carry it
 * pass the strict-object check, but the transform below rebuilds the result
 * from `database_connection_ids` only, so the field never appears in the
 * parsed config.
 */
const setupSchema = z
  .strictObject({
    database_connection_ids: z
      .array(z.string().min(1))
      .default([])
      .describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
    completed_steps: z
      .unknown()
      .optional()
      .describe('Deprecated. Accepted for backward compatibility but ignored; KTX no longer tracks setup progress here.'),
  })
  // Drop the deprecated field from the parsed output.
  .transform(({ database_connection_ids }) => ({ database_connection_ids }))
  .describe('Setup-wizard state captured during `ktx setup`.');

/**
 * Shape of storage.git. Both fields have defaults: auto-commit is on and the
 * author falls back to a placeholder identity.
 */
const storageGitSchema = z
  .strictObject({
    auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
    author: z
      .string()
      .min(1)
      .default('ktx <ktx@example.com>')
      .describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
  })
  .describe('Git-backed storage commit policy.');

/**
 * Shape of the top-level `storage` section. `state` and `search` are chosen
 * independently (defaults: 'sqlite' and 'sqlite-fts5'); the schema does not
 * constrain which combinations are allowed.
 */
const storageSchema = z
  .strictObject({
    state: z
      .enum(KTX_STORAGE_STATES)
      .default('sqlite')
      .describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
    search: z
      .enum(KTX_SEARCH_BACKENDS)
      .default('sqlite-fts5')
      .describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
    git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
  })
  .describe('Storage backends and commit policy for KTX state and search indexes.');

/**
 * Shape of one entry in the top-level `connections` map.
 *
 * This is the only loose object in the file: keys other than `driver` and
 * `url` are kept as-is instead of being rejected. Both declared fields are
 * optional, so an empty entry is valid at the schema level.
 */
const connectionSchema = z
  .looseObject({
    driver: z.string().min(1).optional().describe('Connector driver identifier (e.g. "postgres", "bigquery", "snowflake").'),
    url: z.string().optional().describe('Connection URL or DSN. Format depends on the driver; may contain environment-variable references.'),
  })
  .describe('A single database/connector connection entry. Additional driver-specific fields are accepted and passed through.');

/**
 * Shape of the top-level `agent` section, which currently holds only
 * `run_research`. That block is prefaulted, so it is always present after
 * parsing with `enabled: false`, `max_iterations: 20`, and the default toolset
 * unless the file overrides them. `max_iterations` may be 0.
 */
const agentSchema = z
  .strictObject({
    run_research: z
      .strictObject({
        enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
        max_iterations: z
          .number()
          .int()
          .nonnegative()
          .default(20)
          .describe('Maximum number of tool-call iterations the research agent may take per run.'),
        default_toolset: z
          .array(z.string().min(1))
          .default(['sl_query', 'wiki_search', 'sl_read_source'])
          .describe('Default list of tool identifiers exposed to the research agent.'),
      })
      .prefault({})
      .describe('Research-agent configuration.'),
  })
  .describe('Agent feature configuration.');

/** Shape of the top-level `memory` section; `auto_commit` defaults to true. */
const memorySchema = z
  .strictObject({
    auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
  })
  .describe('Memory subsystem configuration.');

/**
 * Root schema for ktx.yaml.
 *
 * Every section except `setup` is defaulted or prefaulted, so parsing an empty
 * object yields a complete config (see `buildDefaultKtxProjectConfig`). `setup`
 * is the only section that may be absent from the parsed result. Unknown
 * top-level keys are rejected because the root is a strict object.
 */
const ktxProjectConfigSchema = z
  .strictObject({
    setup: setupSchema.optional().describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
    connections: z
      .record(z.string(), connectionSchema)
      .default({})
      .describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
    storage: storageSchema.prefault({}).describe('Storage backends and commit policy for KTX state and search indexes.'),
    llm: llmSchema.prefault({}).describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
    ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
    agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
    memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
    scan: scanSchema.prefault({}).describe('Schema-scan configuration: enrichment and relationship discovery.'),
  })
  .describe('Configuration schema for KTX project files (ktx.yaml).');

// Public types derived from the schemas with `z.infer`, i.e. the shapes
// produced by a successful parse (defaults applied, transforms run). Keeping
// them derived means the types cannot drift from the validation rules.

/** Fully parsed ktx.yaml document. */
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
/** Parsed top-level `llm` section. */
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
/** Parsed `llm.provider` block. */
export type KtxProjectLlmProviderConfig = z.infer<typeof llmProviderSchema>;
/** Parsed embeddings block (ingest.embeddings or scan.enrichment.embeddings). */
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
/** Parsed `scan.enrichment` block. */
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
/** Parsed `ingest.workUnits` block. */
export type KtxIngestWorkUnitsConfig = z.infer<typeof workUnitsSchema>;
/** Parsed `scan.relationships` block. */
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
/** Parsed top-level `scan` section. */
export type KtxProjectScanConfig = z.infer<typeof scanSchema>;
/** Parsed entry of the `connections` map. */
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
/** Parsed `setup` section (after the deprecated field has been dropped). */
export type KtxProjectSetupConfig = z.infer<typeof setupSchema>;
/** Union of the allowed `storage.state` values. */
export type KtxStorageState = z.infer<typeof storageSchema>['state'];
/** Union of the allowed `storage.search` values. */
export type KtxSearchBackend = z.infer<typeof storageSchema>['search'];

/** One problem found while validating ktx.yaml. */
export interface KtxConfigIssue {
  /** Dotted path of the offending field; the empty string for file-level problems. */
  path: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Suggested replacement; set only when a deprecated-key hint matches `path`. */
  fix?: string;
}

/** Result of `validateKtxProjectConfig`. */
export interface KtxConfigValidation {
  /** True when the document parsed and matched the schema. */
  ok: boolean;
  /** Problems found; empty when `ok` is true. */
  issues: KtxConfigIssue[];
}

/**
 * Type guard for "plain container" values: accepts any non-null object that is
 * not an array, and rejects primitives, `null`, `undefined`, and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}

/**
 * Join path segments into a single dot-separated string.
 * Each segment is converted with `String(...)`, so numeric indexes and symbols
 * are rendered as text. An empty path yields the empty string.
 */
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
  const parts: string[] = [];
  for (const segment of path) {
    parts.push(String(segment));
  }
  return parts.join('.');
}

/**
 * Walk `root` along `path` and return whatever is found at the end.
 *
 * Returns `undefined` as soon as a step would have to index into `null` or a
 * non-object value. With an empty path the root itself is returned unchanged.
 */
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
  return path.reduce<unknown>((node, segment) => {
    const canDescend = typeof node === 'object' && node !== null;
    // Once a step fails the accumulator is undefined, so every later step fails too.
    return canDescend ? (node as Record<PropertyKey, unknown>)[segment] : undefined;
  }, root);
}

/**
 * Render a raw value from the user's YAML for inclusion in an error message.
 *
 * Primitives keep their `String(...)` form. Objects and arrays are rendered as
 * JSON so the user sees what they wrote instead of "[object Object]"; when JSON
 * serialization throws or yields nothing, fall back to `String(...)`.
 */
function renderConfigValue(value: unknown): string {
  if (value === null || typeof value !== 'object') {
    return String(value);
  }
  try {
    const rendered: string | undefined = JSON.stringify(value);
    return rendered ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Convert one Zod issue into one or more user-facing config issues.
 *
 * - `unrecognized_keys` issues are expanded into one entry per rejected key.
 *   A key whose full dotted path has a migration hint gets that hint in both
 *   `message` and `fix`; any other key is reported as an unknown field.
 * - An invalid value or type on a field named `backend` is reported with the
 *   offending value read back from the raw input.
 * - Everything else is reported as "<path>: <zod message>" (or just the Zod
 *   message when the issue is at the document root).
 *
 * @param issue - The Zod issue to format.
 * @param input - The raw parsed YAML the schema was run against; used only to
 *   read back rejected values.
 * @returns The formatted issues; never empty except for an `unrecognized_keys`
 *   issue that lists no keys.
 */
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
  const basePath = dottedPath(issue.path);

  if (issue.code === 'unrecognized_keys') {
    const keys = (issue as { keys?: readonly string[] }).keys ?? [];
    return keys.map((key) => {
      const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
      // The key comes from the user's file. Look it up as an own property only,
      // otherwise names such as "constructor" or "toString" would resolve to
      // Object.prototype members and be reported as a bogus migration hint.
      const hint = Object.prototype.hasOwnProperty.call(DEPRECATED_KEY_HINTS, fullPath)
        ? DEPRECATED_KEY_HINTS[fullPath]
        : undefined;
      if (hint !== undefined) {
        return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
      }
      return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
    });
  }

  const lastSegment = issue.path[issue.path.length - 1];
  if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
    const value = valueAtPath(input, issue.path);
    return [{ path: basePath, message: `Unsupported ${basePath}: ${renderConfigValue(value)}` }];
  }

  return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
}

/**
 * Flatten every issue in a Zod error into user-facing config issues, keeping
 * Zod's issue order. One Zod issue may expand to several entries.
 */
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
  const collected: KtxConfigIssue[] = [];
  for (const issue of error.issues) {
    collected.push(...formatIssue(issue, input));
  }
  return collected;
}

/**
 * Build a single multi-line error string from a Zod error: one formatted issue
 * message per line, in issue order.
 */
function formatZodError(error: z.ZodError, input: unknown): string {
  const messages = collectIssues(error, input).map(({ message }) => message);
  return messages.join('\n');
}

/**
 * Build the configuration that results from an empty ktx.yaml object.
 *
 * @returns The config produced by running the root schema over `{}`, i.e.
 *   every default and prefault applied.
 */
export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
  const emptyDocument = {};
  const defaults = ktxProjectConfigSchema.parse(emptyDocument);
  return defaults;
}

/**
 * Parse and validate the text of a ktx.yaml file.
 *
 * @param raw - The YAML source text.
 * @returns The validated configuration with defaults applied.
 * @throws Error when the document is not a YAML mapping, or when it fails
 *   schema validation (the message lists one formatted issue per line).
 *   Errors raised by `YAML.parse` itself are not caught here and propagate
 *   unchanged.
 */
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
  const loaded: unknown = YAML.parse(raw);
  if (isRecord(loaded)) {
    const outcome = ktxProjectConfigSchema.safeParse(loaded);
    if (outcome.success) {
      return outcome.data;
    }
    throw new Error(formatZodError(outcome.error, loaded));
  }
  throw new Error('ktx.yaml must contain a YAML object');
}

/**
 * Validate the text of a ktx.yaml file without throwing.
 *
 * @param raw - The YAML source text.
 * @returns `{ ok: true, issues: [] }` when the document is valid. Otherwise
 *   `ok` is false and `issues` holds either a single file-level entry (YAML
 *   parse failure or a non-mapping document, both with an empty `path`) or the
 *   formatted schema issues.
 */
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
  const failure = (issues: KtxConfigIssue[]): KtxConfigValidation => ({ ok: false, issues });

  let loaded: unknown;
  try {
    loaded = YAML.parse(raw);
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    return failure([{ path: '', message: `ktx.yaml parse error: ${detail}` }]);
  }

  if (!isRecord(loaded)) {
    return failure([{ path: '', message: 'ktx.yaml must contain a YAML object' }]);
  }

  const outcome = ktxProjectConfigSchema.safeParse(loaded);
  return outcome.success ? { ok: true, issues: [] } : failure(collectIssues(outcome.error, loaded));
}

/**
 * Produce a draft-07 JSON Schema describing the ktx.yaml input format.
 *
 * The schema is generated from the root Zod schema in `input` mode, then
 * merged behind a fixed header (`$schema`, `$id`, `title`). The generated keys
 * are spread last, so any key Zod emits with the same name overrides the
 * header value.
 *
 * @returns The JSON Schema as a plain object.
 */
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
  const generated = z.toJSONSchema(ktxProjectConfigSchema, { target: 'draft-7', io: 'input' }) as Record<
    string,
    unknown
  >;
  const header: Record<string, unknown> = {
    $schema: 'http://json-schema.org/draft-07/schema#',
    $id: 'https://ktx.dev/schemas/ktx-project-config.json',
    title: 'ktx.yaml',
  };
  return { ...header, ...generated };
}

/**
 * Serialize a configuration back to YAML text.
 *
 * When `ingest.adapters` is empty the key is left out of the output, and the
 * `ingest` section is written with `embeddings` and `workUnits` only. The
 * result uses two-space indentation, no line folding, and ends with exactly
 * one trailing newline.
 *
 * @param config - The configuration to serialize.
 * @returns The YAML text.
 */
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
  let output: unknown = config;
  if (config.ingest.adapters.length === 0) {
    const { embeddings, workUnits } = config.ingest;
    output = { ...config, ingest: { embeddings, workUnits } };
  }
  const yamlText = YAML.stringify(output, { indent: 2, lineWidth: 0 });
  return `${yamlText.trimEnd()}\n`;
}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								import { KTX_MODEL_ROLES } from '@ktx/llm';
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								import YAML from 'yaml';
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								import * as z from 'zod';
 								const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway'] as const;
 								const KTX_EMBEDDING_BACKENDS = ['none', 'deterministic', 'openai', 'sentence-transformers'] as const;
 								const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
 								const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
 								const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
 								const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
 								const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
 								const DEPRECATED_KEY_HINTS: Record<string, string> = {
 								  'llm.provider.provider': 'use llm.provider.backend',
 								  'ingest.llm': 'use top-level llm.provider, llm.models, and ingest.workUnits',
 								  'ingest.embeddings.provider': 'use ingest.embeddings.backend',
 								  'scan.enrichment.backend': 'use scan.enrichment.mode',
 								  'scan.enrichment.llm': 'use top-level llm.provider and llm.models',
 								  'scan.enrichment.embeddings.provider': 'use scan.enrichment.embeddings.backend',
 								};
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
+								const apiCredentialsSchema = z
 								  .strictObject({
 								    api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
 								    base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
 								  })
 								  .describe('API credentials block: optional key and base URL for an LLM or embedding provider.');
 								const vertexProviderSchema = z
 								  .strictObject({
 								    project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
 								    location: z.string().default('').describe('Vertex AI region (e.g. "us-east5"). Empty string falls back to the SDK default.'),
 								  })
 								  .describe('Google Vertex AI provider configuration.');
 								const sentenceTransformersSchema = z
 								  .strictObject({
 								    base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Empty string uses the managed local runtime.'),
 								    pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
 								  })
 								  .describe('Sentence-transformers embedding server configuration.');
 								const llmProviderSchema = z
 								  .strictObject({
 								    backend: z
 								      .enum(KTX_LLM_BACKENDS)
 								      .default('none')
 								      .describe('LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block.'),
 								    vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
 								    anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
 								    gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
 								  })
 								  .describe('LLM provider selection and credentials.');
 								const promptCachingSchema = z
 								  .strictObject({
 								    enabled: z.boolean().optional().describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
 								    systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
 								    toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
 								    historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
 								    vertexFallbackTo5m: z.boolean().optional().describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
 								  })
 								  .describe('Prompt-caching tunables for Anthropic-compatible providers.');
 								const llmSchema = z
 								  .strictObject({
 								    provider: llmProviderSchema.prefault({}).describe('LLM provider backend and credentials.'),
 								    models: z
 								      .partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
 								      .default({})
 								      .describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
 								    promptCaching: promptCachingSchema.optional().describe('Optional prompt-caching tunables.'),
 								  })
 								  .describe('LLM provider, per-role model overrides, and prompt-caching tunables.');
 								const embeddingSchema = z
 								  .strictObject({
 								    backend: z
 								      .enum(KTX_EMBEDDING_BACKENDS)
 								      .default('deterministic')
 								      .describe('Embedding backend. "deterministic" is a built-in hash-based vector for offline use; "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
 								    model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small"). Ignored by the "deterministic" backend.'),
 								    dimensions: z.int().positive().default(8).describe('Embedding vector dimensionality. Must match the chosen model when using a real provider.'),
 								    openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
 								    sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
 								    batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
 								  })
 								  .describe('Embedding backend, model, and provider credentials.');
 								const workUnitsSchema = z
 								  .strictObject({
 								    stepBudget: z.int().positive().default(40).describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
 								    maxConcurrency: z.int().positive().default(1).describe('Maximum number of work units run concurrently during ingest.'),
 								    failureMode: z
 								      .enum(KTX_WORK_UNIT_FAILURE_MODES)
 								      .default('continue')
 								      .describe('Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.'),
 								  })
 								  .describe('Concurrency and failure handling for ingest work units.');
 								const ingestSchema = z
 								  .strictObject({
 								    adapters: z
 								      .array(z.string().min(1))
 								      .default([])
 								      .describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
 								    embeddings: embeddingSchema
 								      .prefault({ backend: 'deterministic', model: 'deterministic' })
 								      .describe('Embedding configuration used when ingest adapters need to embed documents.'),
 								    workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
 								  })
 								  .describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');
 								const scanEnrichmentSchema = z
 								  .strictObject({
 								    mode: z
 								      .enum(KTX_ENRICHMENT_MODES)
 								      .default('none')
 								      .describe('Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.'),
 								    embeddings: embeddingSchema.optional().describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
 								  })
 								  .describe('Schema-scan enrichment: how columns and tables are described.');
 								// Tunables for relationship discovery and validation during a schema scan.
 								// Every field carries a default except `validationBudget`, which stays optional.
 								const scanRelationshipsSchema = z
 								  .strictObject({
 								    // --- Feature switches ---
 								    enabled: z
 								      .boolean()
 								      .default(true)
 								      .describe('Master switch for relationship discovery during scan.'),
 								    llmProposals: z
 								      .boolean()
 								      .default(true)
 								      .describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
 								    validationRequiredForManifest: z.boolean().default(true).describe('When true, only relationships that pass database-side validation are written to the manifest.'),

 								    // --- Confidence thresholds, both constrained to the closed range [0, 1] ---
 								    acceptThreshold: z.number().min(0).max(1).default(0.85).describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
 								    reviewThreshold: z.number().min(0).max(1).default(0.55).describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),

 								    // --- Size limits, all strictly positive integers ---
 								    maxLlmTablesPerBatch: z.int().positive().default(40).describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
 								    maxCandidatesPerColumn: z.int().positive().default(25).describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
 								    profileSampleRows: z
 								      .int()
 								      .positive()
 								      .default(10000)
 								      .describe('Number of rows sampled per table when profiling values for relationship inference.'),
 								    validationConcurrency: z
 								      .int()
 								      .positive()
 								      .default(4)
 								      .describe('Number of relationship validation queries run in parallel against the database.'),

 								    // Either the literal "all" or a non-negative integer; no default is applied here.
 								    validationBudget: z.union([z.literal('all'), z.int().nonnegative()]).optional().describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
 								  })
 								  .describe('Schema-scan relationship discovery and validation tunables.');
 								// Top-level `scan` section: groups the enrichment and relationship sub-sections.
 								// Both sub-sections are pre-filled with `{}` when omitted so their own defaults apply.
 								const scanSchema = z
 								  .strictObject({
 								    enrichment: scanEnrichmentSchema
 								      .prefault({})
 								      .describe('Column/table enrichment configuration.'),
 								    relationships: scanRelationshipsSchema
 								      .prefault({})
 								      .describe('Relationship discovery and validation configuration.'),
 								  })
 								  .describe('Schema-scan configuration: enrichment and relationship discovery.');
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
 								// `setup` section written by `ktx setup`.
 								// The legacy `completed_steps` key is still accepted on input (any value) so older
 								// ktx.yaml files keep loading, but the transform below drops it from the parsed result.
 								const setupSchema = z
 								  .strictObject({
 								    database_connection_ids: z
 								      .array(z.string().min(1))
 								      .default([])
 								      .describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
 								    completed_steps: z
 								      .unknown()
 								      .optional()
 								      .describe('Deprecated. Accepted for backward compatibility but ignored; KTX no longer tracks setup progress here.'),
 								  })
 								  // Keep only the supported field in the output shape.
 								  .transform(({ database_connection_ids }) => ({ database_connection_ids }))
 								  .describe('Setup-wizard state captured during `ktx setup`.');
 								// `storage.git` section: commit policy for the Git-backed store.
 								const storageGitSchema = z
 								  .strictObject({
 								    auto_commit: z
 								      .boolean()
 								      .default(true)
 								      .describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
 								    // Must be non-empty when provided; falls back to a placeholder identity.
 								    author: z.string().min(1).default('ktx <ktx@example.com>').describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
 								  })
 								  .describe('Git-backed storage commit policy.');
 								// `storage` section: which backends hold KTX state and search indexes, plus the
 								// nested Git commit policy. Allowed values come from KTX_STORAGE_STATES and
 								// KTX_SEARCH_BACKENDS declared at the top of this file.
 								const storageSchema = z
 								  .strictObject({
 								    state: z
 								      .enum(KTX_STORAGE_STATES)
 								      .default('sqlite')
 								      .describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
 								    search: z
 								      .enum(KTX_SEARCH_BACKENDS)
 								      .default('sqlite-fts5')
 								      .describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
 								    // Pre-filled with `{}` when omitted so the nested defaults apply.
 								    git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
 								  })
 								  .describe('Storage backends and commit policy for KTX state and search indexes.');
 								// One entry of the top-level `connections` map.
 								// Declared as a loose object: only `driver` and `url` are checked here, and any
 								// additional driver-specific keys are let through.
 								const connectionSchema = z
 								  .looseObject({
 								    driver: z
 								      .string()
 								      .min(1)
 								      .optional()
 								      .describe('Connector driver identifier (e.g. "postgres", "bigquery", "snowflake").'),
 								    url: z
 								      .string()
 								      .optional()
 								      .describe('Connection URL or DSN. Format depends on the driver; may contain environment-variable references.'),
 								  })
 								  .describe('A single database/connector connection entry. Additional driver-specific fields are accepted and passed through.');
 								// `agent` section. Currently holds a single feature block, `run_research`,
 								// which is pre-filled with `{}` when omitted so its defaults apply.
 								const agentSchema = z
 								  .strictObject({
 								    run_research: z
 								      .strictObject({
 								        // Off by default.
 								        enabled: z
 								          .boolean()
 								          .default(false)
 								          .describe('Master switch for the research agent.'),
 								        // Integer >= 0.
 								        max_iterations: z.number().int().nonnegative().default(20).describe('Maximum number of tool-call iterations the research agent may take per run.'),
 								        // Each tool identifier must be a non-empty string.
 								        default_toolset: z.array(z.string().min(1)).default(['sl_query', 'wiki_search', 'sl_read_source']).describe('Default list of tool identifiers exposed to the research agent.'),
 								      })
 								      .prefault({})
 								      .describe('Research-agent configuration.'),
 								  })
 								  .describe('Agent feature configuration.');
 								// `memory` section: a single auto-commit switch, enabled by default.
 								const memorySchema = z
 								  .strictObject({
 								    auto_commit: z
 								      .boolean()
 								      .default(true)
 								      .describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
 								  })
 								  .describe('Memory subsystem configuration.');
 								// Root schema for ktx.yaml.
 								// `setup` is the only section that may be absent from the parsed result; `connections`
 								// defaults to an empty map, and every other section is pre-filled with `{}` so that
 								// its nested defaults are applied.
 								const ktxProjectConfigSchema = z
 								  .strictObject({
 								    setup: setupSchema
 								      .optional()
 								      .describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
 								    connections: z.record(z.string(), connectionSchema).default({}).describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
 								    storage: storageSchema
 								      .prefault({})
 								      .describe('Storage backends and commit policy for KTX state and search indexes.'),
 								    llm: llmSchema
 								      .prefault({})
 								      .describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
 								    ingest: ingestSchema
 								      .prefault({})
 								      .describe('Ingest pipeline configuration.'),
 								    agent: agentSchema
 								      .prefault({})
 								      .describe('Agent feature configuration.'),
 								    memory: memorySchema
 								      .prefault({})
 								      .describe('Memory subsystem configuration.'),
 								    scan: scanSchema
 								      .prefault({})
 								      .describe('Schema-scan configuration: enrichment and relationship discovery.'),
 								  })
 								  .describe('Configuration schema for KTX project files (ktx.yaml).');
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
 								// Public config types, all derived from the Zod schemas in this file via `z.infer`
 								// so the TypeScript shapes cannot drift from what the validator accepts.

 								/** Fully parsed ktx.yaml document (inferred from the root schema). */
 								export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
 								/** The `llm` section. */
 								export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
 								/** Shape inferred from `llmProviderSchema`. */
 								export type KtxProjectLlmProviderConfig = z.infer<typeof llmProviderSchema>;
 								/** Shape inferred from `embeddingSchema`. */
 								export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
 								/** The `scan.enrichment` section. */
 								export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
 								/** Shape inferred from `workUnitsSchema`. */
 								export type KtxIngestWorkUnitsConfig = z.infer<typeof workUnitsSchema>;
 								/** The `scan.relationships` section. */
 								export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
 								/** The `scan` section. */
 								export type KtxProjectScanConfig = z.infer<typeof scanSchema>;
 								/** A single entry of the `connections` map. */
 								export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
 								/** The `setup` section as inferred from `setupSchema`, whose transform keeps only `database_connection_ids`. */
 								export type KtxProjectSetupConfig = z.infer<typeof setupSchema>;
 								/** Type of the `storage.state` field. */
 								export type KtxStorageState = z.infer<typeof storageSchema>['state'];
 								/** Type of the `storage.search` field. */
 								export type KtxSearchBackend = z.infer<typeof storageSchema>['search'];
 								/** One problem found while validating ktx.yaml. */
 								export interface KtxConfigIssue {
 								  /** Dot-joined location of the offending value (e.g. `llm.provider.backend`); empty for a root-level issue. */
 								  path: string;
 								  /** Human-readable description of the problem. */
 								  message: string;
 								  /** Migration hint; only set when the offending key has an entry in the deprecated-key table. */
 								  fix?: string;
 								}
 								/** Outcome of validating a ktx.yaml document. */
 								export interface KtxConfigValidation {
 								  /** Overall validation verdict. */
 								  ok: boolean;
 								  /** The individual problems reported by validation. */
 								  issues: KtxConfigIssue[];
 								}
 								/**
 								 * Type guard for plain key/value containers.
 								 *
 								 * @param value - Anything, typically the result of parsing YAML.
 								 * @returns `true` for any non-null object that is not an array.
 								 */
 								function isRecord(value: unknown): value is Record<string, unknown> {
 								  // `typeof null` is 'object' and arrays are objects too, so rule both out first.
 								  if (value === null || Array.isArray(value)) {
 								    return false;
 								  }
 								  return typeof value === 'object';
 								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function dottedPath(path: ReadonlyArray<PropertyKey>): string {
 								  return path.map((segment) => String(segment)).join('.');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
 								  let cursor: unknown = root;
 								  for (const segment of path) {
 								    if (cursor === null || typeof cursor !== 'object') return undefined;
 								    cursor = (cursor as Record<PropertyKey, unknown>)[segment];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return cursor;
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
 								  const basePath = dottedPath(issue.path);
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  if (issue.code === 'unrecognized_keys') {
 								    const keys = (issue as { keys?: readonly string[] }).keys ?? [];
 								    return keys.map((key) => {
 								      const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
 								      const hint = DEPRECATED_KEY_HINTS[fullPath];
 								      if (hint !== undefined) {
 								        return { path: fullPath, message: `Unsupported ${fullPath}: ${hint}`, fix: hint };
 								      }
 								      return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
 								    });
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  const lastSegment = issue.path[issue.path.length - 1];
 								  if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
 								    const value = valueAtPath(input, issue.path);
 								    return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
 								  return error.issues.flatMap((issue) => formatIssue(issue, input));
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								function formatZodError(error: z.ZodError, input: unknown): string {
 								  return collectIssues(error, input)
 								    .map((issue) => issue.message)
 								    .join('\n');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												fix: remove project from ktx config (#95)
											
										
										
											2026-05-14 17:39:31 +02:00
+								export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
 								  return ktxProjectConfigSchema.parse({});
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								}
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
+								export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  const parsed = YAML.parse(raw) as unknown;
 								  if (!isRecord(parsed)) {
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
+								    throw new Error('ktx.yaml must contain a YAML object');
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  const result = ktxProjectConfigSchema.safeParse(parsed);
 								  if (!result.success) {
 								    throw new Error(formatZodError(result.error, parsed));
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
+								  }
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
+								  return result.data;
 								}
-												Initial open-source release

											
										
										
											2026-05-10 23:12:26 +02:00
-												refactor(context): validate ktx.yaml with Zod and surface issues in status (#91)

* refactor(context): validate ktx.yaml with Zod and surface issues in status

- Replace hand-rolled ktx.yaml parsing with a strict Zod schema and
  derive KtxProjectConfig types from it.
- Add validateKtxProjectConfig returning structured KtxConfigIssue[]
  with migration hints for deprecated keys (ingest.llm,
  scan.enrichment.backend, etc.).
- Wire ktx status/doctor to run validation, render schema issues in
  plain and JSON output, and add a Config row to project status.
- Update the orbit example to camelCase scan.relationships keys to
  match the schema.

* fix(context): tolerate legacy setup.completed_steps and optional driver

- Accept and drop the legacy setup.completed_steps field so existing
  ktx.yaml files migrated from older versions still load.
- Make connections.<id>.driver optional in the schema; runtime code
  already produces a clear "no driver" error at use time.

* feat(cli): add ktx status --validate to run only ktx.yaml schema validation

- New --validate flag dispatches a focused runKtxDoctor 'validate' branch
  that reads ktx.yaml, runs validateKtxProjectConfig, and skips LLM,
  connection, embedding, and query-history checks.
- Plain output prints a single Config row; JSON output emits
  {ok: true} on success or the existing invalid_config / missing_project
  shapes on failure.
											
										
										
											2026-05-14 15:36:35 +02:00
/**
 * Checks raw ktx.yaml text against the project config schema without throwing
 * on invalid input.
 *
 * The text is first parsed as YAML, then required to pass `isRecord`, and
 * finally run through `ktxProjectConfigSchema.safeParse`.
 *
 * @param raw - Contents of ktx.yaml as a string.
 * @returns `{ ok: true, issues: [] }` when the schema accepts the document.
 *   Otherwise `{ ok: false, issues }`, where `issues` is either a single
 *   entry with an empty `path` (YAML parse failure or non-record document)
 *   or whatever `collectIssues` derives from the schema error.
 */
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
  let document: unknown;
  try {
    document = YAML.parse(raw);
  } catch (cause) {
    // Non-Error throwables are stringified so the issue always has a message.
    const detail = cause instanceof Error ? cause.message : String(cause);
    return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${detail}` }] };
  }

  // Reject anything that is not a record before handing it to the schema.
  if (!isRecord(document)) {
    return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
  }

  const outcome = ktxProjectConfigSchema.safeParse(document);
  if (!outcome.success) {
    // The parsed document is passed along with the schema error to collectIssues.
    return { ok: false, issues: collectIssues(outcome.error, document) };
  }
  return { ok: true, issues: [] };
}
-												feat(cli): add ktx dev schema to emit ktx.yaml JSON Schema (#93)

Annotates the Zod config schema with .describe() text on every field and
adds generateKtxProjectConfigJsonSchema() plus a ktx dev schema command
that prints (or writes) a draft-07 JSON Schema for editors and LLM agents.
											
										
										
											2026-05-14 16:21:29 +02:00
/**
 * Builds a JSON Schema document for ktx.yaml from `ktxProjectConfigSchema`.
 *
 * The schema is generated with `z.toJSONSchema` using the `draft-7` target and
 * the `input` side of the Zod schema.
 *
 * @returns A plain object holding the fixed `$schema`, `$id` and `title`
 *   entries followed by every key of the generated schema. Because the
 *   generated keys are spread last, a generated key with the same name
 *   replaces the fixed entry.
 */
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
  const header = {
    $schema: 'http://json-schema.org/draft-07/schema#',
    $id: 'https://ktx.dev/schemas/ktx-project-config.json',
    title: 'ktx.yaml',
  };
  const generated = z.toJSONSchema(ktxProjectConfigSchema, {
    target: 'draft-7',
    io: 'input',
  }) as Record<string, unknown>;
  return { ...header, ...generated };
}
-												rename klo to ktx

											
										
										
											2026-05-10 23:51:24 +02:00
/**
 * Renders a project config as YAML text suitable for writing to ktx.yaml.
 *
 * When `config.ingest.adapters` is empty, the `ingest` section is written with
 * only its `embeddings` and `workUnits` entries, so no empty adapter list
 * appears in the output. Otherwise the config is serialized unchanged.
 *
 * @param config - The project configuration to serialize.
 * @returns YAML text with trailing whitespace trimmed and a single newline
 *   appended.
 */
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
  let output: unknown = config;
  if (config.ingest.adapters.length === 0) {
    const { embeddings, workUnits } = config.ingest;
    output = { ...config, ingest: { embeddings, workUnits } };
  }
  // lineWidth: 0 is passed through to the yaml library (documented there as disabling line folding).
  const yamlText = YAML.stringify(output, { indent: 2, lineWidth: 0 });
  return `${yamlText.trimEnd()}\n`;
}