mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
* feat(cli): add ingest rate limit governor * feat(cli): wire ingest rate-limit config * feat(cli): report provider rate-limit signals * feat(cli): show ingest rate-limit waits * fix(cli): complete rate-limit event coverage * fix(cli): abort ingest provider calls cleanly * fix(cli): propagate ingest cancellation * fix(cli): reject pre-aborted ingest rate-limit waits * fix(cli): honor Claude rate-limit reset waits * fix(cli): retry thrown Codex rate-limit failures * fix(cli): type Claude rate-limit result details * fix(cli): emit ingest rate-limit countdowns from rejected signals * fix(cli): report ai sdk rate-limit header utilization * fix(cli): gate LLM rate-limit retries on the governor budget The AI SDK and Codex runtimes retried 429 / opaque rate-limit failures up to 6-7 times with no backoff when constructed without a RateLimitGovernor (scan, memory, setup) or with pacing disabled, ignoring Retry-After and worsening the limit. The outer retry loop only cooperates with the governor's pause, so without active pacing there is no backoff to apply. Route the retry bound through a single source: RateLimitGovernor .maxRetryAttempts(), which returns retry.maxAttempts when enabled and 1 (no outer retry) when absent or disabled. All three runtimes (ai-sdk, codex, claude-code) now use it, so ingest.rateLimit.retry.maxAttempts genuinely controls attempts and the hard-coded 6 (plus Codex's off-by-one extra attempt) is gone. Backend-native retry (e.g. the AI SDK's maxRetries) still handles transient 429s. Also correct the ktx.yaml docs for maxWaitMs (caps each wait, not the whole run) and maxAttempts, and sync uv.lock ktx-sl/ktx-daemon to 0.9.0.
425 lines
19 KiB
TypeScript
425 lines
19 KiB
TypeScript
import { KTX_MODEL_ROLES } from '../../llm/types.js';
|
||
import YAML from 'yaml';
|
||
import * as z from 'zod';
|
||
import { connectionConfigSchema } from './driver-schemas.js';
|
||
|
||
const KTX_LLM_BACKENDS = ['none', 'anthropic', 'vertex', 'gateway', 'claude-code', 'codex'] as const;
|
||
const KTX_EMBEDDING_BACKENDS = ['none', 'openai', 'sentence-transformers'] as const;
|
||
const KTX_PROMPT_CACHE_TTLS = ['5m', '1h'] as const;
|
||
const KTX_ENRICHMENT_MODES = ['none', 'deterministic', 'llm'] as const;
|
||
const KTX_WORK_UNIT_FAILURE_MODES = ['abort', 'continue'] as const;
|
||
const KTX_STORAGE_STATES = ['sqlite', 'postgres'] as const;
|
||
const KTX_SEARCH_BACKENDS = ['sqlite-fts5', 'postgres-hybrid'] as const;
|
||
|
||
const apiCredentialsSchema = z
|
||
.strictObject({
|
||
api_key: z.string().min(1).optional().describe('API key for the provider. Read from this value or the provider-specific environment variable.'),
|
||
base_url: z.string().min(1).optional().describe('Override the provider\'s default API base URL (e.g. a proxy or self-hosted gateway).'),
|
||
})
|
||
.describe('API credentials block: optional key and base URL for an LLM or embedding provider.');
|
||
|
||
const vertexProviderSchema = z
|
||
.strictObject({
|
||
project: z.string().min(1).optional().describe('Google Cloud project ID hosting the Vertex AI endpoint.'),
|
||
location: z.string().min(1).describe('Vertex AI region (e.g. "us-east5"). Required whenever the vertex provider block is present.'),
|
||
})
|
||
.describe('Google Vertex AI provider configuration.');
|
||
|
||
const sentenceTransformersSchema = z
|
||
.strictObject({
|
||
base_url: z.string().default('').describe('Base URL of the sentence-transformers HTTP server. Leave empty (or omit) when the `ktx` CLI is expected to start and manage a local daemon for this project; programmatic consumers must populate it explicitly.'),
|
||
pathPrefix: z.string().optional().describe('Optional URL path prefix prepended to embedding requests.'),
|
||
})
|
||
.describe('Sentence-transformers embedding server configuration.');
|
||
|
||
const llmProviderSchema = z
|
||
.strictObject({
|
||
backend: z
|
||
.enum(KTX_LLM_BACKENDS)
|
||
.default('none')
|
||
.describe(
|
||
'LLM provider backend. "none" disables LLM features; "anthropic" / "vertex" / "gateway" require the matching nested credentials block; "claude-code" uses the local Claude Code session; "codex" uses the local Codex session.',
|
||
),
|
||
vertex: vertexProviderSchema.optional().describe('Vertex AI credentials, used when backend is "vertex".'),
|
||
anthropic: apiCredentialsSchema.optional().describe('Anthropic API credentials, used when backend is "anthropic".'),
|
||
gateway: apiCredentialsSchema.optional().describe('AI Gateway credentials, used when backend is "gateway".'),
|
||
})
|
||
.describe('LLM provider selection and credentials.');
|
||
|
||
const promptCachingSchema = z
|
||
.strictObject({
|
||
enabled: z.boolean().optional().describe('Master switch for Anthropic-style prompt caching. When omitted, the backend\'s default applies.'),
|
||
systemTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the system prompt segment ("5m" or "1h").'),
|
||
toolsTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for the tools/schema segment ("5m" or "1h").'),
|
||
historyTtl: z.enum(KTX_PROMPT_CACHE_TTLS).optional().describe('Cache TTL for conversation-history cache breakpoints ("5m" or "1h").'),
|
||
vertexFallbackTo5m: z.boolean().optional().describe('When true, transparently downgrade 1h TTLs to 5m on Vertex, which does not support 1h caching.'),
|
||
})
|
||
.describe('Prompt-caching tunables for Anthropic-compatible providers.');
|
||
|
||
const llmSchema = z
|
||
.strictObject({
|
||
provider: llmProviderSchema.prefault({}).describe('LLM provider backend and credentials.'),
|
||
models: z
|
||
.partialRecord(z.enum(KTX_MODEL_ROLES), z.string().min(1))
|
||
.default({})
|
||
.describe('Per-role model overrides keyed by KTX model role (e.g. "default", "triage"). Values are provider-specific model identifiers.'),
|
||
promptCaching: promptCachingSchema.optional().describe('Optional prompt-caching tunables.'),
|
||
})
|
||
.describe('LLM provider, per-role model overrides, and prompt-caching tunables.');
|
||
|
||
const embeddingSchema = z
|
||
.strictObject({
|
||
backend: z
|
||
.enum(KTX_EMBEDDING_BACKENDS)
|
||
.default('none')
|
||
.describe('Embedding backend. "openai" and "sentence-transformers" call out to those providers; "none" disables embeddings.'),
|
||
model: z.string().min(1).optional().describe('Provider-specific embedding model identifier (e.g. "text-embedding-3-small").'),
|
||
dimensions: z
|
||
.int()
|
||
.positive()
|
||
.default(8)
|
||
.describe(
|
||
'Embedding vector dimensionality. The default value 8 is a placeholder that is only valid alongside backend: none; ' +
|
||
'before switching backend to openai/sentence-transformers, set this explicitly to match the chosen model ' +
|
||
'(e.g. 384 for all-MiniLM-L6-v2, 1536 for text-embedding-3-small).',
|
||
),
|
||
openai: apiCredentialsSchema.optional().describe('OpenAI credentials, used when backend is "openai".'),
|
||
sentenceTransformers: sentenceTransformersSchema.optional().describe('Sentence-transformers server config, used when backend is "sentence-transformers".'),
|
||
batchSize: z.int().positive().optional().describe('Number of texts per embedding API call. Omit to use the backend default.'),
|
||
})
|
||
.describe('Embedding backend, model, and provider credentials.');
|
||
|
||
const workUnitsSchema = z
|
||
.strictObject({
|
||
stepBudget: z.int().positive().default(40).describe('Maximum number of agent steps allowed per work unit before it is force-terminated.'),
|
||
maxConcurrency: z.int().positive().default(1).describe('Maximum number of work units run concurrently during ingest.'),
|
||
failureMode: z
|
||
.enum(KTX_WORK_UNIT_FAILURE_MODES)
|
||
.default('continue')
|
||
.describe('Behavior when a work unit fails: "abort" stops the whole ingest run; "continue" records the failure and keeps going.'),
|
||
})
|
||
.describe('Concurrency and failure handling for ingest work units.');
|
||
|
||
const ingestRateLimitRetrySchema = z
|
||
.strictObject({
|
||
maxAttempts: z
|
||
.int()
|
||
.positive()
|
||
.default(6)
|
||
.describe(
|
||
'Maximum attempts for a single rate-limited LLM call before the failure surfaces, counting the first try. Also bounds how far opaque backoff grows for providers that do not expose a reset time.',
|
||
),
|
||
baseDelayMs: z.int().positive().default(1_000).describe('Initial opaque retry delay in milliseconds.'),
|
||
maxDelayMs: z.int().positive().default(60_000).describe('Maximum opaque retry delay in milliseconds.'),
|
||
jitter: z.boolean().default(true).describe('When true, apply bounded jitter to opaque retry delays.'),
|
||
})
|
||
.describe('Retry policy for rate-limit responses that do not include a reset time or retry-after value.');
|
||
|
||
const ingestRateLimitSchema = z
|
||
.strictObject({
|
||
enabled: z.boolean().default(true).describe('Master switch for ingest LLM rate-limit pacing and visible waits.'),
|
||
throttleThreshold: z
|
||
.number()
|
||
.min(0)
|
||
.max(1)
|
||
.default(0.8)
|
||
.describe('Provider utilization at or above which ingest throttles new work-unit starts.'),
|
||
minConcurrencyUnderPressure: z
|
||
.int()
|
||
.positive()
|
||
.default(1)
|
||
.describe('Effective work-unit concurrency while a provider is under rate-limit pressure.'),
|
||
maxWaitMs: z
|
||
.int()
|
||
.positive()
|
||
.optional()
|
||
.describe('Optional cap on a single provider reset wait. Omit to wait indefinitely until the provider reset time.'),
|
||
retry: ingestRateLimitRetrySchema.prefault({}).describe('Opaque retry policy for providers without reset hints.'),
|
||
})
|
||
.describe('Rate-limit pacing and wait policy for ingest LLM calls.');
|
||
|
||
const ingestSchema = z
|
||
.strictObject({
|
||
adapters: z
|
||
.array(z.string().min(1))
|
||
.default([])
|
||
.describe('Ingest adapter identifiers to run (e.g. "metabase", "looker", "historic-sql"). Empty array means no adapters are run.'),
|
||
embeddings: embeddingSchema
|
||
.prefault({ backend: 'none' })
|
||
.describe('Embedding configuration used when ingest adapters need to embed documents.'),
|
||
workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'),
|
||
rateLimit: ingestRateLimitSchema.prefault({}).describe('LLM rate-limit pacing and visible-wait policy for ingest.'),
|
||
profile: z
|
||
.union([z.boolean(), z.literal('json')])
|
||
.default(false)
|
||
.describe(
|
||
'Print a timing breakdown to stderr at the end of each ingest run. `true` prints a human table; `"json"` prints the raw structured profile for coding agents; `false` disables it. Equivalent to the KTX_PROFILE_INGEST environment variable (`1`/`true`/`json`).',
|
||
),
|
||
})
|
||
.describe('Ingest pipeline configuration: adapters, embeddings, and work-unit policy.');
|
||
|
||
const scanEnrichmentSchema = z
|
||
.strictObject({
|
||
mode: z
|
||
.enum(KTX_ENRICHMENT_MODES)
|
||
.default('none')
|
||
.describe('Column/table enrichment mode. "none" disables enrichment; "deterministic" uses local heuristics; "llm" calls the configured LLM provider.'),
|
||
embeddings: embeddingSchema.optional().describe('Optional embedding override for enrichment-time vectorization. Falls back to ingest.embeddings when omitted.'),
|
||
})
|
||
.describe('Schema-scan enrichment: how columns and tables are described.');
|
||
|
||
const scanRelationshipsSchema = z
|
||
.strictObject({
|
||
enabled: z.boolean().default(true).describe('Master switch for relationship discovery during scan.'),
|
||
llmProposals: z.boolean().default(true).describe('When true, propose relationships using the configured LLM in addition to deterministic candidates.'),
|
||
validationRequiredForManifest: z
|
||
.boolean()
|
||
.default(true)
|
||
.describe('When true, only relationships that pass database-side validation are written to the manifest.'),
|
||
acceptThreshold: z
|
||
.number()
|
||
.min(0)
|
||
.max(1)
|
||
.default(0.85)
|
||
.describe('Confidence score (0–1) at or above which an LLM-proposed relationship is auto-accepted into the manifest.'),
|
||
reviewThreshold: z
|
||
.number()
|
||
.min(0)
|
||
.max(1)
|
||
.default(0.55)
|
||
.describe('Confidence score (0–1) at or above which a proposal is surfaced for human review (but not auto-accepted).'),
|
||
maxLlmTablesPerBatch: z
|
||
.int()
|
||
.positive()
|
||
.default(40)
|
||
.describe('Maximum number of tables included in a single LLM relationship-proposal batch.'),
|
||
maxCandidatesPerColumn: z
|
||
.int()
|
||
.positive()
|
||
.default(25)
|
||
.describe('Maximum number of candidate join partners considered per column during relationship discovery.'),
|
||
profileSampleRows: z.int().positive().default(10000).describe('Number of rows sampled per table when profiling values for relationship inference.'),
|
||
profileConcurrency: z
|
||
.int()
|
||
.positive()
|
||
.default(4)
|
||
.describe('Parallel relationship-profile queries run against the database during scan.'),
|
||
validationConcurrency: z.int().positive().default(4).describe('Number of relationship validation queries run in parallel against the database.'),
|
||
validationBudget: z
|
||
.union([z.literal('all'), z.int().nonnegative()])
|
||
.optional()
|
||
.describe('Cap on validation queries per scan run. Use "all" for unlimited, an integer for a hard cap, or omit for the runtime default.'),
|
||
})
|
||
.describe('Schema-scan relationship discovery and validation tunables.');
|
||
|
||
const scanSchema = z
|
||
.strictObject({
|
||
enrichment: scanEnrichmentSchema.prefault({}).describe('Column/table enrichment configuration.'),
|
||
relationships: scanRelationshipsSchema.prefault({}).describe('Relationship discovery and validation configuration.'),
|
||
})
|
||
.describe('Schema-scan configuration: enrichment and relationship discovery.');
|
||
|
||
const setupSchema = z
|
||
.strictObject({
|
||
database_connection_ids: z
|
||
.array(z.string().min(1))
|
||
.default([])
|
||
.describe('Connection IDs (keys of the top-level `connections` map) that the setup wizard treats as the project\'s primary databases.'),
|
||
})
|
||
.describe('Setup-wizard state captured during `ktx setup`.');
|
||
|
||
const storageGitSchema = z
|
||
.strictObject({
|
||
auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits state changes to the local Git-backed store.'),
|
||
author: z
|
||
.string()
|
||
.min(1)
|
||
.default('ktx <ktx@example.com>')
|
||
.describe('Git author identity used for auto-commits, in standard "Name <email>" form.'),
|
||
})
|
||
.describe('Git-backed storage commit policy.');
|
||
|
||
const storageSchema = z
|
||
.strictObject({
|
||
state: z
|
||
.enum(KTX_STORAGE_STATES)
|
||
.default('sqlite')
|
||
.describe('Backend for KTX state storage. "sqlite" uses .ktx/db.sqlite; "postgres" expects a configured Postgres connection.'),
|
||
search: z
|
||
.enum(KTX_SEARCH_BACKENDS)
|
||
.default('sqlite-fts5')
|
||
.describe('Backend for search indexes. "sqlite-fts5" uses SQLite FTS5; "postgres-hybrid" uses Postgres lexical + vector hybrid search.'),
|
||
git: storageGitSchema.prefault({}).describe('Git-backed storage commit policy.'),
|
||
})
|
||
.describe('Storage backends and commit policy for KTX state and search indexes.');
|
||
|
||
const connectionSchema = connectionConfigSchema;
|
||
|
||
const agentSchema = z
|
||
.strictObject({
|
||
run_research: z
|
||
.strictObject({
|
||
enabled: z.boolean().default(false).describe('Master switch for the research agent.'),
|
||
max_iterations: z
|
||
.number()
|
||
.int()
|
||
.nonnegative()
|
||
.default(20)
|
||
.describe('Maximum number of tool-call iterations the research agent may take per run.'),
|
||
default_toolset: z
|
||
.array(z.string().min(1))
|
||
.default(['sl_query', 'wiki_search', 'sl_read_source'])
|
||
.describe('Default list of tool identifiers exposed to the research agent.'),
|
||
})
|
||
.prefault({})
|
||
.describe('Research-agent configuration.'),
|
||
})
|
||
.describe('Agent feature configuration.');
|
||
|
||
const memorySchema = z
|
||
.strictObject({
|
||
auto_commit: z.boolean().default(true).describe('When true, KTX automatically commits memory updates to the Git-backed store.'),
|
||
})
|
||
.describe('Memory subsystem configuration.');
|
||
|
||
const ktxProjectConfigSchema = z
|
||
.strictObject({
|
||
setup: setupSchema.optional().describe('Setup-wizard state. Written by `ktx setup`; may be omitted.'),
|
||
connections: z
|
||
.record(z.string(), connectionSchema)
|
||
.default({})
|
||
.describe('Map of connection ID to connector configuration. Keys are user-chosen names referenced elsewhere in the config.'),
|
||
storage: storageSchema.prefault({}).describe('Storage backends and commit policy for KTX state and search indexes.'),
|
||
llm: llmSchema.prefault({}).describe('LLM provider, per-role model overrides, and prompt-caching tunables.'),
|
||
ingest: ingestSchema.prefault({}).describe('Ingest pipeline configuration.'),
|
||
agent: agentSchema.prefault({}).describe('Agent feature configuration.'),
|
||
memory: memorySchema.prefault({}).describe('Memory subsystem configuration.'),
|
||
scan: scanSchema.prefault({}).describe('Schema-scan configuration: enrichment and relationship discovery.'),
|
||
})
|
||
.describe('Configuration schema for KTX project files (ktx.yaml).');
|
||
|
||
export type KtxProjectConfig = z.infer<typeof ktxProjectConfigSchema>;
|
||
export type KtxProjectLlmConfig = z.infer<typeof llmSchema>;
|
||
export type KtxProjectEmbeddingConfig = z.infer<typeof embeddingSchema>;
|
||
export type KtxScanEnrichmentConfig = z.infer<typeof scanEnrichmentSchema>;
|
||
export type KtxScanRelationshipConfig = z.infer<typeof scanRelationshipsSchema>;
|
||
export type KtxProjectConnectionConfig = z.infer<typeof connectionSchema>;
|
||
|
||
export interface KtxConfigIssue {
|
||
path: string;
|
||
message: string;
|
||
fix?: string;
|
||
}
|
||
|
||
export interface KtxConfigValidation {
|
||
ok: boolean;
|
||
issues: KtxConfigIssue[];
|
||
}
|
||
|
||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||
}
|
||
|
||
function dottedPath(path: ReadonlyArray<PropertyKey>): string {
|
||
return path.map((segment) => String(segment)).join('.');
|
||
}
|
||
|
||
function valueAtPath(root: unknown, path: ReadonlyArray<PropertyKey>): unknown {
|
||
let cursor: unknown = root;
|
||
for (const segment of path) {
|
||
if (cursor === null || typeof cursor !== 'object') return undefined;
|
||
cursor = (cursor as Record<PropertyKey, unknown>)[segment];
|
||
}
|
||
return cursor;
|
||
}
|
||
|
||
function formatIssue(issue: z.core.$ZodIssue, input: unknown): KtxConfigIssue[] {
|
||
const basePath = dottedPath(issue.path);
|
||
|
||
if (issue.code === 'unrecognized_keys') {
|
||
const keys = (issue as { keys?: readonly string[] }).keys ?? [];
|
||
return keys.map((key) => {
|
||
const fullPath = basePath.length > 0 ? `${basePath}.${key}` : key;
|
||
return { path: fullPath, message: `Unsupported ${fullPath}: unknown field` };
|
||
});
|
||
}
|
||
|
||
const lastSegment = issue.path[issue.path.length - 1];
|
||
if (lastSegment === 'backend' && (issue.code === 'invalid_value' || issue.code === 'invalid_type')) {
|
||
const value = valueAtPath(input, issue.path);
|
||
return [{ path: basePath, message: `Unsupported ${basePath}: ${String(value)}` }];
|
||
}
|
||
|
||
return [{ path: basePath, message: basePath.length > 0 ? `${basePath}: ${issue.message}` : issue.message }];
|
||
}
|
||
|
||
function collectIssues(error: z.ZodError, input: unknown): KtxConfigIssue[] {
|
||
return error.issues.flatMap((issue) => formatIssue(issue, input));
|
||
}
|
||
|
||
function formatZodError(error: z.ZodError, input: unknown): string {
|
||
return collectIssues(error, input)
|
||
.map((issue) => issue.message)
|
||
.join('\n');
|
||
}
|
||
|
||
export function buildDefaultKtxProjectConfig(): KtxProjectConfig {
|
||
return ktxProjectConfigSchema.parse({});
|
||
}
|
||
|
||
export function parseKtxProjectConfig(raw: string): KtxProjectConfig {
|
||
const parsed = YAML.parse(raw) as unknown;
|
||
if (!isRecord(parsed)) {
|
||
throw new Error('ktx.yaml must contain a YAML object');
|
||
}
|
||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||
if (!result.success) {
|
||
throw new Error(formatZodError(result.error, parsed));
|
||
}
|
||
return result.data;
|
||
}
|
||
|
||
export function validateKtxProjectConfig(raw: string): KtxConfigValidation {
|
||
let parsed: unknown;
|
||
try {
|
||
parsed = YAML.parse(raw);
|
||
} catch (error) {
|
||
const message = error instanceof Error ? error.message : String(error);
|
||
return { ok: false, issues: [{ path: '', message: `ktx.yaml parse error: ${message}` }] };
|
||
}
|
||
if (!isRecord(parsed)) {
|
||
return { ok: false, issues: [{ path: '', message: 'ktx.yaml must contain a YAML object' }] };
|
||
}
|
||
const result = ktxProjectConfigSchema.safeParse(parsed);
|
||
if (result.success) {
|
||
return { ok: true, issues: [] };
|
||
}
|
||
return { ok: false, issues: collectIssues(result.error, parsed) };
|
||
}
|
||
|
||
export function generateKtxProjectConfigJsonSchema(): Record<string, unknown> {
|
||
const schema = z.toJSONSchema(ktxProjectConfigSchema, {
|
||
target: 'draft-7',
|
||
io: 'input',
|
||
}) as Record<string, unknown>;
|
||
return {
|
||
$schema: 'http://json-schema.org/draft-07/schema#',
|
||
$id: 'https://ktx.dev/schemas/ktx-project-config.json',
|
||
title: 'ktx.yaml',
|
||
...schema,
|
||
};
|
||
}
|
||
|
||
export function serializeKtxProjectConfig(config: KtxProjectConfig): string {
|
||
const serializedConfig =
|
||
config.ingest.adapters.length === 0
|
||
? {
|
||
...config,
|
||
ingest: {
|
||
embeddings: config.ingest.embeddings,
|
||
workUnits: config.ingest.workUnits,
|
||
},
|
||
}
|
||
: config;
|
||
return `${YAML.stringify(serializedConfig, { indent: 2, lineWidth: 0 }).trimEnd()}\n`;
|
||
}
|