diff --git a/AGENTS.md b/AGENTS.md index 20f9bcdf..ec715364 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -337,7 +337,8 @@ use `PascalCase` without the suffix. ## Telemetry -**ktx** ships PostHog usage telemetry. When adding commands or events: +**ktx** ships PostHog usage telemetry. Catalog telemetry events use strict +schemas. When adding commands or events: - **MUST NOT**: Add fields that carry user data — file paths, hostnames, environment values, SQL text, schema/table/column names, error messages, @@ -354,6 +355,24 @@ use `PascalCase` without the suffix. of collected data changes. Adding another event with no new field types needs no docs change. +### Error reports + +**ktx** also sends PostHog Error Tracking `$exception` events when telemetry is +enabled. This channel is separate from the strict catalog event schema and is +used only for exception diagnostics. + +`$exception` events may include stack frames, error class names, raw error +messages, cause chains, `source`, `handled`, `fatal`, runtime version fields, +OS/runtime fields, and the hashed `projectId` when known. Stack frames may +include local file paths and the local username when those appear in paths. + +`$exception` events must never intentionally include secrets, credentials, +database URLs, auth headers, raw argv, raw environment values, SQL text, +schema/table/column names as explicit properties, customer row data, user prompt +text, or raw MCP arguments. Reporters must redact call-site-provided secret +snapshots and common static credential patterns before the SDK serializes the +exception. + ## Documentation and Specs - Keep public documentation in `README.md`, package READMEs, example READMEs, diff --git a/README.md b/README.md index 2c433e0d..d286e3f1 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Documentation Join the ktx Slack community License - Y Combinator P25 + Y Combinator P25

@@ -23,6 +23,10 @@ Slack

+

+ Built and maintained by Kaelio +

+ --- **ktx** is a self-improving context layer that teaches agents how to query your @@ -247,11 +251,17 @@ uv run pytest -q ## Telemetry -**ktx** collects anonymous usage telemetry from interactive CLI runs to -improve setup, command reliability, and data-agent workflows. No file paths, -hostnames, SQL, schema names, error messages, or argv are recorded. See -[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the -event catalog and opt-out options. +**ktx** collects privacy-conscious usage telemetry to understand installs and +improve setup, command reliability, and data-agent workflows. Catalog telemetry +events do not record file paths, hostnames, SQL, schema names, table names, +column names, error messages, raw environment values, or argv. Error reports use +PostHog Error Tracking and can include stack frames and raw error messages, +which may contain local file paths or the local username in those paths. +**ktx** redacts secrets, credentials, database URLs, auth headers, argv, raw +environment values, SQL text, row data, and user-typed prompt or MCP argument +text from the explicit `$exception` payload. See +[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the event +catalog and opt-out options. 
## License diff --git a/assets/star-history.svg b/assets/star-history.svg index 23016f3e..b34947d2 100644 --- a/assets/star-history.svg +++ b/assets/star-history.svg @@ -1 +1 @@ -star-history.comMay 17May 24May 31 200400600800kaelio/ktxStar HistoryDateGitHub Stars +star-history.comMay 17May 24May 31 200400600800kaelio/ktxStar HistoryDateGitHub Stars diff --git a/docs-site/app/layout.config.tsx b/docs-site/app/layout.config.tsx index 3245ab09..28ba6b03 100644 --- a/docs-site/app/layout.config.tsx +++ b/docs-site/app/layout.config.tsx @@ -5,7 +5,7 @@ import { SlackIcon } from "@/components/slack-icon"; export const baseOptions: BaseLayoutProps = { nav: { - title: , + title: Logo, transparentMode: "top", }, links: [ diff --git a/docs-site/components/diagram-studio/flows.ts b/docs-site/components/diagram-studio/flows.ts index cddf75cb..e63cc512 100644 --- a/docs-site/components/diagram-studio/flows.ts +++ b/docs-site/components/diagram-studio/flows.ts @@ -305,8 +305,8 @@ export const runtimeEdges: Edge[] = [ sourceHandle: "to-context", target: "context", targetHandle: "in", - type: "default", - label: "search", + type: "smoothstep", + label: "search + read", ...labelBg, style: edgeStyle, markerStart: marker, @@ -318,7 +318,7 @@ export const runtimeEdges: Edge[] = [ sourceHandle: "to-warehouse", target: "warehouse", targetHandle: "in", - type: "default", + type: "smoothstep", label: "read-only", ...labelBg, style: edgeStyle, diff --git a/docs-site/components/logo.tsx b/docs-site/components/logo.tsx index afc926a8..77370280 100644 --- a/docs-site/components/logo.tsx +++ b/docs-site/components/logo.tsx @@ -1,40 +1,56 @@ -export function Logo() { +"use client"; + +import Link from "next/link"; + +const brandFont = { + fontFamily: "var(--font-display), var(--font-sans), sans-serif", +} as const; + +export function Logo({ href = "/", className }: { href?: string; className?: string }) { return ( -
-
- - -
-
+
+
+ + + + + + +
+ + ktx + + + by Kaelio + +
- ktx - - - by Kaelio + Docs
- - Docs -
); } diff --git a/docs-site/components/product-runtime.tsx b/docs-site/components/product-runtime.tsx new file mode 100644 index 00000000..bfe7d64a --- /dev/null +++ b/docs-site/components/product-runtime.tsx @@ -0,0 +1,576 @@ +"use client"; + +import { + type Edge, + type EdgeProps, + getSmoothStepPath, + Handle, + MarkerType, + type Node, + type NodeProps, + Position, +} from "@xyflow/react"; + +import { FlowCanvas } from "./flow-canvas"; + +type AgentNodeData = { + title: string; + items: string[]; +}; + +type HubNodeData = { + title: string; + badge: string; + rows: string[]; +}; + +type TargetNodeData = { + accent: string; + title: string; + body: string; + rows: { text: string; color?: string; mono?: boolean }[]; + badge?: string; +}; + +type AgentNode = Node; +type HubNode = Node; +type TargetNode = Node; +type FlowNode = AgentNode | HubNode | TargetNode; + +const AGENT_W = 252; +const AGENT_H = 96; +const HUB_W = 306; +const HUB_H = 190; +const TARGET_W = 268; +const TARGET_H = 148; + +const CENTER_X = 470; +const ROW_AGENT_Y = 0; +const ROW_HUB_Y = 196; +const ROW_TARGET_Y = 488; + +const AGENT_X = CENTER_X - AGENT_W / 2; +const HUB_X = CENTER_X - HUB_W / 2; + +const TARGET_GAP_X = 38; +const TARGETS_TOTAL = TARGET_W * 2 + TARGET_GAP_X; +const TARGETS_START_X = CENTER_X - TARGETS_TOTAL / 2; +const CONTEXT_X = TARGETS_START_X; +const WAREHOUSE_X = TARGETS_START_X + TARGET_W + TARGET_GAP_X; + +const EDGE_STROKE = "#94a3b8"; +const CYCLE_STROKE = "#0e7490"; +const EMERALD = "#059669"; +const TEAL = "#0e7490"; + +const nodes: FlowNode[] = [ + { + id: "agent", + type: "agent", + position: { x: AGENT_X, y: ROW_AGENT_Y }, + data: { + title: "Your agent", + items: ["Claude Code", "Cursor", "Codex"], + }, + draggable: false, + selectable: false, + }, + { + id: "hub", + type: "hub", + position: { x: HUB_X, y: ROW_HUB_Y }, + data: { + title: "ktx", + badge: "MCP + CLI", + rows: [ + "Search wiki + semantic layer", + "Return approved metrics", + "Compile metrics → 
SQL", + ], + }, + draggable: false, + selectable: false, + }, + { + id: "context", + type: "target", + position: { x: CONTEXT_X, y: ROW_TARGET_Y }, + data: { + accent: TEAL, + title: "Context layer", + body: "Approved definitions agents search before they answer.", + rows: [ + { text: "wiki/*.md", color: EMERALD, mono: true }, + { text: "semantic-layer/*.yaml", color: TEAL, mono: true }, + ], + }, + draggable: false, + selectable: false, + }, + { + id: "warehouse", + type: "target", + position: { x: WAREHOUSE_X, y: ROW_TARGET_Y }, + data: { + accent: "#334155", + title: "Database", + badge: "read-only", + body: "Runs the compiled SQL. ktx never writes to it.", + rows: [], + }, + draggable: false, + selectable: false, + }, +]; + +const labelBg = { + labelBgPadding: [6, 3] as [number, number], + labelBgBorderRadius: 4, + labelStyle: { + fontSize: 13, + fontWeight: 600, + fill: "var(--color-fd-muted-foreground)", + }, + labelBgStyle: { + fill: "var(--color-fd-background)", + stroke: "var(--color-fd-border)", + strokeWidth: 1, + }, +}; + +const requestMarker = { + type: MarkerType.ArrowClosed, + color: EDGE_STROKE, + width: 16, + height: 16, +}; + +const flowEdges: Edge[] = [ + { + id: "e-ask", + source: "agent", + sourceHandle: "ask", + target: "hub", + targetHandle: "ask", + type: "straight", + label: "ask", + ...labelBg, + style: { stroke: EDGE_STROKE, strokeWidth: 1.5 }, + markerEnd: requestMarker, + }, + { + id: "e-answer", + source: "hub", + sourceHandle: "answer", + target: "agent", + targetHandle: "answer", + type: "straight", + label: "answer", + ...labelBg, + style: { stroke: EDGE_STROKE, strokeWidth: 1.5 }, + markerEnd: requestMarker, + }, + { + id: "e-search", + source: "hub", + sourceHandle: "to-context", + target: "context", + targetHandle: "in", + type: "smoothstep", + label: "search + read", + ...labelBg, + style: { stroke: CYCLE_STROKE, strokeWidth: 1.5 }, + markerStart: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 }, + 
markerEnd: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 }, + }, + { + id: "e-readonly", + source: "hub", + sourceHandle: "to-warehouse", + target: "warehouse", + targetHandle: "in", + type: "smoothstep", + label: "read-only", + ...labelBg, + style: { stroke: CYCLE_STROKE, strokeWidth: 1.5 }, + markerStart: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 }, + markerEnd: { type: MarkerType.ArrowClosed, color: CYCLE_STROKE, width: 14, height: 14 }, + }, +]; + +function AgentNodeView({ data }: NodeProps) { + return ( +
+ + +
+ + + +

+ {data.title} +

+
+
+ {data.items.map((item) => ( + + {item} + + ))} +
+
+ ); +} + +function HubNodeView({ data }: NodeProps) { + return ( +
+ + + + +
+ + k + + + {data.title} + + + {data.badge} + +
+
+ {data.rows.map((row) => ( +
+ + + {row} + +
+ ))} +
+
+ ); +} + +function TargetNodeView({ data }: NodeProps) { + return ( +
+ +
+

+ {data.title} +

+ {data.badge ? ( + + {data.badge} + + ) : null} +
+ {data.rows.length > 0 ? ( +
+ {data.rows.map((row) => ( + + {row.text} + + ))} +
+ ) : null} +

+ {data.body} +

+
+ ); +} + +/* ------------------------------- Particles ------------------------------- */ + +const PARTICLE_SPEED_PX_PER_SEC = 150; +const PARTICLE_MIN_DURATION_SEC = 5; + +type Leg = { + sx: number; + sy: number; + sPos: Position; + tx: number; + ty: number; + tPos: Position; +}; + +const AGENT_ASK_X = AGENT_X + AGENT_W * 0.35; +const AGENT_ANSWER_X = AGENT_X + AGENT_W * 0.65; +const AGENT_BOTTOM_Y = ROW_AGENT_Y + AGENT_H; +const HUB_ASK_X = HUB_X + HUB_W * 0.375; +const HUB_ANSWER_X = HUB_X + HUB_W * 0.625; +const HUB_TO_CONTEXT_X = HUB_X + HUB_W * 0.44; +const HUB_TO_WAREHOUSE_X = HUB_X + HUB_W * 0.56; +const HUB_BOTTOM_Y = ROW_HUB_Y + HUB_H; +const CONTEXT_TOP_X = CONTEXT_X + TARGET_W / 2; +const WAREHOUSE_TOP_X = WAREHOUSE_X + TARGET_W / 2; + +function buildCyclePath(spokeX: number, targetX: number): { + d: string; + length: number; +} { + const legs: Leg[] = [ + // agent → hub (ask, down) + { sx: AGENT_ASK_X, sy: AGENT_BOTTOM_Y, sPos: Position.Bottom, tx: HUB_ASK_X, ty: ROW_HUB_Y, tPos: Position.Top }, + // through the hub to its spoke handle (down, drawn behind the hub) + { sx: HUB_ASK_X, sy: ROW_HUB_Y, sPos: Position.Bottom, tx: spokeX, ty: HUB_BOTTOM_Y, tPos: Position.Top }, + // hub → target (down) + { sx: spokeX, sy: HUB_BOTTOM_Y, sPos: Position.Bottom, tx: targetX, ty: ROW_TARGET_Y, tPos: Position.Top }, + // target → hub (up) + { sx: targetX, sy: ROW_TARGET_Y, sPos: Position.Top, tx: spokeX, ty: HUB_BOTTOM_Y, tPos: Position.Bottom }, + // through the hub to its answer handle (up, drawn behind the hub) + { sx: spokeX, sy: HUB_BOTTOM_Y, sPos: Position.Top, tx: HUB_ANSWER_X, ty: ROW_HUB_Y, tPos: Position.Bottom }, + // hub → agent (answer, up) + { sx: HUB_ANSWER_X, sy: ROW_HUB_Y, sPos: Position.Top, tx: AGENT_ANSWER_X, ty: AGENT_BOTTOM_Y, tPos: Position.Bottom }, + ]; + + const segments = legs.map((leg) => { + const [segment] = getSmoothStepPath({ + sourceX: leg.sx, + sourceY: leg.sy, + sourcePosition: leg.sPos, + targetX: leg.tx, + targetY: leg.ty, + 
targetPosition: leg.tPos, + }); + return segment; + }); + + let d = segments[0]; + for (let i = 1; i < segments.length; i += 1) { + d += ` ${segments[i].replace(/^M/, "L")}`; + } + + const length = legs.reduce( + (sum, leg) => sum + Math.abs(leg.tx - leg.sx) + Math.abs(leg.ty - leg.sy), + 0, + ); + + return { d, length }; +} + +type ParticleEdgeData = { + d: string; + duration: number; + beginOffset: number; + color: string; +}; + +type ParticleEdge = Edge; + +function ParticleEdgeView({ id, data }: EdgeProps) { + if (!data) return null; + const pathId = `runtime-particle-path-${id}`; + return ( + <> + + + + + + + + + + + ); +} + +function makeCycleEdge( + id: string, + source: string, + spokeX: number, + targetX: number, + beginFraction: number, +): ParticleEdge { + const { d, length } = buildCyclePath(spokeX, targetX); + const duration = Math.max( + PARTICLE_MIN_DURATION_SEC, + length / PARTICLE_SPEED_PX_PER_SEC, + ); + return { + id, + source, + target: source, + type: "particle", + data: { d, duration, beginOffset: duration * beginFraction, color: CYCLE_STROKE }, + }; +} + +const particleEdges: ParticleEdge[] = [ + makeCycleEdge("p-context", "context", HUB_TO_CONTEXT_X, CONTEXT_TOP_X, 0), + makeCycleEdge("p-warehouse", "warehouse", HUB_TO_WAREHOUSE_X, WAREHOUSE_TOP_X, 0.5), +]; + +const nodeTypes = { + agent: AgentNodeView, + hub: HubNodeView, + target: TargetNodeView, +}; + +const edgeTypes = { + particle: ParticleEdgeView, +}; + +const edges = [...flowEdges, ...particleEdges]; + +export function ProductRuntime() { + return ( +
+
+

+ How serving works +

+

+ At runtime, agents reach ktx through MCP. ktx searches the context + layer, returns approved metrics, and compiles them into read-only SQL + the warehouse runs. +

+
+ +
+
+

+ Serving flow +

+

+ From an agent request to a governed answer +

+

+ The agent asks in plain language. ktx is the only thing that touches + the context layer and the warehouse, and every database connection + is read-only. +

+
+ + +
+ +
+ ); +} diff --git a/docs-site/content/docs/cli-reference/ktx-ingest.mdx b/docs-site/content/docs/cli-reference/ktx-ingest.mdx index 80820efa..ab3d231d 100644 --- a/docs-site/content/docs/cli-reference/ktx-ingest.mdx +++ b/docs-site/content/docs/cli-reference/ktx-ingest.mdx @@ -177,7 +177,9 @@ Slowest phase: reconciliation (2m 05s, 48% of wall time). 2 work units (1 failed Work units run serially by default (`ingest.workUnits.maxConcurrency` is `1`); raise it in `ktx.yaml` if the profile shows the run is bound by serialized -work-unit agent loops. +work-unit agent loops. If the provider reports an LLM rate limit, **ktx** shows +a transient wait message and temporarily reduces effective work-unit concurrency +according to `ingest.rateLimit`. ## Common errors diff --git a/docs-site/content/docs/cli-reference/ktx.mdx b/docs-site/content/docs/cli-reference/ktx.mdx index 8b9a2cc5..ebdeb1c6 100644 --- a/docs-site/content/docs/cli-reference/ktx.mdx +++ b/docs-site/content/docs/cli-reference/ktx.mdx @@ -74,6 +74,44 @@ The public context-build entrypoint is `ktx ingest [connectionId]` or | `-v`, `--version` | Show the CLI package name and version. | | `-h`, `--help` | Show help for the current command. | +## Update notices + +> **Note:** The update notifier writes only to stderr and keeps command stdout +> unchanged. + +When a newer package is available on your installed release channel, `ktx` +prints a short notice after the command finishes: + +```text +↑ Update available: ktx 0.9.0 → 0.10.0 + npm i -g @kaelio/ktx +``` + +Stable installs compare against the npm `latest` dist-tag. +Release-candidate installs compare against the `next` dist-tag and show: + +```text +npm i -g @kaelio/ktx@next +``` + +The check is skipped for JSON output, CI, non-TTY stdout, and hidden completion +commands. 
To opt out explicitly, set any of these environment variables: + +```bash +KTX_NO_UPDATE_CHECK=1 +NO_UPDATE_NOTIFIER=1 +DO_NOT_TRACK=1 +``` + +The `ktx` CLI prints one npm command because globally installed binaries don't +expose a reliable runtime package-manager signal. If you prefer another global +package manager, use the equivalent command: + +```bash +pnpm add -g @kaelio/ktx +yarn global add @kaelio/ktx +``` + ## Project resolution Most commands are project-aware. Pass `--project-dir ` when scripting or diff --git a/docs-site/content/docs/community/telemetry.mdx b/docs-site/content/docs/community/telemetry.mdx index a3a10564..78bdb3e5 100644 --- a/docs-site/content/docs/community/telemetry.mdx +++ b/docs-site/content/docs/community/telemetry.mdx @@ -46,6 +46,33 @@ an operation errors, the detail we record is the error as your tools reported it, which can include identifiers from your setup. If you'd rather send nothing at all, turn telemetry off using any of the options above. +## Error reports + +When telemetry is enabled, **ktx** sends PostHog Error Tracking `$exception` +events for CLI and daemon exceptions. Error reports help group crashes and +handled failures into PostHog issues. + +Error reports can include: + +- Stack frames, including function names, local file paths, line numbers, and + SDK-provided source context. +- Error class names and raw error messages. +- Cause chains when the runtime exposes them. +- `source`, `handled`, and `fatal` diagnostic fields. +- Runtime version, OS, architecture, and CI fields. +- The hashed `projectId` when **ktx** knows the project. + +Error reports never intentionally include: + +- Secrets, credentials, API keys, tokens, cookies, signed URLs, or auth headers. +- Database URLs, connection strings, DSNs, raw argv, or raw environment values. +- SQL text, schema names, table names, or column names as explicit payload + properties. +- Customer row data. +- User prompt text or raw MCP arguments. 
+ +The same opt-out controls listed above disable error reports. + ## Storage and retention Telemetry is sent to PostHog, a third-party product-analytics service used by diff --git a/docs-site/content/docs/configuration/ktx-yaml.mdx b/docs-site/content/docs/configuration/ktx-yaml.mdx index 17a04c53..831e678a 100644 --- a/docs-site/content/docs/configuration/ktx-yaml.mdx +++ b/docs-site/content/docs/configuration/ktx-yaml.mdx @@ -452,6 +452,16 @@ ingest: stepBudget: 40 maxConcurrency: 2 failureMode: continue + rateLimit: + enabled: true + throttleThreshold: 0.8 + minConcurrencyUnderPressure: 1 + maxWaitMs: 600000 + retry: + maxAttempts: 6 + baseDelayMs: 1000 + maxDelayMs: 60000 + jitter: true ``` ### Adapters @@ -498,6 +508,24 @@ handles failures. | `workUnits.maxConcurrency` | `int > 0` | `1` | How many work units run in parallel. | | `workUnits.failureMode` | `abort` \| `continue` | `continue` | `abort` stops the whole ingest run on the first failure; `continue` records it and keeps going. | +### Rate limits + +`rateLimit` controls provider-neutral pacing for LLM calls during ingest. When a +provider reports a subscription window, retry-after delay, or HTTP 429, +**ktx** pauses new work-unit model calls, shows a transient wait in the CLI, +and reduces work-unit concurrency while the provider is under pressure. + +| Field | Type | Default | Purpose | +|-------|------|---------|---------| +| `rateLimit.enabled` | `boolean` | `true` | Master switch for ingest LLM rate-limit pacing and visible waits. | +| `rateLimit.throttleThreshold` | `number between 0 and 1` | `0.8` | Fraction of a known provider window at which **ktx** starts reducing concurrency. | +| `rateLimit.minConcurrencyUnderPressure` | `int > 0` | `1` | Effective work-unit concurrency while a provider is under rate-limit pressure. | +| `rateLimit.maxWaitMs` | `int > 0` | unset | Caps how long a single provider-reset wait can last. 
This bounds each wait, not the whole run: after a capped wait elapses **ktx** retries and may pause again. Omit to wait until the provider's reset time. | +| `rateLimit.retry.maxAttempts` | `int > 0` | `6` | Maximum attempts for a single rate-limited LLM call before the failure surfaces (counts the first try). Also bounds how far opaque backoff grows for responses without a reset time or retry-after value. | +| `rateLimit.retry.baseDelayMs` | `int > 0` | `1000` | Initial opaque retry delay in milliseconds. | +| `rateLimit.retry.maxDelayMs` | `int > 0` | `60000` | Maximum opaque retry delay in milliseconds. | +| `rateLimit.retry.jitter` | `boolean` | `true` | Add jitter to opaque retry delays. | + ## `scan` `scan` configures how schema-level inputs become structured context: diff --git a/docs-site/content/docs/getting-started/introduction.mdx b/docs-site/content/docs/getting-started/introduction.mdx index cc3b0ca8..50ffe20d 100644 --- a/docs-site/content/docs/getting-started/introduction.mdx +++ b/docs-site/content/docs/getting-started/introduction.mdx @@ -4,6 +4,7 @@ description: ktx is an open-source, self-improving context layer for data agents --- import { ProductMechanics } from "@/components/product-mechanics"; +import { ProductRuntime } from "@/components/product-runtime";
@@ -59,6 +60,8 @@ serves that context to agents at runtime. + + ## Use it for Use **ktx** when agents need more than raw database access. Agents can search wiki diff --git a/docs-site/next.config.mjs b/docs-site/next.config.mjs index 380dba85..e47a0cc7 100644 --- a/docs-site/next.config.mjs +++ b/docs-site/next.config.mjs @@ -30,7 +30,36 @@ const config = { }; }, async redirects() { + // Alias-host canonicalization MUST come before the generic root/docs + // redirects below. Those generic rules have no host guard, so if they ran + // first they would inject a "/ktx" basePath into the path on the alias + // hosts, which the alias catch-alls would then prepend a second time — + // producing https://docs.kaelio.com/ktx/ktx/docs/... Redirects also run + // before beforeFiles rewrites, so the ktx.sh catch-all must exclude + // /stars* to let the stars dashboard rewrite proxy through. return [ + { + source: "/slack", + has: [{ type: "host", value: "ktx.sh" }], + destination: + "https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ", + permanent: false, + basePath: false, + }, + { + source: "/:path*", + has: [{ type: "host", value: "docs.ktx.sh" }], + destination: "https://docs.kaelio.com/ktx/:path*", + permanent: true, + basePath: false, + }, + { + source: "/:path((?!stars(?:/|$)).*)", + has: [{ type: "host", value: "ktx.sh" }], + destination: "https://docs.kaelio.com/ktx/:path", + permanent: true, + basePath: false, + }, { source: "/", destination: "/ktx/docs/getting-started/introduction", @@ -43,28 +72,6 @@ const config = { permanent: false, basePath: false, }, - { - source: "/:path*", - has: [{ type: "host", value: "docs.ktx.sh" }], - destination: "https://docs.kaelio.com/ktx/:path*", - permanent: true, - basePath: false, - }, - { - source: "/slack", - has: [{ type: "host", value: "ktx.sh" }], - destination: - "https://join.slack.com/t/ktxcommunity/shared_invite/zt-3y9b44m1x-LVyNNJD5nwaZHq4XS29LMQ", - permanent: false, - basePath: 
false, - }, - { - source: "/:path((?!stars(?:/|$)).*)", - has: [{ type: "host", value: "ktx.sh" }], - destination: "https://docs.kaelio.com/ktx/:path", - permanent: true, - basePath: false, - }, ]; }, }; diff --git a/docs-site/tests/docs-index-route.test.mjs b/docs-site/tests/docs-index-route.test.mjs index fdd8ec81..6fac0e3c 100644 --- a/docs-site/tests/docs-index-route.test.mjs +++ b/docs-site/tests/docs-index-route.test.mjs @@ -2,6 +2,8 @@ import assert from "node:assert/strict"; import { spawn } from "node:child_process"; import { once } from "node:events"; import { readFile, writeFile } from "node:fs/promises"; +import http from "node:http"; +import https from "node:https"; import { dirname, join } from "node:path"; import { createServer } from "node:net"; import { after, before, test } from "node:test"; @@ -100,6 +102,37 @@ after(async () => { } }); +// Node's fetch (undici) overwrites the Host header with the connection host, +// so the alias-host redirect rules never match. The low-level http(s) client +// sends Host verbatim, which is what the alias canonicalization keys off of. +function requestWithHost(hostHeader, path) { + const target = new URL(docsSiteUrl); + const client = target.protocol === "https:" ? https : http; + const port = + target.port || (target.protocol === "https:" ? 
"443" : "80"); + + return new Promise((resolve, reject) => { + const request = client.request( + { + hostname: target.hostname, + port, + path, + method: "GET", + headers: { Host: hostHeader }, + }, + (response) => { + response.resume(); + resolve({ + status: response.statusCode, + location: response.headers.location, + }); + }, + ); + request.on("error", reject); + request.end(); + }); +} + test("/ktx/docs redirects to the docs introduction", async () => { const response = await fetch(`${docsSiteUrl}${docsBasePath}/docs`, { redirect: "manual", @@ -141,3 +174,51 @@ test("/ktx/api/search returns docs search results", async () => { "search should return at least one docs result", ); }); + +test("ktx.sh canonicalizes to a single /ktx basePath on the docs host", async () => { + const root = await requestWithHost("ktx.sh", "/"); + assert.equal(root.status, 308); + assert.equal(root.location, "https://docs.kaelio.com/ktx/"); + assert.ok( + !root.location.includes("/ktx/ktx"), + "the basePath must not be doubled", + ); + + const page = await requestWithHost( + "ktx.sh", + "/docs/getting-started/quickstart", + ); + assert.equal(page.status, 308); + assert.equal( + page.location, + "https://docs.kaelio.com/ktx/docs/getting-started/quickstart", + ); +}); + +test("docs.ktx.sh canonicalizes to a single /ktx basePath on the docs host", async () => { + const root = await requestWithHost("docs.ktx.sh", "/"); + assert.equal(root.status, 308); + assert.equal(root.location, "https://docs.kaelio.com/ktx"); + assert.ok( + !root.location.includes("/ktx/ktx"), + "the basePath must not be doubled", + ); + + const page = await requestWithHost("docs.ktx.sh", "/llms.txt"); + assert.equal(page.status, 308); + assert.equal(page.location, "https://docs.kaelio.com/ktx/llms.txt"); +}); + +test("ktx.sh keeps the /slack and /stars exceptions", async () => { + const slack = await requestWithHost("ktx.sh", "/slack"); + assert.equal(slack.status, 307); + assert.match(slack.location, 
/^https:\/\/join\.slack\.com\//); + + // /stars is proxied by a beforeFiles rewrite, so the apex catch-all must not + // canonicalize it to the docs host. + const stars = await requestWithHost("ktx.sh", "/stars"); + assert.ok( + !(stars.location ?? "").startsWith("https://docs.kaelio.com"), + "the stars dashboard must not be redirected to the docs host", + ); +}); diff --git a/docs-site/tests/product-mechanics-content.test.mjs b/docs-site/tests/product-mechanics-content.test.mjs index 5cce9001..d0c9471c 100644 --- a/docs-site/tests/product-mechanics-content.test.mjs +++ b/docs-site/tests/product-mechanics-content.test.mjs @@ -85,7 +85,7 @@ test("product mechanics component explains ingestion outputs", async () => { "compile into SQL", '"use client"', "@xyflow/react", - " { ); } - assert.match( - component, + // The ReactFlow canvas config lives in the shared FlowCanvas wrapper, which + // product-mechanics renders. Assert the static read-only behavior there. + const flowCanvas = await readDocsFile("components/flow-canvas.tsx"); + for (const guard of [ /nodesDraggable=\{false\}/, - "ReactFlow canvas should disable node dragging", - ); - assert.match( - component, - /panOnDrag=\{false\}/, - "ReactFlow canvas should disable panning", - ); - assert.match( - component, + /nodesConnectable=\{false\}/, /zoomOnScroll=\{false\}/, - "ReactFlow canvas should disable scroll zoom", - ); + /elementsSelectable=\{false\}/, + ]) { + assert.match( + flowCanvas, + guard, + `shared FlowCanvas should enforce static read-only behavior: ${guard}`, + ); + } assert.doesNotMatch(component, /raw-sources/); assert.doesNotMatch(component, /\.ktx/); diff --git a/docs-site/tests/product-runtime-content.test.mjs b/docs-site/tests/product-runtime-content.test.mjs new file mode 100644 index 00000000..ac643faa --- /dev/null +++ b/docs-site/tests/product-runtime-content.test.mjs @@ -0,0 +1,74 @@ +import assert from "node:assert/strict"; +import { readFile } from "node:fs/promises"; +import { dirname, 
join } from "node:path"; +import { test } from "node:test"; +import { fileURLToPath } from "node:url"; + +const docsSiteDir = join(dirname(fileURLToPath(import.meta.url)), ".."); + +async function readDocsFile(path) { + return readFile(join(docsSiteDir, path), "utf8"); +} + +test("docs introduction renders the serving phase after ingestion", async () => { + const introduction = await readDocsFile( + "content/docs/getting-started/introduction.mdx", + ); + + assert.match( + introduction, + /import\s+\{\s*ProductRuntime\s*\}\s+from\s+"@\/components\/product-runtime";/, + ); + assert.match(introduction, //); + + const mechanicsIndex = introduction.indexOf(""); + const runtimeIndex = introduction.indexOf(""); + const useCaseIndex = introduction.indexOf("## Use it for"); + + assert.ok( + runtimeIndex > mechanicsIndex, + "serving diagram should appear after the ingestion diagram", + ); + assert.ok( + runtimeIndex < useCaseIndex, + "serving diagram should appear before use-case sections", + ); +}); + +test("product runtime component explains the serving cycle", async () => { + const component = await readDocsFile("components/product-runtime.tsx"); + + for (const expectedText of [ + "How serving works", + "Serving flow", + "From an agent request to a governed answer", + "Your agent", + "Claude Code", + "Cursor", + "Codex", + "Search wiki + semantic layer", + "Return approved metrics", + "Compile metrics → SQL", + "Context layer", + "Database", + "search + read", + "read-only", + "wiki/*.md", + "semantic-layer/*.yaml", + '"use client"', + "@xyflow/react", + "FlowCanvas", + "getSmoothStepPath", + "animateMotion", + "runtime-particle", + "buildCyclePath", + ]) { + assert.ok( + component.includes(expectedText), + `component should include: ${expectedText}`, + ); + } + + assert.doesNotMatch(component, /raw-sources/); + assert.doesNotMatch(component, /=22.0.0" @@ -47,6 +51,7 @@ "@ai-sdk/devtools": "0.0.18", "@ai-sdk/google-vertex": "^4.0.134", "@anthropic-ai/claude-agent-sdk": 
"0.3.146", + "@clack/core": "1.3.1", "@clack/prompts": "1.4.0", "@clickhouse/client": "^1.18.5", "@commander-js/extra-typings": "14.0.0", @@ -72,6 +77,7 @@ "pg": "^8.21.0", "posthog-node": "^5.34.9", "react": "^19.2.6", + "semver": "^7.8.1", "simple-git": "3.36.0", "snowflake-sdk": "^2.4.2", "yaml": "^2.9.0", @@ -85,6 +91,7 @@ "@types/node": "^25.9.1", "@types/pg": "^8.20.0", "@types/react": "^19.2.15", + "@types/semver": "^7.7.1", "@vitest/coverage-v8": "^4.1.7", "ajv": "8.20.0", "ink-testing-library": "^4.0.0", diff --git a/packages/cli/src/clack.ts b/packages/cli/src/clack.ts index 2ad51e6c..31be2e1b 100644 --- a/packages/cli/src/clack.ts +++ b/packages/cli/src/clack.ts @@ -3,6 +3,30 @@ import type { KtxCliIo } from './cli-runtime.js'; const ESC = String.fromCharCode(0x1b); +export interface CliStyleEnv { + NO_COLOR?: string; + TERM?: string; +} + +function ansiEnabled(env: CliStyleEnv = process.env): boolean { + return !env.NO_COLOR && env.TERM !== 'dumb'; +} + +function ansiColor(text: string, open: number, close: number, env?: CliStyleEnv): string { + if (!ansiEnabled(env)) { + return text; + } + return `${ESC}[${open}m${text}${ESC}[${close}m`; +} + +export function dim(text: string, env?: CliStyleEnv): string { + return ansiColor(text, 2, 22, env); +} + +export function cyan(text: string, env?: CliStyleEnv): string { + return ansiColor(text, 36, 39, env); +} + export interface RailBufferedSource { stdoutText(): string; stderrText(): string; @@ -61,11 +85,11 @@ export function createClackSpinner(): KtxCliSpinner { } function magenta(text: string): string { - return `${ESC}[35m${text}${ESC}[39m`; + return ansiColor(text, 35, 39); } function red(text: string): string { - return `${ESC}[31m${text}${ESC}[39m`; + return ansiColor(text, 31, 39); } export function createStaticCliSpinner(io: KtxCliSpinnerIo): KtxCliSpinner { diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 31ab8a03..6359d897 100644 --- 
a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -16,6 +16,7 @@ import { renderMissingProjectMessage } from './doctor.js'; import { findNearestKtxProjectDir, resolveKtxProjectDir } from './project-resolver.js'; import { profileMark, profileSpan } from './startup-profile.js'; import type { CommandOutcome } from './telemetry/index.js'; +import { prepareUpdateCheckNotice, type PrepareUpdateCheckNoticeOptions } from './update-check/update-check.js'; profileMark('module:cli-program'); @@ -39,6 +40,8 @@ interface KtxCommanderProgramOptions { runInit: (args: { projectDir: string; force: boolean }, io: KtxCliIo) => Promise; } +type KtxCliUpdateCheckOptions = Pick; + export interface BuildKtxProgramOptions { io: KtxCliIo; deps: KtxCliDeps; @@ -47,6 +50,7 @@ export interface BuildKtxProgramOptions { setExitCode?: (code: number) => void; argv?: string[]; setTelemetryModule?: (telemetry: typeof import('./telemetry/index.js')) => void; + updateCheck?: KtxCliUpdateCheckOptions; } type CommanderExitLike = { exitCode: number; code: string; message: string }; @@ -431,16 +435,29 @@ export function collectCommandFlagsPresent(command: CommandUnknownOpts): Record< export function buildKtxProgram(options: BuildKtxProgramOptions): Command { const program = createBaseProgram(options.packageInfo, options.io); + let pendingUpdateNotice: string | null = null; + program.hook('preAction', async (_thisCommand, actionCommand) => { // The hidden completion command must stay silent and side-effect free: skip - // the telemetry notice, command span, and project checks entirely. + // the telemetry notice, command span, project checks, and update checks entirely. 
if (commandPath(actionCommand as CommandPathNode).includes('__complete')) { return; } + const commandNode = actionCommand as CommandPathNode; + const updateCheck = await prepareUpdateCheckNotice({ + io: options.io, + env: options.updateCheck?.env, + fetchDistTags: options.updateCheck?.fetchDistTags, + homeDir: options.updateCheck?.homeDir, + installedVersion: options.packageInfo.version, + now: options.updateCheck?.now, + commandOptions: commandOptions(commandNode), + }); + pendingUpdateNotice = updateCheck.notice; + const telemetry = await import('./telemetry/index.js'); options.setTelemetryModule?.(telemetry); await telemetry.showTelemetryNoticeIfNeeded(options.io, options.packageInfo); - const commandNode = actionCommand as CommandPathNode; const path = commandPath(commandNode); const projectDir = resolveCommandProjectDir(commandNode); const hasProject = ktxYamlExists(projectDir); @@ -457,6 +474,13 @@ export function buildKtxProgram(options: BuildKtxProgramOptions): Command { ensureProjectAvailable(options.io, commandNode); }); + program.hook('postAction', () => { + if (pendingUpdateNotice) { + options.io.stderr.write(pendingUpdateNotice); + pendingUpdateNotice = null; + } + }); + const context: KtxCliCommandContext = { io: options.io, deps: options.deps, @@ -529,6 +553,13 @@ export async function runCommanderKtxCli( try { return await runBareInteractiveCommand(program, io, context); } catch (error) { + const telemetry = await import('./telemetry/index.js'); + await telemetry.reportException({ + error, + context: { source: 'bare-interactive', handled: true, fatal: false }, + packageInfo: info, + io, + }); io.stderr.write(`${formatCliError(error)}\n`); return 1; } @@ -563,6 +594,23 @@ export async function runCommanderKtxCli( outcome: commandOutcomeForParseResult(parseError, exitCode), error: parseError, }); + if ( + parseError && + !isCommanderExit(parseError) && + !isKtxProjectMissingAbortError(parseError) + ) { + await telemetryModule.reportException({ + 
error: parseError, + context: { + source: completed?.commandPath.join(' ') ?? 'commander parseAsync', + handled: true, + fatal: false, + }, + projectDir: completed?.projectGroupAttached ? completed.projectDir : undefined, + packageInfo: info, + io, + }); + } await telemetryModule.emitCompletedCommand({ completed, packageInfo: info, io }); await telemetryModule.shutdownTelemetryEmitter(); } diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 7043143b..4e13b472 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -129,6 +129,48 @@ function installTelemetrySignalFlush(io: KtxCliIo, info: KtxCliPackageInfo): () }; } +/** @internal */ +export function createGlobalExceptionReporter(io: KtxCliIo, info: KtxCliPackageInfo) { + return async (source: 'uncaughtException' | 'unhandledRejection', error: unknown): Promise => { + const { reportException, shutdownTelemetryEmitter } = await import('./telemetry/index.js'); + await reportException({ + error, + context: { source, handled: false, fatal: true }, + io, + packageInfo: info, + immediate: true, + }); + await shutdownTelemetryEmitter(); + }; +} + +export function installGlobalExceptionHandlers(io: KtxCliIo, info: KtxCliPackageInfo): () => void { + const report = createGlobalExceptionReporter(io, info); + const handle = (source: 'uncaughtException' | 'unhandledRejection', error: unknown): void => { + void (async () => { + try { + await report(source, error); + } catch { + // Best-effort: preserve Node's process termination behavior. 
+ } + if (error instanceof Error && error.stack) { + io.stderr.write(`${error.stack}\n`); + } else { + io.stderr.write(`${String(error)}\n`); + } + process.exit(1); + })(); + }; + const onUncaught = (error: Error): void => handle('uncaughtException', error); + const onUnhandled = (reason: unknown): void => handle('unhandledRejection', reason); + process.on('uncaughtException', onUncaught); + process.on('unhandledRejection', onUnhandled); + return () => { + process.off('uncaughtException', onUncaught); + process.off('unhandledRejection', onUnhandled); + }; +} + export async function runKtxCli( argv = process.argv.slice(2), io: KtxCliIo = process, @@ -141,11 +183,14 @@ export async function runKtxCli( // Real-process entry only: flush telemetry if interrupted. Test/programmatic // callers pass their own `io`, so they never install process-level handlers. const removeSignalFlush = (io as unknown) === process ? installTelemetrySignalFlush(io, info) : undefined; + const removeGlobalExceptionHandlers = + (io as unknown) === process ? installGlobalExceptionHandlers(io, info) : undefined; try { return await runCommanderKtxCli(argv, io, deps, info, { runInit: runInitForCommander, }); } finally { + removeGlobalExceptionHandlers?.(); removeSignalFlush?.(); } } diff --git a/packages/cli/src/commands/setup-commands.ts b/packages/cli/src/commands/setup-commands.ts index 1619a80a..0302e9ed 100644 --- a/packages/cli/src/commands/setup-commands.ts +++ b/packages/cli/src/commands/setup-commands.ts @@ -406,6 +406,8 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo } const resolvedAgentScope = options.local ? 'local' : options.global ? 'global' : 'project'; + const debugEnabled = + ((command.optsWithGlobals ? 
command.optsWithGlobals() : command.opts()) as { debug?: unknown }).debug === true; await runSetupArgs(context, { command: 'run', projectDir: resolveCommandProjectDir(command), @@ -415,6 +417,7 @@ export function registerSetupCommands(program: Command, context: KtxCliCommandCo agentScope: resolvedAgentScope, skipAgents: options.skipAgents === true, inputMode: options.input === false ? 'disabled' : 'auto', + ...(debugEnabled ? { debug: true } : {}), yes: options.yes === true, cliVersion: context.packageInfo.version, ...(options.llmBackend ? { llmBackend: options.llmBackend } : {}), diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index 96281e82..9b6b4294 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -16,7 +16,8 @@ import { bold, dim, green, red, SYMBOLS } from './io/symbols.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:connection'); @@ -74,6 +75,12 @@ async function testNativeConnection( } const result = await connector.testConnection(); if (!result.success) { + // Re-throw the driver's original error so connection_test telemetry records + // its real class (e.g. ConnectionError) and code (e.g. ELOGIN) instead of + // collapsing every native failure to a generic Error with no code. + if (result.cause instanceof Error) { + throw result.cause; + } throw new Error(result.error ?? 'connection test failed'); } return { driver: connector.driver }; @@ -318,6 +325,21 @@ async function emitConnectionTest(input: { ...(errorDetail ? 
{ errorDetail } : {}), }, }); + if (input.error) { + await reportException({ + error: input.error, + context: { source: 'connection test', handled: true, fatal: false }, + projectDir: input.project.projectDir, + io: input.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project: input.project, + connectionId: input.connectionId, + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }), + }); + } } function visualWidth(text: string): number { diff --git a/packages/cli/src/connectors/bigquery/connector.ts b/packages/cli/src/connectors/bigquery/connector.ts index edebe284..eae0f2ed 100644 --- a/packages/cli/src/connectors/bigquery/connector.ts +++ b/packages/cli/src/connectors/bigquery/connector.ts @@ -5,7 +5,9 @@ import { assertReadOnlySql, limitSqlForExecution } from '../../context/connectio import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { + connectorTestFailure, createKtxConnectorCapabilities, + type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, @@ -320,7 +322,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { this.id = `bigquery:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { const client = this.getClient(); await client.getDatasets({ maxResults: 1 }); @@ -329,7 +331,7 @@ export class KtxBigQueryScanConnector implements KtxScanConnector { } return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/connectors/clickhouse/connector.ts b/packages/cli/src/connectors/clickhouse/connector.ts index 74ef7a77..23622701 100644 --- a/packages/cli/src/connectors/clickhouse/connector.ts +++ b/packages/cli/src/connectors/clickhouse/connector.ts @@ -1,7 +1,7 @@ import { createClient } from '@clickhouse/client'; import { getDialectForDriver } from '../../context/connections/dialects.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { readFileSync } from 'node:fs'; import { Agent as HttpsAgent } from 'node:https'; @@ -317,12 +317,12 @@ export class KtxClickHouseScanConnector implements KtxScanConnector { this.id = `clickhouse:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { await 
this.query('SELECT 1'); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/connectors/mysql/connector.ts b/packages/cli/src/connectors/mysql/connector.ts index 29dacc26..c147c7dd 100644 --- a/packages/cli/src/connectors/mysql/connector.ts +++ b/packages/cli/src/connectors/mysql/connector.ts @@ -11,7 +11,9 @@ import { } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { + connectorTestFailure, createKtxConnectorCapabilities, + type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, @@ -413,12 +415,12 @@ export class KtxMysqlScanConnector implements KtxScanConnector { this.id = `mysql:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { await this.query('SELECT 1'); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/connectors/postgres/connector.ts b/packages/cli/src/connectors/postgres/connector.ts index f206fa6a..1a956a3d 100644 --- a/packages/cli/src/connectors/postgres/connector.ts +++ b/packages/cli/src/connectors/postgres/connector.ts @@ -6,7 +6,9 @@ import { assertReadOnlySql, limitSqlForExecution } from '../../context/connectio import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { + connectorTestFailure, createKtxConnectorCapabilities, + type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, @@ -442,12 +444,12 @@ export class KtxPostgresScanConnector implements KtxScanConnector { this.id = `postgres:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { await this.query('SELECT 1'); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/connectors/snowflake/connector.ts b/packages/cli/src/connectors/snowflake/connector.ts index 86d7ebe7..51a91e52 100644 --- a/packages/cli/src/connectors/snowflake/connector.ts +++ b/packages/cli/src/connectors/snowflake/connector.ts @@ -7,7 +7,9 @@ import { assertReadOnlySql, limitSqlForExecution } from '../../context/connectio import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { + connectorTestFailure, createKtxConnectorCapabilities, + type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, @@ -464,7 +466,7 @@ class SnowflakeSdkDriver implements KtxSnowflakeDriver { await this.query('SELECT 1'); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? error.message : String(error) }; + return connectorTestFailure(error); } } @@ -573,7 +575,7 @@ export class KtxSnowflakeScanConnector implements KtxScanConnector { } } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { return this.getDriver().test(); } diff --git a/packages/cli/src/connectors/sqlite/connector.ts b/packages/cli/src/connectors/sqlite/connector.ts index e996bc25..f5ba2a55 100644 --- a/packages/cli/src/connectors/sqlite/connector.ts +++ b/packages/cli/src/connectors/sqlite/connector.ts @@ -6,7 +6,7 @@ import { fileURLToPath } from 'node:url'; import { getDialectForDriver } from '../../context/connections/dialects.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { normalizeQueryRows } from '../../context/connections/query-executor.js'; -import { createKtxConnectorCapabilities, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type 
KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; +import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaForeignKey, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableListEntry, type KtxTableRef, type KtxTableSampleInput, type KtxTableSampleResult } from '../../context/scan/types.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; export interface KtxSqliteConnectionConfig { @@ -167,7 +167,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector { this.id = `sqlite:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { if (!existsSync(this.dbPath) || !statSync(this.dbPath).isFile()) { return { success: false, error: `File not found: ${this.dbPath}` }; @@ -175,7 +175,7 @@ export class KtxSqliteScanConnector implements KtxScanConnector { this.database().prepare('SELECT 1').get(); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/connectors/sqlserver/connector.ts b/packages/cli/src/connectors/sqlserver/connector.ts index 0115781d..5dd9969b 100644 --- a/packages/cli/src/connectors/sqlserver/connector.ts +++ b/packages/cli/src/connectors/sqlserver/connector.ts @@ -3,7 +3,9 @@ import { getDialectForDriver } from '../../context/connections/dialects.js'; import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; import { + connectorTestFailure, createKtxConnectorCapabilities, + type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, @@ -384,12 +386,12 @@ export class KtxSqlServerScanConnector implements KtxScanConnector { this.id = `sqlserver:${options.connectionId}`; } - async testConnection(): Promise<{ success: boolean; error?: string }> { + async testConnection(): Promise { try { await this.query('SELECT 1'); return { success: true }; } catch (error) { - return { success: false, error: error instanceof Error ? 
error.message : String(error) }; + return connectorTestFailure(error); } } diff --git a/packages/cli/src/context/core/abort.ts b/packages/cli/src/context/core/abort.ts new file mode 100644 index 00000000..95467c52 --- /dev/null +++ b/packages/cli/src/context/core/abort.ts @@ -0,0 +1,39 @@ +/** @internal */ +export function createAbortError(message = 'Aborted'): DOMException { + return new DOMException(message, 'AbortError'); +} + +export function isAbortError(error: unknown): boolean { + if (error instanceof DOMException && error.name === 'AbortError') { + return true; + } + if (!error || typeof error !== 'object') { + return false; + } + const record = error as { name?: unknown; code?: unknown }; + return record.name === 'AbortError' || record.code === 'ABORT_ERR'; +} + +/** @internal */ +export function throwIfAborted(signal?: AbortSignal): void { + if (signal?.aborted) { + throw createAbortError(); + } +} + +export function linkAbortSignal(parent?: AbortSignal): { controller: AbortController; dispose: () => void } { + const controller = new AbortController(); + if (!parent) { + return { controller, dispose: () => undefined }; + } + if (parent.aborted) { + controller.abort(createAbortError()); + return { controller, dispose: () => undefined }; + } + const onAbort = () => controller.abort(createAbortError()); + parent.addEventListener('abort', onAbort, { once: true }); + return { + controller, + dispose: () => parent.removeEventListener('abort', onAbort), + }; +} diff --git a/packages/cli/src/context/ingest/adapters/historic-sql/query-history-filter-picker.ts b/packages/cli/src/context/ingest/adapters/historic-sql/query-history-filter-picker.ts index bb296513..3f77900d 100644 --- a/packages/cli/src/context/ingest/adapters/historic-sql/query-history-filter-picker.ts +++ b/packages/cli/src/context/ingest/adapters/historic-sql/query-history-filter-picker.ts @@ -23,6 +23,7 @@ export interface QueryHistoryFilterProposal { consideredRoleCount: number; skipped: { reason: 
'no-llm' | 'no-daemon' | 'no-in-scope-history' | 'user-block-present' } | null; warnings: string[]; + parseFailedTemplateIds: string[]; } export interface ProposeQueryHistoryServiceAccountFiltersInput { @@ -74,7 +75,7 @@ const queryHistoryFilterAdjudicationSchema = z.object({ type QueryHistoryFilterAdjudication = z.infer; function emptyProposal(skipped: QueryHistoryFilterProposal['skipped'], warnings: string[] = []): QueryHistoryFilterProposal { - return { excludedRoles: [], consideredRoleCount: 0, skipped, warnings }; + return { excludedRoles: [], consideredRoleCount: 0, skipped, warnings, parseFailedTemplateIds: [] }; } function displayTableRef(ref: KtxTableRef): string { @@ -180,6 +181,7 @@ export async function proposeQueryHistoryServiceAccountFilters( const windowDays = 'windowDays' in config ? config.windowDays : 90; const windowStart = new Date(now.getTime() - windowDays * 24 * 60 * 60 * 1000); const warnings: string[] = []; + const parseFailedTemplateIds: string[] = []; const snapshot: AggregatedTemplate[] = []; try { @@ -212,7 +214,7 @@ export async function proposeQueryHistoryServiceAccountFilters( for (const template of snapshot) { const parsed = analysis.get(template.templateId); if (!parsed || parsed.error) { - warnings.push(`query_history_filter_picker_parse_failed:${template.templateId}`); + parseFailedTemplateIds.push(template.templateId); continue; } const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()] @@ -236,6 +238,7 @@ export async function proposeQueryHistoryServiceAccountFilters( consideredRoleCount: records.length, skipped: { reason: 'no-in-scope-history' }, warnings, + parseFailedTemplateIds, }; } @@ -256,6 +259,7 @@ export async function proposeQueryHistoryServiceAccountFilters( ...warnings, `query_history_filter_picker_llm_failed:${error instanceof Error ? 
error.message : String(error)}`, ], + parseFailedTemplateIds, }; } @@ -274,5 +278,6 @@ export async function proposeQueryHistoryServiceAccountFilters( consideredRoleCount: records.length, skipped: input.userServiceAccountsPresent ? { reason: 'user-block-present' } : null, warnings, + parseFailedTemplateIds, }; } diff --git a/packages/cli/src/context/ingest/context-candidates/curator-pagination.service.ts b/packages/cli/src/context/ingest/context-candidates/curator-pagination.service.ts index 348544ca..7848fab7 100644 --- a/packages/cli/src/context/ingest/context-candidates/curator-pagination.service.ts +++ b/packages/cli/src/context/ingest/context-candidates/curator-pagination.service.ts @@ -40,6 +40,7 @@ export interface CuratorPaginationInput { buildToolSet: (passNumber: number) => KtxRuntimeToolSet; getReconciliationActions: () => MemoryAction[]; onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void; + abortSignal?: AbortSignal; } interface CuratorPaginationResult extends ReconciliationOutcome { @@ -243,6 +244,7 @@ export class CuratorPaginationService implements CuratorPaginationPort { sourceKey: params.input.sourceKey, jobId: params.input.jobId, forceRun: params.forceRun, + abortSignal: params.input.abortSignal, onStepFinish: params.input.onStepFinish ? 
({ stepIndex, stepBudget }) => params.input.onStepFinish?.({ passNumber: params.passNumber, stepIndex, stepBudget }) diff --git a/packages/cli/src/context/ingest/final-gate-repair.ts b/packages/cli/src/context/ingest/final-gate-repair.ts index 1c373aa6..f32178d8 100644 --- a/packages/cli/src/context/ingest/final-gate-repair.ts +++ b/packages/cli/src/context/ingest/final-gate-repair.ts @@ -21,6 +21,7 @@ export interface RepairFinalGateFailureInput { repairKind: FinalGateRepairKind; maxAttempts?: number; stepBudget?: number; + abortSignal?: AbortSignal; } const readRepairFileSchema = z.object({ @@ -200,6 +201,7 @@ export async function repairFinalGateFailure( jobId: input.trace.context.jobId, repairKind: input.repairKind, }, + abortSignal: input.abortSignal, }), ); diff --git a/packages/cli/src/context/ingest/ingest-bundle.runner.ts b/packages/cli/src/context/ingest/ingest-bundle.runner.ts index 3f2b41d3..a242d58a 100644 --- a/packages/cli/src/context/ingest/ingest-bundle.runner.ts +++ b/packages/cli/src/context/ingest/ingest-bundle.runner.ts @@ -3,6 +3,7 @@ import { dirname, join } from 'node:path'; import pLimit from 'p-limit'; import { z } from 'zod'; import { type KtxLogger, noopLogger } from '../../context/core/config.js'; +import type { RateLimitWaitState } from '../../context/llm/rate-limit-governor.js'; import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime-tools.js'; import type { KtxRuntimeToolSet } from '../../context/llm/runtime-port.js'; import type { CaptureSession, MemoryAction } from '../../context/memory/types.js'; @@ -219,6 +220,10 @@ export class IngestBundleRunner { } async run(job: IngestBundleJob, ctx?: IngestJobContext): Promise { + const unsubscribeRateLimitGovernor = this.subscribeRateLimitGovernor({ + trace: this.createTrace(job), + memoryFlow: ctx?.memoryFlow, + }); const key = job.connectionId; const previous = this.chainByConnection.get(key); if (previous) { @@ -241,10 +246,72 @@ export class IngestBundleRunner { 
ctx?.memoryFlow?.finish('error', [sanitizeMemoryFlowError(error)]); throw error; } finally { + unsubscribeRateLimitGovernor(); await this.maybeEmitIngestProfile(job.jobId); } } + private formatRateLimitWait( + state: Extract, + ): string { + const seconds = Math.ceil(state.remainingMs / 1_000); + const minutes = Math.floor(seconds / 60); + const remainder = seconds % 60; + const duration = minutes > 0 ? `${minutes}m${String(remainder).padStart(2, '0')}s` : `${seconds}s`; + const type = state.rateLimitType ? ` ${state.rateLimitType}` : ''; + return `Rate-limited (${state.provider}${type}); resuming in ${duration}; Ctrl+C to stop`; + } + + private subscribeRateLimitGovernor(input: { + trace: IngestTraceWriter; + memoryFlow?: MemoryFlowEventSink; + }): () => void { + const governor = this.deps.settings.rateLimitGovernor; + if (!governor) { + return () => undefined; + } + return governor.subscribe((state: RateLimitWaitState) => { + if (state.kind === 'rate_limit_observed') { + void input.trace.event('info', 'rate_limit', 'rate_limit_observed', { ...state }); + return; + } + if (state.kind === 'concurrency_adjusted') { + void input.trace.event('info', 'rate_limit', 'concurrency_adjusted', { ...state }); + return; + } + void input.trace.event('info', 'rate_limit', state.kind, { ...state }); + if (state.kind === 'wait_tick' || state.kind === 'wait_started') { + input.memoryFlow?.emit({ + type: 'rate_limit_wait', + provider: state.provider, + ...(state.rateLimitType ? 
{ rateLimitType: state.rateLimitType } : {}), + resumeAtMs: state.resumeAtMs, + remainingMs: state.remainingMs, + }); + input.memoryFlow?.emit({ + type: 'stage_progress', + stage: 'integration', + percent: 50, + message: this.formatRateLimitWait(state), + transient: true, + }); + } + }); + } + + private async withRateLimitWorkSlot(abortSignal: AbortSignal | undefined, fn: () => Promise): Promise { + const governor = this.deps.settings.rateLimitGovernor; + if (!governor) { + return fn(); + } + const release = await governor.acquireWorkSlot(abortSignal); + try { + return await fn(); + } finally { + release(); + } + } + /** * When profiling is enabled — via the `KTX_PROFILE_INGEST` env var or the * `ingest.profile` config setting — read the job's trace + tool transcripts @@ -877,6 +944,7 @@ export class IngestBundleRunner { includeContextEvidenceTools: boolean; currentTableExists(tableRef: string): Promise; memoryFlow?: MemoryFlowEventSink; + abortSignal?: AbortSignal; wuSkillNames: string[]; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; }): Promise { @@ -1029,6 +1097,7 @@ export class IngestBundleRunner { jobId: input.job.jobId, toolFailureCount: (unitKey) => input.transcriptSummaries.get(unitKey)?.fatalErrorCount ?? 
0, onStepFinish: input.onStepFinish, + abortSignal: input.abortSignal, }, input.wu, ); @@ -1524,7 +1593,8 @@ export class IngestBundleRunner { try { await Promise.all( workUnits.map((wu, index) => - limitWorkUnit(async () => { + limitWorkUnit(() => + this.withRateLimitWorkSlot(ctx?.abortSignal, async () => { const outcome = await runIsolatedWorkUnit({ unitIndex: index, ingestionBaseSha, @@ -1532,6 +1602,7 @@ export class IngestBundleRunner { patchDir, trace: runTrace, workUnit: wu, + abortSignal: ctx?.abortSignal, afterSuccess: (child) => copyTransientIngestEvidence(child.workdir, sessionWorktree.workdir), run: async (child) => { const scopedWikiService = this.deps.wikiService.forWorktree(child.workdir); @@ -1565,6 +1636,7 @@ export class IngestBundleRunner { includeContextEvidenceTools: adapter.evidenceIndexing === 'documents' && !!contextReport, currentTableExists: (tableRef) => this.tableRefExistsInSemanticLayer(scopedSemanticLayerService, slConnectionIds, tableRef), + abortSignal: ctx?.abortSignal, memoryFlow, wuSkillNames, onStepFinish: ({ stepIndex, stepBudget }) => { @@ -1594,7 +1666,8 @@ export class IngestBundleRunner { completedWorkUnits / workUnits.length, `${completedWorkUnits} of ${workUnits.length} work units complete`, ); - }), + }), + ), ), ); } catch (error) { @@ -1693,6 +1766,7 @@ export class IngestBundleRunner { reason: context.reason, maxAttempts: 1, stepBudget: 12, + abortSignal: ctx?.abortSignal, }); emitStageProgress( 'integration', @@ -1714,6 +1788,7 @@ export class IngestBundleRunner { repairKind: 'patch_semantic_gate', maxAttempts: 1, stepBudget: 16, + abortSignal: ctx?.abortSignal, }); emitStageProgress( 'integration', @@ -1993,6 +2068,7 @@ export class IngestBundleRunner { ); } : undefined, + abortSignal: ctx?.abortSignal, }); curatorReport = curatorOutcome.report; curatorWarnings = curatorOutcome.warnings; @@ -2038,6 +2114,7 @@ export class IngestBundleRunner { sourceKey: job.sourceKey, jobId: job.jobId, force: !!overrideReport, + 
abortSignal: ctx?.abortSignal, onStepFinish: stage4 ? ({ stepIndex, stepBudget }) => { emitStageProgress('reconciliation', 85, `Reconciling results: step ${stepIndex}/${stepBudget}`, { @@ -2470,6 +2547,7 @@ export class IngestBundleRunner { repairKind: 'final_artifact_gate', maxAttempts: 1, stepBudget: 16, + abortSignal: ctx?.abortSignal, }); isolatedDiffSummary.gateRepairAttempts += gateRepair.attempts; diff --git a/packages/cli/src/context/ingest/isolated-diff/textual-conflict-resolver.ts b/packages/cli/src/context/ingest/isolated-diff/textual-conflict-resolver.ts index 5ae551d1..c4a00448 100644 --- a/packages/cli/src/context/ingest/isolated-diff/textual-conflict-resolver.ts +++ b/packages/cli/src/context/ingest/isolated-diff/textual-conflict-resolver.ts @@ -19,6 +19,7 @@ export interface ResolveTextualConflictInput { reason: string; maxAttempts?: number; stepBudget?: number; + abortSignal?: AbortSignal; } const readIntegrationFileSchema = z.object({ @@ -208,6 +209,7 @@ export async function resolveTextualConflict( jobId: input.trace.context.jobId, unitKey: input.unitKey, }, + abortSignal: input.abortSignal, }), ); diff --git a/packages/cli/src/context/ingest/isolated-diff/work-unit-executor.ts b/packages/cli/src/context/ingest/isolated-diff/work-unit-executor.ts index 7475612e..5ab52102 100644 --- a/packages/cli/src/context/ingest/isolated-diff/work-unit-executor.ts +++ b/packages/cli/src/context/ingest/isolated-diff/work-unit-executor.ts @@ -14,6 +14,7 @@ export interface RunIsolatedWorkUnitInput { patchDir: string; trace: IngestTraceWriter; workUnit: WorkUnit; + abortSignal?: AbortSignal; run(child: IngestSessionWorktree): Promise; afterSuccess?(child: IngestSessionWorktree): Promise; } diff --git a/packages/cli/src/context/ingest/local-bundle-runtime.ts b/packages/cli/src/context/ingest/local-bundle-runtime.ts index 9d6aba95..e4c45b3f 100644 --- a/packages/cli/src/context/ingest/local-bundle-runtime.ts +++ 
b/packages/cli/src/context/ingest/local-bundle-runtime.ts @@ -12,6 +12,7 @@ import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic- import { createRuntimeToolDescriptorFromAiTool } from '../../context/llm/runtime-tools.js'; import { createLocalKtxLlmRuntimeFromConfig } from '../../context/llm/local-config.js'; import { KtxIngestEmbeddingPortAdapter } from '../../context/llm/embedding-port.js'; +import { createRateLimitGovernorConfig, RateLimitGovernor } from '../../context/llm/rate-limit-governor.js'; import { RuntimeAgentRunner, type AgentRunnerPort, type KtxLlmRuntimePort, type KtxRuntimeToolSet } from '../../context/llm/runtime-port.js'; import type { KtxEmbeddingProvider } from '../../llm/types.js'; import type { KtxLocalProject } from '../../context/project/project.js'; @@ -619,7 +620,7 @@ function localIngestLlmProviderGuardMessage(projectDir: string): string { ].join('\n'); } -function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { +function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions, rateLimitGovernor: RateLimitGovernor): { agentRunner: AgentRunnerPort; llmRuntime?: KtxLlmRuntimePort; } { @@ -628,6 +629,7 @@ function resolveAgentRunner(options: CreateLocalBundleIngestRuntimeOptions): { (options.createLlmRuntime ?? createLocalKtxLlmRuntimeFromConfig)(options.project.config.llm, { projectDir: options.project.projectDir, env: process.env, + rateLimitGovernor, }) ?? 
undefined; @@ -677,7 +679,13 @@ export function createLocalBundleIngestRuntime( const knowledgeIndex = new LocalKnowledgeIndex(options.project, embedding); const knowledgeEvents = new NoopKnowledgeEventPort(); const wikiService = new KnowledgeWikiService(rootFileStore, embedding, knowledgeIndex, options.project.git, logger); - const { agentRunner, llmRuntime } = resolveAgentRunner(options); + const rateLimitGovernor = new RateLimitGovernor( + createRateLimitGovernorConfig({ + ...options.project.config.ingest.rateLimit, + maxConcurrency: options.project.config.ingest.workUnits.maxConcurrency, + }), + ); + const { agentRunner, llmRuntime } = resolveAgentRunner(options, rateLimitGovernor); const promptService = new PromptService({ promptsDir, partials: [], logger }); const storage = new LocalIngestStorage(options.project); const registry = registerAdapters(options.adapters); @@ -717,6 +725,7 @@ export function createLocalBundleIngestRuntime( workUnitMaxConcurrency: options.project.config.ingest.workUnits.maxConcurrency, workUnitStepBudget: options.project.config.ingest.workUnits.stepBudget, workUnitFailureMode: options.project.config.ingest.workUnits.failureMode, + rateLimitGovernor, profileIngest: options.project.config.ingest.profile, ingestTraceLevel: ingestTraceLevelFromEnv(), }, diff --git a/packages/cli/src/context/ingest/local-ingest.ts b/packages/cli/src/context/ingest/local-ingest.ts index ec8a72f4..1a219629 100644 --- a/packages/cli/src/context/ingest/local-ingest.ts +++ b/packages/cli/src/context/ingest/local-ingest.ts @@ -3,6 +3,7 @@ import { cp, mkdir, rm } from 'node:fs/promises'; import { isAbsolute, resolve } from 'node:path'; import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js'; import type { KtxLogger } from '../../context/core/config.js'; +import { createAbortError, isAbortError } from '../../context/core/abort.js'; import type { KtxSemanticLayerComputePort } from 
'../../context/daemon/semantic-layer-compute.js'; import type { AgentRunnerPort, KtxLlmRuntimePort } from '../../context/llm/runtime-port.js'; import type { KtxLocalProject } from '../../context/project/project.js'; @@ -36,6 +37,7 @@ export interface RunLocalIngestOptions { queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null; + abortSignal?: AbortSignal; } export interface LocalIngestResult { @@ -123,10 +125,11 @@ function findAdapter(adapters: SourceAdapter[], source: string): SourceAdapter { return adapter; } -function localJobContext(jobId: string, memoryFlow?: MemoryFlowEventSink): IngestJobContext { +function localJobContext(jobId: string, memoryFlow?: MemoryFlowEventSink, abortSignal?: AbortSignal): IngestJobContext { return { jobId, ...(memoryFlow ? { memoryFlow } : {}), + ...(abortSignal ? { abortSignal } : {}), startPhase() { return new LocalIngestPhase(); }, @@ -158,6 +161,7 @@ async function runScheduledPullJob(options: { queryExecutor?: KtxSqlQueryExecutorPort; logger?: KtxLogger; embeddingProvider?: import('../../llm/types.js').KtxEmbeddingProvider | null; + abortSignal?: AbortSignal; }): Promise { const runtime = createLocalBundleIngestRuntime(options); const jobId = options.jobId ?? runtime.nextJobId(); @@ -169,7 +173,7 @@ async function runScheduledPullJob(options: { trigger: options.trigger ?? 
'manual_resync', bundleRef: { kind: 'scheduled_pull', config: options.pullConfig }, }, - localJobContext(jobId, options.memoryFlow), + localJobContext(jobId, options.memoryFlow, options.abortSignal), ); const report = await runtime.store.findByJobId(jobId); if (!report) { @@ -212,6 +216,7 @@ export async function runLocalIngest(options: RunLocalIngestOptions): Promise KtxRuntimeToolSet; getReconciliationActions: () => MemoryAction[]; onStepFinish?: (info: { passNumber: number; stepIndex: number; stepBudget: number }) => void; + abortSignal?: AbortSignal; }): Promise; } diff --git a/packages/cli/src/context/ingest/stages/stage-3-work-units.ts b/packages/cli/src/context/ingest/stages/stage-3-work-units.ts index ec514a02..a7387c8a 100644 --- a/packages/cli/src/context/ingest/stages/stage-3-work-units.ts +++ b/packages/cli/src/context/ingest/stages/stage-3-work-units.ts @@ -1,4 +1,5 @@ import type { KtxModelRole } from '../../../llm/types.js'; +import { isAbortError } from '../../core/abort.js'; import type { AgentRunnerPort, KtxRuntimeToolSet, RunLoopMetrics } from '../../../context/llm/runtime-port.js'; import type { CaptureSession, MemoryAction } from '../../../context/memory/types.js'; import { listTouchedSlSources, type TouchedSlSource } from '../../../context/tools/touched-sl-sources.js'; @@ -28,6 +29,7 @@ export interface WorkUnitExecutionDeps { connectionId: string; jobId: string; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + abortSignal?: AbortSignal; toolFailureCount?: (unitKey: string) => number; } @@ -106,8 +108,12 @@ export async function executeWorkUnit(deps: WorkUnitExecutionDeps, wu: WorkUnit) jobId: deps.jobId, }, onStepFinish: deps.onStepFinish, + abortSignal: deps.abortSignal, }); } catch (error) { + if (isAbortError(error)) { + throw error; + } return failWithResetFromCurrentHead(error instanceof Error ? 
error.message : String(error)); } diff --git a/packages/cli/src/context/ingest/stages/stage-4-reconciliation.ts b/packages/cli/src/context/ingest/stages/stage-4-reconciliation.ts index 5abc9bfb..c78e1b48 100644 --- a/packages/cli/src/context/ingest/stages/stage-4-reconciliation.ts +++ b/packages/cli/src/context/ingest/stages/stage-4-reconciliation.ts @@ -16,6 +16,7 @@ export interface ReconciliationContext { jobId: string; force?: boolean; onStepFinish?: (info: { stepIndex: number; stepBudget: number }) => void; + abortSignal?: AbortSignal; forceRun?: boolean; } @@ -40,6 +41,7 @@ export async function runReconciliationStage4(ctx: ReconciliationContext): Promi stepBudget: ctx.stepBudget, telemetryTags: { operationName: 'ingest-bundle-reconcile', source: ctx.sourceKey, jobId: ctx.jobId }, onStepFinish: ctx.onStepFinish, + abortSignal: ctx.abortSignal, }); return { skipped: false, stopReason: run.stopReason, error: run.error, ...(run.metrics ? { metrics: run.metrics } : {}) }; } diff --git a/packages/cli/src/context/ingest/types.ts b/packages/cli/src/context/ingest/types.ts index 337885af..925f3d82 100644 --- a/packages/cli/src/context/ingest/types.ts +++ b/packages/cli/src/context/ingest/types.ts @@ -220,5 +220,6 @@ export interface IngestJobPhase { export interface IngestJobContext { jobId: string; memoryFlow?: MemoryFlowEventSink; + abortSignal?: AbortSignal; startPhase(weight: number): IngestJobPhase; } diff --git a/packages/cli/src/context/llm/ai-sdk-runtime.ts b/packages/cli/src/context/llm/ai-sdk-runtime.ts index f5752355..d5a60c7b 100644 --- a/packages/cli/src/context/llm/ai-sdk-runtime.ts +++ b/packages/cli/src/context/llm/ai-sdk-runtime.ts @@ -3,7 +3,9 @@ import type { KtxLlmProvider } from '../../llm/types.js'; import { generateText, Output, stepCountIs, type FlexibleSchema, type TelemetrySettings, type ToolSet } from 'ai'; import type { z } from 'zod'; import { noopLogger, type KtxLogger } from '../../context/core/config.js'; +import { isAbortError } from 
'../core/abort.js'; import { summarizeKtxLlmDebugRequest, type KtxLlmDebugRequestRecorder } from './debug-request-recorder.js'; +import type { RateLimitGovernor, RateLimitProvider, RateLimitSignal } from './rate-limit-governor.js'; import { createAiSdkToolSet } from './runtime-tools.js'; import type { KtxGenerateObjectInput, @@ -40,12 +42,129 @@ export interface AiSdkKtxLlmRuntimeDeps { telemetry?: AgentTelemetryPort; logger?: KtxLogger; debugRequestRecorder?: KtxLlmDebugRequestRecorder; + rateLimitGovernor?: Pick; } function hasTools(tools: Record): boolean { return Object.keys(tools).length > 0; } +/** Picks the provider label used in rate-limit signals: a provider string containing 'vertex' or 'google' yields 'vertex'; anything else, including a missing provider, yields 'anthropic-api'. NOTE(review): other vendors are therefore also labelled 'anthropic-api' - confirm that is intended. */ function modelProviderName(model: unknown): RateLimitProvider { + const provider = (model as { provider?: string }).provider ?? ''; + return provider.includes('vertex') || provider.includes('google') ? 'vertex' : 'anthropic-api'; +} + +/** Names of one limit/remaining response-header pair plus the rate-limit type label reported for it. */ interface HeaderLimitPair { + limit: string; + remaining: string; + rateLimitType: string; +} + +/** Header pairs checked for utilization: four anthropic-ratelimit-* pairs followed by two x-ratelimit-* pairs. */ const RATE_LIMIT_HEADER_PAIRS: HeaderLimitPair[] = [ + { + limit: 'anthropic-ratelimit-requests-limit', + remaining: 'anthropic-ratelimit-requests-remaining', + rateLimitType: 'rpm', + }, + { + limit: 'anthropic-ratelimit-tokens-limit', + remaining: 'anthropic-ratelimit-tokens-remaining', + rateLimitType: 'tpm', + }, + { + limit: 'anthropic-ratelimit-input-tokens-limit', + remaining: 'anthropic-ratelimit-input-tokens-remaining', + rateLimitType: 'itpm', + }, + { + limit: 'anthropic-ratelimit-output-tokens-limit', + remaining: 'anthropic-ratelimit-output-tokens-remaining', + rateLimitType: 'otpm', + }, + { + limit: 'x-ratelimit-limit-requests', + remaining: 'x-ratelimit-remaining-requests', + rateLimitType: 'rpm', + }, + { + limit: 'x-ratelimit-limit-tokens', + remaining: 'x-ratelimit-remaining-tokens', + rateLimitType: 'tpm', + }, +]; + +/** Converts response headers to a plain string map. Non-objects give {}. When the value exposes a get() function, only the names listed in RATE_LIMIT_HEADER_PAIRS are read and only string results are kept; otherwise string and number entries are copied with lower-cased keys and stringified values. */ function normalizeHeaders(headers: unknown): Record { + if (!headers || typeof headers !== 'object') { + return {}; + } + const get = (headers as { get?: unknown }).get; + if (typeof get === 'function') { +
const out: Record = {}; + for (const pair of RATE_LIMIT_HEADER_PAIRS) { + const limit = get.call(headers, pair.limit); + const remaining = get.call(headers, pair.remaining); + if (typeof limit === 'string') out[pair.limit] = limit; + if (typeof remaining === 'string') out[pair.remaining] = remaining; + } + return out; + } + return Object.fromEntries( + Object.entries(headers as Record) + .filter((entry): entry is [string, string | number] => typeof entry[1] === 'string' || typeof entry[1] === 'number') + .map(([key, value]) => [key.toLowerCase(), String(value)]), + ); +} + +/** Parses headers[key] with Number() and returns it only when finite and non-negative, otherwise undefined (a missing key becomes NaN and is rejected). NOTE(review): Number('') is 0, so an empty header value is accepted as 0. */ function numericHeader(headers: Record, key: string): number | undefined { + const value = Number(headers[key]); + return Number.isFinite(value) && value >= 0 ? value : undefined; +} + +/** Used fraction for one header pair: 1 - min(limit, remaining) / limit. Returns undefined when either header does not parse or the limit is not positive. */ function utilizationForPair(headers: Record, pair: HeaderLimitPair): number | undefined { + const limit = numericHeader(headers, pair.limit); + const remaining = numericHeader(headers, pair.remaining); + if (limit === undefined || remaining === undefined || limit <= 0) { + return undefined; + } + return 1 - Math.min(limit, remaining) / limit; +} + +/** Builds an 'allowed' signal from result.response.headers, choosing the pair with the highest utilization (the first one wins ties) and rounding it to 4 decimal places; returns undefined when no pair produces a value. */ function aiSdkHeaderRateLimitSignal(provider: RateLimitProvider, result: unknown): RateLimitSignal | undefined { + const headers = normalizeHeaders((result as { response?: { headers?: unknown } }).response?.headers); + let best: { utilization: number; rateLimitType: string } | undefined; + for (const pair of RATE_LIMIT_HEADER_PAIRS) { + const utilization = utilizationForPair(headers, pair); + if (utilization === undefined) { + continue; + } + if (!best || utilization > best.utilization) { + best = { utilization, rateLimitType: pair.rateLimitType }; + } + } + if (!best) { + return undefined; + } + return { + provider, + status: 'allowed', + rateLimitType: best.rateLimitType, + utilization: Number(best.utilization.toFixed(4)), + }; +} + +/** Reads error.retryAfter when it is a positive finite number. Values below 1000 are multiplied by 1000 and larger values are returned unchanged - presumably seconds versus milliseconds, TODO confirm against the error source; any other value gives undefined. */ function retryAfterMs(error: unknown): number | undefined { + const value = (error as { retryAfter?: unknown }).retryAfter; + if
(typeof value === 'number' && Number.isFinite(value) && value > 0) { + return value < 1_000 ? value * 1_000 : value; + } + return undefined; +} + +/** True when error.name is 'TooManyRequestsError' or its statusCode or status equals 429. NOTE(review): reads properties without a null check, so a thrown null or undefined would raise here. */ function isAiSdkRateLimitError(error: unknown): boolean { + const record = error as { name?: string; statusCode?: number; status?: number }; + return record.name === 'TooManyRequestsError' || record.statusCode === 429 || record.status === 429; +} + export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { private readonly logger: KtxLogger; @@ -53,6 +172,41 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { this.logger = deps.logger ?? noopLogger; } + /** Calls run() once the governor (if any) reports ready. On success it reports any header-derived utilization signal and returns the result. Abort errors, non-rate-limit errors, and the final allowed attempt rethrow; otherwise a 'rejected' http_429 signal (with retryAfterMs when available) is reported and the loop waits on the governor again. */ private async generateTextWithRateLimitRetry( + provider: RateLimitProvider, + abortSignal: AbortSignal | undefined, + run: () => Promise, + ): Promise { + // maxRetryAttempts() returns 1 when no governor is present or pacing is + // disabled, so a 429 throws immediately instead of hammering the provider + // with no backoff; the AI SDK's own maxRetries still handles transient 429s. + const maxAttempts = this.deps.rateLimitGovernor?.maxRetryAttempts() ?? 1; + let attempt = 0; + while (true) { + await this.deps.rateLimitGovernor?.waitForReady(abortSignal); + try { + const result = await run(); + const signal = aiSdkHeaderRateLimitSignal(provider, result); + if (signal) { + this.deps.rateLimitGovernor?.report(signal); + } + return result; + } catch (error) { + if (isAbortError(error) || !isAiSdkRateLimitError(error) || attempt >= maxAttempts - 1) { + throw error; + } + attempt += 1; + const retryAfter = retryAfterMs(error); + this.deps.rateLimitGovernor?.report({ + provider, + status: 'rejected', + rateLimitType: 'http_429', + ...(retryAfter !== undefined ?
{ retryAfterMs: retryAfter } : {}), + }); + } + } + } + async generateText(input: KtxGenerateTextInput): Promise { const model = this.deps.llmProvider.getModel(input.role); if ((model as { provider?: string }).provider === 'deterministic') { @@ -67,12 +221,13 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { }); const split = splitKtxSystemMessages(built.messages); const startedAt = Date.now(); - const result = await generateText({ + const request = { model, temperature: input.temperature ?? 0, ...(split.system ? { system: split.system } : {}), messages: split.messages, tools: built.tools as ToolSet, + ...(input.abortSignal ? { abortSignal: input.abortSignal } : {}), ...(hasTools(tools) ? { experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({ @@ -80,7 +235,8 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { }), } : {}), - }); + }; + const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), input.abortSignal, () => generateText(request)); input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) }); if (typeof result.text !== 'string') { throw new Error('KTX LLM text generation returned no text'); @@ -101,12 +257,13 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { }); const split = splitKtxSystemMessages(built.messages); const startedAt = Date.now(); - const result = await generateText({ + const request = { model, temperature: input.temperature ?? 0, ...(split.system ? { system: split.system } : {}), messages: split.messages, tools: built.tools as ToolSet, + ...(input.abortSignal ? { abortSignal: input.abortSignal } : {}), ...(hasTools(tools) ? 
{ experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({ @@ -115,7 +272,8 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { } : {}), output: Output.object({ schema: input.schema as unknown as FlexibleSchema }), - }); + }; + const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), input.abortSignal, () => generateText(request)); input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) }); if (result.output == null) { throw new Error('KTX LLM object generation returned no output'); @@ -152,7 +310,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { }), ); - const result = await generateText({ + const request = { model, temperature: 0, stopWhen: stepCountIs(params.stepBudget), @@ -163,6 +321,7 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { ...(promptMessages.system ? { system: promptMessages.system } : {}), messages: promptMessages.messages, tools: built.tools as ToolSet, + ...(params.abortSignal ? { abortSignal: params.abortSignal } : {}), onStepFinish: async () => { stepIndex += 1; stepBoundariesMs.push(Date.now() - startedAt); @@ -179,7 +338,8 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { ); } }, - }); + }; + const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), params.abortSignal, () => generateText(request)); return { stopReason: 'natural', metrics: { @@ -190,6 +350,9 @@ export class AiSdkKtxLlmRuntime implements KtxLlmRuntimePort { }, }; } catch (error) { + if (isAbortError(error)) { + throw error; + } const err = error instanceof Error ? 
error : new Error(String(error)); this.logger.warn(`[agent-runner] loop failed: ${err.message}`); return { diff --git a/packages/cli/src/context/llm/claude-code-runtime.ts b/packages/cli/src/context/llm/claude-code-runtime.ts index 0c1e6881..26bd0529 100644 --- a/packages/cli/src/context/llm/claude-code-runtime.ts +++ b/packages/cli/src/context/llm/claude-code-runtime.ts @@ -7,8 +7,10 @@ import { } from '@anthropic-ai/claude-agent-sdk'; import { z } from 'zod'; import { noopLogger, type KtxLogger } from '../../context/core/config.js'; +import { createAbortError, isAbortError, throwIfAborted } from '../core/abort.js'; import { createKtxClaudeCodeEnv } from './claude-code-env.js'; import { resolveClaudeCodeModel } from './claude-code-models.js'; +import type { RateLimitGovernor, RateLimitSignal } from './rate-limit-governor.js'; import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js'; import type { KtxGenerateObjectInput, @@ -21,7 +23,16 @@ import type { RunLoopStopReason, } from './runtime-port.js'; -type QueryFn = (params: Parameters[0]) => AsyncIterable; +type QueryResult = AsyncIterable & { + interrupt?: () => void | Promise; +}; + +type QueryFn = (params: Parameters[0]) => QueryResult; + +interface ClaudeQueryOutcome { + result: SDKResultMessage; + rejectedRateLimitSignal?: RateLimitSignal; +} function claudeTokenUsage(result: SDKResultMessage): LlmTokenUsage { const usage = (result as { usage?: { input_tokens?: number; output_tokens?: number } }).usage; @@ -43,6 +54,7 @@ export interface ClaudeCodeKtxLlmRuntimeDeps { query?: QueryFn; env?: NodeJS.ProcessEnv; logger?: KtxLogger; + rateLimitGovernor?: Pick; } const BUILTIN_TOOLS = [ @@ -157,6 +169,74 @@ function expectedMcpServerNames(tools: KtxRuntimeToolSet | undefined): Set 0 ? 
new Set([KTX_MCP_SERVER_NAME]) : new Set(); } +/** Case-insensitive text markers used to recognise a rate-limited Claude result. */ const CLAUDE_RATE_LIMIT_ERROR_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|overloaded|max_retries/i; + +/** Normalizes a reset time to a number of milliseconds. Positive finite numbers below 10_000_000_000 are multiplied by 1000 (presumably epoch seconds - TODO confirm) and the result is rounded; numeric strings take the same path; other strings go through Date.parse. Returns undefined for anything else. */ function normalizeClaudeResetAtMs(value: unknown): number | undefined { + if (typeof value === 'number' && Number.isFinite(value) && value > 0) { + return Math.round(value < 10_000_000_000 ? value * 1_000 : value); + } + if (typeof value === 'string') { + const numeric = Number(value); + if (Number.isFinite(numeric) && numeric > 0) { + return normalizeClaudeResetAtMs(numeric); + } + const parsed = Date.parse(value); + return Number.isFinite(parsed) ? parsed : undefined; + } + return undefined; +} + +/** Reports whether a result failed because of rate limiting: requires resultError(result) to be set, then accepts either a rejected signal seen during the stream or a marker match in the error message, stop_reason, terminal_reason, or string entries of errors. */ function isClaudeRateLimitResult(result: SDKResultMessage, rejectedSignal: RateLimitSignal | undefined): boolean { + const error = resultError(result); + if (!error) { + return false; + } + if (rejectedSignal?.status === 'rejected') { + return true; + } + const resultDetails = result as { + stop_reason?: unknown; + terminal_reason?: unknown; + errors?: unknown[]; + }; + const details = [ + error.message, + resultDetails.stop_reason, + resultDetails.terminal_reason, + ...(resultDetails.errors ?? []), + ] + .filter((value): value is string => typeof value === 'string' && value.length > 0) + .join('\n'); + return CLAUDE_RATE_LIMIT_ERROR_MARKERS.test(details); +} + +/** Translates a streamed SDK message into a governor signal. A rate_limit_event with rate_limit_info maps status 'rejected' to 'rejected', 'allowed_warning' to 'warning', and anything else to 'allowed'; a message whose type or subtype is api_retry becomes a 'warning' carrying retryAfterMs when retry_delay_ms is numeric. Other messages return null. */ function claudeRateLimitSignal(message: SDKMessage): RateLimitSignal | null { + const record = message as unknown as Record; + if (record.type === 'rate_limit_event') { + const info = record.rate_limit_info as Record | undefined; + if (!info) return null; + const rawStatus = typeof info.status === 'string' ? info.status : 'allowed'; + const resetAtMs = normalizeClaudeResetAtMs(info.resetsAt); + return { + provider: 'claude-subscription', + status: rawStatus === 'rejected' ? 'rejected' : rawStatus === 'allowed_warning' ? 'warning' : 'allowed', + ...(resetAtMs !== undefined ? { resetAtMs } : {}), + ...(typeof info.rateLimitType === 'string' ?
{ rateLimitType: info.rateLimitType } : {}), + ...(typeof info.utilization === 'number' ? { utilization: info.utilization } : {}), + }; + } + if (record.subtype === 'api_retry' || record.type === 'api_retry') { + const retryDelayMs = typeof record.retry_delay_ms === 'number' ? record.retry_delay_ms : undefined; + return { + provider: 'claude-subscription', + status: 'warning', + ...(retryDelayMs !== undefined ? { retryAfterMs: retryDelayMs } : {}), + rateLimitType: 'api_retry', + }; + } + return null; +} + function managedMcpSettings(serverNames: string[]): NonNullable { return { allowManagedMcpServersOnly: true, @@ -217,21 +297,63 @@ async function collectResult(params: { allowedToolIds: Set; expectedMcpServerNames: Set; onAssistantTurn?: () => Promise; -}): Promise { + rateLimitGovernor?: Pick; + abortSignal?: AbortSignal; +}): Promise { let result: SDKResultMessage | undefined; - for await (const message of params.query({ prompt: params.prompt, options: params.options })) { - assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames); - if (countsAsAssistantTurn(message)) { - await params.onAssistantTurn?.(); - } - if (isResult(message)) { - result = message; + let rejectedRateLimitSignal: RateLimitSignal | undefined; + throwIfAborted(params.abortSignal); + await params.rateLimitGovernor?.waitForReady(params.abortSignal); + throwIfAborted(params.abortSignal); + const queryResult = params.query({ prompt: params.prompt, options: params.options }); + const onAbort = () => { + void Promise.resolve(queryResult.interrupt?.()).catch(() => undefined); + }; + params.abortSignal?.addEventListener('abort', onAbort, { once: true }); + try { + for await (const message of queryResult) { + throwIfAborted(params.abortSignal); + const rateLimitSignal = claudeRateLimitSignal(message); + if (rateLimitSignal) { + if (rateLimitSignal.status === 'rejected') { + rejectedRateLimitSignal = rateLimitSignal; + } + params.rateLimitGovernor?.report(rateLimitSignal); 
+ } + assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames); + if (countsAsAssistantTurn(message)) { + await params.onAssistantTurn?.(); + } + if (isResult(message)) { + result = message; + } } + } finally { + params.abortSignal?.removeEventListener('abort', onAbort); + } + if (params.abortSignal?.aborted) { + throw createAbortError(); } if (!result) { throw new Error('Claude Code query returned no result message'); } - return result; + return { + result, + ...(rejectedRateLimitSignal ? { rejectedRateLimitSignal } : {}), + }; +} + +/** Runs collectResult and repeats it while isClaudeRateLimitResult() is true, up to maxRetryAttempts() attempts (1 when no governor is supplied); returns the last SDK result. NOTE(review): this loop never reports to the governor itself, and every retry replays params.onAssistantTurn for the new stream - verify callers' step counters and pacing when only the marker regex matched. */ async function collectResultWithRateLimitRetry(params: Parameters[0]): Promise { + // maxRetryAttempts() returns 1 when no governor is present or pacing is + // disabled, so a rate-limited result surfaces without an extra query; the + // Claude Code SDK applies its own backoff for transient rejections. + const maxAttempts = params.rateLimitGovernor?.maxRetryAttempts() ?? 1; + for (let attempt = 0; ; attempt += 1) { + const outcome = await collectResult(params); + if (!isClaudeRateLimitResult(outcome.result, outcome.rejectedRateLimitSignal) || attempt >= maxAttempts - 1) { + return outcome.result; + } + } +} + export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort { @@ -252,12 +374,14 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort { tools: input.tools, }); const startedAt = Date.now(); - const result = await collectResult({ + const result = await collectResultWithRateLimitRetry({ query: this.runQuery, prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'), options, allowedToolIds: new Set(mcpToolIds(input.tools ??
{})), expectedMcpServerNames: expectedMcpServerNames(input.tools), + rateLimitGovernor: this.deps.rateLimitGovernor, + abortSignal: input.abortSignal, }); input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) }); const error = resultError(result); @@ -289,12 +413,14 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort { outputFormat: { type: 'json_schema' as const, schema: jsonSchema(input.schema as z.ZodType) }, }; const startedAt = Date.now(); - const result = await collectResult({ + const result = await collectResultWithRateLimitRetry({ query: this.runQuery, prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'), options, allowedToolIds: new Set([...mcpToolIds(input.tools ?? {}), STRUCTURED_OUTPUT_TOOL_NAME]), expectedMcpServerNames: expectedMcpServerNames(input.tools), + rateLimitGovernor: this.deps.rateLimitGovernor, + abortSignal: input.abortSignal, }); input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) }); const error = resultError(result); @@ -319,12 +445,14 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort { maxTurns: params.stepBudget, tools: params.toolSet, }); - const result = await collectResult({ + const result = await collectResultWithRateLimitRetry({ query: this.runQuery, prompt: params.userPrompt, options: { ...options, systemPrompt: params.systemPrompt }, allowedToolIds: new Set(mcpToolIds(params.toolSet)), expectedMcpServerNames: expectedMcpServerNames(params.toolSet), + rateLimitGovernor: this.deps.rateLimitGovernor, + abortSignal: params.abortSignal, onAssistantTurn: async () => { stepIndex += 1; stepBoundariesMs.push(Date.now() - startedAt); @@ -355,6 +483,9 @@ export class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort { }, }; } catch (error) { + if (isAbortError(error)) { + throw error; + } const err = error instanceof Error ? 
error : new Error(String(error)); return { stopReason: 'error', @@ -388,7 +519,7 @@ export async function runClaudeCodeAuthProbe(input: { env: input.env, maxTurns: 1, }); - const result = await collectResult({ + const result = await collectResultWithRateLimitRetry({ query: input.query ?? defaultQuery, prompt: 'Reply with exactly: ok', options, diff --git a/packages/cli/src/context/llm/codex-runtime.ts b/packages/cli/src/context/llm/codex-runtime.ts index 3535072b..2958b3f8 100644 --- a/packages/cli/src/context/llm/codex-runtime.ts +++ b/packages/cli/src/context/llm/codex-runtime.ts @@ -1,5 +1,6 @@ import { z } from 'zod'; import { noopLogger, type KtxLogger } from '../core/config.js'; +import { isAbortError, linkAbortSignal } from '../core/abort.js'; import { isCompletedAgentStep, summarizeCodexExecEvents, type CodexExecEventSummary } from './codex-exec-events.js'; import { startCodexRuntimeMcpServer, @@ -8,6 +9,7 @@ import { import { resolveCodexModel } from './codex-models.js'; import { buildCodexRuntimeConfig } from './codex-runtime-config.js'; import { CodexSdkCliRunner, type CodexSdkRunner } from './codex-sdk-runner.js'; +import type { RateLimitGovernor } from './rate-limit-governor.js'; import type { KtxGenerateObjectInput, KtxGenerateTextInput, @@ -24,6 +26,7 @@ export interface CodexKtxLlmRuntimeDeps { runner?: CodexSdkRunner; startMcpServer?: (input: { projectDir: string; toolSet: KtxRuntimeToolSet }) => Promise; logger?: KtxLogger; + rateLimitGovernor?: Pick; } function modelForRole(modelSlots: CodexKtxLlmRuntimeDeps['modelSlots'], role: string): string { @@ -159,6 +162,12 @@ function runtimeToolNames(toolSet: KtxRuntimeToolSet | undefined): string[] { return Object.values(toolSet ?? 
{}).map((descriptor) => descriptor.name); } +/** Case-insensitive text markers used to recognise a rate-limited Codex failure. */ const CODEX_RATE_LIMIT_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|temporarily overloaded/i; + +/** True when an error is present and its message matches CODEX_RATE_LIMIT_MARKERS. */ function isCodexRateLimitError(error: Error | undefined): boolean { + return !!error && CODEX_RATE_LIMIT_MARKERS.test(error.message); +} + export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { private readonly runner: CodexSdkRunner; private readonly logger: KtxLogger; @@ -168,6 +177,37 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { this.logger = deps.logger ?? noopLogger; } + /** Calls run() after waiting on the governor (if any). A result whose getError() matches the rate-limit markers, or a thrown non-abort error that matches them, is retried until maxRetryAttempts() attempts are used, with an opaque 'rejected' signal reported before each retry. Abort errors are rethrown immediately; on the last attempt the result is returned or the error rethrown as-is. */ private async runWithRateLimitRetry( + abortSignal: AbortSignal | undefined, + run: () => Promise, + getError: (result: T) => Error | undefined, + ): Promise { + // maxRetryAttempts() returns 1 when no governor is present or pacing is + // disabled, so an opaque rate-limit failure surfaces on the first attempt + // instead of being retried with no backoff. + const maxAttempts = this.deps.rateLimitGovernor?.maxRetryAttempts() ?? 1; + for (let attempt = 0; ; attempt += 1) { + await this.deps.rateLimitGovernor?.waitForReady(abortSignal); + const lastAttempt = attempt >= maxAttempts - 1; + try { + const result = await run(); + const error = getError(result); + if (!isCodexRateLimitError(error) || lastAttempt) { + return result; + } + } catch (error) { + if (isAbortError(error)) { + throw error; + } + const err = error instanceof Error ?
error : new Error(String(error)); + if (!isCodexRateLimitError(err) || lastAttempt) { + throw error; + } + } + this.deps.rateLimitGovernor?.report({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' }); + } + } + async generateText(input: KtxGenerateTextInput): Promise { const startedAt = Date.now(); const model = modelForRole(this.deps.modelSlots, input.role); @@ -190,18 +230,26 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { } : {}), }); - const collected = await collectEvents( - await this.runner.runStreamed({ - projectDir: this.deps.projectDir, - model, - prompt: promptWithSystem(input.system, input.prompt), - configOverrides: config.configOverrides, - env: config.env, - }), + const result = await this.runWithRateLimitRetry( + input.abortSignal, + async () => { + const collected = await collectEvents( + await this.runner.runStreamed({ + projectDir: this.deps.projectDir, + model, + prompt: promptWithSystem(input.system, input.prompt), + configOverrides: config.configOverrides, + env: config.env, + ...(input.abortSignal ? 
{ signal: input.abortSignal } : {}), + }), + ); + const summary = summarizeCodexExecEvents(collected.events, { startedAt }); + return { collected, summary }; + }, + ({ collected, summary }) => summaryError(summary, collected.streamError), ); - const summary = summarizeCodexExecEvents(collected.events, { startedAt }); - input.onMetrics?.(metrics(summary, startedAt)); - return assertSuccessfulText(summary, collected.streamError); + input.onMetrics?.(metrics(result.summary, startedAt)); + return assertSuccessfulText(result.summary, result.collected.streamError); } finally { await mcp?.close(); } @@ -231,19 +279,27 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { } : {}), }); - const collected = await collectEvents( - await this.runner.runStreamed({ - projectDir: this.deps.projectDir, - model, - prompt: promptWithSystem(input.system, input.prompt), - configOverrides: config.configOverrides, - env: config.env, - outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }) as Record, - }), + const result = await this.runWithRateLimitRetry( + input.abortSignal, + async () => { + const collected = await collectEvents( + await this.runner.runStreamed({ + projectDir: this.deps.projectDir, + model, + prompt: promptWithSystem(input.system, input.prompt), + configOverrides: config.configOverrides, + env: config.env, + outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }) as Record, + ...(input.abortSignal ? 
{ signal: input.abortSignal } : {}), + }), + ); + const summary = summarizeCodexExecEvents(collected.events, { startedAt }); + return { collected, summary }; + }, + ({ collected, summary }) => summaryError(summary, collected.streamError), ); - const summary = summarizeCodexExecEvents(collected.events, { startedAt }); - input.onMetrics?.(metrics(summary, startedAt)); - return parseStructuredOutput(input.schema, assertSuccessfulText(summary, collected.streamError)); + input.onMetrics?.(metrics(result.summary, startedAt)); + return parseStructuredOutput(input.schema, assertSuccessfulText(result.summary, result.collected.streamError)); } finally { await mcp?.close(); } @@ -272,7 +328,6 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { } : {}), }); - const abortController = new AbortController(); const onStep = async (stepIndex: number): Promise => { try { await params.onStepFinish?.({ stepIndex, stepBudget: params.stepBudget }); @@ -282,31 +337,50 @@ export class CodexKtxLlmRuntime implements KtxLlmRuntimePort { ); } }; - const collected = await collectEvents( - await this.runner.runStreamed({ - projectDir: this.deps.projectDir, - model, - prompt: promptWithSystem(params.systemPrompt, params.userPrompt), - configOverrides: config.configOverrides, - env: config.env, - signal: abortController.signal, - }), - { stepBudget: params.stepBudget, abortController, onStep }, + const result = await this.runWithRateLimitRetry( + params.abortSignal, + async () => { + const linked = linkAbortSignal(params.abortSignal); + const abortController = linked.controller; + try { + const collected = await collectEvents( + await this.runner.runStreamed({ + projectDir: this.deps.projectDir, + model, + prompt: promptWithSystem(params.systemPrompt, params.userPrompt), + configOverrides: config.configOverrides, + env: config.env, + signal: abortController.signal, + }), + { stepBudget: params.stepBudget, abortController, onStep }, + ); + const summary = 
summarizeCodexExecEvents(collected.events, { startedAt }); + return { collected, summary }; + } finally { + linked.dispose(); + } + }, + ({ collected, summary }) => summaryError(summary, collected.streamError), ); - const summary = summarizeCodexExecEvents(collected.events, { startedAt }); - const error = summaryError(summary, collected.streamError); - const stopReason = collected.budgetExceeded ? 'budget' : error ? 'error' : summary.stopReason; + const error = summaryError(result.summary, result.collected.streamError); + if (isAbortError(error)) { + throw error; + } + const stopReason = result.collected.budgetExceeded ? 'budget' : error ? 'error' : result.summary.stopReason; return { stopReason, ...(stopReason === 'error' && error ? { error } : {}), metrics: { totalMs: Date.now() - startedAt, - usage: summary.usage, - stepCount: summary.stepCount, - stepBoundariesMs: summary.stepBoundariesMs, + usage: result.summary.usage, + stepCount: result.summary.stepCount, + stepBoundariesMs: result.summary.stepBoundariesMs, }, }; } catch (error) { + if (isAbortError(error)) { + throw error; + } const err = error instanceof Error ? 
error : new Error(String(error)); return { stopReason: 'error', diff --git a/packages/cli/src/context/llm/local-config.ts b/packages/cli/src/context/llm/local-config.ts index 58bd29a5..4c2502d1 100644 --- a/packages/cli/src/context/llm/local-config.ts +++ b/packages/cli/src/context/llm/local-config.ts @@ -6,16 +6,28 @@ import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/ import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js'; import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js'; import { CodexKtxLlmRuntime } from './codex-runtime.js'; +import type { RateLimitGovernor } from './rate-limit-governor.js'; import type { KtxLlmRuntimePort } from './runtime-port.js'; +type ClaudeCodeRuntimeDeps = ConstructorParameters[0] & { + rateLimitGovernor?: RateLimitGovernor; +}; +type CodexRuntimeDeps = ConstructorParameters[0] & { + rateLimitGovernor?: RateLimitGovernor; +}; +type AiSdkRuntimeDeps = ConstructorParameters[0] & { + rateLimitGovernor?: RateLimitGovernor; +}; + interface LocalConfigDeps { env?: NodeJS.ProcessEnv; projectDir?: string; + rateLimitGovernor?: RateLimitGovernor; createKtxLlmProvider?: typeof createKtxLlmProvider; createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider; - createClaudeCodeRuntime?: (deps: ConstructorParameters[0]) => KtxLlmRuntimePort; - createCodexRuntime?: (deps: ConstructorParameters[0]) => KtxLlmRuntimePort; - createAiSdkRuntime?: (deps: { llmProvider: KtxLlmProvider }) => KtxLlmRuntimePort; + createClaudeCodeRuntime?: (deps: ClaudeCodeRuntimeDeps) => KtxLlmRuntimePort; + createCodexRuntime?: (deps: CodexRuntimeDeps) => KtxLlmRuntimePort; + createAiSdkRuntime?: (deps: AiSdkRuntimeDeps) => KtxLlmRuntimePort; } function resolveOptional(value: string | undefined, env: NodeJS.ProcessEnv): string | undefined { @@ -129,6 +141,7 @@ export function createLocalKtxLlmRuntimeFromConfig( projectDir, modelSlots: resolved.modelSlots, env: deps.env, + rateLimitGovernor: deps.rateLimitGovernor, }); } if 
(resolved.backend === 'codex') { @@ -139,10 +152,14 @@ export function createLocalKtxLlmRuntimeFromConfig( return (deps.createCodexRuntime ?? ((runtimeDeps) => new CodexKtxLlmRuntime(runtimeDeps)))({ projectDir, modelSlots: resolved.modelSlots, + rateLimitGovernor: deps.rateLimitGovernor, }); } const llmProvider = (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved); - return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({ llmProvider }); + return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({ + llmProvider, + rateLimitGovernor: deps.rateLimitGovernor, + }); } export function resolveLocalKtxEmbeddingConfig( diff --git a/packages/cli/src/context/llm/rate-limit-governor.ts b/packages/cli/src/context/llm/rate-limit-governor.ts new file mode 100644 index 00000000..909e4c44 --- /dev/null +++ b/packages/cli/src/context/llm/rate-limit-governor.ts @@ -0,0 +1,387 @@ +import { createAbortError, throwIfAborted } from '../core/abort.js'; + +export type RateLimitProvider = 'claude-subscription' | 'anthropic-api' | 'vertex' | 'codex'; +type RateLimitSignalStatus = 'allowed' | 'warning' | 'rejected'; + +export interface RateLimitSignal { + provider: RateLimitProvider; + status: RateLimitSignalStatus; + resetAtMs?: number; + retryAfterMs?: number; + utilization?: number; + rateLimitType?: string; +} + +export interface RateLimitRetryConfig { + maxAttempts: number; + baseDelayMs: number; + maxDelayMs: number; + jitter: boolean; +} + +export interface RateLimitGovernorConfig { + enabled: boolean; + maxConcurrency: number; + throttleThreshold: number; + minConcurrencyUnderPressure: number; + maxWaitMs?: number; + waitStateTickMs: number; + retry: RateLimitRetryConfig; +} + +export type RateLimitWaitState = + | { + kind: 'rate_limit_observed'; + provider: RateLimitProvider; + status: RateLimitSignalStatus; + rateLimitType?: string; + resetAtMs?: number; + retryAfterMs?: number; + utilization?: 
number; + } + | { + kind: 'concurrency_adjusted'; + provider: RateLimitProvider; + from: number; + to: number; + reason: string; + rateLimitType?: string; + utilization?: number; + } + | { + kind: 'wait_started' | 'wait_tick' | 'wait_finished'; + provider: RateLimitProvider; + rateLimitType?: string; + resumeAtMs: number; + remainingMs: number; + }; + +export interface RateLimitGovernorDeps { + now?: () => number; + sleep?: (ms: number, signal?: AbortSignal) => Promise; + random?: () => number; +} + +export type RateLimitRelease = () => void; +type Subscriber = (state: RateLimitWaitState) => void; + +const defaultSleep = (ms: number, signal?: AbortSignal): Promise => + new Promise((resolve, reject) => { + if (signal?.aborted) { + reject(createAbortError()); + return; + } + const timeout = setTimeout(resolve, ms); + signal?.addEventListener( + 'abort', + () => { + clearTimeout(timeout); + reject(createAbortError()); + }, + { once: true }, + ); + }); + +export function createRateLimitGovernorConfig( + input: Partial & { retry?: Partial } = {}, +): RateLimitGovernorConfig { + return { + enabled: input.enabled ?? true, + maxConcurrency: input.maxConcurrency ?? 1, + throttleThreshold: input.throttleThreshold ?? 0.8, + minConcurrencyUnderPressure: input.minConcurrencyUnderPressure ?? 1, + ...(input.maxWaitMs !== undefined ? { maxWaitMs: input.maxWaitMs } : {}), + waitStateTickMs: input.waitStateTickMs ?? 1_000, + retry: { + maxAttempts: input.retry?.maxAttempts ?? 6, + baseDelayMs: input.retry?.baseDelayMs ?? 1_000, + maxDelayMs: input.retry?.maxDelayMs ?? 60_000, + jitter: input.retry?.jitter ?? 
true, + }, + }; +} + +export class RateLimitGovernor { + private readonly now: () => number; + private readonly sleep: (ms: number, signal?: AbortSignal) => Promise; + private readonly random: () => number; + private readonly subscribers = new Set(); + private waiters: Array<() => void> = []; + private active = 0; + private effectiveLimit: number; + private pausedUntilMs: number | null = null; + private pausedProvider: RateLimitProvider | null = null; + private pausedRateLimitType: string | undefined; + private pausedTickMs: number | null = null; + private opaqueAttempts = new Map(); + private pauseGeneration = 0; + private visibleWaitAbort: AbortController | null = null; + + constructor( + private readonly config: RateLimitGovernorConfig, + deps: RateLimitGovernorDeps = {}, + ) { + this.now = deps.now ?? Date.now; + this.sleep = deps.sleep ?? defaultSleep; + this.random = deps.random ?? Math.random; + this.effectiveLimit = Math.max(1, config.maxConcurrency); + } + + currentLimit(): number { + return this.config.enabled ? this.effectiveLimit : this.config.maxConcurrency; + } + + /** + * Total attempts a runtime should make for a single rate-limited LLM call, + * including the first try. Returns 1 (no outer retry) when pacing is disabled: + * the outer retry loop only exists to cooperate with this governor's pause, so + * without active pacing there is no backoff to apply and the backend's own + * retry handles transient rejections. + */ + maxRetryAttempts(): number { + return this.config.enabled ? 
Math.max(1, this.config.retry.maxAttempts) : 1; + } + + activeSlots(): number { + return this.active; + } + + subscribe(cb: Subscriber): () => void { + this.subscribers.add(cb); + if (this.pausedUntilMs !== null) { + this.startVisibleWaitTicker(); + } + return () => { + this.subscribers.delete(cb); + if (this.subscribers.size === 0) { + this.stopVisibleWaitTicker(); + this.wakeWaiters(); + } + }; + } + + report(signal: RateLimitSignal): void { + if (!this.config.enabled) { + return; + } + this.emit({ + kind: 'rate_limit_observed', + provider: signal.provider, + status: signal.status, + ...(signal.rateLimitType ? { rateLimitType: signal.rateLimitType } : {}), + ...(signal.resetAtMs !== undefined ? { resetAtMs: signal.resetAtMs } : {}), + ...(signal.retryAfterMs !== undefined ? { retryAfterMs: signal.retryAfterMs } : {}), + ...(signal.utilization !== undefined ? { utilization: signal.utilization } : {}), + }); + + if (signal.status === 'rejected') { + this.applyPause(signal); + return; + } + + if (signal.status === 'warning' || (signal.utilization ?? 0) >= this.config.throttleThreshold) { + this.adjustLimit(Math.max(1, this.config.minConcurrencyUnderPressure), signal, 'provider pressure'); + return; + } + + this.opaqueAttempts.delete(signal.provider); + if ((signal.utilization ?? 
0) < this.config.throttleThreshold) { + this.adjustLimit(Math.max(1, this.config.maxConcurrency), signal, 'provider recovered'); + } + } + + async waitForReady(signal?: AbortSignal): Promise { + throwIfAborted(signal); + if (!this.config.enabled) { + return; + } + await this.waitForPause(signal); + throwIfAborted(signal); + } + + async acquireWorkSlot(signal?: AbortSignal): Promise { + throwIfAborted(signal); + if (!this.config.enabled) { + this.active += 1; + return () => { + this.active -= 1; + }; + } + + while (true) { + throwIfAborted(signal); + await this.waitForPause(signal); + throwIfAborted(signal); + if (this.active < this.effectiveLimit) { + this.active += 1; + let released = false; + return () => { + if (released) return; + released = true; + this.active -= 1; + this.wakeWaiters(); + }; + } + await this.waitForSlot(signal); + } + } + + private applyPause(signal: RateLimitSignal): void { + const resumeAtMs = this.resumeAtMsFor(signal); + const boundedResumeAtMs = + this.config.maxWaitMs === undefined ? resumeAtMs : Math.min(resumeAtMs, this.now() + this.config.maxWaitMs); + if (this.pausedUntilMs === null || boundedResumeAtMs > this.pausedUntilMs) { + this.pausedUntilMs = boundedResumeAtMs; + this.pausedProvider = signal.provider; + this.pausedRateLimitType = signal.rateLimitType; + this.pausedTickMs = signal.rateLimitType === 'opaque' ? Math.max(1, boundedResumeAtMs - this.now()) : null; + this.emitWait('wait_started'); + this.startVisibleWaitTicker(); + this.wakeWaiters(); + } + this.adjustLimit(Math.max(1, this.config.minConcurrencyUnderPressure), signal, 'provider rejected'); + } + + private resumeAtMsFor(signal: RateLimitSignal): number { + if (signal.resetAtMs !== undefined) { + return signal.resetAtMs; + } + if (signal.retryAfterMs !== undefined) { + return this.now() + signal.retryAfterMs; + } + const attempts = this.opaqueAttempts.get(signal.provider) ?? 
0; + this.opaqueAttempts.set(signal.provider, Math.min(attempts + 1, this.config.retry.maxAttempts)); + const base = Math.min( + this.config.retry.maxDelayMs, + this.config.retry.baseDelayMs * 2 ** Math.min(attempts, this.config.retry.maxAttempts - 1), + ); + const jitterMultiplier = this.config.retry.jitter ? 0.75 + this.random() * 0.5 : 1; + return this.now() + Math.round(base * jitterMultiplier); + } + + private adjustLimit(to: number, signal: RateLimitSignal, reason: string): void { + const bounded = Math.max(1, Math.min(this.config.maxConcurrency, to)); + if (bounded === this.effectiveLimit) { + return; + } + const from = this.effectiveLimit; + this.effectiveLimit = bounded; + this.emit({ + kind: 'concurrency_adjusted', + provider: signal.provider, + from, + to: bounded, + reason, + ...(signal.rateLimitType ? { rateLimitType: signal.rateLimitType } : {}), + ...(signal.utilization !== undefined ? { utilization: signal.utilization } : {}), + }); + this.wakeWaiters(); + } + + private startVisibleWaitTicker(): void { + if (this.subscribers.size === 0 || this.pausedUntilMs === null) { + return; + } + this.stopVisibleWaitTicker(); + const generation = (this.pauseGeneration += 1); + const controller = new AbortController(); + this.visibleWaitAbort = controller; + void this.runVisibleWaitTicker(generation, controller.signal).catch(() => undefined); + } + + private stopVisibleWaitTicker(): void { + this.visibleWaitAbort?.abort(); + this.visibleWaitAbort = null; + } + + private async runVisibleWaitTicker(generation: number, signal: AbortSignal): Promise { + while (!signal.aborted && generation === this.pauseGeneration && this.pausedUntilMs !== null) { + const remainingMs = this.pausedUntilMs - this.now(); + if (remainingMs <= 0) { + this.finishPause(generation); + return; + } + this.emitWait('wait_tick'); + await this.sleep(Math.min(this.pausedTickMs ?? 
this.config.waitStateTickMs, remainingMs), signal); + } + } + + private finishPause(generation?: number): void { + if (generation !== undefined && generation !== this.pauseGeneration) { + return; + } + this.emitWait('wait_finished'); + this.pausedUntilMs = null; + this.pausedProvider = null; + this.pausedRateLimitType = undefined; + this.pausedTickMs = null; + this.stopVisibleWaitTicker(); + this.wakeWaiters(); + } + + private async waitForPause(signal?: AbortSignal): Promise { + throwIfAborted(signal); + while (this.pausedUntilMs !== null) { + const remainingMs = this.pausedUntilMs - this.now(); + if (remainingMs <= 0) { + this.finishPause(); + return; + } + if (this.visibleWaitAbort !== null) { + await this.waitForSlot(signal); + } else { + await this.sleep(Math.min(this.pausedTickMs ?? this.config.waitStateTickMs, remainingMs), signal); + } + throwIfAborted(signal); + } + } + + private waitForSlot(signal?: AbortSignal): Promise { + if (signal?.aborted) { + return Promise.reject(createAbortError()); + } + return new Promise((resolve, reject) => { + const wake = () => { + cleanup(); + resolve(); + }; + const onAbort = () => { + cleanup(); + reject(createAbortError()); + }; + const cleanup = () => { + this.waiters = this.waiters.filter((candidate) => candidate !== wake); + signal?.removeEventListener('abort', onAbort); + }; + this.waiters.push(wake); + signal?.addEventListener('abort', onAbort, { once: true }); + }); + } + + private wakeWaiters(): void { + const waiters = this.waiters; + this.waiters = []; + for (const waiter of waiters) { + waiter(); + } + } + + private emitWait(kind: Extract): void { + if (this.pausedUntilMs === null || this.pausedProvider === null) { + return; + } + this.emit({ + kind, + provider: this.pausedProvider, + ...(this.pausedRateLimitType ? 
{ rateLimitType: this.pausedRateLimitType } : {}), + resumeAtMs: this.pausedUntilMs, + remainingMs: Math.max(0, this.pausedUntilMs - this.now()), + }); + } + + private emit(state: RateLimitWaitState): void { + for (const subscriber of this.subscribers) { + subscriber(state); + } + } +} diff --git a/packages/cli/src/context/llm/runtime-port.ts b/packages/cli/src/context/llm/runtime-port.ts index db648448..9fec6208 100644 --- a/packages/cli/src/context/llm/runtime-port.ts +++ b/packages/cli/src/context/llm/runtime-port.ts @@ -49,6 +49,7 @@ export interface RunLoopParams { stepBudget: number; telemetryTags: Record; onStepFinish?: (info: RunLoopStepInfo) => void | Promise; + abortSignal?: AbortSignal; } export interface RunLoopResult { @@ -64,6 +65,7 @@ export interface KtxGenerateTextInput { tools?: KtxRuntimeToolSet; temperature?: number; onMetrics?: (metrics: { totalMs: number; usage: LlmTokenUsage }) => void; + abortSignal?: AbortSignal; } export interface KtxGenerateObjectInput> { @@ -74,6 +76,7 @@ export interface KtxGenerateObjectInput void; + abortSignal?: AbortSignal; } export interface KtxLlmRuntimePort { diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index 03cd2ad4..2d07d121 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -3,7 +3,13 @@ import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; import type { KtxCliIo } from '../../cli-runtime.js'; import type { MemoryAgentInput } from '../../context/memory/types.js'; -import { emitTelemetryEvent, mcpTelemetrySampleRate, shouldEmitMcpTelemetry } from '../../telemetry/index.js'; +import { + emitTelemetryEvent, + mcpTelemetrySampleRate, + reportException, + shouldEmitMcpTelemetry, +} from '../../telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from '../../telemetry/redaction-secrets.js'; import { scrubErrorClass } from 
'../../telemetry/scrubber.js'; import type { KtxMcpClientInfo, @@ -518,11 +524,26 @@ function registerParsedTool( }, schema: TSchema, handler: (input: z.infer, context?: KtxMcpToolHandlerContext) => Promise, + telemetry?: { projectDir?: string; io?: KtxCliIo }, ): void { server.registerTool(name, config, async (input, context) => { try { return await handler(schema.parse(input), context); } catch (error) { + if (telemetry?.io) { + await reportException({ + error, + context: { source: `mcp:${name}`, handled: true, fatal: false }, + projectDir: telemetry.projectDir, + io: telemetry.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + projectDir: telemetry.projectDir, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); + } return jsonErrorToolResult(formatToolError(error)); } }); @@ -571,6 +592,20 @@ function instrumentMcpServer( } return result; } catch (error) { + if (telemetry.io) { + await reportException({ + error, + context: { source: `mcp:${name}`, handled: true, fatal: false }, + projectDir: telemetry.projectDir, + io: telemetry.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + projectDir: telemetry.projectDir, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); + } if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) { const errorClass = scrubErrorClass(error); await emitTelemetryEvent({ @@ -596,6 +631,7 @@ function instrumentMcpServer( export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void { const { ports, userContext } = deps; + const toolTelemetry = { projectDir: deps.projectDir, io: deps.io }; const server = instrumentMcpServer(deps.server, { projectDir: deps.projectDir, io: deps.io, @@ -616,6 +652,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, connectionListSchema, async () => jsonToolResult({ connections: await connections.list() }), + toolTelemetry, ); } @@ -640,6 +677,7 @@ export 
function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void limit: input.limit, }), ), + toolTelemetry, ); registerParsedTool( @@ -657,6 +695,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void const page = await knowledge.read({ userId: userContext.userId, key: input.key }); return page ? jsonToolResult(page) : jsonErrorToolResult(`Wiki page "${input.key}" was not found.`); }, + toolTelemetry, ); } @@ -679,6 +718,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ? jsonToolResult(source) : jsonErrorToolResult(`Semantic-layer source "${input.sourceName}" was not found.`); }, + toolTelemetry, ); registerParsedTool( @@ -711,6 +751,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ); return jsonToolResult(projectSlQueryResult(result, input.include)); }, + toolTelemetry, ); } @@ -728,6 +769,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, entityDetailsSchema, async (input) => jsonToolResult(await entityDetails.read(input)), + toolTelemetry, ); } @@ -745,6 +787,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, dictionarySearchSchema, async (input) => jsonToolResult(await dictionarySearch.search(input)), + toolTelemetry, ); } @@ -762,6 +805,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, discoverDataSchema, async (input) => jsonToolResult({ refs: await discover.search(input) }), + toolTelemetry, ); } @@ -791,6 +835,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ), ); }, + toolTelemetry, ); } @@ -818,6 +863,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }; return jsonToolResult(await memoryIngest.ingest(ingestInput)); }, + toolTelemetry, ); registerParsedTool( @@ -835,6 +881,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void 
const status = await memoryIngest.status(input.runId); return status ? jsonToolResult(status) : jsonErrorToolResult(`Memory ingest run "${input.runId}" was not found.`); }, + toolTelemetry, ); } } diff --git a/packages/cli/src/context/project/config.ts b/packages/cli/src/context/project/config.ts index cbea79b6..fd7f482c 100644 --- a/packages/cli/src/context/project/config.ts +++ b/packages/cli/src/context/project/config.ts @@ -100,6 +100,44 @@ const workUnitsSchema = z }) .describe('Concurrency and failure handling for ingest work units.'); +const ingestRateLimitRetrySchema = z + .strictObject({ + maxAttempts: z + .int() + .positive() + .default(6) + .describe( + 'Maximum attempts for a single rate-limited LLM call before the failure surfaces, counting the first try. Also bounds how far opaque backoff grows for providers that do not expose a reset time.', + ), + baseDelayMs: z.int().positive().default(1_000).describe('Initial opaque retry delay in milliseconds.'), + maxDelayMs: z.int().positive().default(60_000).describe('Maximum opaque retry delay in milliseconds.'), + jitter: z.boolean().default(true).describe('When true, apply bounded jitter to opaque retry delays.'), + }) + .describe('Retry policy for rate-limit responses that do not include a reset time or retry-after value.'); + +const ingestRateLimitSchema = z + .strictObject({ + enabled: z.boolean().default(true).describe('Master switch for ingest LLM rate-limit pacing and visible waits.'), + throttleThreshold: z + .number() + .min(0) + .max(1) + .default(0.8) + .describe('Provider utilization at or above which ingest throttles new work-unit starts.'), + minConcurrencyUnderPressure: z + .int() + .positive() + .default(1) + .describe('Effective work-unit concurrency while a provider is under rate-limit pressure.'), + maxWaitMs: z + .int() + .positive() + .optional() + .describe('Optional cap on a single provider reset wait. 
Omit to wait indefinitely until the provider reset time.'), + retry: ingestRateLimitRetrySchema.prefault({}).describe('Opaque retry policy for providers without reset hints.'), + }) + .describe('Rate-limit pacing and wait policy for ingest LLM calls.'); + const ingestSchema = z .strictObject({ adapters: z @@ -110,6 +148,7 @@ const ingestSchema = z .prefault({ backend: 'none' }) .describe('Embedding configuration used when ingest adapters need to embed documents.'), workUnits: workUnitsSchema.prefault({}).describe('Concurrency and failure handling for ingest work units.'), + rateLimit: ingestRateLimitSchema.prefault({}).describe('LLM rate-limit pacing and visible-wait policy for ingest.'), profile: z .union([z.boolean(), z.literal('json')]) .default(false) diff --git a/packages/cli/src/context/scan/types.ts b/packages/cli/src/context/scan/types.ts index 1d9e6d6a..fc445b5e 100644 --- a/packages/cli/src/context/scan/types.ts +++ b/packages/cli/src/context/scan/types.ts @@ -303,9 +303,29 @@ export interface KtxTableListEntry { kind: 'table' | 'view'; } -interface KtxConnectorTestResult { +export interface KtxConnectorTestResult { success: boolean; error?: string; + /** + * The original error thrown by the driver, preserved unflattened so the + * connection-test path can re-throw it. Keeping the real error object lets + * telemetry record the driver's actual error class (e.g. `ConnectionError`) + * and `.code` (e.g. `ELOGIN`) instead of collapsing every failure to `Error`. + */ + cause?: unknown; +} + +/** + * Single source of truth for a failed connector test result. Captures the + * driver's message for display while preserving the original error as `cause` + * so callers can surface its real class and code. + */ +export function connectorTestFailure(error: unknown): KtxConnectorTestResult { + return { + success: false, + error: error instanceof Error ? 
error.message : String(error), + cause: error, + }; } export interface KtxScanConnector { diff --git a/packages/cli/src/ingest.ts b/packages/cli/src/ingest.ts index ad5ba270..319c3d1b 100644 --- a/packages/cli/src/ingest.ts +++ b/packages/cli/src/ingest.ts @@ -78,6 +78,7 @@ export interface KtxIngestDeps { readReportFile?: typeof readIngestReportSnapshotFile; renderStoredMemoryFlow?: typeof renderMemoryFlowTui; startLiveMemoryFlow?: typeof startLiveMemoryFlowTui; + abortSignal?: AbortSignal; env?: NodeJS.ProcessEnv; localIngestOptions?: Pick< RunLocalIngestOptions, @@ -93,6 +94,23 @@ export interface KtxIngestDeps { runtimeIo?: KtxIngestIo; } +function createCliAbortSignal(): { signal: AbortSignal; dispose: () => void } { + const controller = new AbortController(); + let interrupted = false; + const onSigint = () => { + if (interrupted) { + process.exit(130); + } + interrupted = true; + controller.abort(new DOMException('Aborted', 'AbortError')); + }; + process.on('SIGINT', onSigint); + return { + signal: controller.signal, + dispose: () => process.off('SIGINT', onSigint), + }; +} + const REPORT_SOURCE_LABELS = new Map([ ['live-database', 'Database schema'], ['historic-sql', 'Query history'], @@ -364,6 +382,12 @@ function plainIngestEventProgress( message: event.message, ...(event.transient !== undefined ? { transient: event.transient } : {}), }; + case 'rate_limit_wait': + return { + percent: 50, + message: `Rate-limited (${event.provider}${event.rateLimitType ? ` ${event.rateLimitType}` : ''}); resuming in ${Math.ceil(event.remainingMs / 1_000)}s`, + transient: true, + }; case 'work_unit_started': { const total = plannedWorkUnitCountThrough(snapshot, eventIndex); const ordinal = workUnitOrdinalThrough(snapshot, eventIndex, event.unitKey); @@ -750,6 +774,8 @@ export async function runKtxIngest( ); plainProgress?.start(); structuredProgress?.start(); + const cliAbort = deps.abortSignal ? null : createCliAbortSignal(); + const abortSignal = deps.abortSignal ?? 
cliAbort?.signal; let result: LocalMetabaseFanoutResult; try { result = await executeMetabaseFanout({ @@ -763,6 +789,7 @@ export async function runKtxIngest( embeddingProvider, ...(memoryFlow ? { memoryFlow } : {}), ...(progress ? { progress } : {}), + ...(abortSignal ? { abortSignal } : {}), }); plainProgress?.flush(); if (args.outputMode === 'json') { @@ -772,6 +799,7 @@ export async function runKtxIngest( } } finally { plainProgress?.flush(); + cliAbort?.dispose(); } return result.status === 'all_failed' ? 1 : 0; } @@ -820,6 +848,8 @@ export async function runKtxIngest( plainProgress?.start(); structuredProgress?.start(); + const cliAbort = deps.abortSignal ? null : createCliAbortSignal(); + const abortSignal = deps.abortSignal ?? cliAbort?.signal; try { const result = await executeLocalIngest({ @@ -836,6 +866,7 @@ export async function runKtxIngest( embeddingProvider, ...(args.debugLlmRequestFile ? { llmDebugRequestFile: args.debugLlmRequestFile } : {}), ...(memoryFlow ? { memoryFlow } : {}), + ...(abortSignal ? 
{ abortSignal } : {}), }); if (shouldUseLiveViz && memoryFlow) { latestMemoryFlowSnapshot = finalRunMemoryFlowInput(memoryFlow.snapshot(), result.report); @@ -854,6 +885,7 @@ export async function runKtxIngest( } finally { plainProgress?.flush(); liveTui?.close(); + cliAbort?.dispose(); } } diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 44a2b024..07f805b8 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -23,7 +23,8 @@ import type { KtxScanArgs, KtxScanDeps } from './scan.js'; import type { KtxTableRef } from './context/scan/types.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js'; +import { emitProjectStackSnapshot, emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail } from './telemetry/scrubber.js'; profileMark('module:public-ingest'); @@ -1119,30 +1120,63 @@ export async function runKtxPublicIngest( feature, }); } catch (error) { + await reportException({ + error, + context: { source: 'ingest runtime', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.targetConnectionId, + includeLlm: true, + includeEmbeddings: true, + env: deps.env ?? process.env, + }), + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } } const { runContextBuild } = await import('./context-build-view.js'); const contextBuild = deps.runContextBuild ?? runContextBuild; - const result = await contextBuild( - project, - { + try { + const result = await contextBuild( + project, + { + projectDir: args.projectDir, + ...(args.targetConnectionId ? 
{ targetConnectionId: args.targetConnectionId } : {}), + all: args.all, + entrypoint: 'ingest', + inputMode: args.inputMode, + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}), + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), + }, + io, + ); + return result.exitCode; + } catch (error) { + await reportException({ + error, + context: { source: 'ingest context-build', handled: true, fatal: false }, projectDir: args.projectDir, - ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), - all: args.all, - entrypoint: 'ingest', - inputMode: args.inputMode, - ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), - ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), - ...(args.scanMode ? { scanMode: args.scanMode } : {}), - ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}), - ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), - ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), - }, - io, - ); - return result.exitCode; + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.targetConnectionId, + includeLlm: true, + includeEmbeddings: true, + env: deps.env ?? process.env, + }), + }); + io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); + return 1; + } } const plan = buildPublicIngestPlan(project, args); diff --git a/packages/cli/src/reveal-password-prompt.ts b/packages/cli/src/reveal-password-prompt.ts new file mode 100644 index 00000000..3fe3ed66 --- /dev/null +++ b/packages/cli/src/reveal-password-prompt.ts @@ -0,0 +1,93 @@ +import { styleText } from 'node:util'; +import { PasswordPrompt, type PasswordOptions } from '@clack/core'; +import { S_BAR, S_BAR_END, S_PASSWORD_MASK, settings, symbol } from '@clack/prompts'; + +// How many trailing characters of a pasted secret to leave visible so the user +// can confirm what landed (e.g. `••••••a1b2`). Kept small on purpose. +const REVEAL_TAIL_COUNT = 4; + +/** + * Mask every character of `userInput` except the last `tail`, but only reveal the + * tail once the secret is long enough that the hidden portion still dominates + * (`length > tail * 2`). Short secrets stay fully masked so we never expose most + * of a small value. The returned string keeps the same code-unit length as the + * input so clack's cursor slicing in `userInputWithCursor` stays aligned. + * + * @internal + */ +export function maskRevealingTail(userInput: string, maskChar: string, tail: number): string { + const revealLength = userInput.length > tail * 2 ? tail : 0; + const hiddenLength = userInput.length - revealLength; + return maskChar.repeat(hiddenLength) + userInput.slice(hiddenLength); +} + +class RevealTailPasswordPrompt extends PasswordPrompt { + readonly #maskChar: string; + readonly #tail: number; + + constructor(options: PasswordOptions & { tail: number }) { + super(options); + this.#maskChar = options.mask ?? 
S_PASSWORD_MASK; + this.#tail = options.tail; + } + + override get masked(): string { + return maskRevealingTail(this.userInput, this.#maskChar, this.#tail); + } +} + +// Reproduces the @clack/prompts password frame (pinned to the installed version) +// so this prompt is visually identical to every other setup prompt; the only +// behavioral change is the tail-revealing `masked` getter above. +function renderPasswordFrame(prompt: Omit, message: string): string { + const withGuide = settings.withGuide; + const title = `${withGuide ? `${styleText('gray', S_BAR)}\n` : ''}${symbol(prompt.state)} ${message}\n`; + const masked = prompt.masked; + switch (prompt.state) { + case 'error': { + const bar = withGuide ? `${styleText('yellow', S_BAR)} ` : ''; + const end = withGuide ? `${styleText('yellow', S_BAR_END)} ` : ''; + return `${title.trim()}\n${bar}${masked}\n${end}${styleText('yellow', prompt.error)}\n`; + } + case 'submit': { + const bar = withGuide ? `${styleText('gray', S_BAR)} ` : ''; + return `${title}${bar}${masked ? styleText('dim', masked) : ''}`; + } + case 'cancel': { + const bar = withGuide ? `${styleText('gray', S_BAR)} ` : ''; + const body = masked ? styleText(['strikethrough', 'dim'], masked) : ''; + return `${title}${bar}${body}${masked && withGuide ? `\n${styleText('gray', S_BAR)}` : ''}`; + } + default: { + const bar = withGuide ? `${styleText('cyan', S_BAR)} ` : ''; + const end = withGuide ? styleText('cyan', S_BAR_END) : ''; + return `${title}${bar}${prompt.userInputWithCursor}\n${end}\n`; + } + } +} + +export interface RevealPasswordOptions { + message: string; + mask?: string; + tail?: number; + validate?: PasswordOptions['validate']; + signal?: AbortSignal; +} + +/** + * Drop-in replacement for clack's `password()` that reveals the last few + * characters of the entered value while typing. Resolves to the raw value or the + * clack cancel symbol, matching `password()`'s contract. 
+ */ +export function revealPassword(options: RevealPasswordOptions): Promise { + const prompt = new RevealTailPasswordPrompt({ + mask: options.mask ?? S_PASSWORD_MASK, + tail: options.tail ?? REVEAL_TAIL_COUNT, + validate: options.validate, + signal: options.signal, + render() { + return renderPasswordFrame(this, options.message); + }, + }); + return prompt.prompt() as Promise; +} diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index 4f973e57..5961e3f1 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -1,6 +1,6 @@ import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js'; import { runLocalScan } from './context/scan/local-scan.js'; -import { loadKtxProject } from './context/project/project.js'; +import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; @@ -8,7 +8,8 @@ import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:scan'); @@ -322,8 +323,9 @@ export function createCliScanProgress( export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { const startedAt = performance.now(); + let project: KtxLocalProject | undefined; try { - const project = await loadKtxProject({ projectDir: 
args.projectDir }); + project = await loadKtxProject({ projectDir: args.projectDir }); const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', @@ -397,6 +399,20 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps ...(errorDetail ? { errorDetail } : {}), }, }); + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 3cb6c5d2..002ead30 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -73,6 +73,7 @@ export type KtxSetupDatabaseDriver = export interface KtxSetupDatabasesArgs { projectDir: string; inputMode: 'auto' | 'disabled'; + debug?: boolean; yes?: boolean; cliVersion?: string; runtimeInstallPolicy?: KtxManagedPythonInstallPolicy; @@ -1626,7 +1627,12 @@ function hasServiceAccountsBlock(connection: KtxProjectConnectionConfig | undefi return 'serviceAccounts' in filters; } -function printQueryHistoryFilterProposal(io: KtxCliIo, proposal: QueryHistoryFilterProposal): void { +function printQueryHistoryFilterProposal(io: KtxCliIo, proposal: QueryHistoryFilterProposal, debug = false): void { + if (debug && proposal.parseFailedTemplateIds.length > 0) { + io.stderr.write( + `[debug] query-history filter picker could not parse ${proposal.parseFailedTemplateIds.length} template(s): ${proposal.parseFailedTemplateIds.join(', ')}\n`, + ); + } if (proposal.excludedRoles.length === 0) { if 
(proposal.skipped?.reason === 'no-llm') { io.stdout.write('│ Query-history filter picker skipped: no LLM is configured.\n'); @@ -1635,6 +1641,12 @@ function printQueryHistoryFilterProposal(io: KtxCliIo, proposal: QueryHistoryFil } else if (proposal.skipped?.reason === 'no-in-scope-history') { io.stdout.write('│ Query-history filter picker found no in-scope service-account exclusions.\n'); } + if (proposal.parseFailedTemplateIds.length > 0) { + const count = proposal.parseFailedTemplateIds.length; + io.stdout.write( + `│ Skipped ${count} query template${count === 1 ? '' : 's'} ktx could not parse (run with --debug to list them).\n`, + ); + } for (const warning of proposal.warnings) { io.stdout.write(`│ ! ${warning}\n`); } @@ -1727,12 +1739,17 @@ async function maybeProposeQueryHistoryFilters(input: { deps: input.deps, }); if (!llmRuntime && !input.deps.queryHistoryFilterPicker) { - printQueryHistoryFilterProposal(input.io, { - excludedRoles: [], - consideredRoleCount: 0, - skipped: { reason: 'no-llm' }, - warnings: [], - }); + printQueryHistoryFilterProposal( + input.io, + { + excludedRoles: [], + consideredRoleCount: 0, + skipped: { reason: 'no-llm' }, + warnings: [], + parseFailedTemplateIds: [], + }, + input.args.debug === true, + ); return; } @@ -1773,7 +1790,19 @@ async function maybeProposeQueryHistoryFilters(input: { userServiceAccountsPresent, }); - printQueryHistoryFilterProposal(input.io, proposal); + printQueryHistoryFilterProposal(input.io, proposal, input.args.debug === true); + await emitTelemetryEvent({ + name: 'query_history_filter_completed', + projectDir: input.projectDir, + io: input.io, + fields: { + dialect, + consideredRoleCount: proposal.consideredRoleCount, + excludedRoleCount: proposal.excludedRoles.length, + parseFailedCount: proposal.parseFailedTemplateIds.length, + outcome: 'ok', + }, + }); if (proposal.skipped?.reason === 'user-block-present') { input.io.stdout.write('│ Existing query-history service-account filters left unchanged.\n'); 
return; diff --git a/packages/cli/src/setup-embeddings.ts b/packages/cli/src/setup-embeddings.ts index 8f49bcf1..5d02e3e4 100644 --- a/packages/cli/src/setup-embeddings.ts +++ b/packages/cli/src/setup-embeddings.ts @@ -222,8 +222,8 @@ async function chooseCredentialRef( const choice = await prompts.select({ message: `How should KTX find your ${embeddingBackendDisplayName(backend)} embedding API key?`, options: [ - { value: 'env', label: `Use ${defaultEnv} from the environment` }, { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: `Use ${defaultEnv} from the environment` }, { value: 'back', label: 'Back' }, ], }); diff --git a/packages/cli/src/setup-models.ts b/packages/cli/src/setup-models.ts index 8e8cf30b..e673cb99 100644 --- a/packages/cli/src/setup-models.ts +++ b/packages/cli/src/setup-models.ts @@ -470,8 +470,8 @@ async function chooseCredentialRef( const choice = await prompts.select({ message: `How should KTX find your Anthropic API key?\n\n${ANTHROPIC_CREDENTIAL_PROMPT_CONTEXT}`, options: [ - { value: 'env', label: 'Use ANTHROPIC_API_KEY from the environment' }, { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: 'Use ANTHROPIC_API_KEY from the environment' }, { value: 'back', label: 'Back' }, ], }); diff --git a/packages/cli/src/setup-prompts.ts b/packages/cli/src/setup-prompts.ts index 1609bd76..e508d8ff 100644 --- a/packages/cli/src/setup-prompts.ts +++ b/packages/cli/src/setup-prompts.ts @@ -9,12 +9,12 @@ import { log, multiselect, note, - password, select, text, } from '@clack/prompts'; import type { KtxCliIo } from './cli-runtime.js'; import { withMenuOptionsSpacing, withTextInputNavigation } from './prompt-navigation.js'; +import { revealPassword } from './reveal-password-prompt.js'; import { withSetupInterruptConfirmation } from './setup-interrupt.js'; export interface KtxSetupPromptOption { @@ -189,7 +189,7 @@ export function 
createKtxSetupPromptAdapter(options: KtxSetupPromptAdapterOption }, async password(promptOptions) { const value = await withSetupInterruptConfirmation(() => - password({ ...promptOptions, message: withTextInputNavigation(promptOptions.message) }), + revealPassword({ ...promptOptions, message: withTextInputNavigation(promptOptions.message) }), ); return isCancel(value) ? undefined : String(value); }, diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index 0a66c3a7..25552fbf 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -119,11 +119,11 @@ export interface KtxSetupSourcesDeps { const SOURCE_OPTIONS: Array<{ value: KtxSetupSourceType; label: string }> = [ { value: 'dbt', label: 'dbt' }, - { value: 'metricflow', label: 'MetricFlow' }, { value: 'metabase', label: 'Metabase' }, + { value: 'notion', label: 'Notion' }, + { value: 'metricflow', label: 'MetricFlow' }, { value: 'looker', label: 'Looker' }, { value: 'lookml', label: 'LookML' }, - { value: 'notion', label: 'Notion' }, ]; const SOURCE_LABELS = Object.fromEntries(SOURCE_OPTIONS.map((option) => [option.value, option.label])) as Record< @@ -269,8 +269,8 @@ async function chooseSourceCredentialRef(input: { message: `How should KTX find your ${input.label}?`, options: [ ...(input.existingRef ? [{ value: 'keep', label: 'Keep existing credential' }] : []), - { value: 'env', label: `Use ${input.envName} from the environment` }, { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: `Use ${input.envName} from the environment` }, { value: 'back', label: 'Back' }, ], }); @@ -307,8 +307,8 @@ async function chooseGitAuthCredentialRef(input: { message: `${label} repo requires authentication.`, options: [ ...(input.existingRef ? 
[{ value: 'keep', label: 'Keep existing credential' }] : []), - { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'skip', label: 'Skip — try without authentication' }, { value: 'back', label: 'Back' }, ], @@ -1063,8 +1063,8 @@ async function promptForInteractiveSource( const selectedLocation = await prompts.select({ message: `${source} source location`, options: [ - { value: 'path', label: 'Local path' }, { value: 'git', label: 'Git URL' }, + { value: 'path', label: 'Local path' }, { value: 'back', label: 'Back' }, ], }); @@ -1343,8 +1343,8 @@ async function promptForInteractiveSource( const crawlMode = await prompts.select({ message: 'Which Notion pages should KTX ingest?', options: [ - { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, { value: 'all_accessible', label: 'All pages the integration can access' }, + { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, { value: 'back', label: 'Back' }, ], }); @@ -2064,7 +2064,7 @@ export async function runKtxSetupSourcesStep( const addMore = await prompts.select({ message: `${readyConnectionIds.length} context source${readyConnectionIds.length > 1 ? 's' : ''} configured (${readyConnectionIds.join(', ')}). 
Add another?`, options: [ - { value: 'done', label: 'Done — continue to context build' }, + { value: 'done', label: 'Done adding context sources' }, { value: 'edit', label: 'Edit an existing context source' }, { value: 'add', label: 'Add another context source' }, ], diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index a991367e..fc45abb3 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -80,6 +80,7 @@ export type KtxSetupArgs = agentScope?: KtxAgentScope; skipAgents?: boolean; inputMode: 'auto' | 'disabled'; + debug?: boolean; yes: boolean; cliVersion: string; llmBackend?: KtxSetupLlmBackend; @@ -735,6 +736,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup { projectDir: projectResult.projectDir, inputMode: args.inputMode, + ...(args.debug !== undefined ? { debug: args.debug } : {}), yes: args.yes, cliVersion: args.cliVersion, runtimeInstallPolicy: setupRuntimeInstallPolicy(args), diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts index f3eeb33e..dcf5e460 100644 --- a/packages/cli/src/sl.ts +++ b/packages/cli/src/sl.ts @@ -26,7 +26,8 @@ import { type KtxManagedPythonInstallPolicy, } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sl'); @@ -202,8 +203,9 @@ function ambiguousSourceMessage(sourceName: string, connectionIds: readonly stri export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise { const startedAt = performance.now(); let queryForTelemetry: SemanticLayerQueryInput | undefined; + let project: KtxLocalProject | undefined; try { - const project = await (deps.loadProject ?? 
loadKtxProject)({ projectDir: args.projectDir }); + project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); if (args.command === 'list') { const sources = await listLocalSlSources(project, { connectionId: args.connectionId }); await printSlSources({ @@ -320,7 +322,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx projectDir: args.projectDir, }); const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined; - const result = await compileLocalSlQuery(project as KtxLocalProject, { + const result = await compileLocalSlQuery(project, { connectionId: args.connectionId, query, compute, @@ -351,6 +353,20 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx const _exhaustive: never = args; throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`); } catch (error) { + await reportException({ + error, + context: { source: `sl ${args.command}`, handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: args.command === 'query', + includeEmbeddings: args.command === 'search' || args.command === 'query', + env: process.env, + }), + }); if (args.command === 'validate') { const errorClass = scrubErrorClass(error); await emitTelemetryEvent({ diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index bfae0608..d3eb6a81 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -7,7 +7,8 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, 
reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sql'); @@ -142,8 +143,9 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: const startedAt = performance.now(); let driver = 'unknown'; let demoConnection = false; + let project: KtxLocalProject | undefined; try { - const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); + project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); const connection = project.config.connections[args.connectionId]; if (!connection) { throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`); @@ -171,7 +173,7 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector; let connector: KtxScanConnector | null = null; try { - connector = await createScanConnector(project as KtxLocalProject, args.connectionId); + connector = await createScanConnector(project, args.connectionId); if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { throw new Error(`Connection "${args.connectionId}" does not support read-only SQL execution.`); } @@ -218,6 +220,20 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: ...(errorClass ? { errorClass } : {}), }, }); + await reportException({ + error, + context: { source: 'sql run', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }), + }); io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/telemetry/emitter.ts b/packages/cli/src/telemetry/emitter.ts index 3344e00b..12453262 100644 --- a/packages/cli/src/telemetry/emitter.ts +++ b/packages/cli/src/telemetry/emitter.ts @@ -16,6 +16,16 @@ type PostHogClient = { properties: Record; groups?: Record; }): void; + captureException( + error: unknown, + distinctId?: string, + additionalProperties?: Record, + ): void; + captureExceptionImmediate( + error: unknown, + distinctId?: string, + additionalProperties?: Record, + ): Promise; shutdown(): Promise | void; }; @@ -105,6 +115,57 @@ export async function trackTelemetryEvent(input: { } } +function writeDebugExceptionPayload(input: { + error: Error; + distinctId: string; + properties: Record; + stderr: TelemetrySink; +}): void { + input.stderr.write( + `[telemetry-exception] ${JSON.stringify({ + distinctId: input.distinctId, + message: input.error.message, + name: input.error.name, + properties: input.properties, + })}\n`, + ); +} + +export async function trackTelemetryException(input: { + error: Error; + distinctId: string; + properties: Record; + env?: TelemetryEmitterEnv; + stderr: TelemetrySink; + projectApiKey?: string; + host?: string; + immediate?: boolean; +}): Promise { + const env = input.env ?? 
process.env; + + if (debugEnabled(env)) { + writeDebugExceptionPayload(input); + return; + } + + const projectApiKey = telemetryProjectApiKey(input.projectApiKey); + const host = telemetryHost(env, input.host); + const client = await getPostHogClient(projectApiKey, host); + if (!client) { + return; + } + + try { + if (input.immediate) { + await client.captureExceptionImmediate(input.error, input.distinctId, input.properties); + return; + } + client.captureException(input.error, input.distinctId, input.properties); + } catch { + return; + } +} + export async function shutdownTelemetryEmitter(): Promise { const client = await clientPromise; if (!client) { diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json index a75f92f1..c6c3d6f8 100644 --- a/packages/cli/src/telemetry/events.schema.json +++ b/packages/cli/src/telemetry/events.schema.json @@ -206,6 +206,17 @@ "errorClass", "durationMs" ] + }, + { + "name": "query_history_filter_completed", + "description": "Emitted after the setup query-history service-account filter picker runs.", + "fields": [ + "dialect", + "consideredRoleCount", + "excludedRoleCount", + "parseFailedCount", + "outcome" + ] } ], "$defs": { @@ -1434,6 +1445,77 @@ "durationMs" ], "additionalProperties": false + }, + "query_history_filter_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "dialect": { + "type": "string" + }, + "consideredRoleCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "excludedRoleCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + 
"parseFailedCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "dialect", + "consideredRoleCount", + "excludedRoleCount", + "parseFailedCount", + "outcome" + ], + "additionalProperties": false } } } diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts index c4fc2e6f..cf650492 100644 --- a/packages/cli/src/telemetry/events.ts +++ b/packages/cli/src/telemetry/events.ts @@ -206,6 +206,16 @@ const sqlGenCompletedSchema = telemetryCommonEnvelopeSchema }) .strict(); +const queryHistoryFilterCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + dialect: z.string(), + consideredRoleCount: z.number().int().nonnegative(), + excludedRoleCount: z.number().int().nonnegative(), + parseFailedCount: z.number().int().nonnegative(), + outcome: outcomeSchema, + }) + .strict(); + /** @internal */ export const telemetryEventSchemas = { install_first_run: installFirstRunSchema, @@ -225,6 +235,7 @@ export const telemetryEventSchemas = { daemon_stopped: daemonStoppedSchema, sl_plan_completed: slPlanCompletedSchema, sql_gen_completed: sqlGenCompletedSchema, + query_history_filter_completed: queryHistoryFilterCompletedSchema, } as const; /** @internal */ @@ -360,6 +371,11 @@ export const telemetryEventCatalog = [ description: 'Emitted after daemon SQL generation completes.', fields: ['outcome', 'dialect', 'errorClass', 'durationMs'], }, + { + name: 'query_history_filter_completed', + description: 'Emitted after the setup query-history service-account filter picker runs.', + fields: ['dialect', 'consideredRoleCount', 'excludedRoleCount', 'parseFailedCount', 'outcome'], + }, ] as const; export type TelemetryEventName = keyof typeof telemetryEventSchemas; diff --git a/packages/cli/src/telemetry/exception.ts 
b/packages/cli/src/telemetry/exception.ts new file mode 100644 index 00000000..0ce81244 --- /dev/null +++ b/packages/cli/src/telemetry/exception.ts @@ -0,0 +1,201 @@ +import { inspect } from 'node:util'; + +import { getKtxCliPackageInfo, type KtxCliIo, type KtxCliPackageInfo } from '../cli-runtime.js'; +import { buildCommonEnvelope } from './events.js'; +import { trackTelemetryException } from './emitter.js'; +import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js'; + +export interface ExceptionContext { + source: string; + handled: boolean; + fatal: boolean; + extra?: Record; +} + +type AnyObject = object; + +const reportedObjects = new WeakSet(); +const recentHandledPrimitives: string[] = []; +const RECENT_PRIMITIVE_LIMIT = 128; + +function primitiveKey(value: unknown): string { + return `${typeof value}:${String(value)}`; +} + +function rememberHandledPrimitive(value: unknown): void { + recentHandledPrimitives.push(primitiveKey(value)); + if (recentHandledPrimitives.length > RECENT_PRIMITIVE_LIMIT) { + recentHandledPrimitives.splice(0, recentHandledPrimitives.length - RECENT_PRIMITIVE_LIMIT); + } +} + +function consumeHandledPrimitive(value: unknown): boolean { + const key = primitiveKey(value); + const index = recentHandledPrimitives.indexOf(key); + if (index < 0) { + return false; + } + recentHandledPrimitives.splice(index, 1); + return true; +} + +function shouldSkipAsAlreadyReported(error: unknown, handled: boolean): boolean { + if ((typeof error === 'object' || typeof error === 'function') && error !== null) { + if (reportedObjects.has(error)) { + return true; + } + reportedObjects.add(error); + return false; + } + + if (handled) { + rememberHandledPrimitive(error); + return false; + } + + return consumeHandledPrimitive(error); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function redactStaticPatterns(value: string): string { + return value + 
.replace(/([a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:)([^@\s/]+)(@)/gi, '$1[redacted]$3') + .replace(/\b(password|pwd)=([^;&\s]+)/gi, '$1=[redacted]') + .replace(/\bAuthorization\s*:\s*[^\r\n,;]+/gi, 'Authorization: [redacted]') + .replace(/\bBearer\s+[A-Za-z0-9._~+/=-]+/gi, 'Bearer [redacted]') + .replace(/\b(api[_-]?key)\s*[:=]\s*([^\s,;]+)/gi, '$1=[redacted]') + .replace(/\b(KTX_[A-Z0-9_]*|[A-Z0-9_]*(?:TOKEN|SECRET))\s*[:=]\s*([^\s,;]+)/g, '$1=[redacted]') + .replace(/([?&](?:X-Amz-Signature|X-Goog-Signature|sig)=)[^&\s]+/gi, '$1[redacted]'); +} + +function redactText(value: string, secrets: ReadonlyArray): string { + let redacted = value; + for (const secret of secrets) { + if (secret) { + redacted = redacted.replace(new RegExp(escapeRegExp(secret), 'g'), '[redacted]'); + } + } + return redactStaticPatterns(redacted); +} + +const FORBIDDEN_EXTRA_PROPERTY_KEYS = new Set([ + 'argv', + 'args', + 'env', + 'environment', + 'sql', + 'query', + 'prompt', + 'mcparguments', + 'mcpargs', + 'tablename', + 'schemaname', + 'columnname', + 'databaseurl', + 'connectionstring', + 'url', + 'password', + 'token', + 'apikey', + 'api_key', + 'authorization', +]); + +function safeExtraProperties( + extra: Record | undefined, +): Record { + const safe: Record = {}; + for (const [key, value] of Object.entries(extra ?? {})) { + if (!FORBIDDEN_EXTRA_PROPERTY_KEYS.has(key.replace(/[^a-z0-9_]/gi, '').toLowerCase())) { + safe[key] = value; + } + } + return safe; +} + +function toMessage(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + if (typeof error === 'string') { + return error; + } + return inspect(error, { depth: 4, breakLength: 120 }); +} + +function sanitizedError(error: unknown, secrets: ReadonlyArray): Error { + if (error instanceof Error) { + const cause = 'cause' in error ? (error as Error & { cause?: unknown }).cause : undefined; + const clone = new Error(redactText(error.message, secrets), { + ...(cause !== undefined ? 
{ cause: sanitizedError(cause, secrets) } : {}), + }); + clone.name = error.name; + if (error.stack) { + clone.stack = redactText(error.stack, secrets); + } + return clone; + } + return new Error(redactText(toMessage(error), secrets)); +} + +export async function reportException(input: { + error: unknown; + context: ExceptionContext; + io: KtxCliIo; + packageInfo?: KtxCliPackageInfo; + projectDir?: string; + immediate?: boolean; + redactionSecrets?: ReadonlyArray; +}): Promise { + try { + if (shouldSkipAsAlreadyReported(input.error, input.context.handled)) { + return; + } + + const debug = process.env.KTX_TELEMETRY_DEBUG === '1'; + const identity = await loadTelemetryIdentity({ + stderr: input.io.stderr, + env: process.env, + }); + + if ((!identity.enabled || !identity.installId) && !debug) { + return; + } + + const packageInfo = input.packageInfo ?? getKtxCliPackageInfo(); + const installId = identity.installId ?? 'debug'; + const projectId = input.projectDir ? computeTelemetryProjectId(installId, input.projectDir) : undefined; + const safeError = sanitizedError(input.error, input.redactionSecrets ?? []); + const properties: Record = { + ...buildCommonEnvelope({ + cliVersion: packageInfo.version, + isCi: Boolean(process.env.CI), + }), + source: input.context.source, + handled: input.context.handled, + fatal: input.context.fatal, + ...(projectId ? 
{ projectId } : {}), + ...safeExtraProperties(input.context.extra), + }; + + delete properties.$groups; + await trackTelemetryException({ + error: safeError, + distinctId: installId, + properties, + env: process.env, + stderr: input.io.stderr, + immediate: input.immediate, + }); + } catch { + return; + } +} + +/** @internal */ +export function __resetTelemetryExceptionStateForTests(): void { + recentHandledPrimitives.length = 0; +} diff --git a/packages/cli/src/telemetry/index.ts b/packages/cli/src/telemetry/index.ts index b02e0224..e3716060 100644 --- a/packages/cli/src/telemetry/index.ts +++ b/packages/cli/src/telemetry/index.ts @@ -7,6 +7,7 @@ import { type CompletedCommandSpan, } from './command-hook.js'; import { shutdownTelemetryEmitter, trackTelemetryEvent } from './emitter.js'; +import { reportException, type ExceptionContext } from './exception.js'; import { buildCommonEnvelope, buildTelemetryEvent, @@ -17,8 +18,8 @@ import { import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js'; import { buildProjectStackSnapshotFields } from './project-snapshot.js'; -export { beginCommandSpan, completeCommandSpan, shutdownTelemetryEmitter }; -export type { CommandOutcome, CompletedCommandSpan }; +export { beginCommandSpan, completeCommandSpan, reportException, shutdownTelemetryEmitter }; +export type { CommandOutcome, CompletedCommandSpan, ExceptionContext }; export async function showTelemetryNoticeIfNeeded(io: KtxCliIo, packageInfo: KtxCliPackageInfo): Promise { const identity = await loadTelemetryIdentity({ diff --git a/packages/cli/src/telemetry/redaction-secrets.ts b/packages/cli/src/telemetry/redaction-secrets.ts new file mode 100644 index 00000000..2bf7a863 --- /dev/null +++ b/packages/cli/src/telemetry/redaction-secrets.ts @@ -0,0 +1,117 @@ +import { resolveKtxConfigReference } from '../context/core/config-reference.js'; +import { loadKtxProject, type KtxLocalProject } from '../context/project/project.js'; + +const SENSITIVE_KEY = + 
/(password|secret|token|api[_-]?key|auth[_-]?token|auth_token_ref|private[_-]?key|passphrase|credential|authorization|url)$/i; + +type TelemetryRedactionProject = Pick; + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function addSecret(values: string[], value: string | undefined): void { + const trimmed = value?.trim(); + if (trimmed && !values.includes(trimmed)) { + values.push(trimmed); + } +} + +function tryResolve(value: string, env: NodeJS.ProcessEnv): string | undefined { + try { + return resolveKtxConfigReference(value, env); + } catch { + return undefined; + } +} + +function addUrlCredentials(values: string[], value: string): void { + try { + const parsed = new URL(value); + addSecret(values, parsed.password ? decodeURIComponent(parsed.password) : undefined); + addSecret(values, parsed.username ? decodeURIComponent(parsed.username) : undefined); + } catch { + return; + } +} + +function collectFromRecord(input: unknown, env: NodeJS.ProcessEnv, values: string[]): void { + if (Array.isArray(input)) { + for (const item of input) { + collectFromRecord(item, env, values); + } + return; + } + + if (!isRecord(input)) { + return; + } + + for (const [key, raw] of Object.entries(input)) { + if (isRecord(raw) || Array.isArray(raw)) { + collectFromRecord(raw, env, values); + continue; + } + if (typeof raw !== 'string' || !SENSITIVE_KEY.test(key)) { + continue; + } + const resolved = tryResolve(raw, env); + addSecret(values, resolved); + if (resolved) { + addUrlCredentials(values, resolved); + } + } +} + +function collectLlmSecrets(project: TelemetryRedactionProject, env: NodeJS.ProcessEnv, values: string[]): void { + collectFromRecord(project.config.llm.provider, env, values); +} + +function collectEmbeddingSecrets(project: TelemetryRedactionProject, env: NodeJS.ProcessEnv, values: string[]): void { + collectFromRecord(project.config.ingest.embeddings, env, values); + 
collectFromRecord(project.config.scan.enrichment.embeddings, env, values); +} + +function collectConnectionSecrets( + project: TelemetryRedactionProject, + connectionId: string | undefined, + env: NodeJS.ProcessEnv, + values: string[], +): void { + if (!connectionId) { + return; + } + collectFromRecord(project.config.connections[connectionId], env, values); +} + +export async function collectTelemetryRedactionSecrets(input: { + project?: TelemetryRedactionProject; + projectDir?: string; + connectionId?: string; + includeLlm?: boolean; + includeEmbeddings?: boolean; + env?: NodeJS.ProcessEnv; +}): Promise { + const env = input.env ?? process.env; + let project = input.project; + if (!project && input.projectDir) { + try { + project = await loadKtxProject({ projectDir: input.projectDir }); + } catch { + project = undefined; + } + } + if (!project) { + return []; + } + + const values: string[] = []; + if (input.includeLlm) { + collectLlmSecrets(project, env, values); + } + if (input.includeEmbeddings) { + collectEmbeddingSecrets(project, env, values); + } + collectConnectionSecrets(project, input.connectionId, env, values); + return values; +} diff --git a/packages/cli/src/update-check/cache.ts b/packages/cli/src/update-check/cache.ts new file mode 100644 index 00000000..19ebf07a --- /dev/null +++ b/packages/cli/src/update-check/cache.ts @@ -0,0 +1,45 @@ +import { renameSync, writeFileSync } from 'node:fs'; +import { mkdir, readFile } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { z } from 'zod'; + +const updateCheckCacheSchema = z + .object({ + checkedAt: z.string(), + channel: z.enum(['latest', 'next']), + installedVersion: z.string(), + latestForChannel: z.string(), + lastNoticeAt: z.string().optional(), + }) + .strict(); + +export type UpdateCheckCache = z.infer; + +/** @internal */ +export function updateCheckCachePath(homeDir = homedir()): string { + return join(homeDir, '.ktx', 
'update-check.json'); +} + +export async function readUpdateCheckCache(options: { homeDir?: string } = {}): Promise { + try { + return updateCheckCacheSchema.parse(JSON.parse(await readFile(updateCheckCachePath(options.homeDir), 'utf-8'))); + } catch { + return null; + } +} + +export async function writeUpdateCheckCache( + value: UpdateCheckCache, + options: { homeDir?: string } = {}, +): Promise { + try { + const path = updateCheckCachePath(options.homeDir); + await mkdir(dirname(path), { recursive: true }); + const tempPath = `${path}.${process.pid}.${Date.now()}.tmp`; + writeFileSync(tempPath, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); + renameSync(tempPath, path); + } catch { + return; + } +} diff --git a/packages/cli/src/update-check/channel.ts b/packages/cli/src/update-check/channel.ts new file mode 100644 index 00000000..d8251021 --- /dev/null +++ b/packages/cli/src/update-check/channel.ts @@ -0,0 +1,43 @@ +import semver from 'semver'; + +export type UpdateChannel = 'latest' | 'next'; + +export type UpdateDecision = + | { status: 'skip' } + | { status: 'upToDate'; channel: UpdateChannel; target: string } + | { status: 'available'; channel: UpdateChannel; target: string }; + +/** @internal */ +export function inferUpdateChannel(installed: string): UpdateChannel | null { + const parsed = semver.parse(installed); + if (!parsed || installed === '0.0.0') { + return null; + } + + const [prereleaseId] = parsed.prerelease; + if (prereleaseId === undefined) { + return 'latest'; + } + if (prereleaseId === 'rc') { + return 'next'; + } + return null; +} + +export function decideUpdate(installed: string, distTags: Record): UpdateDecision { + const channel = inferUpdateChannel(installed); + if (!channel || !semver.valid(installed)) { + return { status: 'skip' }; + } + + const target = distTags[channel]; + if (!target || !semver.valid(target)) { + return { status: 'skip' }; + } + + if (semver.gt(target, installed)) { + return { status: 'available', channel, target }; 
+ } + + return { status: 'upToDate', channel, target }; +} diff --git a/packages/cli/src/update-check/registry.ts b/packages/cli/src/update-check/registry.ts new file mode 100644 index 00000000..f0934933 --- /dev/null +++ b/packages/cli/src/update-check/registry.ts @@ -0,0 +1,52 @@ +import { request as httpsRequest } from 'node:https'; +import { URL } from 'node:url'; +import { z } from 'zod'; + +const DIST_TAGS_URL = new URL('https://registry.npmjs.org/-/package/@kaelio/ktx/dist-tags'); +const distTagsSchema = z.record(z.string(), z.string()); + +function parseDistTags(raw: string): Record { + return distTagsSchema.parse(JSON.parse(raw)); +} + +export function fetchDistTags(): Promise> { + return new Promise((resolve, reject) => { + const request = httpsRequest( + DIST_TAGS_URL, + { + method: 'GET', + headers: { + accept: 'application/json', + }, + }, + (response) => { + const chunks: Buffer[] = []; + response.on('data', (chunk: Buffer | string) => { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + }); + response.on('end', () => { + const text = Buffer.concat(chunks).toString('utf8'); + const statusCode = response.statusCode ?? 
0; + if (statusCode < 200 || statusCode >= 300) { + reject(new Error(`npm dist-tags request failed with ${statusCode}: ${text}`)); + return; + } + try { + resolve(parseDistTags(text)); + } catch (error) { + reject(error); + } + }); + }, + ); + + request.on('socket', (socket) => { + socket.unref(); + }); + request.on('error', reject); + request.setTimeout(5000, () => { + request.destroy(new Error('npm dist-tags request timed out')); + }); + request.end(); + }); +} diff --git a/packages/cli/src/update-check/update-check.ts b/packages/cli/src/update-check/update-check.ts new file mode 100644 index 00000000..611a43a3 --- /dev/null +++ b/packages/cli/src/update-check/update-check.ts @@ -0,0 +1,187 @@ +import type { KtxCliIo } from '../cli-runtime.js'; +import { cyan, dim, type CliStyleEnv } from '../clack.js'; +import { resolveOutputMode } from '../io/mode.js'; +import { type UpdateCheckCache, readUpdateCheckCache, writeUpdateCheckCache } from './cache.js'; +import { decideUpdate, inferUpdateChannel, type UpdateChannel } from './channel.js'; +import { fetchDistTags as defaultFetchDistTags } from './registry.js'; + +const DAY_MS = 24 * 60 * 60 * 1000; + +/** @internal */ +export interface UpdateCheckEnv extends NodeJS.ProcessEnv, CliStyleEnv { + CI?: string; + DO_NOT_TRACK?: string; + KTX_NO_UPDATE_CHECK?: string; + KTX_OUTPUT?: string; + NO_UPDATE_NOTIFIER?: string; +} + +/** @internal */ +export interface UpdateCheckCommandOptions { + format?: unknown; + json?: unknown; + output?: unknown; +} + +export interface PrepareUpdateCheckNoticeOptions { + commandOptions?: UpdateCheckCommandOptions; + env?: UpdateCheckEnv; + fetchDistTags?: () => Promise>; + homeDir?: string; + installedVersion: string; + io: KtxCliIo; + now?: () => Date; +} + +export interface PreparedUpdateCheckNotice { + notice: string | null; +} + +function truthy(value: string | undefined): boolean { + return value !== undefined && value !== '' && value !== '0' && value !== 'false'; +} + +function 
commandRequestsJson(options: UpdateCheckCommandOptions | undefined): boolean { + return options?.json === true || options?.output === 'json' || options?.format === 'json'; +} + +/** @internal */ +export function shouldSuppressUpdateCheck(args: { + commandOptions?: UpdateCheckCommandOptions; + env?: UpdateCheckEnv; + io: KtxCliIo; +}): boolean { + const env = args.env ?? process.env; + if (truthy(env.KTX_NO_UPDATE_CHECK) || truthy(env.NO_UPDATE_NOTIFIER) || truthy(env.DO_NOT_TRACK)) { + return true; + } + + if (commandRequestsJson(args.commandOptions) || truthy(env.CI) || args.io.stdout.isTTY !== true) { + return true; + } + + try { + const mode = resolveOutputMode({ + json: false, + io: args.io, + env, + }); + return mode !== 'pretty'; + } catch { + return true; + } +} + +/** @internal */ +export function renderUpdateNotice(args: { + channel: UpdateChannel; + env?: CliStyleEnv; + installedVersion: string; + targetVersion: string; +}): string { + const command = args.channel === 'next' ? 'npm i -g @kaelio/ktx@next' : 'npm i -g @kaelio/ktx'; + return `${cyan('↑', args.env)} Update available: ktx ${args.installedVersion} → ${args.targetVersion}\n ${dim(command, args.env)}\n`; +} + +function timestampMs(value: string | undefined): number | null { + if (!value) { + return null; + } + const parsed = Date.parse(value); + return Number.isNaN(parsed) ? 
null : parsed; +} + +function elapsedAtLeast(value: string | undefined, now: Date, intervalMs: number): boolean { + const previous = timestampMs(value); + if (previous === null) { + return true; + } + return now.getTime() - previous >= intervalMs; +} + +function shouldRefreshCache(cache: UpdateCheckCache | null, installedVersion: string, now: Date): boolean { + if (!cache || cache.installedVersion !== installedVersion) { + return true; + } + return elapsedAtLeast(cache.checkedAt, now, DAY_MS); +} + +async function refreshUpdateCache(args: { + cache: UpdateCheckCache | null; + fetchDistTags: () => Promise>; + homeDir?: string; + installedVersion: string; + now: Date; +}): Promise { + const distTags = await args.fetchDistTags(); + const decision = decideUpdate(args.installedVersion, distTags); + if (decision.status === 'skip') { + return; + } + + await writeUpdateCheckCache( + { + checkedAt: args.now.toISOString(), + channel: decision.channel, + installedVersion: args.installedVersion, + latestForChannel: decision.target, + ...(args.cache?.installedVersion === args.installedVersion && args.cache.channel === decision.channel + ? { lastNoticeAt: args.cache.lastNoticeAt } + : {}), + }, + { homeDir: args.homeDir }, + ); +} + +export async function prepareUpdateCheckNotice( + options: PrepareUpdateCheckNoticeOptions, +): Promise { + const env = options.env ?? process.env; + const now = (options.now ?? (() => new Date()))(); + const fetchDistTags = options.fetchDistTags ?? 
defaultFetchDistTags; + + if ( + shouldSuppressUpdateCheck({ + commandOptions: options.commandOptions, + env, + io: options.io, + }) + ) { + return { notice: null }; + } + + if (!inferUpdateChannel(options.installedVersion)) { + return { notice: null }; + } + + let cache = await readUpdateCheckCache({ homeDir: options.homeDir }); + let notice: string | null = null; + + if (cache?.installedVersion === options.installedVersion) { + const decision = decideUpdate(options.installedVersion, { + [cache.channel]: cache.latestForChannel, + }); + if (decision.status === 'available' && elapsedAtLeast(cache.lastNoticeAt, now, DAY_MS)) { + notice = renderUpdateNotice({ + channel: decision.channel, + env, + installedVersion: options.installedVersion, + targetVersion: decision.target, + }); + cache = { ...cache, lastNoticeAt: now.toISOString() }; + await writeUpdateCheckCache(cache, { homeDir: options.homeDir }); + } + } + + if (shouldRefreshCache(cache, options.installedVersion, now)) { + void refreshUpdateCache({ + cache, + fetchDistTags, + homeDir: options.homeDir, + installedVersion: options.installedVersion, + now, + }).catch(() => {}); + } + + return { notice }; +} diff --git a/packages/cli/test/cli-program-telemetry.test.ts b/packages/cli/test/cli-program-telemetry.test.ts index 4e7130b3..30e2bd2b 100644 --- a/packages/cli/test/cli-program-telemetry.test.ts +++ b/packages/cli/test/cli-program-telemetry.test.ts @@ -7,6 +7,12 @@ import { runCommanderKtxCli } from '../src/cli-program.js'; import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from '../src/cli-runtime.js'; import { TELEMETRY_NOTICE } from '../src/telemetry/identity.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } { let stdout = ''; let stderr = ''; @@ -43,6 +49,7 @@ 
describe('runCommanderKtxCli telemetry', () => { vi.stubEnv('CI', ''); vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); vi.stubEnv('DO_NOT_TRACK', ''); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -131,4 +138,30 @@ describe('runCommanderKtxCli telemetry', () => { await expect(runCommanderKtxCli(['unknown'], unknownIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(1); expect(unknownIo.stderr()).not.toContain('[telemetry]'); }); + + it('reports genuine top-level command catches as handled exceptions', async () => { + const io = makeIo(true); + const deps: KtxCliDeps = { + doctor: async () => { + throw new Error('status failed'); + }, + }; + + await expect( + runCommanderKtxCli( + ['--project-dir', tempDir, 'status', '--json'], + io.io, + deps, + info, + { runInit: async () => 0 }, + ), + ).resolves.toBe(1); + + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ktx status', handled: true, fatal: false }), + projectDir: tempDir, + }), + ); + }); }); diff --git a/packages/cli/test/connection.test.ts b/packages/cli/test/connection.test.ts index 67e55af8..22c8bbe9 100644 --- a/packages/cli/test/connection.test.ts +++ b/packages/cli/test/connection.test.ts @@ -10,6 +10,12 @@ import type { KtxConnectionDriver, KtxScanConnector } from '../src/context/scan/ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnection } from '../src/connection.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function stripAnsi(s: string): string { return s.replace(/\[[0-9;]*m/g, ''); } @@ -38,7 +44,7 @@ function makeIo() { function nativeConnector( driver: KtxConnectionDriver, - testResult: { success: true } | { success: false; error: string } = { success: true }, + testResult: { success: true } | { success: false; error: string; 
cause?: unknown } = { success: true }, ) { const testConnection = vi.fn(async () => testResult); const cleanup = vi.fn(async () => undefined); @@ -72,6 +78,7 @@ describe('runKtxConnection', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-connection-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -165,12 +172,13 @@ describe('runKtxConnection', () => { it('records the raw errorDetail in connection_test telemetry when a native test fails', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); + vi.stubEnv('DATABASE_URL', 'postgres://svc:db-url-password@db.example.test/analytics'); // pragma: allowlist secret const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); await writeConnections(projectDir, { - warehouse: { driver: 'sqlite' }, + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, }); - const { connector } = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' }); + const { connector } = nativeConnector('postgres', { success: false, error: 'database file is unreadable' }); const io = makeIo(); const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { @@ -181,6 +189,44 @@ describe('runKtxConnection', () => { expect(io.stderr()).toContain('"event":"connection_test"'); expect(io.stderr()).toContain('"outcome":"error"'); expect(io.stderr()).toContain('"errorDetail":"database file is unreadable"'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'connection test', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining([ + 'postgres://svc:db-url-password@db.example.test/analytics', // pragma: allowlist secret + 'db-url-password', + ]), + }), + ); + }); + + it('preserves the driver error class and code in connection_test telemetry', async () => { + 
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeConnections(projectDir, { + warehouse: { driver: 'sqlserver', host: 'db.example.test', database: 'analytics', username: 'svc_ro' }, + }); + class ConnectionError extends Error { + readonly code = 'ELOGIN'; + } + const driverError = new ConnectionError("Login failed for user 'svc_ro'."); + const { connector } = nativeConnector('sqlserver', { + success: false, + error: driverError.message, + cause: driverError, + }); + const io = makeIo(); + + const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { + createScanConnector: vi.fn(async () => connector), + }); + + expect(code).toBe(1); + expect(io.stderr()).toContain('"errorClass":"ConnectionError"'); + expect(io.stderr()).toContain('"errorDetail":"ELOGIN: Login failed for user \'svc_ro\'."'); }); it('reports the connector error and still cleans up when native testConnection fails', async () => { diff --git a/packages/cli/test/context/core/abort.test.ts b/packages/cli/test/context/core/abort.test.ts new file mode 100644 index 00000000..aed46c1e --- /dev/null +++ b/packages/cli/test/context/core/abort.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createAbortError, isAbortError, linkAbortSignal, throwIfAborted } from '../../../src/context/core/abort.js'; + +describe('abort helpers', () => { + it('recognizes DOMException abort errors and common abort-shaped errors', () => { + expect(isAbortError(createAbortError())).toBe(true); + expect(isAbortError(Object.assign(new Error('cancelled'), { name: 'AbortError' }))).toBe(true); + expect(isAbortError(Object.assign(new Error('operation aborted'), { code: 'ABORT_ERR' }))).toBe(true); + expect(isAbortError(new Error('ordinary failure'))).toBe(false); + }); + + it('throws when the provided signal is already aborted', () => { + const 
controller = new AbortController(); + controller.abort(); + + expect(() => throwIfAborted(controller.signal)).toThrow(/Aborted/); + }); + + it('links a child controller to a parent signal and removes the listener on dispose', () => { + const parent = new AbortController(); + const child = linkAbortSignal(parent.signal); + + expect(child.controller.signal.aborted).toBe(false); + parent.abort(); + expect(child.controller.signal.aborted).toBe(true); + + const removeSpy = vi.spyOn(parent.signal, 'removeEventListener'); + child.dispose(); + expect(removeSpy).toHaveBeenCalledWith('abort', expect.any(Function)); + }); +}); diff --git a/packages/cli/test/context/ingest/adapters/historic-sql/query-history-filter-picker.test.ts b/packages/cli/test/context/ingest/adapters/historic-sql/query-history-filter-picker.test.ts index 4c295092..5c9e2e60 100644 --- a/packages/cli/test/context/ingest/adapters/historic-sql/query-history-filter-picker.test.ts +++ b/packages/cli/test/context/ingest/adapters/historic-sql/query-history-filter-picker.test.ts @@ -64,6 +64,27 @@ function sqlAnalysis(tablesById: Record>, + errorIds: string[], +): SqlAnalysisPort { + const errors = new Set(errorIds); + return { + analyzeForFingerprint: vi.fn(), + analyzeBatch: vi.fn(async (items: SqlAnalysisBatchItem[]): Promise> => + new Map( + items.map((item) => [ + item.id, + errors.has(item.id) + ? { tablesTouched: [], columnsByClause: {}, error: 'parse boom' } + : { tablesTouched: tablesById[item.id] ?? 
[], columnsByClause: {} }, + ]), + ), + ), + validateReadOnly: vi.fn(async () => ({ ok: true })), + }; +} + function llm(decisions: Array<{ role: string; exclude: boolean; reason: string }>): KtxLlmRuntimePort { const generateObject = vi.fn(async () => ({ roles: decisions })) as KtxLlmRuntimePort['generateObject']; return { @@ -198,6 +219,7 @@ describe('query-history filter picker', () => { consideredRoleCount: 0, skipped: { reason: 'no-llm' }, warnings: [], + parseFailedTemplateIds: [], }); }); @@ -227,6 +249,32 @@ describe('query-history filter picker', () => { expect(proposal.skipped).toEqual({ reason: 'no-in-scope-history' }); }); + it('records parse failures as template ids, not warnings', async () => { + const proposal = await proposeQueryHistoryServiceAccountFilters({ + connectionId: 'warehouse', + dialect: 'postgres', + queryClient: {}, + reader: reader( + aggregate({ + templateId: 'good', + canonicalSql: 'select * from analytics.orders', + topUsers: [{ user: 'analyst', executions: 30 }], + }), + aggregate({ + templateId: 'broken', + canonicalSql: 'select * from where', + topUsers: [{ user: 'analyst', executions: 5 }], + }), + ), + sqlAnalysis: sqlAnalysisWithErrors({ good: [{ catalog: null, db: 'analytics', name: 'orders' }] }, ['broken']), + llmRuntime: llm([]), + pullConfig: { dialect: 'postgres', enabledSchemas: ['analytics'], filters: { dropTrivialProbes: true } }, + }); + + expect(proposal.parseFailedTemplateIds).toEqual(['broken']); + expect(proposal.warnings).toEqual([]); + }); + it('keeps clean in-scope history when the model excludes nothing', async () => { const proposal = await proposeQueryHistoryServiceAccountFilters({ connectionId: 'warehouse', diff --git a/packages/cli/test/context/ingest/ingest-bundle.runner.test.ts b/packages/cli/test/context/ingest/ingest-bundle.runner.test.ts index 447cd01e..b491acf2 100644 --- a/packages/cli/test/context/ingest/ingest-bundle.runner.test.ts +++ 
b/packages/cli/test/context/ingest/ingest-bundle.runner.test.ts @@ -426,6 +426,177 @@ describe('IngestBundleRunner — Stages 1 → 7', () => { ); }); + it('uses the rate-limit governor for work-unit start slots', async () => { + const deps = makeDeps(); + const acquireWorkSlot = vi.fn(async () => vi.fn()); + const runner = buildRunner(deps, { + settings: { + probeRowCount: 1, + memoryIngestionModel: 'test-model', + workUnitMaxConcurrency: 2, + rateLimitGovernor: { acquireWorkSlot, subscribe: vi.fn(() => vi.fn()) } as never, + }, + }); + deps.adapter.chunk.mockResolvedValue({ + workUnits: [ + { unitKey: 'u1', rawFiles: ['a.yml'], peerFileIndex: [], dependencyPaths: [] }, + { unitKey: 'u2', rawFiles: ['b.yml'], peerFileIndex: [], dependencyPaths: [] }, + ], + }); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([ + ['a.yml', 'h1'], + ['b.yml', 'h2'], + ]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run({ + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }); + + expect(acquireWorkSlot).toHaveBeenCalledTimes(2); + }); + + it('passes the job abort signal into rate-limit work-unit slots', async () => { + const deps = makeDeps(); + const controller = new AbortController(); + const acquireWorkSlot = vi.fn(async () => vi.fn()); + const runner = buildRunner(deps, { + settings: { + probeRowCount: 1, + memoryIngestionModel: 'test-model', + workUnitMaxConcurrency: 1, + rateLimitGovernor: { acquireWorkSlot, subscribe: vi.fn(() => vi.fn()) } as never, + }, + }); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await runner.run( + { + jobId: 
'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + { jobId: 'j1', abortSignal: controller.signal, startPhase: () => new TestJobContext('j1', null, async () => undefined, async () => undefined) } as any, + ); + + expect(acquireWorkSlot).toHaveBeenCalledWith(controller.signal); + }); + + it('does not convert aborted work-unit agent loops into failed work units', async () => { + const deps = makeDeps(); + const controller = new AbortController(); + deps.agentRunner.runLoop.mockImplementation(async () => { + controller.abort(); + throw new DOMException('Aborted', 'AbortError'); + }); + const runner = buildRunner(deps, { + settings: { + probeRowCount: 1, + memoryIngestionModel: 'test-model', + workUnitMaxConcurrency: 1, + }, + }); + (runner as any).stageRawFilesStage1 = vi.fn().mockResolvedValue({ + currentHashes: new Map([['a.yml', 'h1']]), + rawDirInWorktree: 'raw-sources/c1/fake/s', + }); + (runner as any).resolveStagedDir = vi.fn().mockResolvedValue('/tmp/stage/upload-x'); + + await expect( + runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + { jobId: 'j1', abortSignal: controller.signal, startPhase: () => new TestJobContext('j1', null, async () => undefined, async () => undefined) } as any, + ), + ).rejects.toThrow(/Aborted/); + + expect(deps.runsRepo.markFailed).toHaveBeenCalledWith('run-1'); + expect(deps.reportsRepo.create).not.toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + failedWorkUnits: expect.arrayContaining(['u1']), + }), + }), + ); + }); + + it('emits trace and memory-flow status for rate-limit waits', async () => { + const deps = makeDeps(); + let subscriber: ((state: any) => void) | undefined; + const memoryFlow = createMemoryFlowLiveBuffer(bundleReplayInput()); + const runner = buildRunner(deps, { + settings: { + probeRowCount: 1, 
+ memoryIngestionModel: 'test-model', + rateLimitGovernor: { + acquireWorkSlot: vi.fn(async () => vi.fn()), + subscribe: vi.fn((cb: (state: any) => void) => { + subscriber = cb; + return vi.fn(); + }), + } as never, + }, + }); + (runner as any).runInner = async (_job: any, ctx: any) => { + subscriber?.({ + kind: 'wait_tick', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + resumeAtMs: 2_000, + remainingMs: 1_000, + }); + ctx.memoryFlow.emit({ type: 'report_created', runId: 'run-1' }); + return { + runId: 'run-1', + syncId: 'sync-1', + diffSummary: { added: 0, modified: 0, deleted: 0, unchanged: 0 }, + workUnitCount: 0, + failedWorkUnits: [], + artifactsWritten: 0, + commitSha: null, + }; + }; + + await runner.run( + { + jobId: 'j1', + connectionId: 'c1', + sourceKey: 'fake', + trigger: 'upload', + bundleRef: { kind: 'upload', uploadId: 'upload-x' }, + }, + { memoryFlow } as any, + ); + + expect(memoryFlow.snapshot().events).toContainEqual( + expect.objectContaining({ + type: 'rate_limit_wait', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + resumeAtMs: 2_000, + remainingMs: 1_000, + }), + ); + }); + it('fails before squash when reconciliation leaves a touched wiki page with dangling refs', async () => { const deps = makeDeps(); let currentToolSession: any = null; diff --git a/packages/cli/test/context/ingest/local-bundle-runtime.test.ts b/packages/cli/test/context/ingest/local-bundle-runtime.test.ts index 9d1ec9b4..e3031cc5 100644 --- a/packages/cli/test/context/ingest/local-bundle-runtime.test.ts +++ b/packages/cli/test/context/ingest/local-bundle-runtime.test.ts @@ -301,6 +301,7 @@ describe('createLocalBundleIngestRuntime', () => { 'memoryIngestionModel', 'probeRowCount', 'profileIngest', + 'rateLimitGovernor', 'workUnitFailureMode', 'workUnitMaxConcurrency', 'workUnitStepBudget', diff --git a/packages/cli/test/context/ingest/memory-flow/schema.test.ts b/packages/cli/test/context/ingest/memory-flow/schema.test.ts index 
1aaeec4b..ee8f3bb9 100644 --- a/packages/cli/test/context/ingest/memory-flow/schema.test.ts +++ b/packages/cli/test/context/ingest/memory-flow/schema.test.ts @@ -146,6 +146,29 @@ describe('memory-flow schemas', () => { expect(parsed.events).toContainEqual({ type: 'stage_skipped', stage: 'actions', reason: 'requires LLM' }); }); + it('accepts rate-limit wait replay events', () => { + expect( + memoryFlowReplayInputSchema.parse({ + ...snapshot(), + events: [ + { + type: 'rate_limit_wait', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + resumeAtMs: 2_000, + remainingMs: 1_000, + }, + ], + }).events[0], + ).toEqual({ + type: 'rate_limit_wait', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + resumeAtMs: 2_000, + remainingMs: 1_000, + }); + }); + it('parses snapshot and closed stream events', () => { expect(memoryFlowStreamEventSchema.parse({ type: 'snapshot', snapshot: snapshot({ status: 'done' }) })).toEqual({ type: 'snapshot', diff --git a/packages/cli/test/context/llm/ai-sdk-runtime.test.ts b/packages/cli/test/context/llm/ai-sdk-runtime.test.ts index 74987094..bab7d1d7 100644 --- a/packages/cli/test/context/llm/ai-sdk-runtime.test.ts +++ b/packages/cli/test/context/llm/ai-sdk-runtime.test.ts @@ -107,6 +107,199 @@ describe('AiSdkKtxLlmRuntime.runAgentLoop', () => { expect(result.error).toBe(err); }); + it('reports AI SDK retry-after rate limits and retries through the governor', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const rateLimitError = Object.assign(new Error('too many requests'), { + name: 'TooManyRequestsError', + retryAfter: 2, + statusCode: 429, + }); + (generateText as any).mockRejectedValueOnce(rateLimitError).mockResolvedValueOnce({ + text: 'done', + toolCalls: [], + steps: [], + usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 }, + }); + const runtime = new AiSdkKtxLlmRuntime({ + llmProvider: llmProvider as any, + rateLimitGovernor: { waitForReady, 
report, maxRetryAttempts: () => 6 } as never, + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + + expect(result.stopReason).toBe('natural'); + expect(report).toHaveBeenCalledWith({ + provider: 'anthropic-api', + status: 'rejected', + retryAfterMs: 2_000, + rateLimitType: 'http_429', + }); + expect(waitForReady).toHaveBeenCalledTimes(2); + expect(generateText).toHaveBeenCalledTimes(2); + }); + + it('does not retry AI SDK rate limits without a governor', async () => { + const rateLimitError = Object.assign(new Error('too many requests'), { + name: 'TooManyRequestsError', + statusCode: 429, + }); + (generateText as any).mockRejectedValue(rateLimitError); + // The beforeEach runtime is constructed without a rateLimitGovernor. + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + + expect(result.stopReason).toBe('error'); + expect(generateText).toHaveBeenCalledTimes(1); + }); + + it('honors a governor retry budget of one attempt without retrying', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const rateLimitError = Object.assign(new Error('too many requests'), { + name: 'TooManyRequestsError', + statusCode: 429, + }); + (generateText as any).mockRejectedValue(rateLimitError); + const runtime = new AiSdkKtxLlmRuntime({ + llmProvider: llmProvider as any, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 1 } as never, + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + + expect(result.stopReason).toBe('error'); + expect(generateText).toHaveBeenCalledTimes(1); + 
expect(report).not.toHaveBeenCalled(); + }); + + it('reports Anthropic API response-header utilization to the governor', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + (generateText as any).mockResolvedValue({ + text: 'done', + toolCalls: [], + steps: [], + response: { + headers: { + 'anthropic-ratelimit-requests-limit': '100', + 'anthropic-ratelimit-requests-remaining': '8', + 'anthropic-ratelimit-input-tokens-limit': '10000', + 'anthropic-ratelimit-input-tokens-remaining': '9000', + }, + }, + }); + const runtime = new AiSdkKtxLlmRuntime({ + llmProvider: llmProvider as any, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never, + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + + expect(result.stopReason).toBe('natural'); + expect(report).toHaveBeenCalledWith({ + provider: 'anthropic-api', + status: 'allowed', + rateLimitType: 'rpm', + utilization: 0.92, + }); + }); + + it('reports generic x-ratelimit response-header utilization for Vertex providers', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const vertexProvider = { + ...llmProvider, + getModel: vi.fn().mockReturnValue({ modelId: 'gemini-3-pro', provider: 'google-vertex' }), + }; + (generateText as any).mockResolvedValue({ + text: 'done', + toolCalls: [], + steps: [], + response: { + headers: { + 'x-ratelimit-limit-requests': '200', + 'x-ratelimit-remaining-requests': '30', + 'x-ratelimit-limit-tokens': '100000', + 'x-ratelimit-remaining-tokens': '4000', + }, + }, + }); + const runtime = new AiSdkKtxLlmRuntime({ + llmProvider: vertexProvider as any, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never, + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: 
'', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + }); + + expect(result.stopReason).toBe('natural'); + expect(report).toHaveBeenCalledWith({ + provider: 'vertex', + status: 'allowed', + rateLimitType: 'tpm', + utilization: 0.96, + }); + }); + + it('passes abort signals into governor waits and AI SDK generateText calls', async () => { + const controller = new AbortController(); + const waitForReady = vi.fn().mockResolvedValue(undefined); + (generateText as any).mockResolvedValue({ text: 'done', toolCalls: [], steps: [] }); + const runtime = new AiSdkKtxLlmRuntime({ + llmProvider: llmProvider as any, + rateLimitGovernor: { waitForReady, report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + const result = await runtime.runAgentLoop({ + modelRole: 'candidateExtraction', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + abortSignal: controller.signal, + }); + + expect(result.stopReason).toBe('natural'); + expect(waitForReady).toHaveBeenCalledWith(controller.signal); + expect((generateText as any).mock.calls[0][0].abortSignal).toBe(controller.signal); + }); + it('returns metrics with stepCount, per-step boundaries, and aggregate token usage', async () => { (generateText as any).mockImplementation(async (opts: any) => { await opts.onStepFinish({}); diff --git a/packages/cli/test/context/llm/claude-code-runtime.test.ts b/packages/cli/test/context/llm/claude-code-runtime.test.ts index 5c56c26c..ba83cde6 100644 --- a/packages/cli/test/context/llm/claude-code-runtime.test.ts +++ b/packages/cli/test/context/llm/claude-code-runtime.test.ts @@ -9,6 +9,14 @@ async function* stream(messages: SDKMessage[]): AsyncGenerator } } +function deferred() { + let resolve!: (value: T | PromiseLike) => void; + const promise = new Promise((innerResolve) => { + resolve = innerResolve; + }); + return { promise, resolve }; +} + function initMessage(overrides: Partial> = {}): Extract< SDKMessage, { type: 'system'; 
subtype: 'init' } @@ -91,6 +99,247 @@ describe('ClaudeCodeKtxLlmRuntime', () => { }); }); + it('waits before Claude Code text generation and reports rate-limit events', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const query = vi.fn((_input: any) => + stream([ + { + type: 'rate_limit_event', + rate_limit_info: { + status: 'allowed_warning', + resetsAt: new Date(2_000).toISOString(), + rateLimitType: 'five_hour', + utilization: 0.91, + }, + } as unknown as SDKMessage, + resultMessage({ result: 'ok' }), + ]), + ); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).resolves.toBe('ok'); + expect(waitForReady).toHaveBeenCalledTimes(1); + expect(report).toHaveBeenCalledWith({ + provider: 'claude-subscription', + status: 'warning', + resetAtMs: 2_000, + rateLimitType: 'five_hour', + utilization: 0.91, + }); + }); + + it('maps numeric Claude Code reset times from SDK rate-limit events', async () => { + const report = vi.fn(); + const resetAtMs = 1_700_000_000_000; + const query = vi.fn((_input: any) => + stream([ + { + type: 'rate_limit_event', + rate_limit_info: { + status: 'rejected', + resetsAt: resetAtMs, + rateLimitType: 'five_hour', + utilization: 1, + }, + } as unknown as SDKMessage, + resultMessage({ result: 'ok' }), + ]), + ); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady: vi.fn().mockResolvedValue(undefined), report, maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).resolves.toBe('ok'); + + expect(report).toHaveBeenCalledWith({ + provider: 'claude-subscription', + 
status: 'rejected', + resetAtMs, + rateLimitType: 'five_hour', + utilization: 1, + }); + }); + + it('retries a Claude Code query after an SDK rate-limit result error', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const resetAtMs = 1_700_000_000_000; + const query = vi + .fn() + .mockReturnValueOnce( + stream([ + { + type: 'rate_limit_event', + rate_limit_info: { + status: 'rejected', + resetsAt: resetAtMs, + rateLimitType: 'five_hour', + utilization: 1, + }, + } as unknown as SDKMessage, + resultMessage({ + subtype: 'error_during_execution', + is_error: true, + result: '', + errors: ['rate limit retry budget exhausted'], + terminal_reason: 'model_error', + } as never), + ]), + ) + .mockReturnValueOnce(stream([resultMessage({ result: 'ok' })])); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).resolves.toBe('ok'); + + expect(query).toHaveBeenCalledTimes(2); + expect(waitForReady).toHaveBeenCalledTimes(2); + expect(report).toHaveBeenCalledWith({ + provider: 'claude-subscription', + status: 'rejected', + resetAtMs, + rateLimitType: 'five_hour', + utilization: 1, + }); + }); + + it('reports Claude Code api retry messages as warning signals', async () => { + const report = vi.fn(); + const query = vi.fn((_input: any) => + stream([ + { + type: 'system', + subtype: 'api_retry', + retry_delay_ms: 12_000, + } as unknown as SDKMessage, + resultMessage({ result: 'ok' }), + ]), + ); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady: vi.fn().mockResolvedValue(undefined), report, maxRetryAttempts: () => 6 } as never, + }); + + await runtime.generateText({ 
role: 'default', prompt: 'hello' }); + expect(report).toHaveBeenCalledWith({ + provider: 'claude-subscription', + status: 'warning', + retryAfterMs: 12_000, + rateLimitType: 'api_retry', + }); + }); + + it('passes abort signals into Claude Code governor waits', async () => { + const controller = new AbortController(); + const waitForReady = vi.fn().mockResolvedValue(undefined); + const query = vi.fn((_input: any) => stream([resultMessage({ result: 'ok' })])); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady, report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello', abortSignal: controller.signal })).resolves.toBe('ok'); + + expect(waitForReady).toHaveBeenCalledWith(controller.signal); + }); + + it('interrupts an active Claude Code query when the abort signal fires', async () => { + const controller = new AbortController(); + const streamStarted = deferred(); + const releaseStream = deferred(); + const interrupt = vi.fn(() => releaseStream.resolve()); + const queryResult = { + async *[Symbol.asyncIterator]() { + streamStarted.resolve(); + await releaseStream.promise; + yield resultMessage({ result: 'ok' }); + }, + interrupt, + }; + const query = vi.fn(() => queryResult as never); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady: vi.fn().mockResolvedValue(undefined), report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + const pending = runtime.generateText({ role: 'default', prompt: 'hello', abortSignal: controller.signal }); + await streamStarted.promise; + controller.abort(); + + await expect(pending).rejects.toThrow(/Aborted/); + expect(interrupt).toHaveBeenCalledTimes(1); + }); + + it('throws abort before starting Claude Code query when 
the signal is already aborted', async () => { + const controller = new AbortController(); + controller.abort(); + const query = vi.fn((_input: any) => stream([resultMessage({ result: 'ok' })])); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady: vi.fn().mockResolvedValue(undefined), report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello', abortSignal: controller.signal })).rejects.toThrow(/Aborted/); + expect(query).not.toHaveBeenCalled(); + }); + + it('treats an interrupted Claude Code stream with no result as abort', async () => { + const controller = new AbortController(); + const streamStarted = deferred(); + const releaseStream = deferred(); + const interrupt = vi.fn(() => releaseStream.resolve()); + const queryResult = { + async *[Symbol.asyncIterator]() { + streamStarted.resolve(); + await releaseStream.promise; + }, + interrupt, + }; + const query = vi.fn(() => queryResult as never); + const runtime = new ClaudeCodeKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'sonnet' }, + query, + env: {}, + rateLimitGovernor: { waitForReady: vi.fn().mockResolvedValue(undefined), report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + const pending = runtime.generateText({ role: 'default', prompt: 'hello', abortSignal: controller.signal }); + await streamStarted.promise; + controller.abort(); + + await expect(pending).rejects.toThrow(/Aborted/); + expect(interrupt).toHaveBeenCalledTimes(1); + }); + it('validates structured output with the caller schema and whitelists the SDK StructuredOutput tool', async () => { const schema = z.object({ answer: z.string() }); const query = vi.fn((_input: any) => diff --git a/packages/cli/test/context/llm/codex-runtime.test.ts b/packages/cli/test/context/llm/codex-runtime.test.ts index 2d408543..4c3fcdfd 
100644 --- a/packages/cli/test/context/llm/codex-runtime.test.ts +++ b/packages/cli/test/context/llm/codex-runtime.test.ts @@ -130,6 +130,150 @@ describe('CodexKtxLlmRuntime', () => { ).rejects.toThrow('Codex structured output failed validation'); }); + it('reports Codex rate-limit failures and retries with opaque backoff', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const fakeRunner = { + runStreamed: vi + .fn() + .mockResolvedValueOnce(events([{ type: 'turn.failed', error: { message: '429 rate limit exceeded' } }])) + .mockResolvedValueOnce( + events([ + { type: 'turn.started' }, + { type: 'item.completed', item: { type: 'agent_message', text: 'ok' } }, + { type: 'turn.completed' }, + ]), + ), + }; + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: fakeRunner, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).resolves.toBe('ok'); + expect(report).toHaveBeenCalledWith({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' }); + expect(waitForReady).toHaveBeenCalledTimes(2); + expect(fakeRunner.runStreamed).toHaveBeenCalledTimes(2); + }); + + it('reports thrown Codex rate-limit failures and retries with opaque backoff', async () => { + const waitForReady = vi.fn().mockResolvedValue(undefined); + const report = vi.fn(); + const fakeRunner = { + runStreamed: vi + .fn() + .mockRejectedValueOnce(new Error('ThreadError: 429 rate limit exceeded')) + .mockResolvedValueOnce( + events([ + { type: 'turn.started' }, + { type: 'item.completed', item: { type: 'agent_message', text: 'ok' } }, + { type: 'turn.completed' }, + ]), + ), + }; + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: fakeRunner, + rateLimitGovernor: { waitForReady, report, maxRetryAttempts: () 
=> 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).resolves.toBe('ok'); + + expect(report).toHaveBeenCalledWith({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' }); + expect(waitForReady).toHaveBeenCalledTimes(2); + expect(fakeRunner.runStreamed).toHaveBeenCalledTimes(2); + }); + + it('surfaces Codex rate-limit failures without retrying when no governor is present', async () => { + const fakeRunner = runner([{ type: 'turn.failed', error: { message: '429 rate limit exceeded' } }]); + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: fakeRunner, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello' })).rejects.toThrow(/rate limit/i); + expect(fakeRunner.runStreamed).toHaveBeenCalledTimes(1); + }); + + it('passes abort signals into Codex text generation and governor waits', async () => { + const controller = new AbortController(); + const waitForReady = vi.fn().mockResolvedValue(undefined); + let observedSignal: AbortSignal | undefined; + const fakeRunner = { + runStreamed: vi.fn(async (input: { signal?: AbortSignal }) => { + observedSignal = input.signal; + return events([ + { type: 'turn.started' }, + { type: 'item.completed', item: { type: 'agent_message', text: 'ok' } }, + { type: 'turn.completed' }, + ]); + }), + }; + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: fakeRunner, + rateLimitGovernor: { waitForReady, report: vi.fn(), maxRetryAttempts: () => 6 } as never, + }); + + await expect(runtime.generateText({ role: 'default', prompt: 'hello', abortSignal: controller.signal })).resolves.toBe('ok'); + + expect(waitForReady).toHaveBeenCalledWith(controller.signal); + expect(observedSignal).toBe(controller.signal); + }); + + it('links the parent abort signal into Codex agent-loop streamed runs', async () => { + const controller = new 
AbortController(); + let releaseStream!: () => void; + const streamRelease = new Promise((resolve) => { + releaseStream = resolve; + }); + let markRunnerCalled!: () => void; + const runnerCalled = new Promise((resolve) => { + markRunnerCalled = resolve; + }); + let observedSignal: AbortSignal | undefined; + const fakeRunner = { + runStreamed: vi.fn(async (input: { signal?: AbortSignal }) => { + observedSignal = input.signal; + markRunnerCalled(); + return (async function* () { + await streamRelease; + yield { type: 'turn.started' }; + yield { type: 'item.completed', item: { type: 'agent_message', text: 'ok' } }; + yield { type: 'turn.completed' }; + })(); + }), + }; + const runtime = new CodexKtxLlmRuntime({ + projectDir: '/tmp/project', + modelSlots: { default: 'codex' }, + runner: fakeRunner, + }); + + const pending = runtime.runAgentLoop({ + modelRole: 'default', + systemPrompt: '', + userPrompt: '', + toolSet: {}, + stepBudget: 10, + telemetryTags: {}, + abortSignal: controller.signal, + }); + + await runnerCalled; + expect(observedSignal).toBeDefined(); + expect(observedSignal).not.toBe(controller.signal); + controller.abort(); + expect(observedSignal?.aborted).toBe(true); + releaseStream(); + await expect(pending).resolves.toMatchObject({ stopReason: 'natural' }); + }); + it('starts and closes a temporary MCP server for tool-backed agent loops', async () => { const close = vi.fn(async () => undefined); const startMcpServer = vi.fn(async () => ({ diff --git a/packages/cli/test/context/llm/local-config.test.ts b/packages/cli/test/context/llm/local-config.test.ts index e153baaf..eed66261 100644 --- a/packages/cli/test/context/llm/local-config.test.ts +++ b/packages/cli/test/context/llm/local-config.test.ts @@ -7,6 +7,7 @@ import { import { createLocalKtxEmbeddingProviderFromConfig, createLocalKtxLlmProviderFromConfig, + createLocalKtxLlmRuntimeFromConfig, resolveLocalKtxEmbeddingConfig, resolveLocalKtxLlmConfig, } from '../../../src/context/llm/local-config.js'; 
@@ -129,6 +130,64 @@ describe('local KTX LLM config', () => { vertexFallbackTo5m: false, }); }); + + it('passes the rate-limit governor into created runtimes', () => { + const rateLimitGovernor = {} as never; + const createClaudeCodeRuntime = vi.fn(() => ({ + generateText: vi.fn(), + generateObject: vi.fn(), + runAgentLoop: vi.fn(), + })); + const createCodexRuntime = vi.fn(() => ({ + generateText: vi.fn(), + generateObject: vi.fn(), + runAgentLoop: vi.fn(), + })); + const createAiSdkRuntime = vi.fn(() => ({ + generateText: vi.fn(), + generateObject: vi.fn(), + runAgentLoop: vi.fn(), + })); + const createKtxLlmProvider = vi.fn(() => ({ + getModel: vi.fn(), + getModelByName: vi.fn(), + cacheMarker: vi.fn(), + repairToolCallHandler: vi.fn(), + thinkingProviderOptions: vi.fn(), + telemetryConfig: vi.fn(), + promptCachingConfig: vi.fn(), + activeBackend: vi.fn(() => 'anthropic'), + })); + + createLocalKtxLlmRuntimeFromConfig( + { + provider: { backend: 'claude-code' }, + models: { default: 'sonnet' }, + promptCaching: undefined, + }, + { projectDir: '/tmp/project', env: {}, rateLimitGovernor, createClaudeCodeRuntime }, + ); + createLocalKtxLlmRuntimeFromConfig( + { + provider: { backend: 'codex' }, + models: { default: 'codex' }, + promptCaching: undefined, + }, + { projectDir: '/tmp/project', env: {}, rateLimitGovernor, createCodexRuntime }, + ); + createLocalKtxLlmRuntimeFromConfig( + { + provider: { backend: 'anthropic' }, + models: { default: 'claude-sonnet-4-6' }, + promptCaching: undefined, + }, + { env: {}, rateLimitGovernor, createAiSdkRuntime, createKtxLlmProvider: createKtxLlmProvider as never }, + ); + + expect(createClaudeCodeRuntime).toHaveBeenCalledWith(expect.objectContaining({ rateLimitGovernor })); + expect(createCodexRuntime).toHaveBeenCalledWith(expect.objectContaining({ rateLimitGovernor })); + expect(createAiSdkRuntime).toHaveBeenCalledWith(expect.objectContaining({ rateLimitGovernor })); + }); }); describe('local KTX embedding config', () => { 
diff --git a/packages/cli/test/context/llm/rate-limit-governor.test.ts b/packages/cli/test/context/llm/rate-limit-governor.test.ts new file mode 100644 index 00000000..51fcba84 --- /dev/null +++ b/packages/cli/test/context/llm/rate-limit-governor.test.ts @@ -0,0 +1,278 @@ +import { describe, expect, it } from 'vitest'; +import { + createRateLimitGovernorConfig, + RateLimitGovernor, + type RateLimitWaitState, +} from '../../../src/context/llm/rate-limit-governor.js'; + +function testClock(startMs = 1_000) { + let nowMs = startMs; + return { + now: () => nowMs, + advance: (ms: number) => { + nowMs += ms; + }, + }; +} + +async function flushMicrotasks(turns = 10): Promise { + for (let i = 0; i < turns; i += 1) { + await Promise.resolve(); + } +} + +describe('RateLimitGovernor', () => { + it('drops and restores the effective work-unit limit from warning signals', () => { + const clock = testClock(); + const states: RateLimitWaitState[] = []; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 6, minConcurrencyUnderPressure: 1 }), + { now: clock.now, sleep: async () => undefined, random: () => 0 }, + ); + governor.subscribe((state) => states.push(state)); + + expect(governor.currentLimit()).toBe(6); + governor.report({ + provider: 'claude-subscription', + status: 'warning', + utilization: 0.91, + rateLimitType: 'five_hour', + }); + expect(governor.currentLimit()).toBe(1); + governor.report({ + provider: 'claude-subscription', + status: 'allowed', + utilization: 0.2, + rateLimitType: 'five_hour', + }); + expect(governor.currentLimit()).toBe(6); + expect(states.map((state) => state.kind)).toContain('concurrency_adjusted'); + }); + + it('blocks work slots during a rejected reset window and emits wait states', async () => { + const clock = testClock(); + const states: RateLimitWaitState[] = []; + const sleeps: number[] = []; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 2, waitStateTickMs: 
100 }), + { + now: clock.now, + random: () => 0, + sleep: async (ms) => { + sleeps.push(ms); + clock.advance(ms); + }, + }, + ); + governor.subscribe((state) => states.push(state)); + + governor.report({ provider: 'anthropic-api', status: 'rejected', retryAfterMs: 250, rateLimitType: 'rpm' }); + const release = await governor.acquireWorkSlot(); + release(); + + expect(sleeps).toEqual([100, 100, 50]); + expect(states.some((state) => state.kind === 'wait_started' && state.provider === 'anthropic-api')).toBe(true); + expect(states.some((state) => state.kind === 'wait_finished' && state.provider === 'anthropic-api')).toBe(true); + }); + + it('rejects an interrupted wait without consuming a work slot', async () => { + const clock = testClock(); + let abortListener: (() => void) | undefined; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 1, waitStateTickMs: 100 }), + { + now: clock.now, + random: () => 0, + sleep: async (_ms, signal) => + new Promise((_resolve, reject) => { + abortListener = () => reject(new DOMException('Aborted', 'AbortError')); + signal?.addEventListener('abort', abortListener, { once: true }); + }), + }, + ); + const controller = new AbortController(); + + governor.report({ + provider: 'claude-subscription', + status: 'rejected', + resetAtMs: 2_000, + rateLimitType: 'five_hour', + }); + const pending = governor.acquireWorkSlot(controller.signal); + controller.abort(); + abortListener?.(); + + await expect(pending).rejects.toThrow(/Aborted/); + expect(governor.activeSlots()).toBe(0); + }); + + it('rejects an already-aborted ready wait', async () => { + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 1 }), + { sleep: async () => undefined, random: () => 0 }, + ); + const controller = new AbortController(); + controller.abort(); + + await expect(governor.waitForReady(controller.signal)).rejects.toThrow(/Aborted/); + }); + + it('rejects an already-aborted work slot 
without consuming capacity', async () => { + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 1 }), + { sleep: async () => undefined, random: () => 0 }, + ); + const controller = new AbortController(); + controller.abort(); + + await expect(governor.acquireWorkSlot(controller.signal)).rejects.toThrow(/Aborted/); + expect(governor.activeSlots()).toBe(0); + }); + + it('uses bounded opaque backoff for rejected signals without reset hints', async () => { + const clock = testClock(); + const sleeps: number[] = []; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ + maxConcurrency: 1, + retry: { maxAttempts: 3, baseDelayMs: 1_000, maxDelayMs: 60_000, jitter: false }, + }), + { + now: clock.now, + random: () => 0, + sleep: async (ms) => { + sleeps.push(ms); + clock.advance(ms); + }, + }, + ); + + governor.report({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' }); + const release1 = await governor.acquireWorkSlot(); + release1(); + governor.report({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' }); + const release2 = await governor.acquireWorkSlot(); + release2(); + + expect(sleeps).toEqual([1_000, 2_000]); + }); + + it('exposes the configured retry budget and disables outer retries when pacing is off', () => { + const retry = { maxAttempts: 3, baseDelayMs: 1_000, maxDelayMs: 60_000, jitter: false }; + const enabled = new RateLimitGovernor(createRateLimitGovernorConfig({ retry })); + expect(enabled.maxRetryAttempts()).toBe(3); + + const disabled = new RateLimitGovernor(createRateLimitGovernorConfig({ enabled: false, retry })); + expect(disabled.maxRetryAttempts()).toBe(1); + }); + + it('emits visible wait ticks after a rejected report without a waiting caller', async () => { + const clock = testClock(); + const states: RateLimitWaitState[] = []; + const sleeps: number[] = []; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 4, 
minConcurrencyUnderPressure: 1, waitStateTickMs: 100 }), + { + now: clock.now, + random: () => 0, + sleep: async (ms, signal) => { + if (signal?.aborted) { + throw new DOMException('Aborted', 'AbortError'); + } + sleeps.push(ms); + clock.advance(ms); + }, + }, + ); + governor.subscribe((state) => states.push(state)); + + governor.report({ + provider: 'claude-subscription', + status: 'rejected', + resetAtMs: 1_250, + rateLimitType: 'five_hour', + }); + await flushMicrotasks(); + + expect(sleeps).toEqual([100, 100, 50]); + expect(states).toContainEqual( + expect.objectContaining({ + kind: 'wait_started', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + remainingMs: 250, + }), + ); + expect(states.filter((state) => state.kind === 'wait_tick')).toHaveLength(3); + expect(states).toContainEqual( + expect.objectContaining({ + kind: 'wait_finished', + provider: 'claude-subscription', + rateLimitType: 'five_hour', + remainingMs: 0, + }), + ); + }); + + it('does not duplicate countdown sleeps when a work slot waits during the same pause', async () => { + const clock = testClock(); + const states: RateLimitWaitState[] = []; + const sleeps: number[] = []; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 2, waitStateTickMs: 100 }), + { + now: clock.now, + random: () => 0, + sleep: async (ms, signal) => { + if (signal?.aborted) { + throw new DOMException('Aborted', 'AbortError'); + } + sleeps.push(ms); + clock.advance(ms); + }, + }, + ); + governor.subscribe((state) => states.push(state)); + + governor.report({ provider: 'anthropic-api', status: 'rejected', retryAfterMs: 250, rateLimitType: 'rpm' }); + const pendingRelease = governor.acquireWorkSlot(); + await flushMicrotasks(); + const release = await pendingRelease; + release(); + + expect(sleeps).toEqual([100, 100, 50]); + expect(states.filter((state) => state.kind === 'wait_tick')).toHaveLength(3); + expect(governor.activeSlots()).toBe(0); + }); + + it('stops the 
visible wait ticker when the last subscriber unsubscribes', async () => { + const clock = testClock(); + let abortCount = 0; + const governor = new RateLimitGovernor( + createRateLimitGovernorConfig({ maxConcurrency: 1, waitStateTickMs: 100 }), + { + now: clock.now, + random: () => 0, + sleep: async (_ms, signal) => + new Promise((_resolve, reject) => { + signal?.addEventListener( + 'abort', + () => { + abortCount += 1; + reject(new DOMException('Aborted', 'AbortError')); + }, + { once: true }, + ); + }), + }, + ); + const unsubscribe = governor.subscribe(() => undefined); + + governor.report({ provider: 'claude-subscription', status: 'rejected', retryAfterMs: 1_000 }); + await flushMicrotasks(1); + unsubscribe(); + await flushMicrotasks(1); + + expect(abortCount).toBe(1); + }); +}); diff --git a/packages/cli/test/context/mcp/server.test.ts b/packages/cli/test/context/mcp/server.test.ts index 95985d68..1359d346 100644 --- a/packages/cli/test/context/mcp/server.test.ts +++ b/packages/cli/test/context/mcp/server.test.ts @@ -1,4 +1,4 @@ -import { access, mkdtemp, readFile, rm } from 'node:fs/promises'; +import { access, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; @@ -7,6 +7,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { createLocalProjectMemoryIngest } from '../../../src/context/memory/local-memory.js'; import { detectCaptureSignals } from '../../../src/context/memory/capture-signals.js'; import type { MemoryAgentInput } from '../../../src/context/memory/types.js'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../../../src/context/project/config.js'; import { initKtxProject } from '../../../src/context/project/project.js'; import { jsonToolResult } from '../../../src/context/mcp/context-tools.js'; import { createDefaultKtxMcpServer, createKtxMcpServer } from 
'../../../src/context/mcp/server.js'; @@ -23,6 +24,12 @@ import type { MemoryIngestPort, } from '../../../src/context/mcp/types.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../../../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + type RegisteredTool = { name: string; config: { @@ -280,6 +287,60 @@ describe('createKtxMcpServer', () => { expect(io.stderrText()).not.toContain('mcpClientVersion'); }); + it('reports MCP tool exceptions with a tool-derived source', async () => { + reportExceptionMock.mockClear(); + vi.stubEnv('ANTHROPIC_API_KEY', 'mcp-anthropic-secret'); // pragma: allowlist secret + const fake = makeFakeServer(); + const io = makeIo(); + const projectDir = await mkdtemp(join(tmpdir(), 'ktx-mcp-exception-')); + try { + await initKtxProject({ projectDir }); + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); + + createKtxMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + projectDir, + io, + contextTools: { + knowledge: { + search: vi.fn().mockRejectedValue(new Error('wiki failed')), + read: vi.fn().mockResolvedValue(null), + }, + }, + }); + + await expect(getTool(fake.tools, 'wiki_search').handler({ query: 'revenue recognition', limit: 5 })).resolves.toMatchObject({ + isError: true, + }); + + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'mcp:wiki_search', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['mcp-anthropic-secret']), + }), + ); + } finally { + await rm(projectDir, { 
recursive: true, force: true }); + } + }); + it('captures the connecting MCP client name and version', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); diff --git a/packages/cli/test/context/project/config.test.ts b/packages/cli/test/context/project/config.test.ts index 6027d454..e5911a25 100644 --- a/packages/cli/test/context/project/config.test.ts +++ b/packages/cli/test/context/project/config.test.ts @@ -50,6 +50,17 @@ connections: maxConcurrency: 1, failureMode: 'continue', }, + rateLimit: { + enabled: true, + throttleThreshold: 0.8, + minConcurrencyUnderPressure: 1, + retry: { + maxAttempts: 6, + baseDelayMs: 1_000, + maxDelayMs: 60_000, + jitter: true, + }, + }, profile: false, }, agent: { @@ -163,6 +174,52 @@ ingest: expect(parseKtxProjectConfig('ingest:\n profile: json\n').ingest.profile).toBe('json'); }); + it('defaults ingest rate-limit settings', () => { + const config = buildDefaultKtxProjectConfig(); + expect(config.ingest.rateLimit).toEqual({ + enabled: true, + throttleThreshold: 0.8, + minConcurrencyUnderPressure: 1, + retry: { + maxAttempts: 6, + baseDelayMs: 1_000, + maxDelayMs: 60_000, + jitter: true, + }, + }); + }); + + it('validates ingest rate-limit retry settings', () => { + const config = parseKtxProjectConfig(` +llm: + provider: + backend: none +ingest: + rateLimit: + enabled: true + throttleThreshold: 0.7 + minConcurrencyUnderPressure: 2 + maxWaitMs: 300000 + retry: + maxAttempts: 4 + baseDelayMs: 500 + maxDelayMs: 30000 + jitter: false +`); + expect(config.ingest.rateLimit).toEqual({ + enabled: true, + throttleThreshold: 0.7, + minConcurrencyUnderPressure: 2, + maxWaitMs: 300_000, + retry: { + maxAttempts: 4, + baseDelayMs: 500, + maxDelayMs: 30_000, + jitter: false, + }, + }); + }); + it('parses global Vertex LLM config', () => { const config = parseKtxProjectConfig(` llm: diff --git a/packages/cli/test/public-ingest.test.ts b/packages/cli/test/public-ingest.test.ts index ba35faf6..6dea8834 100644 --- 
a/packages/cli/test/public-ingest.test.ts +++ b/packages/cli/test/public-ingest.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; -import { afterEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { buildPublicIngestPlan, executePublicIngestTarget, @@ -13,6 +13,12 @@ import { runKtxPublicIngest, } from '../src/public-ingest.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + /** Count non-overlapping occurrences of `needle` in `haystack`. */ function occurrences(haystack: string, needle: string): number { return haystack.split(needle).length - 1; @@ -377,6 +383,10 @@ describe('publicProgressMessage', () => { }); describe('runKtxPublicIngest', () => { + beforeEach(() => { + reportExceptionMock.mockClear(); + }); + afterEach(() => { vi.unstubAllEnvs(); }); @@ -1208,6 +1218,104 @@ describe('runKtxPublicIngest', () => { ); }); + it('reports foreground runtime preflight exceptions', async () => { + const io = makeIo({ isTTY: true, interactive: true }); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + }); + const ensureRuntime = vi.fn(async (): Promise => { + throw new Error('runtime unavailable'); + }); + const runContextBuild = vi.fn(async () => ({ exitCode: 0 })); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + queryHistory: 'enabled', + cliVersion: '0.2.0', + runtimeInstallPolicy: 'prompt', + }, + io.io, + { + loadProject: vi.fn(async () => project), + ensureRuntime, + runContextBuild, + }, + ), + 
).resolves.toBe(1); + + expect(runContextBuild).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('runtime unavailable'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ingest runtime', handled: true, fatal: false }), + projectDir: '/tmp/project', + }), + ); + }); + + it('reports foreground context-build exceptions', async () => { + const io = makeIo({ isTTY: true, interactive: true }); + const config = buildDefaultKtxProjectConfig(); + const project: KtxPublicIngestProject = { + projectDir: '/tmp/project', + config: { + ...config, + connections: { warehouse: { driver: 'postgres', password: 'env:INGEST_DB_PASSWORD' } }, // pragma: allowlist secret + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }, + }; + const runContextBuild = vi.fn(async () => { + throw new Error('context build failed'); + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + queryHistory: 'default', + }, + io.io, + { + loadProject: vi.fn(async () => project), + runContextBuild, + env: { + ...process.env, + ANTHROPIC_API_KEY: 'ingest-anthropic-secret', // pragma: allowlist secret + INGEST_DB_PASSWORD: 'ingest-db-password', // pragma: allowlist secret + }, + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('context build failed'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ingest context-build', handled: true, fatal: false }), + projectDir: '/tmp/project', + redactionSecrets: expect.arrayContaining(['ingest-anthropic-secret', 'ingest-db-password']), + }), + ); + }); + it('preflights foreground managed embeddings runtime before starting the 
context-build view', async () => { const io = makeIo({ isTTY: true, interactive: true }); const config = buildDefaultKtxProjectConfig(); diff --git a/packages/cli/test/reveal-password-prompt.test.ts b/packages/cli/test/reveal-password-prompt.test.ts new file mode 100644 index 00000000..7bb8cc10 --- /dev/null +++ b/packages/cli/test/reveal-password-prompt.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from 'vitest'; +import { maskRevealingTail } from '../src/reveal-password-prompt.js'; + +const MASK = '▪'; + +describe('maskRevealingTail', () => { + it('reveals the last `tail` characters of a long value', () => { + const value = 'example-token-value-abcd'; + const masked = maskRevealingTail(value, MASK, 4); + expect(masked).toBe(`${MASK.repeat(value.length - 4)}abcd`); + expect(masked.endsWith('abcd')).toBe(true); + }); + + it('keeps the same length as the input so cursor slicing stays aligned', () => { + for (const secret of ['', 'a', 'abcdefgh', 'abcdefghijklmnop']) { + expect(maskRevealingTail(secret, MASK, 4)).toHaveLength(secret.length); + } + }); + + it('fully masks secrets that are not longer than tail * 2', () => { + expect(maskRevealingTail('abcdefgh', MASK, 4)).toBe(MASK.repeat(8)); + expect(maskRevealingTail('abcd', MASK, 4)).toBe(MASK.repeat(4)); + expect(maskRevealingTail('ab', MASK, 4)).toBe(MASK.repeat(2)); + }); + + it('reveals the tail once the secret crosses the tail * 2 boundary', () => { + // length 9 > 8 → reveal last 4, hide the first 5 + expect(maskRevealingTail('abcdefghi', MASK, 4)).toBe(`${MASK.repeat(5)}fghi`); + }); + + it('fully masks an empty value', () => { + expect(maskRevealingTail('', MASK, 4)).toBe(''); + }); + + it('honors a custom tail count', () => { + // tail 2 reveals only when length > 4 + expect(maskRevealingTail('abcde', MASK, 2)).toBe(`${MASK.repeat(3)}de`); + expect(maskRevealingTail('abcd', MASK, 2)).toBe(MASK.repeat(4)); + }); +}); diff --git a/packages/cli/test/scan.test.ts b/packages/cli/test/scan.test.ts 
index 6a524fba..51c55498 100644 --- a/packages/cli/test/scan.test.ts +++ b/packages/cli/test/scan.test.ts @@ -2,12 +2,19 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import type { SourceAdapter } from '../src/context/ingest/types.js'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; import type { KtxScanReport } from '../src/context/scan/types.js'; import type { LocalScanRunResult, RunLocalScanOptions } from '../src/context/scan/local-scan.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createCliScanProgress, runKtxScan, type KtxScanDeps } from '../src/scan.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + const sqlServerExtractSchema = vi.hoisted(() => vi.fn(async (connectionId: string) => ({ connectionId, @@ -317,6 +324,7 @@ describe('runKtxScan', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-scan-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -426,7 +434,28 @@ describe('runKtxScan', () => { it('records the raw errorDetail in scan_completed telemetry when the scan throws', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); + vi.stubEnv('ANTHROPIC_API_KEY', 'anthropic-callsite-secret'); // pragma: allowlist secret + vi.stubEnv('DATABASE_URL', 'postgres://svc:scan-db-password@db.example.test/analytics'); // pragma: allowlist secret await initKtxProject({ projectDir: tempDir }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(tempDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + connections: { + warehouse: { driver: 
'postgres', url: 'env:DATABASE_URL' }, + }, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); const runLocalScan = vi.fn(async (): Promise => { const error = new Error('introspection timed out'); (error as { code?: unknown }).code = 'ETIMEDOUT'; @@ -452,6 +481,17 @@ describe('runKtxScan', () => { expect(io.stderr()).toContain('"event":"scan_completed"'); expect(io.stderr()).toContain('"outcome":"error"'); expect(io.stderr()).toContain('"errorDetail":"ETIMEDOUT: introspection timed out"'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'scan run', handled: true, fatal: false }), + projectDir: tempDir, + redactionSecrets: expect.arrayContaining([ + 'anthropic-callsite-secret', + 'postgres://svc:scan-db-password@db.example.test/analytics', // pragma: allowlist secret + 'scan-db-password', + ]), + }), + ); }); it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => { diff --git a/packages/cli/test/setup-databases.test.ts b/packages/cli/test/setup-databases.test.ts index 6adb0af0..957dfdb2 100644 --- a/packages/cli/test/setup-databases.test.ts +++ b/packages/cli/test/setup-databases.test.ts @@ -2654,6 +2654,7 @@ describe('setup databases step', () => { consideredRoleCount: 2, skipped: null, warnings: [], + parseFailedTemplateIds: [], })); const result = await runKtxSetupDatabasesStep( @@ -2706,6 +2707,54 @@ describe('setup databases step', () => { expect(io.stdout()).toContain('svc_loader'); }); + it('collapses query-history parse failures to a count and lists ids only with --debug', async () => { + const io = makeIo(); + const queryHistoryFilterPicker = vi.fn(async () => ({ + excludedRoles: [], + consideredRoleCount: 1, + skipped: { reason: 'no-in-scope-history' as const }, + 
warnings: [], + parseFailedTemplateIds: ['111', '222'], + })); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'disabled', + debug: true, + yes: true, + databaseDrivers: ['postgres'], + databaseConnectionId: 'warehouse', + databaseUrl: 'env:DATABASE_URL', + databaseSchemas: ['public'], + enableQueryHistory: true, + skipDatabases: false, + }, + io.io, + { + testConnection: vi.fn(async () => 0), + scanConnection: vi.fn(async () => 0), + historicSqlReadinessProbe: vi.fn(async () => { + const runner = fakeHistoricSqlRunner('postgres', 'pg_stat_statements'); + return { + ok: true as const, + dialect: 'postgres' as const, + runner, + result: { pgServerVersion: 'PostgreSQL 16.4', warnings: [], info: [] }, + }; + }), + queryHistoryFilterPicker, + createQueryHistoryLlmRuntime: vi.fn(() => null), + }, + ); + + expect(result.status).toBe('ready'); + expect(io.stdout()).toContain('Skipped 2 query templates ktx could not parse'); + expect(io.stdout()).not.toContain('111'); + expect(io.stdout()).not.toContain('222'); + expect(io.stderr()).toContain('could not parse 2 template(s): 111, 222'); + }); + it('lets interactive setup skip applying derived filters', async () => { const io = makeIo(); const prompts = makePromptAdapter({ @@ -2743,6 +2792,7 @@ describe('setup databases step', () => { consideredRoleCount: 2, skipped: null, warnings: [], + parseFailedTemplateIds: [], })), createQueryHistoryLlmRuntime: vi.fn(() => null), }, @@ -2811,6 +2861,7 @@ describe('setup databases step', () => { consideredRoleCount: 2, skipped: { reason: 'user-block-present' as const }, warnings: [], + parseFailedTemplateIds: [], })), createQueryHistoryLlmRuntime: vi.fn(() => null), }, diff --git a/packages/cli/test/setup-prompts.test.ts b/packages/cli/test/setup-prompts.test.ts index 46628b1c..8e83c558 100644 --- a/packages/cli/test/setup-prompts.test.ts +++ b/packages/cli/test/setup-prompts.test.ts @@ -17,7 +17,7 @@ const mocks = vi.hoisted(() => { 
autocomplete: vi.fn(), autocompleteMultiselect: vi.fn(), note: vi.fn(), - password: vi.fn(), + revealPassword: vi.fn(), select: vi.fn(), text: vi.fn(), withSetupInterruptConfirmation: vi.fn((prompt: () => Promise) => prompt()), @@ -34,11 +34,14 @@ vi.mock('@clack/prompts', () => ({ autocomplete: mocks.autocomplete, autocompleteMultiselect: mocks.autocompleteMultiselect, note: mocks.note, - password: mocks.password, select: mocks.select, text: mocks.text, })); +vi.mock('../src/reveal-password-prompt.js', () => ({ + revealPassword: mocks.revealPassword, +})); + vi.mock('../src/setup-interrupt.js', () => ({ withSetupInterruptConfirmation: mocks.withSetupInterruptConfirmation, })); @@ -54,7 +57,7 @@ describe('setup prompt adapter', () => { mocks.autocomplete.mockReset(); mocks.autocompleteMultiselect.mockReset(); mocks.note.mockReset(); - mocks.password.mockReset(); + mocks.revealPassword.mockReset(); mocks.select.mockReset(); mocks.text.mockReset(); mocks.withSetupInterruptConfirmation.mockClear(); @@ -96,7 +99,7 @@ describe('setup prompt adapter', () => { it('decorates text and password prompts with setup navigation copy', async () => { mocks.text.mockResolvedValueOnce('analytics-ktx'); - mocks.password.mockResolvedValueOnce('secret'); + mocks.revealPassword.mockResolvedValueOnce('secret'); const adapter = createKtxSetupPromptAdapter({ selectCancelValue: 'back' }); await expect(adapter.text({ message: 'Project folder path', placeholder: './analytics-ktx' })).resolves.toBe( @@ -108,7 +111,7 @@ describe('setup prompt adapter', () => { message: 'Project folder path\n│ Press Escape to go back.\n│', placeholder: './analytics-ktx', }); - expect(mocks.password).toHaveBeenCalledWith({ + expect(mocks.revealPassword).toHaveBeenCalledWith({ message: 'Anthropic API key\n│ Press Escape to go back.\n│', }); }); diff --git a/packages/cli/test/setup-sources.test.ts b/packages/cli/test/setup-sources.test.ts index e4f7af2d..ef18a1b6 100644 --- a/packages/cli/test/setup-sources.test.ts 
+++ b/packages/cli/test/setup-sources.test.ts @@ -447,8 +447,8 @@ describe('setup sources step', () => { expect(testPrompts.select).toHaveBeenCalledWith({ message: 'Which Notion pages should KTX ingest?', options: [ - { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, { value: 'all_accessible', label: 'All pages the integration can access' }, + { value: 'selected_roots', label: 'Specific pages and their subpages (choose them in a picker)' }, { value: 'back', label: 'Back' }, ], }); @@ -891,8 +891,8 @@ describe('setup sources step', () => { expect(testPrompts.select).toHaveBeenCalledWith({ message: 'This repo requires authentication.', options: [ - { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'skip', label: 'Skip — try without authentication' }, { value: 'back', label: 'Back' }, ], @@ -1407,8 +1407,8 @@ describe('setup sources step', () => { message: 'How should KTX find your Notion integration token?', options: [ { value: 'keep', label: 'Keep existing credential' }, - { value: 'env', label: 'Use NOTION_TOKEN from the environment' }, { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: 'Use NOTION_TOKEN from the environment' }, { value: 'back', label: 'Back' }, ], }); @@ -1476,8 +1476,8 @@ describe('setup sources step', () => { message: 'How should KTX find your Metabase API key?', options: [ { value: 'keep', label: 'Keep existing credential' }, - { value: 'env', label: 'Use METABASE_API_KEY from the environment' }, { value: 'paste', label: 'Paste a key and save it as a local secret file' }, + { value: 'env', label: 'Use METABASE_API_KEY from the environment' }, { value: 'back', label: 'Back' }, ], }); @@ -1582,8 +1582,8 @@ describe('setup sources step', () => { message: 'This MetricFlow repo 
requires authentication.', options: [ { value: 'keep', label: 'Keep existing credential' }, - { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'paste', label: 'Paste a token and save it as a local secret file' }, + { value: 'env', label: 'Use GITHUB_TOKEN from the environment' }, { value: 'skip', label: 'Skip — try without authentication' }, { value: 'back', label: 'Back' }, ], @@ -1627,7 +1627,7 @@ describe('setup sources step', () => { expect(testPrompts.select).toHaveBeenCalledWith({ message: '1 context source configured (dbt-main). Add another?', options: [ - { value: 'done', label: 'Done — continue to context build' }, + { value: 'done', label: 'Done adding context sources' }, { value: 'edit', label: 'Edit an existing context source' }, { value: 'add', label: 'Add another context source' }, ], diff --git a/packages/cli/test/sl.test.ts b/packages/cli/test/sl.test.ts index ff9c1489..489ea950 100644 --- a/packages/cli/test/sl.test.ts +++ b/packages/cli/test/sl.test.ts @@ -1,12 +1,19 @@ -import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { stripVTControlCharacters } from 'node:util'; import Database from 'better-sqlite3'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxSl } from '../src/sl.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + const ORDERS_YAML = [ 'name: orders', 'table: public.orders', @@ -61,6 +68,7 @@ describe('runKtxSl', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sl-')); + 
reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -351,6 +359,12 @@ describe('runKtxSl', () => { expect(validateIo.stdout()).toBe(''); expect(validateIo.stderr()).toBe('Semantic-layer source "missing_orders" was not found\n'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sl validate', handled: true, fatal: false }), + projectDir, + }), + ); }); it('keeps scoped validation not-found wording', async () => { @@ -552,6 +566,53 @@ joins: [] expect(stderr.write).not.toHaveBeenCalled(); }); + it('reports sl query exceptions at the query catch boundary', async () => { + vi.stubEnv('ANTHROPIC_API_KEY', 'sl-anthropic-secret'); // pragma: allowlist secret + const projectDir = join(tempDir, 'missing-query-input'); + await seedSlSource({ projectDir }); + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); + const io = makeIo(); + + await expect( + runKtxSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + format: 'json', + execute: false, + cliVersion: '0.2.0', + runtimeInstallPolicy: 'auto', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('sl query requires query input'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sl query', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['sl-anthropic-secret']), + }), + ); + }); + it('emits debug telemetry for sl query without project paths', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); diff --git 
a/packages/cli/test/sql.test.ts b/packages/cli/test/sql.test.ts index ef74fd49..5e297429 100644 --- a/packages/cli/test/sql.test.ts +++ b/packages/cli/test/sql.test.ts @@ -8,6 +8,12 @@ import type { SqlAnalysisPort } from '../src/context/sql-analysis/ports.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxSql } from '../src/sql.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function makeIo(options: { isTTY?: boolean } = {}) { let stdout = ''; let stderr = ''; @@ -76,6 +82,7 @@ describe('runKtxSql', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sql-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -236,9 +243,10 @@ describe('runKtxSql', () => { }); it('rejects non-read-only SQL before executing connector SQL', async () => { + vi.stubEnv('SQL_DB_PASSWORD', 'sql-db-password'); // pragma: allowlist secret const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); - await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } }); + await writeConnections(projectDir, { warehouse: { driver: 'postgres', password: 'env:SQL_DB_PASSWORD' } }); // pragma: allowlist secret const connector = makeConnector(); const io = makeIo(); @@ -265,6 +273,13 @@ describe('runKtxSql', () => { expect(connector.executeReadOnly).not.toHaveBeenCalled(); expect(connector.cleanup).not.toHaveBeenCalled(); expect(io.stderr()).toContain('SQL contains read/write operation: Delete'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sql run', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['sql-db-password']), + }), + ); }); it('rejects missing connections', async () => { diff --git 
a/packages/cli/test/telemetry/events.test.ts b/packages/cli/test/telemetry/events.test.ts index 29108600..033c2def 100644 --- a/packages/cli/test/telemetry/events.test.ts +++ b/packages/cli/test/telemetry/events.test.ts @@ -37,6 +37,7 @@ describe('telemetry event schemas', () => { 'daemon_stopped', 'sl_plan_completed', 'sql_gen_completed', + 'query_history_filter_completed', ]); }); diff --git a/packages/cli/test/telemetry/exception-payload.test.ts b/packages/cli/test/telemetry/exception-payload.test.ts new file mode 100644 index 00000000..da81e62e --- /dev/null +++ b/packages/cli/test/telemetry/exception-payload.test.ts @@ -0,0 +1,150 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { createServer, type IncomingMessage } from 'node:http'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { gunzipSync } from 'node:zlib'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { __resetTelemetryEmitterForTests } from '../../src/telemetry/emitter.js'; +import { + __resetTelemetryExceptionStateForTests, + reportException, +} from '../../src/telemetry/exception.js'; + +function makeIo(): KtxCliIo { + return { + stdout: { write: () => {} }, + stderr: { write: () => {} }, + }; +} + +async function body(req: IncomingMessage): Promise { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + const raw = Buffer.concat(chunks); + return req.headers['content-encoding'] === 'gzip' ? 
gunzipSync(raw).toString('utf-8') : raw.toString('utf-8'); +} + +async function withCaptureServer(run: (url: string, payloads: unknown[]) => Promise): Promise { + const payloads: unknown[] = []; + const server = createServer(async (req, res) => { + if (req.method === 'POST') { + payloads.push(JSON.parse(await body(req))); + } + res.statusCode = 200; + res.setHeader('content-type', 'application/json'); + res.end('{}'); + }); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('test server did not bind to a TCP port'); + } + try { + return await run(`http://127.0.0.1:${address.port}`, payloads); + } finally { + await new Promise((resolve) => server.close(() => resolve())); + } +} + +function findExceptionEvent(payloads: unknown[]): Record { + for (const payload of payloads) { + if (typeof payload !== 'object' || payload === null) { + continue; + } + const record = payload as Record; + const batch = Array.isArray(record.batch) ? 
record.batch : [record]; + for (const item of batch) { + if (typeof item === 'object' && item !== null && (item as Record).event === '$exception') { + return item as Record; + } + } + } + throw new Error(`No $exception payload found: ${JSON.stringify(payloads)}`); +} + +describe('prepared Node exception payload', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-node-exception-payload-')); + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + `${JSON.stringify({ + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + createdAt: '2026-06-05T00:00:00.000Z', + })}\n`, + 'utf-8', + ); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('CI', ''); + vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + __resetTelemetryEmitterForTests(); + __resetTelemetryExceptionStateForTests(); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('sends projectId, omits $groups, and redacts the serialized exception list', async () => { + await withCaptureServer(async (endpoint, payloads) => { + vi.stubEnv('KTX_TELEMETRY_ENDPOINT', endpoint); + const projectDir = join(homeDir, 'project'); + const snapshotSecret = ['plain', 'secret', 'value'].join('-'); + const dbPassword = ['db', 'url', 'secret'].join('-'); + const authToken = ['abc', '123'].join(''); + const error = new Error( + `${snapshotSecret} postgres://svc:${dbPassword}@db.example.test/analytics Authorization: Basic ${authToken}`, + ); + + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + io: makeIo(), + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir, + immediate: true, + redactionSecrets: [snapshotSecret], + }); + + const event = findExceptionEvent(payloads); + const properties = event.properties as Record; + 
expect(properties.projectId).toMatch(/^[a-f0-9]{64}$/); + expect(properties.$groups).toBeUndefined(); + expect(JSON.stringify(properties.$exception_list)).toContain('[redacted]'); + expect(JSON.stringify(properties.$exception_list)).not.toContain(snapshotSecret); + expect(JSON.stringify(properties.$exception_list)).not.toContain(dbPassword); + expect(JSON.stringify(properties.$exception_list)).not.toContain(authToken); + for (const key of [ + 'argv', + 'args', + 'env', + 'environment', + 'sql', + 'query', + 'prompt', + 'mcpArguments', + 'tableName', + 'schemaName', + 'columnName', + 'databaseUrl', + 'connectionString', + 'url', + 'password', + 'token', + 'apiKey', + 'authorization', + ]) { + expect(properties).not.toHaveProperty(key); + } + }); + }); +}); diff --git a/packages/cli/test/telemetry/exception.test.ts b/packages/cli/test/telemetry/exception.test.ts new file mode 100644 index 00000000..01608935 --- /dev/null +++ b/packages/cli/test/telemetry/exception.test.ts @@ -0,0 +1,456 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { __resetTelemetryEmitterForTests } from '../../src/telemetry/emitter.js'; +import { + __resetTelemetryExceptionStateForTests, + reportException, +} from '../../src/telemetry/exception.js'; + +const captures: unknown[] = []; +const immediateCaptures: unknown[] = []; +const shutdown = vi.fn(async () => {}); + +vi.mock('posthog-node', () => ({ + PostHog: vi.fn(function PostHog() { + return { + captureException: ( + error: unknown, + distinctId?: string, + properties?: Record, + ) => { + captures.push({ error, distinctId, properties }); + }, + captureExceptionImmediate: async ( + error: unknown, + distinctId?: string, + properties?: Record, + ) => { + immediateCaptures.push({ error, distinctId, properties }); 
+ }, + capture: vi.fn(), + shutdown, + }; + }), +})); + +function makeIo(): { io: KtxCliIo; stderr: () => string } { + let stderr = ''; + return { + io: { + stdout: { write: () => {} }, + stderr: { + write: (chunk) => { + stderr += chunk; + }, + }, + }, + stderr: () => stderr, + }; +} + +async function writeIdentity(homeDir: string, enabled = true): Promise { + const path = join(homeDir, '.ktx', 'telemetry.json'); + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + path, + `${JSON.stringify({ + installId: '00000000-0000-4000-8000-000000000000', + enabled, + createdAt: '2026-06-05T00:00:00.000Z', + })}\n`, + 'utf-8', + ); +} + +describe('reportException', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-exception-')); + await writeIdentity(homeDir); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('CI', ''); + vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + captures.length = 0; + immediateCaptures.length = 0; + shutdown.mockClear(); + __resetTelemetryEmitterForTests(); + __resetTelemetryExceptionStateForTests(); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('honors telemetry kill switches', async () => { + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + const { io } = makeIo(); + + await reportException({ + error: new Error('boom'), + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(captures).toEqual([]); + expect(immediateCaptures).toEqual([]); + }); + + it('prints debug payloads without sending', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + const { io, stderr } = makeIo(); + + await reportException({ + error: new Error('debug boom'), + context: { source: 'scan run', handled: true, fatal: 
false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(stderr()).toContain('[telemetry-exception]'); + expect(stderr()).toContain('"source":"scan run"'); + expect(captures).toEqual([]); + }); + + it('sends projectId as a property and omits $groups for Node exceptions', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('project boom'), + context: { source: 'sql run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(captures).toHaveLength(1); + expect(captures[0]).toMatchObject({ + distinctId: '00000000-0000-4000-8000-000000000000', + properties: { + source: 'sql run', + handled: true, + fatal: false, + cliVersion: '0.0.0-test', + runtime: 'node', + }, + }); + expect( + (captures[0] as { properties: Record }).properties.projectId, + ).toMatch(/^[a-f0-9]{64}$/); + expect((captures[0] as { properties: Record }).properties.$groups).toBeUndefined(); + }); + + it('uses captureExceptionImmediate for fatal reports', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('fatal boom'), + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(immediateCaptures).toHaveLength(1); + expect(captures).toEqual([]); + }); + + it('redacts snapshot secrets and static credential patterns from message and cause', async () => { + const { io } = makeIo(); + const cause = new Error('cause has sk-live-fixture-value and Authorization: Bearer token-123'); + const error = new Error('message has sk-live-fixture-value and password=hunter2', { cause }); + + await reportException({ + error, + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' 
}, + redactionSecrets: ['sk-live-fixture-value'], + }); + + const sent = captures[0] as { error: Error & { cause?: Error } }; + expect(sent.error.message).toContain('[redacted]'); + expect(sent.error.message).not.toContain('sk-live-fixture-value'); + expect(sent.error.message).not.toContain('hunter2'); + expect(sent.error.cause?.message).not.toContain('token-123'); + }); + + it('redacts URL userinfo credentials and non-bearer authorization values', async () => { + const { io } = makeIo(); + const error = new Error( + 'connect postgres://svc:db-url-secret@db.example.test/analytics Authorization: Basic abc123', // pragma: allowlist secret + ); + + await reportException({ + error, + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { error: Error }; + expect(sent.error.message).toContain('postgres://svc:[redacted]@db.example.test/analytics'); + expect(sent.error.message).toContain('Authorization: [redacted]'); + expect(sent.error.message).not.toContain('db-url-secret'); + expect(sent.error.message).not.toContain('abc123'); + }); + + it('does not use process-global secret discovery when no snapshot is supplied', async () => { + vi.stubEnv('KTX_FAKE_SECRET', 'plain-secret-without-pattern'); + const { io } = makeIo(); + + await reportException({ + error: new Error('plain-secret-without-pattern'), + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { error: Error }; + expect(sent.error.message).toContain('plain-secret-without-pattern'); + }); + + it('dedupes the same Error instance between operation and global tiers', async () => { + const { io } = makeIo(); + const error = new Error('same object'); + + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { 
name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error, + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(1); + expect(immediateCaptures).toHaveLength(0); + }); + + it('captures wrapped Error causes as distinct logical occurrences', async () => { + const { io } = makeIo(); + const inner = new Error('inner'); + const wrapper = new Error('outer', { cause: inner }); + + await reportException({ + error: inner, + context: { source: 'sl query', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: wrapper, + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(1); + expect(immediateCaptures).toHaveLength(1); + }); + + it('dedupes primitive and plain-object throwables propagated to the global tier', async () => { + const { io } = makeIo(); + const objectThrowable = { message: 'plain object' }; + + await reportException({ + error: 'primitive boom', + context: { source: 'mcp:sql_execution', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: 'primitive boom', + context: { source: 'unhandledRejection', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + await reportException({ + error: objectThrowable, + context: { source: 'mcp:discover_data', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: objectThrowable, + context: { source: 'unhandledRejection', handled: false, fatal: true }, + io, + packageInfo: { name: 
'@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(2); + expect(immediateCaptures).toHaveLength(0); + }); + + it('does not collapse independent primitive throw events with the same value', async () => { + const { io } = makeIo(); + + await reportException({ + error: 'oops', + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: 'oops', + context: { source: 'sql run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + expect(captures).toHaveLength(2); + }); + + it('drops forbidden caller-supplied extra property keys', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('extra property boom'), + context: { + source: 'sql run', + handled: true, + fatal: false, + extra: { + sql: 'select * from private_table', + tableName: 'private_table', + schemaName: 'private_schema', + columnName: 'private_column', + argv: '--password secret', + env: 'KTX_TOKEN=secret', + password: 'secret-password', // pragma: allowlist secret + token: 'secret-token', + prompt: 'user prompt', + safeCount: 3, + }, + }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { properties: Record }; + expect(sent.properties.safeCount).toBe(3); + for (const key of [ + 'sql', + 'tableName', + 'schemaName', + 'columnName', + 'argv', + 'env', + 'password', + 'token', + 'prompt', + ]) { + expect(sent.properties).not.toHaveProperty(key); + } + }); + + it('redacts every required static credential pattern and leaves benign text intact', async () => { + const { io } = makeIo(); + const cases: Array<{ message: string; leaked: string; expected: string }> = [ + { + message: 'dsn password=hunter2', + leaked: 'hunter2', + expected: 'password=[redacted]', + }, + { + message: 'dsn pwd=swordfish', + leaked: 
'swordfish', + expected: 'pwd=[redacted]', + }, + { + message: 'Authorization: Basic abc123', + leaked: 'abc123', + expected: 'Authorization: [redacted]', + }, + { + message: 'Authorization: Bearer token-123', + leaked: 'token-123', + expected: 'Authorization: [redacted]', + }, + { + message: 'Bearer standalone-token', + leaked: 'standalone-token', + expected: 'Bearer [redacted]', + }, + { + message: 'api_key=sk-live-secret', + leaked: 'sk-live-secret', + expected: 'api_key=[redacted]', + }, + { + message: 'api-key: sk-dash-secret', + leaked: 'sk-dash-secret', + expected: 'api-key=[redacted]', + }, + { + message: 'KTX_PROVIDER_TOKEN=ktx-secret', + leaked: 'ktx-secret', + expected: 'KTX_PROVIDER_TOKEN=[redacted]', + }, + { + message: 'REFRESH_SECRET: refresh-secret', + leaked: 'refresh-secret', + expected: 'REFRESH_SECRET=[redacted]', + }, + { + message: 'https://s3.example.test/file?X-Amz-Signature=aws-secret&ok=1', + leaked: 'aws-secret', + expected: 'X-Amz-Signature=[redacted]', + }, + { + message: 'https://storage.example.test/file?X-Goog-Signature=goog-secret&ok=1', + leaked: 'goog-secret', + expected: 'X-Goog-Signature=[redacted]', + }, + { + message: 'https://cdn.example.test/file?sig=signed-secret&ok=1', + leaked: 'signed-secret', + expected: 'sig=[redacted]', + }, + { + message: 'postgres://svc:url-password@db.example.test/analytics', // pragma: allowlist secret + leaked: 'url-password', + expected: 'postgres://svc:[redacted]@db.example.test/analytics', + }, + ]; + + for (const item of cases) { + await reportException({ + error: new Error(item.message), + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + const sent = captures[captures.length - 1] as { error: Error }; + expect(sent.error.message).toContain(item.expected); + expect(sent.error.message).not.toContain(item.leaked); + } + + await reportException({ + error: new Error('token bucket metrics and 
passwordless auth are benign'), + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + const benign = captures[captures.length - 1] as { error: Error }; + expect(benign.error.message).toBe('token bucket metrics and passwordless auth are benign'); + }); +}); diff --git a/packages/cli/test/telemetry/index.test.ts b/packages/cli/test/telemetry/index.test.ts index 7e88410f..3531116a 100644 --- a/packages/cli/test/telemetry/index.test.ts +++ b/packages/cli/test/telemetry/index.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { createGlobalExceptionReporter, type KtxCliIo } from '../../src/cli-runtime.js'; import { beginCommandSpan, emitAbortedCommandAndShutdown, emitTelemetryEvent } from '../../src/telemetry/index.js'; import { resetCommandSpan } from '../../src/telemetry/command-hook.js'; @@ -120,3 +120,36 @@ describe('emitAbortedCommandAndShutdown', () => { expect(secondIo.stderr()).not.toContain('"event":"command"'); }); }); + +describe('global exception reporting contract', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-telemetry-global-exception-')); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + vi.stubEnv('DO_NOT_TRACK', ''); + vi.stubEnv('CI', ''); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('reports uncaughtException through the fatal debug payload', async () => { + const testIo = makeIo(); + const report = createGlobalExceptionReporter(testIo.io, { + name: '@kaelio/ktx', + version: '0.0.0-test', + }); + + await report('uncaughtException', new Error('global boom')); + + 
expect(testIo.stderr()).toContain('[telemetry-exception]'); + expect(testIo.stderr()).toContain('"source":"uncaughtException"'); + expect(testIo.stderr()).toContain('"handled":false'); + expect(testIo.stderr()).toContain('"fatal":true'); + }); +}); diff --git a/packages/cli/test/telemetry/project-snapshot.test.ts b/packages/cli/test/telemetry/project-snapshot.test.ts index 973ffb08..ce58f40e 100644 --- a/packages/cli/test/telemetry/project-snapshot.test.ts +++ b/packages/cli/test/telemetry/project-snapshot.test.ts @@ -34,6 +34,17 @@ describe('buildProjectStackSnapshotFields', () => { adapters: [], embeddings: { backend: 'sentence-transformers', dimensions: 384 }, workUnits: { stepBudget: 40, maxConcurrency: 1, failureMode: 'continue' }, + rateLimit: { + enabled: true, + throttleThreshold: 0.8, + minConcurrencyUnderPressure: 1, + retry: { + maxAttempts: 6, + baseDelayMs: 1_000, + maxDelayMs: 60_000, + jitter: true, + }, + }, profile: false, }, llm: { provider: { backend: 'none' }, models: {}, promptCaching: {} }, diff --git a/packages/cli/test/telemetry/redaction-secrets.test.ts b/packages/cli/test/telemetry/redaction-secrets.test.ts new file mode 100644 index 00000000..cdc15f22 --- /dev/null +++ b/packages/cli/test/telemetry/redaction-secrets.test.ts @@ -0,0 +1,127 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../../src/context/project/config.js'; +import { initKtxProject } from '../../src/context/project/project.js'; +import { collectTelemetryRedactionSecrets } from '../../src/telemetry/redaction-secrets.js'; + +describe('collectTelemetryRedactionSecrets', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-redaction-secrets-')); + }); + + afterEach(async () => { + 
vi.unstubAllEnvs(); + await rm(tempDir, { recursive: true, force: true }); + }); + + async function writeConfig(projectDir: string): Promise { + const configPath = join(projectDir, 'ktx.yaml'); + const config = parseKtxProjectConfig(await readFile(configPath, 'utf-8')); + await writeFile( + configPath, + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + ingest: { + ...config.ingest, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { api_key: 'file:~/.ktx/secrets/openai-key' }, // pragma: allowlist secret + }, + }, + scan: { + ...config.scan, + enrichment: { + ...config.scan.enrichment, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { api_key: 'env:SCAN_OPENAI_API_KEY' }, // pragma: allowlist secret + }, + }, + }, + connections: { + warehouse: { + driver: 'postgres', + url: 'env:DATABASE_URL', + password: 'file:~/.ktx/secrets/db-password', // pragma: allowlist secret + }, + docs: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', // pragma: allowlist secret + }, + }, + }), + 'utf-8', + ); + } + + it('derives only declared project secrets and parsed URL credentials', async () => { + const homeDir = join(tempDir, 'home'); + const projectDir = join(tempDir, 'project'); + await mkdir(join(homeDir, '.ktx', 'secrets'), { recursive: true }); + await writeFile(join(homeDir, '.ktx', 'secrets', 'openai-key'), 'openai-file-secret\n', 'utf-8'); + await writeFile(join(homeDir, '.ktx', 'secrets', 'db-password'), 'db-file-password\n', 'utf-8'); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('ANTHROPIC_API_KEY', 'anthropic-env-secret'); + vi.stubEnv('SCAN_OPENAI_API_KEY', 'scan-openai-env-secret'); + vi.stubEnv('DATABASE_URL', 
'postgres://svc:db-url-password@db.example.test/analytics'); // pragma: allowlist secret + vi.stubEnv('NOTION_TOKEN', 'notion-env-secret'); + vi.stubEnv('UNDECLARED_SECRET', 'must-not-appear'); + await initKtxProject({ projectDir }); + await writeConfig(projectDir); + + const secrets = await collectTelemetryRedactionSecrets({ + projectDir, + connectionId: 'warehouse', + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }); + + expect(secrets).toEqual( + expect.arrayContaining([ + 'anthropic-env-secret', + 'openai-file-secret', + 'scan-openai-env-secret', + 'postgres://svc:db-url-password@db.example.test/analytics', // pragma: allowlist secret + 'db-url-password', + 'db-file-password', + ]), + ); + expect(secrets).not.toContain('notion-env-secret'); + expect(secrets).not.toContain('must-not-appear'); + }); + + it('can derive a named non-database connection secret', async () => { + const projectDir = join(tempDir, 'project'); + vi.stubEnv('NOTION_TOKEN', 'notion-env-secret'); + await initKtxProject({ projectDir }); + await writeConfig(projectDir); + + const secrets = await collectTelemetryRedactionSecrets({ + projectDir, + connectionId: 'docs', + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }); + + expect(secrets).toEqual(['notion-env-secret']); + }); +}); diff --git a/packages/cli/test/update-check/cache.test.ts b/packages/cli/test/update-check/cache.test.ts new file mode 100644 index 00000000..446a62be --- /dev/null +++ b/packages/cli/test/update-check/cache.test.ts @@ -0,0 +1,95 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + readUpdateCheckCache, + updateCheckCachePath, + writeUpdateCheckCache, +} from '../../src/update-check/cache.js'; + +describe('update-check cache', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir 
= await mkdtemp(join(tmpdir(), 'ktx-update-check-cache-')); + }); + + afterEach(async () => { + await rm(homeDir, { recursive: true, force: true }); + }); + + it('uses ~/.ktx/update-check.json', () => { + expect(updateCheckCachePath(homeDir)).toBe(join(homeDir, '.ktx', 'update-check.json')); + }); + + it('round-trips strict cache data', async () => { + await writeUpdateCheckCache( + { + checkedAt: '2026-06-06T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-06T11:00:00.000Z', + }, + { homeDir }, + ); + + await expect(readUpdateCheckCache({ homeDir })).resolves.toEqual({ + checkedAt: '2026-06-06T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-06T11:00:00.000Z', + }); + }); + + it('returns null when the cache file is missing', async () => { + await expect(readUpdateCheckCache({ homeDir })).resolves.toBeNull(); + }); + + it('returns null when the cache file is corrupt JSON', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile(updateCheckCachePath(homeDir), '{bad json', 'utf-8'); + + await expect(readUpdateCheckCache({ homeDir })).resolves.toBeNull(); + }); + + it('returns null when the cache has unknown fields', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-06T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + unexpected: true, + }, + null, + 2, + ), + 'utf-8', + ); + + await expect(readUpdateCheckCache({ homeDir })).resolves.toBeNull(); + }); + + it('writes formatted JSON with a trailing newline', async () => { + await writeUpdateCheckCache( + { + checkedAt: '2026-06-06T10:00:00.000Z', + channel: 'next', + installedVersion: '0.10.0-rc.1', + latestForChannel: '0.10.0-rc.2', + }, + { homeDir }, + ); + + const raw = await 
readFile(updateCheckCachePath(homeDir), 'utf-8'); + expect(raw).toContain('"channel": "next"'); + expect(raw.endsWith('\n')).toBe(true); + }); +}); diff --git a/packages/cli/test/update-check/channel.test.ts b/packages/cli/test/update-check/channel.test.ts new file mode 100644 index 00000000..f7b4a1e6 --- /dev/null +++ b/packages/cli/test/update-check/channel.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; + +import { decideUpdate, inferUpdateChannel } from '../../src/update-check/channel.js'; + +describe('inferUpdateChannel', () => { + it.each([ + ['0.9.0', 'latest'], + ['0.10.0-rc.3', 'next'], + ['0.10.0-myfeat.2', null], + ['0.0.0', null], + ['not-a-version', null], + ])('maps %s to %s', (installed, expected) => { + expect(inferUpdateChannel(installed)).toBe(expected); + }); +}); + +describe('decideUpdate', () => { + it.each([ + [ + 'stable behind', + '0.9.0', + { latest: '0.10.0', next: '0.11.0-rc.1' }, + { status: 'available', channel: 'latest', target: '0.10.0' }, + ], + [ + 'stable equal', + '0.10.0', + { latest: '0.10.0', next: '0.11.0-rc.1' }, + { status: 'upToDate', channel: 'latest', target: '0.10.0' }, + ], + [ + 'stable ahead', + '0.11.0', + { latest: '0.10.0', next: '0.11.0-rc.1' }, + { status: 'upToDate', channel: 'latest', target: '0.10.0' }, + ], + [ + 'rc behind', + '0.11.0-rc.1', + { latest: '0.10.0', next: '0.11.0-rc.2' }, + { status: 'available', channel: 'next', target: '0.11.0-rc.2' }, + ], + [ + 'rc equal', + '0.11.0-rc.2', + { latest: '0.10.0', next: '0.11.0-rc.2' }, + { status: 'upToDate', channel: 'next', target: '0.11.0-rc.2' }, + ], + ['branch prerelease', '0.11.0-myfeat.1', { latest: '0.10.0', next: '0.11.0-rc.2' }, { status: 'skip' }], + ['missing channel tag', '0.9.0', { next: '0.11.0-rc.2' }, { status: 'skip' }], + ['invalid installed version', 'bad', { latest: '0.10.0' }, { status: 'skip' }], + ['invalid target version', '0.9.0', { latest: 'bad' }, { status: 'skip' }], + ['local development version', 
'0.0.0', { latest: '0.10.0' }, { status: 'skip' }], + ])('%s', (_name, installed, distTags, expected) => { + expect(decideUpdate(installed, distTags)).toEqual(expected); + }); +}); diff --git a/packages/cli/test/update-check/cli-program.test.ts b/packages/cli/test/update-check/cli-program.test.ts new file mode 100644 index 00000000..78116f97 --- /dev/null +++ b/packages/cli/test/update-check/cli-program.test.ts @@ -0,0 +1,152 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { buildKtxProgram } from '../../src/cli-program.js'; +import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from '../../src/cli-runtime.js'; +import { updateCheckCachePath } from '../../src/update-check/cache.js'; + +function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: stdoutIsTTY, + write: (chunk) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +describe('cli-program update check hooks', () => { + let projectDir: string; + let homeDir: string; + const info: KtxCliPackageInfo = { name: '@kaelio/ktx', version: '0.9.0' }; + + beforeEach(async () => { + projectDir = await mkdtemp(join(tmpdir(), 'ktx-update-project-')); + homeDir = await mkdtemp(join(tmpdir(), 'ktx-update-home-')); + await writeFile(join(projectDir, 'ktx.yaml'), '{}\n', 'utf-8'); + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + vi.stubEnv('CI', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(projectDir, { recursive: true, force: true }); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('prints 
a stale-cache notice without awaiting the background refresh', async () => { + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-05T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + }, + null, + 2, + ), + 'utf-8', + ); + const io = makeIo(true); + const deps: KtxCliDeps = { doctor: async () => 0 }; + const fetchDistTags = vi.fn( + () => + new Promise>(() => { + return; + }), + ); + const program = buildKtxProgram({ + io: io.io, + deps, + packageInfo: info, + runInit: async () => 0, + updateCheck: { + env: { NO_COLOR: '1' }, + fetchDistTags, + homeDir, + now: () => new Date('2026-06-06T12:00:00.000Z'), + }, + }); + + await program.parseAsync(['--project-dir', projectDir, 'status'], { from: 'user' }); + + expect(fetchDistTags).toHaveBeenCalledTimes(1); + expect(io.stderr()).toContain('↑ Update available: ktx 0.9.0 → 0.10.0\n npm i -g @kaelio/ktx\n'); + }); + + it('prints a queued fresh-cache notice after the action', async () => { + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-06T11:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + }, + null, + 2, + ), + 'utf-8', + ); + const io = makeIo(true); + const fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + const program = buildKtxProgram({ + io: io.io, + deps: { doctor: async () => 0 }, + packageInfo: info, + runInit: async () => 0, + updateCheck: { + env: { NO_COLOR: '1' }, + fetchDistTags, + homeDir, + now: () => new Date('2026-06-06T12:00:00.000Z'), + }, + }); + + await program.parseAsync(['--project-dir', projectDir, 'status'], { from: 'user' }); + + expect(fetchDistTags).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('↑ Update available: ktx 0.9.0 → 0.10.0\n npm i -g @kaelio/ktx\n'); + }); + + it('does not run update checks for the hidden completion command', async () => { + const io = makeIo(true); + const 
fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + const program = buildKtxProgram({ + io: io.io, + deps: {}, + packageInfo: info, + runInit: async () => 0, + updateCheck: { + env: { NO_COLOR: '1' }, + fetchDistTags, + homeDir, + now: () => new Date('2026-06-06T12:00:00.000Z'), + }, + }); + + await program.parseAsync(['__complete', '--', 'ktx', 'co'], { from: 'user' }); + + expect(fetchDistTags).not.toHaveBeenCalled(); + expect(io.stderr()).not.toContain('Update available'); + }); +}); diff --git a/packages/cli/test/update-check/registry.test.ts b/packages/cli/test/update-check/registry.test.ts new file mode 100644 index 00000000..a83d360d --- /dev/null +++ b/packages/cli/test/update-check/registry.test.ts @@ -0,0 +1,80 @@ +import { EventEmitter } from 'node:events'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const requestMock = vi.hoisted(() => vi.fn()); + +vi.mock('node:https', () => ({ + request: requestMock, +})); + +type MockResponse = EventEmitter & { statusCode?: number }; +type MockRequest = EventEmitter & { + destroy: ReturnType; + end: () => void; + setTimeout: ReturnType; +}; + +function mockHttpsResponse(statusCode: number, body: string): { socket: { unref: ReturnType } } { + const socket = { unref: vi.fn() }; + requestMock.mockImplementation((_url: unknown, _options: unknown, callback: (response: MockResponse) => void) => { + const request = new EventEmitter() as MockRequest; + request.destroy = vi.fn(); + request.setTimeout = vi.fn(); + request.end = () => { + request.emit('socket', socket); + const response = new EventEmitter() as MockResponse; + response.statusCode = statusCode; + callback(response); + response.emit('data', Buffer.from(body)); + response.emit('end'); + }; + return request; + }); + return { socket }; +} + +describe('fetchDistTags', () => { + beforeEach(() => { + requestMock.mockReset(); + }); + + it('fetches @kaelio/ktx npm dist-tags and unrefs the socket', async () => { + const { socket } = 
mockHttpsResponse(200, JSON.stringify({ latest: '0.10.0', next: '0.11.0-rc.1' })); + const { fetchDistTags } = await import('../../src/update-check/registry.js'); + + await expect(fetchDistTags()).resolves.toEqual({ latest: '0.10.0', next: '0.11.0-rc.1' }); + + expect(requestMock).toHaveBeenCalledWith( + expect.any(URL), + expect.objectContaining({ + method: 'GET', + headers: expect.objectContaining({ accept: 'application/json' }), + }), + expect.any(Function), + ); + const [url] = requestMock.mock.calls[0] as [URL]; + expect(url.toString()).toBe('https://registry.npmjs.org/-/package/@kaelio/ktx/dist-tags'); + expect(socket.unref).toHaveBeenCalledTimes(1); + }); + + it('rejects non-2xx responses', async () => { + mockHttpsResponse(503, 'registry unavailable'); + const { fetchDistTags } = await import('../../src/update-check/registry.js'); + + await expect(fetchDistTags()).rejects.toThrow('npm dist-tags request failed with 503'); + }); + + it('rejects invalid JSON payloads', async () => { + mockHttpsResponse(200, '{bad json'); + const { fetchDistTags } = await import('../../src/update-check/registry.js'); + + await expect(fetchDistTags()).rejects.toThrow(); + }); + + it('rejects payloads that are not string dist-tag maps', async () => { + mockHttpsResponse(200, JSON.stringify({ latest: 123 })); + const { fetchDistTags } = await import('../../src/update-check/registry.js'); + + await expect(fetchDistTags()).rejects.toThrow(); + }); +}); diff --git a/packages/cli/test/update-check/update-check.test.ts b/packages/cli/test/update-check/update-check.test.ts new file mode 100644 index 00000000..a19b35bf --- /dev/null +++ b/packages/cli/test/update-check/update-check.test.ts @@ -0,0 +1,332 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { updateCheckCachePath } from 
'../../src/update-check/cache.js'; +import { + prepareUpdateCheckNotice, + renderUpdateNotice, + shouldSuppressUpdateCheck, +} from '../../src/update-check/update-check.js'; + +function makeIo(stdoutIsTTY = true) { + let stderr = ''; + return { + io: { + stdout: { + isTTY: stdoutIsTTY, + write: () => {}, + }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stderr: () => stderr, + }; +} + +async function flushAsyncWork(): Promise { + await new Promise((resolve) => { + setImmediate(resolve); + }); + await new Promise((resolve) => { + setImmediate(resolve); + }); +} + +describe('update-check orchestration', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-update-check-')); + }); + + afterEach(async () => { + await rm(homeDir, { recursive: true, force: true }); + }); + + it.each([ + ['json option', true, {}, { json: true }], + ['json output option', true, {}, { output: 'json' }], + ['json format option', true, {}, { format: 'json' }], + ['CI', true, { CI: '1' }, {}], + ['non-TTY stdout', false, {}, {}], + ['KTX_NO_UPDATE_CHECK', true, { KTX_NO_UPDATE_CHECK: '1' }, {}], + ['NO_UPDATE_NOTIFIER', true, { NO_UPDATE_NOTIFIER: '1' }, {}], + ['DO_NOT_TRACK', true, { DO_NOT_TRACK: '1' }, {}], + ])('suppresses cache and network work for %s', async (_name, stdoutIsTTY, env, commandOptions) => { + const fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + + const result = await prepareUpdateCheckNotice({ + io: makeIo(stdoutIsTTY).io, + env, + homeDir, + installedVersion: '0.9.0', + commandOptions, + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags, + }); + + expect(result.notice).toBeNull(); + expect(fetchDistTags).not.toHaveBeenCalled(); + await expect(readFile(updateCheckCachePath(homeDir), 'utf-8')).rejects.toThrow(); + }); + + it.each([ + ['CI', true, { CI: '1', KTX_OUTPUT: 'pretty' }], + ['non-TTY stdout', false, { KTX_OUTPUT: 'pretty' }], + ])('suppresses cache and 
network work for %s even when pretty output is forced', async (_name, stdoutIsTTY, env) => { + const fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + + const result = await prepareUpdateCheckNotice({ + io: makeIo(stdoutIsTTY).io, + env, + homeDir, + installedVersion: '0.9.0', + commandOptions: {}, + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags, + }); + + expect(result.notice).toBeNull(); + expect(fetchDistTags).not.toHaveBeenCalled(); + await expect(readFile(updateCheckCachePath(homeDir), 'utf-8')).rejects.toThrow(); + }); + + it('does not suppress when only KTX_TELEMETRY_DISABLED is set', () => { + expect( + shouldSuppressUpdateCheck({ + io: makeIo(true).io, + env: { KTX_TELEMETRY_DISABLED: '1' } as NodeJS.ProcessEnv, + commandOptions: {}, + }), + ).toBe(false); + }); + + it('renders a compact no-color stable notice', () => { + expect( + renderUpdateNotice({ + installedVersion: '0.9.0', + targetVersion: '0.10.0', + channel: 'latest', + env: { NO_COLOR: '1' }, + }), + ).toBe('↑ Update available: ktx 0.9.0 → 0.10.0\n npm i -g @kaelio/ktx\n'); + }); + + it('renders the next-channel install command', () => { + expect( + renderUpdateNotice({ + installedVersion: '0.10.0-rc.1', + targetVersion: '0.10.0-rc.2', + channel: 'next', + env: { NO_COLOR: '1' }, + }), + ).toBe('↑ Update available: ktx 0.10.0-rc.1 → 0.10.0-rc.2\n npm i -g @kaelio/ktx@next\n'); + }); + + it('queues a cached notice and stamps lastNoticeAt', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-06T11:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + }, + null, + 2, + ), + 'utf-8', + ); + const fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + + const result = await prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.9.0', + now: () => new 
Date('2026-06-06T12:00:00.000Z'), + fetchDistTags, + }); + + expect(result.notice).toBe('↑ Update available: ktx 0.9.0 → 0.10.0\n npm i -g @kaelio/ktx\n'); + expect(fetchDistTags).not.toHaveBeenCalled(); + const stored = JSON.parse(await readFile(updateCheckCachePath(homeDir), 'utf-8')) as { lastNoticeAt?: string }; + expect(stored.lastNoticeAt).toBe('2026-06-06T12:00:00.000Z'); + }); + + it('queues a stale cached notice and still refreshes in the background', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-05T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-05T11:00:00.000Z', + }, + null, + 2, + ), + 'utf-8', + ); + const fetchDistTags = vi.fn(async () => ({ latest: '0.11.0' })); + + const result = await prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.9.0', + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags, + }); + + expect(result.notice).toBe('↑ Update available: ktx 0.9.0 → 0.10.0\n npm i -g @kaelio/ktx\n'); + expect(fetchDistTags).toHaveBeenCalledTimes(1); + + await flushAsyncWork(); + await vi.waitFor(async () => { + const stored = JSON.parse(await readFile(updateCheckCachePath(homeDir), 'utf-8')) as { + latestForChannel: string; + lastNoticeAt?: string; + }; + expect(stored.latestForChannel).toBe('0.11.0'); + expect(stored.lastNoticeAt).toBe('2026-06-06T12:00:00.000Z'); + }); + }); + + it('throttles a cached notice for 24 hours', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-06T11:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-06T11:30:00.000Z', + }, + null, + 2, + ), + 'utf-8', + ); + + await expect( 
+ prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.9.0', + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags: vi.fn(async () => ({ latest: '0.10.0' })), + }), + ).resolves.toEqual({ notice: null }); + }); + + it('does not show stale cache after the installed version changes and schedules a refresh', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-06T11:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + }, + null, + 2, + ), + 'utf-8', + ); + const fetchDistTags = vi.fn(async () => ({ latest: '0.10.0' })); + + const result = await prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.10.0', + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags, + }); + + expect(result.notice).toBeNull(); + expect(fetchDistTags).toHaveBeenCalledTimes(1); + }); + + it('refreshes stale cache in the background and preserves lastNoticeAt for the same install', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + updateCheckCachePath(homeDir), + JSON.stringify( + { + checkedAt: '2026-06-05T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-06T09:00:00.000Z', + }, + null, + 2, + ), + 'utf-8', + ); + + await prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.9.0', + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags: vi.fn(async () => ({ latest: '0.11.0' })), + }); + await flushAsyncWork(); + + await vi.waitFor(async () => { + const stored = JSON.parse(await readFile(updateCheckCachePath(homeDir), 'utf-8')) as { + checkedAt: string; + latestForChannel: string; + lastNoticeAt?: string; + }; + 
expect(stored.checkedAt).toBe('2026-06-06T12:00:00.000Z'); + expect(stored.latestForChannel).toBe('0.11.0'); + expect(stored.lastNoticeAt).toBe('2026-06-06T09:00:00.000Z'); + }); + }); + + it('swallows refresh failures and leaves existing cache untouched', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + const originalCache = { + checkedAt: '2026-06-05T10:00:00.000Z', + channel: 'latest', + installedVersion: '0.9.0', + latestForChannel: '0.10.0', + lastNoticeAt: '2026-06-06T09:00:00.000Z', + }; + await writeFile(updateCheckCachePath(homeDir), JSON.stringify(originalCache, null, 2), 'utf-8'); + + await prepareUpdateCheckNotice({ + io: makeIo(true).io, + env: { NO_COLOR: '1' }, + homeDir, + installedVersion: '0.9.0', + now: () => new Date('2026-06-06T12:00:00.000Z'), + fetchDistTags: vi.fn(async () => { + throw new Error('offline'); + }), + }); + await flushAsyncWork(); + + await expect(readFile(updateCheckCachePath(homeDir), 'utf-8')).resolves.toBe(JSON.stringify(originalCache, null, 2)); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a3eaad5f..cc2fb3d2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -131,6 +131,9 @@ importers: '@anthropic-ai/claude-agent-sdk': specifier: 0.3.146 version: 0.3.146(@anthropic-ai/sdk@0.97.1(zod@4.4.3))(@modelcontextprotocol/sdk@1.29.0(zod@4.4.3))(zod@4.4.3) + '@clack/core': + specifier: 1.3.1 + version: 1.3.1 '@clack/prompts': specifier: 1.4.0 version: 1.4.0 @@ -206,6 +209,9 @@ importers: react: specifier: ^19.2.6 version: 19.2.6 + semver: + specifier: ^7.8.1 + version: 7.8.1 simple-git: specifier: 3.36.0 version: 3.36.0 @@ -240,6 +246,9 @@ importers: '@types/react': specifier: ^19.2.15 version: 19.2.15 + '@types/semver': + specifier: ^7.7.1 + version: 7.7.1 '@vitest/coverage-v8': specifier: ^4.1.7 version: 4.1.7(vitest@4.1.7) @@ -2498,6 +2507,9 @@ packages: '@types/readable-stream@4.0.23': resolution: {integrity: 
sha512-wwXrtQvbMHxCbBgjHaMGEmImFTQxxpfMOR/ZoQnXxB1woqkUbdLGFDgauo00Py9IudiaqSeiBiulSV9i6XIPig==} + '@types/semver@7.7.1': + resolution: {integrity: sha512-FmgJfu+MOcQ370SD0ev7EI8TlCAfKYU+B4m5T3yXc1CiRN94g/SZPtsCkk506aUDtlMnFZvasDwHHUcZUEaYuA==} + '@types/triple-beam@1.3.5': resolution: {integrity: sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw==} @@ -5216,6 +5228,11 @@ packages: engines: {node: '>=10'} hasBin: true + semver@7.8.1: + resolution: {integrity: sha512-rkVq3IXh+4FDGch+KwzX3aV9W3kO54GyEgpvBzSyctDA6Xtd7RJQV1xmXbeQp5v7+VzLOfVqiutSE6GICgPFvg==} + engines: {node: '>=10'} + hasBin: true + send@1.2.1: resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==} engines: {node: '>= 18'} @@ -8318,6 +8335,8 @@ snapshots: dependencies: '@types/node': 24.12.4 + '@types/semver@7.7.1': {} + '@types/triple-beam@1.3.5': {} '@types/unist@2.0.11': {} @@ -11430,6 +11449,8 @@ snapshots: semver@7.8.0: {} + semver@7.8.1: {} + send@1.2.1: dependencies: debug: 4.4.3 diff --git a/python/ktx-daemon/src/ktx_daemon/__main__.py b/python/ktx-daemon/src/ktx_daemon/__main__.py index 2fc00186..cbc2e228 100644 --- a/python/ktx-daemon/src/ktx_daemon/__main__.py +++ b/python/ktx-daemon/src/ktx_daemon/__main__.py @@ -6,6 +6,8 @@ import argparse import json import sys import time +from collections.abc import Callable +from types import TracebackType from typing import Any from pydantic import ValidationError @@ -90,6 +92,41 @@ def _read_stdin_json() -> dict[str, Any]: return parsed +def install_serve_http_exception_hooks(started_at: float) -> Callable[[], None]: + original_hook = sys.excepthook + + def hook( + exc_type: type[BaseException], + exc: BaseException, + tb: TracebackType | None, + ) -> None: + report_serve_http_crash(exc, started_at=started_at) + original_hook(exc_type, exc, tb) + + sys.excepthook = hook + + def dispose() -> None: + sys.excepthook = original_hook + + return 
dispose + + +def report_serve_http_crash(error: BaseException, *, started_at: float) -> None: + from ktx_daemon.telemetry import report_exception + from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once + + report_exception( + error, + source="serve-http", + handled=False, + fatal=True, + ) + emit_daemon_stopped_once( + reason="crash", + uptime_ms=max(0, (time.perf_counter() - started_at) * 1000), + ) + + def run_http_server( *, host: str, @@ -102,15 +139,23 @@ def run_http_server( from ktx_daemon.app import create_app started_at = time.perf_counter() - uvicorn.run( - create_app( - enable_code_execution=enable_code_execution, - telemetry_started_at=started_at, - ), - host=host, - port=port, - log_level=log_level, - ) + dispose_hooks = install_serve_http_exception_hooks(started_at) + try: + try: + uvicorn.run( + create_app( + enable_code_execution=enable_code_execution, + telemetry_started_at=started_at, + ), + host=host, + port=port, + log_level=log_level, + ) + except Exception as error: + report_serve_http_crash(error, started_at=started_at) + raise + finally: + dispose_hooks() def main(argv: list[str] | None = None) -> int: @@ -169,6 +214,14 @@ def main(argv: list[str] | None = None) -> int: sys.stderr.write(f"{error}\n") return 1 except Exception as error: + from ktx_daemon.telemetry import report_exception + + report_exception( + error, + source=str(args.command), + handled=True, + fatal=False, + ) sys.stderr.write(f"{type(error).__name__}: {error}\n") return 1 diff --git a/python/ktx-daemon/src/ktx_daemon/app.py b/python/ktx-daemon/src/ktx_daemon/app.py index 7a3fa950..5860c4e4 100644 --- a/python/ktx-daemon/src/ktx_daemon/app.py +++ b/python/ktx-daemon/src/ktx_daemon/app.py @@ -10,8 +10,8 @@ from contextlib import asynccontextmanager from collections.abc import Callable from typing import Any -from fastapi import FastAPI, HTTPException -from fastapi.responses import Response +from fastapi import FastAPI, HTTPException, Request +from 
fastapi.responses import JSONResponse, Response from ktx_daemon import VERSION from ktx_daemon.code_execution import ( @@ -65,9 +65,11 @@ from ktx_daemon.table_identifier import ( ParseTableIdentifierBatchResponse, parse_table_identifier_response, ) -from ktx_daemon.telemetry import track_telemetry_event +from ktx_daemon.telemetry import report_exception, track_telemetry_event +from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once logger = logging.getLogger(__name__) +CREDENTIAL_KEYS = {"url", "password", "token", "api_key", "apikey", "auth_header"} class NumpyORJSONResponse(Response): @@ -77,6 +79,36 @@ class NumpyORJSONResponse(Response): return dumps_numpy_json(content) +def _route_source(request: Request) -> str: + route = request.scope.get("route") + path = getattr(route, "path", None) + if isinstance(path, str) and path: + return f"app:{path}" + return f"app:{request.url.path}" + + +def _secret_snapshot_from_payload(value: Any) -> list[str]: + secrets: list[str] = [] + if isinstance(value, dict): + for key, child in value.items(): + normalized_key = str(key).lower() + if normalized_key in CREDENTIAL_KEYS and isinstance(child, str) and child: + secrets.append(child) + secrets.extend(_secret_snapshot_from_payload(child)) + elif isinstance(value, list): + for child in value: + secrets.extend(_secret_snapshot_from_payload(child)) + return secrets + + +async def _request_secret_snapshot(request: Request) -> list[str]: + try: + payload = await request.json() + except Exception: + return [] + return _secret_snapshot_from_payload(payload) + + def create_app( *, embedding_provider: EmbeddingProvider | None = None, @@ -104,12 +136,9 @@ def create_app( try: yield finally: - track_telemetry_event( - "daemon_stopped", - { - "reason": "request", - "uptimeMs": max(0, (clock() - started_at) * 1000), - }, + emit_daemon_stopped_once( + reason="request", + uptime_ms=max(0, (clock() - started_at) * 1000), ) app = FastAPI( @@ -119,6 +148,25 @@ def create_app( 
lifespan=lifespan, ) + @app.middleware("http") + async def report_unhandled_exceptions(request: Request, call_next): + redaction_secrets = await _request_secret_snapshot(request) + try: + return await call_next(request) + except Exception as error: + logger.exception("Unhandled daemon request failed: %s", error) + report_exception( + error, + source=_route_source(request), + handled=True, + fatal=False, + redaction_secrets=redaction_secrets, + ) + return JSONResponse( + status_code=500, + content={"detail": f"Daemon request failed: {error}"}, + ) + @app.get("/health") async def health() -> dict[str, str]: response = {"status": "healthy"} @@ -137,12 +185,6 @@ def create_app( except ValueError as error: logger.warning("Database introspection rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Database introspection failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Database introspection failed: {error}", - ) from error @app.post("/embeddings/compute", response_model=ComputeEmbeddingResponse) async def embedding_compute( @@ -156,12 +198,6 @@ def create_app( except ValueError as error: logger.warning("Embedding compute rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Embedding compute failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Embedding compute failed: {error}", - ) from error @app.post( "/embeddings/compute-bulk", @@ -178,12 +214,6 @@ def create_app( except ValueError as error: logger.warning("Bulk embedding compute rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Bulk embedding compute failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Bulk embedding compute failed: {error}", - ) from error if enable_code_execution: @@ -193,29 +223,15 
@@ def create_app( response_class=NumpyORJSONResponse, ) async def code_execute(request: ExecuteCodeRequest) -> ExecuteCodeResponse: - try: - return execute_code_response( - request, - nest_api_url=None, - auth_header=None, - ) - except Exception as error: - logger.exception("Code execution failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Code execution failed: {error}", - ) from error + return execute_code_response( + request, + nest_api_url=None, + auth_header=None, + ) @app.post("/lookml/parse", response_model=ParseLookMLResponse) async def lookml_parse(request: ParseLookMLRequest) -> ParseLookMLResponse: - try: - return parse_lookml_project(request) - except Exception as error: - logger.exception("LookML parsing failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"LookML parsing failed: {error}", - ) from error + return parse_lookml_project(request) @app.post( "/sql/parse-table-identifier", @@ -224,40 +240,19 @@ def create_app( async def sql_parse_table_identifier( request: ParseTableIdentifierBatchRequest, ) -> ParseTableIdentifierBatchResponse: - try: - return parse_table_identifier_response(request) - except Exception as error: - logger.exception("Table identifier parsing failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Table identifier parsing failed: {error}", - ) from error + return parse_table_identifier_response(request) @app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse) async def sql_validate_read_only( request: ValidateReadOnlySqlRequest, ) -> ValidateReadOnlySqlResponse: - try: - return validate_read_only_sql_response(request) - except Exception as error: - logger.exception("SQL read-only validation failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"SQL read-only validation failed: {error}", - ) from error + return validate_read_only_sql_response(request) @app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse) async 
def sql_analyze_batch( request: AnalyzeSqlBatchRequest, ) -> AnalyzeSqlBatchResponse: - try: - return analyze_sql_batch_response(request) - except Exception as error: - logger.exception("SQL batch analysis failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"SQL batch analysis failed: {error}", - ) from error + return analyze_sql_batch_response(request) @app.post( "/semantic-layer/generate-sources", response_model=GenerateSourcesResponse @@ -265,14 +260,7 @@ def create_app( async def semantic_generate_sources( request: GenerateSourcesRequest, ) -> GenerateSourcesResponse: - try: - return generate_sources_response(request) - except Exception as error: - logger.exception("Semantic source generation failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Semantic source generation failed: {error}", - ) from error + return generate_sources_response(request) @app.post("/semantic-layer/query", response_model=SemanticLayerQueryResponse) async def semantic_query( @@ -283,12 +271,6 @@ def create_app( except ValueError as error: logger.warning("Semantic query rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Semantic query failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Semantic layer query failed: {error}", - ) from error @app.post("/semantic-layer/validate", response_model=ValidateSourcesResponse) async def semantic_validate( diff --git a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py index 78f57338..f58c6e39 100644 --- a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py +++ b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py @@ -5,7 +5,7 @@ from __future__ import annotations import time from typing import Any -from ktx_daemon.telemetry import error_class, track_telemetry_event +from ktx_daemon.telemetry import error_class, report_exception, track_telemetry_event from 
pydantic import BaseModel, ConfigDict, Field from semantic_layer.duplicate_check import validate_measure_duplicates from semantic_layer.engine import SemanticEngine @@ -150,6 +150,13 @@ def query_semantic_layer( track_telemetry_event( "sql_gen_completed", sql_fields, project_id=request.project_id ) + report_exception( + error, + source="semantic-query", + handled=True, + fatal=False, + project_id=request.project_id, + ) raise diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py index ff9cd07f..bef42338 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py @@ -1,5 +1,12 @@ from __future__ import annotations +from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event +from ktx_daemon.telemetry.exception import report_exception -__all__ = ["error_class", "track_telemetry_event"] +__all__ = [ + "emit_daemon_stopped_once", + "error_class", + "report_exception", + "track_telemetry_event", +] diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py b/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py new file mode 100644 index 00000000..dc635601 --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Literal + +from ktx_daemon.telemetry.emitter import track_telemetry_event + +StopReason = Literal["signal", "request", "crash"] + +_daemon_stop_emitted = False + + +def emit_daemon_stopped_once(*, reason: StopReason, uptime_ms: float) -> bool: + global _daemon_stop_emitted + if _daemon_stop_emitted: + return False + _daemon_stop_emitted = True + track_telemetry_event( + "daemon_stopped", + { + "reason": reason, + "uptimeMs": max(0, uptime_ms), + }, + ) + return True + + +def reset_daemon_lifecycle_for_tests() 
-> None: + global _daemon_stop_emitted + _daemon_stop_emitted = False diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json index a75f92f1..c6c3d6f8 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -206,6 +206,17 @@ "errorClass", "durationMs" ] + }, + { + "name": "query_history_filter_completed", + "description": "Emitted after the setup query-history service-account filter picker runs.", + "fields": [ + "dialect", + "consideredRoleCount", + "excludedRoleCount", + "parseFailedCount", + "outcome" + ] } ], "$defs": { @@ -1434,6 +1445,77 @@ "durationMs" ], "additionalProperties": false + }, + "query_history_filter_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "dialect": { + "type": "string" + }, + "consideredRoleCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "excludedRoleCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "parseFailedCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "dialect", + "consideredRoleCount", + "excludedRoleCount", + "parseFailedCount", + "outcome" + ], + "additionalProperties": false } } } diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py 
b/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py new file mode 100644 index 00000000..00050d1c --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import json +import os +import re +import sys +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import Any + +from ktx_daemon import VERSION +from ktx_daemon.telemetry.emitter import POSTHOG_HOST, POSTHOG_PROJECT_API_KEY +from ktx_daemon.telemetry.events import _common_envelope +from ktx_daemon.telemetry.identity import load_telemetry_identity + +_KTX_REPORTED_ATTR = "__ktx_posthog_exception_reported" + + +def _debug_enabled(env: Mapping[str, str]) -> bool: + return env.get("KTX_TELEMETRY_DEBUG") == "1" + + +def _host(env: Mapping[str, str]) -> str: + return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST + + +def _redact_static(value: str) -> str: + patterns = [ + ( + r"([a-z][a-z0-9+.-]*://[^:\s/@]+:)([^@\s/]+)(@)", + r"\1[redacted]\3", + ), + (r"\b(password|pwd)=([^;&\s]+)", r"\1=[redacted]"), + (r"\bAuthorization\s*:\s*[^\r\n,;]+", "Authorization: [redacted]"), + (r"\bBearer\s+[A-Za-z0-9._~+/=-]+", "Bearer [redacted]"), + (r"\b(api[_-]?key)\s*[:=]\s*([^\s,;]+)", r"\1=[redacted]"), + ( + r"\b(KTX_[A-Z0-9_]*|[A-Z0-9_]*(?:TOKEN|SECRET))\s*[:=]\s*([^\s,;]+)", + r"\1=[redacted]", + ), + (r"([?&](?:X-Amz-Signature|X-Goog-Signature|sig)=)[^&\s]+", r"\1[redacted]"), + ] + redacted = value + for pattern, replacement in patterns: + redacted = re.sub(pattern, replacement, redacted, flags=re.IGNORECASE) + return redacted + + +def _redact_text(value: str, secrets: Sequence[str]) -> str: + redacted = value + for secret in secrets: + if secret: + redacted = redacted.replace(secret, "[redacted]") + return _redact_static(redacted) + + +def _clone_exception(exception: BaseException, secrets: Sequence[str]) -> BaseException: + redacted_args = [_redact_text(str(arg), secrets) for arg in exception.args] + try: + 
cloned = type(exception)(*redacted_args) + except Exception: + cloned = RuntimeError(_redact_text(str(exception), secrets)) + cloned.__traceback__ = exception.__traceback__ + cloned.__cause__ = ( + _clone_exception(exception.__cause__, secrets) if exception.__cause__ else None + ) + cloned.__context__ = ( + _clone_exception(exception.__context__, secrets) + if exception.__context__ + else None + ) + return cloned + + +def _should_skip_as_reported(exception: BaseException) -> bool: + if getattr(exception, _KTX_REPORTED_ATTR, False): + return True + try: + setattr(exception, _KTX_REPORTED_ATTR, True) + except Exception: + return False + return False + + +def _properties(*, source: str, handled: bool, fatal: bool) -> dict[str, Any]: + return { + **_common_envelope(), + "daemonVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION), + "source": source, + "handled": handled, + "fatal": fatal, + } + + +def report_exception( + exception: BaseException, + *, + source: str, + handled: bool, + fatal: bool, + project_id: str | None = None, + home_dir: Path | None = None, + env: Mapping[str, str] | None = None, + redaction_secrets: Sequence[str] | None = None, +) -> None: + source_env = env if env is not None else os.environ + try: + identity = load_telemetry_identity(home_dir=home_dir, env=source_env) + if not identity.enabled or not identity.install_id: + return + + if _should_skip_as_reported(exception): + return + + properties = _properties(source=source, handled=handled, fatal=fatal) + groups = {"project": project_id} if project_id else None + safe_exception = _clone_exception(exception, redaction_secrets or []) + + if _debug_enabled(source_env): + sys.stderr.write( + "[telemetry-exception] " + + json.dumps( + { + "distinctId": identity.install_id, + "message": str(safe_exception), + "properties": properties, + "groups": groups, + }, + sort_keys=True, + ) + + "\n" + ) + return + + if not POSTHOG_PROJECT_API_KEY.strip() or not _host(source_env).strip(): + return + + from 
posthog import Posthog + + client = Posthog( + POSTHOG_PROJECT_API_KEY, + host=_host(source_env), + flush_at=1, + flush_interval=0, + sync_mode=True, + timeout=1, + ) + client.capture_exception( + safe_exception, + distinct_id=identity.install_id, + properties=properties, + groups=groups, + ) + client.shutdown() + except Exception: + return diff --git a/python/ktx-daemon/tests/test_app.py b/python/ktx-daemon/tests/test_app.py index 2c3237ad..fffc2899 100644 --- a/python/ktx-daemon/tests/test_app.py +++ b/python/ktx-daemon/tests/test_app.py @@ -87,8 +87,10 @@ def test_app_lifespan_emits_daemon_lifecycle_debug_events( monkeypatch, capsys, ) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import reset_daemon_lifecycle_for_tests from ktx_daemon.telemetry.identity import reset_identity_cache + reset_daemon_lifecycle_for_tests() reset_identity_cache() identity_path = tmp_path / ".ktx" / "telemetry.json" identity_path.parent.mkdir(parents=True) diff --git a/python/ktx-daemon/tests/test_exception_payload.py b/python/ktx-daemon/tests/test_exception_payload.py new file mode 100644 index 00000000..3198b08f --- /dev/null +++ b/python/ktx-daemon/tests/test_exception_payload.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import gzip +import json +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path +from typing import Any + +from ktx_daemon.telemetry.identity import reset_identity_cache + + +class CaptureHandler(BaseHTTPRequestHandler): + payloads: list[dict[str, Any]] = [] + + def do_POST(self) -> None: + length = int(self.headers.get("content-length", "0")) + raw = self.rfile.read(length) + if self.headers.get("content-encoding") == "gzip": + raw = gzip.decompress(raw) + self.payloads.append(json.loads(raw.decode("utf-8"))) + self.send_response(200) + self.send_header("content-type", "application/json") + self.end_headers() + self.wfile.write(b"{}") + + def log_message(self, _format: str, *_args: object) -> 
None: + return + + +def write_identity(home: Path) -> None: + target = home / ".ktx" / "telemetry.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": True, + "createdAt": "2026-06-05T00:00:00.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + + +def find_exception_event(payloads: list[dict[str, Any]]) -> dict[str, Any]: + for payload in payloads: + batch = payload.get("batch") + events = batch if isinstance(batch, list) else [payload] + for event in events: + if isinstance(event, dict) and event.get("event") == "$exception": + return event + raise AssertionError(f"No $exception payload found: {payloads}") + + +def test_prepared_python_exception_payload_groups_and_redacts(tmp_path: Path) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + CaptureHandler.payloads.clear() + server = HTTPServer(("127.0.0.1", 0), CaptureHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + snapshot_secret = "-".join(["plain", "secret", "value"]) + db_password = "-".join(["db", "url", "secret"]) + auth_token = "".join(["abc", "123"]) + report_exception( + RuntimeError( + f"{snapshot_secret} postgres://svc:{db_password}@db.example.test/analytics " + f"Authorization: Basic {auth_token}" + ), + source="database-introspect", + handled=True, + fatal=False, + project_id="a" * 64, + home_dir=tmp_path, + env={"KTX_TELEMETRY_ENDPOINT": f"http://127.0.0.1:{server.server_port}"}, + redaction_secrets=[snapshot_secret], + ) + finally: + server.shutdown() + server.server_close() + thread.join(timeout=2) + + event = find_exception_event(CaptureHandler.payloads) + properties = event["properties"] + assert event.get("$groups") == {"project": "a" * 64} or properties.get( + "$groups" + ) == {"project": "a" * 64} + serialized = json.dumps(properties.get("$exception_list", 
[])) + assert "[redacted]" in serialized + assert snapshot_secret not in serialized + assert db_password not in serialized + assert auth_token not in serialized + forbidden_keys = { + "argv", + "args", + "env", + "environment", + "sql", + "query", + "prompt", + "mcpArguments", + "tableName", + "schemaName", + "columnName", + "databaseUrl", + "connectionString", + "url", + "password", + "token", + "apiKey", + "authorization", + } + assert forbidden_keys.isdisjoint(properties.keys()) diff --git a/python/ktx-daemon/tests/test_exception_telemetry.py b/python/ktx-daemon/tests/test_exception_telemetry.py new file mode 100644 index 00000000..43da007d --- /dev/null +++ b/python/ktx-daemon/tests/test_exception_telemetry.py @@ -0,0 +1,601 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from ktx_daemon.telemetry.identity import reset_identity_cache + + +class FakePosthog: + captures: list[dict[str, Any]] = [] + shutdowns = 0 + + def __init__(self, *_args: Any, **_kwargs: Any) -> None: + pass + + def capture_exception( + self, + exception: BaseException, + *, + distinct_id: str, + properties: dict[str, Any], + groups: dict[str, str] | None = None, + ) -> None: + self.captures.append( + { + "exception": exception, + "distinct_id": distinct_id, + "properties": properties, + "groups": groups, + } + ) + + def shutdown(self) -> None: + type(self).shutdowns += 1 + + +def write_identity(home: Path, *, enabled: bool = True) -> None: + target = home / ".ktx" / "telemetry.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": enabled, + "createdAt": "2026-06-05T00:00:00.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + + +def test_report_exception_respects_disabled_gate(tmp_path: Path, monkeypatch) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) 
+ monkeypatch.setenv("KTX_TELEMETRY_DISABLED", "1") + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("boom"), + source="semantic-query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={"KTX_TELEMETRY_DISABLED": "1"}, + ) + + assert FakePosthog.captures == [] + + +def test_report_exception_sends_groups_and_properties( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("boom"), + source="semantic-query", + handled=True, + fatal=False, + project_id="a" * 64, + home_dir=tmp_path, + env={}, + ) + + assert FakePosthog.captures == [ + { + "exception": FakePosthog.captures[0]["exception"], + "distinct_id": "00000000-0000-4000-8000-000000000000", + "properties": FakePosthog.captures[0]["properties"], + "groups": {"project": "a" * 64}, + } + ] + assert FakePosthog.captures[0]["properties"]["source"] == "semantic-query" + assert FakePosthog.captures[0]["properties"]["handled"] is True + assert FakePosthog.captures[0]["properties"]["fatal"] is False + assert FakePosthog.captures[0]["properties"]["runtime"] == "daemon-py" + + +def test_report_exception_debug_prints_without_sending(tmp_path: Path, capsys) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + + report_exception( + RuntimeError("debug boom"), + source="app:/health", + handled=True, + fatal=False, + home_dir=tmp_path, + env={"KTX_TELEMETRY_DEBUG": "1"}, + ) + + captured = capsys.readouterr() + assert "[telemetry-exception]" in captured.err + assert '"source": "app:/health"' in captured.err + assert FakePosthog.captures == [] + + +def test_report_exception_redacts_snapshot_and_static_patterns( + tmp_path: 
Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + error = RuntimeError("dsn has plain-secret and password=hunter2") + error.__cause__ = ValueError("Authorization: Bearer token-123") + + report_exception( + error, + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + redaction_secrets=["plain-secret"], + ) + + sent = FakePosthog.captures[0]["exception"] + assert "[redacted]" in str(sent) + assert "plain-secret" not in str(sent) + assert "hunter2" not in str(sent) + assert "token-123" not in str(sent.__cause__) + + +def test_report_exception_does_not_discover_env_values_without_snapshot( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setenv("KTX_FAKE_SECRET", "plain-secret-without-pattern") + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("plain-secret-without-pattern"), + source="sys.excepthook", + handled=False, + fatal=True, + home_dir=tmp_path, + env={}, + ) + + assert "plain-secret-without-pattern" in str(FakePosthog.captures[0]["exception"]) + + +def test_route_derived_boundary_reports_new_throwing_route(monkeypatch) -> None: + from fastapi import FastAPI + from fastapi.testclient import TestClient + from ktx_daemon.app import create_app + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr("ktx_daemon.app.report_exception", fake_report) + app: FastAPI = create_app() + + @app.get("/new-throwing-route") + async def new_throwing_route() -> dict[str, str]: + raise RuntimeError("route boom") + + client = 
TestClient(app, raise_server_exceptions=False) + response = client.get("/new-throwing-route") + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] in {"app:/new-throwing-route", "app:new_throwing_route"} + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + + +def test_route_derived_boundary_covers_existing_validate_route(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr( + app_module, + "validate_semantic_layer", + lambda _request: (_ for _ in ()).throw(RuntimeError("validate boom")), + ) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.post("/semantic-layer/validate", json={"sources": []}) + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] in { + "app:/semantic-layer/validate", + "app:semantic_validate", + } + + +def test_daemon_stopped_clean_shutdown_emits_request_once(monkeypatch) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import ( + emit_daemon_stopped_once, + reset_daemon_lifecycle_for_tests, + ) + + events: list[tuple[str, dict[str, object]]] = [] + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event", + lambda name, fields: events.append((name, fields)), + ) + reset_daemon_lifecycle_for_tests() + + emit_daemon_stopped_once(reason="request", uptime_ms=1) + emit_daemon_stopped_once(reason="request", uptime_ms=2) + + assert events == [("daemon_stopped", {"reason": "request", "uptimeMs": 1})] + + +def test_daemon_stopped_crash_wins_over_request(monkeypatch) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import ( + emit_daemon_stopped_once, + 
reset_daemon_lifecycle_for_tests, + ) + + events: list[tuple[str, dict[str, object]]] = [] + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event", + lambda name, fields: events.append((name, fields)), + ) + reset_daemon_lifecycle_for_tests() + + emit_daemon_stopped_once(reason="crash", uptime_ms=3) + emit_daemon_stopped_once(reason="request", uptime_ms=4) + + assert events == [("daemon_stopped", {"reason": "crash", "uptimeMs": 3})] + + +def test_report_exception_dedupes_same_exception_object( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + error = RuntimeError("same object") + + report_exception( + error, + source="semantic-query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + report_exception( + error, + source="app:/semantic-layer/query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + + assert len(FakePosthog.captures) == 1 + + +def test_report_exception_redacts_url_userinfo_and_authorization( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + db_password = ["db", "url", "secret"] + auth_token = ["abc", "123"] + report_exception( + RuntimeError( + "connect postgres://svc:" + + "-".join(db_password) + + "@db.example.test/analytics Authorization: Basic " + + "".join(auth_token) + ), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + + sent = str(FakePosthog.captures[0]["exception"]) + assert "postgres://svc:[redacted]@db.example.test/analytics" in sent + assert "Authorization: [redacted]" in sent + assert "-".join(db_password) not in sent + assert 
"".join(auth_token) not in sent + + +def test_report_exception_falls_back_when_exception_type_cannot_be_reconstructed( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + class KeywordOnlyException(Exception): + def __init__(self, *, message: str) -> None: + super().__init__(message) + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + KeywordOnlyException(message="custom secret-value"), + source="app:/custom", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + redaction_secrets=["secret-value"], + ) + + assert len(FakePosthog.captures) == 1 + sent = FakePosthog.captures[0]["exception"] + assert "[redacted]" in str(sent) + assert "secret-value" not in str(sent) + + +def test_report_exception_redacts_every_static_pattern_and_leaves_benign_text( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + cases = [ + ("dsn password=hunter2", "hunter2", "password=[redacted]"), + ("dsn pwd=swordfish", "swordfish", "pwd=[redacted]"), + ("Authorization: Basic abc123", "abc123", "Authorization: [redacted]"), + ("Authorization: Bearer token-123", "token-123", "Authorization: [redacted]"), + ("Bearer standalone-token", "standalone-token", "Bearer [redacted]"), + ("api_key=sk-live-secret", "sk-live-secret", "api_key=[redacted]"), + ("api-key: sk-dash-secret", "sk-dash-secret", "api-key=[redacted]"), + ( + "KTX_PROVIDER_TOKEN=ktx-secret", + "ktx-secret", + "KTX_PROVIDER_TOKEN=[redacted]", + ), + ( + "REFRESH_SECRET: refresh-secret", + "refresh-secret", + "REFRESH_SECRET=[redacted]", + ), + ( + "https://s3.example.test/file?X-Amz-Signature=aws-secret&ok=1", + "aws-secret", + "X-Amz-Signature=[redacted]", + ), + ( + 
"https://storage.example.test/file?X-Goog-Signature=goog-secret&ok=1", + "goog-secret", + "X-Goog-Signature=[redacted]", + ), + ( + "https://cdn.example.test/file?sig=signed-secret&ok=1", + "signed-secret", + "sig=[redacted]", + ), + ( + "postgres://svc:url-password@db.example.test/analytics", # pragma: allowlist secret + "url-password", + "postgres://svc:[redacted]@db.example.test/analytics", + ), + ] + + for message, leaked, expected in cases: + report_exception( + RuntimeError(message), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + sent = str(FakePosthog.captures[-1]["exception"]) + assert expected in sent + assert leaked not in sent + + report_exception( + RuntimeError("token bucket metrics and passwordless auth are benign"), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + assert str(FakePosthog.captures[-1]["exception"]) == ( + "token bucket metrics and passwordless auth are benign" + ) + + +def test_route_derived_boundary_covers_existing_health_route(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + class BrokenEnviron(dict[str, str]): + def get(self, key: str, default: str | None = None) -> str | None: + if key == "KTX_DAEMON_VERSION": + raise RuntimeError("health boom") + return default + + monkeypatch.setattr(app_module.os, "environ", BrokenEnviron()) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.get("/health") + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] == "app:/health" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + + +def 
test_route_boundary_passes_request_scoped_database_secrets(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr( + app_module, + "introspect_database_response", + lambda _request: (_ for _ in ()).throw(RuntimeError("db-url-secret")), + ) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.post( + "/database/introspect", + json={ + "connection_id": "warehouse", + "url": "postgres://svc:db-url-secret@db.example.test/analytics", # pragma: allowlist secret + "password": "db-password-secret", # pragma: allowlist secret + }, + ) + + assert response.status_code == 500 + assert reports + assert ( + reports[0]["redaction_secrets"] + == [ + "postgres://svc:db-url-secret@db.example.test/analytics", # pragma: allowlist secret + "db-password-secret", # pragma: allowlist secret + ] + ) + + +def test_serve_http_run_crash_reports_exception_and_crash_stop(monkeypatch) -> None: + import sys + + from ktx_daemon import __main__ as main_module + + reports: list[dict[str, object]] = [] + stops: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + def fake_stop(*, reason: str, uptime_ms: float) -> bool: + stops.append({"reason": reason, "uptimeMs": uptime_ms}) + return True + + class FakeUvicorn: + @staticmethod + def run(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("uvicorn crash") + + monkeypatch.setitem(sys.modules, "uvicorn", FakeUvicorn) + monkeypatch.setattr("ktx_daemon.telemetry.report_exception", fake_report) + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once", + 
fake_stop, + ) + + try: + main_module.run_http_server( + host="127.0.0.1", + port=9999, + log_level="info", + enable_code_execution=False, + ) + except RuntimeError as error: + assert str(error) == "uvicorn crash" + else: + raise AssertionError("run_http_server did not re-raise the crash") + + assert reports + assert reports[0]["source"] == "serve-http" + assert reports[0]["handled"] is False + assert reports[0]["fatal"] is True + assert stops and stops[0]["reason"] == "crash" + + +def test_one_shot_command_reports_without_excepthook_or_daemon_stopped( + monkeypatch, +) -> None: + import sys + + from ktx_daemon import __main__ as daemon_main + + original_hook = sys.excepthook + reports: list[dict[str, object]] = [] + stops: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + def fake_stop(*, reason: str, uptime_ms: float) -> bool: + stops.append({"reason": reason, "uptimeMs": uptime_ms}) + return True + + monkeypatch.setattr( + daemon_main, + "_read_stdin_json", + lambda: { + "connection_id": "warehouse", + "driver": "postgres", + "url": "postgresql://readonly@example.test/warehouse", + "schemas": ["public"], + }, + ) + monkeypatch.setattr( + daemon_main, + "introspect_database_response", + lambda _request: (_ for _ in ()).throw(RuntimeError("one-shot boom")), + ) + monkeypatch.setattr("ktx_daemon.telemetry.report_exception", fake_report) + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once", + fake_stop, + ) + + assert daemon_main.main(["database-introspect"]) == 1 + assert sys.excepthook is original_hook + assert stops == [] + assert reports + assert reports[0]["source"] == "database-introspect" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False diff --git a/python/ktx-daemon/tests/test_semantic_layer.py b/python/ktx-daemon/tests/test_semantic_layer.py index 828e9359..72040df9 100644 --- 
a/python/ktx-daemon/tests/test_semantic_layer.py +++ b/python/ktx-daemon/tests/test_semantic_layer.py @@ -97,6 +97,33 @@ def test_query_semantic_layer_emits_plan_and_sql_debug_events( assert "public.orders" not in captured.err +def test_query_semantic_layer_reports_exception(monkeypatch) -> None: + from ktx_daemon import semantic_layer as semantic_layer_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr(semantic_layer_module, "report_exception", fake_report) + + with pytest.raises(ValueError): + query_semantic_layer( + SemanticLayerQueryRequest( + sources=[ORDERS_SOURCE, ORDERS_SOURCE], + dialect="postgres", + projectId="a" * 64, + query={"measures": ["orders.order_count"]}, + ) + ) + + assert reports + assert reports[0]["source"] == "semantic-query" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + assert reports[0]["project_id"] == "a" * 64 + + def test_semantic_layer_request_rejects_project_id_field_name() -> None: with pytest.raises(ValueError): SemanticLayerQueryRequest( diff --git a/python/ktx-daemon/tests/test_telemetry_schema_sync.py b/python/ktx-daemon/tests/test_telemetry_schema_sync.py index 6f2ba634..0cc822f9 100644 --- a/python/ktx-daemon/tests/test_telemetry_schema_sync.py +++ b/python/ktx-daemon/tests/test_telemetry_schema_sync.py @@ -36,4 +36,5 @@ def test_python_schema_copy_matches_node_schema() -> None: "daemon_stopped", "sl_plan_completed", "sql_gen_completed", + "query_history_filter_completed", ] diff --git a/uv.lock b/uv.lock index 6d00951d..40553e46 100644 --- a/uv.lock +++ b/uv.lock @@ -466,7 +466,7 @@ wheels = [ [[package]] name = "ktx-daemon" -version = "0.8.0" +version = "0.9.0" source = { editable = "python/ktx-daemon" } dependencies = [ { name = "fastapi" }, @@ -523,7 +523,7 @@ dev = [ [[package]] name = "ktx-sl" -version = "0.8.0" +version = "0.9.0" source = { 
editable = "python/ktx-sl" } dependencies = [ { name = "pydantic" }, diff --git a/website/vercel.json b/website/vercel.json deleted file mode 100644 index 7aa86301..00000000 --- a/website/vercel.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "redirects": [ - { - "source": "/:path*", - "has": [{ "type": "host", "value": "ktx.sh" }], - "destination": "https://docs.ktx.sh/:path*", - "permanent": true - } - ] -}