From fb7b94b60ee4b905e1efae9e004f39911569e68b Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 5 Jun 2026 19:36:21 +0200 Subject: [PATCH] feat(telemetry): collect PostHog $exception error reports in CLI and daemon (#262) * feat(telemetry): add node exception reporter * feat(telemetry): report node cli exceptions * feat(telemetry): add daemon exception reporter * feat(telemetry): report daemon exceptions * docs(telemetry): document error reports * fix(telemetry): pass redaction snapshots from node call sites * test(telemetry): verify prepared node exception payload * fix(telemetry): close daemon exception lifecycle gaps * test(telemetry): verify prepared daemon exception payload * test(telemetry): close error collection acceptance gaps * test(telemetry): close posthog exception acceptance gaps --- AGENTS.md | 21 +- README.md | 16 +- .../content/docs/community/telemetry.mdx | 27 + packages/cli/src/cli-program.ts | 24 + packages/cli/src/cli-runtime.ts | 45 ++ packages/cli/src/connection.ts | 18 +- packages/cli/src/context/mcp/context-tools.ts | 49 +- packages/cli/src/public-ingest.ts | 70 +- packages/cli/src/scan.ts | 22 +- packages/cli/src/sl.ts | 22 +- packages/cli/src/sql.ts | 22 +- packages/cli/src/telemetry/emitter.ts | 61 ++ packages/cli/src/telemetry/exception.ts | 201 ++++++ packages/cli/src/telemetry/index.ts | 5 +- .../cli/src/telemetry/redaction-secrets.ts | 117 ++++ .../cli/test/cli-program-telemetry.test.ts | 33 + packages/cli/test/connection.test.ts | 22 +- packages/cli/test/context/mcp/server.test.ts | 63 +- packages/cli/test/public-ingest.test.ts | 110 +++- packages/cli/test/scan.test.ts | 40 ++ packages/cli/test/sl.test.ts | 63 +- packages/cli/test/sql.test.ts | 17 +- .../test/telemetry/exception-payload.test.ts | 150 +++++ packages/cli/test/telemetry/exception.test.ts | 456 +++++++++++++ packages/cli/test/telemetry/index.test.ts | 35 +- .../test/telemetry/redaction-secrets.test.ts | 127 ++++ python/ktx-daemon/src/ktx_daemon/__main__.py | 71 ++- python/ktx-daemon/src/ktx_daemon/app.py | 152 ++--- .../src/ktx_daemon/semantic_layer.py | 9 +- .../src/ktx_daemon/telemetry/__init__.py | 9 +- .../ktx_daemon/telemetry/daemon_lifecycle.py | 29 + .../src/ktx_daemon/telemetry/exception.py | 156 +++++ python/ktx-daemon/tests/test_app.py | 2 + .../tests/test_exception_payload.py | 118 ++++ .../tests/test_exception_telemetry.py | 601 ++++++++++++++++++ .../ktx-daemon/tests/test_semantic_layer.py | 27 + 36 files changed, 2870 insertions(+), 140 deletions(-) create mode 100644 packages/cli/src/telemetry/exception.ts create mode 100644 packages/cli/src/telemetry/redaction-secrets.ts create mode 100644 packages/cli/test/telemetry/exception-payload.test.ts create mode 100644 packages/cli/test/telemetry/exception.test.ts create mode 100644 packages/cli/test/telemetry/redaction-secrets.test.ts create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/exception.py create mode 100644 python/ktx-daemon/tests/test_exception_payload.py create mode 100644 python/ktx-daemon/tests/test_exception_telemetry.py diff --git a/AGENTS.md b/AGENTS.md index 20f9bcdf..ec715364 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -337,7 +337,8 @@ use `PascalCase` without the suffix. ## Telemetry -**ktx** ships PostHog usage telemetry. When adding commands or events: +**ktx** ships PostHog usage telemetry. Catalog telemetry events use strict +schemas. When adding commands or events: - **MUST NOT**: Add fields that carry user data — file paths, hostnames, environment values, SQL text, schema/table/column names, error messages, @@ -354,6 +355,24 @@ use `PascalCase` without the suffix. of collected data changes. Adding another event with no new field types needs no docs change. +### Error reports + +**ktx** also sends PostHog Error Tracking `$exception` events when telemetry is +enabled. This channel is separate from the strict catalog event schema and is +used only for exception diagnostics. + +`$exception` events may include stack frames, error class names, raw error +messages, cause chains, `source`, `handled`, `fatal`, runtime version fields, +OS/runtime fields, and the hashed `projectId` when known. Stack frames may +include local file paths and the local username when those appear in paths. + +`$exception` events must never intentionally include secrets, credentials, +database URLs, auth headers, raw argv, raw environment values, SQL text, +schema/table/column names as explicit properties, customer row data, user prompt +text, or raw MCP arguments. Reporters must redact call-site-provided secret +snapshots and common static credential patterns before the SDK serializes the +exception. + ## Documentation and Specs - Keep public documentation in `README.md`, package READMEs, example READMEs, diff --git a/README.md b/README.md index 2c433e0d..67abe741 100644 --- a/README.md +++ b/README.md @@ -247,11 +247,17 @@ uv run pytest -q ## Telemetry -**ktx** collects anonymous usage telemetry from interactive CLI runs to -improve setup, command reliability, and data-agent workflows. No file paths, -hostnames, SQL, schema names, error messages, or argv are recorded. See -[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the -event catalog and opt-out options. +**ktx** collects privacy-conscious usage telemetry to understand installs and +improve setup, command reliability, and data-agent workflows. Catalog telemetry +events do not record file paths, hostnames, SQL, schema names, table names, +column names, error messages, raw environment values, or argv. Error reports use +PostHog Error Tracking and can include stack frames and raw error messages, +which may contain local file paths or the local username in those paths. +**ktx** redacts secrets, credentials, database URLs, auth headers, argv, raw +environment values, SQL text, row data, and user-typed prompt or MCP argument +text from the explicit `$exception` payload. See +[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the event +catalog and opt-out options. ## License diff --git a/docs-site/content/docs/community/telemetry.mdx b/docs-site/content/docs/community/telemetry.mdx index a3a10564..78bdb3e5 100644 --- a/docs-site/content/docs/community/telemetry.mdx +++ b/docs-site/content/docs/community/telemetry.mdx @@ -46,6 +46,33 @@ an operation errors, the detail we record is the error as your tools reported it, which can include identifiers from your setup. If you'd rather send nothing at all, turn telemetry off using any of the options above. +## Error reports + +When telemetry is enabled, **ktx** sends PostHog Error Tracking `$exception` +events for CLI and daemon exceptions. Error reports help group crashes and +handled failures into PostHog issues. + +Error reports can include: + +- Stack frames, including function names, local file paths, line numbers, and + SDK-provided source context. +- Error class names and raw error messages. +- Cause chains when the runtime exposes them. +- `source`, `handled`, and `fatal` diagnostic fields. +- Runtime version, OS, architecture, and CI fields. +- The hashed `projectId` when **ktx** knows the project. + +Error reports never intentionally include: + +- Secrets, credentials, API keys, tokens, cookies, signed URLs, or auth headers. +- Database URLs, connection strings, DSNs, raw argv, or raw environment values. +- SQL text, schema names, table names, or column names as explicit payload + properties. +- Customer row data. +- User prompt text or raw MCP arguments. + +The same opt-out controls listed above disable error reports. + ## Storage and retention Telemetry is sent to PostHog, a third-party product-analytics service used by diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 31ab8a03..3f1b27e4 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -529,6 +529,13 @@ export async function runCommanderKtxCli( try { return await runBareInteractiveCommand(program, io, context); } catch (error) { + const telemetry = await import('./telemetry/index.js'); + await telemetry.reportException({ + error, + context: { source: 'bare-interactive', handled: true, fatal: false }, + packageInfo: info, + io, + }); io.stderr.write(`${formatCliError(error)}\n`); return 1; } @@ -563,6 +570,23 @@ export async function runCommanderKtxCli( outcome: commandOutcomeForParseResult(parseError, exitCode), error: parseError, }); + if ( + parseError && + !isCommanderExit(parseError) && + !isKtxProjectMissingAbortError(parseError) + ) { + await telemetryModule.reportException({ + error: parseError, + context: { + source: completed?.commandPath.join(' ') ?? 'commander parseAsync', + handled: true, + fatal: false, + }, + projectDir: completed?.projectGroupAttached ? completed.projectDir : undefined, + packageInfo: info, + io, + }); + } await telemetryModule.emitCompletedCommand({ completed, packageInfo: info, io }); await telemetryModule.shutdownTelemetryEmitter(); } diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts index 7043143b..4e13b472 100644 --- a/packages/cli/src/cli-runtime.ts +++ b/packages/cli/src/cli-runtime.ts @@ -129,6 +129,48 @@ function installTelemetrySignalFlush(io: KtxCliIo, info: KtxCliPackageInfo): () }; } +/** @internal */ +export function createGlobalExceptionReporter(io: KtxCliIo, info: KtxCliPackageInfo) { + return async (source: 'uncaughtException' | 'unhandledRejection', error: unknown): Promise => { + const { reportException, shutdownTelemetryEmitter } = await import('./telemetry/index.js'); + await reportException({ + error, + context: { source, handled: false, fatal: true }, + io, + packageInfo: info, + immediate: true, + }); + await shutdownTelemetryEmitter(); + }; +} + +export function installGlobalExceptionHandlers(io: KtxCliIo, info: KtxCliPackageInfo): () => void { + const report = createGlobalExceptionReporter(io, info); + const handle = (source: 'uncaughtException' | 'unhandledRejection', error: unknown): void => { + void (async () => { + try { + await report(source, error); + } catch { + // Best-effort: preserve Node's process termination behavior. + } + if (error instanceof Error && error.stack) { + io.stderr.write(`${error.stack}\n`); + } else { + io.stderr.write(`${String(error)}\n`); + } + process.exit(1); + })(); + }; + const onUncaught = (error: Error): void => handle('uncaughtException', error); + const onUnhandled = (reason: unknown): void => handle('unhandledRejection', reason); + process.on('uncaughtException', onUncaught); + process.on('unhandledRejection', onUnhandled); + return () => { + process.off('uncaughtException', onUncaught); + process.off('unhandledRejection', onUnhandled); + }; +} + export async function runKtxCli( argv = process.argv.slice(2), io: KtxCliIo = process, @@ -141,11 +183,14 @@ export async function runKtxCli( // Real-process entry only: flush telemetry if interrupted. Test/programmatic // callers pass their own `io`, so they never install process-level handlers. const removeSignalFlush = (io as unknown) === process ? installTelemetrySignalFlush(io, info) : undefined; + const removeGlobalExceptionHandlers = + (io as unknown) === process ? installGlobalExceptionHandlers(io, info) : undefined; try { return await runCommanderKtxCli(argv, io, deps, info, { runInit: runInitForCommander, }); } finally { + removeGlobalExceptionHandlers?.(); removeSignalFlush?.(); } } diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index 2f4a0f4a..9b6b4294 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -16,7 +16,8 @@ import { bold, dim, green, red, SYMBOLS } from './io/symbols.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:connection'); @@ -324,6 +325,21 @@ async function emitConnectionTest(input: { ...(errorDetail ? { errorDetail } : {}), }, }); + if (input.error) { + await reportException({ + error: input.error, + context: { source: 'connection test', handled: true, fatal: false }, + projectDir: input.project.projectDir, + io: input.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project: input.project, + connectionId: input.connectionId, + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }), + }); + } } function visualWidth(text: string): number { diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index 03cd2ad4..2d07d121 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -3,7 +3,13 @@ import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; import type { KtxCliIo } from '../../cli-runtime.js'; import type { MemoryAgentInput } from '../../context/memory/types.js'; -import { emitTelemetryEvent, mcpTelemetrySampleRate, shouldEmitMcpTelemetry } from '../../telemetry/index.js'; +import { + emitTelemetryEvent, + mcpTelemetrySampleRate, + reportException, + shouldEmitMcpTelemetry, +} from '../../telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from '../../telemetry/redaction-secrets.js'; import { scrubErrorClass } from '../../telemetry/scrubber.js'; import type { KtxMcpClientInfo, @@ -518,11 +524,26 @@ function registerParsedTool( }, schema: TSchema, handler: (input: z.infer, context?: KtxMcpToolHandlerContext) => Promise, + telemetry?: { projectDir?: string; io?: KtxCliIo }, ): void { server.registerTool(name, config, async (input, context) => { try { return await handler(schema.parse(input), context); } catch (error) { + if (telemetry?.io) { + await reportException({ + error, + context: { source: `mcp:${name}`, handled: true, fatal: false }, + projectDir: telemetry.projectDir, + io: telemetry.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + projectDir: telemetry.projectDir, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); + } return jsonErrorToolResult(formatToolError(error)); } }); @@ -571,6 +592,20 @@ function instrumentMcpServer( } return result; } catch (error) { + if (telemetry.io) { + await reportException({ + error, + context: { source: `mcp:${name}`, handled: true, fatal: false }, + projectDir: telemetry.projectDir, + io: telemetry.io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + projectDir: telemetry.projectDir, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); + } if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) { const errorClass = scrubErrorClass(error); await emitTelemetryEvent({ @@ -596,6 +631,7 @@ function instrumentMcpServer( export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void { const { ports, userContext } = deps; + const toolTelemetry = { projectDir: deps.projectDir, io: deps.io }; const server = instrumentMcpServer(deps.server, { projectDir: deps.projectDir, io: deps.io, @@ -616,6 +652,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, connectionListSchema, async () => jsonToolResult({ connections: await connections.list() }), + toolTelemetry, ); } @@ -640,6 +677,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void limit: input.limit, }), ), + toolTelemetry, ); registerParsedTool( @@ -657,6 +695,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void const page = await knowledge.read({ userId: userContext.userId, key: input.key }); return page ? jsonToolResult(page) : jsonErrorToolResult(`Wiki page "${input.key}" was not found.`); }, + toolTelemetry, ); } @@ -679,6 +718,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ? jsonToolResult(source) : jsonErrorToolResult(`Semantic-layer source "${input.sourceName}" was not found.`); }, + toolTelemetry, ); registerParsedTool( @@ -711,6 +751,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ); return jsonToolResult(projectSlQueryResult(result, input.include)); }, + toolTelemetry, ); } @@ -728,6 +769,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, entityDetailsSchema, async (input) => jsonToolResult(await entityDetails.read(input)), + toolTelemetry, ); } @@ -745,6 +787,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, dictionarySearchSchema, async (input) => jsonToolResult(await dictionarySearch.search(input)), + toolTelemetry, ); } @@ -762,6 +805,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }, discoverDataSchema, async (input) => jsonToolResult({ refs: await discover.search(input) }), + toolTelemetry, ); } @@ -791,6 +835,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void ), ); }, + toolTelemetry, ); } @@ -818,6 +863,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void }; return jsonToolResult(await memoryIngest.ingest(ingestInput)); }, + toolTelemetry, ); registerParsedTool( @@ -835,6 +881,7 @@ export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void const status = await memoryIngest.status(input.runId); return status ? jsonToolResult(status) : jsonErrorToolResult(`Memory ingest run "${input.runId}" was not found.`); }, + toolTelemetry, ); } } diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 44a2b024..07f805b8 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -23,7 +23,8 @@ import type { KtxScanArgs, KtxScanDeps } from './scan.js'; import type { KtxTableRef } from './context/scan/types.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js'; +import { emitProjectStackSnapshot, emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail } from './telemetry/scrubber.js'; profileMark('module:public-ingest'); @@ -1119,30 +1120,63 @@ export async function runKtxPublicIngest( feature, }); } catch (error) { + await reportException({ + error, + context: { source: 'ingest runtime', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.targetConnectionId, + includeLlm: true, + includeEmbeddings: true, + env: deps.env ?? process.env, + }), + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } } const { runContextBuild } = await import('./context-build-view.js'); const contextBuild = deps.runContextBuild ?? runContextBuild; - const result = await contextBuild( - project, - { + try { + const result = await contextBuild( + project, + { + projectDir: args.projectDir, + ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), + all: args.all, + entrypoint: 'ingest', + inputMode: args.inputMode, + ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), + ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), + ...(args.scanMode ? { scanMode: args.scanMode } : {}), + ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}), + ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), + ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), + }, + io, + ); + return result.exitCode; + } catch (error) { + await reportException({ + error, + context: { source: 'ingest context-build', handled: true, fatal: false }, projectDir: args.projectDir, - ...(args.targetConnectionId ? { targetConnectionId: args.targetConnectionId } : {}), - all: args.all, - entrypoint: 'ingest', - inputMode: args.inputMode, - ...(args.queryHistory ? { queryHistory: args.queryHistory } : {}), - ...(args.queryHistoryWindowDays !== undefined ? { queryHistoryWindowDays: args.queryHistoryWindowDays } : {}), - ...(args.scanMode ? { scanMode: args.scanMode } : {}), - ...(args.detectRelationships !== undefined ? { detectRelationships: args.detectRelationships } : {}), - ...(args.cliVersion ? { cliVersion: args.cliVersion } : {}), - ...(args.runtimeInstallPolicy ? { runtimeInstallPolicy: args.runtimeInstallPolicy } : {}), - }, - io, - ); - return result.exitCode; + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.targetConnectionId, + includeLlm: true, + includeEmbeddings: true, + env: deps.env ?? process.env, + }), + }); + io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); + return 1; + } } const plan = buildPublicIngestPlan(project, args); diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index 4f973e57..5961e3f1 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -1,6 +1,6 @@ import type { KtxProgressPort, KtxScanMode, KtxScanReport, KtxScanWarning } from './context/scan/types.js'; import { runLocalScan } from './context/scan/local-scan.js'; -import { loadKtxProject } from './context/project/project.js'; +import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { getKtxCliPackageInfo } from './cli-runtime.js'; import { resolveProjectEmbeddingProvider } from './embedding-resolution.js'; import type { KtxCliIo } from './index.js'; @@ -8,7 +8,8 @@ import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:scan'); @@ -322,8 +323,9 @@ export function createCliScanProgress( export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { const startedAt = performance.now(); + let project: KtxLocalProject | undefined; try { - const project = await loadKtxProject({ projectDir: args.projectDir }); + project = await loadKtxProject({ projectDir: args.projectDir }); const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider; const resolution = await resolveEmbeddingProvider(project, { mode: 'ensure', @@ -397,6 +399,20 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps ...(errorDetail ? { errorDetail } : {}), }, }); + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }), + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts index f3eeb33e..dcf5e460 100644 --- a/packages/cli/src/sl.ts +++ b/packages/cli/src/sl.ts @@ -26,7 +26,8 @@ import { type KtxManagedPythonInstallPolicy, } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sl'); @@ -202,8 +203,9 @@ function ambiguousSourceMessage(sourceName: string, connectionIds: readonly stri export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise { const startedAt = performance.now(); let queryForTelemetry: SemanticLayerQueryInput | undefined; + let project: KtxLocalProject | undefined; try { - const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); + project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); if (args.command === 'list') { const sources = await listLocalSlSources(project, { connectionId: args.connectionId }); await printSlSources({ @@ -320,7 +322,7 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx projectDir: args.projectDir, }); const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined; - const result = await compileLocalSlQuery(project as KtxLocalProject, { + const result = await compileLocalSlQuery(project, { connectionId: args.connectionId, query, compute, @@ -351,6 +353,20 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx const _exhaustive: never = args; throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`); } catch (error) { + await reportException({ + error, + context: { source: `sl ${args.command}`, handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: args.command === 'query', + includeEmbeddings: args.command === 'search' || args.command === 'query', + env: process.env, + }), + }); if (args.command === 'validate') { const errorClass = scrubErrorClass(error); await emitTelemetryEvent({ diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index bfae0608..d3eb6a81 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -7,7 +7,8 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js'; import { profileMark } from './startup-profile.js'; import { isDemoConnection } from './telemetry/demo-detect.js'; -import { emitTelemetryEvent } from './telemetry/index.js'; +import { emitTelemetryEvent, reportException } from './telemetry/index.js'; +import { collectTelemetryRedactionSecrets } from './telemetry/redaction-secrets.js'; import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sql'); @@ -142,8 +143,9 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: const startedAt = performance.now(); let driver = 'unknown'; let demoConnection = false; + let project: KtxLocalProject | undefined; try { - const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); + project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); const connection = project.config.connections[args.connectionId]; if (!connection) { throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`); @@ -171,7 +173,7 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector; let connector: KtxScanConnector | null = null; try { - connector = await createScanConnector(project as KtxLocalProject, args.connectionId); + connector = await createScanConnector(project, args.connectionId); if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { throw new Error(`Connection "${args.connectionId}" does not support read-only SQL execution.`); } @@ -218,6 +220,20 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: ...(errorClass ? { errorClass } : {}), }, }); + await reportException({ + error, + context: { source: 'sql run', handled: true, fatal: false }, + projectDir: args.projectDir, + io, + redactionSecrets: await collectTelemetryRedactionSecrets({ + project, + projectDir: args.projectDir, + connectionId: args.connectionId, + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }), + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/telemetry/emitter.ts b/packages/cli/src/telemetry/emitter.ts index 3344e00b..12453262 100644 --- a/packages/cli/src/telemetry/emitter.ts +++ b/packages/cli/src/telemetry/emitter.ts @@ -16,6 +16,16 @@ type PostHogClient = { properties: Record; groups?: Record; }): void; + captureException( + error: unknown, + distinctId?: string, + additionalProperties?: Record, + ): void; + captureExceptionImmediate( + error: unknown, + distinctId?: string, + additionalProperties?: Record, + ): Promise; shutdown(): Promise | void; }; @@ -105,6 +115,57 @@ export async function trackTelemetryEvent(input: { } } +function writeDebugExceptionPayload(input: { + error: Error; + distinctId: string; + properties: Record; + stderr: TelemetrySink; +}): void { + input.stderr.write( + `[telemetry-exception] ${JSON.stringify({ + distinctId: input.distinctId, + message: input.error.message, + name: input.error.name, + properties: input.properties, + })}\n`, + ); +} + +export async function trackTelemetryException(input: { + error: Error; + distinctId: string; + properties: Record; + env?: TelemetryEmitterEnv; + stderr: TelemetrySink; + projectApiKey?: string; + host?: string; + immediate?: boolean; +}): Promise { + const env = input.env ?? process.env; + + if (debugEnabled(env)) { + writeDebugExceptionPayload(input); + return; + } + + const projectApiKey = telemetryProjectApiKey(input.projectApiKey); + const host = telemetryHost(env, input.host); + const client = await getPostHogClient(projectApiKey, host); + if (!client) { + return; + } + + try { + if (input.immediate) { + await client.captureExceptionImmediate(input.error, input.distinctId, input.properties); + return; + } + client.captureException(input.error, input.distinctId, input.properties); + } catch { + return; + } +} + export async function shutdownTelemetryEmitter(): Promise { const client = await clientPromise; if (!client) { diff --git a/packages/cli/src/telemetry/exception.ts b/packages/cli/src/telemetry/exception.ts new file mode 100644 index 00000000..0ce81244 --- /dev/null +++ b/packages/cli/src/telemetry/exception.ts @@ -0,0 +1,201 @@ +import { inspect } from 'node:util'; + +import { getKtxCliPackageInfo, type KtxCliIo, type KtxCliPackageInfo } from '../cli-runtime.js'; +import { buildCommonEnvelope } from './events.js'; +import { trackTelemetryException } from './emitter.js'; +import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js'; + +export interface ExceptionContext { + source: string; + handled: boolean; + fatal: boolean; + extra?: Record; +} + +type AnyObject = object; + +const reportedObjects = new WeakSet(); +const recentHandledPrimitives: string[] = []; +const RECENT_PRIMITIVE_LIMIT = 128; + +function primitiveKey(value: unknown): string { + return `${typeof value}:${String(value)}`; +} + +function rememberHandledPrimitive(value: unknown): void { + recentHandledPrimitives.push(primitiveKey(value)); + if (recentHandledPrimitives.length > RECENT_PRIMITIVE_LIMIT) { + recentHandledPrimitives.splice(0, recentHandledPrimitives.length - RECENT_PRIMITIVE_LIMIT); + } +} + +function consumeHandledPrimitive(value: unknown): boolean { + const key = primitiveKey(value); + const index = recentHandledPrimitives.indexOf(key); + if (index < 0) { + return false; + } + recentHandledPrimitives.splice(index, 1); + return true; +} + +function shouldSkipAsAlreadyReported(error: unknown, handled: boolean): boolean { + if ((typeof error === 'object' || typeof error === 'function') && error !== null) { + if (reportedObjects.has(error)) { + return true; + } + reportedObjects.add(error); + return false; + } + + if (handled) { + rememberHandledPrimitive(error); + return false; + } + + return consumeHandledPrimitive(error); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function redactStaticPatterns(value: string): string { + return value + .replace(/([a-z][a-z0-9+.-]*:\/\/[^:\s/@]+:)([^@\s/]+)(@)/gi, '$1[redacted]$3') + .replace(/\b(password|pwd)=([^;&\s]+)/gi, '$1=[redacted]') + .replace(/\bAuthorization\s*:\s*[^\r\n,;]+/gi, 'Authorization: [redacted]') + .replace(/\bBearer\s+[A-Za-z0-9._~+/=-]+/gi, 'Bearer [redacted]') + .replace(/\b(api[_-]?key)\s*[:=]\s*([^\s,;]+)/gi, '$1=[redacted]') + .replace(/\b(KTX_[A-Z0-9_]*|[A-Z0-9_]*(?:TOKEN|SECRET))\s*[:=]\s*([^\s,;]+)/g, '$1=[redacted]') + .replace(/([?&](?:X-Amz-Signature|X-Goog-Signature|sig)=)[^&\s]+/gi, '$1[redacted]'); +} + +function redactText(value: string, secrets: ReadonlyArray): string { + let redacted = value; + for (const secret of secrets) { + if (secret) { + redacted = redacted.replace(new RegExp(escapeRegExp(secret), 'g'), '[redacted]'); + } + } + return redactStaticPatterns(redacted); +} + +const FORBIDDEN_EXTRA_PROPERTY_KEYS = new Set([ + 'argv', + 'args', + 'env', + 'environment', + 'sql', + 'query', + 'prompt', + 'mcparguments', + 'mcpargs', + 'tablename', + 'schemaname', + 'columnname', + 'databaseurl', + 'connectionstring', + 'url', + 'password', + 'token', + 'apikey', + 'api_key', + 'authorization', +]); + +function safeExtraProperties( + extra: Record | undefined, +): Record { + const safe: Record = {}; + for (const [key, value] of Object.entries(extra ?? {})) { + if (!FORBIDDEN_EXTRA_PROPERTY_KEYS.has(key.replace(/[^a-z0-9_]/gi, '').toLowerCase())) { + safe[key] = value; + } + } + return safe; +} + +function toMessage(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + if (typeof error === 'string') { + return error; + } + return inspect(error, { depth: 4, breakLength: 120 }); +} + +function sanitizedError(error: unknown, secrets: ReadonlyArray): Error { + if (error instanceof Error) { + const cause = 'cause' in error ? (error as Error & { cause?: unknown }).cause : undefined; + const clone = new Error(redactText(error.message, secrets), { + ...(cause !== undefined ? { cause: sanitizedError(cause, secrets) } : {}), + }); + clone.name = error.name; + if (error.stack) { + clone.stack = redactText(error.stack, secrets); + } + return clone; + } + return new Error(redactText(toMessage(error), secrets)); +} + +export async function reportException(input: { + error: unknown; + context: ExceptionContext; + io: KtxCliIo; + packageInfo?: KtxCliPackageInfo; + projectDir?: string; + immediate?: boolean; + redactionSecrets?: ReadonlyArray; +}): Promise { + try { + if (shouldSkipAsAlreadyReported(input.error, input.context.handled)) { + return; + } + + const debug = process.env.KTX_TELEMETRY_DEBUG === '1'; + const identity = await loadTelemetryIdentity({ + stderr: input.io.stderr, + env: process.env, + }); + + if ((!identity.enabled || !identity.installId) && !debug) { + return; + } + + const packageInfo = input.packageInfo ?? getKtxCliPackageInfo(); + const installId = identity.installId ?? 'debug'; + const projectId = input.projectDir ? computeTelemetryProjectId(installId, input.projectDir) : undefined; + const safeError = sanitizedError(input.error, input.redactionSecrets ?? []); + const properties: Record = { + ...buildCommonEnvelope({ + cliVersion: packageInfo.version, + isCi: Boolean(process.env.CI), + }), + source: input.context.source, + handled: input.context.handled, + fatal: input.context.fatal, + ...(projectId ? { projectId } : {}), + ...safeExtraProperties(input.context.extra), + }; + + delete properties.$groups; + await trackTelemetryException({ + error: safeError, + distinctId: installId, + properties, + env: process.env, + stderr: input.io.stderr, + immediate: input.immediate, + }); + } catch { + return; + } +} + +/** @internal */ +export function __resetTelemetryExceptionStateForTests(): void { + recentHandledPrimitives.length = 0; +} diff --git a/packages/cli/src/telemetry/index.ts b/packages/cli/src/telemetry/index.ts index b02e0224..e3716060 100644 --- a/packages/cli/src/telemetry/index.ts +++ b/packages/cli/src/telemetry/index.ts @@ -7,6 +7,7 @@ import { type CompletedCommandSpan, } from './command-hook.js'; import { shutdownTelemetryEmitter, trackTelemetryEvent } from './emitter.js'; +import { reportException, type ExceptionContext } from './exception.js'; import { buildCommonEnvelope, buildTelemetryEvent, @@ -17,8 +18,8 @@ import { import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js'; import { buildProjectStackSnapshotFields } from './project-snapshot.js'; -export { beginCommandSpan, completeCommandSpan, shutdownTelemetryEmitter }; -export type { CommandOutcome, CompletedCommandSpan }; +export { beginCommandSpan, completeCommandSpan, reportException, shutdownTelemetryEmitter }; +export type { CommandOutcome, CompletedCommandSpan, ExceptionContext }; export async function showTelemetryNoticeIfNeeded(io: KtxCliIo, packageInfo: KtxCliPackageInfo): Promise { const identity = await loadTelemetryIdentity({ diff --git a/packages/cli/src/telemetry/redaction-secrets.ts b/packages/cli/src/telemetry/redaction-secrets.ts new file mode 100644 index 00000000..2bf7a863 --- /dev/null +++ b/packages/cli/src/telemetry/redaction-secrets.ts @@ -0,0 +1,117 @@ +import { resolveKtxConfigReference } from '../context/core/config-reference.js'; +import { loadKtxProject, type KtxLocalProject } from '../context/project/project.js'; + +const SENSITIVE_KEY = + /(password|secret|token|api[_-]?key|auth[_-]?token|auth_token_ref|private[_-]?key|passphrase|credential|authorization|url)$/i; + +type TelemetryRedactionProject = Pick; + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function addSecret(values: string[], value: string | undefined): void { + const trimmed = value?.trim(); + if (trimmed && !values.includes(trimmed)) { + values.push(trimmed); + } +} + +function tryResolve(value: string, env: NodeJS.ProcessEnv): string | undefined { + try { + return resolveKtxConfigReference(value, env); + } catch { + return undefined; + } +} + +function addUrlCredentials(values: string[], value: string): void { + try { + const parsed = new URL(value); + addSecret(values, parsed.password ? decodeURIComponent(parsed.password) : undefined); + addSecret(values, parsed.username ? decodeURIComponent(parsed.username) : undefined); + } catch { + return; + } +} + +function collectFromRecord(input: unknown, env: NodeJS.ProcessEnv, values: string[]): void { + if (Array.isArray(input)) { + for (const item of input) { + collectFromRecord(item, env, values); + } + return; + } + + if (!isRecord(input)) { + return; + } + + for (const [key, raw] of Object.entries(input)) { + if (isRecord(raw) || Array.isArray(raw)) { + collectFromRecord(raw, env, values); + continue; + } + if (typeof raw !== 'string' || !SENSITIVE_KEY.test(key)) { + continue; + } + const resolved = tryResolve(raw, env); + addSecret(values, resolved); + if (resolved) { + addUrlCredentials(values, resolved); + } + } +} + +function collectLlmSecrets(project: TelemetryRedactionProject, env: NodeJS.ProcessEnv, values: string[]): void { + collectFromRecord(project.config.llm.provider, env, values); +} + +function collectEmbeddingSecrets(project: TelemetryRedactionProject, env: NodeJS.ProcessEnv, values: string[]): void { + collectFromRecord(project.config.ingest.embeddings, env, values); + collectFromRecord(project.config.scan.enrichment.embeddings, env, values); +} + +function collectConnectionSecrets( + project: TelemetryRedactionProject, + connectionId: string | undefined, + env: NodeJS.ProcessEnv, + values: string[], +): void { + if (!connectionId) { + return; + } + collectFromRecord(project.config.connections[connectionId], env, values); +} + +export async function collectTelemetryRedactionSecrets(input: { + project?: TelemetryRedactionProject; + projectDir?: string; + connectionId?: string; + includeLlm?: boolean; + includeEmbeddings?: boolean; + env?: NodeJS.ProcessEnv; +}): Promise { + const env = input.env ?? process.env; + let project = input.project; + if (!project && input.projectDir) { + try { + project = await loadKtxProject({ projectDir: input.projectDir }); + } catch { + project = undefined; + } + } + if (!project) { + return []; + } + + const values: string[] = []; + if (input.includeLlm) { + collectLlmSecrets(project, env, values); + } + if (input.includeEmbeddings) { + collectEmbeddingSecrets(project, env, values); + } + collectConnectionSecrets(project, input.connectionId, env, values); + return values; +} diff --git a/packages/cli/test/cli-program-telemetry.test.ts b/packages/cli/test/cli-program-telemetry.test.ts index 4e7130b3..30e2bd2b 100644 --- a/packages/cli/test/cli-program-telemetry.test.ts +++ b/packages/cli/test/cli-program-telemetry.test.ts @@ -7,6 +7,12 @@ import { runCommanderKtxCli } from '../src/cli-program.js'; import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from '../src/cli-runtime.js'; import { TELEMETRY_NOTICE } from '../src/telemetry/identity.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } { let stdout = ''; let stderr = ''; @@ -43,6 +49,7 @@ describe('runCommanderKtxCli telemetry', () => { vi.stubEnv('CI', ''); vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); vi.stubEnv('DO_NOT_TRACK', ''); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -131,4 +138,30 @@ describe('runCommanderKtxCli telemetry', () => { await expect(runCommanderKtxCli(['unknown'], unknownIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(1); expect(unknownIo.stderr()).not.toContain('[telemetry]'); }); + + it('reports genuine top-level command catches as handled exceptions', async () => { + const io = makeIo(true); + const deps: KtxCliDeps = { + doctor: async () => { + throw new Error('status failed'); + }, + }; + + await expect( + runCommanderKtxCli( + ['--project-dir', tempDir, 'status', '--json'], + io.io, + deps, + info, + { runInit: async () => 0 }, + ), + ).resolves.toBe(1); + + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ktx status', handled: true, fatal: false }), + projectDir: tempDir, + }), + ); + }); }); diff --git a/packages/cli/test/connection.test.ts b/packages/cli/test/connection.test.ts index da650b05..22c8bbe9 100644 --- a/packages/cli/test/connection.test.ts +++ b/packages/cli/test/connection.test.ts @@ -10,6 +10,12 @@ import type { KtxConnectionDriver, KtxScanConnector } from '../src/context/scan/ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxConnection } from '../src/connection.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function stripAnsi(s: string): string { return s.replace(/\[[0-9;]*m/g, ''); } @@ -72,6 +78,7 @@ describe('runKtxConnection', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-connection-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -165,12 +172,13 @@ describe('runKtxConnection', () => { it('records the raw errorDetail in connection_test telemetry when a native test fails', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); + vi.stubEnv('DATABASE_URL', 'postgres://svc:db-url-password@db.example.test/analytics'); // pragma: allowlist secret const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); await writeConnections(projectDir, { - warehouse: { driver: 'sqlite' }, + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, }); - const { connector } = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' }); + const { connector } = nativeConnector('postgres', { success: false, error: 'database file is unreadable' }); const io = makeIo(); const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { @@ -181,6 +189,16 @@ describe('runKtxConnection', () => { expect(io.stderr()).toContain('"event":"connection_test"'); expect(io.stderr()).toContain('"outcome":"error"'); expect(io.stderr()).toContain('"errorDetail":"database file is unreadable"'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'connection test', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining([ + 'postgres://svc:db-url-password@db.example.test/analytics', // pragma: allowlist secret + 'db-url-password', + ]), + }), + ); }); it('preserves the driver error class and code in connection_test telemetry', async () => { diff --git a/packages/cli/test/context/mcp/server.test.ts b/packages/cli/test/context/mcp/server.test.ts index 95985d68..1359d346 100644 --- a/packages/cli/test/context/mcp/server.test.ts +++ b/packages/cli/test/context/mcp/server.test.ts @@ -1,4 +1,4 @@ -import { access, mkdtemp, readFile, rm } from 'node:fs/promises'; +import { access, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; @@ -7,6 +7,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { createLocalProjectMemoryIngest } from '../../../src/context/memory/local-memory.js'; import { detectCaptureSignals } from '../../../src/context/memory/capture-signals.js'; import type { MemoryAgentInput } from '../../../src/context/memory/types.js'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../../../src/context/project/config.js'; import { initKtxProject } from '../../../src/context/project/project.js'; import { jsonToolResult } from '../../../src/context/mcp/context-tools.js'; import { createDefaultKtxMcpServer, createKtxMcpServer } from '../../../src/context/mcp/server.js'; @@ -23,6 +24,12 @@ import type { MemoryIngestPort, } from '../../../src/context/mcp/types.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../../../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + type RegisteredTool = { name: string; config: { @@ -280,6 +287,60 @@ describe('createKtxMcpServer', () => { expect(io.stderrText()).not.toContain('mcpClientVersion'); }); + it('reports MCP tool exceptions with a tool-derived source', async () => { + reportExceptionMock.mockClear(); + vi.stubEnv('ANTHROPIC_API_KEY', 'mcp-anthropic-secret'); // pragma: allowlist secret + const fake = makeFakeServer(); + const io = makeIo(); + const projectDir = await mkdtemp(join(tmpdir(), 'ktx-mcp-exception-')); + try { + await initKtxProject({ projectDir }); + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); + + createKtxMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + projectDir, + io, + contextTools: { + knowledge: { + search: vi.fn().mockRejectedValue(new Error('wiki failed')), + read: vi.fn().mockResolvedValue(null), + }, + }, + }); + + await expect(getTool(fake.tools, 'wiki_search').handler({ query: 'revenue recognition', limit: 5 })).resolves.toMatchObject({ + isError: true, + }); + + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'mcp:wiki_search', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['mcp-anthropic-secret']), + }), + ); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); + it('captures the connecting MCP client name and version', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); diff --git a/packages/cli/test/public-ingest.test.ts b/packages/cli/test/public-ingest.test.ts index ba35faf6..6dea8834 100644 --- a/packages/cli/test/public-ingest.test.ts +++ b/packages/cli/test/public-ingest.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; -import { afterEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { buildPublicIngestPlan, executePublicIngestTarget, @@ -13,6 +13,12 @@ import { runKtxPublicIngest, } from '../src/public-ingest.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + /** Count non-overlapping occurrences of `needle` in `haystack`. */ function occurrences(haystack: string, needle: string): number { return haystack.split(needle).length - 1; @@ -377,6 +383,10 @@ describe('publicProgressMessage', () => { }); describe('runKtxPublicIngest', () => { + beforeEach(() => { + reportExceptionMock.mockClear(); + }); + afterEach(() => { vi.unstubAllEnvs(); }); @@ -1208,6 +1218,104 @@ describe('runKtxPublicIngest', () => { ); }); + it('reports foreground runtime preflight exceptions', async () => { + const io = makeIo({ isTTY: true, interactive: true }); + const project = projectWithConnections({ + warehouse: { driver: 'postgres' }, + }); + const ensureRuntime = vi.fn(async (): Promise => { + throw new Error('runtime unavailable'); + }); + const runContextBuild = vi.fn(async () => ({ exitCode: 0 })); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + queryHistory: 'enabled', + cliVersion: '0.2.0', + runtimeInstallPolicy: 'prompt', + }, + io.io, + { + loadProject: vi.fn(async () => project), + ensureRuntime, + runContextBuild, + }, + ), + ).resolves.toBe(1); + + expect(runContextBuild).not.toHaveBeenCalled(); + expect(io.stderr()).toContain('runtime unavailable'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ingest runtime', handled: true, fatal: false }), + projectDir: '/tmp/project', + }), + ); + }); + + it('reports foreground context-build exceptions', async () => { + const io = makeIo({ isTTY: true, interactive: true }); + const config = buildDefaultKtxProjectConfig(); + const project: KtxPublicIngestProject = { + projectDir: '/tmp/project', + config: { + ...config, + connections: { warehouse: { driver: 'postgres', password: 'env:INGEST_DB_PASSWORD' } }, // pragma: allowlist secret + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }, + }; + const runContextBuild = vi.fn(async () => { + throw new Error('context build failed'); + }); + + await expect( + runKtxPublicIngest( + { + command: 'run', + projectDir: '/tmp/project', + targetConnectionId: 'warehouse', + all: false, + json: false, + inputMode: 'auto', + queryHistory: 'default', + }, + io.io, + { + loadProject: vi.fn(async () => project), + runContextBuild, + env: { + ...process.env, + ANTHROPIC_API_KEY: 'ingest-anthropic-secret', // pragma: allowlist secret + INGEST_DB_PASSWORD: 'ingest-db-password', // pragma: allowlist secret + }, + }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('context build failed'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'ingest context-build', handled: true, fatal: false }), + projectDir: '/tmp/project', + redactionSecrets: expect.arrayContaining(['ingest-anthropic-secret', 'ingest-db-password']), + }), + ); + }); + it('preflights foreground managed embeddings runtime before starting the context-build view', async () => { const io = makeIo({ isTTY: true, interactive: true }); const config = buildDefaultKtxProjectConfig(); diff --git a/packages/cli/test/scan.test.ts b/packages/cli/test/scan.test.ts index 6a524fba..51c55498 100644 --- a/packages/cli/test/scan.test.ts +++ b/packages/cli/test/scan.test.ts @@ -2,12 +2,19 @@ import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import type { SourceAdapter } from '../src/context/ingest/types.js'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; import type { KtxScanReport } from '../src/context/scan/types.js'; import type { LocalScanRunResult, RunLocalScanOptions } from '../src/context/scan/local-scan.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createCliScanProgress, runKtxScan, type KtxScanDeps } from '../src/scan.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + const sqlServerExtractSchema = vi.hoisted(() => vi.fn(async (connectionId: string) => ({ connectionId, @@ -317,6 +324,7 @@ describe('runKtxScan', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-scan-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -426,7 +434,28 @@ describe('runKtxScan', () => { it('records the raw errorDetail in scan_completed telemetry when the scan throws', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); + vi.stubEnv('ANTHROPIC_API_KEY', 'anthropic-callsite-secret'); // pragma: allowlist secret + vi.stubEnv('DATABASE_URL', 'postgres://svc:scan-db-password@db.example.test/analytics'); // pragma: allowlist secret await initKtxProject({ projectDir: tempDir }); + const config = parseKtxProjectConfig(await readFile(join(tempDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(tempDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + connections: { + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, + }, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); const runLocalScan = vi.fn(async (): Promise => { const error = new Error('introspection timed out'); (error as { code?: unknown }).code = 'ETIMEDOUT'; @@ -452,6 +481,17 @@ describe('runKtxScan', () => { expect(io.stderr()).toContain('"event":"scan_completed"'); expect(io.stderr()).toContain('"outcome":"error"'); expect(io.stderr()).toContain('"errorDetail":"ETIMEDOUT: introspection timed out"'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'scan run', handled: true, fatal: false }), + projectDir: tempDir, + redactionSecrets: expect.arrayContaining([ + 'anthropic-callsite-secret', + 'postgres://svc:scan-db-password@db.example.test/analytics', // pragma: allowlist secret + 'scan-db-password', + ]), + }), + ); }); it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => { diff --git a/packages/cli/test/sl.test.ts b/packages/cli/test/sl.test.ts index ff9c1489..489ea950 100644 --- a/packages/cli/test/sl.test.ts +++ b/packages/cli/test/sl.test.ts @@ -1,12 +1,19 @@ -import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { stripVTControlCharacters } from 'node:util'; import Database from 'better-sqlite3'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; import { initKtxProject } from '../src/context/project/project.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxSl } from '../src/sl.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + const ORDERS_YAML = [ 'name: orders', 'table: public.orders', @@ -61,6 +68,7 @@ describe('runKtxSl', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sl-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -351,6 +359,12 @@ describe('runKtxSl', () => { expect(validateIo.stdout()).toBe(''); expect(validateIo.stderr()).toBe('Semantic-layer source "missing_orders" was not found\n'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sl validate', handled: true, fatal: false }), + projectDir, + }), + ); }); it('keeps scoped validation not-found wording', async () => { @@ -552,6 +566,53 @@ joins: [] expect(stderr.write).not.toHaveBeenCalled(); }); + it('reports sl query exceptions at the query catch boundary', async () => { + vi.stubEnv('ANTHROPIC_API_KEY', 'sl-anthropic-secret'); // pragma: allowlist secret + const projectDir = join(tempDir, 'missing-query-input'); + await seedSlSource({ projectDir }); + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + }), + 'utf-8', + ); + const io = makeIo(); + + await expect( + runKtxSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + format: 'json', + execute: false, + cliVersion: '0.2.0', + runtimeInstallPolicy: 'auto', + }, + io.io, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('sl query requires query input'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sl query', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['sl-anthropic-secret']), + }), + ); + }); + it('emits debug telemetry for sl query without project paths', async () => { vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); vi.stubEnv('CI', ''); diff --git a/packages/cli/test/sql.test.ts b/packages/cli/test/sql.test.ts index ef74fd49..5e297429 100644 --- a/packages/cli/test/sql.test.ts +++ b/packages/cli/test/sql.test.ts @@ -8,6 +8,12 @@ import type { SqlAnalysisPort } from '../src/context/sql-analysis/ports.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxSql } from '../src/sql.js'; +const reportExceptionMock = vi.hoisted(() => vi.fn(async () => {})); + +vi.mock('../src/telemetry/exception.js', () => ({ + reportException: reportExceptionMock, +})); + function makeIo(options: { isTTY?: boolean } = {}) { let stdout = ''; let stderr = ''; @@ -76,6 +82,7 @@ describe('runKtxSql', () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-sql-')); + reportExceptionMock.mockClear(); }); afterEach(async () => { @@ -236,9 +243,10 @@ describe('runKtxSql', () => { }); it('rejects non-read-only SQL before executing connector SQL', async () => { + vi.stubEnv('SQL_DB_PASSWORD', 'sql-db-password'); // pragma: allowlist secret const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); - await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } }); + await writeConnections(projectDir, { warehouse: { driver: 'postgres', password: 'env:SQL_DB_PASSWORD' } }); // pragma: allowlist secret const connector = makeConnector(); const io = makeIo(); @@ -265,6 +273,13 @@ describe('runKtxSql', () => { expect(connector.executeReadOnly).not.toHaveBeenCalled(); expect(connector.cleanup).not.toHaveBeenCalled(); expect(io.stderr()).toContain('SQL contains read/write operation: Delete'); + expect(reportExceptionMock).toHaveBeenCalledWith( + expect.objectContaining({ + context: expect.objectContaining({ source: 'sql run', handled: true, fatal: false }), + projectDir, + redactionSecrets: expect.arrayContaining(['sql-db-password']), + }), + ); }); it('rejects missing connections', async () => { diff --git a/packages/cli/test/telemetry/exception-payload.test.ts b/packages/cli/test/telemetry/exception-payload.test.ts new file mode 100644 index 00000000..da81e62e --- /dev/null +++ b/packages/cli/test/telemetry/exception-payload.test.ts @@ -0,0 +1,150 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { createServer, type IncomingMessage } from 'node:http'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { gunzipSync } from 'node:zlib'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { __resetTelemetryEmitterForTests } from '../../src/telemetry/emitter.js'; +import { + __resetTelemetryExceptionStateForTests, + reportException, +} from '../../src/telemetry/exception.js'; + +function makeIo(): KtxCliIo { + return { + stdout: { write: () => {} }, + stderr: { write: () => {} }, + }; +} + +async function body(req: IncomingMessage): Promise { + const chunks: Buffer[] = []; + for await (const chunk of req) { + chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)); + } + const raw = Buffer.concat(chunks); + return req.headers['content-encoding'] === 'gzip' ? gunzipSync(raw).toString('utf-8') : raw.toString('utf-8'); +} + +async function withCaptureServer(run: (url: string, payloads: unknown[]) => Promise): Promise { + const payloads: unknown[] = []; + const server = createServer(async (req, res) => { + if (req.method === 'POST') { + payloads.push(JSON.parse(await body(req))); + } + res.statusCode = 200; + res.setHeader('content-type', 'application/json'); + res.end('{}'); + }); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const address = server.address(); + if (!address || typeof address === 'string') { + throw new Error('test server did not bind to a TCP port'); + } + try { + return await run(`http://127.0.0.1:${address.port}`, payloads); + } finally { + await new Promise((resolve) => server.close(() => resolve())); + } +} + +function findExceptionEvent(payloads: unknown[]): Record { + for (const payload of payloads) { + if (typeof payload !== 'object' || payload === null) { + continue; + } + const record = payload as Record; + const batch = Array.isArray(record.batch) ? record.batch : [record]; + for (const item of batch) { + if (typeof item === 'object' && item !== null && (item as Record).event === '$exception') { + return item as Record; + } + } + } + throw new Error(`No $exception payload found: ${JSON.stringify(payloads)}`); +} + +describe('prepared Node exception payload', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-node-exception-payload-')); + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + `${JSON.stringify({ + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + createdAt: '2026-06-05T00:00:00.000Z', + })}\n`, + 'utf-8', + ); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('CI', ''); + vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + __resetTelemetryEmitterForTests(); + __resetTelemetryExceptionStateForTests(); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('sends projectId, omits $groups, and redacts the serialized exception list', async () => { + await withCaptureServer(async (endpoint, payloads) => { + vi.stubEnv('KTX_TELEMETRY_ENDPOINT', endpoint); + const projectDir = join(homeDir, 'project'); + const snapshotSecret = ['plain', 'secret', 'value'].join('-'); + const dbPassword = ['db', 'url', 'secret'].join('-'); + const authToken = ['abc', '123'].join(''); + const error = new Error( + `${snapshotSecret} postgres://svc:${dbPassword}@db.example.test/analytics Authorization: Basic ${authToken}`, + ); + + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + io: makeIo(), + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir, + immediate: true, + redactionSecrets: [snapshotSecret], + }); + + const event = findExceptionEvent(payloads); + const properties = event.properties as Record; + expect(properties.projectId).toMatch(/^[a-f0-9]{64}$/); + expect(properties.$groups).toBeUndefined(); + expect(JSON.stringify(properties.$exception_list)).toContain('[redacted]'); + expect(JSON.stringify(properties.$exception_list)).not.toContain(snapshotSecret); + expect(JSON.stringify(properties.$exception_list)).not.toContain(dbPassword); + expect(JSON.stringify(properties.$exception_list)).not.toContain(authToken); + for (const key of [ + 'argv', + 'args', + 'env', + 'environment', + 'sql', + 'query', + 'prompt', + 'mcpArguments', + 'tableName', + 'schemaName', + 'columnName', + 'databaseUrl', + 'connectionString', + 'url', + 'password', + 'token', + 'apiKey', + 'authorization', + ]) { + expect(properties).not.toHaveProperty(key); + } + }); + }); +}); diff --git a/packages/cli/test/telemetry/exception.test.ts b/packages/cli/test/telemetry/exception.test.ts new file mode 100644 index 00000000..01608935 --- /dev/null +++ b/packages/cli/test/telemetry/exception.test.ts @@ -0,0 +1,456 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { __resetTelemetryEmitterForTests } from '../../src/telemetry/emitter.js'; +import { + __resetTelemetryExceptionStateForTests, + reportException, +} from '../../src/telemetry/exception.js'; + +const captures: unknown[] = []; +const immediateCaptures: unknown[] = []; +const shutdown = vi.fn(async () => {}); + +vi.mock('posthog-node', () => ({ + PostHog: vi.fn(function PostHog() { + return { + captureException: ( + error: unknown, + distinctId?: string, + properties?: Record, + ) => { + captures.push({ error, distinctId, properties }); + }, + captureExceptionImmediate: async ( + error: unknown, + distinctId?: string, + properties?: Record, + ) => { + immediateCaptures.push({ error, distinctId, properties }); + }, + capture: vi.fn(), + shutdown, + }; + }), +})); + +function makeIo(): { io: KtxCliIo; stderr: () => string } { + let stderr = ''; + return { + io: { + stdout: { write: () => {} }, + stderr: { + write: (chunk) => { + stderr += chunk; + }, + }, + }, + stderr: () => stderr, + }; +} + +async function writeIdentity(homeDir: string, enabled = true): Promise { + const path = join(homeDir, '.ktx', 'telemetry.json'); + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + path, + `${JSON.stringify({ + installId: '00000000-0000-4000-8000-000000000000', + enabled, + createdAt: '2026-06-05T00:00:00.000Z', + })}\n`, + 'utf-8', + ); +} + +describe('reportException', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-exception-')); + await writeIdentity(homeDir); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('CI', ''); + vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + captures.length = 0; + immediateCaptures.length = 0; + shutdown.mockClear(); + __resetTelemetryEmitterForTests(); + __resetTelemetryExceptionStateForTests(); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('honors telemetry kill switches', async () => { + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + const { io } = makeIo(); + + await reportException({ + error: new Error('boom'), + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(captures).toEqual([]); + expect(immediateCaptures).toEqual([]); + }); + + it('prints debug payloads without sending', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + const { io, stderr } = makeIo(); + + await reportException({ + error: new Error('debug boom'), + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(stderr()).toContain('[telemetry-exception]'); + expect(stderr()).toContain('"source":"scan run"'); + expect(captures).toEqual([]); + }); + + it('sends projectId as a property and omits $groups for Node exceptions', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('project boom'), + context: { source: 'sql run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + projectDir: join(homeDir, 'project'), + }); + + expect(captures).toHaveLength(1); + expect(captures[0]).toMatchObject({ + distinctId: '00000000-0000-4000-8000-000000000000', + properties: { + source: 'sql run', + handled: true, + fatal: false, + cliVersion: '0.0.0-test', + runtime: 'node', + }, + }); + expect( + (captures[0] as { properties: Record }).properties.projectId, + ).toMatch(/^[a-f0-9]{64}$/); + expect((captures[0] as { properties: Record }).properties.$groups).toBeUndefined(); + }); + + it('uses captureExceptionImmediate for fatal reports', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('fatal boom'), + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(immediateCaptures).toHaveLength(1); + expect(captures).toEqual([]); + }); + + it('redacts snapshot secrets and static credential patterns from message and cause', async () => { + const { io } = makeIo(); + const cause = new Error('cause has sk-live-fixture-value and Authorization: Bearer token-123'); + const error = new Error('message has sk-live-fixture-value and password=hunter2', { cause }); + + await reportException({ + error, + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + redactionSecrets: ['sk-live-fixture-value'], + }); + + const sent = captures[0] as { error: Error & { cause?: Error } }; + expect(sent.error.message).toContain('[redacted]'); + expect(sent.error.message).not.toContain('sk-live-fixture-value'); + expect(sent.error.message).not.toContain('hunter2'); + expect(sent.error.cause?.message).not.toContain('token-123'); + }); + + it('redacts URL userinfo credentials and non-bearer authorization values', async () => { + const { io } = makeIo(); + const error = new Error( + 'connect postgres://svc:db-url-secret@db.example.test/analytics Authorization: Basic abc123', // pragma: allowlist secret + ); + + await reportException({ + error, + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { error: Error }; + expect(sent.error.message).toContain('postgres://svc:[redacted]@db.example.test/analytics'); + expect(sent.error.message).toContain('Authorization: [redacted]'); + expect(sent.error.message).not.toContain('db-url-secret'); + expect(sent.error.message).not.toContain('abc123'); + }); + + it('does not use process-global secret discovery when no snapshot is supplied', async () => { + vi.stubEnv('KTX_FAKE_SECRET', 'plain-secret-without-pattern'); + const { io } = makeIo(); + + await reportException({ + error: new Error('plain-secret-without-pattern'), + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { error: Error }; + expect(sent.error.message).toContain('plain-secret-without-pattern'); + }); + + it('dedupes the same Error instance between operation and global tiers', async () => { + const { io } = makeIo(); + const error = new Error('same object'); + + await reportException({ + error, + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error, + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(1); + expect(immediateCaptures).toHaveLength(0); + }); + + it('captures wrapped Error causes as distinct logical occurrences', async () => { + const { io } = makeIo(); + const inner = new Error('inner'); + const wrapper = new Error('outer', { cause: inner }); + + await reportException({ + error: inner, + context: { source: 'sl query', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: wrapper, + context: { source: 'uncaughtException', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(1); + expect(immediateCaptures).toHaveLength(1); + }); + + it('dedupes primitive and plain-object throwables propagated to the global tier', async () => { + const { io } = makeIo(); + const objectThrowable = { message: 'plain object' }; + + await reportException({ + error: 'primitive boom', + context: { source: 'mcp:sql_execution', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: 'primitive boom', + context: { source: 'unhandledRejection', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + await reportException({ + error: objectThrowable, + context: { source: 'mcp:discover_data', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: objectThrowable, + context: { source: 'unhandledRejection', handled: false, fatal: true }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + immediate: true, + }); + + expect(captures).toHaveLength(2); + expect(immediateCaptures).toHaveLength(0); + }); + + it('does not collapse independent primitive throw events with the same value', async () => { + const { io } = makeIo(); + + await reportException({ + error: 'oops', + context: { source: 'scan run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + await reportException({ + error: 'oops', + context: { source: 'sql run', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + expect(captures).toHaveLength(2); + }); + + it('drops forbidden caller-supplied extra property keys', async () => { + const { io } = makeIo(); + + await reportException({ + error: new Error('extra property boom'), + context: { + source: 'sql run', + handled: true, + fatal: false, + extra: { + sql: 'select * from private_table', + tableName: 'private_table', + schemaName: 'private_schema', + columnName: 'private_column', + argv: '--password secret', + env: 'KTX_TOKEN=secret', + password: 'secret-password', // pragma: allowlist secret + token: 'secret-token', + prompt: 'user prompt', + safeCount: 3, + }, + }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + + const sent = captures[0] as { properties: Record }; + expect(sent.properties.safeCount).toBe(3); + for (const key of [ + 'sql', + 'tableName', + 'schemaName', + 'columnName', + 'argv', + 'env', + 'password', + 'token', + 'prompt', + ]) { + expect(sent.properties).not.toHaveProperty(key); + } + }); + + it('redacts every required static credential pattern and leaves benign text intact', async () => { + const { io } = makeIo(); + const cases: Array<{ message: string; leaked: string; expected: string }> = [ + { + message: 'dsn password=hunter2', + leaked: 'hunter2', + expected: 'password=[redacted]', + }, + { + message: 'dsn pwd=swordfish', + leaked: 'swordfish', + expected: 'pwd=[redacted]', + }, + { + message: 'Authorization: Basic abc123', + leaked: 'abc123', + expected: 'Authorization: [redacted]', + }, + { + message: 'Authorization: Bearer token-123', + leaked: 'token-123', + expected: 'Authorization: [redacted]', + }, + { + message: 'Bearer standalone-token', + leaked: 'standalone-token', + expected: 'Bearer [redacted]', + }, + { + message: 'api_key=sk-live-secret', + leaked: 'sk-live-secret', + expected: 'api_key=[redacted]', + }, + { + message: 'api-key: sk-dash-secret', + leaked: 'sk-dash-secret', + expected: 'api-key=[redacted]', + }, + { + message: 'KTX_PROVIDER_TOKEN=ktx-secret', + leaked: 'ktx-secret', + expected: 'KTX_PROVIDER_TOKEN=[redacted]', + }, + { + message: 'REFRESH_SECRET: refresh-secret', + leaked: 'refresh-secret', + expected: 'REFRESH_SECRET=[redacted]', + }, + { + message: 'https://s3.example.test/file?X-Amz-Signature=aws-secret&ok=1', + leaked: 'aws-secret', + expected: 'X-Amz-Signature=[redacted]', + }, + { + message: 'https://storage.example.test/file?X-Goog-Signature=goog-secret&ok=1', + leaked: 'goog-secret', + expected: 'X-Goog-Signature=[redacted]', + }, + { + message: 'https://cdn.example.test/file?sig=signed-secret&ok=1', + leaked: 'signed-secret', + expected: 'sig=[redacted]', + }, + { + message: 'postgres://svc:url-password@db.example.test/analytics', // pragma: allowlist secret + leaked: 'url-password', + expected: 'postgres://svc:[redacted]@db.example.test/analytics', + }, + ]; + + for (const item of cases) { + await reportException({ + error: new Error(item.message), + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + const sent = captures[captures.length - 1] as { error: Error }; + expect(sent.error.message).toContain(item.expected); + expect(sent.error.message).not.toContain(item.leaked); + } + + await reportException({ + error: new Error('token bucket metrics and passwordless auth are benign'), + context: { source: 'connection test', handled: true, fatal: false }, + io, + packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' }, + }); + const benign = captures[captures.length - 1] as { error: Error }; + expect(benign.error.message).toBe('token bucket metrics and passwordless auth are benign'); + }); +}); diff --git a/packages/cli/test/telemetry/index.test.ts b/packages/cli/test/telemetry/index.test.ts index 7e88410f..3531116a 100644 --- a/packages/cli/test/telemetry/index.test.ts +++ b/packages/cli/test/telemetry/index.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import type { KtxCliIo } from '../../src/cli-runtime.js'; +import { createGlobalExceptionReporter, type KtxCliIo } from '../../src/cli-runtime.js'; import { beginCommandSpan, emitAbortedCommandAndShutdown, emitTelemetryEvent } from '../../src/telemetry/index.js'; import { resetCommandSpan } from '../../src/telemetry/command-hook.js'; @@ -120,3 +120,36 @@ describe('emitAbortedCommandAndShutdown', () => { expect(secondIo.stderr()).not.toContain('"event":"command"'); }); }); + +describe('global exception reporting contract', () => { + let homeDir: string; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-telemetry-global-exception-')); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('KTX_TELEMETRY_DISABLED', '1'); + vi.stubEnv('DO_NOT_TRACK', ''); + vi.stubEnv('CI', ''); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(homeDir, { recursive: true, force: true }); + }); + + it('reports uncaughtException through the fatal debug payload', async () => { + const testIo = makeIo(); + const report = createGlobalExceptionReporter(testIo.io, { + name: '@kaelio/ktx', + version: '0.0.0-test', + }); + + await report('uncaughtException', new Error('global boom')); + + expect(testIo.stderr()).toContain('[telemetry-exception]'); + expect(testIo.stderr()).toContain('"source":"uncaughtException"'); + expect(testIo.stderr()).toContain('"handled":false'); + expect(testIo.stderr()).toContain('"fatal":true'); + }); +}); diff --git a/packages/cli/test/telemetry/redaction-secrets.test.ts b/packages/cli/test/telemetry/redaction-secrets.test.ts new file mode 100644 index 00000000..cdc15f22 --- /dev/null +++ b/packages/cli/test/telemetry/redaction-secrets.test.ts @@ -0,0 +1,127 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../../src/context/project/config.js'; +import { initKtxProject } from '../../src/context/project/project.js'; +import { collectTelemetryRedactionSecrets } from '../../src/telemetry/redaction-secrets.js'; + +describe('collectTelemetryRedactionSecrets', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-redaction-secrets-')); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + await rm(tempDir, { recursive: true, force: true }); + }); + + async function writeConfig(projectDir: string): Promise { + const configPath = join(projectDir, 'ktx.yaml'); + const config = parseKtxProjectConfig(await readFile(configPath, 'utf-8')); + await writeFile( + configPath, + serializeKtxProjectConfig({ + ...config, + llm: { + ...config.llm, + provider: { + backend: 'anthropic', + anthropic: { api_key: 'env:ANTHROPIC_API_KEY' }, // pragma: allowlist secret + }, + models: { default: 'claude-sonnet-4-6' }, + }, + ingest: { + ...config.ingest, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { api_key: 'file:~/.ktx/secrets/openai-key' }, // pragma: allowlist secret + }, + }, + scan: { + ...config.scan, + enrichment: { + ...config.scan.enrichment, + embeddings: { + backend: 'openai', + model: 'text-embedding-3-small', + dimensions: 1536, + openai: { api_key: 'env:SCAN_OPENAI_API_KEY' }, // pragma: allowlist secret + }, + }, + }, + connections: { + warehouse: { + driver: 'postgres', + url: 'env:DATABASE_URL', + password: 'file:~/.ktx/secrets/db-password', // pragma: allowlist secret + }, + docs: { + driver: 'notion', + auth_token_ref: 'env:NOTION_TOKEN', // pragma: allowlist secret + }, + }, + }), + 'utf-8', + ); + } + + it('derives only declared project secrets and parsed URL credentials', async () => { + const homeDir = join(tempDir, 'home'); + const projectDir = join(tempDir, 'project'); + await mkdir(join(homeDir, '.ktx', 'secrets'), { recursive: true }); + await writeFile(join(homeDir, '.ktx', 'secrets', 'openai-key'), 'openai-file-secret\n', 'utf-8'); + await writeFile(join(homeDir, '.ktx', 'secrets', 'db-password'), 'db-file-password\n', 'utf-8'); + vi.stubEnv('HOME', homeDir); + vi.stubEnv('ANTHROPIC_API_KEY', 'anthropic-env-secret'); + vi.stubEnv('SCAN_OPENAI_API_KEY', 'scan-openai-env-secret'); + vi.stubEnv('DATABASE_URL', 'postgres://svc:db-url-password@db.example.test/analytics'); // pragma: allowlist secret + vi.stubEnv('NOTION_TOKEN', 'notion-env-secret'); + vi.stubEnv('UNDECLARED_SECRET', 'must-not-appear'); + await initKtxProject({ projectDir }); + await writeConfig(projectDir); + + const secrets = await collectTelemetryRedactionSecrets({ + projectDir, + connectionId: 'warehouse', + includeLlm: true, + includeEmbeddings: true, + env: process.env, + }); + + expect(secrets).toEqual( + expect.arrayContaining([ + 'anthropic-env-secret', + 'openai-file-secret', + 'scan-openai-env-secret', + 'postgres://svc:db-url-password@db.example.test/analytics', // pragma: allowlist secret + 'db-url-password', + 'db-file-password', + ]), + ); + expect(secrets).not.toContain('notion-env-secret'); + expect(secrets).not.toContain('must-not-appear'); + }); + + it('can derive a named non-database connection secret', async () => { + const projectDir = join(tempDir, 'project'); + vi.stubEnv('NOTION_TOKEN', 'notion-env-secret'); + await initKtxProject({ projectDir }); + await writeConfig(projectDir); + + const secrets = await collectTelemetryRedactionSecrets({ + projectDir, + connectionId: 'docs', + includeLlm: false, + includeEmbeddings: false, + env: process.env, + }); + + expect(secrets).toEqual(['notion-env-secret']); + }); +}); diff --git a/python/ktx-daemon/src/ktx_daemon/__main__.py b/python/ktx-daemon/src/ktx_daemon/__main__.py index 2fc00186..cbc2e228 100644 --- a/python/ktx-daemon/src/ktx_daemon/__main__.py +++ b/python/ktx-daemon/src/ktx_daemon/__main__.py @@ -6,6 +6,8 @@ import argparse import json import sys import time +from collections.abc import Callable +from types import TracebackType from typing import Any from pydantic import ValidationError @@ -90,6 +92,41 @@ def _read_stdin_json() -> dict[str, Any]: return parsed +def install_serve_http_exception_hooks(started_at: float) -> Callable[[], None]: + original_hook = sys.excepthook + + def hook( + exc_type: type[BaseException], + exc: BaseException, + tb: TracebackType | None, + ) -> None: + report_serve_http_crash(exc, started_at=started_at) + original_hook(exc_type, exc, tb) + + sys.excepthook = hook + + def dispose() -> None: + sys.excepthook = original_hook + + return dispose + + +def report_serve_http_crash(error: BaseException, *, started_at: float) -> None: + from ktx_daemon.telemetry import report_exception + from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once + + report_exception( + error, + source="serve-http", + handled=False, + fatal=True, + ) + emit_daemon_stopped_once( + reason="crash", + uptime_ms=max(0, (time.perf_counter() - started_at) * 1000), + ) + + def run_http_server( *, host: str, @@ -102,15 +139,23 @@ def run_http_server( from ktx_daemon.app import create_app started_at = time.perf_counter() - uvicorn.run( - create_app( - enable_code_execution=enable_code_execution, - telemetry_started_at=started_at, - ), - host=host, - port=port, - log_level=log_level, - ) + dispose_hooks = install_serve_http_exception_hooks(started_at) + try: + try: + uvicorn.run( + create_app( + enable_code_execution=enable_code_execution, + telemetry_started_at=started_at, + ), + host=host, + port=port, + log_level=log_level, + ) + except Exception as error: + report_serve_http_crash(error, started_at=started_at) + raise + finally: + dispose_hooks() def main(argv: list[str] | None = None) -> int: @@ -169,6 +214,14 @@ def main(argv: list[str] | None = None) -> int: sys.stderr.write(f"{error}\n") return 1 except Exception as error: + from ktx_daemon.telemetry import report_exception + + report_exception( + error, + source=str(args.command), + handled=True, + fatal=False, + ) sys.stderr.write(f"{type(error).__name__}: {error}\n") return 1 diff --git a/python/ktx-daemon/src/ktx_daemon/app.py b/python/ktx-daemon/src/ktx_daemon/app.py index 7a3fa950..5860c4e4 100644 --- a/python/ktx-daemon/src/ktx_daemon/app.py +++ b/python/ktx-daemon/src/ktx_daemon/app.py @@ -10,8 +10,8 @@ from contextlib import asynccontextmanager from collections.abc import Callable from typing import Any -from fastapi import FastAPI, HTTPException -from fastapi.responses import Response +from fastapi import FastAPI, HTTPException, Request +from fastapi.responses import JSONResponse, Response from ktx_daemon import VERSION from ktx_daemon.code_execution import ( @@ -65,9 +65,11 @@ from ktx_daemon.table_identifier import ( ParseTableIdentifierBatchResponse, parse_table_identifier_response, ) -from ktx_daemon.telemetry import track_telemetry_event +from ktx_daemon.telemetry import report_exception, track_telemetry_event +from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once logger = logging.getLogger(__name__) +CREDENTIAL_KEYS = {"url", "password", "token", "api_key", "apikey", "auth_header"} class NumpyORJSONResponse(Response): @@ -77,6 +79,36 @@ class NumpyORJSONResponse(Response): return dumps_numpy_json(content) +def _route_source(request: Request) -> str: + route = request.scope.get("route") + path = getattr(route, "path", None) + if isinstance(path, str) and path: + return f"app:{path}" + return f"app:{request.url.path}" + + +def _secret_snapshot_from_payload(value: Any) -> list[str]: + secrets: list[str] = [] + if isinstance(value, dict): + for key, child in value.items(): + normalized_key = str(key).lower() + if normalized_key in CREDENTIAL_KEYS and isinstance(child, str) and child: + secrets.append(child) + secrets.extend(_secret_snapshot_from_payload(child)) + elif isinstance(value, list): + for child in value: + secrets.extend(_secret_snapshot_from_payload(child)) + return secrets + + +async def _request_secret_snapshot(request: Request) -> list[str]: + try: + payload = await request.json() + except Exception: + return [] + return _secret_snapshot_from_payload(payload) + + def create_app( *, embedding_provider: EmbeddingProvider | None = None, @@ -104,12 +136,9 @@ def create_app( try: yield finally: - track_telemetry_event( - "daemon_stopped", - { - "reason": "request", - "uptimeMs": max(0, (clock() - started_at) * 1000), - }, + emit_daemon_stopped_once( + reason="request", + uptime_ms=max(0, (clock() - started_at) * 1000), ) app = FastAPI( @@ -119,6 +148,25 @@ def create_app( lifespan=lifespan, ) + @app.middleware("http") + async def report_unhandled_exceptions(request: Request, call_next): + redaction_secrets = await _request_secret_snapshot(request) + try: + return await call_next(request) + except Exception as error: + logger.exception("Unhandled daemon request failed: %s", error) + report_exception( + error, + source=_route_source(request), + handled=True, + fatal=False, + redaction_secrets=redaction_secrets, + ) + return JSONResponse( + status_code=500, + content={"detail": f"Daemon request failed: {error}"}, + ) + @app.get("/health") async def health() -> dict[str, str]: response = {"status": "healthy"} @@ -137,12 +185,6 @@ def create_app( except ValueError as error: logger.warning("Database introspection rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Database introspection failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Database introspection failed: {error}", - ) from error @app.post("/embeddings/compute", response_model=ComputeEmbeddingResponse) async def embedding_compute( @@ -156,12 +198,6 @@ def create_app( except ValueError as error: logger.warning("Embedding compute rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Embedding compute failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Embedding compute failed: {error}", - ) from error @app.post( "/embeddings/compute-bulk", @@ -178,12 +214,6 @@ def create_app( except ValueError as error: logger.warning("Bulk embedding compute rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Bulk embedding compute failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Bulk embedding compute failed: {error}", - ) from error if enable_code_execution: @@ -193,29 +223,15 @@ def create_app( response_class=NumpyORJSONResponse, ) async def code_execute(request: ExecuteCodeRequest) -> ExecuteCodeResponse: - try: - return execute_code_response( - request, - nest_api_url=None, - auth_header=None, - ) - except Exception as error: - logger.exception("Code execution failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Code execution failed: {error}", - ) from error + return execute_code_response( + request, + nest_api_url=None, + auth_header=None, + ) @app.post("/lookml/parse", response_model=ParseLookMLResponse) async def lookml_parse(request: ParseLookMLRequest) -> ParseLookMLResponse: - try: - return parse_lookml_project(request) - except Exception as error: - logger.exception("LookML parsing failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"LookML parsing failed: {error}", - ) from error + return parse_lookml_project(request) @app.post( "/sql/parse-table-identifier", @@ -224,40 +240,19 @@ def create_app( async def sql_parse_table_identifier( request: ParseTableIdentifierBatchRequest, ) -> ParseTableIdentifierBatchResponse: - try: - return parse_table_identifier_response(request) - except Exception as error: - logger.exception("Table identifier parsing failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Table identifier parsing failed: {error}", - ) from error + return parse_table_identifier_response(request) @app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse) async def sql_validate_read_only( request: ValidateReadOnlySqlRequest, ) -> ValidateReadOnlySqlResponse: - try: - return validate_read_only_sql_response(request) - except Exception as error: - logger.exception("SQL read-only validation failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"SQL read-only validation failed: {error}", - ) from error + return validate_read_only_sql_response(request) @app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse) async def sql_analyze_batch( request: AnalyzeSqlBatchRequest, ) -> AnalyzeSqlBatchResponse: - try: - return analyze_sql_batch_response(request) - except Exception as error: - logger.exception("SQL batch analysis failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"SQL batch analysis failed: {error}", - ) from error + return analyze_sql_batch_response(request) @app.post( "/semantic-layer/generate-sources", response_model=GenerateSourcesResponse @@ -265,14 +260,7 @@ def create_app( async def semantic_generate_sources( request: GenerateSourcesRequest, ) -> GenerateSourcesResponse: - try: - return generate_sources_response(request) - except Exception as error: - logger.exception("Semantic source generation failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Semantic source generation failed: {error}", - ) from error + return generate_sources_response(request) @app.post("/semantic-layer/query", response_model=SemanticLayerQueryResponse) async def semantic_query( @@ -283,12 +271,6 @@ def create_app( except ValueError as error: logger.warning("Semantic query rejected: %s", error) raise HTTPException(status_code=400, detail=str(error)) from error - except Exception as error: - logger.exception("Semantic query failed: %s", error) - raise HTTPException( - status_code=500, - detail=f"Semantic layer query failed: {error}", - ) from error @app.post("/semantic-layer/validate", response_model=ValidateSourcesResponse) async def semantic_validate( diff --git a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py index 78f57338..f58c6e39 100644 --- a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py +++ b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py @@ -5,7 +5,7 @@ from __future__ import annotations import time from typing import Any -from ktx_daemon.telemetry import error_class, track_telemetry_event +from ktx_daemon.telemetry import error_class, report_exception, track_telemetry_event from pydantic import BaseModel, ConfigDict, Field from semantic_layer.duplicate_check import validate_measure_duplicates from semantic_layer.engine import SemanticEngine @@ -150,6 +150,13 @@ def query_semantic_layer( track_telemetry_event( "sql_gen_completed", sql_fields, project_id=request.project_id ) + report_exception( + error, + source="semantic-query", + handled=True, + fatal=False, + project_id=request.project_id, + ) raise diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py index ff9cd07f..bef42338 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py @@ -1,5 +1,12 @@ from __future__ import annotations +from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event +from ktx_daemon.telemetry.exception import report_exception -__all__ = ["error_class", "track_telemetry_event"] +__all__ = [ + "emit_daemon_stopped_once", + "error_class", + "report_exception", + "track_telemetry_event", +] diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py b/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py new file mode 100644 index 00000000..dc635601 --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/daemon_lifecycle.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Literal + +from ktx_daemon.telemetry.emitter import track_telemetry_event + +StopReason = Literal["signal", "request", "crash"] + +_daemon_stop_emitted = False + + +def emit_daemon_stopped_once(*, reason: StopReason, uptime_ms: float) -> bool: + global _daemon_stop_emitted + if _daemon_stop_emitted: + return False + _daemon_stop_emitted = True + track_telemetry_event( + "daemon_stopped", + { + "reason": reason, + "uptimeMs": max(0, uptime_ms), + }, + ) + return True + + +def reset_daemon_lifecycle_for_tests() -> None: + global _daemon_stop_emitted + _daemon_stop_emitted = False diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py b/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py new file mode 100644 index 00000000..00050d1c --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/exception.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import json +import os +import re +import sys +from collections.abc import Mapping, Sequence +from pathlib import Path +from typing import Any + +from ktx_daemon import VERSION +from ktx_daemon.telemetry.emitter import POSTHOG_HOST, POSTHOG_PROJECT_API_KEY +from ktx_daemon.telemetry.events import _common_envelope +from ktx_daemon.telemetry.identity import load_telemetry_identity + +_KTX_REPORTED_ATTR = "__ktx_posthog_exception_reported" + + +def _debug_enabled(env: Mapping[str, str]) -> bool: + return env.get("KTX_TELEMETRY_DEBUG") == "1" + + +def _host(env: Mapping[str, str]) -> str: + return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST + + +def _redact_static(value: str) -> str: + patterns = [ + ( + r"([a-z][a-z0-9+.-]*://[^:\s/@]+:)([^@\s/]+)(@)", + r"\1[redacted]\3", + ), + (r"\b(password|pwd)=([^;&\s]+)", r"\1=[redacted]"), + (r"\bAuthorization\s*:\s*[^\r\n,;]+", "Authorization: [redacted]"), + (r"\bBearer\s+[A-Za-z0-9._~+/=-]+", "Bearer [redacted]"), + (r"\b(api[_-]?key)\s*[:=]\s*([^\s,;]+)", r"\1=[redacted]"), + ( + r"\b(KTX_[A-Z0-9_]*|[A-Z0-9_]*(?:TOKEN|SECRET))\s*[:=]\s*([^\s,;]+)", + r"\1=[redacted]", + ), + (r"([?&](?:X-Amz-Signature|X-Goog-Signature|sig)=)[^&\s]+", r"\1[redacted]"), + ] + redacted = value + for pattern, replacement in patterns: + redacted = re.sub(pattern, replacement, redacted, flags=re.IGNORECASE) + return redacted + + +def _redact_text(value: str, secrets: Sequence[str]) -> str: + redacted = value + for secret in secrets: + if secret: + redacted = redacted.replace(secret, "[redacted]") + return _redact_static(redacted) + + +def _clone_exception(exception: BaseException, secrets: Sequence[str]) -> BaseException: + redacted_args = [_redact_text(str(arg), secrets) for arg in exception.args] + try: + cloned = type(exception)(*redacted_args) + except Exception: + cloned = RuntimeError(_redact_text(str(exception), secrets)) + cloned.__traceback__ = exception.__traceback__ + cloned.__cause__ = ( + _clone_exception(exception.__cause__, secrets) if exception.__cause__ else None + ) + cloned.__context__ = ( + _clone_exception(exception.__context__, secrets) + if exception.__context__ + else None + ) + return cloned + + +def _should_skip_as_reported(exception: BaseException) -> bool: + if getattr(exception, _KTX_REPORTED_ATTR, False): + return True + try: + setattr(exception, _KTX_REPORTED_ATTR, True) + except Exception: + return False + return False + + +def _properties(*, source: str, handled: bool, fatal: bool) -> dict[str, Any]: + return { + **_common_envelope(), + "daemonVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION), + "source": source, + "handled": handled, + "fatal": fatal, + } + + +def report_exception( + exception: BaseException, + *, + source: str, + handled: bool, + fatal: bool, + project_id: str | None = None, + home_dir: Path | None = None, + env: Mapping[str, str] | None = None, + redaction_secrets: Sequence[str] | None = None, +) -> None: + source_env = env if env is not None else os.environ + try: + identity = load_telemetry_identity(home_dir=home_dir, env=source_env) + if not identity.enabled or not identity.install_id: + return + + if _should_skip_as_reported(exception): + return + + properties = _properties(source=source, handled=handled, fatal=fatal) + groups = {"project": project_id} if project_id else None + safe_exception = _clone_exception(exception, redaction_secrets or []) + + if _debug_enabled(source_env): + sys.stderr.write( + "[telemetry-exception] " + + json.dumps( + { + "distinctId": identity.install_id, + "message": str(safe_exception), + "properties": properties, + "groups": groups, + }, + sort_keys=True, + ) + + "\n" + ) + return + + if not POSTHOG_PROJECT_API_KEY.strip() or not _host(source_env).strip(): + return + + from posthog import Posthog + + client = Posthog( + POSTHOG_PROJECT_API_KEY, + host=_host(source_env), + flush_at=1, + flush_interval=0, + sync_mode=True, + timeout=1, + ) + client.capture_exception( + safe_exception, + distinct_id=identity.install_id, + properties=properties, + groups=groups, + ) + client.shutdown() + except Exception: + return diff --git a/python/ktx-daemon/tests/test_app.py b/python/ktx-daemon/tests/test_app.py index 2c3237ad..fffc2899 100644 --- a/python/ktx-daemon/tests/test_app.py +++ b/python/ktx-daemon/tests/test_app.py @@ -87,8 +87,10 @@ def test_app_lifespan_emits_daemon_lifecycle_debug_events( monkeypatch, capsys, ) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import reset_daemon_lifecycle_for_tests from ktx_daemon.telemetry.identity import reset_identity_cache + reset_daemon_lifecycle_for_tests() reset_identity_cache() identity_path = tmp_path / ".ktx" / "telemetry.json" identity_path.parent.mkdir(parents=True) diff --git a/python/ktx-daemon/tests/test_exception_payload.py b/python/ktx-daemon/tests/test_exception_payload.py new file mode 100644 index 00000000..3198b08f --- /dev/null +++ b/python/ktx-daemon/tests/test_exception_payload.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import gzip +import json +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer +from pathlib import Path +from typing import Any + +from ktx_daemon.telemetry.identity import reset_identity_cache + + +class CaptureHandler(BaseHTTPRequestHandler): + payloads: list[dict[str, Any]] = [] + + def do_POST(self) -> None: + length = int(self.headers.get("content-length", "0")) + raw = self.rfile.read(length) + if self.headers.get("content-encoding") == "gzip": + raw = gzip.decompress(raw) + self.payloads.append(json.loads(raw.decode("utf-8"))) + self.send_response(200) + self.send_header("content-type", "application/json") + self.end_headers() + self.wfile.write(b"{}") + + def log_message(self, _format: str, *_args: object) -> None: + return + + +def write_identity(home: Path) -> None: + target = home / ".ktx" / "telemetry.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": True, + "createdAt": "2026-06-05T00:00:00.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + + +def find_exception_event(payloads: list[dict[str, Any]]) -> dict[str, Any]: + for payload in payloads: + batch = payload.get("batch") + events = batch if isinstance(batch, list) else [payload] + for event in events: + if isinstance(event, dict) and event.get("event") == "$exception": + return event + raise AssertionError(f"No $exception payload found: {payloads}") + + +def test_prepared_python_exception_payload_groups_and_redacts(tmp_path: Path) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + CaptureHandler.payloads.clear() + server = HTTPServer(("127.0.0.1", 0), CaptureHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + snapshot_secret = "-".join(["plain", "secret", "value"]) + db_password = "-".join(["db", "url", "secret"]) + auth_token = "".join(["abc", "123"]) + report_exception( + RuntimeError( + f"{snapshot_secret} postgres://svc:{db_password}@db.example.test/analytics " + f"Authorization: Basic {auth_token}" + ), + source="database-introspect", + handled=True, + fatal=False, + project_id="a" * 64, + home_dir=tmp_path, + env={"KTX_TELEMETRY_ENDPOINT": f"http://127.0.0.1:{server.server_port}"}, + redaction_secrets=[snapshot_secret], + ) + finally: + server.shutdown() + server.server_close() + thread.join(timeout=2) + + event = find_exception_event(CaptureHandler.payloads) + properties = event["properties"] + assert event.get("$groups") == {"project": "a" * 64} or properties.get( + "$groups" + ) == {"project": "a" * 64} + serialized = json.dumps(properties.get("$exception_list", [])) + assert "[redacted]" in serialized + assert snapshot_secret not in serialized + assert db_password not in serialized + assert auth_token not in serialized + forbidden_keys = { + "argv", + "args", + "env", + "environment", + "sql", + "query", + "prompt", + "mcpArguments", + "tableName", + "schemaName", + "columnName", + "databaseUrl", + "connectionString", + "url", + "password", + "token", + "apiKey", + "authorization", + } + assert forbidden_keys.isdisjoint(properties.keys()) diff --git a/python/ktx-daemon/tests/test_exception_telemetry.py b/python/ktx-daemon/tests/test_exception_telemetry.py new file mode 100644 index 00000000..43da007d --- /dev/null +++ b/python/ktx-daemon/tests/test_exception_telemetry.py @@ -0,0 +1,601 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from ktx_daemon.telemetry.identity import reset_identity_cache + + +class FakePosthog: + captures: list[dict[str, Any]] = [] + shutdowns = 0 + + def __init__(self, *_args: Any, **_kwargs: Any) -> None: + pass + + def capture_exception( + self, + exception: BaseException, + *, + distinct_id: str, + properties: dict[str, Any], + groups: dict[str, str] | None = None, + ) -> None: + self.captures.append( + { + "exception": exception, + "distinct_id": distinct_id, + "properties": properties, + "groups": groups, + } + ) + + def shutdown(self) -> None: + type(self).shutdowns += 1 + + +def write_identity(home: Path, *, enabled: bool = True) -> None: + target = home / ".ktx" / "telemetry.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": enabled, + "createdAt": "2026-06-05T00:00:00.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + + +def test_report_exception_respects_disabled_gate(tmp_path: Path, monkeypatch) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + monkeypatch.setenv("KTX_TELEMETRY_DISABLED", "1") + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("boom"), + source="semantic-query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={"KTX_TELEMETRY_DISABLED": "1"}, + ) + + assert FakePosthog.captures == [] + + +def test_report_exception_sends_groups_and_properties( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("boom"), + source="semantic-query", + handled=True, + fatal=False, + project_id="a" * 64, + home_dir=tmp_path, + env={}, + ) + + assert FakePosthog.captures == [ + { + "exception": FakePosthog.captures[0]["exception"], + "distinct_id": "00000000-0000-4000-8000-000000000000", + "properties": FakePosthog.captures[0]["properties"], + "groups": {"project": "a" * 64}, + } + ] + assert FakePosthog.captures[0]["properties"]["source"] == "semantic-query" + assert FakePosthog.captures[0]["properties"]["handled"] is True + assert FakePosthog.captures[0]["properties"]["fatal"] is False + assert FakePosthog.captures[0]["properties"]["runtime"] == "daemon-py" + + +def test_report_exception_debug_prints_without_sending(tmp_path: Path, capsys) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + + report_exception( + RuntimeError("debug boom"), + source="app:/health", + handled=True, + fatal=False, + home_dir=tmp_path, + env={"KTX_TELEMETRY_DEBUG": "1"}, + ) + + captured = capsys.readouterr() + assert "[telemetry-exception]" in captured.err + assert '"source": "app:/health"' in captured.err + assert FakePosthog.captures == [] + + +def test_report_exception_redacts_snapshot_and_static_patterns( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + error = RuntimeError("dsn has plain-secret and password=hunter2") + error.__cause__ = ValueError("Authorization: Bearer token-123") + + report_exception( + error, + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + redaction_secrets=["plain-secret"], + ) + + sent = FakePosthog.captures[0]["exception"] + assert "[redacted]" in str(sent) + assert "plain-secret" not in str(sent) + assert "hunter2" not in str(sent) + assert "token-123" not in str(sent.__cause__) + + +def test_report_exception_does_not_discover_env_values_without_snapshot( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setenv("KTX_FAKE_SECRET", "plain-secret-without-pattern") + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + RuntimeError("plain-secret-without-pattern"), + source="sys.excepthook", + handled=False, + fatal=True, + home_dir=tmp_path, + env={}, + ) + + assert "plain-secret-without-pattern" in str(FakePosthog.captures[0]["exception"]) + + +def test_route_derived_boundary_reports_new_throwing_route(monkeypatch) -> None: + from fastapi import FastAPI + from fastapi.testclient import TestClient + from ktx_daemon.app import create_app + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr("ktx_daemon.app.report_exception", fake_report) + app: FastAPI = create_app() + + @app.get("/new-throwing-route") + async def new_throwing_route() -> dict[str, str]: + raise RuntimeError("route boom") + + client = TestClient(app, raise_server_exceptions=False) + response = client.get("/new-throwing-route") + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] in {"app:/new-throwing-route", "app:new_throwing_route"} + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + + +def test_route_derived_boundary_covers_existing_validate_route(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr( + app_module, + "validate_semantic_layer", + lambda _request: (_ for _ in ()).throw(RuntimeError("validate boom")), + ) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.post("/semantic-layer/validate", json={"sources": []}) + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] in { + "app:/semantic-layer/validate", + "app:semantic_validate", + } + + +def test_daemon_stopped_clean_shutdown_emits_request_once(monkeypatch) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import ( + emit_daemon_stopped_once, + reset_daemon_lifecycle_for_tests, + ) + + events: list[tuple[str, dict[str, object]]] = [] + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event", + lambda name, fields: events.append((name, fields)), + ) + reset_daemon_lifecycle_for_tests() + + emit_daemon_stopped_once(reason="request", uptime_ms=1) + emit_daemon_stopped_once(reason="request", uptime_ms=2) + + assert events == [("daemon_stopped", {"reason": "request", "uptimeMs": 1})] + + +def test_daemon_stopped_crash_wins_over_request(monkeypatch) -> None: + from ktx_daemon.telemetry.daemon_lifecycle import ( + emit_daemon_stopped_once, + reset_daemon_lifecycle_for_tests, + ) + + events: list[tuple[str, dict[str, object]]] = [] + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event", + lambda name, fields: events.append((name, fields)), + ) + reset_daemon_lifecycle_for_tests() + + emit_daemon_stopped_once(reason="crash", uptime_ms=3) + emit_daemon_stopped_once(reason="request", uptime_ms=4) + + assert events == [("daemon_stopped", {"reason": "crash", "uptimeMs": 3})] + + +def test_report_exception_dedupes_same_exception_object( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + error = RuntimeError("same object") + + report_exception( + error, + source="semantic-query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + report_exception( + error, + source="app:/semantic-layer/query", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + + assert len(FakePosthog.captures) == 1 + + +def test_report_exception_redacts_url_userinfo_and_authorization( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + db_password = ["db", "url", "secret"] + auth_token = ["abc", "123"] + report_exception( + RuntimeError( + "connect postgres://svc:" + + "-".join(db_password) + + "@db.example.test/analytics Authorization: Basic " + + "".join(auth_token) + ), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + + sent = str(FakePosthog.captures[0]["exception"]) + assert "postgres://svc:[redacted]@db.example.test/analytics" in sent + assert "Authorization: [redacted]" in sent + assert "-".join(db_password) not in sent + assert "".join(auth_token) not in sent + + +def test_report_exception_falls_back_when_exception_type_cannot_be_reconstructed( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + class KeywordOnlyException(Exception): + def __init__(self, *, message: str) -> None: + super().__init__(message) + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + report_exception( + KeywordOnlyException(message="custom secret-value"), + source="app:/custom", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + redaction_secrets=["secret-value"], + ) + + assert len(FakePosthog.captures) == 1 + sent = FakePosthog.captures[0]["exception"] + assert "[redacted]" in str(sent) + assert "secret-value" not in str(sent) + + +def test_report_exception_redacts_every_static_pattern_and_leaves_benign_text( + tmp_path: Path, monkeypatch +) -> None: + from ktx_daemon.telemetry.exception import report_exception + + reset_identity_cache() + write_identity(tmp_path) + FakePosthog.captures.clear() + monkeypatch.setattr("posthog.Posthog", FakePosthog) + + cases = [ + ("dsn password=hunter2", "hunter2", "password=[redacted]"), + ("dsn pwd=swordfish", "swordfish", "pwd=[redacted]"), + ("Authorization: Basic abc123", "abc123", "Authorization: [redacted]"), + ("Authorization: Bearer token-123", "token-123", "Authorization: [redacted]"), + ("Bearer standalone-token", "standalone-token", "Bearer [redacted]"), + ("api_key=sk-live-secret", "sk-live-secret", "api_key=[redacted]"), + ("api-key: sk-dash-secret", "sk-dash-secret", "api-key=[redacted]"), + ( + "KTX_PROVIDER_TOKEN=ktx-secret", + "ktx-secret", + "KTX_PROVIDER_TOKEN=[redacted]", + ), + ( + "REFRESH_SECRET: refresh-secret", + "refresh-secret", + "REFRESH_SECRET=[redacted]", + ), + ( + "https://s3.example.test/file?X-Amz-Signature=aws-secret&ok=1", + "aws-secret", + "X-Amz-Signature=[redacted]", + ), + ( + "https://storage.example.test/file?X-Goog-Signature=goog-secret&ok=1", + "goog-secret", + "X-Goog-Signature=[redacted]", + ), + ( + "https://cdn.example.test/file?sig=signed-secret&ok=1", + "signed-secret", + "sig=[redacted]", + ), + ( + "postgres://svc:url-password@db.example.test/analytics", # pragma: allowlist secret + "url-password", + "postgres://svc:[redacted]@db.example.test/analytics", + ), + ] + + for message, leaked, expected in cases: + report_exception( + RuntimeError(message), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + sent = str(FakePosthog.captures[-1]["exception"]) + assert expected in sent + assert leaked not in sent + + report_exception( + RuntimeError("token bucket metrics and passwordless auth are benign"), + source="database-introspect", + handled=True, + fatal=False, + home_dir=tmp_path, + env={}, + ) + assert str(FakePosthog.captures[-1]["exception"]) == ( + "token bucket metrics and passwordless auth are benign" + ) + + +def test_route_derived_boundary_covers_existing_health_route(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + class BrokenEnviron(dict[str, str]): + def get(self, key: str, default: str | None = None) -> str | None: + if key == "KTX_DAEMON_VERSION": + raise RuntimeError("health boom") + return default + + monkeypatch.setattr(app_module.os, "environ", BrokenEnviron()) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.get("/health") + + assert response.status_code == 500 + assert reports + assert reports[0]["source"] == "app:/health" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + + +def test_route_boundary_passes_request_scoped_database_secrets(monkeypatch) -> None: + from fastapi.testclient import TestClient + from ktx_daemon import app as app_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr( + app_module, + "introspect_database_response", + lambda _request: (_ for _ in ()).throw(RuntimeError("db-url-secret")), + ) + monkeypatch.setattr(app_module, "report_exception", fake_report) + + client = TestClient(app_module.create_app(), raise_server_exceptions=False) + response = client.post( + "/database/introspect", + json={ + "connection_id": "warehouse", + "url": "postgres://svc:db-url-secret@db.example.test/analytics", # pragma: allowlist secret + "password": "db-password-secret", # pragma: allowlist secret + }, + ) + + assert response.status_code == 500 + assert reports + assert ( + reports[0]["redaction_secrets"] + == [ + "postgres://svc:db-url-secret@db.example.test/analytics", # pragma: allowlist secret + "db-password-secret", # pragma: allowlist secret + ] + ) + + +def test_serve_http_run_crash_reports_exception_and_crash_stop(monkeypatch) -> None: + import sys + + from ktx_daemon import __main__ as main_module + + reports: list[dict[str, object]] = [] + stops: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + def fake_stop(*, reason: str, uptime_ms: float) -> bool: + stops.append({"reason": reason, "uptimeMs": uptime_ms}) + return True + + class FakeUvicorn: + @staticmethod + def run(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("uvicorn crash") + + monkeypatch.setitem(sys.modules, "uvicorn", FakeUvicorn) + monkeypatch.setattr("ktx_daemon.telemetry.report_exception", fake_report) + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once", + fake_stop, + ) + + try: + main_module.run_http_server( + host="127.0.0.1", + port=9999, + log_level="info", + enable_code_execution=False, + ) + except RuntimeError as error: + assert str(error) == "uvicorn crash" + else: + raise AssertionError("run_http_server did not re-raise the crash") + + assert reports + assert reports[0]["source"] == "serve-http" + assert reports[0]["handled"] is False + assert reports[0]["fatal"] is True + assert stops and stops[0]["reason"] == "crash" + + +def test_one_shot_command_reports_without_excepthook_or_daemon_stopped( + monkeypatch, +) -> None: + import sys + + from ktx_daemon import __main__ as daemon_main + + original_hook = sys.excepthook + reports: list[dict[str, object]] = [] + stops: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + def fake_stop(*, reason: str, uptime_ms: float) -> bool: + stops.append({"reason": reason, "uptimeMs": uptime_ms}) + return True + + monkeypatch.setattr( + daemon_main, + "_read_stdin_json", + lambda: { + "connection_id": "warehouse", + "driver": "postgres", + "url": "postgresql://readonly@example.test/warehouse", + "schemas": ["public"], + }, + ) + monkeypatch.setattr( + daemon_main, + "introspect_database_response", + lambda _request: (_ for _ in ()).throw(RuntimeError("one-shot boom")), + ) + monkeypatch.setattr("ktx_daemon.telemetry.report_exception", fake_report) + monkeypatch.setattr( + "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once", + fake_stop, + ) + + assert daemon_main.main(["database-introspect"]) == 1 + assert sys.excepthook is original_hook + assert stops == [] + assert reports + assert reports[0]["source"] == "database-introspect" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False diff --git a/python/ktx-daemon/tests/test_semantic_layer.py b/python/ktx-daemon/tests/test_semantic_layer.py index 828e9359..72040df9 100644 --- a/python/ktx-daemon/tests/test_semantic_layer.py +++ b/python/ktx-daemon/tests/test_semantic_layer.py @@ -97,6 +97,33 @@ def test_query_semantic_layer_emits_plan_and_sql_debug_events( assert "public.orders" not in captured.err +def test_query_semantic_layer_reports_exception(monkeypatch) -> None: + from ktx_daemon import semantic_layer as semantic_layer_module + + reports: list[dict[str, object]] = [] + + def fake_report(exception: BaseException, **kwargs: object) -> None: + reports.append({"exception": exception, **kwargs}) + + monkeypatch.setattr(semantic_layer_module, "report_exception", fake_report) + + with pytest.raises(ValueError): + query_semantic_layer( + SemanticLayerQueryRequest( + sources=[ORDERS_SOURCE, ORDERS_SOURCE], + dialect="postgres", + projectId="a" * 64, + query={"measures": ["orders.order_count"]}, + ) + ) + + assert reports + assert reports[0]["source"] == "semantic-query" + assert reports[0]["handled"] is True + assert reports[0]["fatal"] is False + assert reports[0]["project_id"] == "a" * 64 + + def test_semantic_layer_request_rejects_project_id_field_name() -> None: with pytest.raises(ValueError): SemanticLayerQueryRequest(