feat(telemetry): include error details for failures (#254)

This commit is contained in:
Andrey Avtomonov 2026-06-02 17:23:51 +02:00 committed by GitHub
parent 494618ab14
commit 6da8c3452a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1259 additions and 999 deletions

View file

@ -25,10 +25,11 @@ Use any of these mechanisms to disable telemetry:
## What we collect
High-level signals only: which commands run, how long they take, whether they
High-level signals: which commands run, how long they take, whether they
succeed or fail, and basic environment metadata (CLI version, Node version, OS
platform). For project-level analysis, **ktx** sends a salted hash of the
project directory — never the raw path.
platform). When an operation fails, we also include diagnostic detail about the
error so we can debug it. For project-level analysis, **ktx** sends a salted
hash of the project directory to group events.
When an agent reaches **ktx** through MCP, we also record the connecting client
tool's self-reported name and version (for example Claude Desktop, Cursor, or
@ -37,11 +38,14 @@ tool, never you or your data.
## What we never collect
- File paths, hostnames, environment variable values, or command arguments
- `ktx.yaml` contents, connection passwords, API keys, or tokens
- Schema names, table names, column names, SQL text, or query results
- Error messages or stack traces
- Git remote URLs, Git user email, OS user, or hostname
We build telemetry around counts and coarse signals, not the contents of your
data or configuration. We don't deliberately collect your `ktx.yaml`, query
results, passwords, API keys, or access tokens.
The one place environment-specific text can appear is failure diagnostics: when
an operation errors, the detail we record is the error message as your tools
reported it (never the stack trace), which can include identifiers from your
setup such as table or column names, hostnames, or usernames. If you'd rather
send nothing at all, turn telemetry off using any of the options above.
## Storage and retention

View file

@ -17,7 +17,7 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:connection');
@ -304,6 +304,7 @@ async function emitConnectionTest(input: {
io: KtxCliIo;
}): Promise<void> {
const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
const errorDetail = input.error ? formatErrorDetail(input.error) : undefined;
await emitTelemetryEvent({
name: 'connection_test',
projectDir: input.project.projectDir,
@ -314,6 +315,7 @@ async function emitConnectionTest(input: {
outcome: input.outcome,
durationMs: input.durationMs,
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
},
});
}

View file

@ -22,6 +22,7 @@ import type { KtxScanArgs, KtxScanDeps } from './scan.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
import { formatErrorDetail } from './telemetry/scrubber.js';
profileMark('module:public-ingest');
@ -635,6 +636,9 @@ async function emitIngestCompleted(input: {
io: KtxCliIo;
}): Promise<void> {
const failed = resultFailed(input.result);
const failureDetail = failed
? formatErrorDetail(input.result.steps.find((step) => step.status === 'failed')?.detail)
: undefined;
await emitTelemetryEvent({
name: 'ingest_completed',
projectDir: input.args.projectDir,
@ -651,6 +655,7 @@ async function emitIngestCompleted(input: {
rowsBucket: rowsBucket(),
durationMs: Math.max(0, performance.now() - input.startedAt),
outcome: failed ? 'error' : 'ok',
...(failureDetail ? { errorDetail: failureDetail } : {}),
},
});
}

View file

@ -9,7 +9,7 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { profileMark } from './startup-profile.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:scan');
@ -380,6 +380,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
return 0;
} catch (error) {
const errorClass = scrubErrorClass(error);
const errorDetail = formatErrorDetail(error);
await emitTelemetryEvent({
name: 'scan_completed',
projectDir: args.projectDir,
@ -393,6 +394,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
},
});
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);

View file

@ -6,6 +6,7 @@ import { markKtxSetupStateStepComplete, readKtxSetupState } from './context/proj
import { serializeKtxProjectConfig } from './context/project/config.js';
import type { KtxCliIo } from './cli-runtime.js';
import { errorMessage, writePrefixedLines } from './clack.js';
import { formatErrorDetail } from './telemetry/scrubber.js';
import { buildPublicIngestPlan } from './public-ingest.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import {
@ -67,7 +68,7 @@ export type KtxSetupContextResult =
| { status: 'skipped'; projectDir: string }
| { status: 'back'; projectDir: string }
| { status: 'missing-input'; projectDir: string }
| { status: 'failed'; projectDir: string };
| { status: 'failed'; projectDir: string; errorDetail?: string };
export interface KtxSetupContextStepArgs {
projectDir: string;
@ -702,6 +703,6 @@ export async function runKtxSetupContextStep(
return await runBuild(args, io, deps, project, targets);
} catch (error) {
writePrefixedLines((chunk) => io.stderr.write(chunk), errorMessage(error));
return { status: 'failed', projectDir: args.projectDir };
return { status: 'failed', projectDir: args.projectDir, errorDetail: formatErrorDetail(error) };
}
}

View file

@ -217,6 +217,7 @@ async function recordSetupStep(input: {
startedAt: number;
io: KtxCliIo;
cliVersion?: string;
errorDetail?: string;
}): Promise<void> {
const { emitTelemetryEvent } = await import('./telemetry/index.js');
await emitTelemetryEvent({
@ -228,6 +229,7 @@ async function recordSetupStep(input: {
step: input.step,
outcome: setupTelemetryOutcome(input.status),
durationMs: Math.max(0, performance.now() - input.startedAt),
...(input.errorDetail ? { errorDetail: input.errorDetail } : {}),
},
});
}
@ -683,7 +685,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
if (!step) break;
const stepStartedAt = performance.now();
let stepResult: { status: KtxSetupFlowStatus };
let stepResult: { status: KtxSetupFlowStatus; errorDetail?: string };
if (step === 'models') {
const modelRunner =
deps.model ?? ((modelArgs, modelIo) => runKtxSetupAnthropicModelStep(modelArgs, modelIo, deps.modelDeps));
@ -844,6 +846,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
startedAt: stepStartedAt,
io,
cliVersion: args.cliVersion,
...(stepResult.errorDetail ? { errorDetail: stepResult.errorDetail } : {}),
});
if (stepResult.status === 'failed') {

View file

@ -1,4 +1,4 @@
import { scrubErrorClass } from './scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './scrubber.js';
export type CommandOutcome = 'ok' | 'error' | 'aborted';
@ -16,6 +16,7 @@ export interface CompletedCommandSpan {
durationMs: number;
outcome: CommandOutcome;
errorClass?: string;
errorDetail?: string;
flagsPresent: Record<string, boolean>;
hasProject: boolean;
projectDir?: string;
@ -40,12 +41,14 @@ export function completeCommandSpan(input: {
}
const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
const errorDetail = input.error ? formatErrorDetail(input.error) : undefined;
return {
commandPath: span.commandPath,
durationMs: Math.max(0, input.completedAt - span.startedAt),
outcome: input.outcome,
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
flagsPresent: span.flagsPresent,
hasProject: span.hasProject,
projectDir: span.projectDir,

View file

@ -26,6 +26,7 @@
"durationMs",
"outcome",
"errorClass",
"errorDetail",
"flagsPresent",
"hasProject",
"projectGroupAttached"
@ -37,7 +38,8 @@
"fields": [
"step",
"outcome",
"durationMs"
"durationMs",
"errorDetail"
]
},
{
@ -56,6 +58,7 @@
"isDemoConnection",
"outcome",
"errorClass",
"errorDetail",
"durationMs",
"serverVersion"
]
@ -84,7 +87,8 @@
"rowsBucket",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -98,7 +102,8 @@
"declaredFkCount",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -296,6 +301,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"flagsPresent": {
"type": "object",
"propertyNames": {
@ -384,6 +393,10 @@
"durationMs": {
"type": "number",
"minimum": 0
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -494,6 +507,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"durationMs": {
"type": "number",
"minimum": 0
@ -673,6 +690,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -759,6 +780,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [

View file

@ -21,6 +21,7 @@ const commandSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: z.enum(['ok', 'error', 'aborted']),
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
flagsPresent: z.record(z.string(), z.boolean()),
hasProject: z.boolean(),
projectGroupAttached: z.boolean(),
@ -45,6 +46,7 @@ const setupStepSchema = telemetryCommonEnvelopeSchema
]),
outcome: z.enum(['completed', 'skipped', 'abandoned']),
durationMs: z.number().nonnegative(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -61,6 +63,7 @@ const connectionTestSchema = telemetryCommonEnvelopeSchema
isDemoConnection: z.boolean(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
durationMs: z.number().nonnegative(),
serverVersion: z.string().optional(),
})
@ -90,6 +93,7 @@ const ingestCompletedSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -103,6 +107,7 @@ const scanCompletedSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -237,6 +242,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
'flagsPresent',
'hasProject',
'projectGroupAttached',
@ -245,7 +251,7 @@ export const telemetryEventCatalog = [
{
name: 'setup_step',
description: 'Emitted after an interactive setup step completes, skips, or aborts.',
fields: ['step', 'outcome', 'durationMs'],
fields: ['step', 'outcome', 'durationMs', 'errorDetail'],
},
{
name: 'connection_added',
@ -255,7 +261,7 @@ export const telemetryEventCatalog = [
{
name: 'connection_test',
description: 'Emitted after ktx connection test completes.',
fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'durationMs', 'serverVersion'],
fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'errorDetail', 'durationMs', 'serverVersion'],
},
{
name: 'project_stack_snapshot',
@ -275,6 +281,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
],
},
{
@ -289,6 +296,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
],
},
{

View file

@ -26,3 +26,27 @@ export function scrubErrorClass(error: unknown): string | undefined {
return constructorName;
}
const MAX_ERROR_DETAIL_LENGTH = 1000;

/**
 * Extracts the human-readable text of a thrown value without ever throwing.
 *
 * Prefers `Error#message`, then a string `message` property on plain objects
 * (driver/RPC error payloads), and finally falls back to `String(value)`.
 */
function errorText(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  if (typeof error === 'object' && error !== null) {
    const candidate = (error as { message?: unknown }).message;
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  try {
    return String(error);
  } catch {
    // Values without a primitive conversion (e.g. `Object.create(null)`) make
    // `String()` throw; telemetry formatting must never fail the failure path.
    return '';
  }
}

/**
 * Caps `text` at `maxLength` UTF-16 code units without leaving a dangling high
 * surrogate at the cut, so the result stays well-formed when serialized.
 */
function truncateWellFormed(text: string, maxLength: number): string {
  if (text.length <= maxLength) {
    return text;
  }
  const lastKept = text.charCodeAt(maxLength - 1);
  const cutsSurrogatePair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return text.slice(0, cutsSurrogatePair ? maxLength - 1 : maxLength);
}

/**
 * Human-readable failure detail for telemetry: the error's `.code` (when it is
 * a string or number) prefixed onto its message, collapsed to a single line
 * and length-capped. Captures the message only, never the stack.
 *
 * This intentionally forwards raw error text, which can include identifiers from
 * the user's environment (table/column names, hostnames, usernames), so that
 * funnel failures are diagnosable. Callers must gate it to the failure path.
 *
 * NOTE(review): no redaction is applied here, so any secret embedded in an
 * error message (for example credentials inside a connection URL) would be
 * forwarded as-is. Confirm that this matches the published telemetry policy.
 *
 * @param error - The thrown value or failure detail; may be any type.
 * @returns The formatted detail (at most 1000 characters), or `undefined` when
 *   the input is `null`/`undefined` or yields no visible text.
 */
export function formatErrorDetail(error: unknown): string | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }
  const code = (error as { code?: unknown }).code;
  const prefix = typeof code === 'string' || typeof code === 'number' ? `${code}: ` : '';
  // Collapse every whitespace run (including newlines) so the detail is one line.
  const detail = `${prefix}${errorText(error)}`.replace(/\s+/g, ' ').trim();
  return detail.length > 0 ? truncateWellFormed(detail, MAX_ERROR_DETAIL_LENGTH) : undefined;
}

View file

@ -162,6 +162,27 @@ describe('runKtxConnection', () => {
expect(io.stderr()).not.toContain(projectDir);
});
it('records the raw errorDetail in connection_test telemetry when a native test fails', async () => {
  // The assertions below read telemetry events from stderr; KTX_TELEMETRY_DEBUG
  // is presumably what routes them there (confirm against the telemetry module).
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');

  // Arrange: a project with one sqlite connection whose native test reports failure.
  const projectDir = join(tempDir, 'project');
  await initKtxProject({ projectDir });
  await writeConnections(projectDir, { warehouse: { driver: 'sqlite' } });
  const { connector } = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' });
  const io = makeIo();

  // Act
  const exitCode = await runKtxConnection(
    { command: 'test', projectDir, connectionId: 'warehouse' },
    io.io,
    { createScanConnector: vi.fn(async () => connector) },
  );

  // Assert: the command fails and the emitted event carries the connector's error text.
  expect(exitCode).toBe(1);
  const stderrText = io.stderr();
  expect(stderrText).toContain('"event":"connection_test"');
  expect(stderrText).toContain('"outcome":"error"');
  expect(stderrText).toContain('"errorDetail":"database file is unreadable"');
});
it('reports the connector error and still cleans up when native testConnection fails', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });

View file

@ -431,6 +431,32 @@ describe('runKtxPublicIngest', () => {
}
});
it('records errorDetail in ingest_completed telemetry when a target fails', async () => {
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');

  // Each run gets its own scratch project directory, removed in `finally`.
  const projectDir = await mkdtemp(join(tmpdir(), 'ktx-public-ingest-telemetry-fail-'));
  try {
    await initKtxProject({ projectDir });
    const project = deepReadyProject({
      warehouse: { driver: 'sqlite', path: join(projectDir, 'warehouse.sqlite') },
    });
    const io = makeIo({ isTTY: true });

    // The stubbed scan resolves to exit code 1, which makes the ingest target fail.
    const exitCode = await runKtxPublicIngest(
      { command: 'run', projectDir, targetConnectionId: 'warehouse', all: false, json: false, inputMode: 'disabled' },
      io.io,
      {
        loadProject: vi.fn(async () => project),
        runScan: vi.fn(async () => 1),
      },
    );

    expect(exitCode).toBe(1);
    const stderrText = io.stderr();
    expect(stderrText).toContain('"event":"ingest_completed"');
    expect(stderrText).toContain('"outcome":"error"');
    // Only the presence of the field is asserted; its exact text is not pinned here.
    expect(stderrText).toContain('"errorDetail"');
  } finally {
    await rm(projectDir, { recursive: true, force: true });
  }
});
it('runs query history after schema ingest with current-run window override', async () => {
const io = makeIo();
const runtimeIo = makeIo({ isTTY: true });

View file

@ -423,6 +423,37 @@ describe('runKtxScan', () => {
expect(io.stderr()).not.toContain(tempDir);
});
it('records the raw errorDetail in scan_completed telemetry when the scan throws', async () => {
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');
  await initKtxProject({ projectDir: tempDir });

  // The scan rejects with an Error that also carries a Node-style `.code`.
  const runLocalScan = vi.fn(async (): Promise<LocalScanRunResult> => {
    throw Object.assign(new Error('introspection timed out'), { code: 'ETIMEDOUT' });
  });
  const io = makeIo({ isTTY: true });

  const exitCode = await runKtxScan(
    {
      command: 'run',
      projectDir: tempDir,
      connectionId: 'warehouse',
      mode: 'structural',
      detectRelationships: false,
      dryRun: false,
      databaseIntrospectionUrl: 'http://127.0.0.1:8765',
    },
    io.io,
    { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters },
  );

  expect(exitCode).toBe(1);
  const stderrText = io.stderr();
  expect(stderrText).toContain('"event":"scan_completed"');
  expect(stderrText).toContain('"outcome":"error"');
  // The code is expected as a prefix on the message.
  expect(stderrText).toContain('"errorDetail":"ETIMEDOUT: introspection timed out"');
});
it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => {
await initKtxProject({ projectDir: tempDir });
const createLocalIngestAdapters = vi.fn(() => []);

View file

@ -332,6 +332,30 @@ describe('setup context build state', () => {
});
});
it('captures the raw errorDetail on the result when the context build throws', async () => {
  await writeReadyProject(tempDir);
  const io = makeIo();

  // A context build that always rejects, to drive the step into its failure branch.
  const runContextBuildMock = vi.fn<NonNullable<KtxSetupContextDeps['runContextBuild']>>(async () => {
    throw new Error('managed runtime exited with code 1');
  });

  const result = await runKtxSetupContextStep(
    { projectDir: tempDir, inputMode: 'disabled' },
    io.io,
    {
      runIdFactory: () => 'setup-context-local-throw',
      now: () => new Date('2026-05-09T10:00:00.000Z'),
      runContextBuild: runContextBuildMock,
    },
  );

  // The step resolves (rather than rejecting) with the failure and its message attached.
  expect(result).toEqual({
    status: 'failed',
    projectDir: tempDir,
    errorDetail: 'managed runtime exited with code 1',
  });
});
it('marks context complete without prompting when initial source ingest already made agent context', async () => {
await writeReadyProject(tempDir);
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });

View file

@ -34,4 +34,23 @@ describe('telemetry command hook', () => {
resetCommandSpan();
expect(completeCommandSpan({ completedAt: 200, outcome: 'ok' })).toBeUndefined();
});
it('captures errorClass and raw errorDetail on a failed command', () => {
  // A locally declared Error subclass; its name is what errorClass is expected to report.
  class KtxConnectionError extends Error {}
  const failure = new KtxConnectionError('connect ECONNREFUSED 127.0.0.1:5432');

  resetCommandSpan();
  beginCommandSpan({
    commandPath: ['ktx', 'ingest'],
    flagsPresent: {},
    hasProject: true,
    attachProjectGroup: false,
    startedAt: 0,
  });

  const span = completeCommandSpan({ completedAt: 10, outcome: 'error', error: failure });

  expect(span?.outcome).toBe('error');
  expect(span?.errorClass).toBe('KtxConnectionError');
  expect(span?.errorDetail).toBe('connect ECONNREFUSED 127.0.0.1:5432');
});
});

View file

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import { scrubErrorClass } from '../../src/telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from '../../src/telemetry/scrubber.js';
class KtxProjectMissingAbortError extends Error {}
@ -23,3 +23,39 @@ describe('scrubErrorClass', () => {
expect(scrubErrorClass(null)).toBeUndefined();
});
});
describe('formatErrorDetail', () => {
  // Builds an Error that also carries a `.code` property.
  const errorWithCode = (message: string, code: unknown): Error => Object.assign(new Error(message), { code });

  it('prefixes a string or numeric .code onto the message', () => {
    expect(formatErrorDetail(errorWithCode('connect failed', 'ECONNREFUSED'))).toBe('ECONNREFUSED: connect failed');
    expect(formatErrorDetail(errorWithCode('forbidden', 403))).toBe('403: forbidden');
  });

  it('uses the bare message when there is no .code', () => {
    const message = 'password authentication failed for user "x"';
    expect(formatErrorDetail(new Error(message))).toBe(message);
  });

  it('accepts non-Error values', () => {
    expect(formatErrorDetail('boom')).toBe('boom');
  });

  it('collapses whitespace to a single line', () => {
    const multiline = new Error('line one\n line two');
    expect(formatErrorDetail(multiline)).toBe('line one line two');
  });

  it('caps the length at 1000 characters', () => {
    const capped = formatErrorDetail(new Error('x'.repeat(2000)));
    expect(capped?.length).toBe(1000);
  });

  it('returns undefined for empty, null, or undefined input', () => {
    for (const input of [new Error(' '), null, undefined]) {
      expect(formatErrorDetail(input)).toBeUndefined();
    }
  });
});

View file

@ -26,6 +26,7 @@
"durationMs",
"outcome",
"errorClass",
"errorDetail",
"flagsPresent",
"hasProject",
"projectGroupAttached"
@ -37,7 +38,8 @@
"fields": [
"step",
"outcome",
"durationMs"
"durationMs",
"errorDetail"
]
},
{
@ -56,6 +58,7 @@
"isDemoConnection",
"outcome",
"errorClass",
"errorDetail",
"durationMs",
"serverVersion"
]
@ -84,7 +87,8 @@
"rowsBucket",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -98,7 +102,8 @@
"declaredFkCount",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -296,6 +301,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"flagsPresent": {
"type": "object",
"propertyNames": {
@ -384,6 +393,10 @@
"durationMs": {
"type": "number",
"minimum": 0
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -494,6 +507,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"durationMs": {
"type": "number",
"minimum": 0
@ -673,6 +690,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -759,6 +780,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [

1953
uv.lock generated

File diff suppressed because it is too large Load diff