feat(telemetry): include error details for failures (#254)

This commit is contained in:
Andrey Avtomonov 2026-06-02 17:23:51 +02:00 committed by GitHub
parent 494618ab14
commit 6da8c3452a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1259 additions and 999 deletions

View file

@ -17,7 +17,7 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:connection');
@ -304,6 +304,7 @@ async function emitConnectionTest(input: {
io: KtxCliIo;
}): Promise<void> {
const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
const errorDetail = input.error ? formatErrorDetail(input.error) : undefined;
await emitTelemetryEvent({
name: 'connection_test',
projectDir: input.project.projectDir,
@ -314,6 +315,7 @@ async function emitConnectionTest(input: {
outcome: input.outcome,
durationMs: input.durationMs,
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
},
});
}

View file

@ -22,6 +22,7 @@ import type { KtxScanArgs, KtxScanDeps } from './scan.js';
import { profileMark } from './startup-profile.js';
import { isDemoConnection } from './telemetry/demo-detect.js';
import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js';
import { formatErrorDetail } from './telemetry/scrubber.js';
profileMark('module:public-ingest');
@ -635,6 +636,9 @@ async function emitIngestCompleted(input: {
io: KtxCliIo;
}): Promise<void> {
const failed = resultFailed(input.result);
const failureDetail = failed
? formatErrorDetail(input.result.steps.find((step) => step.status === 'failed')?.detail)
: undefined;
await emitTelemetryEvent({
name: 'ingest_completed',
projectDir: input.args.projectDir,
@ -651,6 +655,7 @@ async function emitIngestCompleted(input: {
rowsBucket: rowsBucket(),
durationMs: Math.max(0, performance.now() - input.startedAt),
outcome: failed ? 'error' : 'ok',
...(failureDetail ? { errorDetail: failureDetail } : {}),
},
});
}

View file

@ -9,7 +9,7 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import { profileMark } from './startup-profile.js';
import { emitTelemetryEvent } from './telemetry/index.js';
import { scrubErrorClass } from './telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
profileMark('module:scan');
@ -380,6 +380,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
return 0;
} catch (error) {
const errorClass = scrubErrorClass(error);
const errorDetail = formatErrorDetail(error);
await emitTelemetryEvent({
name: 'scan_completed',
projectDir: args.projectDir,
@ -393,6 +394,7 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps
durationMs: Math.max(0, performance.now() - startedAt),
outcome: 'error',
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
},
});
io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`);

View file

@ -6,6 +6,7 @@ import { markKtxSetupStateStepComplete, readKtxSetupState } from './context/proj
import { serializeKtxProjectConfig } from './context/project/config.js';
import type { KtxCliIo } from './cli-runtime.js';
import { errorMessage, writePrefixedLines } from './clack.js';
import { formatErrorDetail } from './telemetry/scrubber.js';
import { buildPublicIngestPlan } from './public-ingest.js';
import type { KtxManagedPythonInstallPolicy } from './managed-python-command.js';
import {
@ -67,7 +68,7 @@ export type KtxSetupContextResult =
| { status: 'skipped'; projectDir: string }
| { status: 'back'; projectDir: string }
| { status: 'missing-input'; projectDir: string }
| { status: 'failed'; projectDir: string };
| { status: 'failed'; projectDir: string; errorDetail?: string };
export interface KtxSetupContextStepArgs {
projectDir: string;
@ -702,6 +703,6 @@ export async function runKtxSetupContextStep(
return await runBuild(args, io, deps, project, targets);
} catch (error) {
writePrefixedLines((chunk) => io.stderr.write(chunk), errorMessage(error));
return { status: 'failed', projectDir: args.projectDir };
return { status: 'failed', projectDir: args.projectDir, errorDetail: formatErrorDetail(error) };
}
}

View file

@ -217,6 +217,7 @@ async function recordSetupStep(input: {
startedAt: number;
io: KtxCliIo;
cliVersion?: string;
errorDetail?: string;
}): Promise<void> {
const { emitTelemetryEvent } = await import('./telemetry/index.js');
await emitTelemetryEvent({
@ -228,6 +229,7 @@ async function recordSetupStep(input: {
step: input.step,
outcome: setupTelemetryOutcome(input.status),
durationMs: Math.max(0, performance.now() - input.startedAt),
...(input.errorDetail ? { errorDetail: input.errorDetail } : {}),
},
});
}
@ -683,7 +685,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
if (!step) break;
const stepStartedAt = performance.now();
let stepResult: { status: KtxSetupFlowStatus };
let stepResult: { status: KtxSetupFlowStatus; errorDetail?: string };
if (step === 'models') {
const modelRunner =
deps.model ?? ((modelArgs, modelIo) => runKtxSetupAnthropicModelStep(modelArgs, modelIo, deps.modelDeps));
@ -844,6 +846,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup
startedAt: stepStartedAt,
io,
cliVersion: args.cliVersion,
...(stepResult.errorDetail ? { errorDetail: stepResult.errorDetail } : {}),
});
if (stepResult.status === 'failed') {

View file

@ -1,4 +1,4 @@
import { scrubErrorClass } from './scrubber.js';
import { formatErrorDetail, scrubErrorClass } from './scrubber.js';
export type CommandOutcome = 'ok' | 'error' | 'aborted';
@ -16,6 +16,7 @@ export interface CompletedCommandSpan {
durationMs: number;
outcome: CommandOutcome;
errorClass?: string;
errorDetail?: string;
flagsPresent: Record<string, boolean>;
hasProject: boolean;
projectDir?: string;
@ -40,12 +41,14 @@ export function completeCommandSpan(input: {
}
const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
const errorDetail = input.error ? formatErrorDetail(input.error) : undefined;
return {
commandPath: span.commandPath,
durationMs: Math.max(0, input.completedAt - span.startedAt),
outcome: input.outcome,
...(errorClass ? { errorClass } : {}),
...(errorDetail ? { errorDetail } : {}),
flagsPresent: span.flagsPresent,
hasProject: span.hasProject,
projectDir: span.projectDir,

View file

@ -26,6 +26,7 @@
"durationMs",
"outcome",
"errorClass",
"errorDetail",
"flagsPresent",
"hasProject",
"projectGroupAttached"
@ -37,7 +38,8 @@
"fields": [
"step",
"outcome",
"durationMs"
"durationMs",
"errorDetail"
]
},
{
@ -56,6 +58,7 @@
"isDemoConnection",
"outcome",
"errorClass",
"errorDetail",
"durationMs",
"serverVersion"
]
@ -84,7 +87,8 @@
"rowsBucket",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -98,7 +102,8 @@
"declaredFkCount",
"durationMs",
"outcome",
"errorClass"
"errorClass",
"errorDetail"
]
},
{
@ -296,6 +301,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"flagsPresent": {
"type": "object",
"propertyNames": {
@ -384,6 +393,10 @@
"durationMs": {
"type": "number",
"minimum": 0
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -494,6 +507,10 @@
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
},
"durationMs": {
"type": "number",
"minimum": 0
@ -673,6 +690,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [
@ -759,6 +780,10 @@
},
"errorClass": {
"type": "string"
},
"errorDetail": {
"type": "string",
"maxLength": 1000
}
},
"required": [

View file

@ -21,6 +21,7 @@ const commandSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: z.enum(['ok', 'error', 'aborted']),
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
flagsPresent: z.record(z.string(), z.boolean()),
hasProject: z.boolean(),
projectGroupAttached: z.boolean(),
@ -45,6 +46,7 @@ const setupStepSchema = telemetryCommonEnvelopeSchema
]),
outcome: z.enum(['completed', 'skipped', 'abandoned']),
durationMs: z.number().nonnegative(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -61,6 +63,7 @@ const connectionTestSchema = telemetryCommonEnvelopeSchema
isDemoConnection: z.boolean(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
durationMs: z.number().nonnegative(),
serverVersion: z.string().optional(),
})
@ -90,6 +93,7 @@ const ingestCompletedSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -103,6 +107,7 @@ const scanCompletedSchema = telemetryCommonEnvelopeSchema
durationMs: z.number().nonnegative(),
outcome: outcomeSchema,
errorClass: z.string().optional(),
errorDetail: z.string().max(1000).optional(),
})
.strict();
@ -237,6 +242,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
'flagsPresent',
'hasProject',
'projectGroupAttached',
@ -245,7 +251,7 @@ export const telemetryEventCatalog = [
{
name: 'setup_step',
description: 'Emitted after an interactive setup step completes, skips, or aborts.',
fields: ['step', 'outcome', 'durationMs'],
fields: ['step', 'outcome', 'durationMs', 'errorDetail'],
},
{
name: 'connection_added',
@ -255,7 +261,7 @@ export const telemetryEventCatalog = [
{
name: 'connection_test',
description: 'Emitted after ktx connection test completes.',
fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'durationMs', 'serverVersion'],
fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'errorDetail', 'durationMs', 'serverVersion'],
},
{
name: 'project_stack_snapshot',
@ -275,6 +281,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
],
},
{
@ -289,6 +296,7 @@ export const telemetryEventCatalog = [
'durationMs',
'outcome',
'errorClass',
'errorDetail',
],
},
{

View file

@ -26,3 +26,27 @@ export function scrubErrorClass(error: unknown): string | undefined {
return constructorName;
}
/** Upper bound on the emitted detail; the telemetry event schemas cap `errorDetail` at the same 1000 characters. */
const MAX_ERROR_DETAIL_LENGTH = 1000;

/** Collapses every whitespace run (newlines included) to a single space and trims both ends. */
function collapseDetailWhitespace(text: string): string {
  return text.replace(/\s+/g, ' ').trim();
}

/**
 * Reads the message text out of an arbitrary thrown value.
 *
 * `Error` instances and error-like objects (anything carrying a string
 * `message`) contribute that message; every other value is stringified.
 */
function readErrorDetailMessage(error: unknown): string {
  if (typeof error === 'object' && error !== null) {
    const message = (error as { message?: unknown }).message;
    if (typeof message === 'string') {
      return message;
    }
  }
  try {
    return String(error);
  } catch {
    // String() throws for values that cannot be converted to a primitive
    // (e.g. null-prototype objects). This helper runs on failure paths, so it
    // must not raise a second error of its own.
    return Object.prototype.toString.call(error);
  }
}

/** Caps `detail` at MAX_ERROR_DETAIL_LENGTH UTF-16 units without cutting a surrogate pair in half. */
function truncateErrorDetail(detail: string): string {
  if (detail.length <= MAX_ERROR_DETAIL_LENGTH) {
    return detail;
  }
  const lastKept = detail.charCodeAt(MAX_ERROR_DETAIL_LENGTH - 1);
  // A high surrogate in the last kept slot would lose its low half; drop it too.
  const cutsSurrogatePair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  return detail.slice(0, cutsSurrogatePair ? MAX_ERROR_DETAIL_LENGTH - 1 : MAX_ERROR_DETAIL_LENGTH);
}

/**
 * Human-readable failure detail for telemetry: the error's `.code` (when it is
 * a string or number) prefixed onto its message, collapsed to a single line and
 * length-capped. Captures the message only, never the stack.
 *
 * This intentionally forwards raw error text, which can include identifiers from
 * the user's environment (table/column names, hostnames, usernames), so that
 * funnel failures are diagnosable. Callers must gate it to the failure path.
 *
 * @param error - Any thrown or reported value: an `Error`, an error-like object
 *   with a string `message`, a plain string, or anything else.
 * @returns `"<code>: <message>"`, just the message, or just the code when the
 *   message is blank; `undefined` when `error` is nullish or nothing printable
 *   remains after whitespace is collapsed.
 */
export function formatErrorDetail(error: unknown): string | undefined {
  if (error === undefined || error === null) {
    return undefined;
  }

  const code = (error as { code?: unknown }).code;
  const codeText = typeof code === 'string' || typeof code === 'number' ? collapseDetailWhitespace(String(code)) : '';
  const messageText = collapseDetailWhitespace(readErrorDetailMessage(error));

  // Join with ": " only when both halves exist, so a blank message never leaves a dangling "CODE:".
  const detail = codeText.length > 0 && messageText.length > 0 ? `${codeText}: ${messageText}` : codeText || messageText;

  return detail.length > 0 ? truncateErrorDetail(detail) : undefined;
}

View file

@ -162,6 +162,27 @@ describe('runKtxConnection', () => {
expect(io.stderr()).not.toContain(projectDir);
});
it('records the raw errorDetail in connection_test telemetry when a native test fails', async () => {
  // Presumably KTX_TELEMETRY_DEBUG mirrors emitted events onto stderr and a blank CI
  // keeps telemetry enabled; the assertions below read the events from stderr.
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');

  const workspace = join(tempDir, 'project');
  await initKtxProject({ projectDir: workspace });
  await writeConnections(workspace, {
    warehouse: { driver: 'sqlite' },
  });

  // Native connector whose connection test reports a failure with a known message.
  const failing = nativeConnector('sqlite', { success: false, error: 'database file is unreadable' });
  const harness = makeIo();

  const exitCode = await runKtxConnection(
    { command: 'test', projectDir: workspace, connectionId: 'warehouse' },
    harness.io,
    {
      createScanConnector: vi.fn(async () => failing.connector),
    },
  );

  expect(exitCode).toBe(1);
  for (const fragment of [
    '"event":"connection_test"',
    '"outcome":"error"',
    '"errorDetail":"database file is unreadable"',
  ]) {
    expect(harness.stderr()).toContain(fragment);
  }
});
it('reports the connector error and still cleans up when native testConnection fails', async () => {
const projectDir = join(tempDir, 'project');
await initKtxProject({ projectDir });

View file

@ -431,6 +431,32 @@ describe('runKtxPublicIngest', () => {
}
});
it('records errorDetail in ingest_completed telemetry when a target fails', async () => {
  // Presumably KTX_TELEMETRY_DEBUG mirrors emitted events onto stderr; the
  // assertions below look for the event there.
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');

  const scratchDir = await mkdtemp(join(tmpdir(), 'ktx-public-ingest-telemetry-fail-'));
  try {
    await initKtxProject({ projectDir: scratchDir });
    const harness = makeIo({ isTTY: true });
    const readyProject = deepReadyProject({
      warehouse: { driver: 'sqlite', path: join(scratchDir, 'warehouse.sqlite') },
    });

    // The stubbed scan resolves to exit code 1, which is the failure under test.
    const exitCode = await runKtxPublicIngest(
      {
        command: 'run',
        projectDir: scratchDir,
        targetConnectionId: 'warehouse',
        all: false,
        json: false,
        inputMode: 'disabled',
      },
      harness.io,
      { loadProject: vi.fn(async () => readyProject), runScan: vi.fn(async () => 1) },
    );

    expect(exitCode).toBe(1);
    for (const fragment of ['"event":"ingest_completed"', '"outcome":"error"', '"errorDetail"']) {
      expect(harness.stderr()).toContain(fragment);
    }
  } finally {
    // Always remove the temporary project, even when an assertion fails.
    await rm(scratchDir, { recursive: true, force: true });
  }
});
it('runs query history after schema ingest with current-run window override', async () => {
const io = makeIo();
const runtimeIo = makeIo({ isTTY: true });

View file

@ -423,6 +423,37 @@ describe('runKtxScan', () => {
expect(io.stderr()).not.toContain(tempDir);
});
it('records the raw errorDetail in scan_completed telemetry when the scan throws', async () => {
  // Presumably KTX_TELEMETRY_DEBUG mirrors emitted events onto stderr; the
  // assertions below look for the event there.
  vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
  vi.stubEnv('CI', '');
  await initKtxProject({ projectDir: tempDir });

  // Scan stub that rejects with an Error carrying a string `.code`, so the
  // expected detail is the code prefixed onto the message.
  const failingScan = vi.fn(async (): Promise<LocalScanRunResult> => {
    throw Object.assign(new Error('introspection timed out'), { code: 'ETIMEDOUT' });
  });
  const harness = makeIo({ isTTY: true });

  const exitCode = await runKtxScan(
    {
      command: 'run',
      projectDir: tempDir,
      connectionId: 'warehouse',
      mode: 'structural',
      detectRelationships: false,
      dryRun: false,
      databaseIntrospectionUrl: 'http://127.0.0.1:8765',
    },
    harness.io,
    { runLocalScan: failingScan, createLocalIngestAdapters: noLocalIngestAdapters },
  );

  expect(exitCode).toBe(1);
  for (const fragment of [
    '"event":"scan_completed"',
    '"outcome":"error"',
    '"errorDetail":"ETIMEDOUT: introspection timed out"',
  ]) {
    expect(harness.stderr()).toContain(fragment);
  }
});
it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => {
await initKtxProject({ projectDir: tempDir });
const createLocalIngestAdapters = vi.fn(() => []);

View file

@ -332,6 +332,30 @@ describe('setup context build state', () => {
});
});
it('captures the raw errorDetail on the result when the context build throws', async () => {
  await writeReadyProject(tempDir);
  const harness = makeIo();

  // Context build stub that always rejects; the step is expected to turn the
  // rejection into a `failed` result rather than rethrow.
  const throwingBuild = vi.fn<NonNullable<KtxSetupContextDeps['runContextBuild']>>(async () => {
    throw new Error('managed runtime exited with code 1');
  });

  const outcome = await runKtxSetupContextStep({ projectDir: tempDir, inputMode: 'disabled' }, harness.io, {
    runIdFactory: () => 'setup-context-local-throw',
    now: () => new Date('2026-05-09T10:00:00.000Z'),
    runContextBuild: throwingBuild,
  });

  expect(outcome).toEqual({
    status: 'failed',
    projectDir: tempDir,
    errorDetail: 'managed runtime exited with code 1',
  });
});
it('marks context complete without prompting when initial source ingest already made agent context', async () => {
await writeReadyProject(tempDir);
await mkdir(join(tempDir, 'semantic-layer', 'dbt-main'), { recursive: true });

View file

@ -34,4 +34,23 @@ describe('telemetry command hook', () => {
resetCommandSpan();
expect(completeCommandSpan({ completedAt: 200, outcome: 'ok' })).toBeUndefined();
});
it('captures errorClass and raw errorDetail on a failed command', () => {
  // Start from a clean slate, then open a span that the failure will close.
  resetCommandSpan();
  beginCommandSpan({
    commandPath: ['ktx', 'ingest'],
    flagsPresent: {},
    hasProject: true,
    attachProjectGroup: false,
    startedAt: 0,
  });

  // The subclass name is what the test expects to see reported as errorClass.
  class KtxConnectionError extends Error {}

  const span = completeCommandSpan({
    completedAt: 10,
    outcome: 'error',
    error: new KtxConnectionError('connect ECONNREFUSED 127.0.0.1:5432'),
  });

  expect(span).toMatchObject({
    outcome: 'error',
    errorClass: 'KtxConnectionError',
    errorDetail: 'connect ECONNREFUSED 127.0.0.1:5432',
  });
});
});

View file

@ -1,6 +1,6 @@
import { describe, expect, it } from 'vitest';
import { scrubErrorClass } from '../../src/telemetry/scrubber.js';
import { formatErrorDetail, scrubErrorClass } from '../../src/telemetry/scrubber.js';
class KtxProjectMissingAbortError extends Error {}
@ -23,3 +23,39 @@ describe('scrubErrorClass', () => {
expect(scrubErrorClass(null)).toBeUndefined();
});
});
describe('formatErrorDetail', () => {
  /** Builds an Error carrying an extra `.code` property, as Node system errors do. */
  const errorWithCode = (message: string, code: unknown): Error => Object.assign(new Error(message), { code });

  it('prefixes a string or numeric .code onto the message', () => {
    const stringCoded = formatErrorDetail(errorWithCode('connect failed', 'ECONNREFUSED'));
    expect(stringCoded).toBe('ECONNREFUSED: connect failed');

    const numberCoded = formatErrorDetail(errorWithCode('forbidden', 403));
    expect(numberCoded).toBe('403: forbidden');
  });

  it('uses the bare message when there is no .code', () => {
    const uncoded = new Error('password authentication failed for user "x"');
    expect(formatErrorDetail(uncoded)).toBe('password authentication failed for user "x"');
  });

  it('accepts non-Error values', () => {
    const detail = formatErrorDetail('boom');
    expect(detail).toBe('boom');
  });

  it('collapses whitespace to a single line', () => {
    const multiline = new Error('line one\n line two');
    expect(formatErrorDetail(multiline)).toBe('line one line two');
  });

  it('caps the length at 1000 characters', () => {
    const oversized = new Error('x'.repeat(2000));
    expect(formatErrorDetail(oversized)?.length).toBe(1000);
  });

  it('returns undefined for empty, null, or undefined input', () => {
    // A whitespace-only message counts as empty once it has been collapsed.
    for (const blank of [new Error(' '), null, undefined]) {
      expect(formatErrorDetail(blank)).toBeUndefined();
    }
  });
});