Make telemetry reliable across interrupts and headless installs

Three reliability gaps surfaced while auditing why PostHog numbers were
untrustworthy:

1. Interrupted commands lost their events. capture() is fire-and-forget and the
   only flush guarantee lived in a finally block, which SIGINT/SIGTERM skip — so
   Ctrl-C'ing a long ingest or an MCP client killing 'ktx mcp stdio' dropped the
   command event and any queued events. Add SIGINT/SIGTERM handlers (real-process
   entry only; never under test/programmatic io) that mark the active command
   span aborted, emit it, drain the emitter, then exit. Idempotent with the
   normal finally path via the single-consume command span.

2. Headless-first installs were invisible. loadTelemetryIdentity refused to mint
   an installId unless stdout was a TTY, so a machine whose first run was an
   IDE-launched MCP server or a script emitted nothing, ever. Mint on first run
   regardless of surface (still honoring CI/DO_NOT_TRACK/KTX_TELEMETRY_DISABLED),
   writing the one-time notice to stderr — safe under the MCP stdio protocol,
   which reserves stdout. Drop the now-unused stdoutIsTTY option.

3. No guard against silent emit regressions (the 0.7.0 scan_completed blackout).
   Add tests: the shared executePublicIngestTarget chokepoint emits exactly one
   ingest_completed on success and on the preflight-failure branch, and a
   database target invokes the scan that emits scan_completed; plus coverage for
   the aborted-flush helper.

Identity is unchanged otherwise: every event still attributes to the installId
in ~/.ktx/telemetry.json. No event/field changes, so Node<->Python schema parity
is untouched. Docs updated to reflect first-run-on-any-surface activation.
This commit is contained in:
Andrey Avtomonov 2026-06-02 23:19:37 +02:00
parent 2334a4b6e3
commit cb6a67c2d7
7 changed files with 219 additions and 66 deletions

View file

@ -6,11 +6,10 @@ description: Understand what usage telemetry ktx collects and how to opt out.
**ktx** collects aggregated usage telemetry so maintainers can see
which commands work, where setup fails, and which parts of the data-agent
workflow need improvement. Telemetry is opt-out: it turns on the first time you
run **ktx** in an interactive terminal, which prints a one-time notice. From
then on the same install also reports background activity that has no terminal
of its own, such as the local MCP server your agent calls. It stays disabled in
CI, whenever an opt-out is set, and until that first interactive run has shown
the notice.
run **ktx** in any way — an interactive command, a script, or an
agent-launched MCP server — and prints a one-time notice (to the terminal when
there is one, otherwise to standard error). It stays disabled in CI and whenever
an opt-out is set.
## Opt out

View file

@ -89,6 +89,46 @@ export async function runInitForCommander(
return await runInit(args, io);
}
function signalExitCode(signal: NodeJS.Signals): number {
// 128 + signal number: SIGINT (2) -> 130, SIGTERM (15) -> 143.
return signal === 'SIGTERM' ? 143 : 130;
}
/**
* Flush telemetry on interrupt for the real CLI process. `capture()` is
* fire-and-forget and the only flush guarantee lives in a `finally` a signal
* skips, so Ctrl-C / `kill` of a long-running command (ingest, `mcp stdio`)
* would otherwise drop its `command` event and queued events. Installed only
* when driving the actual process; programmatic/test callers pass their own
* `io` and never reach here. Returns a disposer that removes the listeners.
*/
function installTelemetrySignalFlush(io: KtxCliIo, info: KtxCliPackageInfo): () => void {
let handling = false;
const handle = (signal: NodeJS.Signals): void => {
if (handling) {
process.exit(signalExitCode(signal));
}
handling = true;
void (async () => {
try {
const { emitAbortedCommandAndShutdown } = await import('./telemetry/index.js');
await emitAbortedCommandAndShutdown({ packageInfo: info, io });
} catch {
// Best-effort: never let a telemetry hiccup block the interrupt exit.
}
process.exit(signalExitCode(signal));
})();
};
const onSigint = (): void => handle('SIGINT');
const onSigterm = (): void => handle('SIGTERM');
process.on('SIGINT', onSigint);
process.on('SIGTERM', onSigterm);
return () => {
process.off('SIGINT', onSigint);
process.off('SIGTERM', onSigterm);
};
}
export async function runKtxCli(
argv = process.argv.slice(2),
io: KtxCliIo = process,
@ -98,7 +138,14 @@ export async function runKtxCli(
profileMark('runtime:runKtxCli');
const { runCommanderKtxCli } = await profileSpan('import ./cli-program.js', () => import('./cli-program.js'));
return await runCommanderKtxCli(argv, io, deps, info, {
runInit: runInitForCommander,
});
// Real-process entry only: flush telemetry if interrupted. Test/programmatic
// callers pass their own `io`, so they never install process-level handlers.
const removeSignalFlush = (io as unknown) === process ? installTelemetrySignalFlush(io, info) : undefined;
try {
return await runCommanderKtxCli(argv, io, deps, info, {
runInit: runInitForCommander,
});
} finally {
removeSignalFlush?.();
}
}

View file

@ -37,7 +37,6 @@ function styleNotice(notice: string, env: TelemetryIdentityEnv): string {
export interface LoadTelemetryIdentityOptions {
homeDir?: string;
env?: TelemetryIdentityEnv;
stdoutIsTTY: boolean;
stderr: { write(chunk: string): void };
now?: () => Date;
}
@ -94,13 +93,12 @@ export async function loadTelemetryIdentity(options: LoadTelemetryIdentityOption
};
}
// No identity yet. Minting one means showing the one-time opt-out notice, so
// first-run creation requires an interactive surface; a headless first run
// stays disabled and defers enablement until the next interactive run.
if (options.stdoutIsTTY !== true) {
return { enabled: false, createdFile: false, noticeShown: false, path };
}
// No identity yet → mint one regardless of surface. Telemetry is opt-out, so
// a fresh install is counted even when its first run is headless (an
// IDE-launched `ktx mcp stdio`, a scripted invocation); otherwise those
// installs would be permanently invisible. Opt-out env vars are honored
// above. The one-time notice is written to stderr — safe even under MCP
// stdio, which reserves stdout for its JSON-RPC protocol.
const timestamp = (options.now ?? (() => new Date()))().toISOString();
const next = {
installId: randomUUID(),

View file

@ -22,7 +22,6 @@ export type { CommandOutcome, CompletedCommandSpan };
export async function showTelemetryNoticeIfNeeded(io: KtxCliIo, packageInfo: KtxCliPackageInfo): Promise<void> {
const identity = await loadTelemetryIdentity({
stdoutIsTTY: io.stdout.isTTY === true,
stderr: io.stderr,
env: process.env,
});
@ -81,7 +80,6 @@ export async function emitTelemetryEvent<Name extends TelemetryEventName>(input:
}): Promise<void> {
const debug = telemetryDebugEnabled();
const identity = await loadTelemetryIdentity({
stdoutIsTTY: input.io.stdout.isTTY === true,
stderr: input.io.stderr,
env: process.env,
});
@ -154,3 +152,20 @@ export async function emitCompletedCommand(input: {
packageInfo: input.packageInfo,
});
}
/**
* Flush telemetry when the process is interrupted (Ctrl-C / kill). The normal
* `command` emit + flush lives in a `finally` that a signal skips, so without
* this an interrupted long-running command (ingest, `mcp stdio`) loses its
* `command` event and any queued events. Marks the active command span as
* `aborted`, emits it, and drains the emitter. Best-effort and idempotent: if
* the span was already completed (normal exit racing a signal) the emit no-ops.
*/
export async function emitAbortedCommandAndShutdown(input: {
packageInfo: KtxCliPackageInfo;
io: KtxCliIo;
}): Promise<void> {
const completed = completeCommandSpan({ completedAt: performance.now(), outcome: 'aborted' });
await emitCompletedCommand({ completed, packageInfo: input.packageInfo, io: input.io });
await shutdownTelemetryEmitter();
}

View file

@ -477,17 +477,51 @@ describe('runKtxPublicIngest', () => {
all: false,
}).targets;
const runScan = vi.fn(async () => 0);
const result = await executePublicIngestTarget(
target,
{ command: 'run', projectDir: '/tmp/project', targetConnectionId: 'warehouse', all: false, json: false, inputMode: 'disabled' },
io.io,
{ runScan: vi.fn(async () => 0) },
{ runScan },
project,
);
expect(result.steps.some((step) => step.status === 'failed')).toBe(false);
expect(occurrences(io.stderr(), '"event":"ingest_completed"')).toBe(1);
expect(io.stderr()).toContain('"outcome":"ok"');
// A database-ingest target must run a scan — runKtxScan is what emits
// scan_completed, so this guards against the 0.7.0-style regression where a
// path stopped triggering the scan and the event silently went to zero.
expect(runScan).toHaveBeenCalledTimes(1);
});
it('still emits ingest_completed when a target fails preflight (early-return branch)', async () => {
// The chokepoint must emit on every internal branch, including the early
// preflight-failure return — otherwise failed-setup installs vanish.
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
const io = makeIo({ isTTY: true });
// projectWithConnections leaves enrichment unconfigured → preflight failure.
const project = projectWithConnections({ warehouse: { driver: 'postgres' } });
const [target] = buildPublicIngestPlan(project, {
projectDir: '/tmp/project',
targetConnectionId: 'warehouse',
all: false,
}).targets;
expect(target.preflightFailure).toBeTruthy();
const runScan = vi.fn(async () => 0);
await executePublicIngestTarget(
target,
{ command: 'run', projectDir: '/tmp/project', targetConnectionId: 'warehouse', all: false, json: false, inputMode: 'disabled' },
io.io,
{ runScan },
project,
);
expect(occurrences(io.stderr(), '"event":"ingest_completed"')).toBe(1);
expect(io.stderr()).toContain('"outcome":"error"');
expect(runScan).not.toHaveBeenCalled();
});
it('emits one ingest_completed per target and never double-emits across a multi-target run', async () => {

View file

@ -11,18 +11,15 @@ import {
type TelemetryIdentityEnv,
} from '../../src/telemetry/identity.js';
function makeIo(stdoutIsTTY = true) {
function makeIo() {
let stderr = '';
return {
io: {
stdout: { isTTY: stdoutIsTTY, write: () => {} },
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
stderr: {
write: (chunk: string) => {
stderr += chunk;
},
},
stderr: () => stderr,
read: () => stderr,
};
}
@ -39,14 +36,13 @@ describe('telemetry identity', () => {
await rm(homeDir, { recursive: true, force: true });
});
it('creates the telemetry file and one-line notice on first interactive enabled load', async () => {
const testIo = makeIo(true);
it('creates the telemetry file and one-line notice on first enabled load', async () => {
const testIo = makeIo();
const identity = await loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: testIo.io.stderr,
stderr: testIo.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
@ -54,7 +50,7 @@ describe('telemetry identity', () => {
expect(identity.installId).toMatch(/^[0-9a-f-]{36}$/);
expect(identity.createdFile).toBe(true);
expect(identity.noticeShown).toBe(true);
expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`);
expect(testIo.read()).toBe(`\x1b[2m${TELEMETRY_NOTICE}\x1b[22m\n`);
const stored = JSON.parse(await readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')) as {
enabled: boolean;
@ -64,26 +60,46 @@ describe('telemetry identity', () => {
expect(stored.noticeShownVersion).toBe(1);
});
it('mints an identity on a headless first run (no TTY required)', async () => {
// A fresh install whose first invocation is headless (IDE-launched
// `ktx mcp stdio`, a scripted run) must still be counted. The one-time
// notice goes to stderr, which is safe even under the MCP stdio protocol.
const testIo = makeIo();
const identity = await loadTelemetryIdentity({
homeDir,
env,
stderr: testIo.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(identity).toMatchObject({ enabled: true, createdFile: true, noticeShown: true });
expect(identity.installId).toMatch(/^[0-9a-f-]{36}$/);
expect(testIo.read()).toBe(`\x1b[2m${TELEMETRY_NOTICE}\x1b[22m\n`);
const stored = JSON.parse(await readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')) as {
enabled: boolean;
};
expect(stored.enabled).toBe(true);
});
it('emits the notice without ANSI when NO_COLOR is set', async () => {
const testIo = makeIo(true);
const testIo = makeIo();
await loadTelemetryIdentity({
homeDir,
env: { NO_COLOR: '1' },
stdoutIsTTY: true,
stderr: testIo.io.stderr,
stderr: testIo.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`);
expect(testIo.read()).toBe(`${TELEMETRY_NOTICE}\n`);
});
it('does not create a file when env disables telemetry', async () => {
const identity = await loadTelemetryIdentity({
homeDir,
env: { KTX_TELEMETRY_DISABLED: '1' },
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
stderr: makeIo().stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});
@ -91,26 +107,16 @@ describe('telemetry identity', () => {
await expect(readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')).rejects.toThrow();
});
it('does not create a file for CI or non-TTY command invocations', async () => {
it('does not create a file under CI', async () => {
await expect(
loadTelemetryIdentity({
homeDir,
env: { CI: '1' },
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
}),
).resolves.toMatchObject({ enabled: false, createdFile: false });
await expect(
loadTelemetryIdentity({
homeDir,
env: {},
stdoutIsTTY: false,
stderr: makeIo(false).io.stderr,
stderr: makeIo().stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
}),
).resolves.toMatchObject({ enabled: false, createdFile: false });
await expect(readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')).rejects.toThrow();
});
it('honors persistent enabled false', async () => {
@ -135,8 +141,7 @@ describe('telemetry identity', () => {
loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
stderr: makeIo().stderr,
now: () => new Date('2026-05-22T15:00:00.000Z'),
}),
).resolves.toMatchObject({
@ -146,7 +151,7 @@ describe('telemetry identity', () => {
});
});
it('enables a consented identity without a TTY (MCP servers run headless)', async () => {
it('honors a consented identity without re-showing the notice', async () => {
await mkdir(join(homeDir, '.ktx'), { recursive: true });
await writeFile(
join(homeDir, '.ktx', 'telemetry.json'),
@ -163,14 +168,13 @@ describe('telemetry identity', () => {
) + '\n',
'utf-8',
);
const testIo = makeIo(false);
const testIo = makeIo();
await expect(
loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: false,
stderr: testIo.io.stderr,
stderr: testIo.stderr,
now: () => new Date('2026-05-22T15:00:00.000Z'),
}),
).resolves.toMatchObject({
@ -179,12 +183,11 @@ describe('telemetry identity', () => {
createdFile: false,
noticeShown: false,
});
// The one-time notice belongs to interactive surfaces only; a headless load
// must never write it (the MCP stdio protocol shares the process streams).
expect(testIo.stderr()).toBe('');
// An already-consented identity must not re-emit the one-time notice.
expect(testIo.read()).toBe('');
});
it('keeps opt-outs suppressing a consented identity without a TTY', async () => {
it('keeps opt-outs suppressing a consented identity', async () => {
await mkdir(join(homeDir, '.ktx'), { recursive: true });
await writeFile(
join(homeDir, '.ktx', 'telemetry.json'),
@ -207,8 +210,7 @@ describe('telemetry identity', () => {
loadTelemetryIdentity({
homeDir,
env: optOut,
stdoutIsTTY: false,
stderr: makeIo(false).io.stderr,
stderr: makeIo().stderr,
now: () => new Date('2026-05-22T15:00:00.000Z'),
}),
).resolves.toMatchObject({ enabled: false });
@ -222,8 +224,7 @@ describe('telemetry identity', () => {
const identity = await loadTelemetryIdentity({
homeDir,
env,
stdoutIsTTY: true,
stderr: makeIo(true).io.stderr,
stderr: makeIo().stderr,
now: () => new Date('2026-05-22T14:33:02.000Z'),
});

View file

@ -4,7 +4,8 @@ import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import type { KtxCliIo } from '../../src/cli-runtime.js';
import { emitTelemetryEvent } from '../../src/telemetry/index.js';
import { beginCommandSpan, emitAbortedCommandAndShutdown, emitTelemetryEvent } from '../../src/telemetry/index.js';
import { resetCommandSpan } from '../../src/telemetry/command-hook.js';
function makeIo(): { io: KtxCliIo; stderr: () => string } {
let stderr = '';
@ -61,3 +62,61 @@ describe('emitTelemetryEvent', () => {
await expect(readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')).rejects.toThrow();
});
});
describe('emitAbortedCommandAndShutdown', () => {
let homeDir: string;
beforeEach(async () => {
homeDir = await mkdtemp(join(tmpdir(), 'ktx-telemetry-abort-'));
vi.stubEnv('HOME', homeDir);
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1');
vi.stubEnv('CI', '');
vi.stubEnv('KTX_TELEMETRY_DISABLED', '');
vi.stubEnv('DO_NOT_TRACK', '');
resetCommandSpan();
});
afterEach(async () => {
resetCommandSpan();
vi.unstubAllEnvs();
await rm(homeDir, { recursive: true, force: true });
});
it('flushes the active command span as aborted (the signal path)', async () => {
const testIo = makeIo();
beginCommandSpan({
commandPath: ['ktx', 'ingest'],
flagsPresent: {},
hasProject: true,
attachProjectGroup: false,
startedAt: performance.now(),
});
await emitAbortedCommandAndShutdown({
packageInfo: { name: '@kaelio/ktx', version: '0.0.0-test' },
io: testIo.io,
});
expect(testIo.stderr()).toContain('"event":"command"');
expect(testIo.stderr()).toContain('"outcome":"aborted"');
expect(testIo.stderr()).toContain('"commandPath":["ktx","ingest"]');
});
it('is idempotent: a second call (or no active span) emits nothing', async () => {
const testIo = makeIo();
beginCommandSpan({
commandPath: ['ktx', 'ingest'],
flagsPresent: {},
hasProject: true,
attachProjectGroup: false,
startedAt: performance.now(),
});
const pkg = { name: '@kaelio/ktx', version: '0.0.0-test' };
await emitAbortedCommandAndShutdown({ packageInfo: pkg, io: testIo.io });
const secondIo = makeIo();
await emitAbortedCommandAndShutdown({ packageInfo: pkg, io: secondIo.io });
expect(secondIo.stderr()).not.toContain('"event":"command"');
});
});