diff --git a/docs-site/content/docs/community/telemetry.mdx b/docs-site/content/docs/community/telemetry.mdx index 9c22b432..81a4f91d 100644 --- a/docs-site/content/docs/community/telemetry.mdx +++ b/docs-site/content/docs/community/telemetry.mdx @@ -3,10 +3,14 @@ title: Telemetry description: Understand what anonymous usage telemetry ktx collects and how to opt out. --- -**ktx** collects anonymous, aggregated usage telemetry from interactive CLI -runs so maintainers can see which commands work, where setup fails, and which -parts of the data-agent workflow need improvement. Telemetry is opt-out and -disabled automatically in CI and non-interactive runs. +**ktx** collects anonymous, aggregated usage telemetry so maintainers can see +which commands work, where setup fails, and which parts of the data-agent +workflow need improvement. Telemetry is opt-out: it turns on the first time you +run **ktx** in an interactive terminal, which prints a one-time notice. From +then on the same install also reports background activity that has no terminal +of its own, such as the local MCP server your agent calls. It stays disabled in +CI, whenever an opt-out is set, and until that first interactive run has shown +the notice. ## Opt out @@ -17,8 +21,7 @@ Use any of these mechanisms to disable telemetry: | `export KTX_TELEMETRY_DISABLED=1` | Disables telemetry for the shell and child processes | | `export DO_NOT_TRACK=1` | Standard do-not-track environment variable | | `CI=1` | Automatic in CI | -| Non-TTY output | Automatic for pipes and scripts | -| Edit `~/.ktx/telemetry.json` and set `"enabled": false` | Persistent for the machine | +| Edit `~/.ktx/telemetry.json` and set `"enabled": false` | Persistent for the machine, including the MCP server | ## What we collect @@ -27,6 +30,11 @@ succeed or fail, and basic environment metadata (CLI version, Node version, OS platform). For project-level analysis, **ktx** sends a salted hash of the project directory — never the raw path. +When an agent reaches **ktx** through MCP, we also record the connecting client +tool's self-reported name and version (for example Claude Desktop, Cursor, or +Cline) so we can see which agents people use **ktx** with. That describes the +tool, never you or your data. + ## What we never collect - File paths, hostnames, environment variable values, or command arguments diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index 057e570b..03cd2ad4 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -6,6 +6,7 @@ import type { MemoryAgentInput } from '../../context/memory/types.js'; import { emitTelemetryEvent, mcpTelemetrySampleRate, shouldEmitMcpTelemetry } from '../../telemetry/index.js'; import { scrubErrorClass } from '../../telemetry/scrubber.js'; import type { + KtxMcpClientInfo, KtxMcpContextPorts, KtxMcpProgressCallback, KtxMcpServerLike, @@ -22,6 +23,7 @@ export interface RegisterKtxContextToolsDeps { userContext: KtxMcpUserContext; projectDir?: string; io?: KtxCliIo; + getClientInfo?: () => KtxMcpClientInfo | undefined; } const connectionIdSchema = z.string().min(1); @@ -526,9 +528,24 @@ function registerParsedTool( }); } +/** + * Resolves the connected client's identity into the raw telemetry fields. The + * strings are client-controlled and untrusted, so they only ever land in the + * telemetry property bag — never in paths, logs, or error messages. + */ +function clientTelemetryFields( + getClientInfo: (() => KtxMcpClientInfo | undefined) | undefined, +): { mcpClientName?: string; mcpClientVersion?: string } { + const client = getClientInfo?.(); + return { + ...(client?.name ? { mcpClientName: client.name } : {}), + ...(client?.version ? { mcpClientVersion: client.version } : {}), + }; +} + function instrumentMcpServer( server: KtxMcpServerLike, - telemetry: { projectDir?: string; io?: KtxCliIo }, + telemetry: { projectDir?: string; io?: KtxCliIo; getClientInfo?: () => KtxMcpClientInfo | undefined }, ): KtxMcpServerLike { return { registerTool(name, config, handler) { @@ -548,6 +565,7 @@ function instrumentMcpServer( outcome: isError ? 'error' : 'ok', durationMs: Math.max(0, performance.now() - startedAt), sampleRate: mcpTelemetrySampleRate(), + ...clientTelemetryFields(telemetry.getClientInfo), }, }); } @@ -565,6 +583,7 @@ function instrumentMcpServer( ...(errorClass ? { errorClass } : {}), durationMs: Math.max(0, performance.now() - startedAt), sampleRate: mcpTelemetrySampleRate(), + ...clientTelemetryFields(telemetry.getClientInfo), }, }); } @@ -577,7 +596,11 @@ function instrumentMcpServer( export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void { const { ports, userContext } = deps; - const server = instrumentMcpServer(deps.server, { projectDir: deps.projectDir, io: deps.io }); + const server = instrumentMcpServer(deps.server, { + projectDir: deps.projectDir, + io: deps.io, + getClientInfo: deps.getClientInfo, + }); if (ports.connections) { const connections = ports.connections; diff --git a/packages/cli/src/context/mcp/server.ts b/packages/cli/src/context/mcp/server.ts index 97d79525..85871467 100644 --- a/packages/cli/src/context/mcp/server.ts +++ b/packages/cli/src/context/mcp/server.ts @@ -11,6 +11,7 @@ export function createKtxMcpServer(deps: KtxMcpServerDeps): KtxMcpServerDeps['se userContext: deps.userContext, projectDir: deps.projectDir, io: deps.io, + getClientInfo: deps.getClientInfo, }); } @@ -30,6 +31,9 @@ export function createDefaultKtxMcpServer( contextTools: deps.contextTools, projectDir: deps.projectDir, io: deps.io, + // The SDK populates the client identity after the initialize handshake, so + // read it lazily at emit time rather than at registration (undefined here). + getClientInfo: () => server.server.getClientVersion(), }); return server; } diff --git a/packages/cli/src/context/mcp/types.ts b/packages/cli/src/context/mcp/types.ts index e9fc0ff2..3694e3d6 100644 --- a/packages/cli/src/context/mcp/types.ts +++ b/packages/cli/src/context/mcp/types.ts @@ -50,6 +50,16 @@ export interface KtxMcpUserContext { userId: string; } +/** + * Identity of the connected MCP client tool (e.g. Claude Desktop, Cursor), + * read from the initialize handshake. Untrusted, client-controlled strings — + * use only as telemetry properties, never to build paths or log lines. + */ +export interface KtxMcpClientInfo { + name: string; + version: string; +} + export interface KtxMcpServerLike { registerTool( name: string, @@ -177,4 +187,6 @@ export interface KtxMcpServerDeps { contextTools?: KtxMcpContextPorts; projectDir?: string; io?: KtxCliIo; + /** Reads the connected client's identity once the initialize handshake completes. */ + getClientInfo?: () => KtxMcpClientInfo | undefined; } diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json index 628c8f4b..acad7988 100644 --- a/packages/cli/src/telemetry/events.schema.json +++ b/packages/cli/src/telemetry/events.schema.json @@ -157,7 +157,9 @@ "outcome", "durationMs", "errorClass", - "sampleRate" + "sampleRate", + "mcpClientName", + "mcpClientVersion" ] }, { @@ -1131,7 +1133,13 @@ }, "sampleRate": { "type": "number", - "const": 0.1 + "const": 1 + }, + "mcpClientName": { + "type": "string" + }, + "mcpClientVersion": { + "type": "string" } }, "required": [ diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts index 5e5b5335..e751cd70 100644 --- a/packages/cli/src/telemetry/events.ts +++ b/packages/cli/src/telemetry/events.ts @@ -156,7 +156,12 @@ const mcpRequestCompletedSchema = telemetryCommonEnvelopeSchema outcome: outcomeSchema, durationMs: z.number().nonnegative(), errorClass: z.string().optional(), - sampleRate: z.literal(0.1), + sampleRate: z.literal(1), + // Raw, client-tool-controlled identity from the MCP initialize handshake + // (clientInfo.name/version). Optional: clients may omit clientInfo. Stored + // verbatim — normalize the free-form names at query time, not at write time. + mcpClientName: z.string().optional(), + mcpClientVersion: z.string().optional(), }) .strict(); @@ -325,7 +330,7 @@ export const telemetryEventCatalog = [ { name: 'mcp_request_completed', description: 'Emitted for sampled MCP tool requests.', - fields: ['toolName', 'outcome', 'durationMs', 'errorClass', 'sampleRate'], + fields: ['toolName', 'outcome', 'durationMs', 'errorClass', 'sampleRate', 'mcpClientName', 'mcpClientVersion'], }, { name: 'daemon_started', diff --git a/packages/cli/src/telemetry/identity.ts b/packages/cli/src/telemetry/identity.ts index 4d46307c..69985f00 100644 --- a/packages/cli/src/telemetry/identity.ts +++ b/packages/cli/src/telemetry/identity.ts @@ -75,17 +75,14 @@ export async function loadTelemetryIdentity(options: LoadTelemetryIdentityOption const env = options.env ?? process.env; const path = telemetryPath(options.homeDir ?? homedir()); - if (envDisablesTelemetry(env) || options.stdoutIsTTY !== true) { - const existing = await readTelemetryFile(path); - return { - installId: existing?.installId, - enabled: false, - createdFile: false, - noticeShown: false, - path, - }; + if (envDisablesTelemetry(env)) { + return { enabled: false, createdFile: false, noticeShown: false, path }; } + // Honor an already-consented identity regardless of the current surface. + // Telemetry enablement follows the persisted decision and opt-out env vars, + // not whether this invocation happens to own a TTY — MCP servers always run + // headless (stdio stubs stdout; the HTTP server runs detached). const existing = await readTelemetryFile(path); if (existing) { return { @@ -97,6 +94,13 @@ export async function loadTelemetryIdentity(options: LoadTelemetryIdentityOption }; } + // No identity yet. Minting one means showing the one-time opt-out notice, so + // first-run creation requires an interactive surface; a headless first run + // stays disabled and defers enablement until the next interactive run. + if (options.stdoutIsTTY !== true) { + return { enabled: false, createdFile: false, noticeShown: false, path }; + } + const timestamp = (options.now ?? (() => new Date()))().toISOString(); const next = { installId: randomUUID(), diff --git a/packages/cli/src/telemetry/index.ts b/packages/cli/src/telemetry/index.ts index 27c5004a..c5b9b729 100644 --- a/packages/cli/src/telemetry/index.ts +++ b/packages/cli/src/telemetry/index.ts @@ -52,7 +52,11 @@ type TelemetryEventFields = Omit< >; const emittedProjectSnapshots = new Set(); -const MCP_SAMPLE_RATE = 0.1 as const; +// MCP tool calls are captured at full rate while ktx is early-stage: at current +// install counts any sampling below 1.0 yields too few events to be useful, and +// the recorded sampleRate lets us dial this down (and reweight history) once +// per-session call volume justifies it. +const MCP_SAMPLE_RATE = 1 as const; let mcpSampled: boolean | undefined; function telemetryDebugEnabled(): boolean { @@ -64,7 +68,7 @@ export function shouldEmitMcpTelemetry(): boolean { return mcpSampled; } -export function mcpTelemetrySampleRate(): 0.1 { +export function mcpTelemetrySampleRate(): 1 { return MCP_SAMPLE_RATE; } diff --git a/packages/cli/test/context/mcp/server.test.ts b/packages/cli/test/context/mcp/server.test.ts index 38bc4af9..95985d68 100644 --- a/packages/cli/test/context/mcp/server.test.ts +++ b/packages/cli/test/context/mcp/server.test.ts @@ -47,10 +47,10 @@ function makeFakeServer() { }; } -function makeIo() { +function makeIo(stdoutIsTTY = true) { let stderr = ''; return { - stdout: { isTTY: true, write() {} }, + stdout: { isTTY: stdoutIsTTY, write() {} }, stderr: { write(chunk: string) { stderr += chunk; @@ -272,8 +272,48 @@ describe('createKtxMcpServer', () => { expect(io.stderrText()).toContain('"event":"mcp_request_completed"'); expect(io.stderrText()).toContain('"toolName":"wiki_search"'); - expect(io.stderrText()).toContain('"sampleRate":0.1'); + expect(io.stderrText()).toContain('"sampleRate":1'); expect(io.stderrText()).not.toContain(projectDir); + // No client connected through the SDK here, so getClientInfo is absent: the + // event still emits and the optional client fields are simply omitted. + expect(io.stderrText()).not.toContain('mcpClientName'); + expect(io.stderrText()).not.toContain('mcpClientVersion'); + }); + + it('captures the connecting MCP client name and version', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + // Non-TTY io keeps the test hermetic (no ~/.ktx/telemetry.json is created) + // and mirrors a real headless MCP server; debug mode still emits the payload. + const io = makeIo(false); + + const server = createDefaultKtxMcpServer({ + name: 'ktx-test', + version: '0.0.0-test', + userContext: { userId: 'mcp-user' }, + projectDir: '/tmp/ktx-mcp-client-telemetry', + io, + contextTools: { + knowledge: { + search: vi.fn().mockResolvedValue({ results: [], totalFound: 0 }), + read: vi.fn().mockResolvedValue(null), + }, + }, + }); + const client = new Client({ name: 'test-agent', version: '9.9.9' }); + const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair(); + await Promise.all([server.connect(serverTransport), client.connect(clientTransport)]); + + try { + await client.callTool({ name: 'wiki_search', arguments: { query: 'revenue recognition', limit: 5 } }); + } finally { + await client.close(); + await server.close(); + } + + expect(io.stderrText()).toContain('"event":"mcp_request_completed"'); + expect(io.stderrText()).toContain('"mcpClientName":"test-agent"'); + expect(io.stderrText()).toContain('"mcpClientVersion":"9.9.9"'); }); it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => { diff --git a/packages/cli/test/telemetry/events.snapshot.test.ts b/packages/cli/test/telemetry/events.snapshot.test.ts index 7a1b76f7..1ea67339 100644 --- a/packages/cli/test/telemetry/events.snapshot.test.ts +++ b/packages/cli/test/telemetry/events.snapshot.test.ts @@ -128,7 +128,9 @@ describe('telemetry privacy snapshot', () => { outcome: 'error', errorClass: 'KtxProjectMissingAbortError', durationMs: 12, - sampleRate: 0.1, + sampleRate: 1, + mcpClientName: 'Claude Desktop', + mcpClientVersion: '0.7.1', }), ]; diff --git a/packages/cli/test/telemetry/identity.test.ts b/packages/cli/test/telemetry/identity.test.ts index 31c3bfb5..e5b6bddf 100644 --- a/packages/cli/test/telemetry/identity.test.ts +++ b/packages/cli/test/telemetry/identity.test.ts @@ -146,6 +146,75 @@ describe('telemetry identity', () => { }); }); + it('enables a consented identity without a TTY (MCP servers run headless)', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + JSON.stringify( + { + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + noticeShownAt: '2026-05-22T14:33:02.000Z', + noticeShownVersion: 1, + createdAt: '2026-05-22T14:33:02.000Z', + }, + null, + 2, + ) + '\n', + 'utf-8', + ); + const testIo = makeIo(false); + + await expect( + loadTelemetryIdentity({ + homeDir, + env, + stdoutIsTTY: false, + stderr: testIo.io.stderr, + now: () => new Date('2026-05-22T15:00:00.000Z'), + }), + ).resolves.toMatchObject({ + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + createdFile: false, + noticeShown: false, + }); + // The one-time notice belongs to interactive surfaces only; a headless load + // must never write it (the MCP stdio protocol shares the process streams). + expect(testIo.stderr()).toBe(''); + }); + + it('keeps opt-outs suppressing a consented identity without a TTY', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + JSON.stringify( + { + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + noticeShownAt: '2026-05-22T14:33:02.000Z', + noticeShownVersion: 1, + createdAt: '2026-05-22T14:33:02.000Z', + }, + null, + 2, + ) + '\n', + 'utf-8', + ); + + for (const optOut of [{ KTX_TELEMETRY_DISABLED: '1' }, { DO_NOT_TRACK: '1' }, { CI: '1' }]) { + await expect( + loadTelemetryIdentity({ + homeDir, + env: optOut, + stdoutIsTTY: false, + stderr: makeIo(false).io.stderr, + now: () => new Date('2026-05-22T15:00:00.000Z'), + }), + ).resolves.toMatchObject({ enabled: false }); + } + }); + it('recreates a corrupted file instead of surfacing an error to users', async () => { await mkdir(join(homeDir, '.ktx'), { recursive: true }); await writeFile(join(homeDir, '.ktx', 'telemetry.json'), '{bad json', 'utf-8'); diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json index 628c8f4b..acad7988 100644 --- a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -157,7 +157,9 @@ "outcome", "durationMs", "errorClass", - "sampleRate" + "sampleRate", + "mcpClientName", + "mcpClientVersion" ] }, { @@ -1131,7 +1133,13 @@ }, "sampleRate": { "type": "number", - "const": 0.1 + "const": 1 + }, + "mcpClientName": { + "type": "string" + }, + "mcpClientVersion": { + "type": "string" } }, "required": [