diff --git a/AGENTS.md b/AGENTS.md index 0f62b902..a8640c48 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -271,6 +271,25 @@ use `PascalCase` without the suffix. - Regex may be used for non-structural sanitization, but not to interpret SQL structure. +## Telemetry + +**ktx** ships anonymous PostHog telemetry. When adding commands or events: + +- **MUST NOT**: Add fields that carry user data — file paths, hostnames, + environment values, SQL text, schema/table/column names, error messages, + argv, or secrets. Schemas use Zod `.strict()`, so unknown fields throw at + runtime; the privacy rule is enforced by the schema, not by goodwill. +- **MUST**: Add new event types in `packages/cli/src/telemetry/events.ts`. + `pnpm run build` mirrors the catalog into the Python daemon schema; a + pytest checks Node ↔ Python parity. +- **SHOULD**: Let Commander's `preAction` hook auto-emit the `command` event + for any new CLI command — do not call `trackTelemetryEvent` manually for + command-level success/failure. +- **MUST**: Update the public overview at + `docs-site/content/docs/community/telemetry.mdx` only when the *category* + of collected data changes. Adding another event with no new field types + needs no docs change. + ## Documentation and Specs - Keep public documentation in `README.md`, package READMEs, example READMEs, diff --git a/README.md b/README.md index b6f0494e..285f92a6 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,13 @@ ktx context built: yes Agent integration ready: yes (codex:project) ``` +## Telemetry + +**ktx** collects anonymous usage telemetry from interactive CLI runs to improve +setup, command reliability, and data-agent workflows. See +[Telemetry](https://docs.kaelio.com/ktx/docs/community/telemetry) for the event +catalog, privacy details, and opt-out options. 
+ ## Common Commands | Command | Purpose | diff --git a/docs-site/content/docs/community/meta.json b/docs-site/content/docs/community/meta.json index e181be6c..199bc1b8 100644 --- a/docs-site/content/docs/community/meta.json +++ b/docs-site/content/docs/community/meta.json @@ -1,5 +1,5 @@ { "title": "Community", "defaultOpen": true, - "pages": ["support", "contributing"] + "pages": ["support", "contributing", "telemetry"] } diff --git a/docs-site/content/docs/community/telemetry.mdx b/docs-site/content/docs/community/telemetry.mdx new file mode 100644 index 00000000..9c22b432 --- /dev/null +++ b/docs-site/content/docs/community/telemetry.mdx @@ -0,0 +1,42 @@ +--- +title: Telemetry +description: Understand what anonymous usage telemetry ktx collects and how to opt out. +--- + +**ktx** collects anonymous, aggregated usage telemetry from interactive CLI +runs so maintainers can see which commands work, where setup fails, and which +parts of the data-agent workflow need improvement. Telemetry is opt-out and +disabled automatically in CI and non-interactive runs. + +## Opt out + +Use any of these mechanisms to disable telemetry: + +| Mechanism | Effect | +|-----------|--------| +| `export KTX_TELEMETRY_DISABLED=1` | Disables telemetry for the shell and child processes | +| `export DO_NOT_TRACK=1` | Standard do-not-track environment variable | +| `CI=1` | Automatic in CI | +| Non-TTY output | Automatic for pipes and scripts | +| Edit `~/.ktx/telemetry.json` and set `"enabled": false` | Persistent for the machine | + +## What we collect + +High-level signals only: which commands run, how long they take, whether they +succeed or fail, and basic environment metadata (CLI version, Node version, OS +platform). For project-level analysis, **ktx** sends a salted hash of the +project directory — never the raw path. 
+ +## What we never collect + +- File paths, hostnames, environment variable values, or command arguments +- `ktx.yaml` contents, connection passwords, API keys, or tokens +- Schema names, table names, column names, SQL text, or query results +- Error messages or stack traces +- Git remote URLs, Git user email, OS user, or hostname + +## Storage and retention + +Telemetry is sent to PostHog, a third-party product-analytics service used by +the **ktx** maintainers. Raw event data is retained for 90 days. Aggregated +counts may be retained indefinitely. diff --git a/knip.json b/knip.json index 08939c28..178ff87e 100644 --- a/knip.json +++ b/knip.json @@ -11,6 +11,8 @@ "packages/cli": { "entry": [ "src/print-command-tree.ts!", + "src/telemetry/schema-writer.ts!", + "src/telemetry/index.ts!", "scripts/**/*.mjs", "src/**/*.test-utils.ts", "src/**/acceptance-fixtures.ts", diff --git a/packages/cli/package.json b/packages/cli/package.json index 296276c0..8d809e9d 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -29,7 +29,7 @@ }, "scripts": { "assets:demo": "node scripts/build-demo-assets.mjs", - "build": "tsc -p tsconfig.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs", + "build": "tsc -p tsconfig.json && node dist/telemetry/schema-writer.js src/telemetry/events.schema.json ../../python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json && node scripts/copy-runtime-assets.mjs && node ../../scripts/prepare-cli-bin.mjs", "clean": "node -e \"fs.rmSync('dist', { recursive: true, force: true })\"", "docs:commands": "pnpm run build && node dist/print-command-tree.js", "smoke": "vitest run src/standalone-smoke.test.ts src/example-smoke.test.ts --testTimeout 30000", @@ -69,6 +69,7 @@ "openai": "^6.37.0", "p-limit": "^7.3.0", "pg": "^8.20.0", + "posthog-node": "^5.0.0", "react": "^19.2.6", "simple-git": "3.36.0", "snowflake-sdk": "^2.4.1", diff --git a/packages/cli/src/cli-program-telemetry.test.ts 
b/packages/cli/src/cli-program-telemetry.test.ts new file mode 100644 index 00000000..db905442 --- /dev/null +++ b/packages/cli/src/cli-program-telemetry.test.ts @@ -0,0 +1,133 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { runCommanderKtxCli } from './cli-program.js'; +import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; + +function makeIo(stdoutIsTTY = true): { io: KtxCliIo; stdout: () => string; stderr: () => string } { + let stdout = ''; + let stderr = ''; + return { + io: { + stdout: { + isTTY: stdoutIsTTY, + write: (chunk) => { + stdout += chunk; + }, + }, + stderr: { + write: (chunk) => { + stderr += chunk; + }, + }, + }, + stdout: () => stdout, + stderr: () => stderr, + }; +} + +const info: KtxCliPackageInfo = { name: '@kaelio/ktx', version: '0.4.1' }; + +describe('runCommanderKtxCli telemetry', () => { + let tempDir: string; + const originalEnv = process.env; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-cli-telemetry-')); + await writeFile(join(tempDir, 'ktx.yaml'), '{}\n', 'utf-8'); + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('HOME', tempDir); + vi.stubEnv('CI', ''); + vi.stubEnv('KTX_TELEMETRY_DISABLED', ''); + vi.stubEnv('DO_NOT_TRACK', ''); + }); + + afterEach(async () => { + vi.unstubAllEnvs(); + process.env = originalEnv; + await rm(tempDir, { recursive: true, force: true }); + }); + + it('emits debug command telemetry for registered actions', async () => { + const io = makeIo(true); + await expect( + runCommanderKtxCli( + ['--project-dir', tempDir, 'status', '--help'], + io.io, + {}, + info, + { runInit: async () => 0 }, + ), + ).resolves.toBe(0); + + expect(io.stderr()).not.toContain('[telemetry]'); + + const statusIo = makeIo(true); + const deps: KtxCliDeps = { doctor: async () => 0 }; + + await expect( 
+ runCommanderKtxCli( + ['--project-dir', tempDir, 'status', '--json'], + statusIo.io, + deps, + info, + { runInit: async () => 0 }, + ), + ).resolves.toBe(0); + + expect(statusIo.stderr()).toContain('[telemetry]'); + expect(statusIo.stderr()).toContain('"event":"install_first_run"'); + expect(statusIo.stderr()).toContain('"event":"command"'); + expect(statusIo.stderr()).toContain('"commandPath":["ktx","status"]'); + expect(statusIo.stderr()).toContain('"event":"project_stack_snapshot"'); + expect(statusIo.stderr()).toContain('"connectionCount"'); + expect(statusIo.stderr()).not.toContain(tempDir); + + const noticeIndex = statusIo.stderr().indexOf('ktx collects anonymous usage data'); + const firstTelemetryIndex = statusIo.stderr().indexOf('[telemetry]'); + expect(noticeIndex).toBeGreaterThanOrEqual(0); + expect(firstTelemetryIndex).toBeGreaterThan(noticeIndex); + }); + + it('emits aborted telemetry when project validation aborts after preAction starts', async () => { + const missingProjectDir = join(tempDir, 'missing'); + await mkdir(missingProjectDir, { recursive: true }); + const io = makeIo(true); + + await expect( + runCommanderKtxCli( + ['--project-dir', missingProjectDir, 'connection'], + io.io, + {}, + info, + { runInit: async () => 0 }, + ), + ).resolves.toBe(1); + + expect(io.stderr()).toContain('[telemetry]'); + expect(io.stderr()).toContain('"outcome":"aborted"'); + expect(io.stderr()).toContain('"hasProject":false'); + expect(io.stderr()).toContain('"projectGroupAttached":false'); + expect(io.stderr()).not.toContain(missingProjectDir); + }); + + it('does not import or emit telemetry for help, version, bare non-TTY, or unknown top-level command', async () => { + const helpIo = makeIo(true); + await expect(runCommanderKtxCli(['--help'], helpIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0); + expect(helpIo.stderr()).not.toContain('[telemetry]'); + + const versionIo = makeIo(true); + await expect(runCommanderKtxCli(['--version'], 
versionIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0); + expect(versionIo.stderr()).not.toContain('[telemetry]'); + + const bareIo = makeIo(false); + await expect(runCommanderKtxCli([], bareIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(0); + expect(bareIo.stderr()).not.toContain('[telemetry]'); + + const unknownIo = makeIo(true); + await expect(runCommanderKtxCli(['unknown'], unknownIo.io, {}, info, { runInit: async () => 0 })).resolves.toBe(1); + expect(unknownIo.stderr()).not.toContain('[telemetry]'); + }); +}); diff --git a/packages/cli/src/cli-program.test.ts b/packages/cli/src/cli-program.test.ts index 565a5d56..009dfb8a 100644 --- a/packages/cli/src/cli-program.test.ts +++ b/packages/cli/src/cli-program.test.ts @@ -1,6 +1,6 @@ -import type { Command } from '@commander-js/extra-typings'; +import { Command, type CommandUnknownOpts } from '@commander-js/extra-typings'; import { describe, expect, it } from 'vitest'; -import { buildKtxProgram } from './cli-program.js'; +import { buildKtxProgram, collectCommandFlagsPresent } from './cli-program.js'; import type { KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; function stubIo(): KtxCliIo { @@ -55,3 +55,31 @@ describe('buildKtxProgram', () => { expect(wrote).toBe(''); }); }); + +describe('collectCommandFlagsPresent', () => { + it('records only CLI-sourced flags and ignores positional content that looks like a flag', async () => { + let captured: Record | undefined; + const program = new Command() + .name('ktx') + .option('--project-dir ', 'project directory') + .option('--json', 'json output', false); + program + .command('sql') + .argument('') + .requiredOption('-c, --connection ', 'connection id') + .option('--max-rows ', 'cap rows') + .action(function () { + captured = collectCommandFlagsPresent(this as unknown as CommandUnknownOpts); + }); + + await program.parseAsync( + ['--project-dir', '/tmp/p', 'sql', '-c', 'warehouse', '--', '--customer_table', 'SELECT', '1'], + { from: 
'user' }, + ); + + expect(captured).toEqual({ projectDir: true, connection: true }); + expect(captured).not.toHaveProperty('customer_table'); + expect(captured).not.toHaveProperty('json'); + expect(captured).not.toHaveProperty('maxRows'); + }); +}); diff --git a/packages/cli/src/cli-program.ts b/packages/cli/src/cli-program.ts index 84f740f5..a3c27375 100644 --- a/packages/cli/src/cli-program.ts +++ b/packages/cli/src/cli-program.ts @@ -1,6 +1,6 @@ import { existsSync } from 'node:fs'; import { join } from 'node:path'; -import { Command, InvalidArgumentError } from '@commander-js/extra-typings'; +import { Command, type CommandUnknownOpts, InvalidArgumentError } from '@commander-js/extra-typings'; import type { KtxCliDeps, KtxCliIo, KtxCliPackageInfo } from './cli-runtime.js'; import { registerConnectionCommands } from './commands/connection-commands.js'; import { registerIngestCommands } from './commands/ingest-commands.js'; @@ -14,6 +14,7 @@ import { registerAdminCommands } from './admin.js'; import { renderMissingProjectMessage } from './doctor.js'; import { findNearestKtxProjectDir, resolveKtxProjectDir } from './project-resolver.js'; import { profileMark, profileSpan } from './startup-profile.js'; +import type { CommandOutcome } from './telemetry/index.js'; profileMark('module:cli-program'); @@ -43,6 +44,8 @@ export interface BuildKtxProgramOptions { packageInfo: KtxCliPackageInfo; runInit: (args: { projectDir: string; force: boolean }, io: KtxCliIo) => Promise; setExitCode?: (code: number) => void; + argv?: string[]; + setTelemetryModule?: (telemetry: typeof import('./telemetry/index.js')) => void; } type CommanderExitLike = { exitCode: number; code: string; message: string }; @@ -327,6 +330,25 @@ function formatCliError(error: unknown): string { return error instanceof Error ? 
error.message : String(error); } +function commandOutcomeForParseResult(error: unknown, exitCode: number): CommandOutcome { + if (error) { + return isKtxProjectMissingAbortError(error) ? 'aborted' : 'error'; + } + return exitCode === 0 ? 'ok' : 'error'; +} + +function shouldAttachCommandProjectGroup(path: string[], hasProject: boolean): boolean { + if (hasProject) { + return true; + } + const rootCommand = path[1]; + const pathKey = path.join(' '); + return ( + (rootCommand !== undefined && COMMANDS_THAT_CREATE_PROJECT.has(rootCommand)) || + COMMANDS_THAT_CREATE_PROJECT.has(pathKey) + ); +} + function firstTopLevelCommandToken(argv: string[]): string | null { for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; @@ -390,11 +412,43 @@ async function runBareInteractiveCommand( return 0; } +/** @internal */ +export function collectCommandFlagsPresent(command: CommandUnknownOpts): Record { + const flags: Record = {}; + let current: CommandUnknownOpts | null = command; + while (current) { + for (const option of current.options) { + const key = option.attributeName(); + if (current.getOptionValueSource(key) === 'cli') { + flags[key] = true; + } + } + current = current.parent; + } + return flags; +} + export function buildKtxProgram(options: BuildKtxProgramOptions): Command { const program = createBaseProgram(options.packageInfo, options.io); - program.hook('preAction', (_thisCommand, actionCommand) => { - writeProjectDir(options.io, actionCommand as CommandPathNode); - ensureProjectAvailable(options.io, actionCommand as CommandPathNode); + program.hook('preAction', async (_thisCommand, actionCommand) => { + const telemetry = await import('./telemetry/index.js'); + options.setTelemetryModule?.(telemetry); + await telemetry.showTelemetryNoticeIfNeeded(options.io, options.packageInfo); + const commandNode = actionCommand as CommandPathNode; + const path = commandPath(commandNode); + const projectDir = resolveCommandProjectDir(commandNode); + const 
hasProject = ktxYamlExists(projectDir); + const attachProjectGroup = shouldAttachCommandProjectGroup(path, hasProject); + telemetry.beginCommandSpan({ + commandPath: path, + flagsPresent: collectCommandFlagsPresent(commandNode as unknown as CommandUnknownOpts), + projectDir: attachProjectGroup ? projectDir : undefined, + hasProject, + attachProjectGroup, + startedAt: performance.now(), + }); + writeProjectDir(options.io, commandNode); + ensureProjectAvailable(options.io, commandNode); }); const context: KtxCliCommandContext = { @@ -435,14 +489,19 @@ export async function runCommanderKtxCli( ): Promise { profileMark('commander:entry'); let exitCode = 0; + let telemetryModule: typeof import('./telemetry/index.js') | undefined; const program = buildKtxProgram({ io, deps, packageInfo: info, runInit: options.runInit, + argv, setExitCode: (code: number) => { exitCode = code; }, + setTelemetryModule: (telemetry) => { + telemetryModule = telemetry; + }, }); profileMark('commander:program-built'); const context: KtxCliCommandContext = { @@ -477,17 +536,29 @@ export async function runCommanderKtxCli( return 1; } + let parseError: unknown; try { await profileSpan('commander:parseAsync', () => program.parseAsync(argv, { from: 'user' })); } catch (error) { + parseError = error; if (isKtxProjectMissingAbortError(error)) { - return 1; + exitCode = 1; + } else if (isCommanderExit(error)) { + exitCode = error.exitCode === 0 ? 0 : 1; + } else { + io.stderr.write(`${formatCliError(error)}\n`); + exitCode = 1; } - if (isCommanderExit(error)) { - return error.exitCode === 0 ? 
0 : 1; + } finally { + if (telemetryModule) { + const completed = telemetryModule.completeCommandSpan({ + completedAt: performance.now(), + outcome: commandOutcomeForParseResult(parseError, exitCode), + error: parseError, + }); + await telemetryModule.emitCompletedCommand({ completed, packageInfo: info, io }); + await telemetryModule.shutdownTelemetryEmitter(); } - io.stderr.write(`${formatCliError(error)}\n`); - return 1; } return exitCode; diff --git a/packages/cli/src/commands/status-commands.ts b/packages/cli/src/commands/status-commands.ts index e2adf8f1..ec429576 100644 --- a/packages/cli/src/commands/status-commands.ts +++ b/packages/cli/src/commands/status-commands.ts @@ -2,6 +2,7 @@ import type { Command } from '@commander-js/extra-typings'; import type { KtxCliCommandContext } from '../cli-program.js'; import { resolveCommandProjectDir, resolveCommandProjectDirOverride } from '../cli-program.js'; import { findNearestKtxProjectDir } from '../project-resolver.js'; +import { emitProjectStackSnapshot } from '../telemetry/index.js'; function outputMode(options: { json?: boolean }): 'plain' | 'json' { return options.json === true ? 
'json' : 'plain'; @@ -58,11 +59,12 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC ); return; } + const projectDir = resolveCommandProjectDir(command); context.setExitCode( await runner( { command: 'project', - projectDir: resolveCommandProjectDir(command), + projectDir, outputMode: outputMode(options), verbose: options.verbose === true, fast: options.fast === true, @@ -71,6 +73,11 @@ export function registerStatusCommands(program: Command, context: KtxCliCommandC context.io, ), ); + await emitProjectStackSnapshot({ + projectDir, + io: context.io, + packageInfo: context.packageInfo, + }); }, ); } diff --git a/packages/cli/src/connection.test.ts b/packages/cli/src/connection.test.ts index b05a6f16..7cfc5b93 100644 --- a/packages/cli/src/connection.test.ts +++ b/packages/cli/src/connection.test.ts @@ -20,6 +20,7 @@ function makeIo() { return { io: { stdout: { + isTTY: true, write: (chunk: string) => { stdout += chunk; }, @@ -72,6 +73,7 @@ describe('runKtxConnection', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -137,6 +139,27 @@ describe('runKtxConnection', () => { expect(io.stdout()).toContain('Status: ok'); }); + it('emits debug telemetry for connection tests without project paths', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeConnections(projectDir, { + warehouse: { driver: 'postgres', url: 'env:DATABASE_URL' }, + }); + const { connector } = nativeConnector('postgres'); + const io = makeIo(); + + const code = await runKtxConnection({ command: 'test', projectDir, connectionId: 'warehouse' }, io.io, { + createScanConnector: vi.fn(async () => connector), + }); + + expect(code).toBe(0); + expect(io.stderr()).toContain('"event":"connection_test"'); + expect(io.stderr()).toContain('"driver":"postgres"'); + 
expect(io.stderr()).not.toContain(projectDir); + }); + it('reports the connector error and still cleans up when native testConnection fails', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index 174fe5ad..bb99d4fd 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -14,6 +14,9 @@ import type { KtxCliIo } from './index.js'; import { bold, dim, green, red, SYMBOLS } from './io/symbols.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { profileMark } from './startup-profile.js'; +import { isDemoConnection } from './telemetry/demo-detect.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; +import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:connection'); @@ -300,6 +303,30 @@ interface ConnectionTestRow { detail: string; } +async function emitConnectionTest(input: { + project: KtxLocalProject; + connectionId: string; + driver: string; + outcome: 'ok' | 'error'; + durationMs: number; + error?: unknown; + io: KtxCliIo; +}): Promise { + const errorClass = input.error ? scrubErrorClass(input.error) : undefined; + await emitTelemetryEvent({ + name: 'connection_test', + projectDir: input.project.projectDir, + io: input.io, + fields: { + driver: input.driver, + isDemoConnection: isDemoConnection(input.connectionId, input.project.config.connections[input.connectionId]), + outcome: input.outcome, + durationMs: input.durationMs, + ...(errorClass ? { errorClass } : {}), + }, + }); +} + function visualWidth(text: string): number { // styleText wraps content in ANSI escape sequences; strip them before measuring. return text.replace(/\[[0-9;]*m/g, '').length; @@ -352,8 +379,17 @@ async function runTestAll( const rows = await Promise.all( entries.map(async ([connectionId, connection]): Promise => { const declaredDriver = String(connection.driver ?? 
'').trim().toLowerCase() || 'unknown'; + const startedAt = performance.now(); try { const outcome = await testConnectionByDriver(project, connectionId, deps); + await emitConnectionTest({ + project, + connectionId, + driver: outcome.driver || declaredDriver, + outcome: 'ok', + durationMs: Math.max(0, performance.now() - startedAt), + io, + }); return { connectionId, driver: outcome.driver || declaredDriver, @@ -361,6 +397,15 @@ async function runTestAll( detail: `${outcome.detailKey}: ${outcome.detailValue}`, }; } catch (error) { + await emitConnectionTest({ + project, + connectionId, + driver: declaredDriver, + outcome: 'error', + durationMs: Math.max(0, performance.now() - startedAt), + error, + io, + }); return { connectionId, driver: declaredDriver, @@ -403,7 +448,35 @@ export async function runKtxConnection( return await runTestAll(project, io, deps); } - const { driver, detailKey, detailValue } = await testConnectionByDriver(project, args.connectionId, deps); + const startedAt = performance.now(); + let driver = normalizedConnectionDriver(project, args.connectionId) || 'unknown'; + let detailKey: string; + let detailValue: string; + try { + const outcome = await testConnectionByDriver(project, args.connectionId, deps); + driver = outcome.driver; + detailKey = outcome.detailKey; + detailValue = outcome.detailValue; + await emitConnectionTest({ + project, + connectionId: args.connectionId, + driver, + outcome: 'ok', + durationMs: Math.max(0, performance.now() - startedAt), + io, + }); + } catch (error) { + await emitConnectionTest({ + project, + connectionId: args.connectionId, + driver, + outcome: 'error', + durationMs: Math.max(0, performance.now() - startedAt), + error, + io, + }); + throw error; + } io.stdout.write(`Connection test passed: ${args.connectionId}\n`); io.stdout.write(`Driver: ${driver}\n`); io.stdout.write(`${detailKey}: ${detailValue}\n`); diff --git a/packages/cli/src/context/daemon/semantic-layer-compute.test.ts 
b/packages/cli/src/context/daemon/semantic-layer-compute.test.ts index 846f9355..dac37ef4 100644 --- a/packages/cli/src/context/daemon/semantic-layer-compute.test.ts +++ b/packages/cli/src/context/daemon/semantic-layer-compute.test.ts @@ -106,7 +106,10 @@ describe('createPythonSemanticLayerComputePort', () => { columns: [{ name: 'orders.order_count' }], plan: { sources_used: ['orders'] }, })); - const port = createPythonSemanticLayerComputePort({ runJson }); + const port = createPythonSemanticLayerComputePort({ + runJson, + projectId: 'hashed-project-id', + }); await expect( port.query({ @@ -125,6 +128,7 @@ describe('createPythonSemanticLayerComputePort', () => { sources: [source], dialect: 'postgres', query: { measures: ['orders.order_count'], dimensions: [] }, + projectId: 'hashed-project-id', }); }); diff --git a/packages/cli/src/context/daemon/semantic-layer-compute.ts b/packages/cli/src/context/daemon/semantic-layer-compute.ts index f416b169..c590c3fa 100644 --- a/packages/cli/src/context/daemon/semantic-layer-compute.ts +++ b/packages/cli/src/context/daemon/semantic-layer-compute.ts @@ -90,6 +90,7 @@ export interface PythonSemanticLayerComputeOptions { cwd?: string; env?: NodeJS.ProcessEnv; runJson?: KtxDaemonJsonRunner; + projectId?: string; } /** @internal */ @@ -238,6 +239,7 @@ export function createPythonSemanticLayerComputePort( const command = options.command ?? 'python'; const args = options.args ?? ['-m', 'ktx_daemon']; const runJson = options.runJson ?? runProcessJson({ command, args, cwd: options.cwd, env: options.env }); + const projectId = options.projectId; return { async query(input) { @@ -245,6 +247,7 @@ export function createPythonSemanticLayerComputePort( sources: input.sources, dialect: input.dialect, query: input.query, + ...(projectId ? { projectId } : {}), }); return { sql: typeof raw.sql === 'string' ? 
raw.sql : '', diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index 6778bb64..963ab44f 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -1,7 +1,10 @@ import { randomUUID } from 'node:crypto'; import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; +import type { KtxCliIo } from '../../cli-runtime.js'; import type { MemoryAgentInput } from '../../context/memory/types.js'; +import { emitTelemetryEvent, mcpTelemetrySampleRate, shouldEmitMcpTelemetry } from '../../telemetry/index.js'; +import { scrubErrorClass } from '../../telemetry/scrubber.js'; import type { KtxMcpContextPorts, KtxMcpProgressCallback, @@ -16,6 +19,8 @@ export interface RegisterKtxContextToolsDeps { server: KtxMcpServerLike; ports: KtxMcpContextPorts; userContext: KtxMcpUserContext; + projectDir?: string; + io?: KtxCliIo; } const connectionIdSchema = z.string().min(1); @@ -509,8 +514,58 @@ function registerParsedTool( }); } +function instrumentMcpServer( + server: KtxMcpServerLike, + telemetry: { projectDir?: string; io?: KtxCliIo }, +): KtxMcpServerLike { + return { + registerTool(name, config, handler) { + server.registerTool(name, config, async (input, context) => { + const startedAt = performance.now(); + try { + const result = await handler(input, context); + if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) { + const isError = + typeof result === 'object' && result !== null && 'isError' in result && result.isError === true; + await emitTelemetryEvent({ + name: 'mcp_request_completed', + projectDir: telemetry.projectDir, + io: telemetry.io, + fields: { + toolName: name, + outcome: isError ? 
'error' : 'ok', + durationMs: Math.max(0, performance.now() - startedAt), + sampleRate: mcpTelemetrySampleRate(), + }, + }); + } + return result; + } catch (error) { + if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) { + const errorClass = scrubErrorClass(error); + await emitTelemetryEvent({ + name: 'mcp_request_completed', + projectDir: telemetry.projectDir, + io: telemetry.io, + fields: { + toolName: name, + outcome: 'error', + ...(errorClass ? { errorClass } : {}), + durationMs: Math.max(0, performance.now() - startedAt), + sampleRate: mcpTelemetrySampleRate(), + }, + }); + } + throw error; + } + }); + }, + }; +} + export function registerKtxContextTools(deps: RegisterKtxContextToolsDeps): void { - const { ports, server, userContext } = deps; + const { ports, userContext } = deps; + const server = instrumentMcpServer(deps.server, { projectDir: deps.projectDir, io: deps.io }); if (ports.connections) { const connections = ports.connections; diff --git a/packages/cli/src/context/mcp/server.test.ts b/packages/cli/src/context/mcp/server.test.ts index 3532a327..bee00c00 100644 --- a/packages/cli/src/context/mcp/server.test.ts +++ b/packages/cli/src/context/mcp/server.test.ts @@ -3,7 +3,7 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import { InMemoryTransport } from '@modelcontextprotocol/sdk/inMemory.js'; -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import { createLocalProjectMemoryIngest } from '../../context/memory/local-memory.js'; import { detectCaptureSignals } from '../../context/memory/capture-signals.js'; import type { MemoryAgentInput } from '../../context/memory/types.js'; @@ -47,6 +47,19 @@ function makeFakeServer() { }; } +function makeIo() { + let stderr = ''; + return { + stdout: { isTTY: true, write() {} }, + stderr: { + write(chunk: string) { + stderr += chunk; + }, + }, 
+ stderrText: () => stderr, + }; +} + function getTool(tools: RegisteredTool[], name: string): RegisteredTool { const found = tools.find((tool) => tool.name === name); if (!found) { @@ -153,6 +166,11 @@ async function listToolsThroughSdk(contextTools: KtxMcpContextPorts) { } describe('createKtxMcpServer', () => { + afterEach(() => { + vi.unstubAllEnvs(); + vi.restoreAllMocks(); + }); + it('registers annotations and output schemas for every retained tool', async () => { const fake = makeFakeServer(); createKtxMcpServer({ @@ -227,6 +245,37 @@ describe('createKtxMcpServer', () => { }); }); + it('emits sampled debug telemetry for MCP tool requests', async () => { + vi.spyOn(Math, 'random').mockReturnValue(0); + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const fake = makeFakeServer(); + const io = makeIo(); + const projectDir = '/tmp/ktx-mcp-telemetry'; + + createKtxMcpServer({ + server: fake.server, + userContext: { userId: 'local-user' }, + projectDir, + io, + contextTools: { + knowledge: { + search: vi.fn().mockResolvedValue({ results: [], totalFound: 0 }), + read: vi.fn().mockResolvedValue(null), + }, + }, + }); + + await expect(getTool(fake.tools, 'wiki_search').handler({ query: 'revenue recognition', limit: 5 })).resolves.toMatchObject({ + structuredContent: { results: [], totalFound: 0 }, + }); + + expect(io.stderrText()).toContain('"event":"mcp_request_completed"'); + expect(io.stderrText()).toContain('"toolName":"wiki_search"'); + expect(io.stderrText()).toContain('"sampleRate":0.1'); + expect(io.stderrText()).not.toContain(projectDir); + }); + it('registers parser-gated sql_execution when the host provides a SQL execution port', async () => { const fake = makeFakeServer(); const response: KtxSqlExecutionResponse = { diff --git a/packages/cli/src/context/mcp/server.ts b/packages/cli/src/context/mcp/server.ts index 73f970d6..97d79525 100644 --- a/packages/cli/src/context/mcp/server.ts +++ b/packages/cli/src/context/mcp/server.ts @@ -9,6 
+9,8 @@ export function createKtxMcpServer(deps: KtxMcpServerDeps): KtxMcpServerDeps['se server: deps.server, ports: deps.contextTools, userContext: deps.userContext, + projectDir: deps.projectDir, + io: deps.io, }); } @@ -26,6 +28,8 @@ export function createDefaultKtxMcpServer( server: server as KtxMcpServerLike, userContext: deps.userContext, contextTools: deps.contextTools, + projectDir: deps.projectDir, + io: deps.io, }); return server; } diff --git a/packages/cli/src/context/mcp/types.ts b/packages/cli/src/context/mcp/types.ts index 223aa394..29a8c069 100644 --- a/packages/cli/src/context/mcp/types.ts +++ b/packages/cli/src/context/mcp/types.ts @@ -1,4 +1,5 @@ import type { MemoryIngestService } from '../../context/memory/memory-runs.js'; +import type { KtxCliIo } from '../../cli-runtime.js'; import type { KtxEntityDetailsInput, KtxEntityDetailsResponse } from '../scan/entity-details.js'; import type { KtxDiscoverDataInput, KtxDiscoverDataResponse } from '../../context/search/discover.js'; import type { KtxDictionarySearchInput, KtxDictionarySearchResponse } from '../../context/sl/dictionary-search.js'; @@ -171,4 +172,6 @@ export interface KtxMcpServerDeps { server: KtxMcpServerLike; userContext: KtxMcpUserContext; contextTools?: KtxMcpContextPorts; + projectDir?: string; + io?: KtxCliIo; } diff --git a/packages/cli/src/demo-assets.test.ts b/packages/cli/src/demo-assets.test.ts index 92aad645..052eda83 100644 --- a/packages/cli/src/demo-assets.test.ts +++ b/packages/cli/src/demo-assets.test.ts @@ -2,7 +2,7 @@ import { access, readFile, rm, stat } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { fileURLToPath } from 'node:url'; -import { afterEach, describe, expect, it } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import { DEMO_ADAPTER, DEMO_CONNECTION_ID, @@ -22,10 +22,27 @@ async function readPackagedJson(relativePath: string): Promise { return JSON.parse(await 
readFile(packagedDemoAssetPath(relativePath), 'utf-8')) as T; } +function makeIo() { + let stderr = ''; + return { + stdout: { + isTTY: true, + write() {}, + }, + stderr: { + write(chunk: string) { + stderr += chunk; + }, + }, + stderrText: () => stderr, + }; +} + describe('demo assets', () => { const projectDir = join(tmpdir(), `ktx-demo-assets-${process.pid}`); afterEach(async () => { + vi.unstubAllEnvs(); await rm(projectDir, { recursive: true, force: true }); }); @@ -125,6 +142,19 @@ describe('demo assets', () => { await expect(ensureDemoProject({ projectDir, force: true })).resolves.toMatchObject({ projectDir }); }); + it('emits debug telemetry when the demo connection is created', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const io = makeIo(); + + await ensureDemoProject({ projectDir, force: false, io, cliVersion: '0.2.0' }); + + expect(io.stderrText()).toContain('"event":"connection_added"'); + expect(io.stderrText()).toContain('"driver":"sqlite"'); + expect(io.stderrText()).toContain('"isDemoConnection":true'); + expect(io.stderrText()).not.toContain(projectDir); + }); + it('copies the seeded project assets used by the setup wizard tour', async () => { await ensureSeededDemoProject({ projectDir, force: false }); diff --git a/packages/cli/src/demo-assets.ts b/packages/cli/src/demo-assets.ts index dcc7ac1f..5c84b356 100644 --- a/packages/cli/src/demo-assets.ts +++ b/packages/cli/src/demo-assets.ts @@ -4,6 +4,7 @@ import { tmpdir } from 'node:os'; import { join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { randomBytes } from 'node:crypto'; +import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js'; interface DemoProjectResult { projectDir: string; @@ -15,6 +16,8 @@ interface DemoProjectResult { interface EnsureDemoProjectOptions { projectDir: string; force: boolean; + io?: KtxCliIo; + cliVersion?: string; } /** @internal */ @@ -143,6 +146,19 @@ export async function 
ensureDemoProject(options: EnsureDemoProjectOptions): Prom await copyFile(join(assetDir(), 'manifest.json'), join(projectDir, 'manifest.json')); const replayPath = await copyPackagedReplay(projectDir); await writeFile(configPath, demoConfig(databasePath), 'utf-8'); + if (options.io) { + const { emitTelemetryEvent } = await import('./telemetry/index.js'); + await emitTelemetryEvent({ + name: 'connection_added', + projectDir, + io: options.io, + packageInfo: { ...getKtxCliPackageInfo(), version: options.cliVersion ?? getKtxCliPackageInfo().version }, + fields: { + driver: 'sqlite', + isDemoConnection: true, + }, + }); + } return { projectDir, configPath, databasePath, replayPath }; } diff --git a/packages/cli/src/knowledge.test.ts b/packages/cli/src/knowledge.test.ts index 9fc92a31..69581f0f 100644 --- a/packages/cli/src/knowledge.test.ts +++ b/packages/cli/src/knowledge.test.ts @@ -8,12 +8,13 @@ import { writeLocalKnowledgePage } from './context/wiki/local-knowledge.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxKnowledge } from './knowledge.js'; -function makeIo() { +function makeIo(options: { isTTY?: boolean } = {}) { let stdout = ''; let stderr = ''; return { io: { stdout: { + isTTY: options.isTTY, write: (chunk: string) => { stdout += chunk; }, @@ -72,6 +73,7 @@ describe('runKtxKnowledge', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -96,6 +98,26 @@ describe('runKtxKnowledge', () => { expect(searchIo.stdout()).toContain('metrics-revenue'); }); + it('emits debug telemetry for wiki search without query text', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await seedWikiPage(projectDir); + const searchIo = makeIo({ isTTY: true }); + + await expect( + runKtxKnowledge( + { command: 'search', projectDir, query: 'revenue 
recognition', userId: 'local', cliVersion: '0.0.0-test' }, + searchIo.io, + ), + ).resolves.toBe(0); + + expect(searchIo.stderr()).toContain('"event":"wiki_query_completed"'); + expect(searchIo.stderr()).toContain('"queryLength"'); + expect(searchIo.stderr()).not.toContain('revenue recognition'); + }); + it('prints wiki search rank badges in pretty output', async () => { const projectDir = join(tempDir, 'rank-project'); await initKtxProject({ projectDir }); diff --git a/packages/cli/src/knowledge.ts b/packages/cli/src/knowledge.ts index 9eb35b12..d6246fef 100644 --- a/packages/cli/src/knowledge.ts +++ b/packages/cli/src/knowledge.ts @@ -8,6 +8,7 @@ import { } from './embedding-resolution.js'; import { resolveOutputMode } from './io/mode.js'; import { createRankBadgeFormatter, printList, type PrintListColumn } from './io/print-list.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; export type KtxKnowledgeArgs = | { command: 'list'; projectDir: string; userId: string; output?: string; json?: boolean; cliVersion: string } @@ -108,6 +109,7 @@ export async function runKtxKnowledge( io: KtxKnowledgeIo = process, deps: KtxKnowledgeDeps = {}, ): Promise { + const startedAt = performance.now(); try { const project = await loadKtxProject({ projectDir: args.projectDir }); if (args.command === 'list') { @@ -135,6 +137,17 @@ export async function runKtxKnowledge( embeddingService, limit: args.limit, }); + await emitTelemetryEvent({ + name: 'wiki_query_completed', + projectDir: args.projectDir, + io, + fields: { + queryLength: args.query.length, + resultCount: results.length, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'ok', + }, + }); if (args.debug) { writeWikiSearchDebug(io, { mode: project.config.storage.search, @@ -167,6 +180,19 @@ export async function runKtxKnowledge( } return 0; } catch (error) { + if (args.command === 'search') { + await emitTelemetryEvent({ + name: 'wiki_query_completed', + projectDir: args.projectDir, + io, + 
fields: { + queryLength: args.query.length, + resultCount: 0, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'error', + }, + }); + } io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/managed-python-command.ts b/packages/cli/src/managed-python-command.ts index ecad702f..37cda4f3 100644 --- a/packages/cli/src/managed-python-command.ts +++ b/packages/cli/src/managed-python-command.ts @@ -12,6 +12,7 @@ import { type ManagedPythonRuntimeLayoutOptions, type ManagedPythonRuntimeStatus, } from './managed-python-runtime.js'; +import { readExistingTelemetryProjectId } from './telemetry/identity.js'; export type KtxManagedPythonInstallPolicy = 'prompt' | 'auto' | 'never'; @@ -49,6 +50,7 @@ export interface ManagedPythonCommandOptions extends ManagedPythonCommandDeps { export interface ManagedPythonSemanticLayerComputeOptions extends ManagedPythonCommandOptions { createPythonCompute?: typeof createPythonSemanticLayerComputePort; + projectDir?: string; } /** @internal */ @@ -133,8 +135,12 @@ export async function createManagedPythonSemanticLayerComputePort( ...(options.spinner ? { spinner: options.spinner } : {}), }); const createPythonCompute = options.createPythonCompute ?? createPythonSemanticLayerComputePort; + const projectId = options.projectDir + ? await readExistingTelemetryProjectId({ projectDir: options.projectDir }) + : undefined; return createPythonCompute({ command: runtime.manifest.python.daemonExecutable, args: [], + ...(projectId ? 
{ projectId } : {}), }); } diff --git a/packages/cli/src/mcp-server-factory.ts b/packages/cli/src/mcp-server-factory.ts index e6d4887f..1ff44270 100644 --- a/packages/cli/src/mcp-server-factory.ts +++ b/packages/cli/src/mcp-server-factory.ts @@ -73,6 +73,8 @@ export async function createKtxMcpServerFactory(input: { name: 'ktx', version: input.cliVersion, userContext: { userId: 'local' }, + projectDir: input.projectDir, + io, contextTools: { ...contextTools, ...(memoryIngest ? { memoryIngest } : {}), diff --git a/packages/cli/src/public-ingest.test.ts b/packages/cli/src/public-ingest.test.ts index ac2560cd..1f5dd67d 100644 --- a/packages/cli/src/public-ingest.test.ts +++ b/packages/cli/src/public-ingest.test.ts @@ -1,5 +1,9 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; import { buildDefaultKtxProjectConfig, type KtxProjectConfig } from './context/project/config.js'; -import { describe, expect, it, vi } from 'vitest'; +import { initKtxProject } from './context/project/project.js'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import { buildPublicIngestPlan, type KtxPublicIngestDeps, @@ -395,6 +399,10 @@ describe('buildPublicIngestPlan', () => { }); describe('runKtxPublicIngest', () => { + afterEach(() => { + vi.unstubAllEnvs(); + }); + it('maps fast and deep database targets to scan internals', async () => { const io = makeIo(); const project = deepReadyProject({ @@ -423,6 +431,32 @@ describe('runKtxPublicIngest', () => { ); }); + it('emits debug telemetry for ingest targets and project snapshots without project paths', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = await mkdtemp(join(tmpdir(), 'ktx-public-ingest-telemetry-')); + try { + await initKtxProject({ projectDir }); + const io = makeIo({ isTTY: true }); + const project = projectWithConnections({ + warehouse: { driver: 'sqlite', path: join(projectDir, 
'warehouse.sqlite') }, + }); + + const code = await runKtxPublicIngest( + { command: 'run', projectDir, targetConnectionId: 'warehouse', all: false, json: false, inputMode: 'disabled' }, + io.io, + { loadProject: vi.fn(async () => project), runScan: vi.fn(async () => 0) }, + ); + + expect(code).toBe(0); + expect(io.stderr()).toContain('"event":"ingest_completed"'); + expect(io.stderr()).toContain('"event":"project_stack_snapshot"'); + expect(io.stderr()).not.toContain(projectDir); + } finally { + await rm(projectDir, { recursive: true, force: true }); + } + }); + it('runs query history after schema ingest with current-run window override', async () => { const io = makeIo(); const runtimeIo = makeIo({ isTTY: true }); diff --git a/packages/cli/src/public-ingest.ts b/packages/cli/src/public-ingest.ts index 60b9622c..498edb3a 100644 --- a/packages/cli/src/public-ingest.ts +++ b/packages/cli/src/public-ingest.ts @@ -21,6 +21,8 @@ import { publicIngestOutputLine } from './public-ingest-copy.js'; import { resolvePublicIngestRuntimeRequirements } from './runtime-requirements.js'; import type { KtxScanArgs, KtxScanDeps } from './scan.js'; import { profileMark } from './startup-profile.js'; +import { isDemoConnection } from './telemetry/demo-detect.js'; +import { emitProjectStackSnapshot, emitTelemetryEvent } from './telemetry/index.js'; profileMark('module:public-ingest'); @@ -603,6 +605,39 @@ function resultFailed(result: KtxPublicIngestTargetResult): boolean { return result.steps.some((step) => step.status === 'failed'); } +function rowsBucket(): '<10k' | '<100k' | '<1M' | '<10M' | '>=10M' { + return '<10k'; +} + +async function emitIngestCompleted(input: { + args: Extract; + project: KtxPublicIngestProject; + target: KtxPublicIngestPlanTarget; + result: KtxPublicIngestTargetResult; + startedAt: number; + io: KtxCliIo; +}): Promise { + const failed = resultFailed(input.result); + await emitTelemetryEvent({ + name: 'ingest_completed', + projectDir: input.args.projectDir, + 
io: input.io, + fields: { + driver: input.target.driver, + isDemoConnection: isDemoConnection( + input.target.connectionId, + input.project.config.connections[input.target.connectionId], + ), + schemaCount: 0, + tableCount: 0, + columnCount: 0, + rowsBucket: rowsBucket(), + durationMs: Math.max(0, performance.now() - input.startedAt), + outcome: failed ? 'error' : 'ok', + }, + }); +} + function stepStatus(result: KtxPublicIngestTargetResult, operation: KtxPublicIngestStepName): string { return result.steps.find((step) => step.operation === operation)?.status ?? 'not-run'; } @@ -928,7 +963,10 @@ export async function runKtxPublicIngest( } for (const target of plan.targets) { - results.push(await executePublicIngestTarget(target, args, io, deps)); + const startedAt = performance.now(); + const result = await executePublicIngestTarget(target, args, io, deps); + results.push(result); + await emitIngestCompleted({ args, project, target, result, startedAt, io }); } if (args.json) { @@ -937,5 +975,7 @@ export async function runKtxPublicIngest( renderPlainResults(results, io); } + await emitProjectStackSnapshot({ projectDir: args.projectDir, io }); + return results.some(resultFailed) ? 
1 : 0; } diff --git a/packages/cli/src/scan.test.ts b/packages/cli/src/scan.test.ts index 0d2bcdc9..16cfdbd3 100644 --- a/packages/cli/src/scan.test.ts +++ b/packages/cli/src/scan.test.ts @@ -317,6 +317,7 @@ describe('runKtxScan', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -381,6 +382,44 @@ describe('runKtxScan', () => { expect(io.stdout()).not.toContain('/~'); }); + it('emits debug telemetry for completed scans without project paths', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + await initKtxProject({ projectDir: tempDir }); + const runLocalScan = vi.fn( + async (): Promise => ({ + runId: 'scan-run-1', + status: 'done', + done: true, + connectionId: 'warehouse', + mode: 'structural', + dryRun: false, + syncId: 'sync-1', + report, + }), + ); + const io = makeIo({ isTTY: true }); + + const code = await runKtxScan( + { + command: 'run', + projectDir: tempDir, + connectionId: 'warehouse', + mode: 'structural', + detectRelationships: false, + dryRun: false, + databaseIntrospectionUrl: 'http://127.0.0.1:8765', + }, + io.io, + { runLocalScan, createLocalIngestAdapters: noLocalIngestAdapters }, + ); + + expect(code).toBe(0); + expect(io.stderr()).toContain('"event":"scan_completed"'); + expect(io.stderr()).toContain('"tableCount"'); + expect(io.stderr()).not.toContain(tempDir); + }); + it('passes KTX daemon options to local ingest adapters when no explicit daemon URL is set', async () => { await initKtxProject({ projectDir: tempDir }); const createLocalIngestAdapters = vi.fn(() => []); diff --git a/packages/cli/src/scan.ts b/packages/cli/src/scan.ts index a92aaa62..f40da497 100644 --- a/packages/cli/src/scan.ts +++ b/packages/cli/src/scan.ts @@ -8,6 +8,8 @@ import { createKtxCliLocalIngestAdapters } from './local-adapters.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import type { KtxManagedPythonInstallPolicy } from 
'./managed-python-command.js'; import { profileMark } from './startup-profile.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; +import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:scan'); @@ -62,6 +64,14 @@ function totalTableCount(report: KtxScanReport): number { return tableChangeCount(report) + report.diffSummary.tablesUnchanged; } +function scanColumnCount(report: KtxScanReport): number { + return report.structuralSyncStats.columnsCreated + report.structuralSyncStats.columnsUpdated; +} + +function inferredFkCount(report: KtxScanReport): number { + return report.relationships.accepted + report.relationships.review + report.relationships.rejected; +} + function writeScanIdentity(report: KtxScanReport, io: KtxCliIo): void { io.stdout.write(`Run: ${report.runId}\n`); io.stdout.write(`Connection: ${report.connectionId}\n`); @@ -311,6 +321,7 @@ export function createCliScanProgress( } export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps: KtxScanDeps = {}): Promise { + const startedAt = performance.now(); try { const project = await loadKtxProject({ projectDir: args.projectDir }); const resolveEmbeddingProvider = deps.resolveEmbeddingProvider ?? resolveProjectEmbeddingProvider; @@ -347,12 +358,42 @@ export async function runKtxScan(args: KtxScanArgs, io: KtxCliIo = process, deps ...(progress ? 
{ progress } : {}), }); cliProgress?.flush(); + await emitTelemetryEvent({ + name: 'scan_completed', + projectDir: args.projectDir, + io, + fields: { + driver: result.report.driver, + tableCount: totalTableCount(result.report), + columnCount: scanColumnCount(result.report), + inferredFkCount: inferredFkCount(result.report), + declaredFkCount: 0, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'ok', + }, + }); writeRunSummary(result.report, args.projectDir, io); } finally { cliProgress?.flush(); } return 0; } catch (error) { + const errorClass = scrubErrorClass(error); + await emitTelemetryEvent({ + name: 'scan_completed', + projectDir: args.projectDir, + io, + fields: { + driver: 'unknown', + tableCount: 0, + columnCount: 0, + inferredFkCount: 0, + declaredFkCount: 0, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'error', + ...(errorClass ? { errorClass } : {}), + }, + }); io.stderr.write(`${error instanceof Error ? error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/setup-databases.test.ts b/packages/cli/src/setup-databases.test.ts index 28c9e937..c401dc51 100644 --- a/packages/cli/src/setup-databases.test.ts +++ b/packages/cli/src/setup-databases.test.ts @@ -145,6 +145,7 @@ describe('setup databases step', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -378,6 +379,34 @@ describe('setup databases step', () => { }); }); + it('emits debug telemetry when setup writes a database connection', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const io = makeIo(); + const prompts = makePromptAdapter({ + selectValues: ['url'], + textValues: ['', 'env:DATABASE_URL'], + }); + + const result = await runKtxSetupDatabasesStep( + { + projectDir: tempDir, + inputMode: 'auto', + databaseDrivers: ['postgres'], + databaseSchemas: [], + skipDatabases: false, + }, + io.io, + { prompts, testConnection: vi.fn(async 
() => 0), scanConnection: vi.fn(async () => 0) }, + ); + + expect(result.status).toBe('ready'); + expect(io.stderr()).toContain('"event":"connection_added"'); + expect(io.stderr()).toContain('"driver":"postgres"'); + expect(io.stderr()).toContain('"isDemoConnection":false'); + expect(io.stderr()).not.toContain(tempDir); + }); + it('tells users Escape goes back in free-text connection prompts', async () => { const prompts = makePromptAdapter({ selectValues: ['url'], diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index acdebeec..1c21f9fe 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -19,6 +19,8 @@ import { withMultiselectNavigation, withTextInputNavigation } from './prompt-nav import { runKtxScan } from './scan.js'; import { applySetupDatabaseContextDepth } from './setup-database-context-depth.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +import { isDemoConnection } from './telemetry/demo-detect.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; import { createKtxSetupPromptAdapter, type KtxSetupPromptOption, @@ -1283,6 +1285,7 @@ async function writeConnectionConfig(input: { projectDir: string; connectionId: string; connection: KtxProjectConnectionConfig; + io?: KtxCliIo; }): Promise { const project = await loadKtxProject({ projectDir: input.projectDir }); const migratedConnections = Object.fromEntries( @@ -1300,6 +1303,17 @@ async function writeConnectionConfig(input: { }, }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); + if (input.io) { + await emitTelemetryEvent({ + name: 'connection_added', + projectDir: input.projectDir, + io: input.io, + fields: { + driver: String(nextConnection.driver ?? 
'unknown').toLowerCase(), + isDemoConnection: isDemoConnection(input.connectionId, nextConnection), + }, + }); + } const queryHistory = queryHistoryConfigRecord(nextConnection); if (queryHistory?.enabled === true) { @@ -1556,6 +1570,7 @@ async function maybeConfigureDatabaseScope(input: { projectDir: input.projectDir, connectionId: input.connectionId, connection: { ...currentConnection, enabled_tables: enabledTables }, + io: input.io, }); if (spec && activeSchemas.length > 0) { @@ -1988,6 +2003,7 @@ async function runPrimarySourceFullEdit(input: { }, driver, }), + io: input.io, }); const validated = await validateAndScanConnection({ @@ -2223,6 +2239,7 @@ export async function runKtxSetupDatabasesStep( projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withContextDepth, + io, }); } else { const existing = project.config.connections[connectionChoice.connectionId]; @@ -2248,6 +2265,7 @@ export async function runKtxSetupDatabasesStep( projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withContextDepth, + io, }); } @@ -2331,6 +2349,7 @@ export async function runKtxSetupDatabasesStep( projectDir: args.projectDir, connectionId: connectionChoice.connectionId, connection: withContextDepth, + io, }); setupStatus = await validateAndScanConnection({ projectDir: args.projectDir, diff --git a/packages/cli/src/setup-demo-tour.ts b/packages/cli/src/setup-demo-tour.ts index 79f71fbe..da80b988 100644 --- a/packages/cli/src/setup-demo-tour.ts +++ b/packages/cli/src/setup-demo-tour.ts @@ -339,7 +339,7 @@ export interface DemoTourDeps { } export async function runDemoTour( - args: { inputMode: 'auto' | 'disabled' }, + args: { inputMode: 'auto' | 'disabled'; cliVersion?: string }, io: KtxCliIo, deps: DemoTourDeps = {}, ): Promise { @@ -347,7 +347,7 @@ export async function runDemoTour( const ensureProject = deps.ensureProject ?? 
ensureSeededDemoProject; const projectDir = defaultDemoProjectDir(); - await ensureProject({ projectDir, force: false }); + await ensureProject({ projectDir, force: false, io, cliVersion: args.cliVersion }); io.stdout.write(renderDemoBanner(projectDir) + '\n'); io.stdout.write(`\n│ ${dim('Press Enter to continue, Escape to go back')}\n└\n`); diff --git a/packages/cli/src/setup-sources.test.ts b/packages/cli/src/setup-sources.test.ts index d75933c1..c0b2c781 100644 --- a/packages/cli/src/setup-sources.test.ts +++ b/packages/cli/src/setup-sources.test.ts @@ -80,6 +80,7 @@ describe('setup sources step', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -170,6 +171,34 @@ describe('setup sources step', () => { expect(runInitialIngest).toHaveBeenCalledWith(projectDir, 'analytics_dbt', io.io, { inputMode: 'disabled' }); }); + it('emits debug telemetry when setup writes a source connection', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + await addPrimarySource(); + const io = makeIo(); + + const result = await runKtxSetupSourcesStep( + { + projectDir, + inputMode: 'disabled', + source: 'dbt', + sourceConnectionId: 'analytics_dbt', + sourcePath: '/repo/dbt', + sourceProjectName: 'analytics', + runInitialSourceIngest: false, + skipSources: false, + }, + io.io, + { validateDbt: vi.fn(async () => ({ ok: true as const, detail: 'project=analytics schemas=2' })) }, + ); + + expect(result.status).toBe('ready'); + expect(io.stderr()).toContain('"event":"connection_added"'); + expect(io.stderr()).toContain('"driver":"dbt"'); + expect(io.stderr()).toContain('"isDemoConnection":false'); + expect(io.stderr()).not.toContain(projectDir); + }); + it('writes Metabase config and validates mapping through existing mapping path', async () => { await addPrimarySource(); const validateMetabase = vi.fn(async () => ({ ok: true as const, detail: 'user=admin@example.com' })); diff --git 
a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index a3f8019d..410de812 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -22,6 +22,8 @@ import { runKtxSourceMapping } from './source-mapping.js'; import { withMultiselectNavigation, withTextInputNavigation } from './prompt-navigation.js'; import { runKtxPublicIngest } from './public-ingest.js'; import { writeProjectLocalSecretReference } from './setup-secrets.js'; +import { isDemoConnection } from './telemetry/demo-detect.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; import { createKtxSetupPromptAdapter, type KtxSetupPromptOption, @@ -325,6 +327,7 @@ async function writeSourceConnection( connectionId: string, connection: KtxProjectConnectionConfig, adapter: string, + io?: KtxCliIo, ): Promise<() => Promise> { assertSafeConnectionId(connectionId); const project = await loadKtxProject({ projectDir }); @@ -345,6 +348,17 @@ async function writeSourceConnection( }, }; await writeFile(project.configPath, serializeKtxProjectConfig(config), 'utf-8'); + if (io) { + await emitTelemetryEvent({ + name: 'connection_added', + projectDir, + io, + fields: { + driver: String(connection.driver ?? 
adapter).toLowerCase(), + isDemoConnection: isDemoConnection(connectionId, connection), + }, + }); + } return async () => { const latest = await loadKtxProject({ projectDir }); const connections = { ...latest.config.connections }; @@ -1736,6 +1750,7 @@ async function saveValidateAndMaybeBuildSource(input: { connectionId, connection, sourceAdapter(input.source), + input.io, ); if (input.sourceChoice.kind === 'existing') { diff --git a/packages/cli/src/setup.test.ts b/packages/cli/src/setup.test.ts index 6ea0f0a4..ff8513b7 100644 --- a/packages/cli/src/setup.test.ts +++ b/packages/cli/src/setup.test.ts @@ -23,6 +23,7 @@ function makeIo() { return { io: { stdout: { + isTTY: false, write: (chunk: string) => { stdout += chunk; }, @@ -91,6 +92,7 @@ describe('setup status', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -528,6 +530,43 @@ describe('setup status', () => { expect(output).not.toContain('Finish agent setup'); }); + it('emits debug telemetry for setup steps without project paths', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const testIo = makeIo(); + testIo.io.stdout.isTTY = true; + + await expect( + runKtxSetup( + { + command: 'run', + projectDir: tempDir, + mode: 'auto', + agents: false, + skipAgents: true, + inputMode: 'disabled', + yes: true, + cliVersion: '0.2.0', + skipLlm: true, + skipEmbeddings: true, + skipDatabases: true, + skipSources: true, + databaseSchemas: [], + }, + testIo.io, + { + runtime: async () => runtimeReady(tempDir), + context: async () => ({ status: 'ready', projectDir: tempDir, runId: 'setup-context-local-test' }), + }, + ), + ).resolves.toBe(0); + + expect(testIo.stderr()).toContain('"event":"setup_step"'); + expect(testIo.stderr()).toContain('"step":"project"'); + expect(testIo.stderr()).toContain('"step":"models"'); + expect(testIo.stderr()).not.toContain(tempDir); + }); + it('prints the setup shell intro for auto-created 
run mode', async () => { const testIo = makeIo(); @@ -1047,7 +1086,7 @@ describe('setup status', () => { ).resolves.toBe(0); expect(runDemoTour).toHaveBeenCalledWith( - { inputMode: 'auto' }, + { inputMode: 'auto', cliVersion: '0.2.0' }, testIo.io, expect.objectContaining({}), ); diff --git a/packages/cli/src/setup.ts b/packages/cli/src/setup.ts index 9b4c3a24..825170c0 100644 --- a/packages/cli/src/setup.ts +++ b/packages/cli/src/setup.ts @@ -6,7 +6,7 @@ import { savedMemoryCountsForReport } from './context/ingest/reports.js'; import { ktxLocalStateDbPath } from './context/project/local-state-db.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; import { readKtxSetupState } from './context/project/setup-config.js'; -import type { KtxCliIo } from './cli-runtime.js'; +import { getKtxCliPackageInfo, type KtxCliIo } from './cli-runtime.js'; import { formatSetupNextStepLines } from './next-steps.js'; import { runtimeInstallPolicyFromFlags } from './managed-python-command.js'; import { readManagedPythonRuntimeStatus } from './managed-python-runtime.js'; @@ -179,6 +179,16 @@ type KtxSetupFlowStatus = | 'back' | 'missing-input' | 'failed'; +type TelemetrySetupStep = + | 'project' + | 'runtime' + | 'models' + | 'embeddings' + | 'databases' + | 'sources' + | 'context' + | 'agents' + | 'demo-tour'; export interface KtxSetupEntryMenuPromptAdapter { select(options: { message: string; options: KtxSetupPromptOption[] }): Promise; @@ -196,6 +206,36 @@ function createEntryMenuPromptAdapter(): KtxSetupEntryMenuPromptAdapter { }); } +function setupTelemetryOutcome( + status: KtxSetupFlowStatus | Extract>, { status: string }>['status'], +): 'completed' | 'skipped' | 'abandoned' { + if (status === 'ready') return 'completed'; + if (status === 'skipped') return 'skipped'; + return 'abandoned'; +} + +async function recordSetupStep(input: { + projectDir: string; + step: TelemetrySetupStep; + status: KtxSetupFlowStatus | Extract>, { status: string 
}>['status']; + startedAt: number; + io: KtxCliIo; + cliVersion?: string; +}): Promise { + const { emitTelemetryEvent } = await import('./telemetry/index.js'); + await emitTelemetryEvent({ + name: 'setup_step', + projectDir: input.projectDir, + io: input.io, + packageInfo: { ...getKtxCliPackageInfo(), version: input.cliVersion ?? getKtxCliPackageInfo().version }, + fields: { + step: input.step, + outcome: setupTelemetryOutcome(input.status), + durationMs: Math.max(0, performance.now() - input.startedAt), + }, + }); +} + async function runKtxSetupEntryMenu( status: KtxSetupStatus, deps: KtxSetupEntryMenuDeps = {}, @@ -229,11 +269,21 @@ async function runKtxSetupDemoFromEntryMenu( deps: KtxSetupDeps, ): Promise { const { runDemoTour } = await import('./setup-demo-tour.js'); - return await runDemoTour( - { inputMode: args.inputMode }, + const startedAt = performance.now(); + const result = await runDemoTour( + { inputMode: args.inputMode, cliVersion: args.cliVersion }, io, { agents: deps.agents }, ); + await recordSetupStep({ + projectDir: args.projectDir, + step: 'demo-tour', + status: result === 0 ? 'ready' : 'failed', + startedAt, + io, + cliVersion: args.cliVersion, + }); + return result; } function embeddingsReady(status: KtxSetupStatus['embeddings']): boolean { @@ -564,6 +614,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup } const projectMode = entryAction === 'new-project' ? 
'prompt-new' : args.mode; + const projectStepStartedAt = performance.now(); projectResult = await runKtxSetupProjectStep( { projectDir: args.projectDir, @@ -575,6 +626,14 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup io, deps.project, ); + await recordSetupStep({ + projectDir: projectResult.projectDir, + step: 'project', + status: projectResult.status, + startedAt: projectStepStartedAt, + io, + cliVersion: args.cliVersion, + }); if (projectResult.status === 'back') { continue; @@ -640,6 +699,7 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup const step = setupSteps[stepIndex]; if (!step) break; + const stepStartedAt = performance.now(); let stepResult: { status: KtxSetupFlowStatus }; if (step === 'models') { const modelRunner = @@ -792,6 +852,15 @@ async function runKtxSetupInner(args: KtxSetupArgs, io: KtxCliIo, deps: KtxSetup } } + await recordSetupStep({ + projectDir: projectResult.projectDir, + step, + status: stepResult.status, + startedAt: stepStartedAt, + io, + cliVersion: args.cliVersion, + }); + if (stepResult.status === 'failed') { await cleanupCreatedProjectScaffold(projectResult.createdProjectCleanup); return 1; diff --git a/packages/cli/src/sl.test.ts b/packages/cli/src/sl.test.ts index f1fe0b89..7fa855d0 100644 --- a/packages/cli/src/sl.test.ts +++ b/packages/cli/src/sl.test.ts @@ -18,12 +18,13 @@ const ORDERS_YAML = [ '', ].join('\n'); -function makeIo() { +function makeIo(options: { isTTY?: boolean } = {}) { let stdout = ''; let stderr = ''; return { io: { stdout: { + isTTY: options.isTTY, write: (chunk: string) => { stdout += chunk; }, @@ -63,6 +64,7 @@ describe('runKtxSl', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -289,6 +291,43 @@ joins: [] expect(stderr.write).not.toHaveBeenCalled(); }); + it('emits debug telemetry for sl query without project paths', async () => { + 
vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = join(tempDir, 'project'); + await seedSlSource({ projectDir }); + const io = makeIo({ isTTY: true }); + const createSemanticLayerCompute = vi.fn(() => ({ + query: vi.fn(async () => ({ + sql: 'select count(*) as order_count from public.orders', + dialect: 'postgres', + columns: [{ name: 'orders.order_count' }], + plan: {}, + })), + validateSources: vi.fn(), + generateSources: vi.fn(), + })); + + const code = await runKtxSl( + { + command: 'query', + projectDir, + connectionId: 'warehouse', + query: { measures: ['orders.order_count'], dimensions: [] }, + format: 'json', + execute: false, + cliVersion: '0.2.0', + runtimeInstallPolicy: 'auto', + }, + io.io, + { createSemanticLayerCompute }, + ); + + expect(code).toBe(0); + expect(io.stderr()).toContain('"event":"sl_query_completed"'); + expect(io.stderr()).not.toContain(projectDir); + }); + it('runs sl query from a JSON query file', async () => { const projectDir = join(tempDir, 'project'); const project = await initKtxProject({ projectDir }); @@ -413,6 +452,7 @@ joins: [] cliVersion: '0.2.0', installPolicy: 'auto', io: { stdout, stderr }, + projectDir, }); expect(stdout.write).toHaveBeenCalledWith('select count(*) as order_count from public.orders\n'); }); diff --git a/packages/cli/src/sl.ts b/packages/cli/src/sl.ts index 0e5e0fc0..76e1092a 100644 --- a/packages/cli/src/sl.ts +++ b/packages/cli/src/sl.ts @@ -1,4 +1,5 @@ import { readFile } from 'node:fs/promises'; +import type { KtxCliIo } from './cli-runtime.js'; import { createDefaultLocalQueryExecutor } from './context/connections/local-query-executor.js'; import type { KtxSqlQueryExecutorPort } from './context/connections/query-executor.js'; import { KtxIngestEmbeddingPortAdapter } from './context/llm/embedding-port.js'; @@ -18,6 +19,8 @@ import { type KtxManagedPythonInstallPolicy, } from './managed-python-command.js'; import { profileMark } from './startup-profile.js'; +import { 
emitTelemetryEvent } from './telemetry/index.js'; +import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sl'); @@ -56,10 +59,7 @@ export type KtxSlArgs = runtimeInstallPolicy: KtxManagedPythonInstallPolicy; }; -interface KtxSlIo { - stdout: { write(chunk: string): void }; - stderr: { write(chunk: string): void }; -} +type KtxSlIo = KtxCliIo; interface KtxSlDeps { loadProject?: typeof loadKtxProject; @@ -70,6 +70,7 @@ interface KtxSlDeps { cliVersion: string; installPolicy: KtxManagedPythonInstallPolicy; io: KtxSlIo; + projectDir?: string; }) => Promise; createQueryExecutor?: () => KtxSqlQueryExecutorPort; } @@ -85,6 +86,14 @@ function resolutionToEmbeddingPort(resolution: EmbeddingProviderResolution): Ktx return null; } +function queryMeasureCount(query: SemanticLayerQueryInput): number { + return Array.isArray(query.measures) ? query.measures.length : 0; +} + +function queryDimensionCount(query: SemanticLayerQueryInput): number { + return Array.isArray(query.dimensions) ? query.dimensions.length : 0; +} + async function printSlSources(input: { rows: ReadonlyArray; command: 'sl list'; @@ -177,6 +186,8 @@ async function readSlQueryFile(path: string): Promise { } export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: KtxSlDeps = {}): Promise { + const startedAt = performance.now(); + let queryForTelemetry: SemanticLayerQueryInput | undefined; try { const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); if (args.command === 'list') { @@ -234,6 +245,18 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx connectionId: args.connectionId, sourceName: args.sourceName, }); + await emitTelemetryEvent({ + name: 'sl_validate_completed', + projectDir: args.projectDir, + io, + fields: { + sourceCount: source ? 1 : 0, + modelCount: 0, + validationErrorCount: result.valid ? 0 : result.errors.length, + outcome: result.valid ? 
'ok' : 'error', + durationMs: Math.max(0, performance.now() - startedAt), + }, + }); if (!result.valid) { for (const error of result.errors) { io.stderr.write(`${error}\n`); @@ -248,12 +271,14 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx if (!query) { throw new Error('sl query requires query input from --query-file or at least one --measure'); } + queryForTelemetry = query; const compute = deps.createSemanticLayerCompute ? deps.createSemanticLayerCompute() : await (deps.createManagedSemanticLayerCompute ?? createManagedPythonSemanticLayerComputePort)({ cliVersion: args.cliVersion, installPolicy: args.runtimeInstallPolicy, io, + projectDir: args.projectDir, }); const queryExecutor = args.execute ? (deps.createQueryExecutor ?? createDefaultLocalQueryExecutor)() : undefined; const result = await compileLocalSlQuery(project as KtxLocalProject, { @@ -264,6 +289,19 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx maxRows: args.maxRows, queryExecutor, }); + await emitTelemetryEvent({ + name: 'sl_query_completed', + projectDir: args.projectDir, + io, + fields: { + mode: args.execute ? 'execute' : 'compile', + referencedSourceCount: result.plan && typeof result.plan === 'object' ? 
1 : 0, + referencedDimensionCount: queryDimensionCount(query), + referencedMeasureCount: queryMeasureCount(query), + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'ok', + }, + }); if (args.format === 'sql') { io.stdout.write(`${result.sql}\n`); return 0; @@ -274,6 +312,39 @@ export async function runKtxSl(args: KtxSlArgs, io: KtxSlIo = process, deps: Ktx const _exhaustive: never = args; throw new Error(`Unsupported sl command: ${JSON.stringify(_exhaustive)}`); } catch (error) { + if (args.command === 'validate') { + const errorClass = scrubErrorClass(error); + await emitTelemetryEvent({ + name: 'sl_validate_completed', + projectDir: args.projectDir, + io, + fields: { + sourceCount: 0, + modelCount: 0, + validationErrorCount: 0, + outcome: 'error', + ...(errorClass ? { errorClass } : {}), + durationMs: Math.max(0, performance.now() - startedAt), + }, + }); + } + if (args.command === 'query') { + const errorClass = scrubErrorClass(error); + await emitTelemetryEvent({ + name: 'sl_query_completed', + projectDir: args.projectDir, + io, + fields: { + mode: args.execute ? 'execute' : 'compile', + referencedSourceCount: 0, + referencedDimensionCount: queryForTelemetry ? queryDimensionCount(queryForTelemetry) : 0, + referencedMeasureCount: queryForTelemetry ? queryMeasureCount(queryForTelemetry) : 0, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'error', + ...(errorClass ? { errorClass } : {}), + }, + }); + } io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/sql.test.ts b/packages/cli/src/sql.test.ts index 5317b2a8..51cfe920 100644 --- a/packages/cli/src/sql.test.ts +++ b/packages/cli/src/sql.test.ts @@ -8,12 +8,13 @@ import type { SqlAnalysisPort } from './context/sql-analysis/ports.js'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { runKtxSql } from './sql.js'; -function makeIo() { +function makeIo(options: { isTTY?: boolean } = {}) { let stdout = ''; let stderr = ''; return { io: { stdout: { + isTTY: options.isTTY, write: (chunk: string) => { stdout += chunk; }, @@ -32,7 +33,7 @@ function makeIo() { function makeSqlAnalysis(result: Awaited>): SqlAnalysisPort { return { analyzeForFingerprint: vi.fn(), - analyzeBatch: vi.fn(), + analyzeBatch: vi.fn(async () => new Map([['cli-sql', { tablesTouched: ['orders'], columnsByClause: {} }]])), validateReadOnly: vi.fn(async () => result), }; } @@ -76,6 +77,7 @@ describe('runKtxSql', () => { }); afterEach(async () => { + vi.unstubAllEnvs(); await rm(tempDir, { recursive: true, force: true }); }); @@ -130,6 +132,39 @@ describe('runKtxSql', () => { expect(io.stderr()).toBe(''); }); + it('emits debug telemetry for SQL without raw query text', async () => { + vi.stubEnv('KTX_TELEMETRY_DEBUG', '1'); + vi.stubEnv('CI', ''); + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeConnections(projectDir, { warehouse: { driver: 'sqlite', path: 'warehouse.db' } }); + const io = makeIo({ isTTY: true }); + + await expect( + runKtxSql( + { + command: 'execute', + projectDir, + connectionId: 'warehouse', + sql: 'select count(*) from orders', + maxRows: 10, + output: 'json', + json: true, + cliVersion: '0.0.0-test', + }, + io.io, + { + createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }), + createScanConnector: vi.fn(async () => makeConnector()), + }, + ), + ).resolves.toBe(0); + + 
expect(io.stderr()).toContain('"event":"sql_completed"'); + expect(io.stderr()).toContain('"queryVerb":"select"'); + expect(io.stderr()).not.toContain('select count(*)'); + }); + it('prints JSON output', async () => { const projectDir = join(tempDir, 'project'); await initKtxProject({ projectDir }); diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index 833df78e..1b15f92e 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -6,6 +6,9 @@ import { type KtxOutputMode, resolveOutputMode } from './io/mode.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; import { createManagedDaemonSqlAnalysisPort } from './managed-python-http.js'; import { profileMark } from './startup-profile.js'; +import { isDemoConnection } from './telemetry/demo-detect.js'; +import { emitTelemetryEvent } from './telemetry/index.js'; +import { scrubErrorClass } from './telemetry/scrubber.js'; profileMark('module:sql'); @@ -54,6 +57,27 @@ function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDia return map[normalized] ?? 'postgres'; } +function queryVerb(sql: string): 'select' | 'explain' | 'show' | 'with' | 'other' { + const first = sql.trim().split(/\s+/, 1)[0]?.toLowerCase(); + if (first === 'select' || first === 'explain' || first === 'show' || first === 'with') { + return first; + } + return 'other'; +} + +async function safeReferencedTableCount( + port: SqlAnalysisPort, + sql: string, + dialect: SqlAnalysisDialect, +): Promise { + try { + const results = await port.analyzeBatch([{ id: 'cli-sql', sql }], dialect); + return results.get('cli-sql')?.tablesTouched.length ?? 
0; + } catch { + return 0; + } +} + function formatValue(value: unknown): string { if (value === null || value === undefined) return ''; if (typeof value === 'string') return value; @@ -119,14 +143,19 @@ function resultOutput(connectionId: string, result: KtxQueryResult): SqlExecutio } export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: KtxSqlDeps = {}): Promise { + const startedAt = performance.now(); + let driver = 'unknown'; + let demoConnection = false; try { const project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); const connection = project.config.connections[args.connectionId]; if (!connection) { throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`); } + driver = String(connection.driver ?? 'unknown').toLowerCase(); + demoConnection = isDemoConnection(args.connectionId, connection); - const sqlAnalysis = + const createSqlAnalysis = deps.createSqlAnalysis ?? (() => createManagedDaemonSqlAnalysisPort({ @@ -135,10 +164,13 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: installPolicy: 'auto', io, })); - const validation = await sqlAnalysis().validateReadOnly(args.sql, sqlAnalysisDialectForDriver(connection.driver)); + const analysisPort = createSqlAnalysis(); + const dialect = sqlAnalysisDialectForDriver(connection.driver); + const validation = await analysisPort.validateReadOnly(args.sql, dialect); if (!validation.ok) { throw new Error(validation.error ?? 'SQL is not read-only.'); } + const referencedTableCount = await safeReferencedTableCount(analysisPort, args.sql, dialect); const createScanConnector = deps.createScanConnector ?? 
createKtxCliScanConnector; let connector: KtxScanConnector | null = null; @@ -157,11 +189,39 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: ); const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); printSqlResult(resultOutput(args.connectionId, result), mode, io); + await emitTelemetryEvent({ + name: 'sql_completed', + projectDir: args.projectDir, + io, + fields: { + driver, + isDemoConnection: demoConnection, + queryVerb: queryVerb(args.sql), + referencedTableCount, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'ok', + }, + }); return 0; } finally { await cleanupConnector(connector); } } catch (error) { + const errorClass = scrubErrorClass(error); + await emitTelemetryEvent({ + name: 'sql_completed', + projectDir: args.projectDir, + io, + fields: { + driver, + isDemoConnection: demoConnection, + queryVerb: queryVerb(args.sql), + referencedTableCount: 0, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'error', + ...(errorClass ? { errorClass } : {}), + }, + }); io.stderr.write(`${error instanceof Error ? 
error.message : String(error)}\n`); return 1; } diff --git a/packages/cli/src/telemetry/command-hook.test.ts b/packages/cli/src/telemetry/command-hook.test.ts new file mode 100644 index 00000000..ffd0485b --- /dev/null +++ b/packages/cli/src/telemetry/command-hook.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from 'vitest'; + +import { beginCommandSpan, completeCommandSpan, resetCommandSpan } from './command-hook.js'; + +describe('telemetry command hook', () => { + it('builds a completed command event from a span', () => { + resetCommandSpan(); + beginCommandSpan({ + commandPath: ['ktx', 'status'], + flagsPresent: { projectDir: true, json: true }, + projectDir: '/tmp/private', + hasProject: true, + attachProjectGroup: true, + startedAt: 100, + }); + + expect( + completeCommandSpan({ + completedAt: 125, + outcome: 'ok', + }), + ).toEqual({ + commandPath: ['ktx', 'status'], + durationMs: 25, + outcome: 'ok', + flagsPresent: { projectDir: true, json: true }, + hasProject: true, + projectDir: '/tmp/private', + projectGroupAttached: true, + }); + }); + + it('returns undefined when no preAction span exists', () => { + resetCommandSpan(); + expect(completeCommandSpan({ completedAt: 200, outcome: 'ok' })).toBeUndefined(); + }); +}); diff --git a/packages/cli/src/telemetry/command-hook.ts b/packages/cli/src/telemetry/command-hook.ts new file mode 100644 index 00000000..e4f003d7 --- /dev/null +++ b/packages/cli/src/telemetry/command-hook.ts @@ -0,0 +1,59 @@ +import { scrubErrorClass } from './scrubber.js'; + +export type CommandOutcome = 'ok' | 'error' | 'aborted'; + +interface CommandSpan { + commandPath: string[]; + flagsPresent: Record; + projectDir?: string; + hasProject: boolean; + attachProjectGroup: boolean; + startedAt: number; +} + +export interface CompletedCommandSpan { + commandPath: string[]; + durationMs: number; + outcome: CommandOutcome; + errorClass?: string; + flagsPresent: Record; + hasProject: boolean; + projectDir?: string; + 
projectGroupAttached: boolean; +} + +let activeCommandSpan: CommandSpan | undefined; + +export function beginCommandSpan(input: CommandSpan): void { + activeCommandSpan = input; +} + +export function completeCommandSpan(input: { + completedAt: number; + outcome: CommandOutcome; + error?: unknown; +}): CompletedCommandSpan | undefined { + const span = activeCommandSpan; + activeCommandSpan = undefined; + if (!span) { + return undefined; + } + + const errorClass = input.error ? scrubErrorClass(input.error) : undefined; + + return { + commandPath: span.commandPath, + durationMs: Math.max(0, input.completedAt - span.startedAt), + outcome: input.outcome, + ...(errorClass ? { errorClass } : {}), + flagsPresent: span.flagsPresent, + hasProject: span.hasProject, + projectDir: span.projectDir, + projectGroupAttached: span.attachProjectGroup, + }; +} + +/** @internal */ +export function resetCommandSpan(): void { + activeCommandSpan = undefined; +} diff --git a/packages/cli/src/telemetry/demo-detect.test.ts b/packages/cli/src/telemetry/demo-detect.test.ts new file mode 100644 index 00000000..b371694e --- /dev/null +++ b/packages/cli/src/telemetry/demo-detect.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, it } from 'vitest'; + +import { isDemoConnection } from './demo-detect.js'; + +describe('isDemoConnection', () => { + it('detects only the packaged Orbit SQLite demo recipe', () => { + expect( + isDemoConnection('orbit_demo', { + driver: 'sqlite', + path: '/tmp/ktx-demo/demo.db', + }), + ).toBe(true); + + expect( + isDemoConnection('orbit_demo', { + driver: 'postgres', + path: '/tmp/ktx-demo/demo.db', + }), + ).toBe(false); + expect( + isDemoConnection('warehouse', { + driver: 'sqlite', + path: '/tmp/ktx-demo/demo.db', + }), + ).toBe(false); + expect( + isDemoConnection('orbit_demo', { + driver: 'sqlite', + path: '/tmp/ktx-demo/private.db', + }), + ).toBe(false); + }); +}); diff --git a/packages/cli/src/telemetry/demo-detect.ts 
b/packages/cli/src/telemetry/demo-detect.ts new file mode 100644 index 00000000..099dcfb9 --- /dev/null +++ b/packages/cli/src/telemetry/demo-detect.ts @@ -0,0 +1,15 @@ +import { basename } from 'node:path'; +import type { KtxProjectConnectionConfig } from '../context/project/config.js'; +import { DEMO_CONNECTION_ID } from '../demo-assets.js'; + +export function isDemoConnection( + connectionId: string, + connection: KtxProjectConnectionConfig | undefined, +): boolean { + if (!connection) { + return false; + } + + const path = typeof connection.path === 'string' ? connection.path : ''; + return connectionId === DEMO_CONNECTION_ID && connection.driver === 'sqlite' && basename(path) === 'demo.db'; +} diff --git a/packages/cli/src/telemetry/emitter.test.ts b/packages/cli/src/telemetry/emitter.test.ts new file mode 100644 index 00000000..9c732997 --- /dev/null +++ b/packages/cli/src/telemetry/emitter.test.ts @@ -0,0 +1,123 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + __resetTelemetryEmitterForTests, + shutdownTelemetryEmitter, + trackTelemetryEvent, +} from './emitter.js'; +import type { BuiltTelemetryEvent } from './events.js'; + +const captures: unknown[] = []; +const shutdown = vi.fn(async () => {}); + +function liveConfigId(): string { + return 'fixture'; +} + +vi.mock('posthog-node', () => ({ + PostHog: vi.fn().mockImplementation(function () { + return { + capture: (event: unknown) => captures.push(event), + shutdown, + }; + }), +})); + +function commandEvent(): BuiltTelemetryEvent<'command'> { + return { + name: 'command', + properties: { + cliVersion: '0.4.1', + nodeVersion: 'v22.0.0', + osPlatform: 'darwin', + osRelease: '25.0.0', + arch: 'arm64', + runtime: 'node', + isCi: false, + commandPath: ['ktx', 'status'], + durationMs: 1, + outcome: 'ok', + flagsPresent: {}, + hasProject: true, + projectGroupAttached: true, + }, + }; +} + +describe('telemetry emitter', () => { + beforeEach(() => { + captures.length = 0; + 
shutdown.mockClear(); + __resetTelemetryEmitterForTests(); + }); + + it('prints debug payloads without importing or sending to PostHog', async () => { + const stderr: string[] = []; + + await trackTelemetryEvent({ + event: commandEvent(), + distinctId: 'install-1', + projectId: 'project-1', + env: { KTX_TELEMETRY_DEBUG: '1' }, + stderr: { write: (chunk) => stderr.push(chunk) }, + }); + + expect(stderr.join('')).toContain('[telemetry]'); + expect(stderr.join('')).toContain('"event":"command"'); + expect(captures).toEqual([]); + }); + + it('sends to PostHog by default once config constants are populated', async () => { + await trackTelemetryEvent({ + event: commandEvent(), + distinctId: 'install-1', + projectId: 'project-1', + env: {}, + stderr: { write: () => {} }, + }); + + expect(captures).toHaveLength(1); + expect(captures[0]).toMatchObject({ + distinctId: 'install-1', + event: 'command', + groups: { project: 'project-1' }, + }); + }); + + it('captures with distinctId, properties, and groups when live config is supplied', async () => { + await trackTelemetryEvent({ + event: commandEvent(), + distinctId: 'install-1', + projectId: 'project-1', + projectApiKey: liveConfigId(), + host: 'https://us.i.posthog.com', + env: {}, + stderr: { write: () => {} }, + }); + + expect(captures).toHaveLength(1); + expect(captures[0]).toMatchObject({ + distinctId: 'install-1', + event: 'command', + groups: { project: 'project-1' }, + properties: { + cliVersion: '0.4.1', + commandPath: ['ktx', 'status'], + }, + }); + }); + + it('shuts down the client without throwing', async () => { + await trackTelemetryEvent({ + event: commandEvent(), + distinctId: 'install-1', + projectApiKey: liveConfigId(), + host: 'https://us.i.posthog.com', + env: {}, + stderr: { write: () => {} }, + }); + + await expect(shutdownTelemetryEmitter()).resolves.toBeUndefined(); + expect(shutdown).toHaveBeenCalledTimes(1); + }); +}); diff --git a/packages/cli/src/telemetry/emitter.ts 
b/packages/cli/src/telemetry/emitter.ts new file mode 100644 index 00000000..435a122b --- /dev/null +++ b/packages/cli/src/telemetry/emitter.ts @@ -0,0 +1,125 @@ +import type { BuiltTelemetryEvent } from './events.js'; + +export interface TelemetryEmitterEnv { + KTX_TELEMETRY_DEBUG?: string; + KTX_TELEMETRY_ENDPOINT?: string; +} + +export interface TelemetrySink { + write(chunk: string): void; +} + +type PostHogClient = { + capture(event: { + distinctId: string; + event: string; + properties: Record; + groups?: Record; + }): void; + shutdown(): Promise | void; +}; + +// PostHog public project ingestion key — safe to embed; capture-only, no read access. +const POSTHOG_PROJECT_API_KEY = 'phc_xbvZpbu8ZNLnogTbY7MEMWhCF2rzzApYsDndjKaRBXXx'; // pragma: allowlist secret +const POSTHOG_HOST = 'https://us.i.posthog.com'; +const SHUTDOWN_TIMEOUT_MS = 1500; + +let clientPromise: Promise | undefined; + +function telemetryHost(env: TelemetryEmitterEnv, explicitHost?: string): string { + return explicitHost ?? env.KTX_TELEMETRY_ENDPOINT ?? POSTHOG_HOST; +} + +function telemetryProjectApiKey(explicitProjectApiKey?: string): string { + return explicitProjectApiKey ?? 
POSTHOG_PROJECT_API_KEY; +} + +function liveTelemetryConfigured(projectApiKey: string, host: string): boolean { + return projectApiKey.trim() !== '' && host.trim() !== ''; +} + +async function getPostHogClient(projectApiKey: string, host: string): Promise { + if (!liveTelemetryConfigured(projectApiKey, host)) { + return null; + } + + clientPromise ??= import('posthog-node') + .then(({ PostHog }) => new PostHog(projectApiKey, { host, flushAt: 1, flushInterval: 0 })) + .catch(() => null); + + return await clientPromise; +} + +function debugEnabled(env: TelemetryEmitterEnv): boolean { + return env.KTX_TELEMETRY_DEBUG === '1'; +} + +function writeDebugPayload(input: { + event: BuiltTelemetryEvent; + distinctId: string; + projectId?: string; + stderr: TelemetrySink; +}): void { + input.stderr.write( + `[telemetry] ${JSON.stringify({ + distinctId: input.distinctId, + event: input.event.name, + properties: input.event.properties, + groups: input.projectId ? { project: input.projectId } : undefined, + })}\n`, + ); +} + +export async function trackTelemetryEvent(input: { + event: BuiltTelemetryEvent; + distinctId: string; + projectId?: string; + env?: TelemetryEmitterEnv; + stderr: TelemetrySink; + projectApiKey?: string; + host?: string; +}): Promise { + const env = input.env ?? process.env; + + if (debugEnabled(env)) { + writeDebugPayload(input); + return; + } + + const projectApiKey = telemetryProjectApiKey(input.projectApiKey); + const host = telemetryHost(env, input.host); + const client = await getPostHogClient(projectApiKey, host); + if (!client) { + return; + } + + try { + client.capture({ + distinctId: input.distinctId, + event: input.event.name, + properties: input.event.properties, + groups: input.projectId ? 
{ project: input.projectId } : undefined, + }); + } catch { + return; + } +} + +export async function shutdownTelemetryEmitter(): Promise { + const client = await clientPromise; + if (!client) { + return; + } + + await Promise.race([ + Promise.resolve(client.shutdown()).catch(() => undefined), + new Promise((resolve) => { + setTimeout(resolve, SHUTDOWN_TIMEOUT_MS); + }), + ]); +} + +/** @internal */ +export function __resetTelemetryEmitterForTests(): void { + clientPromise = undefined; +} diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json new file mode 100644 index 00000000..13642c49 --- /dev/null +++ b/packages/cli/src/telemetry/events.schema.json @@ -0,0 +1,1407 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ktx telemetry events", + "type": "object", + "additionalProperties": false, + "x-ktx-common-fields": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "x-ktx-catalog": [ + { + "name": "install_first_run", + "description": "Emitted once when ~/.ktx/telemetry.json is created.", + "fields": [] + }, + { + "name": "command", + "description": "Emitted once for each Commander action that reaches preAction.", + "fields": [ + "commandPath", + "durationMs", + "outcome", + "errorClass", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ] + }, + { + "name": "setup_step", + "description": "Emitted after an interactive setup step completes, skips, or aborts.", + "fields": [ + "step", + "outcome", + "durationMs" + ] + }, + { + "name": "connection_added", + "description": "Emitted when setup writes a database, source, or demo connection.", + "fields": [ + "driver", + "isDemoConnection" + ] + }, + { + "name": "connection_test", + "description": "Emitted after ktx connection test completes.", + "fields": [ + "driver", + "isDemoConnection", + "outcome", + "errorClass", + "durationMs", + "serverVersion" + ] + }, + { + "name": 
"project_stack_snapshot", + "description": "Emitted after commands that can summarize the local project stack.", + "fields": [ + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ] + }, + { + "name": "ingest_completed", + "description": "Emitted after a public ingest target completes.", + "fields": [ + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "scan_completed", + "description": "Emitted after schema scan or relationship inference completes.", + "fields": [ + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sl_validate_completed", + "description": "Emitted after ktx sl validate completes.", + "fields": [ + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "errorClass", + "durationMs" + ] + }, + { + "name": "sl_query_completed", + "description": "Emitted after ktx sl query compiles or executes.", + "fields": [ + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sql_completed", + "description": "Emitted after ktx sql completes validation and execution.", + "fields": [ + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "wiki_query_completed", + "description": "Emitted after a wiki query completes.", + "fields": [ + "queryLength", + "resultCount", + "durationMs", + "outcome" + ] + }, + { + "name": "mcp_request_completed", + "description": "Emitted for sampled MCP tool requests.", + "fields": [ + "toolName", + "outcome", + "durationMs", + "errorClass", + "sampleRate" + ] + }, + { + "name": "daemon_started", + "description": "Emitted when the long-lived ktx-daemon HTTP server starts.", 
+ "fields": [ + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ] + }, + { + "name": "daemon_stopped", + "description": "Emitted when the long-lived ktx-daemon HTTP server shuts down.", + "fields": [ + "reason", + "uptimeMs" + ] + }, + { + "name": "sl_plan_completed", + "description": "Emitted after a daemon semantic-layer planning pass completes.", + "fields": [ + "outcome", + "stage", + "errorClass", + "durationMs", + "sourceCount", + "joinCount" + ] + }, + { + "name": "sql_gen_completed", + "description": "Emitted after daemon SQL generation completes.", + "fields": [ + "outcome", + "dialect", + "errorClass", + "durationMs" + ] + } + ], + "$defs": { + "install_first_run": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "additionalProperties": false + }, + "command": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "commandPath": { + "minItems": 1, + "type": "array", + "items": { + "type": "string" + } + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error", + "aborted" + ] + }, + "errorClass": { + "type": 
"string" + }, + "flagsPresent": { + "type": "object", + "propertyNames": { + "type": "string" + }, + "additionalProperties": { + "type": "boolean" + } + }, + "hasProject": { + "type": "boolean" + }, + "projectGroupAttached": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "commandPath", + "durationMs", + "outcome", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ], + "additionalProperties": false + }, + "setup_step": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "step": { + "type": "string", + "enum": [ + "project", + "runtime", + "models", + "embeddings", + "secrets", + "databases", + "database-context-depth", + "sources", + "context", + "agents", + "demo-tour" + ] + }, + "outcome": { + "type": "string", + "enum": [ + "completed", + "skipped", + "abandoned" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "step", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "connection_added": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" 
+ }, + "isDemoConnection": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection" + ], + "additionalProperties": false + }, + "connection_test": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "serverVersion": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "project_stack_snapshot": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "connectors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "driver": { + "type": "string" + }, + "isDemo": { + "type": "boolean" + } + }, + "required": [ + "driver", + "isDemo" + ], + "additionalProperties": false + } + }, + "connectionCount": { + "type": "integer", + "minimum": 0, + "maximum": 
9007199254740991 + }, + "hasSl": { + "type": "boolean" + }, + "hasWiki": { + "type": "boolean" + }, + "hasMcp": { + "type": "boolean" + }, + "hasManagedRuntime": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ], + "additionalProperties": false + }, + "ingest_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "schemaCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "rowsBucket": { + "type": "string", + "enum": [ + "<10k", + "<100k", + "<1M", + "<10M", + ">=10M" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "scan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + 
"type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "inferredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "declaredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sl_validate_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "modelCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "validationErrorCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", 
+ "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "sl_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "mode": { + "type": "string", + "enum": [ + "compile", + "execute" + ] + }, + "referencedSourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedDimensionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedMeasureCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sql_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + 
"driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "queryVerb": { + "type": "string", + "enum": [ + "select", + "explain", + "show", + "with", + "other" + ] + }, + "referencedTableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "wiki_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "queryLength": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "resultCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "queryLength", + "resultCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "mcp_request_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": 
"string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "toolName": { + "type": "string" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "errorClass": { + "type": "string" + }, + "sampleRate": { + "type": "number", + "const": 0.1 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "toolName", + "outcome", + "durationMs", + "sampleRate" + ], + "additionalProperties": false + }, + "daemon_started": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "daemonVersion": { + "type": "string" + }, + "pythonVersion": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "startupDurationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ], + "additionalProperties": false + }, + "daemon_stopped": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "reason": { + 
"type": "string", + "enum": [ + "signal", + "request", + "crash" + ] + }, + "uptimeMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "reason", + "uptimeMs" + ], + "additionalProperties": false + }, + "sl_plan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "stage": { + "type": "string", + "enum": [ + "parse", + "resolve", + "compile", + "transpile" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "joinCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "stage", + "durationMs", + "sourceCount", + "joinCount" + ], + "additionalProperties": false + }, + "sql_gen_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "dialect": { + "type": 
"string" + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "dialect", + "durationMs" + ], + "additionalProperties": false + } + } +} diff --git a/packages/cli/src/telemetry/events.snapshot.test.ts b/packages/cli/src/telemetry/events.snapshot.test.ts new file mode 100644 index 00000000..1df95aa0 --- /dev/null +++ b/packages/cli/src/telemetry/events.snapshot.test.ts @@ -0,0 +1,141 @@ +import { describe, expect, it } from 'vitest'; + +import { buildTelemetryEvent, type TelemetryCommonEnvelope } from './events.js'; + +const BLACKLIST = [ + '/Users/', + '/home/', + 'C:\\', + 'localhost', + '.local', + 'kaelio.com', + 'select ', + 'SELECT ', + 'INSERT', + 'CREATE', + '@', + 'password', + 'secret', + 'token', + 'key', +]; + +const envelope: TelemetryCommonEnvelope = { + cliVersion: '0.4.1', + nodeVersion: 'v22.0.0', + osPlatform: 'darwin', + osRelease: '25.0.0', + arch: 'arm64', + runtime: 'node', + isCi: false, +}; + +describe('telemetry privacy snapshot', () => { + it('does not emit known private substrings from phase 1 event payloads', () => { + const events = [ + buildTelemetryEvent('install_first_run', envelope, {}), + buildTelemetryEvent('command', envelope, { + commandPath: ['ktx', 'sql'], + durationMs: 10, + outcome: 'error', + errorClass: 'KtxProjectMissingAbortError', + flagsPresent: { + 'project-dir': true, + connection: true, + c: true, + }, + hasProject: false, + projectGroupAttached: false, + }), + buildTelemetryEvent('setup_step', envelope, { + step: 'databases', + outcome: 'completed', + durationMs: 42, + }), + buildTelemetryEvent('connection_added', envelope, { + driver: 'postgres', + isDemoConnection: false, + }), + buildTelemetryEvent('connection_test', envelope, { + driver: 'postgres', + isDemoConnection: false, + outcome: 'error', + errorClass: 
'KtxConnectionTestAbortError', + durationMs: 34, + serverVersion: '16', + }), + buildTelemetryEvent('project_stack_snapshot', envelope, { + connectors: [ + { driver: 'sqlite', isDemo: true }, + { driver: 'postgres', isDemo: false }, + ], + connectionCount: 2, + hasSl: true, + hasWiki: true, + hasMcp: true, + hasManagedRuntime: true, + }), + buildTelemetryEvent('ingest_completed', envelope, { + driver: 'postgres', + isDemoConnection: false, + schemaCount: 2, + tableCount: 4, + columnCount: 20, + rowsBucket: '<100k', + durationMs: 100, + outcome: 'ok', + }), + buildTelemetryEvent('scan_completed', envelope, { + driver: 'postgres', + tableCount: 4, + columnCount: 20, + inferredFkCount: 2, + declaredFkCount: 1, + durationMs: 70, + outcome: 'ok', + }), + buildTelemetryEvent('sl_validate_completed', envelope, { + sourceCount: 1, + modelCount: 3, + validationErrorCount: 0, + outcome: 'ok', + durationMs: 15, + }), + buildTelemetryEvent('sl_query_completed', envelope, { + mode: 'compile', + referencedSourceCount: 1, + referencedDimensionCount: 2, + referencedMeasureCount: 1, + durationMs: 18, + outcome: 'ok', + }), + buildTelemetryEvent('sql_completed', envelope, { + driver: 'postgres', + isDemoConnection: false, + queryVerb: 'select', + referencedTableCount: 3, + durationMs: 20, + outcome: 'ok', + }), + buildTelemetryEvent('wiki_query_completed', envelope, { + queryLength: 'select private_table from /Users/alice'.length, + resultCount: 2, + durationMs: 8, + outcome: 'ok', + }), + buildTelemetryEvent('mcp_request_completed', envelope, { + toolName: 'sl_query', + outcome: 'error', + errorClass: 'KtxProjectMissingAbortError', + durationMs: 12, + sampleRate: 0.1, + }), + ]; + + const payload = JSON.stringify(events); + + for (const forbidden of BLACKLIST) { + expect(payload).not.toContain(forbidden); + } + }); +}); diff --git a/packages/cli/src/telemetry/events.test.ts b/packages/cli/src/telemetry/events.test.ts new file mode 100644 index 00000000..3726ddde --- /dev/null +++ 
b/packages/cli/src/telemetry/events.test.ts @@ -0,0 +1,165 @@ +import { describe, expect, it } from 'vitest'; + +import { + buildTelemetryEvent, + telemetryEventCatalog, + telemetryEventSchemas, + type TelemetryCommonEnvelope, +} from './events.js'; + +const envelope: TelemetryCommonEnvelope = { + cliVersion: '0.4.1', + nodeVersion: 'v22.0.0', + osPlatform: 'darwin', + osRelease: '25.0.0', + arch: 'arm64', + runtime: 'node', + isCi: false, +}; + +describe('telemetry event schemas', () => { + it('catalogs all v1 telemetry events', () => { + expect(telemetryEventCatalog.map((event) => event.name)).toEqual([ + 'install_first_run', + 'command', + 'setup_step', + 'connection_added', + 'connection_test', + 'project_stack_snapshot', + 'ingest_completed', + 'scan_completed', + 'sl_validate_completed', + 'sl_query_completed', + 'sql_completed', + 'wiki_query_completed', + 'mcp_request_completed', + 'daemon_started', + 'daemon_stopped', + 'sl_plan_completed', + 'sql_gen_completed', + ]); + }); + + it('builds strict daemon telemetry events', () => { + const daemonEnvelope = { + ...envelope, + runtime: 'daemon-py' as const, + nodeVersion: '3.13.0', + }; + + expect( + buildTelemetryEvent('sl_plan_completed', daemonEnvelope, { + outcome: 'ok', + stage: 'transpile', + durationMs: 25, + sourceCount: 2, + joinCount: 1, + }), + ).toMatchObject({ + name: 'sl_plan_completed', + properties: { + runtime: 'daemon-py', + outcome: 'ok', + stage: 'transpile', + sourceCount: 2, + joinCount: 1, + }, + }); + + expect(() => + telemetryEventSchemas.sql_gen_completed.parse({ + ...daemonEnvelope, + outcome: 'ok', + dialect: 'postgres', + durationMs: 4, + sql: 'select * from private_table', + }), + ).toThrow(); + }); + + it('builds a strict install_first_run event', () => { + expect(buildTelemetryEvent('install_first_run', envelope, {})).toEqual({ + name: 'install_first_run', + properties: envelope, + }); + }); + + it('builds a strict command event with project grouping fields', () => { + expect( + 
buildTelemetryEvent('command', envelope, { + commandPath: ['ktx', 'status'], + durationMs: 12, + outcome: 'ok', + flagsPresent: { json: true }, + hasProject: true, + projectGroupAttached: true, + }), + ).toEqual({ + name: 'command', + properties: { + ...envelope, + commandPath: ['ktx', 'status'], + durationMs: 12, + outcome: 'ok', + flagsPresent: { json: true }, + hasProject: true, + projectGroupAttached: true, + }, + }); + }); + + it('rejects unmodeled event properties', () => { + expect(() => + telemetryEventSchemas.command.parse({ + ...envelope, + commandPath: ['ktx', 'status'], + durationMs: 12, + outcome: 'ok', + flagsPresent: {}, + hasProject: true, + projectGroupAttached: true, + tableName: 'private_table', + }), + ).toThrow(); + }); + + it('builds strict Phase 2 events without private names or text', () => { + expect( + buildTelemetryEvent('connection_test', envelope, { + driver: 'postgres', + isDemoConnection: false, + outcome: 'ok', + durationMs: 34, + serverVersion: '16', + }), + ).toMatchObject({ + name: 'connection_test', + properties: { + driver: 'postgres', + isDemoConnection: false, + outcome: 'ok', + durationMs: 34, + serverVersion: '16', + }, + }); + + expect(() => + telemetryEventSchemas.sql_completed.parse({ + ...envelope, + driver: 'postgres', + isDemoConnection: false, + queryVerb: 'select', + referencedTableCount: 1, + durationMs: 10, + outcome: 'ok', + sql: 'select * from private_table', + }), + ).toThrow(); + }); + + it('rejects raw private field names that are not in the telemetry schemas', () => { + expect(JSON.stringify(telemetryEventSchemas)).not.toContain('tableName'); + expect(Object.keys(telemetryEventSchemas.sql_completed.shape)).not.toContain('sql'); + expect(JSON.stringify(telemetryEventSchemas)).not.toContain('path'); + }); +}); diff --git a/packages/cli/src/telemetry/events.ts b/packages/cli/src/telemetry/events.ts new file mode 100644 index 00000000..e73001ed --- /dev/null +++ b/packages/cli/src/telemetry/events.ts @@ -0,0 
+1,387 @@ +import { arch, platform, release } from 'node:os'; +import { z } from 'zod'; + +const telemetryCommonEnvelopeSchema = z + .object({ + cliVersion: z.string(), + nodeVersion: z.string(), + osPlatform: z.string(), + osRelease: z.string(), + arch: z.string(), + runtime: z.enum(['node', 'daemon-py']), + isCi: z.boolean(), + }) + .strict(); + +const installFirstRunSchema = telemetryCommonEnvelopeSchema.strict(); + +const commandSchema = telemetryCommonEnvelopeSchema + .extend({ + commandPath: z.array(z.string()).min(1), + durationMs: z.number().nonnegative(), + outcome: z.enum(['ok', 'error', 'aborted']), + errorClass: z.string().optional(), + flagsPresent: z.record(z.string(), z.boolean()), + hasProject: z.boolean(), + projectGroupAttached: z.boolean(), + }) + .strict(); + +const outcomeSchema = z.enum(['ok', 'error']); + +const setupStepSchema = telemetryCommonEnvelopeSchema + .extend({ + step: z.enum([ + 'project', + 'runtime', + 'models', + 'embeddings', + 'secrets', + 'databases', + 'database-context-depth', + 'sources', + 'context', + 'agents', + 'demo-tour', + ]), + outcome: z.enum(['completed', 'skipped', 'abandoned']), + durationMs: z.number().nonnegative(), + }) + .strict(); + +const connectionAddedSchema = telemetryCommonEnvelopeSchema + .extend({ + driver: z.string(), + isDemoConnection: z.boolean(), + }) + .strict(); + +const connectionTestSchema = telemetryCommonEnvelopeSchema + .extend({ + driver: z.string(), + isDemoConnection: z.boolean(), + outcome: outcomeSchema, + errorClass: z.string().optional(), + durationMs: z.number().nonnegative(), + serverVersion: z.string().optional(), + }) + .strict(); + +const projectStackSnapshotSchema = telemetryCommonEnvelopeSchema + .extend({ + connectors: z.array(z.object({ driver: z.string(), isDemo: z.boolean() }).strict()), + connectionCount: z.number().int().nonnegative(), + hasSl: z.boolean(), + hasWiki: z.boolean(), + hasMcp: z.boolean(), + hasManagedRuntime: z.boolean(), + }) + .strict(); + +const 
rowsBucketSchema = z.enum(['<10k', '<100k', '<1M', '<10M', '>=10M']); + +const ingestCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + driver: z.string(), + isDemoConnection: z.boolean(), + schemaCount: z.number().int().nonnegative(), + tableCount: z.number().int().nonnegative(), + columnCount: z.number().int().nonnegative(), + rowsBucket: rowsBucketSchema, + durationMs: z.number().nonnegative(), + outcome: outcomeSchema, + errorClass: z.string().optional(), + }) + .strict(); + +const scanCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + driver: z.string(), + tableCount: z.number().int().nonnegative(), + columnCount: z.number().int().nonnegative(), + inferredFkCount: z.number().int().nonnegative(), + declaredFkCount: z.number().int().nonnegative(), + durationMs: z.number().nonnegative(), + outcome: outcomeSchema, + errorClass: z.string().optional(), + }) + .strict(); + +const slValidateCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + sourceCount: z.number().int().nonnegative(), + modelCount: z.number().int().nonnegative(), + validationErrorCount: z.number().int().nonnegative(), + outcome: outcomeSchema, + errorClass: z.string().optional(), + durationMs: z.number().nonnegative(), + }) + .strict(); + +const slQueryCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + mode: z.enum(['compile', 'execute']), + referencedSourceCount: z.number().int().nonnegative(), + referencedDimensionCount: z.number().int().nonnegative(), + referencedMeasureCount: z.number().int().nonnegative(), + durationMs: z.number().nonnegative(), + outcome: outcomeSchema, + errorClass: z.string().optional(), + }) + .strict(); + +const sqlCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + driver: z.string(), + isDemoConnection: z.boolean(), + queryVerb: z.enum(['select', 'explain', 'show', 'with', 'other']), + referencedTableCount: z.number().int().nonnegative(), + durationMs: z.number().nonnegative(), + outcome: outcomeSchema, + errorClass: 
z.string().optional(), + }) + .strict(); + +const wikiQueryCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + queryLength: z.number().int().nonnegative(), + resultCount: z.number().int().nonnegative(), + durationMs: z.number().nonnegative(), + outcome: outcomeSchema, + }) + .strict(); + +const mcpRequestCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + toolName: z.string(), + outcome: outcomeSchema, + durationMs: z.number().nonnegative(), + errorClass: z.string().optional(), + sampleRate: z.literal(0.1), + }) + .strict(); + +const daemonStartedSchema = telemetryCommonEnvelopeSchema + .extend({ + daemonVersion: z.string(), + pythonVersion: z.string(), + runtimeVersion: z.string(), + startupDurationMs: z.number().nonnegative(), + }) + .strict(); + +const daemonStoppedSchema = telemetryCommonEnvelopeSchema + .extend({ + reason: z.enum(['signal', 'request', 'crash']), + uptimeMs: z.number().nonnegative(), + }) + .strict(); + +const slPlanCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + outcome: z.enum(['ok', 'error']), + stage: z.enum(['parse', 'resolve', 'compile', 'transpile']), + errorClass: z.string().optional(), + durationMs: z.number().nonnegative(), + sourceCount: z.number().int().nonnegative(), + joinCount: z.number().int().nonnegative(), + }) + .strict(); + +const sqlGenCompletedSchema = telemetryCommonEnvelopeSchema + .extend({ + outcome: z.enum(['ok', 'error']), + dialect: z.string(), + errorClass: z.string().optional(), + durationMs: z.number().nonnegative(), + }) + .strict(); + +/** @internal */ +export const telemetryEventSchemas = { + install_first_run: installFirstRunSchema, + command: commandSchema, + setup_step: setupStepSchema, + connection_added: connectionAddedSchema, + connection_test: connectionTestSchema, + project_stack_snapshot: projectStackSnapshotSchema, + ingest_completed: ingestCompletedSchema, + scan_completed: scanCompletedSchema, + sl_validate_completed: slValidateCompletedSchema, + 
sl_query_completed: slQueryCompletedSchema, + sql_completed: sqlCompletedSchema, + wiki_query_completed: wikiQueryCompletedSchema, + mcp_request_completed: mcpRequestCompletedSchema, + daemon_started: daemonStartedSchema, + daemon_stopped: daemonStoppedSchema, + sl_plan_completed: slPlanCompletedSchema, + sql_gen_completed: sqlGenCompletedSchema, +} as const; + +/** @internal */ +export const telemetryEventCatalog = [ + { + name: 'install_first_run', + description: 'Emitted once when ~/.ktx/telemetry.json is created.', + fields: [], + }, + { + name: 'command', + description: 'Emitted once for each Commander action that reaches preAction.', + fields: [ + 'commandPath', + 'durationMs', + 'outcome', + 'errorClass', + 'flagsPresent', + 'hasProject', + 'projectGroupAttached', + ], + }, + { + name: 'setup_step', + description: 'Emitted after an interactive setup step completes, skips, or aborts.', + fields: ['step', 'outcome', 'durationMs'], + }, + { + name: 'connection_added', + description: 'Emitted when setup writes a database, source, or demo connection.', + fields: ['driver', 'isDemoConnection'], + }, + { + name: 'connection_test', + description: 'Emitted after ktx connection test completes.', + fields: ['driver', 'isDemoConnection', 'outcome', 'errorClass', 'durationMs', 'serverVersion'], + }, + { + name: 'project_stack_snapshot', + description: 'Emitted after commands that can summarize the local project stack.', + fields: ['connectors', 'connectionCount', 'hasSl', 'hasWiki', 'hasMcp', 'hasManagedRuntime'], + }, + { + name: 'ingest_completed', + description: 'Emitted after a public ingest target completes.', + fields: [ + 'driver', + 'isDemoConnection', + 'schemaCount', + 'tableCount', + 'columnCount', + 'rowsBucket', + 'durationMs', + 'outcome', + 'errorClass', + ], + }, + { + name: 'scan_completed', + description: 'Emitted after schema scan or relationship inference completes.', + fields: [ + 'driver', + 'tableCount', + 'columnCount', + 'inferredFkCount', + 
'declaredFkCount', + 'durationMs', + 'outcome', + 'errorClass', + ], + }, + { + name: 'sl_validate_completed', + description: 'Emitted after ktx sl validate completes.', + fields: ['sourceCount', 'modelCount', 'validationErrorCount', 'outcome', 'errorClass', 'durationMs'], + }, + { + name: 'sl_query_completed', + description: 'Emitted after ktx sl query compiles or executes.', + fields: [ + 'mode', + 'referencedSourceCount', + 'referencedDimensionCount', + 'referencedMeasureCount', + 'durationMs', + 'outcome', + 'errorClass', + ], + }, + { + name: 'sql_completed', + description: 'Emitted after ktx sql completes validation and execution.', + fields: [ + 'driver', + 'isDemoConnection', + 'queryVerb', + 'referencedTableCount', + 'durationMs', + 'outcome', + 'errorClass', + ], + }, + { + name: 'wiki_query_completed', + description: 'Emitted after a wiki query completes.', + fields: ['queryLength', 'resultCount', 'durationMs', 'outcome'], + }, + { + name: 'mcp_request_completed', + description: 'Emitted for sampled MCP tool requests.', + fields: ['toolName', 'outcome', 'durationMs', 'errorClass', 'sampleRate'], + }, + { + name: 'daemon_started', + description: 'Emitted when the long-lived ktx-daemon HTTP server starts.', + fields: ['daemonVersion', 'pythonVersion', 'runtimeVersion', 'startupDurationMs'], + }, + { + name: 'daemon_stopped', + description: 'Emitted when the long-lived ktx-daemon HTTP server shuts down.', + fields: ['reason', 'uptimeMs'], + }, + { + name: 'sl_plan_completed', + description: 'Emitted after a daemon semantic-layer planning pass completes.', + fields: ['outcome', 'stage', 'errorClass', 'durationMs', 'sourceCount', 'joinCount'], + }, + { + name: 'sql_gen_completed', + description: 'Emitted after daemon SQL generation completes.', + fields: ['outcome', 'dialect', 'errorClass', 'durationMs'], + }, +] as const; + +export type TelemetryEventName = keyof typeof telemetryEventSchemas; +export type TelemetryCommonEnvelope = z.infer; + +export type 
TelemetryEventProperties = z.infer< + (typeof telemetryEventSchemas)[Name] +>; + +export interface BuiltTelemetryEvent { + name: Name; + properties: TelemetryEventProperties; +} + +export function buildCommonEnvelope(input: { cliVersion: string; isCi: boolean }): TelemetryCommonEnvelope { + return { + cliVersion: input.cliVersion, + nodeVersion: process.version, + osPlatform: platform(), + osRelease: release(), + arch: arch(), + runtime: 'node', + isCi: input.isCi, + }; +} + +export function buildTelemetryEvent( + name: Name, + envelope: TelemetryCommonEnvelope, + fields: Omit, keyof TelemetryCommonEnvelope>, +): BuiltTelemetryEvent { + const schema = telemetryEventSchemas[name]; + return { + name, + properties: schema.parse({ ...envelope, ...fields }) as TelemetryEventProperties, + }; +} diff --git a/packages/cli/src/telemetry/identity.test.ts b/packages/cli/src/telemetry/identity.test.ts new file mode 100644 index 00000000..06d76043 --- /dev/null +++ b/packages/cli/src/telemetry/identity.test.ts @@ -0,0 +1,209 @@ +import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join, resolve } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + computeTelemetryProjectId, + loadTelemetryIdentity, + readExistingTelemetryProjectId, + TELEMETRY_NOTICE, + type TelemetryIdentityEnv, +} from './identity.js'; + +function makeIo(stdoutIsTTY = true) { + let stderr = ''; + return { + io: { + stdout: { isTTY: stdoutIsTTY, write: () => {} }, + stderr: { + write: (chunk: string) => { + stderr += chunk; + }, + }, + }, + stderr: () => stderr, + }; +} + +describe('telemetry identity', () => { + let homeDir: string; + let env: TelemetryIdentityEnv; + + beforeEach(async () => { + homeDir = await mkdtemp(join(tmpdir(), 'ktx-telemetry-home-')); + env = {}; + }); + + afterEach(async () => { + await rm(homeDir, { recursive: true, force: true }); + }); + + it('creates the telemetry 
file and one-line notice on first interactive enabled load', async () => { + const testIo = makeIo(true); + + const identity = await loadTelemetryIdentity({ + homeDir, + env, + stdoutIsTTY: true, + stderr: testIo.io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }); + + expect(identity.enabled).toBe(true); + expect(identity.installId).toMatch(/^[0-9a-f-]{36}$/); + expect(identity.createdFile).toBe(true); + expect(identity.noticeShown).toBe(true); + expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`); + + const stored = JSON.parse(await readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')) as { + enabled: boolean; + noticeShownVersion: number; + }; + expect(stored.enabled).toBe(true); + expect(stored.noticeShownVersion).toBe(1); + }); + + it('emits the notice without ANSI when NO_COLOR is set', async () => { + const testIo = makeIo(true); + + await loadTelemetryIdentity({ + homeDir, + env: { NO_COLOR: '1' }, + stdoutIsTTY: true, + stderr: testIo.io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }); + + expect(testIo.stderr()).toBe(`${TELEMETRY_NOTICE}\n`); + }); + + it('does not create a file when env disables telemetry', async () => { + const identity = await loadTelemetryIdentity({ + homeDir, + env: { KTX_TELEMETRY_DISABLED: '1' }, + stdoutIsTTY: true, + stderr: makeIo(true).io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }); + + expect(identity.enabled).toBe(false); + await expect(readFile(join(homeDir, '.ktx', 'telemetry.json'), 'utf-8')).rejects.toThrow(); + }); + + it('does not create a file for CI or non-TTY command invocations', async () => { + await expect( + loadTelemetryIdentity({ + homeDir, + env: { CI: '1' }, + stdoutIsTTY: true, + stderr: makeIo(true).io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }), + ).resolves.toMatchObject({ enabled: false, createdFile: false }); + + await expect( + loadTelemetryIdentity({ + homeDir, + env: {}, + stdoutIsTTY: false, + stderr: 
makeIo(false).io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }), + ).resolves.toMatchObject({ enabled: false, createdFile: false }); + }); + + it('honors persistent enabled false', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + JSON.stringify( + { + installId: '00000000-0000-4000-8000-000000000000', + enabled: false, + noticeShownAt: '2026-05-22T14:33:02.000Z', + noticeShownVersion: 1, + createdAt: '2026-05-22T14:33:02.000Z', + }, + null, + 2, + ) + '\n', + 'utf-8', + ); + + await expect( + loadTelemetryIdentity({ + homeDir, + env, + stdoutIsTTY: true, + stderr: makeIo(true).io.stderr, + now: () => new Date('2026-05-22T15:00:00.000Z'), + }), + ).resolves.toMatchObject({ + installId: '00000000-0000-4000-8000-000000000000', + enabled: false, + createdFile: false, + }); + }); + + it('recreates a corrupted file instead of surfacing an error to users', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile(join(homeDir, '.ktx', 'telemetry.json'), '{bad json', 'utf-8'); + + const identity = await loadTelemetryIdentity({ + homeDir, + env, + stdoutIsTTY: true, + stderr: makeIo(true).io.stderr, + now: () => new Date('2026-05-22T14:33:02.000Z'), + }); + + expect(identity.enabled).toBe(true); + expect(identity.createdFile).toBe(true); + }); + + it('derives a salted project hash without exposing the path', () => { + const projectDir = resolve('/tmp/acme-private-project'); + const projectId = computeTelemetryProjectId('00000000-0000-4000-8000-000000000000', projectDir); + + expect(projectId).toMatch(/^[a-f0-9]{64}$/); + expect(projectId).not.toContain('acme'); + expect(computeTelemetryProjectId('00000000-0000-4000-8000-000000000000', projectDir)).toBe(projectId); + expect(computeTelemetryProjectId('11111111-1111-4111-8111-111111111111', projectDir)).not.toBe(projectId); + }); + + it('reads an existing project id for Python telemetry 
without creating identity', async () => { + await mkdir(join(homeDir, '.ktx'), { recursive: true }); + await writeFile( + join(homeDir, '.ktx', 'telemetry.json'), + JSON.stringify( + { + installId: '00000000-0000-4000-8000-000000000000', + enabled: true, + noticeShownAt: '2026-05-22T14:33:02.000Z', + noticeShownVersion: 1, + createdAt: '2026-05-22T14:33:02.000Z', + }, + null, + 2, + ) + '\n', + 'utf-8', + ); + + await expect( + readExistingTelemetryProjectId({ + homeDir, + projectDir: '/tmp/acme-private-project', + env: {}, + }), + ).resolves.toMatch(/^[a-f0-9]{64}$/); + + await expect( + readExistingTelemetryProjectId({ + homeDir, + projectDir: '/tmp/acme-private-project', + env: { KTX_TELEMETRY_DISABLED: '1' }, + }), + ).resolves.toBeUndefined(); + }); +}); diff --git a/packages/cli/src/telemetry/identity.ts b/packages/cli/src/telemetry/identity.ts new file mode 100644 index 00000000..4d46307c --- /dev/null +++ b/packages/cli/src/telemetry/identity.ts @@ -0,0 +1,151 @@ +import { createHash, randomUUID } from 'node:crypto'; +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; +import { z } from 'zod'; + +/** @internal */ +export const TELEMETRY_NOTICE = + 'ktx collects anonymous usage data to improve the product. 
Opt out: set KTX_TELEMETRY_DISABLED=1.'; + +const NOTICE_VERSION = 1; + +const telemetryFileSchema = z + .object({ + installId: z.uuid(), + enabled: z.boolean(), + noticeShownAt: z.string().optional(), + noticeShownVersion: z.number().int().optional(), + createdAt: z.string(), + }) + .strict(); + +/** @internal */ +export interface TelemetryIdentityEnv { + KTX_TELEMETRY_DISABLED?: string; + DO_NOT_TRACK?: string; + CI?: string; + NO_COLOR?: string; + TERM?: string; +} + +function styleNotice(notice: string, env: TelemetryIdentityEnv): string { + if (env.NO_COLOR || env.TERM === 'dumb') return notice; + return `${notice}`; +} + +export interface LoadTelemetryIdentityOptions { + homeDir?: string; + env?: TelemetryIdentityEnv; + stdoutIsTTY: boolean; + stderr: { write(chunk: string): void }; + now?: () => Date; +} + +export interface TelemetryIdentityState { + installId?: string; + enabled: boolean; + createdFile: boolean; + noticeShown: boolean; + path: string; +} + +function telemetryPath(homeDir: string): string { + return join(homeDir, '.ktx', 'telemetry.json'); +} + +function envDisablesTelemetry(env: TelemetryIdentityEnv): boolean { + return Boolean(env.KTX_TELEMETRY_DISABLED || env.DO_NOT_TRACK || env.CI); +} + +async function readTelemetryFile(path: string): Promise | null> { + try { + return telemetryFileSchema.parse(JSON.parse(await readFile(path, 'utf-8'))); + } catch { + return null; + } +} + +async function writeTelemetryFile(path: string, value: z.infer): Promise { + await mkdir(dirname(path), { recursive: true }); + await writeFile(path, `${JSON.stringify(value, null, 2)}\n`, 'utf-8'); +} + +export async function loadTelemetryIdentity(options: LoadTelemetryIdentityOptions): Promise { + const env = options.env ?? process.env; + const path = telemetryPath(options.homeDir ?? 
homedir()); + + if (envDisablesTelemetry(env) || options.stdoutIsTTY !== true) { + const existing = await readTelemetryFile(path); + return { + installId: existing?.installId, + enabled: false, + createdFile: false, + noticeShown: false, + path, + }; + } + + const existing = await readTelemetryFile(path); + if (existing) { + return { + installId: existing.installId, + enabled: existing.enabled, + createdFile: false, + noticeShown: false, + path, + }; + } + + const timestamp = (options.now ?? (() => new Date()))().toISOString(); + const next = { + installId: randomUUID(), + enabled: true, + noticeShownAt: timestamp, + noticeShownVersion: NOTICE_VERSION, + createdAt: timestamp, + }; + + try { + await writeTelemetryFile(path, next); + } catch { + return { + enabled: false, + createdFile: false, + noticeShown: false, + path, + }; + } + + options.stderr.write(`${styleNotice(TELEMETRY_NOTICE, env)}\n`); + + return { + installId: next.installId, + enabled: true, + createdFile: true, + noticeShown: true, + path, + }; +} + +export function computeTelemetryProjectId(installId: string, projectDir: string): string { + return createHash('sha256').update(`${installId}:${resolve(projectDir)}`).digest('hex'); +} + +export async function readExistingTelemetryProjectId(options: { + projectDir: string; + homeDir?: string; + env?: Pick; +}): Promise { + const env = options.env ?? process.env; + if (env.KTX_TELEMETRY_DISABLED || env.DO_NOT_TRACK) { + return undefined; + } + + const existing = await readTelemetryFile(telemetryPath(options.homeDir ?? 
homedir())); + if (!existing?.enabled) { + return undefined; + } + + return computeTelemetryProjectId(existing.installId, options.projectDir); +} diff --git a/packages/cli/src/telemetry/index.ts b/packages/cli/src/telemetry/index.ts new file mode 100644 index 00000000..10637a3d --- /dev/null +++ b/packages/cli/src/telemetry/index.ts @@ -0,0 +1,146 @@ +import { getKtxCliPackageInfo, type KtxCliIo, type KtxCliPackageInfo } from '../cli-runtime.js'; +import { loadKtxProject } from '../context/project/project.js'; +import { + beginCommandSpan, + completeCommandSpan, + type CommandOutcome, + type CompletedCommandSpan, +} from './command-hook.js'; +import { shutdownTelemetryEmitter, trackTelemetryEvent } from './emitter.js'; +import { + buildCommonEnvelope, + buildTelemetryEvent, + type TelemetryCommonEnvelope, + type TelemetryEventName, + type TelemetryEventProperties, +} from './events.js'; +import { computeTelemetryProjectId, loadTelemetryIdentity } from './identity.js'; +import { buildProjectStackSnapshotFields } from './project-snapshot.js'; + +export { beginCommandSpan, completeCommandSpan, shutdownTelemetryEmitter }; +export type { CommandOutcome, CompletedCommandSpan }; + +export async function showTelemetryNoticeIfNeeded(io: KtxCliIo, packageInfo: KtxCliPackageInfo): Promise { + const identity = await loadTelemetryIdentity({ + stdoutIsTTY: io.stdout.isTTY === true, + stderr: io.stderr, + env: process.env, + }); + + if (!identity.enabled || !identity.createdFile || !identity.installId) { + return; + } + + await trackTelemetryEvent({ + event: buildTelemetryEvent( + 'install_first_run', + buildCommonEnvelope({ + cliVersion: packageInfo.version, + isCi: Boolean(process.env.CI), + }), + {}, + ), + distinctId: identity.installId, + env: process.env, + stderr: io.stderr, + }); +} + +type TelemetryEventFields = Omit< + TelemetryEventProperties, + keyof TelemetryCommonEnvelope +>; + +const emittedProjectSnapshots = new Set(); +const MCP_SAMPLE_RATE = 0.1 as const; +let 
mcpSampled: boolean | undefined; + +export function shouldEmitMcpTelemetry(): boolean { + mcpSampled ??= Math.random() < MCP_SAMPLE_RATE; + return mcpSampled; +} + +export function mcpTelemetrySampleRate(): 0.1 { + return MCP_SAMPLE_RATE; +} + +export async function emitTelemetryEvent(input: { + name: Name; + fields: TelemetryEventFields; + io: KtxCliIo; + packageInfo?: KtxCliPackageInfo; + projectDir?: string; +}): Promise { + const identity = await loadTelemetryIdentity({ + stdoutIsTTY: input.io.stdout.isTTY === true, + stderr: input.io.stderr, + env: process.env, + }); + + if (!identity.enabled || !identity.installId) { + return; + } + + const packageInfo = input.packageInfo ?? getKtxCliPackageInfo(); + + const projectId = input.projectDir ? computeTelemetryProjectId(identity.installId, input.projectDir) : undefined; + await trackTelemetryEvent({ + event: buildTelemetryEvent( + input.name, + buildCommonEnvelope({ + cliVersion: packageInfo.version, + isCi: Boolean(process.env.CI), + }), + input.fields, + ), + distinctId: identity.installId, + projectId, + env: process.env, + stderr: input.io.stderr, + }); +} + +export async function emitProjectStackSnapshot(input: { + projectDir: string; + io: KtxCliIo; + packageInfo?: KtxCliPackageInfo; +}): Promise { + if (emittedProjectSnapshots.has(input.projectDir)) { + return; + } + emittedProjectSnapshots.add(input.projectDir); + + let project: Awaited>; + try { + project = await loadKtxProject({ projectDir: input.projectDir }); + } catch { + return; + } + await emitTelemetryEvent({ + name: 'project_stack_snapshot', + fields: await buildProjectStackSnapshotFields(project), + projectDir: input.projectDir, + io: input.io, + packageInfo: input.packageInfo, + }); +} + +export async function emitCompletedCommand(input: { + completed: CompletedCommandSpan | undefined; + packageInfo: KtxCliPackageInfo; + io: KtxCliIo; +}): Promise { + if (!input.completed) { + return; + } + + const projectDir = 
input.completed.projectGroupAttached ? input.completed.projectDir : undefined; + const { projectDir: _projectDir, ...eventFields } = input.completed; + await emitTelemetryEvent({ + name: 'command', + fields: eventFields, + projectDir, + io: input.io, + packageInfo: input.packageInfo, + }); +} diff --git a/packages/cli/src/telemetry/project-snapshot.test.ts b/packages/cli/src/telemetry/project-snapshot.test.ts new file mode 100644 index 00000000..daf4e766 --- /dev/null +++ b/packages/cli/src/telemetry/project-snapshot.test.ts @@ -0,0 +1,77 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { buildProjectStackSnapshotFields } from './project-snapshot.js'; + +describe('buildProjectStackSnapshotFields', () => { + let projectDir: string; + + beforeEach(async () => { + projectDir = await mkdtemp(join(tmpdir(), 'ktx-stack-snapshot-')); + }); + + afterEach(async () => { + await rm(projectDir, { recursive: true, force: true }); + }); + + it('summarizes connectors and project capabilities without names or paths', async () => { + await mkdir(join(projectDir, 'semantic-layer', 'warehouse'), { recursive: true }); + await mkdir(join(projectDir, 'wiki', 'global'), { recursive: true }); + await writeFile(join(projectDir, 'semantic-layer', 'warehouse', 'orders.yaml'), 'name: orders\n'); + await writeFile(join(projectDir, 'wiki', 'global', 'revenue.md'), '# Revenue\n'); + await writeFile(join(projectDir, '.mcp.json'), '{"mcpServers":{"ktx":{}}}\n'); + + const fields = await buildProjectStackSnapshotFields({ + projectDir, + config: { + connections: { + orbit_demo: { driver: 'sqlite', path: join(projectDir, 'demo.db') }, + warehouse: { driver: 'postgres', readonly: true }, + }, + ingest: { + adapters: [], + embeddings: { backend: 'sentence-transformers', dimensions: 384 }, + workUnits: { stepBudget: 40, 
maxConcurrency: 1, failureMode: 'continue' }, + }, + llm: { provider: { backend: 'none' }, models: {}, promptCaching: {} }, + scan: { + enrichment: { mode: 'none' }, + relationships: { + enabled: true, + llmProposals: true, + validationRequiredForManifest: true, + acceptThreshold: 0.85, + reviewThreshold: 0.55, + maxLlmTablesPerBatch: 40, + maxCandidatesPerColumn: 25, + profileSampleRows: 10000, + validationConcurrency: 4, + }, + }, + storage: { + state: 'sqlite', + search: 'sqlite-fts5', + git: { auto_commit: true, author: 'ktx ' }, + }, + agent: { run_research: { enabled: false, max_iterations: 20, default_toolset: [] } }, + memory: { auto_commit: true }, + }, + }); + + expect(fields).toEqual({ + connectors: [ + { driver: 'sqlite', isDemo: true }, + { driver: 'postgres', isDemo: false }, + ], + connectionCount: 2, + hasSl: true, + hasWiki: true, + hasMcp: true, + hasManagedRuntime: true, + }); + expect(JSON.stringify(fields)).not.toContain(projectDir); + expect(JSON.stringify(fields)).not.toContain('warehouse'); + }); +}); diff --git a/packages/cli/src/telemetry/project-snapshot.ts b/packages/cli/src/telemetry/project-snapshot.ts new file mode 100644 index 00000000..583c3910 --- /dev/null +++ b/packages/cli/src/telemetry/project-snapshot.ts @@ -0,0 +1,67 @@ +import { readdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import type { KtxProjectConfig } from '../context/project/config.js'; +import { resolveProjectRuntimeRequirements } from '../runtime-requirements.js'; +import { isDemoConnection } from './demo-detect.js'; + +async function hasFileWithExtension(dir: string, extensions: Set): Promise { + let entries; + try { + entries = await readdir(dir, { withFileTypes: true }); + } catch { + return false; + } + + for (const entry of entries) { + const path = join(dir, entry.name); + if (entry.isDirectory() && (await hasFileWithExtension(path, extensions))) { + return true; + } + if (entry.isFile() && 
extensions.has(entry.name.slice(entry.name.lastIndexOf('.')))) { + return true; + } + } + return false; +} + +async function hasFileNamed(dir: string, filenames: Set): Promise { + let entries; + try { + entries = await readdir(dir, { withFileTypes: true }); + } catch { + return false; + } + + return entries.some((entry) => entry.isFile() && filenames.has(entry.name)); +} + +async function hasMcpConfig(projectDir: string): Promise { + return ( + (await hasFileWithExtension(join(projectDir, '.ktx'), new Set(['.json']))) || + (await hasFileWithExtension(join(projectDir, '.cursor'), new Set(['.json']))) || + (await hasFileNamed(projectDir, new Set(['.mcp.json']))) + ); +} + +export async function buildProjectStackSnapshotFields(input: { + projectDir: string; + config: KtxProjectConfig; +}) { + const connectors = Object.entries(input.config.connections).map(([connectionId, connection]) => ({ + driver: String(connection.driver ?? 'unknown').trim().toLowerCase() || 'unknown', + isDemo: isDemoConnection(connectionId, connection), + })); + + const runtimeRequirements = resolveProjectRuntimeRequirements(input.config, { + env: process.env, + }); + + return { + connectors, + connectionCount: connectors.length, + hasSl: await hasFileWithExtension(join(input.projectDir, 'semantic-layer'), new Set(['.yaml', '.yml'])), + hasWiki: await hasFileWithExtension(join(input.projectDir, 'wiki'), new Set(['.md', '.mdx'])), + hasMcp: await hasMcpConfig(input.projectDir), + hasManagedRuntime: runtimeRequirements.features.length > 0, + }; +} diff --git a/packages/cli/src/telemetry/schema-writer.test.ts b/packages/cli/src/telemetry/schema-writer.test.ts new file mode 100644 index 00000000..a6539421 --- /dev/null +++ b/packages/cli/src/telemetry/schema-writer.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest'; + +import { buildTelemetrySchemaArtifact } from './schema-writer.js'; + +describe('telemetry schema writer', () => { + it('exports a schema artifact with the full 
catalog and strict metadata', () => { + const artifact = buildTelemetrySchemaArtifact(); + + expect(artifact.$schema).toBe('https://json-schema.org/draft/2020-12/schema'); + expect(artifact['x-ktx-common-fields']).toEqual([ + 'cliVersion', + 'nodeVersion', + 'osPlatform', + 'osRelease', + 'arch', + 'runtime', + 'isCi', + ]); + expect(artifact['x-ktx-catalog'].map((event) => event.name)).toContain('daemon_started'); + expect(artifact['x-ktx-catalog'].map((event) => event.name)).toContain('sql_gen_completed'); + expect(artifact.$defs.sql_gen_completed).toMatchObject({ + type: 'object', + additionalProperties: false, + }); + }); +}); diff --git a/packages/cli/src/telemetry/schema-writer.ts b/packages/cli/src/telemetry/schema-writer.ts new file mode 100644 index 00000000..5921b4d9 --- /dev/null +++ b/packages/cli/src/telemetry/schema-writer.ts @@ -0,0 +1,63 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import { dirname, resolve } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { z } from 'zod'; + +import { telemetryEventCatalog, telemetryEventSchemas } from './events.js'; + +const commonFields = ['cliVersion', 'nodeVersion', 'osPlatform', 'osRelease', 'arch', 'runtime', 'isCi'] as const; + +export interface TelemetrySchemaArtifact { + $schema: 'https://json-schema.org/draft/2020-12/schema'; + title: 'ktx telemetry events'; + type: 'object'; + additionalProperties: false; + 'x-ktx-common-fields': string[]; + 'x-ktx-catalog': Array<{ name: string; description: string; fields: readonly string[] }>; + $defs: Record; +} + +/** @internal */ +export function buildTelemetrySchemaArtifact(): TelemetrySchemaArtifact { + return { + $schema: 'https://json-schema.org/draft/2020-12/schema', + title: 'ktx telemetry events', + type: 'object', + additionalProperties: false, + 'x-ktx-common-fields': [...commonFields], + 'x-ktx-catalog': telemetryEventCatalog.map((event) => ({ + name: event.name, + description: event.description, + fields: 
event.fields, + })), + $defs: Object.fromEntries( + Object.entries(telemetryEventSchemas).map(([name, schema]) => [ + name, + z.toJSONSchema(schema, { target: 'draft-2020-12' }), + ]), + ), + }; +} + +async function writeTelemetrySchemaArtifact(path: string): Promise { + const target = resolve(path); + await mkdir(dirname(target), { recursive: true }); + await writeFile(target, `${JSON.stringify(buildTelemetrySchemaArtifact(), null, 2)}\n`, 'utf-8'); +} + +async function main(argv: string[]): Promise { + const targets = argv.slice(2); + if (targets.length === 0) { + throw new Error('Usage: node dist/telemetry/schema-writer.js [target...]'); + } + for (const target of targets) { + await writeTelemetrySchemaArtifact(target); + } +} + +if (import.meta.url === pathToFileURL(fileURLToPath(import.meta.url)).href && process.argv[1]) { + const invoked = pathToFileURL(resolve(process.argv[1])).href; + if (import.meta.url === invoked) { + await main(process.argv); + } +} diff --git a/packages/cli/src/telemetry/scrubber.test.ts b/packages/cli/src/telemetry/scrubber.test.ts new file mode 100644 index 00000000..87eb74d4 --- /dev/null +++ b/packages/cli/src/telemetry/scrubber.test.ts @@ -0,0 +1,25 @@ +import { describe, expect, it } from 'vitest'; + +import { scrubErrorClass } from './scrubber.js'; + +class KtxProjectMissingAbortError extends Error {} + +describe('scrubErrorClass', () => { + it('keeps normal JavaScript class names', () => { + expect(scrubErrorClass(new KtxProjectMissingAbortError('missing'))).toBe('KtxProjectMissingAbortError'); + }); + + it('drops path-like, URL-like, email-like, and long values', () => { + expect(scrubErrorClass({ constructor: { name: '/Users/alice/project' } })).toBeUndefined(); + expect(scrubErrorClass({ constructor: { name: 'https://example.test/error' } })).toBeUndefined(); + expect(scrubErrorClass({ constructor: { name: 'alice@example.test' } })).toBeUndefined(); + expect(scrubErrorClass({ constructor: { name: 'A'.repeat(81) } 
})).toBeUndefined(); + }); + + it('drops lowercase, spaced, and non-error-like values', () => { + expect(scrubErrorClass({ constructor: { name: 'lowercaseError' } })).toBeUndefined(); + expect(scrubErrorClass({ constructor: { name: 'Bad Error' } })).toBeUndefined(); + expect(scrubErrorClass('plain string')).toBeUndefined(); + expect(scrubErrorClass(null)).toBeUndefined(); + }); +}); diff --git a/packages/cli/src/telemetry/scrubber.ts b/packages/cli/src/telemetry/scrubber.ts new file mode 100644 index 00000000..27e41f87 --- /dev/null +++ b/packages/cli/src/telemetry/scrubber.ts @@ -0,0 +1,28 @@ +const MAX_ERROR_CLASS_LENGTH = 80; +const ERROR_CLASS_PATTERN = /^[A-Z][A-Za-z0-9_]*$/; +const PRIVATE_STRING_MARKERS = ['/', '\\', '@', '://']; + +export function scrubErrorClass(error: unknown): string | undefined { + if (typeof error !== 'object' || error === null) { + return undefined; + } + + const constructorName = (error as { constructor?: { name?: unknown } }).constructor?.name; + if (typeof constructorName !== 'string') { + return undefined; + } + + if (constructorName.length > MAX_ERROR_CLASS_LENGTH) { + return undefined; + } + + if (PRIVATE_STRING_MARKERS.some((marker) => constructorName.includes(marker))) { + return undefined; + } + + if (!ERROR_CLASS_PATTERN.test(constructorName)) { + return undefined; + } + + return constructorName; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d513e057..de0d2c24 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -194,6 +194,9 @@ importers: pg: specifier: ^8.20.0 version: 8.20.0 + posthog-node: + specifier: ^5.0.0 + version: 5.0.0 react: specifier: ^19.2.6 version: 19.2.6 @@ -4944,6 +4947,10 @@ packages: resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==} engines: {node: '>=0.10.0'} + posthog-node@5.0.0: + resolution: {integrity: sha512-gontigBt1pGHGXZme3+ojDdCYL66h/vvo+6KaQ6A51xqUOYgRvyzCLkS9Xv816jNBesRO8ouRjG428SDb2fFkg==} + engines: {node: '>=20'} 
+ prebuild-install@7.1.3: resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} engines: {node: '>=10'} @@ -11215,6 +11222,8 @@ snapshots: dependencies: xtend: 4.0.2 + posthog-node@5.0.0: {} + prebuild-install@7.1.3: dependencies: detect-libc: 2.1.2 diff --git a/python/ktx-daemon/pyproject.toml b/python/ktx-daemon/pyproject.toml index 8cb78f36..a072a5fe 100644 --- a/python/ktx-daemon/pyproject.toml +++ b/python/ktx-daemon/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "numpy>=2.2.6", "orjson>=3.11.4", "pandas>=2.2.3", + "posthog>=7.0.0", "psycopg[binary]>=3.2.0", "pydantic>=2.9.0", "requests>=2.32.0", diff --git a/python/ktx-daemon/src/ktx_daemon/__main__.py b/python/ktx-daemon/src/ktx_daemon/__main__.py index e7ae779a..2fc00186 100644 --- a/python/ktx-daemon/src/ktx_daemon/__main__.py +++ b/python/ktx-daemon/src/ktx_daemon/__main__.py @@ -5,6 +5,7 @@ from __future__ import annotations import argparse import json import sys +import time from typing import Any from pydantic import ValidationError @@ -100,8 +101,12 @@ def run_http_server( from ktx_daemon.app import create_app + started_at = time.perf_counter() uvicorn.run( - create_app(enable_code_execution=enable_code_execution), + create_app( + enable_code_execution=enable_code_execution, + telemetry_started_at=started_at, + ), host=host, port=port, log_level=log_level, diff --git a/python/ktx-daemon/src/ktx_daemon/app.py b/python/ktx-daemon/src/ktx_daemon/app.py index 3208264c..7a3fa950 100644 --- a/python/ktx-daemon/src/ktx_daemon/app.py +++ b/python/ktx-daemon/src/ktx_daemon/app.py @@ -4,6 +4,9 @@ from __future__ import annotations import logging import os +import sys +import time +from contextlib import asynccontextmanager from collections.abc import Callable from typing import Any @@ -62,6 +65,7 @@ from ktx_daemon.table_identifier import ( ParseTableIdentifierBatchResponse, parse_table_identifier_response, ) +from ktx_daemon.telemetry import 
track_telemetry_event logger = logging.getLogger(__name__) @@ -81,11 +85,38 @@ def create_app( ] | None = None, enable_code_execution: bool = False, + telemetry_started_at: float | None = None, + clock: Callable[[], float] = time.perf_counter, ) -> FastAPI: + started_at = telemetry_started_at or clock() + + @asynccontextmanager + async def lifespan(_: FastAPI): + track_telemetry_event( + "daemon_started", + { + "daemonVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION), + "pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", + "runtimeVersion": VERSION, + "startupDurationMs": max(0, (clock() - started_at) * 1000), + }, + ) + try: + yield + finally: + track_telemetry_event( + "daemon_stopped", + { + "reason": "request", + "uptimeMs": max(0, (clock() - started_at) * 1000), + }, + ) + app = FastAPI( title="KTX Daemon", description="Stateless portable compute server for KTX.", version=VERSION, + lifespan=lifespan, ) @app.get("/health") diff --git a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py index d6c15cd0..e813575e 100644 --- a/python/ktx-daemon/src/ktx_daemon/semantic_layer.py +++ b/python/ktx-daemon/src/ktx_daemon/semantic_layer.py @@ -2,18 +2,23 @@ from __future__ import annotations +import time from typing import Any -from pydantic import BaseModel, Field +from ktx_daemon.telemetry import error_class, track_telemetry_event +from pydantic import BaseModel, ConfigDict, Field from semantic_layer.duplicate_check import validate_measure_duplicates from semantic_layer.engine import SemanticEngine from semantic_layer.models import QueryResult, SourceDefinition class SemanticLayerQueryRequest(BaseModel): + model_config = ConfigDict(populate_by_name=True) + sources: list[dict[str, Any]] query: dict[str, Any] dialect: str = "postgres" + project_id: str | None = Field(default=None, alias="projectId") class SemanticLayerQueryResponse(BaseModel): @@ -79,15 +84,73 @@ def 
_response_columns(result: QueryResult) -> list[dict[str, Any]]: def query_semantic_layer( request: SemanticLayerQueryRequest, ) -> SemanticLayerQueryResponse: - sources = _load_sources(request.sources) - engine = SemanticEngine.from_sources(sources, dialect=request.dialect) - result = engine.query(request.query) - return SemanticLayerQueryResponse( - sql=result.sql, - dialect=result.dialect, - columns=_response_columns(result), - plan=result.resolved_plan.model_dump(mode="json"), - ) + started = time.perf_counter() + stage = "parse" + source_count = 0 + join_count = 0 + sql_started = started + try: + sources = _load_sources(request.sources) + source_count = len(sources) + join_count = sum(len(source.joins) for source in sources.values()) + stage = "resolve" + engine = SemanticEngine.from_sources(sources, dialect=request.dialect) + stage = "compile" + sql_started = time.perf_counter() + result = engine.query(request.query) + stage = "transpile" + track_telemetry_event( + "sl_plan_completed", + { + "outcome": "ok", + "stage": stage, + "durationMs": max(0, (time.perf_counter() - started) * 1000), + "sourceCount": source_count, + "joinCount": join_count, + }, + project_id=request.project_id, + ) + track_telemetry_event( + "sql_gen_completed", + { + "outcome": "ok", + "dialect": result.dialect, + "durationMs": max(0, (time.perf_counter() - sql_started) * 1000), + }, + project_id=request.project_id, + ) + return SemanticLayerQueryResponse( + sql=result.sql, + dialect=result.dialect, + columns=_response_columns(result), + plan=result.resolved_plan.model_dump(mode="json"), + ) + except Exception as error: + klass = error_class(error) + fields: dict[str, Any] = { + "outcome": "error", + "stage": stage, + "durationMs": max(0, (time.perf_counter() - started) * 1000), + "sourceCount": source_count, + "joinCount": join_count, + } + if klass: + fields["errorClass"] = klass + track_telemetry_event( + "sl_plan_completed", fields, project_id=request.project_id + ) + if stage in 
{"compile", "transpile"}: + sql_fields: dict[str, Any] = { + "outcome": "error", + "dialect": request.dialect, + "durationMs": max(0, (time.perf_counter() - sql_started) * 1000), + } + if klass: + sql_fields["errorClass"] = klass + track_telemetry_event( + "sql_gen_completed", sql_fields, project_id=request.project_id + ) + raise def validate_semantic_layer(request: ValidateSourcesRequest) -> ValidateSourcesResponse: diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py new file mode 100644 index 00000000..ff9cd07f --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event + +__all__ = ["error_class", "track_telemetry_event"] diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py b/python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py new file mode 100644 index 00000000..081b7e8f --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path +from typing import Any +from collections.abc import Mapping + +from ktx_daemon.telemetry.events import build_telemetry_event +from ktx_daemon.telemetry.identity import load_telemetry_identity + +# PostHog public project ingestion key - safe to embed; capture-only, no read access. 
+POSTHOG_PROJECT_API_KEY = ( + "phc_xbvZpbu8ZNLnogTbY7MEMWhCF2rzzApYsDndjKaRBXXx" # pragma: allowlist secret +) +POSTHOG_HOST = "https://us.i.posthog.com" + + +def _host(env: Mapping[str, str]) -> str: + return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST + + +def _live_configured(host: str) -> bool: + return bool(POSTHOG_PROJECT_API_KEY.strip() and host.strip()) + + +def _debug_enabled(env: Mapping[str, str]) -> bool: + return env.get("KTX_TELEMETRY_DEBUG") == "1" + + +def _scrub_error_class(error: BaseException) -> str | None: + name = type(error).__name__ + if len(name) > 80: + return None + if any(marker in name for marker in ("/", "\\", "@", "://")): + return None + if not name[:1].isupper() or not name.replace("_", "").isalnum(): + return None + return name + + +def error_class(error: BaseException) -> str | None: + return _scrub_error_class(error) + + +def track_telemetry_event( + name: str, + fields: dict[str, Any], + *, + project_id: str | None = None, + home_dir: Path | None = None, + env: Mapping[str, str] | None = None, +) -> None: + source_env = env or os.environ + identity = load_telemetry_identity(home_dir=home_dir, env=source_env) + if not identity.enabled or not identity.install_id: + return + + try: + event = build_telemetry_event(name, fields) + except ValueError: + return + + groups = {"project": project_id} if project_id else None + + if _debug_enabled(source_env): + sys.stderr.write( + "[telemetry] " + + json.dumps( + { + "distinctId": identity.install_id, + "event": event["event"], + "properties": event["properties"], + "groups": groups, + }, + sort_keys=True, + ) + + "\n" + ) + return + + host = _host(source_env) + if not _live_configured(host): + return + + try: + from posthog import Posthog + + client = Posthog( + POSTHOG_PROJECT_API_KEY, + host=host, + flush_at=1, + flush_interval=0, + sync_mode=True, + timeout=1, + ) + client.capture( + event=event["event"], + distinct_id=identity.install_id, + properties=event["properties"], + 
groups=groups, + ) + client.shutdown() + except Exception: + return diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.py b/python/ktx-daemon/src/ktx_daemon/telemetry/events.py new file mode 100644 index 00000000..5e50c8df --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import json +import os +import platform +import sys +from pathlib import Path +from typing import Any + +from ktx_daemon import VERSION + +SCHEMA_PATH = Path(__file__).with_name("events.schema.json") +COMMON_FIELDS = { + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", +} +DAEMON_EVENTS = { + "daemon_started", + "daemon_stopped", + "sl_plan_completed", + "sql_gen_completed", +} + + +def _schema_catalog() -> dict[str, set[str]]: + raw = json.loads(SCHEMA_PATH.read_text(encoding="utf-8")) + return { + event["name"]: set(event["fields"]) + for event in raw["x-ktx-catalog"] + if event["name"] in DAEMON_EVENTS + } + + +EVENT_FIELDS = _schema_catalog() + + +def _common_envelope() -> dict[str, Any]: + return { + "cliVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION), + "nodeVersion": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}", + "osPlatform": sys.platform, + "osRelease": platform.release(), + "arch": platform.machine(), + "runtime": "daemon-py", + "isCi": bool(os.environ.get("CI")), + } + + +def build_telemetry_event(name: str, fields: dict[str, Any]) -> dict[str, Any]: + allowed = EVENT_FIELDS.get(name) + if allowed is None: + raise ValueError(f"unknown telemetry event: {name}") + + extra = set(fields) - allowed + if extra: + raise ValueError(f"unknown telemetry fields for {name}: {sorted(extra)}") + + missing = { + field for field in allowed if field not in fields and field != "errorClass" + } + if missing: + raise ValueError(f"missing telemetry fields for {name}: {sorted(missing)}") + + return { + "event": name, + "properties": 
{**_common_envelope(), **fields}, + } diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json new file mode 100644 index 00000000..13642c49 --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -0,0 +1,1407 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ktx telemetry events", + "type": "object", + "additionalProperties": false, + "x-ktx-common-fields": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "x-ktx-catalog": [ + { + "name": "install_first_run", + "description": "Emitted once when ~/.ktx/telemetry.json is created.", + "fields": [] + }, + { + "name": "command", + "description": "Emitted once for each Commander action that reaches preAction.", + "fields": [ + "commandPath", + "durationMs", + "outcome", + "errorClass", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ] + }, + { + "name": "setup_step", + "description": "Emitted after an interactive setup step completes, skips, or aborts.", + "fields": [ + "step", + "outcome", + "durationMs" + ] + }, + { + "name": "connection_added", + "description": "Emitted when setup writes a database, source, or demo connection.", + "fields": [ + "driver", + "isDemoConnection" + ] + }, + { + "name": "connection_test", + "description": "Emitted after ktx connection test completes.", + "fields": [ + "driver", + "isDemoConnection", + "outcome", + "errorClass", + "durationMs", + "serverVersion" + ] + }, + { + "name": "project_stack_snapshot", + "description": "Emitted after commands that can summarize the local project stack.", + "fields": [ + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ] + }, + { + "name": "ingest_completed", + "description": "Emitted after a public ingest target completes.", + "fields": [ + "driver", + "isDemoConnection", + "schemaCount", + 
"tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "scan_completed", + "description": "Emitted after schema scan or relationship inference completes.", + "fields": [ + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sl_validate_completed", + "description": "Emitted after ktx sl validate completes.", + "fields": [ + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "errorClass", + "durationMs" + ] + }, + { + "name": "sl_query_completed", + "description": "Emitted after ktx sl query compiles or executes.", + "fields": [ + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sql_completed", + "description": "Emitted after ktx sql completes validation and execution.", + "fields": [ + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "wiki_query_completed", + "description": "Emitted after a wiki query completes.", + "fields": [ + "queryLength", + "resultCount", + "durationMs", + "outcome" + ] + }, + { + "name": "mcp_request_completed", + "description": "Emitted for sampled MCP tool requests.", + "fields": [ + "toolName", + "outcome", + "durationMs", + "errorClass", + "sampleRate" + ] + }, + { + "name": "daemon_started", + "description": "Emitted when the long-lived ktx-daemon HTTP server starts.", + "fields": [ + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ] + }, + { + "name": "daemon_stopped", + "description": "Emitted when the long-lived ktx-daemon HTTP server shuts down.", + "fields": [ + "reason", + "uptimeMs" + ] + }, + { + "name": "sl_plan_completed", + "description": "Emitted after a daemon semantic-layer planning pass completes.", + "fields": [ + 
"outcome", + "stage", + "errorClass", + "durationMs", + "sourceCount", + "joinCount" + ] + }, + { + "name": "sql_gen_completed", + "description": "Emitted after daemon SQL generation completes.", + "fields": [ + "outcome", + "dialect", + "errorClass", + "durationMs" + ] + } + ], + "$defs": { + "install_first_run": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "additionalProperties": false + }, + "command": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "commandPath": { + "minItems": 1, + "type": "array", + "items": { + "type": "string" + } + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error", + "aborted" + ] + }, + "errorClass": { + "type": "string" + }, + "flagsPresent": { + "type": "object", + "propertyNames": { + "type": "string" + }, + "additionalProperties": { + "type": "boolean" + } + }, + "hasProject": { + "type": "boolean" + }, + "projectGroupAttached": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "commandPath", + 
"durationMs", + "outcome", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ], + "additionalProperties": false + }, + "setup_step": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "step": { + "type": "string", + "enum": [ + "project", + "runtime", + "models", + "embeddings", + "secrets", + "databases", + "database-context-depth", + "sources", + "context", + "agents", + "demo-tour" + ] + }, + "outcome": { + "type": "string", + "enum": [ + "completed", + "skipped", + "abandoned" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "step", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "connection_added": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection" + ], + "additionalProperties": false + }, + "connection_test": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + 
"type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "serverVersion": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "project_stack_snapshot": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "connectors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "driver": { + "type": "string" + }, + "isDemo": { + "type": "boolean" + } + }, + "required": [ + "driver", + "isDemo" + ], + "additionalProperties": false + } + }, + "connectionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "hasSl": { + "type": "boolean" + }, + "hasWiki": { + "type": "boolean" + }, + "hasMcp": { + "type": "boolean" + }, + "hasManagedRuntime": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + 
"hasManagedRuntime" + ], + "additionalProperties": false + }, + "ingest_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "schemaCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "rowsBucket": { + "type": "string", + "enum": [ + "<10k", + "<100k", + "<1M", + "<10M", + ">=10M" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "scan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 
+ }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "inferredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "declaredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sl_validate_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "modelCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "validationErrorCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "sl_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + 
"properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "mode": { + "type": "string", + "enum": [ + "compile", + "execute" + ] + }, + "referencedSourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedDimensionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedMeasureCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sql_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "queryVerb": { + "type": "string", + "enum": [ + "select", + "explain", + "show", + "with", + "other" + ] + }, + "referencedTableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + 
"type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "wiki_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "queryLength": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "resultCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "queryLength", + "resultCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "mcp_request_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "toolName": { + "type": "string" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "errorClass": { + "type": 
"string" + }, + "sampleRate": { + "type": "number", + "const": 0.1 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "toolName", + "outcome", + "durationMs", + "sampleRate" + ], + "additionalProperties": false + }, + "daemon_started": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "daemonVersion": { + "type": "string" + }, + "pythonVersion": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "startupDurationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ], + "additionalProperties": false + }, + "daemon_stopped": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "reason": { + "type": "string", + "enum": [ + "signal", + "request", + "crash" + ] + }, + "uptimeMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "reason", + "uptimeMs" + ], + "additionalProperties": false + }, + "sl_plan_completed": { + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "stage": { + "type": "string", + "enum": [ + "parse", + "resolve", + "compile", + "transpile" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "joinCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "stage", + "durationMs", + "sourceCount", + "joinCount" + ], + "additionalProperties": false + }, + "sql_gen_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "dialect": { + "type": "string" + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "dialect", + "durationMs" + ], + "additionalProperties": false + } + } +} diff --git 
a/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py b/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py new file mode 100644 index 00000000..c5ef100c --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import json +import os +import time +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from collections.abc import Mapping + +IDENTITY_TTL_SECONDS = 60.0 + + +@dataclass(frozen=True) +class TelemetryIdentity: + install_id: str | None + enabled: bool + path: Path + + +_cache: tuple[float, Path, TelemetryIdentity] | None = None + + +def _telemetry_path(home_dir: Path | None = None) -> Path: + return (home_dir or Path.home()) / ".ktx" / "telemetry.json" + + +def _env_disables(env: Mapping[str, str] | None = None) -> bool: + source = os.environ if env is None else env + return bool( + source.get("KTX_TELEMETRY_DISABLED") + or source.get("DO_NOT_TRACK") + or source.get("CI") + ) + + +def _read_identity(path: Path) -> TelemetryIdentity: + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return TelemetryIdentity(install_id=None, enabled=False, path=path) + + install_id = raw.get("installId") + enabled = raw.get("enabled") + if not isinstance(install_id, str) or enabled is not True: + return TelemetryIdentity( + install_id=install_id if isinstance(install_id, str) else None, + enabled=False, + path=path, + ) + + return TelemetryIdentity(install_id=install_id, enabled=True, path=path) + + +def load_telemetry_identity( + *, + home_dir: Path | None = None, + env: Mapping[str, str] | None = None, + now: Callable[[], float] | None = None, +) -> TelemetryIdentity: + global _cache + + path = _telemetry_path(home_dir) + clock = now or time.monotonic + current = float(clock()) + + if _cache and _cache[1] == path and current - _cache[0] < IDENTITY_TTL_SECONDS: + cached = _cache[2] + else: + cached 
= _read_identity(path) + _cache = (current, path, cached) + + if _env_disables(env): + return TelemetryIdentity(install_id=cached.install_id, enabled=False, path=path) + + return cached + + +def reset_identity_cache() -> None: + global _cache + _cache = None diff --git a/python/ktx-daemon/tests/test_app.py b/python/ktx-daemon/tests/test_app.py index 3c1ce18d..e423a31e 100644 --- a/python/ktx-daemon/tests/test_app.py +++ b/python/ktx-daemon/tests/test_app.py @@ -1,5 +1,8 @@ from __future__ import annotations +import json +from pathlib import Path + from fastapi.testclient import TestClient from ktx_daemon.app import create_app @@ -79,6 +82,44 @@ def test_health_endpoint_returns_managed_runtime_version(monkeypatch) -> None: assert response.json() == {"status": "healthy", "version": "0.2.0"} +def test_app_lifespan_emits_daemon_lifecycle_debug_events( + tmp_path: Path, + monkeypatch, + capsys, +) -> None: + from ktx_daemon.telemetry.identity import reset_identity_cache + + reset_identity_cache() + identity_path = tmp_path / ".ktx" / "telemetry.json" + identity_path.parent.mkdir(parents=True) + identity_path.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": True, + "createdAt": "2026-05-22T14:33:02.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("KTX_TELEMETRY_DEBUG", "1") + monkeypatch.setenv("KTX_DAEMON_VERSION", "0.4.1") + monkeypatch.delenv("CI", raising=False) + monkeypatch.delenv("KTX_TELEMETRY_DISABLED", raising=False) + monkeypatch.delenv("DO_NOT_TRACK", raising=False) + + with TestClient( + create_app(telemetry_started_at=100.0, clock=lambda: 100.125) + ) as client: + assert client.get("/health").status_code == 200 + + captured = capsys.readouterr() + assert '"event": "daemon_started"' in captured.err + assert '"event": "daemon_stopped"' in captured.err + + def test_database_introspect_endpoint_returns_snapshot() -> None: calls = [] diff --git 
def test_query_semantic_layer_emits_plan_and_sql_debug_events(
    tmp_path: Path,
    monkeypatch,
    capsys,
) -> None:
    """A semantic-layer query in debug mode prints both daemon events to stderr.

    The captured stderr must mention ``sl_plan_completed`` and
    ``sql_gen_completed`` and must not contain the ``public.orders`` table name.
    """
    from ktx_daemon.telemetry.identity import reset_identity_cache

    reset_identity_cache()

    # Seed an enabled identity file under the temporary HOME.
    ktx_dir = tmp_path / ".ktx"
    ktx_dir.mkdir(parents=True)
    identity_payload = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": True,
        "createdAt": "2026-05-22T14:33:02.000Z",
    }
    (ktx_dir / "telemetry.json").write_text(
        json.dumps(identity_payload) + "\n",
        encoding="utf-8",
    )

    # Point the daemon at the temp HOME, enable debug output, clear kill switches.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("KTX_TELEMETRY_DEBUG", "1")
    for kill_switch in ("CI", "KTX_TELEMETRY_DISABLED", "DO_NOT_TRACK"):
        monkeypatch.delenv(kill_switch, raising=False)

    request = SemanticLayerQueryRequest(
        sources=[ORDERS_SOURCE],
        dialect="postgres",
        projectId="a" * 64,
        query={
            "measures": ["orders.order_count"],
            "dimensions": ["orders.status"],
            "limit": 25,
        },
    )
    query_semantic_layer(request)

    stderr_text = capsys.readouterr().err
    assert '"event": "sl_plan_completed"' in stderr_text
    assert '"event": "sql_gen_completed"' in stderr_text
    assert "public.orders" not in stderr_text
def write_identity(home: Path, *, enabled: bool = True) -> None:
    """Write a fixed-id telemetry.json under ``home/.ktx`` with the given ``enabled`` flag."""
    ktx_dir = home / ".ktx"
    ktx_dir.mkdir(parents=True, exist_ok=True)
    payload = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": enabled,
        "noticeShownAt": "2026-05-22T14:33:02.000Z",
        "noticeShownVersion": 1,
        "createdAt": "2026-05-22T14:33:02.000Z",
    }
    (ktx_dir / "telemetry.json").write_text(json.dumps(payload) + "\n", encoding="utf-8")


def test_identity_reads_file_with_ttl_cache(tmp_path: Path) -> None:
    """The identity is served from cache inside the TTL and re-read after it."""
    reset_identity_cache()
    write_identity(tmp_path)

    def load_at(moment: float):
        return load_telemetry_identity(home_dir=tmp_path, env={}, now=lambda: moment)

    first = load_at(100.0)
    assert first.enabled is True
    assert first.install_id == "00000000-0000-4000-8000-000000000000"

    # Flip the file on disk; 20s later the cached value is still returned.
    write_identity(tmp_path, enabled=False)
    cached = load_at(120.0)
    assert cached.enabled is True

    # 61s after the first read the file is consulted again.
    refreshed = load_at(161.0)
    assert refreshed.enabled is False


def test_identity_honors_python_env_kill_switches(tmp_path: Path) -> None:
    """Each kill-switch variable disables telemetry while keeping the install id."""
    kill_switches = ("KTX_TELEMETRY_DISABLED", "DO_NOT_TRACK", "CI")
    for kill_switch in kill_switches:
        reset_identity_cache()
        write_identity(tmp_path)

        disabled = load_telemetry_identity(
            home_dir=tmp_path,
            env={kill_switch: "1"},
            now=time.monotonic,
        )

        assert disabled.enabled is False, f"{kill_switch} should disable telemetry"
        assert disabled.install_id == "00000000-0000-4000-8000-000000000000"


def test_event_builder_rejects_unknown_fields() -> None:
    """Known fields build an event; an extra ``sql`` field is rejected with ValueError."""
    valid_fields = {
        "outcome": "ok",
        "dialect": "postgres",
        "durationMs": 5,
    }
    event = build_telemetry_event("sql_gen_completed", valid_fields)

    assert event["event"] == "sql_gen_completed"
    assert event["properties"]["runtime"] == "daemon-py"

    rejection = None
    try:
        build_telemetry_event(
            "sql_gen_completed",
            {**valid_fields, "sql": "select * from private_table"},
        )
    except ValueError as error:
        rejection = error

    if rejection is None:
        raise AssertionError("expected unknown field rejection")
    assert "unknown telemetry fields" in str(rejection)


def test_debug_emitter_writes_payload_without_network(tmp_path: Path, capsys) -> None:
    """In debug mode the emitter prints the event and project group to stderr."""
    reset_identity_cache()
    write_identity(tmp_path)

    plan_fields = {
        "outcome": "ok",
        "stage": "transpile",
        "durationMs": 12,
        "sourceCount": 1,
        "joinCount": 0,
    }
    track_telemetry_event(
        "sl_plan_completed",
        plan_fields,
        project_id="a" * 64,
        home_dir=tmp_path,
        env={"KTX_TELEMETRY_DEBUG": "1"},
    )

    stderr_text = capsys.readouterr().err
    assert '"event": "sl_plan_completed"' in stderr_text
    assert (
        '"groups": {"project": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}'
        in stderr_text
    )
    assert "private_table" not in stderr_text
in python_schema["x-ktx-catalog"]] == [ + "install_first_run", + "command", + "setup_step", + "connection_added", + "connection_test", + "project_stack_snapshot", + "ingest_completed", + "scan_completed", + "sl_validate_completed", + "sl_query_completed", + "sql_completed", + "wiki_query_completed", + "mcp_request_completed", + "daemon_started", + "daemon_stopped", + "sl_plan_completed", + "sql_gen_completed", + ] diff --git a/uv.lock b/uv.lock index 29ce5981..9c580fbf 100644 --- a/uv.lock +++ b/uv.lock @@ -49,6 +49,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, +] + [[package]] name = "certifi" version = "2026.4.22" @@ -223,6 +232,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + [[package]] name = "duckdb" version = "1.5.2" @@ -449,6 +467,7 @@ dependencies = [ { name = "numpy" }, { name = "orjson" }, { name = "pandas" }, + { name = "posthog" }, { name = "psycopg", extra = ["binary"] }, { name = "pydantic" }, { name = "requests" }, @@ -477,6 +496,7 @@ requires-dist = [ { name = "numpy", specifier = ">=2.2.6" }, { name = "orjson", specifier = ">=3.11.4" }, { name = "pandas", specifier = ">=2.2.3" }, + { name = "posthog", specifier = ">=7.0.0" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.0" }, { name = "pydantic", specifier = ">=2.9.0" }, { name = "requests", specifier = ">=2.32.0" }, @@ -830,6 +850,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "posthog" +version = "7.15.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "distro" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/ad/0eedae8cc9d2878d5b52c8607bd21f76101cfe4d875e5ff77fec9da3a83c/posthog-7.15.3.tar.gz", hash = 
"sha256:809dcaf08ca2d8bc0ea8228c28419181b74a79dfd1c0687a3d459a7bbe2e2953", size = 217645, upload-time = "2026-05-21T15:35:04.914Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/b4/8dc673bed0f296c1acbb1107aef1c56db576731e894fe765206be5a91774/posthog-7.15.3-py3-none-any.whl", hash = "sha256:fd59fe4f5be637e4a2706b1457301d8308853ff23659036ecfcf6ac0a2d45eee", size = 254591, upload-time = "2026-05-21T15:35:02.846Z" }, +] + [[package]] name = "pre-commit" version = "4.6.0"