From f72c7c8bdc1c90165d93004d3475db054d3081d4 Mon Sep 17 00:00:00 2001 From: Andrey Avtomonov Date: Fri, 22 May 2026 16:17:03 +0200 Subject: [PATCH] feat: add daemon telemetry foundation --- packages/cli/src/telemetry/events.schema.json | 1407 +++++++++++++++++ python/ktx-daemon/pyproject.toml | 1 + .../src/ktx_daemon/telemetry/__init__.py | 5 + .../src/ktx_daemon/telemetry/emitter.py | 105 ++ .../src/ktx_daemon/telemetry/events.py | 72 + .../ktx_daemon/telemetry/events.schema.json | 1407 +++++++++++++++++ .../src/ktx_daemon/telemetry/identity.py | 77 + python/ktx-daemon/tests/test_telemetry.py | 113 ++ uv.lock | 35 + 9 files changed, 3222 insertions(+) create mode 100644 packages/cli/src/telemetry/events.schema.json create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/events.py create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json create mode 100644 python/ktx-daemon/src/ktx_daemon/telemetry/identity.py create mode 100644 python/ktx-daemon/tests/test_telemetry.py diff --git a/packages/cli/src/telemetry/events.schema.json b/packages/cli/src/telemetry/events.schema.json new file mode 100644 index 00000000..13642c49 --- /dev/null +++ b/packages/cli/src/telemetry/events.schema.json @@ -0,0 +1,1407 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ktx telemetry events", + "type": "object", + "additionalProperties": false, + "x-ktx-common-fields": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "x-ktx-catalog": [ + { + "name": "install_first_run", + "description": "Emitted once when ~/.ktx/telemetry.json is created.", + "fields": [] + }, + { + "name": "command", + "description": "Emitted once for each Commander action that reaches preAction.", + "fields": [ + "commandPath", + "durationMs", + "outcome", + 
"errorClass", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ] + }, + { + "name": "setup_step", + "description": "Emitted after an interactive setup step completes, skips, or aborts.", + "fields": [ + "step", + "outcome", + "durationMs" + ] + }, + { + "name": "connection_added", + "description": "Emitted when setup writes a database, source, or demo connection.", + "fields": [ + "driver", + "isDemoConnection" + ] + }, + { + "name": "connection_test", + "description": "Emitted after ktx connection test completes.", + "fields": [ + "driver", + "isDemoConnection", + "outcome", + "errorClass", + "durationMs", + "serverVersion" + ] + }, + { + "name": "project_stack_snapshot", + "description": "Emitted after commands that can summarize the local project stack.", + "fields": [ + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ] + }, + { + "name": "ingest_completed", + "description": "Emitted after a public ingest target completes.", + "fields": [ + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "scan_completed", + "description": "Emitted after schema scan or relationship inference completes.", + "fields": [ + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sl_validate_completed", + "description": "Emitted after ktx sl validate completes.", + "fields": [ + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "errorClass", + "durationMs" + ] + }, + { + "name": "sl_query_completed", + "description": "Emitted after ktx sl query compiles or executes.", + "fields": [ + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sql_completed", + "description": "Emitted after ktx 
sql completes validation and execution.", + "fields": [ + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "wiki_query_completed", + "description": "Emitted after a wiki query completes.", + "fields": [ + "queryLength", + "resultCount", + "durationMs", + "outcome" + ] + }, + { + "name": "mcp_request_completed", + "description": "Emitted for sampled MCP tool requests.", + "fields": [ + "toolName", + "outcome", + "durationMs", + "errorClass", + "sampleRate" + ] + }, + { + "name": "daemon_started", + "description": "Emitted when the long-lived ktx-daemon HTTP server starts.", + "fields": [ + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ] + }, + { + "name": "daemon_stopped", + "description": "Emitted when the long-lived ktx-daemon HTTP server shuts down.", + "fields": [ + "reason", + "uptimeMs" + ] + }, + { + "name": "sl_plan_completed", + "description": "Emitted after a daemon semantic-layer planning pass completes.", + "fields": [ + "outcome", + "stage", + "errorClass", + "durationMs", + "sourceCount", + "joinCount" + ] + }, + { + "name": "sql_gen_completed", + "description": "Emitted after daemon SQL generation completes.", + "fields": [ + "outcome", + "dialect", + "errorClass", + "durationMs" + ] + } + ], + "$defs": { + "install_first_run": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "additionalProperties": false + }, + "command": { + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "commandPath": { + "minItems": 1, + "type": "array", + "items": { + "type": "string" + } + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error", + "aborted" + ] + }, + "errorClass": { + "type": "string" + }, + "flagsPresent": { + "type": "object", + "propertyNames": { + "type": "string" + }, + "additionalProperties": { + "type": "boolean" + } + }, + "hasProject": { + "type": "boolean" + }, + "projectGroupAttached": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "commandPath", + "durationMs", + "outcome", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ], + "additionalProperties": false + }, + "setup_step": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "step": { + "type": "string", + "enum": [ + "project", + "runtime", + "models", + "embeddings", + "secrets", + "databases", + "database-context-depth", + "sources", + "context", + "agents", + "demo-tour" + ] + }, + "outcome": { + "type": "string", + "enum": [ + "completed", + "skipped", + "abandoned" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + } 
+ }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "step", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "connection_added": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection" + ], + "additionalProperties": false + }, + "connection_test": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "serverVersion": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "project_stack_snapshot": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": 
"object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "connectors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "driver": { + "type": "string" + }, + "isDemo": { + "type": "boolean" + } + }, + "required": [ + "driver", + "isDemo" + ], + "additionalProperties": false + } + }, + "connectionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "hasSl": { + "type": "boolean" + }, + "hasWiki": { + "type": "boolean" + }, + "hasMcp": { + "type": "boolean" + }, + "hasManagedRuntime": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ], + "additionalProperties": false + }, + "ingest_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "schemaCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "rowsBucket": { + "type": "string", + "enum": [ + "<10k", + "<100k", + 
"<1M", + "<10M", + ">=10M" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "scan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "inferredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "declaredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sl_validate_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": 
"string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "modelCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "validationErrorCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "sl_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "mode": { + "type": "string", + "enum": [ + "compile", + "execute" + ] + }, + "referencedSourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedDimensionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedMeasureCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + 
"nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sql_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "queryVerb": { + "type": "string", + "enum": [ + "select", + "explain", + "show", + "with", + "other" + ] + }, + "referencedTableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "wiki_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "queryLength": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "resultCount": { + "type": "integer", 
+ "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "queryLength", + "resultCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "mcp_request_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "toolName": { + "type": "string" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "errorClass": { + "type": "string" + }, + "sampleRate": { + "type": "number", + "const": 0.1 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "toolName", + "outcome", + "durationMs", + "sampleRate" + ], + "additionalProperties": false + }, + "daemon_started": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "daemonVersion": { + "type": "string" + }, + "pythonVersion": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "startupDurationMs": { + "type": "number", + "minimum": 0 + } + }, + 
"required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ], + "additionalProperties": false + }, + "daemon_stopped": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "reason": { + "type": "string", + "enum": [ + "signal", + "request", + "crash" + ] + }, + "uptimeMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "reason", + "uptimeMs" + ], + "additionalProperties": false + }, + "sl_plan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "stage": { + "type": "string", + "enum": [ + "parse", + "resolve", + "compile", + "transpile" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "joinCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + 
"arch", + "runtime", + "isCi", + "outcome", + "stage", + "durationMs", + "sourceCount", + "joinCount" + ], + "additionalProperties": false + }, + "sql_gen_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "dialect": { + "type": "string" + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "dialect", + "durationMs" + ], + "additionalProperties": false + } + } +} diff --git a/python/ktx-daemon/pyproject.toml b/python/ktx-daemon/pyproject.toml index 8cb78f36..a072a5fe 100644 --- a/python/ktx-daemon/pyproject.toml +++ b/python/ktx-daemon/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "numpy>=2.2.6", "orjson>=3.11.4", "pandas>=2.2.3", + "posthog>=7.0.0", "psycopg[binary]>=3.2.0", "pydantic>=2.9.0", "requests>=2.32.0", diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py new file mode 100644 index 00000000..ff9cd07f --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/__init__.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event + +__all__ = ["error_class", "track_telemetry_event"] diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py b/python/ktx-daemon/src/ktx_daemon/telemetry/emitter.py new file mode 100644 index 00000000..c77acb75 --- /dev/null +++ 
# Live delivery is configured entirely by these two constants plus the
# KTX_TELEMETRY_ENDPOINT override. While the API key is empty,
# `_live_configured` is always False and no event is sent over the network.
POSTHOG_PROJECT_API_KEY = ""
POSTHOG_HOST = ""


def _host(env: Mapping[str, str]) -> str:
    """Return the telemetry endpoint: the env override if set, else the default."""
    return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST


def _live_configured(host: str) -> bool:
    """Return True only when both the API key and *host* are non-blank."""
    return bool(POSTHOG_PROJECT_API_KEY.strip() and host.strip())


def _debug_enabled(env: Mapping[str, str]) -> bool:
    """Return True when KTX_TELEMETRY_DEBUG is exactly the string "1"."""
    return env.get("KTX_TELEMETRY_DEBUG") == "1"


def _scrub_error_class(error: BaseException) -> str | None:
    """Return the exception's class name if it looks like a plain identifier.

    Only the type name is considered, never the exception message. Names that
    are longer than 80 characters, contain path/URL/e-mail style markers, do
    not start with an uppercase character, or contain anything other than
    alphanumerics and underscores are rejected with ``None``.
    """
    name = type(error).__name__
    if len(name) > 80:
        return None
    if any(marker in name for marker in ("/", "\\", "@", "://")):
        return None
    # name[:1] (rather than name[0]) keeps an empty name from raising.
    if not name[:1].isupper() or not name.replace("_", "").isalnum():
        return None
    return name


def error_class(error: BaseException) -> str | None:
    """Public wrapper: return a scrubbed class name for *error*, or ``None``."""
    return _scrub_error_class(error)


def track_telemetry_event(
    name: str,
    fields: dict[str, Any],
    *,
    project_id: str | None = None,
    home_dir: Path | None = None,
    env: Mapping[str, str] | None = None,
) -> None:
    """Build and emit one telemetry event on a best-effort basis.

    Args:
        name: Event name; passed to ``build_telemetry_event``.
        fields: Event-specific fields; passed to ``build_telemetry_event``.
        project_id: When truthy, attached as the ``project`` group.
        home_dir: Forwarded to ``load_telemetry_identity``.
        env: Environment mapping to read configuration from. ``None`` means
            "use ``os.environ``"; any mapping that is passed, including an
            empty one, is used as given.

    Returns:
        None. The event is dropped silently when telemetry is disabled, no
        install id is available, the event is rejected with ``ValueError``,
        live delivery is not configured, or delivery raises.
    """
    # `env or os.environ` would discard an explicitly passed empty mapping and
    # let the real process environment leak into an isolated caller.
    source_env = os.environ if env is None else env
    identity = load_telemetry_identity(home_dir=home_dir, env=source_env)
    if not identity.enabled or not identity.install_id:
        return

    try:
        event = build_telemetry_event(name, fields)
    except ValueError:
        # Unknown event or wrong field set: drop instead of failing the caller.
        return

    groups = {"project": project_id} if project_id else None

    if _debug_enabled(source_env):
        # Debug mode prints the payload to stderr and never touches the network.
        payload = {
            "distinctId": identity.install_id,
            "event": event["event"],
            "properties": event["properties"],
            "groups": groups,
        }
        sys.stderr.write("[telemetry] " + json.dumps(payload, sort_keys=True) + "\n")
        return

    host = _host(source_env)
    if not _live_configured(host):
        return

    try:
        # Imported lazily so the dependency is only loaded on the live path.
        from posthog import Posthog

        client = Posthog(
            POSTHOG_PROJECT_API_KEY,
            host=host,
            flush_at=1,
            flush_interval=0,
            sync_mode=True,
            timeout=1,
            disable_geoip=True,
        )
        try:
            client.capture(
                event=event["event"],
                distinct_id=identity.install_id,
                properties=event["properties"],
                groups=groups,
                disable_geoip=True,
            )
        finally:
            # Release the client even when capture() raises.
            client.shutdown()
    except Exception:
        # Telemetry must never break the daemon; delivery errors are dropped.
        return
ValueError(f"unknown telemetry fields for {name}: {sorted(extra)}") + + missing = { + field for field in allowed if field not in fields and field != "errorClass" + } + if missing: + raise ValueError(f"missing telemetry fields for {name}: {sorted(missing)}") + + return { + "event": name, + "properties": {**_common_envelope(), **fields}, + } diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json new file mode 100644 index 00000000..13642c49 --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json @@ -0,0 +1,1407 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ktx telemetry events", + "type": "object", + "additionalProperties": false, + "x-ktx-common-fields": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "x-ktx-catalog": [ + { + "name": "install_first_run", + "description": "Emitted once when ~/.ktx/telemetry.json is created.", + "fields": [] + }, + { + "name": "command", + "description": "Emitted once for each Commander action that reaches preAction.", + "fields": [ + "commandPath", + "durationMs", + "outcome", + "errorClass", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ] + }, + { + "name": "setup_step", + "description": "Emitted after an interactive setup step completes, skips, or aborts.", + "fields": [ + "step", + "outcome", + "durationMs" + ] + }, + { + "name": "connection_added", + "description": "Emitted when setup writes a database, source, or demo connection.", + "fields": [ + "driver", + "isDemoConnection" + ] + }, + { + "name": "connection_test", + "description": "Emitted after ktx connection test completes.", + "fields": [ + "driver", + "isDemoConnection", + "outcome", + "errorClass", + "durationMs", + "serverVersion" + ] + }, + { + "name": "project_stack_snapshot", + "description": "Emitted after commands that can summarize the local project 
stack.", + "fields": [ + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ] + }, + { + "name": "ingest_completed", + "description": "Emitted after a public ingest target completes.", + "fields": [ + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "scan_completed", + "description": "Emitted after schema scan or relationship inference completes.", + "fields": [ + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sl_validate_completed", + "description": "Emitted after ktx sl validate completes.", + "fields": [ + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "errorClass", + "durationMs" + ] + }, + { + "name": "sl_query_completed", + "description": "Emitted after ktx sl query compiles or executes.", + "fields": [ + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "sql_completed", + "description": "Emitted after ktx sql completes validation and execution.", + "fields": [ + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome", + "errorClass" + ] + }, + { + "name": "wiki_query_completed", + "description": "Emitted after a wiki query completes.", + "fields": [ + "queryLength", + "resultCount", + "durationMs", + "outcome" + ] + }, + { + "name": "mcp_request_completed", + "description": "Emitted for sampled MCP tool requests.", + "fields": [ + "toolName", + "outcome", + "durationMs", + "errorClass", + "sampleRate" + ] + }, + { + "name": "daemon_started", + "description": "Emitted when the long-lived ktx-daemon HTTP server starts.", + "fields": [ + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ] + }, 
+ { + "name": "daemon_stopped", + "description": "Emitted when the long-lived ktx-daemon HTTP server shuts down.", + "fields": [ + "reason", + "uptimeMs" + ] + }, + { + "name": "sl_plan_completed", + "description": "Emitted after a daemon semantic-layer planning pass completes.", + "fields": [ + "outcome", + "stage", + "errorClass", + "durationMs", + "sourceCount", + "joinCount" + ] + }, + { + "name": "sql_gen_completed", + "description": "Emitted after daemon SQL generation completes.", + "fields": [ + "outcome", + "dialect", + "errorClass", + "durationMs" + ] + } + ], + "$defs": { + "install_first_run": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi" + ], + "additionalProperties": false + }, + "command": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "commandPath": { + "minItems": 1, + "type": "array", + "items": { + "type": "string" + } + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error", + "aborted" + ] + }, + "errorClass": { + "type": "string" + }, + "flagsPresent": { + "type": "object", + "propertyNames": { + "type": "string" + }, + 
"additionalProperties": { + "type": "boolean" + } + }, + "hasProject": { + "type": "boolean" + }, + "projectGroupAttached": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "commandPath", + "durationMs", + "outcome", + "flagsPresent", + "hasProject", + "projectGroupAttached" + ], + "additionalProperties": false + }, + "setup_step": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "step": { + "type": "string", + "enum": [ + "project", + "runtime", + "models", + "embeddings", + "secrets", + "databases", + "database-context-depth", + "sources", + "context", + "agents", + "demo-tour" + ] + }, + "outcome": { + "type": "string", + "enum": [ + "completed", + "skipped", + "abandoned" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "step", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "connection_added": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + 
"nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection" + ], + "additionalProperties": false + }, + "connection_test": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "serverVersion": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "project_stack_snapshot": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "connectors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "driver": { + "type": "string" + }, + "isDemo": { + "type": "boolean" + } + }, + "required": [ + "driver", + "isDemo" + ], + "additionalProperties": false + } + }, + "connectionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "hasSl": { + "type": "boolean" + }, + "hasWiki": { + "type": "boolean" + 
}, + "hasMcp": { + "type": "boolean" + }, + "hasManagedRuntime": { + "type": "boolean" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "connectors", + "connectionCount", + "hasSl", + "hasWiki", + "hasMcp", + "hasManagedRuntime" + ], + "additionalProperties": false + }, + "ingest_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + "schemaCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "rowsBucket": { + "type": "string", + "enum": [ + "<10k", + "<100k", + "<1M", + "<10M", + ">=10M" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "schemaCount", + "tableCount", + "columnCount", + "rowsBucket", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "scan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": 
"string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "tableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "columnCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "inferredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "declaredFkCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "tableCount", + "columnCount", + "inferredFkCount", + "declaredFkCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sl_validate_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "modelCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "validationErrorCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + 
"osRelease", + "arch", + "runtime", + "isCi", + "sourceCount", + "modelCount", + "validationErrorCount", + "outcome", + "durationMs" + ], + "additionalProperties": false + }, + "sl_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "mode": { + "type": "string", + "enum": [ + "compile", + "execute" + ] + }, + "referencedSourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedDimensionCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "referencedMeasureCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "mode", + "referencedSourceCount", + "referencedDimensionCount", + "referencedMeasureCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "sql_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "driver": { + "type": "string" + }, + "isDemoConnection": { + "type": "boolean" + }, + 
"queryVerb": { + "type": "string", + "enum": [ + "select", + "explain", + "show", + "with", + "other" + ] + }, + "referencedTableCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "errorClass": { + "type": "string" + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "driver", + "isDemoConnection", + "queryVerb", + "referencedTableCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "wiki_query_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "queryLength": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "resultCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "queryLength", + "resultCount", + "durationMs", + "outcome" + ], + "additionalProperties": false + }, + "mcp_request_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + 
"enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "toolName": { + "type": "string" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "errorClass": { + "type": "string" + }, + "sampleRate": { + "type": "number", + "const": 0.1 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "toolName", + "outcome", + "durationMs", + "sampleRate" + ], + "additionalProperties": false + }, + "daemon_started": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "daemonVersion": { + "type": "string" + }, + "pythonVersion": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "startupDurationMs": { + "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "daemonVersion", + "pythonVersion", + "runtimeVersion", + "startupDurationMs" + ], + "additionalProperties": false + }, + "daemon_stopped": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "reason": { + "type": "string", + "enum": [ + "signal", + "request", + "crash" + ] + }, + "uptimeMs": { 
+ "type": "number", + "minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "reason", + "uptimeMs" + ], + "additionalProperties": false + }, + "sl_plan_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "stage": { + "type": "string", + "enum": [ + "parse", + "resolve", + "compile", + "transpile" + ] + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + "minimum": 0 + }, + "sourceCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + }, + "joinCount": { + "type": "integer", + "minimum": 0, + "maximum": 9007199254740991 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "stage", + "durationMs", + "sourceCount", + "joinCount" + ], + "additionalProperties": false + }, + "sql_gen_completed": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "cliVersion": { + "type": "string" + }, + "nodeVersion": { + "type": "string" + }, + "osPlatform": { + "type": "string" + }, + "osRelease": { + "type": "string" + }, + "arch": { + "type": "string" + }, + "runtime": { + "type": "string", + "enum": [ + "node", + "daemon-py" + ] + }, + "isCi": { + "type": "boolean" + }, + "outcome": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "dialect": { + "type": "string" + }, + "errorClass": { + "type": "string" + }, + "durationMs": { + "type": "number", + 
"minimum": 0 + } + }, + "required": [ + "cliVersion", + "nodeVersion", + "osPlatform", + "osRelease", + "arch", + "runtime", + "isCi", + "outcome", + "dialect", + "durationMs" + ], + "additionalProperties": false + } + } +} diff --git a/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py b/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py new file mode 100644 index 00000000..cfdcee30 --- /dev/null +++ b/python/ktx-daemon/src/ktx_daemon/telemetry/identity.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import json +import os +import time +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from collections.abc import Mapping + +IDENTITY_TTL_SECONDS = 60.0 + + +@dataclass(frozen=True) +class TelemetryIdentity: + install_id: str | None + enabled: bool + path: Path + + +_cache: tuple[float, Path, TelemetryIdentity] | None = None + + +def _telemetry_path(home_dir: Path | None = None) -> Path: + return (home_dir or Path.home()) / ".ktx" / "telemetry.json" + + +def _env_disables(env: Mapping[str, str] | None = None) -> bool: + source = env or os.environ + return bool(source.get("KTX_TELEMETRY_DISABLED") or source.get("DO_NOT_TRACK")) + + +def _read_identity(path: Path) -> TelemetryIdentity: + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return TelemetryIdentity(install_id=None, enabled=False, path=path) + + install_id = raw.get("installId") + enabled = raw.get("enabled") + if not isinstance(install_id, str) or enabled is not True: + return TelemetryIdentity( + install_id=install_id if isinstance(install_id, str) else None, + enabled=False, + path=path, + ) + + return TelemetryIdentity(install_id=install_id, enabled=True, path=path) + + +def load_telemetry_identity( + *, + home_dir: Path | None = None, + env: Mapping[str, str] | None = None, + now: Callable[[], float] | None = None, +) -> TelemetryIdentity: + global _cache + + path = 
_telemetry_path(home_dir) + clock = now or time.monotonic + current = float(clock()) + + if _cache and _cache[1] == path and current - _cache[0] < IDENTITY_TTL_SECONDS: + cached = _cache[2] + else: + cached = _read_identity(path) + _cache = (current, path, cached) + + if _env_disables(env): + return TelemetryIdentity(install_id=cached.install_id, enabled=False, path=path) + + return cached + + +def reset_identity_cache() -> None: + global _cache + _cache = None diff --git a/python/ktx-daemon/tests/test_telemetry.py b/python/ktx-daemon/tests/test_telemetry.py new file mode 100644 index 00000000..f04a9037 --- /dev/null +++ b/python/ktx-daemon/tests/test_telemetry.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import json +import time +from pathlib import Path + +from ktx_daemon.telemetry.emitter import track_telemetry_event +from ktx_daemon.telemetry.events import build_telemetry_event +from ktx_daemon.telemetry.identity import load_telemetry_identity, reset_identity_cache + + +def write_identity(home: Path, *, enabled: bool = True) -> None: + target = home / ".ktx" / "telemetry.json" + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text( + json.dumps( + { + "installId": "00000000-0000-4000-8000-000000000000", + "enabled": enabled, + "noticeShownAt": "2026-05-22T14:33:02.000Z", + "noticeShownVersion": 1, + "createdAt": "2026-05-22T14:33:02.000Z", + } + ) + + "\n", + encoding="utf-8", + ) + + +def test_identity_reads_file_with_ttl_cache(tmp_path: Path) -> None: + reset_identity_cache() + write_identity(tmp_path) + + first = load_telemetry_identity(home_dir=tmp_path, now=lambda: 100.0) + assert first.enabled is True + assert first.install_id == "00000000-0000-4000-8000-000000000000" + + write_identity(tmp_path, enabled=False) + cached = load_telemetry_identity(home_dir=tmp_path, now=lambda: 120.0) + assert cached.enabled is True + + refreshed = load_telemetry_identity(home_dir=tmp_path, now=lambda: 161.0) + assert refreshed.enabled is 
False + + +def test_identity_honors_python_env_kill_switches(tmp_path: Path) -> None: + reset_identity_cache() + write_identity(tmp_path) + + disabled = load_telemetry_identity( + home_dir=tmp_path, + env={"KTX_TELEMETRY_DISABLED": "1"}, + now=lambda: time.monotonic(), + ) + + assert disabled.enabled is False + assert disabled.install_id == "00000000-0000-4000-8000-000000000000" + + +def test_event_builder_rejects_unknown_fields() -> None: + event = build_telemetry_event( + "sql_gen_completed", + { + "outcome": "ok", + "dialect": "postgres", + "durationMs": 5, + }, + ) + + assert event["event"] == "sql_gen_completed" + assert event["properties"]["runtime"] == "daemon-py" + + try: + build_telemetry_event( + "sql_gen_completed", + { + "outcome": "ok", + "dialect": "postgres", + "durationMs": 5, + "sql": "select * from private_table", + }, + ) + except ValueError as error: + assert "unknown telemetry fields" in str(error) + else: + raise AssertionError("expected unknown field rejection") + + +def test_debug_emitter_writes_payload_without_network(tmp_path: Path, capsys) -> None: + reset_identity_cache() + write_identity(tmp_path) + + track_telemetry_event( + "sl_plan_completed", + { + "outcome": "ok", + "stage": "transpile", + "durationMs": 12, + "sourceCount": 1, + "joinCount": 0, + }, + project_id="a" * 64, + home_dir=tmp_path, + env={"KTX_TELEMETRY_DEBUG": "1"}, + ) + + captured = capsys.readouterr() + assert '"event": "sl_plan_completed"' in captured.err + assert ( + '"groups": {"project": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}' + in captured.err + ) + assert "private_table" not in captured.err diff --git a/uv.lock b/uv.lock index 29ce5981..9c580fbf 100644 --- a/uv.lock +++ b/uv.lock @@ -49,6 +49,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = 
"sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, +] + [[package]] name = "certifi" version = "2026.4.22" @@ -223,6 +232,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] + [[package]] name = "duckdb" version = "1.5.2" @@ -449,6 +467,7 @@ dependencies = [ { name = 
"numpy" }, { name = "orjson" }, { name = "pandas" }, + { name = "posthog" }, { name = "psycopg", extra = ["binary"] }, { name = "pydantic" }, { name = "requests" }, @@ -477,6 +496,7 @@ requires-dist = [ { name = "numpy", specifier = ">=2.2.6" }, { name = "orjson", specifier = ">=3.11.4" }, { name = "pandas", specifier = ">=2.2.3" }, + { name = "posthog", specifier = ">=7.0.0" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.0" }, { name = "pydantic", specifier = ">=2.9.0" }, { name = "requests", specifier = ">=2.32.0" }, @@ -830,6 +850,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "posthog" +version = "7.15.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "distro" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/ad/0eedae8cc9d2878d5b52c8607bd21f76101cfe4d875e5ff77fec9da3a83c/posthog-7.15.3.tar.gz", hash = "sha256:809dcaf08ca2d8bc0ea8228c28419181b74a79dfd1c0687a3d459a7bbe2e2953", size = 217645, upload-time = "2026-05-21T15:35:04.914Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/b4/8dc673bed0f296c1acbb1107aef1c56db576731e894fe765206be5a91774/posthog-7.15.3-py3-none-any.whl", hash = "sha256:fd59fe4f5be637e4a2706b1457301d8308853ff23659036ecfcf6ac0a2d45eee", size = 254591, upload-time = "2026-05-21T15:35:02.846Z" }, +] + [[package]] name = "pre-commit" version = "4.6.0"