feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205)

* feat: add telemetry phase 1

* feat: add node telemetry event catalog

* feat: add telemetry event helpers

* feat: emit setup and connection telemetry

* feat: emit connection and stack telemetry

* feat: emit ingest and scan telemetry

* feat: emit query telemetry

* feat: emit sampled mcp telemetry

* docs: expand telemetry event catalog

* feat: add telemetry schema sync artifact

* feat: pass telemetry project id to semantic daemon

* feat: add daemon telemetry foundation

* feat: emit semantic daemon telemetry

* feat: emit daemon lifecycle telemetry

* docs: document full telemetry event catalog

* feat(telemetry): dim first-run notice

* feat(telemetry): show first-run notice before command output

* feat(telemetry): wire ktx PostHog project for live ingestion

* docs(telemetry): drop posthog project name and host from storage section

* docs(telemetry): trim to general overview and disclaimer

* docs(agents): add short telemetry guidelines

* feat(telemetry): enable posthog geoip enrichment

* docs(telemetry): drop ip-geoip note from public overview

* refactor(telemetry): drop no-op groupIdentify, rely on capture groups field

* fix(telemetry): respect CI kill switch in python daemon identity

* fix(sql): route table-count analysis to existing analyze-batch endpoint

* fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander

* fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check

* fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests

* fix(telemetry): unset CI kill switch in cli-program-telemetry tests
This commit is contained in:
Andrey Avtomonov 2026-05-22 18:18:47 +02:00 committed by GitHub
parent c87d14a554
commit b0dd13ce7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 6576 additions and 48 deletions

View file

@ -1,5 +1,8 @@
from __future__ import annotations
import json
from pathlib import Path
from fastapi.testclient import TestClient
from ktx_daemon.app import create_app
@ -79,6 +82,44 @@ def test_health_endpoint_returns_managed_runtime_version(monkeypatch) -> None:
assert response.json() == {"status": "healthy", "version": "0.2.0"}
def test_app_lifespan_emits_daemon_lifecycle_debug_events(
    tmp_path: Path,
    monkeypatch,
    capsys,
) -> None:
    """Check that running the app through its lifespan prints lifecycle events.

    The test seeds an opted-in identity file under a temporary HOME, turns on
    ``KTX_TELEMETRY_DEBUG``, clears the three opt-out variables, and then
    expects both ``daemon_started`` and ``daemon_stopped`` to show up on
    stderr once the ``TestClient`` context has been entered and exited.
    """
    from ktx_daemon.telemetry.identity import reset_identity_cache

    # Presumably clears identity state left over from earlier tests so the
    # file written below is the one that gets loaded -- confirm in identity.py.
    reset_identity_cache()

    ktx_dir = tmp_path / ".ktx"
    ktx_dir.mkdir(parents=True)
    identity_record = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": True,
        "createdAt": "2026-05-22T14:33:02.000Z",
    }
    (ktx_dir / "telemetry.json").write_text(
        f"{json.dumps(identity_record)}\n",
        encoding="utf-8",
    )

    forced_env = (
        ("HOME", str(tmp_path)),
        ("KTX_TELEMETRY_DEBUG", "1"),
        ("KTX_DAEMON_VERSION", "0.4.1"),
    )
    for variable, value in forced_env:
        monkeypatch.setenv(variable, value)

    # The surrounding environment (e.g. a CI runner) must not switch
    # telemetry off for this test.
    for variable in ("CI", "KTX_TELEMETRY_DISABLED", "DO_NOT_TRACK"):
        monkeypatch.delenv(variable, raising=False)

    app = create_app(telemetry_started_at=100.0, clock=lambda: 100.125)
    with TestClient(app) as client:
        health_response = client.get("/health")
        assert health_response.status_code == 200

    stderr_text = capsys.readouterr().err
    assert '"event": "daemon_started"' in stderr_text
    assert '"event": "daemon_stopped"' in stderr_text
def test_database_introspect_endpoint_returns_snapshot() -> None:
calls = []

View file

@ -1,5 +1,8 @@
from __future__ import annotations
import json
from pathlib import Path
from ktx_daemon.semantic_layer import (
SemanticLayerQueryRequest,
ValidateSourcesRequest,
@ -46,6 +49,52 @@ def test_query_semantic_layer_generates_sql_and_plan() -> None:
assert response.plan["sources_used"] == ["orders"]
def test_query_semantic_layer_emits_plan_and_sql_debug_events(
    tmp_path: Path,
    monkeypatch,
    capsys,
) -> None:
    """Check that a semantic-layer query prints plan and SQL-gen debug events.

    With an opted-in identity file under a temporary HOME and
    ``KTX_TELEMETRY_DEBUG`` set, one ``query_semantic_layer`` call is expected
    to write ``sl_plan_completed`` and ``sql_gen_completed`` to stderr, and the
    string ``public.orders`` must not appear in that output.
    """
    from ktx_daemon.telemetry.identity import reset_identity_cache

    # Presumably clears identity state left over from earlier tests -- confirm
    # in identity.py.
    reset_identity_cache()

    ktx_dir = tmp_path / ".ktx"
    ktx_dir.mkdir(parents=True)
    identity_record = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": True,
        "createdAt": "2026-05-22T14:33:02.000Z",
    }
    (ktx_dir / "telemetry.json").write_text(
        f"{json.dumps(identity_record)}\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("KTX_TELEMETRY_DEBUG", "1")
    # The surrounding environment (e.g. a CI runner) must not switch
    # telemetry off for this test.
    for variable in ("CI", "KTX_TELEMETRY_DISABLED", "DO_NOT_TRACK"):
        monkeypatch.delenv(variable, raising=False)

    request = SemanticLayerQueryRequest(
        sources=[ORDERS_SOURCE],
        dialect="postgres",
        projectId="a" * 64,
        query={
            "measures": ["orders.order_count"],
            "dimensions": ["orders.status"],
            "limit": 25,
        },
    )
    query_semantic_layer(request)

    stderr_text = capsys.readouterr().err
    assert '"event": "sl_plan_completed"' in stderr_text
    assert '"event": "sql_gen_completed"' in stderr_text
    # Presumably ORDERS_SOURCE refers to the table public.orders, making this
    # a guard against table names leaking into telemetry -- verify against
    # the ORDERS_SOURCE definition.
    assert "public.orders" not in stderr_text
def test_validate_semantic_layer_reports_duplicate_measure_names() -> None:
invalid_source = {
**ORDERS_SOURCE,

View file

@ -0,0 +1,114 @@
from __future__ import annotations
import json
import time
from pathlib import Path
from ktx_daemon.telemetry.emitter import track_telemetry_event
from ktx_daemon.telemetry.events import build_telemetry_event
from ktx_daemon.telemetry.identity import load_telemetry_identity, reset_identity_cache
def write_identity(home: Path, *, enabled: bool = True) -> None:
    """Write a fixture telemetry identity file at ``<home>/.ktx/telemetry.json``.

    The ``.ktx`` directory is created if it does not exist and any existing
    file is overwritten. The payload is a fixed record whose only variable
    part is the ``enabled`` flag; it is serialized as one line of JSON
    followed by a newline.

    Args:
        home: Directory that stands in for the user's home directory.
        enabled: Value stored under the ``"enabled"`` key.
    """
    record = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": enabled,
        "noticeShownAt": "2026-05-22T14:33:02.000Z",
        "noticeShownVersion": 1,
        "createdAt": "2026-05-22T14:33:02.000Z",
    }
    identity_dir = home / ".ktx"
    identity_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(record)
    (identity_dir / "telemetry.json").write_text(serialized + "\n", encoding="utf-8")
def test_identity_reads_file_with_ttl_cache(tmp_path: Path) -> None:
    """Check that the identity is read from disk and then served from a cache.

    The identity is loaded at clock value 100.0, the file is rewritten with
    ``enabled=False``, and the test expects the old value to still be returned
    at 120.0 but the new value to be returned at 161.0.
    """

    def load_at(clock_value: float):
        # Every call passes a fresh empty env and a clock pinned to one value.
        return load_telemetry_identity(
            home_dir=tmp_path,
            env={},
            now=lambda: clock_value,
        )

    reset_identity_cache()
    write_identity(tmp_path)

    initial = load_at(100.0)
    assert initial.enabled is True
    assert initial.install_id == "00000000-0000-4000-8000-000000000000"

    # Flip the flag on disk; the change should only become visible once the
    # cached entry is considered stale.
    write_identity(tmp_path, enabled=False)

    still_cached = load_at(120.0)
    assert still_cached.enabled is True

    after_expiry = load_at(161.0)
    assert after_expiry.enabled is False
def test_identity_honors_python_env_kill_switches(tmp_path: Path) -> None:
    """Check that each opt-out environment variable disables telemetry.

    For ``KTX_TELEMETRY_DISABLED``, ``DO_NOT_TRACK`` and ``CI`` in turn, an
    enabled identity file is written and loaded with only that variable set to
    ``"1"``. The loaded identity must report ``enabled is False`` while still
    carrying the install id from the file.
    """
    switch_names = ("KTX_TELEMETRY_DISABLED", "DO_NOT_TRACK", "CI")
    for switch_name in switch_names:
        # Start each round from an empty cache and an enabled on-disk identity.
        reset_identity_cache()
        write_identity(tmp_path)

        env_with_switch = {switch_name: "1"}
        identity = load_telemetry_identity(
            home_dir=tmp_path,
            env=env_with_switch,
            now=time.monotonic,
        )

        assert identity.enabled is False, f"{switch_name} should disable telemetry"
        assert identity.install_id == "00000000-0000-4000-8000-000000000000"
def test_event_builder_rejects_unknown_fields() -> None:
    """Check that the event builder accepts known fields and rejects extras.

    A ``sql_gen_completed`` event built from ``outcome``, ``dialect`` and
    ``durationMs`` must come back with the event name and a ``runtime``
    property of ``"daemon-py"``. Adding a ``sql`` field must raise a
    ``ValueError`` whose message mentions "unknown telemetry fields".
    """
    allowed_fields = {
        "outcome": "ok",
        "dialect": "postgres",
        "durationMs": 5,
    }

    built = build_telemetry_event("sql_gen_completed", dict(allowed_fields))
    assert built["event"] == "sql_gen_completed"
    assert built["properties"]["runtime"] == "daemon-py"

    # Same payload plus one field the test expects the builder to refuse.
    with_extra_field = {**allowed_fields, "sql": "select * from private_table"}
    rejection = None
    try:
        build_telemetry_event("sql_gen_completed", with_extra_field)
    except ValueError as error:
        rejection = error

    if rejection is None:
        raise AssertionError("expected unknown field rejection")
    assert "unknown telemetry fields" in str(rejection)
def test_debug_emitter_writes_payload_without_network(tmp_path: Path, capsys) -> None:
    """Check that tracking an event with ``KTX_TELEMETRY_DEBUG`` prints it.

    An ``sl_plan_completed`` event is tracked against an enabled identity in
    *tmp_path*, with an env that contains only ``KTX_TELEMETRY_DEBUG=1``. The
    event name and a ``groups`` entry carrying the 64-character project id
    must appear on stderr.
    """
    reset_identity_cache()
    write_identity(tmp_path)

    plan_properties = {
        "outcome": "ok",
        "stage": "transpile",
        "durationMs": 12,
        "sourceCount": 1,
        "joinCount": 0,
    }
    debug_only_env = {"KTX_TELEMETRY_DEBUG": "1"}
    track_telemetry_event(
        "sl_plan_completed",
        plan_properties,
        project_id="a" * 64,
        home_dir=tmp_path,
        env=debug_only_env,
    )

    stderr_text = capsys.readouterr().err
    assert '"event": "sl_plan_completed"' in stderr_text
    assert (
        '"groups": {"project": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}'
        in stderr_text
    )
    # NOTE(review): nothing passed to track_telemetry_event in this test
    # contains "private_table", so this check looks like it can never fail on
    # the payload -- confirm whether it is intentional.
    assert "private_table" not in stderr_text

View file

@ -0,0 +1,39 @@
from __future__ import annotations
import json
from pathlib import Path
def test_python_schema_copy_matches_node_schema() -> None:
    """Check that the Python schema copy is identical to the Node schema.

    Both ``events.schema.json`` files are located relative to the repository
    root (three directories above this file's directory) and compared as
    parsed JSON. The ``x-ktx-catalog`` entry names are also pinned to an exact
    ordered list.
    """
    repo_root = Path(__file__).resolve().parents[3]

    def read_schema(relative_path: str):
        # Parse a UTF-8 JSON document stored at a path relative to the repo root.
        return json.loads((repo_root / relative_path).read_text(encoding="utf-8"))

    node_schema = read_schema("packages/cli/src/telemetry/events.schema.json")
    python_schema = read_schema(
        "python/ktx-daemon/src/ktx_daemon/telemetry/events.schema.json"
    )

    expected_event_names = [
        "install_first_run",
        "command",
        "setup_step",
        "connection_added",
        "connection_test",
        "project_stack_snapshot",
        "ingest_completed",
        "scan_completed",
        "sl_validate_completed",
        "sl_query_completed",
        "sql_completed",
        "wiki_query_completed",
        "mcp_request_completed",
        "daemon_started",
        "daemon_stopped",
        "sl_plan_completed",
        "sql_gen_completed",
    ]

    assert python_schema == node_schema
    catalog_names = [entry["name"] for entry in python_schema["x-ktx-catalog"]]
    assert catalog_names == expected_event_names