feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205)

* feat: add telemetry phase 1

* feat: add node telemetry event catalog

* feat: add telemetry event helpers

* feat: emit setup and connection telemetry

* feat: emit connection and stack telemetry

* feat: emit ingest and scan telemetry

* feat: emit query telemetry

* feat: emit sampled mcp telemetry

* docs: expand telemetry event catalog

* feat: add telemetry schema sync artifact

* feat: pass telemetry project id to semantic daemon

* feat: add daemon telemetry foundation

* feat: emit semantic daemon telemetry

* feat: emit daemon lifecycle telemetry

* docs: document full telemetry event catalog

* feat(telemetry): dim first-run notice

* feat(telemetry): show first-run notice before command output

* feat(telemetry): wire ktx PostHog project for live ingestion

* docs(telemetry): drop posthog project name and host from storage section

* docs(telemetry): trim to general overview and disclaimer

* docs(agents): add short telemetry guidelines

* feat(telemetry): enable posthog geoip enrichment

* docs(telemetry): drop ip-geoip note from public overview

* refactor(telemetry): drop no-op groupIdentify, rely on capture groups field

* fix(telemetry): respect CI kill switch in python daemon identity

* fix(sql): route table-count analysis to existing analyze-batch endpoint

* fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander

* fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check

* fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests

* fix(telemetry): unset CI kill switch in cli-program-telemetry tests
This commit is contained in:
Andrey Avtomonov 2026-05-22 18:18:47 +02:00 committed by GitHub
parent c87d14a554
commit b0dd13ce7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 6576 additions and 48 deletions

View file

@ -5,6 +5,7 @@ from __future__ import annotations
import argparse
import json
import sys
import time
from typing import Any
from pydantic import ValidationError
@ -100,8 +101,12 @@ def run_http_server(
from ktx_daemon.app import create_app
started_at = time.perf_counter()
uvicorn.run(
create_app(enable_code_execution=enable_code_execution),
create_app(
enable_code_execution=enable_code_execution,
telemetry_started_at=started_at,
),
host=host,
port=port,
log_level=log_level,

View file

@ -4,6 +4,9 @@ from __future__ import annotations
import logging
import os
import sys
import time
from contextlib import asynccontextmanager
from collections.abc import Callable
from typing import Any
@ -62,6 +65,7 @@ from ktx_daemon.table_identifier import (
ParseTableIdentifierBatchResponse,
parse_table_identifier_response,
)
from ktx_daemon.telemetry import track_telemetry_event
logger = logging.getLogger(__name__)
@ -81,11 +85,38 @@ def create_app(
]
| None = None,
enable_code_execution: bool = False,
telemetry_started_at: float | None = None,
clock: Callable[[], float] = time.perf_counter,
) -> FastAPI:
started_at = telemetry_started_at or clock()
@asynccontextmanager
async def lifespan(_: FastAPI):
track_telemetry_event(
"daemon_started",
{
"daemonVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION),
"pythonVersion": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
"runtimeVersion": VERSION,
"startupDurationMs": max(0, (clock() - started_at) * 1000),
},
)
try:
yield
finally:
track_telemetry_event(
"daemon_stopped",
{
"reason": "request",
"uptimeMs": max(0, (clock() - started_at) * 1000),
},
)
app = FastAPI(
title="KTX Daemon",
description="Stateless portable compute server for KTX.",
version=VERSION,
lifespan=lifespan,
)
@app.get("/health")

View file

@ -2,18 +2,23 @@
from __future__ import annotations
import time
from typing import Any
from pydantic import BaseModel, Field
from ktx_daemon.telemetry import error_class, track_telemetry_event
from pydantic import BaseModel, ConfigDict, Field
from semantic_layer.duplicate_check import validate_measure_duplicates
from semantic_layer.engine import SemanticEngine
from semantic_layer.models import QueryResult, SourceDefinition
# Input accepted by query_semantic_layer.
class SemanticLayerQueryRequest(BaseModel):
    # Accept `project_id` under its field name as well as its "projectId" alias.
    model_config = ConfigDict(populate_by_name=True)
    # Raw source definitions; query_semantic_layer passes them to _load_sources.
    sources: list[dict[str, Any]]
    # Query payload handed to SemanticEngine.query unchanged.
    query: dict[str, Any]
    # SQL dialect used when building the engine.
    dialect: str = "postgres"
    # Optional project id; query_semantic_layer passes it to
    # track_telemetry_event as `project_id`.
    project_id: str | None = Field(default=None, alias="projectId")
class SemanticLayerQueryResponse(BaseModel):
@ -79,15 +84,73 @@ def _response_columns(result: QueryResult) -> list[dict[str, Any]]:
def query_semantic_layer(
    request: SemanticLayerQueryRequest,
) -> SemanticLayerQueryResponse:
    """Compile a semantic-layer query and report the outcome as telemetry.

    ``sl_plan_completed`` is emitted for every call. ``sql_gen_completed`` is
    emitted when the call succeeds, or when it fails at the "compile" or
    "transpile" stage. Exactly one outcome ("ok" or "error") is reported per
    event name and call.

    Args:
        request: Source definitions, the query, the SQL dialect and an optional
            project id that is passed to ``track_telemetry_event`` as
            ``project_id``.

    Returns:
        The generated SQL, its dialect, the result columns and the resolved
        plan.

    Raises:
        Exception: Whatever loading, resolving, compiling or response building
            raised; it is re-raised unchanged after the error events are sent.
    """
    started = time.perf_counter()
    stage = "parse"
    source_count = 0
    join_count = 0
    sql_started = started
    try:
        sources = _load_sources(request.sources)
        source_count = len(sources)
        join_count = sum(len(source.joins) for source in sources.values())
        stage = "resolve"
        engine = SemanticEngine.from_sources(sources, dialect=request.dialect)
        stage = "compile"
        sql_started = time.perf_counter()
        result = engine.query(request.query)
        stage = "transpile"
        # Read the clock once, before any event is sent, so neither duration
        # absorbs the time spent delivering the other event.
        finished = time.perf_counter()
        # Build the response inside the guarded region: a failure here must be
        # reported as an error, not after an "ok" event has already gone out.
        response = SemanticLayerQueryResponse(
            sql=result.sql,
            dialect=result.dialect,
            columns=_response_columns(result),
            plan=result.resolved_plan.model_dump(mode="json"),
        )
    except Exception as error:
        failed = time.perf_counter()
        klass = error_class(error)
        fields: dict[str, Any] = {
            "outcome": "error",
            "stage": stage,
            "durationMs": max(0, (failed - started) * 1000),
            "sourceCount": source_count,
            "joinCount": join_count,
        }
        if klass:
            fields["errorClass"] = klass
        track_telemetry_event(
            "sl_plan_completed", fields, project_id=request.project_id
        )
        # SQL generation only started once the "compile" stage was reached.
        if stage in {"compile", "transpile"}:
            sql_fields: dict[str, Any] = {
                "outcome": "error",
                "dialect": request.dialect,
                "durationMs": max(0, (failed - sql_started) * 1000),
            }
            if klass:
                sql_fields["errorClass"] = klass
            track_telemetry_event(
                "sql_gen_completed", sql_fields, project_id=request.project_id
            )
        raise

    # Success events are sent outside the try block so that a telemetry
    # problem is never re-reported as a query failure.
    track_telemetry_event(
        "sl_plan_completed",
        {
            "outcome": "ok",
            "stage": stage,
            "durationMs": max(0, (finished - started) * 1000),
            "sourceCount": source_count,
            "joinCount": join_count,
        },
        project_id=request.project_id,
    )
    track_telemetry_event(
        "sql_gen_completed",
        {
            "outcome": "ok",
            "dialect": result.dialect,
            "durationMs": max(0, (finished - sql_started) * 1000),
        },
        project_id=request.project_id,
    )
    return response
def validate_semantic_layer(request: ValidateSourcesRequest) -> ValidateSourcesResponse:

View file

@ -0,0 +1,5 @@
from __future__ import annotations
from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event
__all__ = ["error_class", "track_telemetry_event"]

View file

@ -0,0 +1,106 @@
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any
from collections.abc import Mapping
from ktx_daemon.telemetry.events import build_telemetry_event
from ktx_daemon.telemetry.identity import load_telemetry_identity
# PostHog public project ingestion key - safe to embed; capture-only, no read access.
POSTHOG_PROJECT_API_KEY = (
    "phc_xbvZpbu8ZNLnogTbY7MEMWhCF2rzzApYsDndjKaRBXXx"  # pragma: allowlist secret
)
# Default ingestion host; _host() prefers KTX_TELEMETRY_ENDPOINT when it is set.
POSTHOG_HOST = "https://us.i.posthog.com"
def _host(env: Mapping[str, str]) -> str:
return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST
def _live_configured(host: str) -> bool:
    """Tell whether live delivery is possible.

    Both the embedded project key and *host* must be non-blank after
    stripping whitespace.
    """
    if not POSTHOG_PROJECT_API_KEY.strip():
        return False
    return bool(host.strip())
def _debug_enabled(env: Mapping[str, str]) -> bool:
return env.get("KTX_TELEMETRY_DEBUG") == "1"
def _scrub_error_class(error: BaseException) -> str | None:
name = type(error).__name__
if len(name) > 80:
return None
if any(marker in name for marker in ("/", "\\", "@", "://")):
return None
if not name[:1].isupper() or not name.replace("_", "").isalnum():
return None
return name
def error_class(error: BaseException) -> str | None:
    """Public wrapper returning the scrubbed class name of *error*.

    Delegates to ``_scrub_error_class``; the result is None when the name is
    not considered safe to report.
    """
    scrubbed = _scrub_error_class(error)
    return scrubbed
def track_telemetry_event(
    name: str,
    fields: dict[str, Any],
    *,
    project_id: str | None = None,
    home_dir: Path | None = None,
    env: Mapping[str, str] | None = None,
) -> None:
    """Emit one telemetry event on a best-effort basis.

    Nothing is sent when telemetry is disabled, when no install id is
    available, or when the event fails catalog validation. Failures while
    loading the identity, writing debug output or talking to PostHog are
    swallowed so that telemetry cannot break the calling code path.

    Args:
        name: Event name; must be known to ``build_telemetry_event``.
        fields: Event-specific properties.
        project_id: Optional project id, sent as the ``project`` group.
        home_dir: Overrides the home directory used to locate the identity
            file.
        env: Environment mapping to consult. ``None`` means ``os.environ``;
            any explicit mapping, including an empty one, is used as given.
    """
    # `env or os.environ` would silently replace an explicit empty mapping
    # with the process environment; only None selects the fallback.
    source_env = os.environ if env is None else env
    try:
        identity = load_telemetry_identity(home_dir=home_dir, env=source_env)
    except Exception:
        # A broken identity file or home lookup must not surface to callers.
        return
    if not identity.enabled or not identity.install_id:
        return
    try:
        event = build_telemetry_event(name, fields)
    except ValueError:
        # Unknown event or field set: drop rather than send unvetted data.
        return
    groups = {"project": project_id} if project_id else None
    if _debug_enabled(source_env):
        # Debug mode prints the payload to stderr instead of sending it.
        try:
            payload = json.dumps(
                {
                    "distinctId": identity.install_id,
                    "event": event["event"],
                    "properties": event["properties"],
                    "groups": groups,
                },
                sort_keys=True,
            )
            sys.stderr.write("[telemetry] " + payload + "\n")
        except (TypeError, ValueError, OSError):
            # Unserializable field values or a closed stderr are not fatal.
            pass
        return
    host = _host(source_env)
    if not _live_configured(host):
        return
    try:
        # Imported lazily so a missing package only disables live delivery.
        from posthog import Posthog

        client = Posthog(
            POSTHOG_PROJECT_API_KEY,
            host=host,
            flush_at=1,
            flush_interval=0,
            sync_mode=True,
            timeout=1,
        )
        try:
            client.capture(
                event=event["event"],
                distinct_id=identity.install_id,
                properties=event["properties"],
                groups=groups,
            )
        finally:
            # Release the client even when capture itself fails.
            client.shutdown()
    except Exception:
        return

View file

@ -0,0 +1,72 @@
from __future__ import annotations
import json
import os
import platform
import sys
from pathlib import Path
from typing import Any
from ktx_daemon import VERSION
# Event catalog file, located next to this module.
SCHEMA_PATH = Path(__file__).with_name("events.schema.json")
# Names of the envelope properties; they match the keys produced by
# _common_envelope().
COMMON_FIELDS = {
    "cliVersion",
    "nodeVersion",
    "osPlatform",
    "osRelease",
    "arch",
    "runtime",
    "isCi",
}
# Events the daemon may emit; _schema_catalog() ignores every other catalog entry.
DAEMON_EVENTS = {
    "daemon_started",
    "daemon_stopped",
    "sl_plan_completed",
    "sql_gen_completed",
}
def _schema_catalog() -> dict[str, set[str]]:
    """Load the allowed field names for each daemon event.

    Reads ``SCHEMA_PATH`` as UTF-8 JSON and walks its ``x-ktx-catalog``
    entries, keeping only those whose name is listed in ``DAEMON_EVENTS``.

    Returns:
        Mapping of event name to the set built from that entry's ``fields``.
    """
    document = json.loads(SCHEMA_PATH.read_text(encoding="utf-8"))
    catalog: dict[str, set[str]] = {}
    for entry in document["x-ktx-catalog"]:
        if entry["name"] in DAEMON_EVENTS:
            catalog[entry["name"]] = set(entry["fields"])
    return catalog
# Allowed field names per daemon event, loaded from the schema file once at import time.
EVENT_FIELDS = _schema_catalog()
def _common_envelope() -> dict[str, Any]:
    """Build the properties attached to every daemon telemetry event.

    Note that ``nodeVersion`` carries the Python interpreter version here and
    ``cliVersion`` carries ``KTX_DAEMON_VERSION`` (falling back to ``VERSION``).
    """
    interpreter = ".".join(str(part) for part in sys.version_info[:3])
    envelope: dict[str, Any] = {}
    envelope["cliVersion"] = os.environ.get("KTX_DAEMON_VERSION", VERSION)
    envelope["nodeVersion"] = interpreter
    envelope["osPlatform"] = sys.platform
    envelope["osRelease"] = platform.release()
    envelope["arch"] = platform.machine()
    envelope["runtime"] = "daemon-py"
    # Any non-empty CI value counts as running in CI.
    envelope["isCi"] = bool(os.environ.get("CI"))
    return envelope
def build_telemetry_event(name: str, fields: dict[str, Any]) -> dict[str, Any]:
    """Validate *fields* against the catalog and wrap them in an event dict.

    Args:
        name: Event name; must be a key of ``EVENT_FIELDS``.
        fields: Event-specific properties. Every catalogued field except the
            optional ``errorClass`` must be present, and no others are allowed.

    Returns:
        ``{"event": name, "properties": ...}`` where the properties are the
        common envelope overlaid with *fields*.

    Raises:
        ValueError: If the event is unknown, or fields are unexpected or
            missing (checked in that order).
    """
    if name not in EVENT_FIELDS:
        raise ValueError(f"unknown telemetry event: {name}")
    allowed = EVENT_FIELDS[name]

    unexpected = sorted(key for key in fields if key not in allowed)
    if unexpected:
        raise ValueError(f"unknown telemetry fields for {name}: {unexpected}")

    absent = sorted(allowed.difference(fields, {"errorClass"}))
    if absent:
        raise ValueError(f"missing telemetry fields for {name}: {absent}")

    # Event-specific values take precedence over envelope keys.
    properties = _common_envelope()
    properties.update(fields)
    return {"event": name, "properties": properties}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,81 @@
from __future__ import annotations
import json
import os
import time
from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
from collections.abc import Mapping
# How long, in clock seconds, a cached identity is reused before
# load_telemetry_identity() re-reads the file.
IDENTITY_TTL_SECONDS = 60.0
@dataclass(frozen=True)
class TelemetryIdentity:
    """Immutable snapshot of the telemetry state read from the identity file."""

    # Install identifier from the file, or None when it is absent or not a string.
    install_id: str | None
    # True only when the file enables telemetry and no kill switch applies.
    enabled: bool
    # Location of the identity file this snapshot refers to.
    path: Path
# Most recent file read as (clock reading, file path, identity); None until the
# first load and again after reset_identity_cache().
_cache: tuple[float, Path, TelemetryIdentity] | None = None
def _telemetry_path(home_dir: Path | None = None) -> Path:
return (home_dir or Path.home()) / ".ktx" / "telemetry.json"
def _env_disables(env: Mapping[str, str] | None = None) -> bool:
source = os.environ if env is None else env
return bool(
source.get("KTX_TELEMETRY_DISABLED")
or source.get("DO_NOT_TRACK")
or source.get("CI")
)
def _read_identity(path: Path) -> TelemetryIdentity:
    """Read the identity file at *path* without ever raising for bad content.

    Args:
        path: Location of the JSON identity file.

    Returns:
        An enabled identity only when the file holds a JSON object whose
        ``installId`` is a string and whose ``enabled`` is exactly ``True``.
        In every other case (unreadable file, invalid UTF-8, invalid JSON,
        JSON that is not an object, wrong field types) the identity is
        disabled; a string ``installId`` is still carried over when present.
    """
    disabled = TelemetryIdentity(install_id=None, enabled=False, path=path)
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a file that is not valid UTF-8.
        return disabled
    if not isinstance(raw, dict):
        # Valid JSON such as a list or a bare string has no fields to read.
        return disabled
    install_id = raw.get("installId")
    enabled = raw.get("enabled")
    if not isinstance(install_id, str) or enabled is not True:
        return TelemetryIdentity(
            install_id=install_id if isinstance(install_id, str) else None,
            enabled=False,
            path=path,
        )
    return TelemetryIdentity(install_id=install_id, enabled=True, path=path)
def load_telemetry_identity(
    *,
    home_dir: Path | None = None,
    env: Mapping[str, str] | None = None,
    now: Callable[[], float] | None = None,
) -> TelemetryIdentity:
    """Return the telemetry identity, reusing a recent file read when possible.

    The file read is cached per path for ``IDENTITY_TTL_SECONDS``. The
    environment kill switches are applied after the cache lookup, so the
    cached entry always reflects the file alone.

    Args:
        home_dir: Overrides the home directory used to locate the file.
        env: Environment mapping forwarded to ``_env_disables``.
        now: Clock returning seconds; defaults to ``time.monotonic``.

    Returns:
        The identity from the file, with ``enabled`` forced to False when the
        environment disables telemetry.
    """
    global _cache
    path = _telemetry_path(home_dir)
    tick = float((now or time.monotonic)())

    entry = _cache
    fresh = (
        bool(entry)
        and entry[1] == path
        and tick - entry[0] < IDENTITY_TTL_SECONDS
    )
    if fresh:
        identity = entry[2]
    else:
        identity = _read_identity(path)
        _cache = (tick, path, identity)

    if not _env_disables(env):
        return identity
    return TelemetryIdentity(install_id=identity.install_id, enabled=False, path=path)
def reset_identity_cache() -> None:
    """Drop the cached identity so the next load re-reads the file."""
    global _cache
    _cache = None
    return None