feat(telemetry): anonymous posthog usage telemetry across node cli and python daemon (#205)

* feat: add telemetry phase 1

* feat: add node telemetry event catalog

* feat: add telemetry event helpers

* feat: emit setup and connection telemetry

* feat: emit connection and stack telemetry

* feat: emit ingest and scan telemetry

* feat: emit query telemetry

* feat: emit sampled mcp telemetry

* docs: expand telemetry event catalog

* feat: add telemetry schema sync artifact

* feat: pass telemetry project id to semantic daemon

* feat: add daemon telemetry foundation

* feat: emit semantic daemon telemetry

* feat: emit daemon lifecycle telemetry

* docs: document full telemetry event catalog

* feat(telemetry): dim first-run notice

* feat(telemetry): show first-run notice before command output

* feat(telemetry): wire ktx PostHog project for live ingestion

* docs(telemetry): drop posthog project name and host from storage section

* docs(telemetry): trim to general overview and disclaimer

* docs(agents): add short telemetry guidelines

* feat(telemetry): enable posthog geoip enrichment

* docs(telemetry): drop ip-geoip note from public overview

* refactor(telemetry): drop no-op groupIdentify, rely on capture groups field

* fix(telemetry): respect CI kill switch in python daemon identity

* fix(sql): route table-count analysis to existing analyze-batch endpoint

* fix(telemetry): emit install_first_run from notice path and derive flagsPresent from commander

* fix(telemetry): read package info via getKtxCliPackageInfo to satisfy boundary check

* fix(telemetry): make python identity env={} bypass os.environ and unset CI in tests

* fix(telemetry): unset CI kill switch in cli-program-telemetry tests
This commit is contained in:
Andrey Avtomonov 2026-05-22 18:18:47 +02:00 committed by GitHub
parent c87d14a554
commit b0dd13ce7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 6576 additions and 48 deletions

View file

@ -2,18 +2,23 @@
from __future__ import annotations
import time
from typing import Any
from pydantic import BaseModel, Field
from ktx_daemon.telemetry import error_class, track_telemetry_event
from pydantic import BaseModel, ConfigDict, Field
from semantic_layer.duplicate_check import validate_measure_duplicates
from semantic_layer.engine import SemanticEngine
from semantic_layer.models import QueryResult, SourceDefinition
class SemanticLayerQueryRequest(BaseModel):
model_config = ConfigDict(populate_by_name=True)
sources: list[dict[str, Any]]
query: dict[str, Any]
dialect: str = "postgres"
project_id: str | None = Field(default=None, alias="projectId")
class SemanticLayerQueryResponse(BaseModel):
@ -79,15 +84,73 @@ def _response_columns(result: QueryResult) -> list[dict[str, Any]]:
def query_semantic_layer(
request: SemanticLayerQueryRequest,
) -> SemanticLayerQueryResponse:
sources = _load_sources(request.sources)
engine = SemanticEngine.from_sources(sources, dialect=request.dialect)
result = engine.query(request.query)
return SemanticLayerQueryResponse(
sql=result.sql,
dialect=result.dialect,
columns=_response_columns(result),
plan=result.resolved_plan.model_dump(mode="json"),
)
started = time.perf_counter()
stage = "parse"
source_count = 0
join_count = 0
sql_started = started
try:
sources = _load_sources(request.sources)
source_count = len(sources)
join_count = sum(len(source.joins) for source in sources.values())
stage = "resolve"
engine = SemanticEngine.from_sources(sources, dialect=request.dialect)
stage = "compile"
sql_started = time.perf_counter()
result = engine.query(request.query)
stage = "transpile"
track_telemetry_event(
"sl_plan_completed",
{
"outcome": "ok",
"stage": stage,
"durationMs": max(0, (time.perf_counter() - started) * 1000),
"sourceCount": source_count,
"joinCount": join_count,
},
project_id=request.project_id,
)
track_telemetry_event(
"sql_gen_completed",
{
"outcome": "ok",
"dialect": result.dialect,
"durationMs": max(0, (time.perf_counter() - sql_started) * 1000),
},
project_id=request.project_id,
)
return SemanticLayerQueryResponse(
sql=result.sql,
dialect=result.dialect,
columns=_response_columns(result),
plan=result.resolved_plan.model_dump(mode="json"),
)
except Exception as error:
klass = error_class(error)
fields: dict[str, Any] = {
"outcome": "error",
"stage": stage,
"durationMs": max(0, (time.perf_counter() - started) * 1000),
"sourceCount": source_count,
"joinCount": join_count,
}
if klass:
fields["errorClass"] = klass
track_telemetry_event(
"sl_plan_completed", fields, project_id=request.project_id
)
if stage in {"compile", "transpile"}:
sql_fields: dict[str, Any] = {
"outcome": "error",
"dialect": request.dialect,
"durationMs": max(0, (time.perf_counter() - sql_started) * 1000),
}
if klass:
sql_fields["errorClass"] = klass
track_telemetry_event(
"sql_gen_completed", sql_fields, project_id=request.project_id
)
raise
def validate_semantic_layer(request: ValidateSourcesRequest) -> ValidateSourcesResponse: