mirror of
https://github.com/Kaelio/ktx.git
synced 2026-06-07 07:55:13 +02:00
feat(telemetry): collect PostHog $exception error reports in CLI and daemon (#262)
* feat(telemetry): add node exception reporter
* feat(telemetry): report node cli exceptions
* feat(telemetry): add daemon exception reporter
* feat(telemetry): report daemon exceptions
* docs(telemetry): document error reports
* fix(telemetry): pass redaction snapshots from node call sites
* test(telemetry): verify prepared node exception payload
* fix(telemetry): close daemon exception lifecycle gaps
* test(telemetry): verify prepared daemon exception payload
* test(telemetry): close error collection acceptance gaps
* test(telemetry): close posthog exception acceptance gaps
This commit is contained in:
parent
c3d8cedb0b
commit
fb7b94b60e
36 changed files with 2870 additions and 140 deletions
|
|
@ -6,6 +6,8 @@ import argparse
|
|||
import json
|
||||
import sys
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from types import TracebackType
|
||||
from typing import Any
|
||||
|
||||
from pydantic import ValidationError
|
||||
|
|
@ -90,6 +92,41 @@ def _read_stdin_json() -> dict[str, Any]:
|
|||
return parsed
|
||||
|
||||
|
||||
def install_serve_http_exception_hooks(started_at: float) -> Callable[[], None]:
|
||||
original_hook = sys.excepthook
|
||||
|
||||
def hook(
|
||||
exc_type: type[BaseException],
|
||||
exc: BaseException,
|
||||
tb: TracebackType | None,
|
||||
) -> None:
|
||||
report_serve_http_crash(exc, started_at=started_at)
|
||||
original_hook(exc_type, exc, tb)
|
||||
|
||||
sys.excepthook = hook
|
||||
|
||||
def dispose() -> None:
|
||||
sys.excepthook = original_hook
|
||||
|
||||
return dispose
|
||||
|
||||
|
||||
def report_serve_http_crash(error: BaseException, *, started_at: float) -> None:
    """Report a fatal serve-http failure and emit the crash ``daemon_stopped`` event.

    Args:
        error: The exception that brought the server down.
        started_at: ``time.perf_counter()`` reading used as the uptime origin.
    """
    from ktx_daemon.telemetry import report_exception
    from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once

    crash_flags = {"source": "serve-http", "handled": False, "fatal": True}
    report_exception(error, **crash_flags)

    # Uptime is sampled after the exception report, exactly as before.
    elapsed_ms = (time.perf_counter() - started_at) * 1000
    emit_daemon_stopped_once(reason="crash", uptime_ms=max(0, elapsed_ms))
|
||||
|
||||
|
||||
def run_http_server(
|
||||
*,
|
||||
host: str,
|
||||
|
|
@ -102,15 +139,23 @@ def run_http_server(
|
|||
from ktx_daemon.app import create_app
|
||||
|
||||
started_at = time.perf_counter()
|
||||
uvicorn.run(
|
||||
create_app(
|
||||
enable_code_execution=enable_code_execution,
|
||||
telemetry_started_at=started_at,
|
||||
),
|
||||
host=host,
|
||||
port=port,
|
||||
log_level=log_level,
|
||||
)
|
||||
dispose_hooks = install_serve_http_exception_hooks(started_at)
|
||||
try:
|
||||
try:
|
||||
uvicorn.run(
|
||||
create_app(
|
||||
enable_code_execution=enable_code_execution,
|
||||
telemetry_started_at=started_at,
|
||||
),
|
||||
host=host,
|
||||
port=port,
|
||||
log_level=log_level,
|
||||
)
|
||||
except Exception as error:
|
||||
report_serve_http_crash(error, started_at=started_at)
|
||||
raise
|
||||
finally:
|
||||
dispose_hooks()
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
|
|
@ -169,6 +214,14 @@ def main(argv: list[str] | None = None) -> int:
|
|||
sys.stderr.write(f"{error}\n")
|
||||
return 1
|
||||
except Exception as error:
|
||||
from ktx_daemon.telemetry import report_exception
|
||||
|
||||
report_exception(
|
||||
error,
|
||||
source=str(args.command),
|
||||
handled=True,
|
||||
fatal=False,
|
||||
)
|
||||
sys.stderr.write(f"{type(error).__name__}: {error}\n")
|
||||
return 1
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ from contextlib import asynccontextmanager
|
|||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import Response
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
|
||||
from ktx_daemon import VERSION
|
||||
from ktx_daemon.code_execution import (
|
||||
|
|
@ -65,9 +65,11 @@ from ktx_daemon.table_identifier import (
|
|||
ParseTableIdentifierBatchResponse,
|
||||
parse_table_identifier_response,
|
||||
)
|
||||
from ktx_daemon.telemetry import track_telemetry_event
|
||||
from ktx_daemon.telemetry import report_exception, track_telemetry_event
|
||||
from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
CREDENTIAL_KEYS = {"url", "password", "token", "api_key", "apikey", "auth_header"}
|
||||
|
||||
|
||||
class NumpyORJSONResponse(Response):
|
||||
|
|
@ -77,6 +79,36 @@ class NumpyORJSONResponse(Response):
|
|||
return dumps_numpy_json(content)
|
||||
|
||||
|
||||
def _route_source(request: Request) -> str:
|
||||
route = request.scope.get("route")
|
||||
path = getattr(route, "path", None)
|
||||
if isinstance(path, str) and path:
|
||||
return f"app:{path}"
|
||||
return f"app:{request.url.path}"
|
||||
|
||||
|
||||
def _secret_snapshot_from_payload(value: Any) -> list[str]:
    """Collect credential strings from a nested payload.

    Walks dicts and lists recursively. A value is collected when it is a
    non-empty string stored under a key whose lower-cased name is in
    ``CREDENTIAL_KEYS``. Any other value type yields nothing.
    """
    if isinstance(value, dict):
        found: list[str] = []
        for name, item in value.items():
            is_credential_key = str(name).lower() in CREDENTIAL_KEYS
            if is_credential_key and isinstance(item, str) and item:
                found.append(item)
            # Always descend, so credentials nested under any key are found.
            found += _secret_snapshot_from_payload(item)
        return found
    if isinstance(value, list):
        return [
            secret
            for item in value
            for secret in _secret_snapshot_from_payload(item)
        ]
    return []
|
||||
|
||||
|
||||
async def _request_secret_snapshot(request: Request) -> list[str]:
|
||||
try:
|
||||
payload = await request.json()
|
||||
except Exception:
|
||||
return []
|
||||
return _secret_snapshot_from_payload(payload)
|
||||
|
||||
|
||||
def create_app(
|
||||
*,
|
||||
embedding_provider: EmbeddingProvider | None = None,
|
||||
|
|
@ -104,12 +136,9 @@ def create_app(
|
|||
try:
|
||||
yield
|
||||
finally:
|
||||
track_telemetry_event(
|
||||
"daemon_stopped",
|
||||
{
|
||||
"reason": "request",
|
||||
"uptimeMs": max(0, (clock() - started_at) * 1000),
|
||||
},
|
||||
emit_daemon_stopped_once(
|
||||
reason="request",
|
||||
uptime_ms=max(0, (clock() - started_at) * 1000),
|
||||
)
|
||||
|
||||
app = FastAPI(
|
||||
|
|
@ -119,6 +148,25 @@ def create_app(
|
|||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
@app.middleware("http")
|
||||
async def report_unhandled_exceptions(request: Request, call_next):
|
||||
redaction_secrets = await _request_secret_snapshot(request)
|
||||
try:
|
||||
return await call_next(request)
|
||||
except Exception as error:
|
||||
logger.exception("Unhandled daemon request failed: %s", error)
|
||||
report_exception(
|
||||
error,
|
||||
source=_route_source(request),
|
||||
handled=True,
|
||||
fatal=False,
|
||||
redaction_secrets=redaction_secrets,
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={"detail": f"Daemon request failed: {error}"},
|
||||
)
|
||||
|
||||
@app.get("/health")
|
||||
async def health() -> dict[str, str]:
|
||||
response = {"status": "healthy"}
|
||||
|
|
@ -137,12 +185,6 @@ def create_app(
|
|||
except ValueError as error:
|
||||
logger.warning("Database introspection rejected: %s", error)
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
except Exception as error:
|
||||
logger.exception("Database introspection failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Database introspection failed: {error}",
|
||||
) from error
|
||||
|
||||
@app.post("/embeddings/compute", response_model=ComputeEmbeddingResponse)
|
||||
async def embedding_compute(
|
||||
|
|
@ -156,12 +198,6 @@ def create_app(
|
|||
except ValueError as error:
|
||||
logger.warning("Embedding compute rejected: %s", error)
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
except Exception as error:
|
||||
logger.exception("Embedding compute failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Embedding compute failed: {error}",
|
||||
) from error
|
||||
|
||||
@app.post(
|
||||
"/embeddings/compute-bulk",
|
||||
|
|
@ -178,12 +214,6 @@ def create_app(
|
|||
except ValueError as error:
|
||||
logger.warning("Bulk embedding compute rejected: %s", error)
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
except Exception as error:
|
||||
logger.exception("Bulk embedding compute failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Bulk embedding compute failed: {error}",
|
||||
) from error
|
||||
|
||||
if enable_code_execution:
|
||||
|
||||
|
|
@ -193,29 +223,15 @@ def create_app(
|
|||
response_class=NumpyORJSONResponse,
|
||||
)
|
||||
async def code_execute(request: ExecuteCodeRequest) -> ExecuteCodeResponse:
|
||||
try:
|
||||
return execute_code_response(
|
||||
request,
|
||||
nest_api_url=None,
|
||||
auth_header=None,
|
||||
)
|
||||
except Exception as error:
|
||||
logger.exception("Code execution failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Code execution failed: {error}",
|
||||
) from error
|
||||
return execute_code_response(
|
||||
request,
|
||||
nest_api_url=None,
|
||||
auth_header=None,
|
||||
)
|
||||
|
||||
@app.post("/lookml/parse", response_model=ParseLookMLResponse)
|
||||
async def lookml_parse(request: ParseLookMLRequest) -> ParseLookMLResponse:
|
||||
try:
|
||||
return parse_lookml_project(request)
|
||||
except Exception as error:
|
||||
logger.exception("LookML parsing failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"LookML parsing failed: {error}",
|
||||
) from error
|
||||
return parse_lookml_project(request)
|
||||
|
||||
@app.post(
|
||||
"/sql/parse-table-identifier",
|
||||
|
|
@ -224,40 +240,19 @@ def create_app(
|
|||
async def sql_parse_table_identifier(
|
||||
request: ParseTableIdentifierBatchRequest,
|
||||
) -> ParseTableIdentifierBatchResponse:
|
||||
try:
|
||||
return parse_table_identifier_response(request)
|
||||
except Exception as error:
|
||||
logger.exception("Table identifier parsing failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Table identifier parsing failed: {error}",
|
||||
) from error
|
||||
return parse_table_identifier_response(request)
|
||||
|
||||
@app.post("/sql/validate-read-only", response_model=ValidateReadOnlySqlResponse)
|
||||
async def sql_validate_read_only(
|
||||
request: ValidateReadOnlySqlRequest,
|
||||
) -> ValidateReadOnlySqlResponse:
|
||||
try:
|
||||
return validate_read_only_sql_response(request)
|
||||
except Exception as error:
|
||||
logger.exception("SQL read-only validation failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"SQL read-only validation failed: {error}",
|
||||
) from error
|
||||
return validate_read_only_sql_response(request)
|
||||
|
||||
@app.post("/sql/analyze-batch", response_model=AnalyzeSqlBatchResponse)
|
||||
async def sql_analyze_batch(
|
||||
request: AnalyzeSqlBatchRequest,
|
||||
) -> AnalyzeSqlBatchResponse:
|
||||
try:
|
||||
return analyze_sql_batch_response(request)
|
||||
except Exception as error:
|
||||
logger.exception("SQL batch analysis failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"SQL batch analysis failed: {error}",
|
||||
) from error
|
||||
return analyze_sql_batch_response(request)
|
||||
|
||||
@app.post(
|
||||
"/semantic-layer/generate-sources", response_model=GenerateSourcesResponse
|
||||
|
|
@ -265,14 +260,7 @@ def create_app(
|
|||
async def semantic_generate_sources(
|
||||
request: GenerateSourcesRequest,
|
||||
) -> GenerateSourcesResponse:
|
||||
try:
|
||||
return generate_sources_response(request)
|
||||
except Exception as error:
|
||||
logger.exception("Semantic source generation failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Semantic source generation failed: {error}",
|
||||
) from error
|
||||
return generate_sources_response(request)
|
||||
|
||||
@app.post("/semantic-layer/query", response_model=SemanticLayerQueryResponse)
|
||||
async def semantic_query(
|
||||
|
|
@ -283,12 +271,6 @@ def create_app(
|
|||
except ValueError as error:
|
||||
logger.warning("Semantic query rejected: %s", error)
|
||||
raise HTTPException(status_code=400, detail=str(error)) from error
|
||||
except Exception as error:
|
||||
logger.exception("Semantic query failed: %s", error)
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Semantic layer query failed: {error}",
|
||||
) from error
|
||||
|
||||
@app.post("/semantic-layer/validate", response_model=ValidateSourcesResponse)
|
||||
async def semantic_validate(
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from __future__ import annotations
|
|||
import time
|
||||
from typing import Any
|
||||
|
||||
from ktx_daemon.telemetry import error_class, track_telemetry_event
|
||||
from ktx_daemon.telemetry import error_class, report_exception, track_telemetry_event
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
from semantic_layer.duplicate_check import validate_measure_duplicates
|
||||
from semantic_layer.engine import SemanticEngine
|
||||
|
|
@ -150,6 +150,13 @@ def query_semantic_layer(
|
|||
track_telemetry_event(
|
||||
"sql_gen_completed", sql_fields, project_id=request.project_id
|
||||
)
|
||||
report_exception(
|
||||
error,
|
||||
source="semantic-query",
|
||||
handled=True,
|
||||
fatal=False,
|
||||
project_id=request.project_id,
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,12 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from ktx_daemon.telemetry.daemon_lifecycle import emit_daemon_stopped_once
|
||||
from ktx_daemon.telemetry.emitter import error_class, track_telemetry_event
|
||||
from ktx_daemon.telemetry.exception import report_exception
|
||||
|
||||
__all__ = ["error_class", "track_telemetry_event"]
|
||||
__all__ = [
|
||||
"emit_daemon_stopped_once",
|
||||
"error_class",
|
||||
"report_exception",
|
||||
"track_telemetry_event",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from ktx_daemon.telemetry.emitter import track_telemetry_event
|
||||
|
||||
StopReason = Literal["signal", "request", "crash"]
|
||||
|
||||
_daemon_stop_emitted = False
|
||||
|
||||
|
||||
def emit_daemon_stopped_once(*, reason: StopReason, uptime_ms: float) -> bool:
    """Emit the ``daemon_stopped`` telemetry event at most once per process.

    Args:
        reason: Why the daemon stopped.
        uptime_ms: Daemon uptime in milliseconds; negative values are sent as 0.

    Returns:
        True when this call emitted the event, False when an earlier call
        already did.
    """
    global _daemon_stop_emitted
    first_emission = not _daemon_stop_emitted
    if first_emission:
        # The flag is set before tracking, so later calls are suppressed
        # even if tracking raises.
        _daemon_stop_emitted = True
        payload = {"reason": reason, "uptimeMs": max(0, uptime_ms)}
        track_telemetry_event("daemon_stopped", payload)
    return first_emission
|
||||
|
||||
|
||||
def reset_daemon_lifecycle_for_tests() -> None:
    """Clear the module-level "stop already emitted" flag."""
    global _daemon_stop_emitted
    _daemon_stop_emitted = False
|
||||
156
python/ktx-daemon/src/ktx_daemon/telemetry/exception.py
Normal file
156
python/ktx-daemon/src/ktx_daemon/telemetry/exception.py
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from collections.abc import Mapping, Sequence
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ktx_daemon import VERSION
|
||||
from ktx_daemon.telemetry.emitter import POSTHOG_HOST, POSTHOG_PROJECT_API_KEY
|
||||
from ktx_daemon.telemetry.events import _common_envelope
|
||||
from ktx_daemon.telemetry.identity import load_telemetry_identity
|
||||
|
||||
_KTX_REPORTED_ATTR = "__ktx_posthog_exception_reported"
|
||||
|
||||
|
||||
def _debug_enabled(env: Mapping[str, str]) -> bool:
|
||||
return env.get("KTX_TELEMETRY_DEBUG") == "1"
|
||||
|
||||
|
||||
def _host(env: Mapping[str, str]) -> str:
|
||||
return env.get("KTX_TELEMETRY_ENDPOINT") or POSTHOG_HOST
|
||||
|
||||
|
||||
def _redact_static(value: str) -> str:
|
||||
patterns = [
|
||||
(
|
||||
r"([a-z][a-z0-9+.-]*://[^:\s/@]+:)([^@\s/]+)(@)",
|
||||
r"\1[redacted]\3",
|
||||
),
|
||||
(r"\b(password|pwd)=([^;&\s]+)", r"\1=[redacted]"),
|
||||
(r"\bAuthorization\s*:\s*[^\r\n,;]+", "Authorization: [redacted]"),
|
||||
(r"\bBearer\s+[A-Za-z0-9._~+/=-]+", "Bearer [redacted]"),
|
||||
(r"\b(api[_-]?key)\s*[:=]\s*([^\s,;]+)", r"\1=[redacted]"),
|
||||
(
|
||||
r"\b(KTX_[A-Z0-9_]*|[A-Z0-9_]*(?:TOKEN|SECRET))\s*[:=]\s*([^\s,;]+)",
|
||||
r"\1=[redacted]",
|
||||
),
|
||||
(r"([?&](?:X-Amz-Signature|X-Goog-Signature|sig)=)[^&\s]+", r"\1[redacted]"),
|
||||
]
|
||||
redacted = value
|
||||
for pattern, replacement in patterns:
|
||||
redacted = re.sub(pattern, replacement, redacted, flags=re.IGNORECASE)
|
||||
return redacted
|
||||
|
||||
|
||||
def _redact_text(value: str, secrets: Sequence[str]) -> str:
    """Mask caller-supplied secrets in *value*, then apply the static patterns.

    Args:
        value: Text to sanitise.
        secrets: Literal secret strings to mask; empty entries are ignored.

    Returns:
        *value* with every occurrence of each secret replaced by
        ``[redacted]``, further passed through ``_redact_static``.
    """
    redacted = value
    # Replace longest secrets first: if one secret is a substring of another,
    # masking the short one first would break up the longer secret and leave
    # its remaining characters in the output. Duplicates are dropped and ties
    # are ordered by value so the result is deterministic.
    ordered = sorted({s for s in secrets if s}, key=lambda s: (-len(s), s))
    for secret in ordered:
        redacted = redacted.replace(secret, "[redacted]")
    return _redact_static(redacted)
|
||||
|
||||
|
||||
def _clone_exception(exception: BaseException, secrets: Sequence[str]) -> BaseException:
    """Return a redacted copy of *exception* and of its cause/context chain.

    Each copy is built from the original's ``args`` after stringifying and
    redacting them. If the original type cannot be constructed from those
    arguments, a ``RuntimeError`` carrying the redacted message is used
    instead. The original traceback object is reused on the copy.

    Args:
        exception: The exception to copy.
        secrets: Literal secret strings forwarded to ``_redact_text``.

    Returns:
        The redacted copy; the original exception's message and chain links
        are left unchanged.
    """
    # Keyed by id() of the original so that an exception reachable through
    # several links (for example both __cause__ and __context__) is cloned
    # once, and a cyclic chain terminates instead of recursing without bound.
    clones: dict[int, BaseException] = {}

    def clone(original: BaseException) -> BaseException:
        known = clones.get(id(original))
        if known is not None:
            return known
        redacted_args = [_redact_text(str(arg), secrets) for arg in original.args]
        try:
            copy = type(original)(*redacted_args)
        except Exception:
            copy = RuntimeError(_redact_text(str(original), secrets))
        # Register before following links so cycles resolve to this copy.
        clones[id(original)] = copy
        copy.__traceback__ = original.__traceback__
        cause = original.__cause__
        context = original.__context__
        copy.__cause__ = clone(cause) if cause is not None else None
        copy.__context__ = clone(context) if context is not None else None
        # Assigning __cause__ (even None) sets __suppress_context__ to True;
        # restore the original flag so implicit context chains stay visible.
        copy.__suppress_context__ = original.__suppress_context__
        return copy

    return clone(exception)
|
||||
|
||||
|
||||
def _should_skip_as_reported(exception: BaseException) -> bool:
    """Tell whether *exception* was already reported, marking it if not.

    Returns True when the marker attribute is already set to a truthy value.
    Otherwise tries to set the marker and returns False, whether or not the
    attribute could be set.
    """
    if getattr(exception, _KTX_REPORTED_ATTR, False):
        return True
    try:
        setattr(exception, _KTX_REPORTED_ATTR, True)
    except Exception:
        # Exceptions that reject new attributes simply stay unmarked.
        pass
    return False
|
||||
|
||||
|
||||
def _properties(*, source: str, handled: bool, fatal: bool) -> dict[str, Any]:
    """Build the event properties: the common envelope plus exception fields.

    ``daemonVersion`` comes from the ``KTX_DAEMON_VERSION`` environment
    variable when set, otherwise from the package ``VERSION``.
    """
    props: dict[str, Any] = {**_common_envelope()}
    props.update(
        {
            "daemonVersion": os.environ.get("KTX_DAEMON_VERSION", VERSION),
            "source": source,
            "handled": handled,
            "fatal": fatal,
        }
    )
    return props
|
||||
|
||||
|
||||
def report_exception(
    exception: BaseException,
    *,
    source: str,
    handled: bool,
    fatal: bool,
    project_id: str | None = None,
    home_dir: Path | None = None,
    env: Mapping[str, str] | None = None,
    redaction_secrets: Sequence[str] | None = None,
) -> None:
    """Send a redacted exception report to PostHog, never raising.

    Nothing is sent when telemetry identity is disabled or has no install id,
    when the exception was already reported, or when the API key or host is
    blank. With ``KTX_TELEMETRY_DEBUG=1`` the payload summary is written to
    stderr instead of being sent.

    Args:
        exception: The exception to report; a redacted clone is what is sent.
        source: Label stored in the ``source`` property.
        handled: Stored in the ``handled`` property.
        fatal: Stored in the ``fatal`` property.
        project_id: When truthy, sent as the ``project`` group.
        home_dir: Forwarded to ``load_telemetry_identity``.
        env: Environment mapping to read settings from; defaults to
            ``os.environ``.
        redaction_secrets: Literal strings to mask in exception messages.
    """
    source_env = env if env is not None else os.environ
    try:
        identity = load_telemetry_identity(home_dir=home_dir, env=source_env)
        if not identity.enabled or not identity.install_id:
            return

        if _should_skip_as_reported(exception):
            return

        properties = _properties(source=source, handled=handled, fatal=fatal)
        groups = {"project": project_id} if project_id else None
        safe_exception = _clone_exception(exception, redaction_secrets or [])

        if _debug_enabled(source_env):
            sys.stderr.write(
                "[telemetry-exception] "
                + json.dumps(
                    {
                        "distinctId": identity.install_id,
                        "message": str(safe_exception),
                        "properties": properties,
                        "groups": groups,
                    },
                    sort_keys=True,
                )
                + "\n"
            )
            return

        # Resolve the host once so the guard and the client use the same value.
        host = _host(source_env)
        if not POSTHOG_PROJECT_API_KEY.strip() or not host.strip():
            return

        from posthog import Posthog

        client = Posthog(
            POSTHOG_PROJECT_API_KEY,
            host=host,
            flush_at=1,
            flush_interval=0,
            sync_mode=True,
            timeout=1,
        )
        try:
            client.capture_exception(
                safe_exception,
                distinct_id=identity.install_id,
                properties=properties,
                groups=groups,
            )
        finally:
            # Always release the client, even when the capture call fails.
            client.shutdown()
    except Exception:
        # Telemetry is best-effort and must never affect the caller.
        return
|
||||
|
|
@ -87,8 +87,10 @@ def test_app_lifespan_emits_daemon_lifecycle_debug_events(
|
|||
monkeypatch,
|
||||
capsys,
|
||||
) -> None:
|
||||
from ktx_daemon.telemetry.daemon_lifecycle import reset_daemon_lifecycle_for_tests
|
||||
from ktx_daemon.telemetry.identity import reset_identity_cache
|
||||
|
||||
reset_daemon_lifecycle_for_tests()
|
||||
reset_identity_cache()
|
||||
identity_path = tmp_path / ".ktx" / "telemetry.json"
|
||||
identity_path.parent.mkdir(parents=True)
|
||||
|
|
|
|||
118
python/ktx-daemon/tests/test_exception_payload.py
Normal file
118
python/ktx-daemon/tests/test_exception_payload.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import json
|
||||
import threading
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ktx_daemon.telemetry.identity import reset_identity_cache
|
||||
|
||||
|
||||
class CaptureHandler(BaseHTTPRequestHandler):
    """HTTP handler that records every POSTed JSON body in ``payloads``."""

    # Class-level list, so captured bodies are shared by all handler instances.
    payloads: list[dict[str, Any]] = []

    def do_POST(self) -> None:
        """Decode the (optionally gzip-compressed) JSON body, store it, reply 200 ``{}``."""
        size = int(self.headers.get("content-length", "0"))
        body = self.rfile.read(size)
        gzipped = self.headers.get("content-encoding") == "gzip"
        text = (gzip.decompress(body) if gzipped else body).decode("utf-8")
        self.payloads.append(json.loads(text))
        self.send_response(200)
        self.send_header("content-type", "application/json")
        self.end_headers()
        self.wfile.write(b"{}")

    def log_message(self, _format: str, *_args: object) -> None:
        """Discard request log lines."""
        return None
|
||||
|
||||
|
||||
def write_identity(home: Path) -> None:
    """Write an enabled telemetry identity file to ``<home>/.ktx/telemetry.json``."""
    identity = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": True,
        "createdAt": "2026-06-05T00:00:00.000Z",
    }
    ktx_dir = home / ".ktx"
    ktx_dir.mkdir(parents=True, exist_ok=True)
    (ktx_dir / "telemetry.json").write_text(
        f"{json.dumps(identity)}\n", encoding="utf-8"
    )
|
||||
|
||||
|
||||
def find_exception_event(payloads: list[dict[str, Any]]) -> dict[str, Any]:
    """Return the first ``$exception`` event among the captured payloads.

    A payload whose ``batch`` value is a list contributes each batch entry;
    any other payload is treated as a single event.

    Raises:
        AssertionError: If no ``$exception`` event is present.
    """

    def iter_events():
        for payload in payloads:
            batch = payload.get("batch")
            yield from batch if isinstance(batch, list) else [payload]

    match = next(
        (
            candidate
            for candidate in iter_events()
            if isinstance(candidate, dict) and candidate.get("event") == "$exception"
        ),
        None,
    )
    if match is None:
        raise AssertionError(f"No $exception payload found: {payloads}")
    return match
|
||||
|
||||
|
||||
def test_prepared_python_exception_payload_groups_and_redacts(tmp_path: Path) -> None:
    """End-to-end check of the wire payload: groups present, secrets redacted.

    Runs ``report_exception`` against a local capture server and inspects the
    ``$exception`` event it received.
    """
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    CaptureHandler.payloads.clear()
    capture_server = HTTPServer(("127.0.0.1", 0), CaptureHandler)
    worker = threading.Thread(target=capture_server.serve_forever, daemon=True)
    worker.start()
    try:
        snapshot_secret = "-".join(["plain", "secret", "value"])
        db_password = "-".join(["db", "url", "secret"])
        auth_token = "".join(["abc", "123"])
        failure = RuntimeError(
            f"{snapshot_secret} postgres://svc:{db_password}@db.example.test/analytics "
            f"Authorization: Basic {auth_token}"
        )
        endpoint = f"http://127.0.0.1:{capture_server.server_port}"
        report_exception(
            failure,
            source="database-introspect",
            handled=True,
            fatal=False,
            project_id="a" * 64,
            home_dir=tmp_path,
            env={"KTX_TELEMETRY_ENDPOINT": endpoint},
            redaction_secrets=[snapshot_secret],
        )
    finally:
        capture_server.shutdown()
        capture_server.server_close()
        worker.join(timeout=2)

    event = find_exception_event(CaptureHandler.payloads)
    properties = event["properties"]

    # Groups may be placed on the event itself or inside its properties.
    expected_groups = {"project": "a" * 64}
    assert (
        event.get("$groups") == expected_groups
        or properties.get("$groups") == expected_groups
    )

    serialized = json.dumps(properties.get("$exception_list", []))
    assert "[redacted]" in serialized
    for leaked in (snapshot_secret, db_password, auth_token):
        assert leaked not in serialized

    forbidden_keys = {
        "argv",
        "args",
        "env",
        "environment",
        "sql",
        "query",
        "prompt",
        "mcpArguments",
        "tableName",
        "schemaName",
        "columnName",
        "databaseUrl",
        "connectionString",
        "url",
        "password",
        "token",
        "apiKey",
        "authorization",
    }
    assert forbidden_keys.isdisjoint(properties.keys())
|
||||
601
python/ktx-daemon/tests/test_exception_telemetry.py
Normal file
601
python/ktx-daemon/tests/test_exception_telemetry.py
Normal file
|
|
@ -0,0 +1,601 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ktx_daemon.telemetry.identity import reset_identity_cache
|
||||
|
||||
|
||||
class FakePosthog:
    """In-memory stand-in for the PostHog client used by these tests."""

    # Class-level state, so captures are visible regardless of which instance
    # the code under test creates.
    captures: list[dict[str, Any]] = []
    shutdowns = 0

    def __init__(self, *_args: Any, **_kwargs: Any) -> None:
        """Accept and ignore any constructor arguments."""

    def capture_exception(
        self,
        exception: BaseException,
        *,
        distinct_id: str,
        properties: dict[str, Any],
        groups: dict[str, str] | None = None,
    ) -> None:
        """Record the call's arguments in ``captures``."""
        record = dict(
            exception=exception,
            distinct_id=distinct_id,
            properties=properties,
            groups=groups,
        )
        self.captures.append(record)

    def shutdown(self) -> None:
        """Count the shutdown on the class."""
        cls = type(self)
        cls.shutdowns = cls.shutdowns + 1
|
||||
|
||||
|
||||
def write_identity(home: Path, *, enabled: bool = True) -> None:
    """Write a telemetry identity file to ``<home>/.ktx/telemetry.json``.

    Args:
        home: Directory that stands in for the user's home.
        enabled: Value stored in the file's ``enabled`` field.
    """
    identity = {
        "installId": "00000000-0000-4000-8000-000000000000",
        "enabled": enabled,
        "createdAt": "2026-06-05T00:00:00.000Z",
    }
    ktx_dir = home / ".ktx"
    ktx_dir.mkdir(parents=True, exist_ok=True)
    (ktx_dir / "telemetry.json").write_text(
        f"{json.dumps(identity)}\n", encoding="utf-8"
    )
|
||||
|
||||
|
||||
def test_report_exception_respects_disabled_gate(tmp_path: Path, monkeypatch) -> None:
    """Nothing is captured when ``KTX_TELEMETRY_DISABLED=1`` is in effect."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    monkeypatch.setenv("KTX_TELEMETRY_DISABLED", "1")
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    disabled_env = {"KTX_TELEMETRY_DISABLED": "1"}
    report_exception(
        RuntimeError("boom"),
        source="semantic-query",
        handled=True,
        fatal=False,
        home_dir=tmp_path,
        env=disabled_env,
    )

    assert FakePosthog.captures == []
|
||||
|
||||
|
||||
def test_report_exception_sends_groups_and_properties(
    tmp_path: Path, monkeypatch
) -> None:
    """A single capture carries the install id, project group and core properties."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    project = "a" * 64
    report_exception(
        RuntimeError("boom"),
        source="semantic-query",
        handled=True,
        fatal=False,
        project_id=project,
        home_dir=tmp_path,
        env={},
    )

    first = FakePosthog.captures[0]
    # The exception and properties entries are compared with themselves, so
    # this pins the capture count, the distinct id and the groups.
    assert FakePosthog.captures == [
        {
            "exception": first["exception"],
            "distinct_id": "00000000-0000-4000-8000-000000000000",
            "properties": first["properties"],
            "groups": {"project": project},
        }
    ]
    sent_properties = FakePosthog.captures[0]["properties"]
    assert sent_properties["source"] == "semantic-query"
    assert sent_properties["handled"] is True
    assert sent_properties["fatal"] is False
    assert sent_properties["runtime"] == "daemon-py"
|
||||
|
||||
|
||||
def test_report_exception_debug_prints_without_sending(tmp_path: Path, capsys) -> None:
    """Debug mode writes the payload to stderr and captures nothing."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()

    debug_env = {"KTX_TELEMETRY_DEBUG": "1"}
    report_exception(
        RuntimeError("debug boom"),
        source="app:/health",
        handled=True,
        fatal=False,
        home_dir=tmp_path,
        env=debug_env,
    )

    stderr_text = capsys.readouterr().err
    assert "[telemetry-exception]" in stderr_text
    assert '"source": "app:/health"' in stderr_text
    assert FakePosthog.captures == []
|
||||
|
||||
|
||||
def test_report_exception_redacts_snapshot_and_static_patterns(
    tmp_path: Path, monkeypatch
) -> None:
    """Snapshot secrets and pattern-matched credentials are masked, including in the cause."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    failure = RuntimeError("dsn has plain-secret and password=hunter2")
    failure.__cause__ = ValueError("Authorization: Bearer token-123")

    report_exception(
        failure,
        source="database-introspect",
        handled=True,
        fatal=False,
        home_dir=tmp_path,
        env={},
        redaction_secrets=["plain-secret"],
    )

    sent = FakePosthog.captures[0]["exception"]
    sent_message = str(sent)
    assert "[redacted]" in sent_message
    assert "plain-secret" not in sent_message
    assert "hunter2" not in sent_message
    assert "token-123" not in str(sent.__cause__)
|
||||
|
||||
|
||||
def test_report_exception_does_not_discover_env_values_without_snapshot(
    tmp_path: Path, monkeypatch
) -> None:
    """Without a redaction snapshot, a process environment value is not masked."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setenv("KTX_FAKE_SECRET", "plain-secret-without-pattern")
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    report_exception(
        RuntimeError("plain-secret-without-pattern"),
        source="sys.excepthook",
        handled=False,
        fatal=True,
        home_dir=tmp_path,
        env={},
    )

    sent_message = str(FakePosthog.captures[0]["exception"])
    assert "plain-secret-without-pattern" in sent_message
|
||||
|
||||
|
||||
def test_route_derived_boundary_reports_new_throwing_route(monkeypatch) -> None:
    """A route added after ``create_app`` is still covered by the reporting boundary."""
    from fastapi import FastAPI
    from fastapi.testclient import TestClient
    from ktx_daemon.app import create_app

    reports: list[dict[str, object]] = []

    def fake_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        reports.append(entry)

    monkeypatch.setattr("ktx_daemon.app.report_exception", fake_report)
    app: FastAPI = create_app()

    @app.get("/new-throwing-route")
    async def new_throwing_route() -> dict[str, str]:
        raise RuntimeError("route boom")

    response = TestClient(app, raise_server_exceptions=False).get(
        "/new-throwing-route"
    )

    assert response.status_code == 500
    assert reports
    first_report = reports[0]
    assert first_report["source"] in {"app:/new-throwing-route", "app:new_throwing_route"}
    assert first_report["handled"] is True
    assert first_report["fatal"] is False
|
||||
|
||||
|
||||
def test_route_derived_boundary_covers_existing_validate_route(monkeypatch) -> None:
    """A failure inside the validate route is reported with a route-derived source."""
    from fastapi.testclient import TestClient
    from ktx_daemon import app as app_module

    reports: list[dict[str, object]] = []

    def fake_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        reports.append(entry)

    def explode(_request):
        raise RuntimeError("validate boom")

    monkeypatch.setattr(app_module, "validate_semantic_layer", explode)
    monkeypatch.setattr(app_module, "report_exception", fake_report)

    client = TestClient(app_module.create_app(), raise_server_exceptions=False)
    response = client.post("/semantic-layer/validate", json={"sources": []})

    assert response.status_code == 500
    assert reports
    accepted_sources = {
        "app:/semantic-layer/validate",
        "app:semantic_validate",
    }
    assert reports[0]["source"] in accepted_sources
|
||||
|
||||
|
||||
def test_daemon_stopped_clean_shutdown_emits_request_once(monkeypatch) -> None:
    """Expect only the first of two ``request`` stop emissions to be tracked."""
    from ktx_daemon.telemetry.daemon_lifecycle import (
        emit_daemon_stopped_once,
        reset_daemon_lifecycle_for_tests,
    )

    tracked: list[tuple[str, dict[str, object]]] = []

    def record_event(name, fields):
        tracked.append((name, fields))

    monkeypatch.setattr(
        "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event",
        record_event,
    )
    reset_daemon_lifecycle_for_tests()

    for uptime in (1, 2):
        emit_daemon_stopped_once(reason="request", uptime_ms=uptime)

    expected_event = ("daemon_stopped", {"reason": "request", "uptimeMs": 1})
    assert tracked == [expected_event]
|
||||
|
||||
|
||||
def test_daemon_stopped_crash_wins_over_request(monkeypatch) -> None:
    """Expect a ``crash`` stop emitted first to suppress a later ``request`` stop."""
    from ktx_daemon.telemetry.daemon_lifecycle import (
        emit_daemon_stopped_once,
        reset_daemon_lifecycle_for_tests,
    )

    tracked: list[tuple[str, dict[str, object]]] = []

    def record_event(name, fields):
        tracked.append((name, fields))

    monkeypatch.setattr(
        "ktx_daemon.telemetry.daemon_lifecycle.track_telemetry_event",
        record_event,
    )
    reset_daemon_lifecycle_for_tests()

    for stop_reason, uptime in (("crash", 3), ("request", 4)):
        emit_daemon_stopped_once(reason=stop_reason, uptime_ms=uptime)

    expected_event = ("daemon_stopped", {"reason": "crash", "uptimeMs": 3})
    assert tracked == [expected_event]
|
||||
|
||||
|
||||
def test_report_exception_dedupes_same_exception_object(
    tmp_path: Path, monkeypatch
) -> None:
    """Expect one capture when the same exception object is reported from two sources."""
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)
    shared_error = RuntimeError("same object")

    # Same exception instance, two different reporting sources.
    for origin in ("semantic-query", "app:/semantic-layer/query"):
        report_exception(
            shared_error,
            source=origin,
            handled=True,
            fatal=False,
            home_dir=tmp_path,
            env={},
        )

    capture_count = len(FakePosthog.captures)
    assert capture_count == 1
|
||||
|
||||
|
||||
def test_report_exception_redacts_url_userinfo_and_authorization(
    tmp_path: Path, monkeypatch
) -> None:
    """Expect a URL password and an Authorization value to be redacted together.

    The secret fragments are assembled at runtime rather than written as one
    literal; the message combines a ``postgres://`` URL with userinfo and an
    ``Authorization: Basic`` value.
    """
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    url_password = "-".join(["db", "url", "secret"])
    basic_token = "".join(["abc", "123"])
    message = (
        f"connect postgres://svc:{url_password}"
        f"@db.example.test/analytics Authorization: Basic {basic_token}"
    )

    report_exception(
        RuntimeError(message),
        source="database-introspect",
        handled=True,
        fatal=False,
        home_dir=tmp_path,
        env={},
    )

    payload_text = str(FakePosthog.captures[0]["exception"])
    assert "postgres://svc:[redacted]@db.example.test/analytics" in payload_text
    assert "Authorization: [redacted]" in payload_text
    assert url_password not in payload_text
    assert basic_token not in payload_text
|
||||
|
||||
|
||||
def test_report_exception_falls_back_when_exception_type_cannot_be_reconstructed(
    tmp_path: Path, monkeypatch
) -> None:
    """Expect a keyword-only exception type to be captured once, with redaction.

    ``KeywordOnlyException`` cannot be built from a single positional message;
    the test expects the report to go through anyway and the explicit
    ``redaction_secrets`` entry to be replaced.
    """
    from ktx_daemon.telemetry.exception import report_exception

    class KeywordOnlyException(Exception):
        def __init__(self, *, message: str) -> None:
            super().__init__(message)

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    custom_error = KeywordOnlyException(message="custom secret-value")
    report_exception(
        custom_error,
        source="app:/custom",
        handled=True,
        fatal=False,
        home_dir=tmp_path,
        env={},
        redaction_secrets=["secret-value"],
    )

    assert len(FakePosthog.captures) == 1
    payload_text = str(FakePosthog.captures[0]["exception"])
    assert "[redacted]" in payload_text
    assert "secret-value" not in payload_text
|
||||
|
||||
|
||||
def test_report_exception_redacts_every_static_pattern_and_leaves_benign_text(
    tmp_path: Path, monkeypatch
) -> None:
    """Check each static redaction pattern, then a message that must pass through.

    Every case is a ``(message, leaked, expected)`` triple: ``message`` is the
    exception text, ``leaked`` must be absent from the captured exception, and
    ``expected`` must be present. A final benign message is expected to be
    captured unchanged.
    """
    from ktx_daemon.telemetry.exception import report_exception

    reset_identity_cache()
    write_identity(tmp_path)
    FakePosthog.captures.clear()
    monkeypatch.setattr("posthog.Posthog", FakePosthog)

    def report_and_read(message: str) -> str:
        """Report ``message`` and return the text of the capture it produced."""
        before = len(FakePosthog.captures)
        report_exception(
            RuntimeError(message),
            source="database-introspect",
            handled=True,
            fatal=False,
            home_dir=tmp_path,
            env={},
        )
        # Without this check a call that captures nothing would be judged
        # against the previous case's capture (two Authorization cases share
        # the same expected text), letting the test pass vacuously.
        assert len(FakePosthog.captures) == before + 1, (
            f"expected exactly one new capture for {message!r}"
        )
        return str(FakePosthog.captures[-1]["exception"])

    cases = [
        ("dsn password=hunter2", "hunter2", "password=[redacted]"),
        ("dsn pwd=swordfish", "swordfish", "pwd=[redacted]"),
        ("Authorization: Basic abc123", "abc123", "Authorization: [redacted]"),
        ("Authorization: Bearer token-123", "token-123", "Authorization: [redacted]"),
        ("Bearer standalone-token", "standalone-token", "Bearer [redacted]"),
        ("api_key=sk-live-secret", "sk-live-secret", "api_key=[redacted]"),
        ("api-key: sk-dash-secret", "sk-dash-secret", "api-key=[redacted]"),
        (
            "KTX_PROVIDER_TOKEN=ktx-secret",
            "ktx-secret",
            "KTX_PROVIDER_TOKEN=[redacted]",
        ),
        (
            "REFRESH_SECRET: refresh-secret",
            "refresh-secret",
            "REFRESH_SECRET=[redacted]",
        ),
        (
            "https://s3.example.test/file?X-Amz-Signature=aws-secret&ok=1",
            "aws-secret",
            "X-Amz-Signature=[redacted]",
        ),
        (
            "https://storage.example.test/file?X-Goog-Signature=goog-secret&ok=1",
            "goog-secret",
            "X-Goog-Signature=[redacted]",
        ),
        (
            "https://cdn.example.test/file?sig=signed-secret&ok=1",
            "signed-secret",
            "sig=[redacted]",
        ),
        (
            "postgres://svc:url-password@db.example.test/analytics",  # pragma: allowlist secret
            "url-password",
            "postgres://svc:[redacted]@db.example.test/analytics",
        ),
    ]

    for message, leaked, expected in cases:
        sent = report_and_read(message)
        assert expected in sent, f"{message!r}: missing {expected!r} in {sent!r}"
        assert leaked not in sent, f"{message!r}: leaked {leaked!r} in {sent!r}"

    benign = "token bucket metrics and passwordless auth are benign"
    assert report_and_read(benign) == benign
|
||||
|
||||
|
||||
def test_route_derived_boundary_covers_existing_health_route(monkeypatch) -> None:
    """Expect a failure inside ``/health`` to be reported as handled and non-fatal.

    The failure is injected by swapping ``os.environ`` (as seen through the app
    module) for a mapping whose ``get`` raises on ``KTX_DAEMON_VERSION``.
    """
    from fastapi.testclient import TestClient
    from ktx_daemon import app as app_module

    recorded: list[dict[str, object]] = []

    def record_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        recorded.append(entry)

    class BrokenEnviron(dict[str, str]):
        def get(self, key: str, default: str | None = None) -> str | None:
            if key != "KTX_DAEMON_VERSION":
                return default
            raise RuntimeError("health boom")

    monkeypatch.setattr(app_module.os, "environ", BrokenEnviron())
    monkeypatch.setattr(app_module, "report_exception", record_report)

    http_client = TestClient(app_module.create_app(), raise_server_exceptions=False)
    result = http_client.get("/health")

    assert result.status_code == 500
    assert recorded
    first = recorded[0]
    assert first["source"] == "app:/health"
    assert first["handled"] is True
    assert first["fatal"] is False
|
||||
|
||||
|
||||
def test_route_boundary_passes_request_scoped_database_secrets(monkeypatch) -> None:
    """Expect the request's ``url`` and ``password`` to be passed as ``redaction_secrets``."""
    from fastapi.testclient import TestClient
    from ktx_daemon import app as app_module

    recorded: list[dict[str, object]] = []

    def record_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        recorded.append(entry)

    def failing_introspect(_request):
        raise RuntimeError("db-url-secret")

    monkeypatch.setattr(app_module, "introspect_database_response", failing_introspect)
    monkeypatch.setattr(app_module, "report_exception", record_report)

    connection_url = "postgres://svc:db-url-secret@db.example.test/analytics"  # pragma: allowlist secret
    connection_password = "db-password-secret"  # pragma: allowlist secret
    request_body = {
        "connection_id": "warehouse",
        "url": connection_url,
        "password": connection_password,
    }

    http_client = TestClient(app_module.create_app(), raise_server_exceptions=False)
    result = http_client.post("/database/introspect", json=request_body)

    assert result.status_code == 500
    assert recorded
    assert recorded[0]["redaction_secrets"] == [connection_url, connection_password]
|
||||
|
||||
|
||||
def test_serve_http_run_crash_reports_exception_and_crash_stop(monkeypatch) -> None:
    """Expect a crash in ``uvicorn.run`` to be reported, re-raised, and marked as a crash stop.

    ``uvicorn`` is replaced in ``sys.modules`` by a stand-in whose ``run``
    raises, so ``run_http_server`` fails as soon as it starts the server.
    """
    import sys

    from ktx_daemon import __main__ as main_module

    recorded: list[dict[str, object]] = []
    stop_calls: list[dict[str, object]] = []

    def record_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        recorded.append(entry)

    def record_stop(*, reason: str, uptime_ms: float) -> bool:
        stop_calls.append({"reason": reason, "uptimeMs": uptime_ms})
        return True

    class FakeUvicorn:
        @staticmethod
        def run(*_args: object, **_kwargs: object) -> None:
            raise RuntimeError("uvicorn crash")

    monkeypatch.setitem(sys.modules, "uvicorn", FakeUvicorn)
    monkeypatch.setattr("ktx_daemon.telemetry.report_exception", record_report)
    monkeypatch.setattr(
        "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once",
        record_stop,
    )

    propagated: RuntimeError | None = None
    try:
        main_module.run_http_server(
            host="127.0.0.1",
            port=9999,
            log_level="info",
            enable_code_execution=False,
        )
    except RuntimeError as error:
        propagated = error

    if propagated is None:
        raise AssertionError("run_http_server did not re-raise the crash")
    assert str(propagated) == "uvicorn crash"

    assert recorded
    first = recorded[0]
    assert first["source"] == "serve-http"
    assert first["handled"] is False
    assert first["fatal"] is True
    assert stop_calls and stop_calls[0]["reason"] == "crash"
|
||||
|
||||
|
||||
def test_one_shot_command_reports_without_excepthook_or_daemon_stopped(
    monkeypatch,
) -> None:
    """Expect a failing one-shot command to report only the exception.

    After ``main(["database-introspect"])`` returns 1, ``sys.excepthook`` must
    be the same object as before and no daemon-stopped emission may have
    happened.
    """
    import sys

    from ktx_daemon import __main__ as daemon_main

    hook_before = sys.excepthook
    recorded: list[dict[str, object]] = []
    stop_calls: list[dict[str, object]] = []

    def record_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        recorded.append(entry)

    def record_stop(*, reason: str, uptime_ms: float) -> bool:
        stop_calls.append({"reason": reason, "uptimeMs": uptime_ms})
        return True

    def stdin_payload():
        return {
            "connection_id": "warehouse",
            "driver": "postgres",
            "url": "postgresql://readonly@example.test/warehouse",
            "schemas": ["public"],
        }

    def failing_introspect(_request):
        raise RuntimeError("one-shot boom")

    monkeypatch.setattr(daemon_main, "_read_stdin_json", stdin_payload)
    monkeypatch.setattr(daemon_main, "introspect_database_response", failing_introspect)
    monkeypatch.setattr("ktx_daemon.telemetry.report_exception", record_report)
    monkeypatch.setattr(
        "ktx_daemon.telemetry.daemon_lifecycle.emit_daemon_stopped_once",
        record_stop,
    )

    exit_code = daemon_main.main(["database-introspect"])

    assert exit_code == 1
    assert sys.excepthook is hook_before
    assert stop_calls == []
    assert recorded
    first = recorded[0]
    assert first["source"] == "database-introspect"
    assert first["handled"] is True
    assert first["fatal"] is False
|
||||
|
|
@ -97,6 +97,33 @@ def test_query_semantic_layer_emits_plan_and_sql_debug_events(
|
|||
assert "public.orders" not in captured.err
|
||||
|
||||
|
||||
def test_query_semantic_layer_reports_exception(monkeypatch) -> None:
    """Expect a failing semantic query to be reported with its project id.

    The request lists ``ORDERS_SOURCE`` twice and the call is expected to
    raise ``ValueError``.
    """
    from ktx_daemon import semantic_layer as semantic_layer_module

    recorded: list[dict[str, object]] = []

    def record_report(exception: BaseException, **kwargs: object) -> None:
        entry: dict[str, object] = {"exception": exception}
        entry.update(kwargs)
        recorded.append(entry)

    monkeypatch.setattr(semantic_layer_module, "report_exception", record_report)

    project_id = "a" * 64
    with pytest.raises(ValueError):
        # Build the request inside the block: construction itself may be the
        # step that raises.
        request = SemanticLayerQueryRequest(
            sources=[ORDERS_SOURCE, ORDERS_SOURCE],
            dialect="postgres",
            projectId=project_id,
            query={"measures": ["orders.order_count"]},
        )
        query_semantic_layer(request)

    assert recorded
    first = recorded[0]
    assert first["source"] == "semantic-query"
    assert first["handled"] is True
    assert first["fatal"] is False
    assert first["project_id"] == project_id
|
||||
|
||||
|
||||
def test_semantic_layer_request_rejects_project_id_field_name() -> None:
|
||||
with pytest.raises(ValueError):
|
||||
SemanticLayerQueryRequest(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue