From 581bbfb5c1301d84afa1f6c402ac724657894cd1 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 19 May 2026 21:30:25 +0200 Subject: [PATCH] perf(tokens): add per-call latency to capture log --- .../app/services/token_tracking_service.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 9406d9be4..58d06ba31 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -357,9 +357,19 @@ class TokenTrackingCallback(CustomLogger): cost_micros=cost_micros, call_kind=call_kind, ) + + # Per-LLM-call wall-clock latency (LiteLLM passes datetime objects). + call_latency_s: float | None = None + try: + if start_time is not None and end_time is not None: + delta = end_time - start_time + call_latency_s = getattr(delta, "total_seconds", lambda: float(delta))() + except Exception: + call_latency_s = None + logger.info( "[TokenTracking] Captured: model=%s kind=%s prompt=%d completion=%d total=%d " - "cost=$%.6f (%d micros) (accumulator now has %d calls)", + "cost=$%.6f (%d micros) (accumulator now has %d calls)%s", model, call_kind, prompt_tokens, @@ -368,6 +378,7 @@ class TokenTrackingCallback(CustomLogger): cost_usd, cost_micros, len(acc.calls), + f" latency={call_latency_s:.3f}s" if call_latency_s is not None else "", )