From d9a4f14f99763a9e31a2a05ff105dbc63e3da495 Mon Sep 17 00:00:00 2001
From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com>
Date: Sun, 14 Jun 2026 12:18:22 +0530
Subject: [PATCH] feat(token-tracking): enhance model metadata reconciliation
 by adding bare model name handling

---
 .../app/services/token_tracking_service.py    | 43 ++++++++++-
 .../services/test_token_quota_service_cost.py | 71 +++++++++++++++++++
 surfsense_web/app/layout.tsx                  |  6 +-
 3 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py
index 8383770a6..d1a29b82a 100644
--- a/surfsense_backend/app/services/token_tracking_service.py
+++ b/surfsense_backend/app/services/token_tracking_service.py
@@ -32,6 +32,23 @@ from app.db import TokenUsage
 logger = logging.getLogger(__name__)
 
 
+def _bare_model_name(model: str) -> str:
+    """Strip provider routing prefixes, keeping only the last ``/`` segment.
+
+    ``to_litellm`` adds a provider prefix (e.g. ``azure/gpt-5.2-chat``) and
+    model metadata is registered under that *prefixed* string, but LiteLLM's
+    ``get_llm_provider`` consumes the prefix (``azure`` is in
+    ``litellm.provider_list``), so the token-tracking success callback reports
+    ``kwargs["model"]`` *without* it. Reducing both sides to the final path
+    segment lets them reconcile, so the per-model breakdown keeps
+    provider/display_name instead of falling back to a bare-name collision.
+
+    Falsy input (e.g. an empty string) is returned unchanged.
+    """
+    # ``rpartition`` yields the text after the final "/" (or all of it if none).
+    return model.rpartition("/")[2] if model else model
+
+
 @dataclass
 class TokenCallRecord:
     model: str
@@ -52,6 +69,12 @@ class TurnTokenAccumulator:
 
     calls: list[TokenCallRecord] = field(default_factory=list)
     model_metadata: dict[str, dict[str, str | None]] = field(default_factory=dict)
+    # Secondary index keyed by the bare model name (provider prefix stripped) so
+    # the LiteLLM callback — which never sees our routing prefix — can still
+    # reconcile its ``kwargs["model"]`` back to the registered metadata.
+    model_metadata_by_bare: dict[str, dict[str, str | None]] = field(
+        default_factory=dict
+    )
 
     def register_model_metadata(
         self,
@@ -63,12 +86,28 @@ class TurnTokenAccumulator:
         provider: str | None,
     ) -> None:
         """Attach resolved model metadata for later LiteLLM callback attribution."""
-        self.model_metadata[model] = {
+        metadata = {
             "model_ref": model_ref,
             "model_id": model_id,
             "display_name": display_name,
             "provider": provider,
         }
+        self.model_metadata[model] = metadata
+        # Index every reconcilable alias: the prefixed string's bare form and
+        # the resolved ``model_id`` (which for some providers is itself the bare
+        # deployment LiteLLM reports). Exact lookups always take precedence.
+        self.model_metadata_by_bare[_bare_model_name(model)] = metadata
+        if model_id:
+            self.model_metadata_by_bare.setdefault(_bare_model_name(model_id), metadata)
+
+    def _lookup_metadata(self, model: str) -> dict[str, str | None]:
+        """Return metadata for a callback model: exact registered key first,
+        then the bare-name alias (see :func:`_bare_model_name`), else ``{}``."""
+        found = self.model_metadata.get(model)
+        if found is None:
+            # Exact miss: fall back to the prefix-stripped alias index.
+            found = self.model_metadata_by_bare.get(_bare_model_name(model), {})
+        return found
 
     def add(
         self,
@@ -79,7 +118,7 @@ class TurnTokenAccumulator:
         cost_micros: int = 0,
         call_kind: str = "chat",
     ) -> None:
-        metadata = self.model_metadata.get(model, {})
+        metadata = self._lookup_metadata(model)
         self.calls.append(
             TokenCallRecord(
                 model=model,
diff --git a/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py b/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py
index a543c1459..9eeb55a4d 100644
--- a/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py
+++ b/surfsense_backend/tests/unit/services/test_token_quota_service_cost.py
@@ -112,6 +112,77 @@ def test_per_message_summary_groups_cost_by_model():
     assert summary["gpt-4o-mini"]["cost_micros"] == 200
 
 
+def test_add_reconciles_metadata_when_litellm_strips_provider_prefix():
+    """Regression: metadata registered under a provider-prefixed model must
+    still attach when the success callback reports the prefix-stripped name.
+
+    LiteLLM's ``get_llm_provider`` strips the prefix added in ``to_litellm``
+    (``azure/gpt-5.2-chat`` → ``gpt-5.2-chat``), so the callback only sees the
+    bare model. Without reconciliation the per-model breakdown loses
+    provider/display_name and the UI mis-attributes an Azure turn.
+    """
+    from app.services.token_tracking_service import TurnTokenAccumulator
+
+    registered = {
+        "model_ref": "global:-1",
+        "model_id": "gpt-5.2-chat",
+        "display_name": "Azure GPT 5.2",
+        "provider": "azure",
+    }
+    tracker = TurnTokenAccumulator()
+    tracker.register_model_metadata(model="azure/gpt-5.2-chat", **registered)
+
+    # Simulate the LiteLLM callback, which reports the bare model name.
+    tracker.add(
+        model="gpt-5.2-chat",
+        prompt_tokens=100,
+        completion_tokens=50,
+        total_tokens=150,
+        cost_micros=4_000,
+    )
+
+    summary = tracker.per_message_summary()
+    entry = summary["gpt-5.2-chat"]
+    # Every registered field must surface on the per-model entry.
+    for key, expected in registered.items():
+        assert entry[key] == expected
+
+
+def test_add_prefers_exact_metadata_over_bare_alias():
+    """An exact match on the registered model key must beat the bare-name
+    alias, so two deployments sharing a bare name within one turn are each
+    attributed to their own metadata."""
+    from app.services.token_tracking_service import TurnTokenAccumulator
+
+    deployments = [
+        ("azure/gpt-5.2-chat", "global:-1", "Azure GPT 5.2", "azure"),
+        ("openai/gpt-5.2-chat", "db:7", "OpenAI GPT 5.2", "openai"),
+    ]
+    tracker = TurnTokenAccumulator()
+    for model, model_ref, display_name, provider in deployments:
+        tracker.register_model_metadata(
+            model=model,
+            model_ref=model_ref,
+            model_id="gpt-5.2-chat",
+            display_name=display_name,
+            provider=provider,
+        )
+
+    # The callback reports a model string identical to a registered key.
+    tracker.add(
+        model="openai/gpt-5.2-chat",
+        prompt_tokens=10,
+        completion_tokens=5,
+        total_tokens=15,
+        cost_micros=100,
+    )
+
+    summary = tracker.per_message_summary()
+    entry = summary["openai/gpt-5.2-chat"]
+    assert entry["provider"] == "openai"
+    assert entry["display_name"] == "OpenAI GPT 5.2"
+
+
 def test_serialized_calls_includes_cost_micros():
     """``serialized_calls`` is what flows into the SSE ``call_details``
     payload; cost_micros must be present on each entry so the FE message-info
diff --git a/surfsense_web/app/layout.tsx b/surfsense_web/app/layout.tsx
index eef03d463..1e9c9eebe 100644
--- a/surfsense_web/app/layout.tsx
+++ b/surfsense_web/app/layout.tsx
@@ -46,7 +46,7 @@ export const metadata: Metadata = {
 	alternates: {
 		canonical: "https://www.surfsense.com",
 	},
-	title: "SurfSense – Open Source, Privacy-Focused NotebookLM Alternative for Teams",
+	title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
 	description:
 		"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude AI, and any AI model for free.",
 	keywords: [
@@ -88,7 +88,7 @@ export const metadata: Metadata = {
 		"SurfSense",
 	],
 	openGraph: {
-		title: "SurfSense – Open Source, Privacy-Focused NotebookLM Alternative for Teams",
+		title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
 		description:
 			"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude, and any AI model for free.",
 		url: "https://www.surfsense.com",
@@ -106,7 +106,7 @@ export const metadata: Metadata = {
 	},
 	twitter: {
 		card: "summary_large_image",
-		title: "SurfSense – Open Source, Privacy-Focused NotebookLM Alternative for Teams",
+		title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
 		description:
 			"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude AI, and any AI model for free.",
 		creator: "@SurfSenseAI",