feat(token-tracking): enhance model metadata reconciliation by adding bare model name handling

This commit is contained in:
Anish Sarkar 2026-06-14 12:18:22 +05:30
parent 7926814070
commit d9a4f14f99
3 changed files with 115 additions and 5 deletions

View file

@ -32,6 +32,23 @@ from app.db import TokenUsage
logger = logging.getLogger(__name__)
def _bare_model_name(model: str) -> str:
"""Return a model identifier with any provider routing prefix stripped.
LiteLLM's ``get_llm_provider`` consumes the provider prefix we add in
``to_litellm`` (e.g. ``azure/gpt-5.2-chat`` ``gpt-5.2-chat`` because
``azure`` is in ``litellm.provider_list``). The token-tracking success
callback therefore reports ``kwargs["model"]`` *without* that prefix,
while model metadata is registered under the *prefixed* string. Normalising
both sides to the last path segment lets the two reconcile so the per-model
breakdown carries provider/display_name and the UI attributes the turn to
the correct connection instead of falling back to a bare-name collision.
"""
if not model:
return model
return model.split("/")[-1]
@dataclass
class TokenCallRecord:
model: str
@ -52,6 +69,12 @@ class TurnTokenAccumulator:
calls: list[TokenCallRecord] = field(default_factory=list)
model_metadata: dict[str, dict[str, str | None]] = field(default_factory=dict)
# Secondary index keyed by the bare model name (provider prefix stripped) so
# the LiteLLM callback — which never sees our routing prefix — can still
# reconcile its ``kwargs["model"]`` back to the registered metadata.
model_metadata_by_bare: dict[str, dict[str, str | None]] = field(
default_factory=dict
)
def register_model_metadata(
self,
@ -63,12 +86,28 @@ class TurnTokenAccumulator:
provider: str | None,
) -> None:
"""Attach resolved model metadata for later LiteLLM callback attribution."""
self.model_metadata[model] = {
metadata = {
"model_ref": model_ref,
"model_id": model_id,
"display_name": display_name,
"provider": provider,
}
self.model_metadata[model] = metadata
# Index every reconcilable alias: the prefixed string's bare form and
# the resolved ``model_id`` (which for some providers is itself the bare
# deployment LiteLLM reports). Exact lookups always take precedence.
self.model_metadata_by_bare[_bare_model_name(model)] = metadata
if model_id:
self.model_metadata_by_bare.setdefault(_bare_model_name(model_id), metadata)
def _lookup_metadata(self, model: str) -> dict[str, str | None]:
    """Find the registered metadata that belongs to a callback model name.

    An exact match in ``model_metadata`` always wins. Otherwise the name is
    reduced with :func:`_bare_model_name` and looked up in
    ``model_metadata_by_bare``, which tolerates LiteLLM stripping the provider
    prefix before the success callback fires. An empty dict is returned when
    neither index knows the model.
    """
    registered = self.model_metadata.get(model)
    if registered is None:
        # No exact key: fall back to the bare-name alias index.
        bare_name = _bare_model_name(model)
        return self.model_metadata_by_bare.get(bare_name, {})
    return registered
def add(
self,
@ -79,7 +118,7 @@ class TurnTokenAccumulator:
cost_micros: int = 0,
call_kind: str = "chat",
) -> None:
metadata = self.model_metadata.get(model, {})
metadata = self._lookup_metadata(model)
self.calls.append(
TokenCallRecord(
model=model,

View file

@ -112,6 +112,77 @@ def test_per_message_summary_groups_cost_by_model():
assert summary["gpt-4o-mini"]["cost_micros"] == 200
def test_add_reconciles_metadata_when_litellm_strips_provider_prefix():
    """Regression: metadata registered under a prefixed model must still attach.

    LiteLLM's ``get_llm_provider`` removes the provider prefix added in
    ``to_litellm`` (``azure/gpt-5.2-chat`` -> ``gpt-5.2-chat`` because
    ``azure`` is in ``litellm.provider_list``), so the success callback
    reports the bare model. The per-model breakdown must nevertheless carry
    provider/display_name -- otherwise the UI falls back to a bare-name
    collision and mis-attributes an Azure turn to an OpenRouter model
    (e.g. shows "OpenAI: GPT-5.2 Chat").
    """
    from app.services.token_tracking_service import TurnTokenAccumulator

    registered = {
        "model_ref": "global:-1",
        "model_id": "gpt-5.2-chat",
        "display_name": "Azure GPT 5.2",
        "provider": "azure",
    }
    tracker = TurnTokenAccumulator()
    tracker.register_model_metadata(model="azure/gpt-5.2-chat", **registered)

    # The LiteLLM callback reports the model with its prefix already stripped.
    callback_usage = {
        "model": "gpt-5.2-chat",
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "total_tokens": 150,
        "cost_micros": 4_000,
    }
    tracker.add(**callback_usage)

    entry = tracker.per_message_summary()["gpt-5.2-chat"]
    # Checked in the same order as the individual assertions they replace.
    for field_name in ("provider", "display_name", "model_id", "model_ref"):
        assert entry[field_name] == registered[field_name]
def test_add_prefers_exact_metadata_over_bare_alias():
    """Exact key matches beat the bare-name alias.

    When the callback model equals a registered key, that key's metadata is
    used even if another registered model shares the same bare name -- so a
    turn that legitimately used two same-named deployments stays correctly
    attributed.
    """
    from app.services.token_tracking_service import TurnTokenAccumulator

    # Two deployments whose bare names collide; registered in this order.
    registrations = (
        {
            "model": "azure/gpt-5.2-chat",
            "model_ref": "global:-1",
            "model_id": "gpt-5.2-chat",
            "display_name": "Azure GPT 5.2",
            "provider": "azure",
        },
        {
            "model": "openai/gpt-5.2-chat",
            "model_ref": "db:7",
            "model_id": "gpt-5.2-chat",
            "display_name": "OpenAI GPT 5.2",
            "provider": "openai",
        },
    )
    tracker = TurnTokenAccumulator()
    for registration in registrations:
        tracker.register_model_metadata(**registration)

    tracker.add(
        model="openai/gpt-5.2-chat",
        prompt_tokens=10,
        completion_tokens=5,
        total_tokens=15,
        cost_micros=100,
    )

    openai_entry = tracker.per_message_summary()["openai/gpt-5.2-chat"]
    assert openai_entry["provider"] == "openai"
    assert openai_entry["display_name"] == "OpenAI GPT 5.2"
def test_serialized_calls_includes_cost_micros():
"""``serialized_calls`` is what flows into the SSE ``call_details``
payload; cost_micros must be present on each entry so the FE message-info

View file

@ -46,7 +46,7 @@ export const metadata: Metadata = {
alternates: {
canonical: "https://www.surfsense.com",
},
title: "SurfSense Open Source, Privacy-Focused NotebookLM Alternative for Teams",
title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
description:
"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude AI, and any AI model for free.",
keywords: [
@ -88,7 +88,7 @@ export const metadata: Metadata = {
"SurfSense",
],
openGraph: {
title: "SurfSense Open Source, Privacy-Focused NotebookLM Alternative for Teams",
title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
description:
"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude, and any AI model for free.",
url: "https://www.surfsense.com",
@ -106,7 +106,7 @@ export const metadata: Metadata = {
},
twitter: {
card: "summary_large_image",
title: "SurfSense Open Source, Privacy-Focused NotebookLM Alternative for Teams",
title: "SurfSense - Open Source, Privacy-Focused NotebookLM Alternative for Teams",
description:
"Open source NotebookLM alternative for teams with no data limits. Use ChatGPT, Claude AI, and any AI model for free.",
creator: "@SurfSenseAI",