fix(tokens): use canonical prompt_tokens_details path for cache fields

LiteLLM normalizes every provider's cache fields onto
usage.prompt_tokens_details (cached_tokens + cache_creation_tokens).
The earlier fallback to usage.cache_read_input_tokens /
usage.cache_creation_input_tokens was wrong: those Anthropic-shaped
fields only exist on the usage object via a trailing setattr loop, and
the canonical field name on the wrapper is cache_creation_tokens (not
cache_creation_input_tokens).
This commit is contained in:
CREDO23 2026-05-20 09:55:39 +02:00
parent 6090980c5e
commit 32f6766cb6

View file

@ -325,20 +325,22 @@ class TokenTrackingCallback(CustomLogger):
total_tokens = getattr(usage, "total_tokens", 0) or 0 total_tokens = getattr(usage, "total_tokens", 0) or 0
call_kind = "chat" call_kind = "chat"
# Prompt-cache accounting. Field shapes differ by provider: # Prompt-cache accounting. LiteLLM normalizes every provider's cache
# - OpenAI / Azure: ``usage.prompt_tokens_details.cached_tokens`` # fields onto ``usage.prompt_tokens_details``:
# - Anthropic: ``usage.cache_read_input_tokens`` + ``usage.cache_creation_input_tokens`` # - ``cached_tokens`` — cache reads (OpenAI/Azure native, DeepSeek
# LiteLLM normalizes both; we read both shapes and prefer whichever is set. # mapped from ``prompt_cache_hit_tokens``,
# Anthropic mapped from ``cache_read_input_tokens``).
# - ``cache_creation_tokens`` — cache writes (Anthropic only; OpenAI/Azure
# do not expose a write count).
# See ``litellm.types.utils.Usage.__init__`` for the mapping.
cached_tokens = 0 cached_tokens = 0
cache_creation_tokens = 0 cache_creation_tokens = 0
if not is_image: if not is_image:
prompt_details = getattr(usage, "prompt_tokens_details", None) prompt_details = getattr(usage, "prompt_tokens_details", None)
if prompt_details is not None: if prompt_details is not None:
cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0 cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0
if cached_tokens == 0:
cached_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
cache_creation_tokens = ( cache_creation_tokens = (
getattr(usage, "cache_creation_input_tokens", 0) or 0 getattr(prompt_details, "cache_creation_tokens", 0) or 0
) )
model = kwargs.get("model", "unknown") model = kwargs.get("model", "unknown")