"""Conversation fingerprinting for prompt-cache-aware routing."""
import hashlib
from typing import Optional


def _conversation_fingerprint(
    model: str, messages: Optional[list], prompt: Optional[str]
) -> Optional[str]:
    """Return a stable hash over the model and the conversation's opening prefix.

    The prefix is every ``system`` message that precedes the first usable
    ``user`` message, plus that ``user`` message itself. That prefix
    determines whether the backend's prompt cache is reusable; later turns
    don't influence the routing decision because they extend the same prefix.

    Args:
        model: Model name; an empty/falsy value is hashed as ``"_"``.
        messages: Chat-style list of message dicts with ``role`` and
            ``content`` keys. ``content`` may be a string or a list of
            multimodal parts, of which only ``{"type": "text"}`` parts with a
            string ``text`` contribute. Entries that are not dicts, have
            another role, or have non-text content are skipped.
        prompt: Plain completion prompt, used when ``messages`` yields no
            usable turn.

    Returns:
        The SHA-1 hex digest of the prefix, or ``None`` when neither
        ``messages`` nor ``prompt`` provides a usable prefix.
    """
    parts: list[str] = [model or "_"]

    for message in messages or ():
        if not isinstance(message, dict):
            continue
        role = message.get("role")
        if role not in ("system", "user"):
            continue

        content = message.get("content")
        if isinstance(content, list):
            # OpenAI multimodal parts: keep only the textual pieces. Guard
            # against a non-string "text" value so join() cannot raise.
            content = "".join(
                part["text"]
                for part in content
                if isinstance(part, dict)
                and part.get("type") == "text"
                and isinstance(part.get("text"), str)
            )
        if not isinstance(content, str):
            continue

        parts.append(f"{role}:{content}")
        if role == "user":
            # The first user turn closes the prefix; later turns only extend it.
            break

    if len(parts) == 1 and prompt:
        # No usable chat turn was found; fall back to the plain prompt.
        parts.append(f"user:{prompt}")

    if len(parts) == 1:
        # Only the model name is present: hashing it alone would collapse
        # unrelated conversations onto a single fingerprint.
        return None

    # "\x1f" (ASCII unit separator) keeps field boundaries unambiguous;
    # "replace" keeps lone surrogates from raising during encoding.
    joined = "\x1f".join(parts)
    return hashlib.sha1(joined.encode("utf-8", "replace")).hexdigest()