36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
|
|
"""Conversation fingerprinting for prompt-cache-aware routing."""
|
||
|
|
import hashlib
|
||
|
|
from typing import Optional
|
||
|
|
|
||
|
|
|
||
|
|
def _conversation_fingerprint(model: str, messages: Optional[list],
|
||
|
|
prompt: Optional[str]) -> Optional[str]:
|
||
|
|
"""
|
||
|
|
Stable hash over (model, first system + first user turn). That prefix
|
||
|
|
determines whether the backend's prompt cache is reusable; later turns
|
||
|
|
don't influence the routing decision because they extend the same prefix.
|
||
|
|
Returns None when there is no usable prefix.
|
||
|
|
"""
|
||
|
|
parts: list[str] = [model or "_"]
|
||
|
|
if messages:
|
||
|
|
for m in messages:
|
||
|
|
role = m.get("role") if isinstance(m, dict) else None
|
||
|
|
if role not in ("system", "user"):
|
||
|
|
continue
|
||
|
|
content = m.get("content")
|
||
|
|
if isinstance(content, list): # OpenAI multimodal parts
|
||
|
|
content = "".join(
|
||
|
|
p.get("text", "") for p in content
|
||
|
|
if isinstance(p, dict) and p.get("type") == "text"
|
||
|
|
)
|
||
|
|
if not isinstance(content, str):
|
||
|
|
continue
|
||
|
|
parts.append(f"{role}:{content}")
|
||
|
|
if role == "user":
|
||
|
|
break
|
||
|
|
elif prompt:
|
||
|
|
parts.append(f"user:{prompt}")
|
||
|
|
else:
|
||
|
|
return None
|
||
|
|
return hashlib.sha1("\x1f".join(parts).encode("utf-8", "replace")).hexdigest()
|