feat: visualization of conversation affinity in dashboard
This commit is contained in:
parent
4acbaeb29c
commit
aa7ec6354a
5 changed files with 306 additions and 19 deletions
47
router.py
47
router.py
|
|
@ -445,10 +445,12 @@ token_usage_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(
|
|||
usage_lock = asyncio.Lock() # protects access to usage_counts
|
||||
token_usage_lock = asyncio.Lock()
|
||||
|
||||
# Conversation affinity map: fingerprint -> (endpoint, expires_at_monotonic).
|
||||
# Conversation affinity map: fingerprint -> (endpoint, model, expires_at_monotonic).
|
||||
# Keeps the same conversation pinned to the endpoint that already has its
|
||||
# KV-cache prefix warm. Never held together with usage_lock.
|
||||
_affinity_map: Dict[str, tuple[str, float]] = {}
|
||||
# KV-cache prefix warm. Model is stored so the dashboard can aggregate live
|
||||
# entries per (endpoint, model) without recomputing fingerprints.
|
||||
# Never held together with usage_lock.
|
||||
_affinity_map: Dict[str, tuple[str, str, float]] = {}
|
||||
_affinity_lock = asyncio.Lock()
|
||||
_AFFINITY_MAX_ENTRIES = 10000
|
||||
|
||||
|
|
@ -1859,7 +1861,7 @@ async def choose_endpoint(model: str, reserve: bool = True,
|
|||
async with _affinity_lock:
|
||||
entry = _affinity_map.get(affinity_key)
|
||||
if entry is not None:
|
||||
ep, expires_at = entry
|
||||
ep, _stored_model, expires_at = entry
|
||||
if expires_at < time.monotonic():
|
||||
_affinity_map.pop(affinity_key, None)
|
||||
else:
|
||||
|
|
@ -1961,10 +1963,10 @@ async def choose_endpoint(model: str, reserve: bool = True,
|
|||
if reserve and config.conversation_affinity and affinity_key:
|
||||
expires_at = time.monotonic() + config.conversation_affinity_ttl
|
||||
async with _affinity_lock:
|
||||
_affinity_map[affinity_key] = (selected, expires_at)
|
||||
_affinity_map[affinity_key] = (selected, model, expires_at)
|
||||
if len(_affinity_map) > _AFFINITY_MAX_ENTRIES:
|
||||
now = time.monotonic()
|
||||
for k in [k for k, v in _affinity_map.items() if v[1] < now]:
|
||||
for k in [k for k, v in _affinity_map.items() if v[2] < now]:
|
||||
_affinity_map.pop(k, None)
|
||||
return selected, tracking_model
|
||||
|
||||
|
|
@ -3103,6 +3105,39 @@ async def ps_details_proxy(request: Request):
|
|||
|
||||
return JSONResponse(content={"models": models}, status_code=200)
|
||||
|
||||
# -------------------------------------------------------------
|
||||
# 18b. Conversation-affinity stats – feeds the PS-table dot matrix
|
||||
# -------------------------------------------------------------
|
||||
@app.get("/api/affinity_stats")
|
||||
async def affinity_stats(request: Request):
|
||||
"""
|
||||
Aggregate live conversation-affinity pins, one entry per pinned conversation.
|
||||
Each entry exposes only the endpoint, model, and remaining TTL in seconds —
|
||||
no fingerprints or content. When conversation_affinity is disabled the
|
||||
`entries` list is always empty.
|
||||
"""
|
||||
if not config.conversation_affinity:
|
||||
return {"enabled": False, "ttl": config.conversation_affinity_ttl, "entries": []}
|
||||
|
||||
now = time.monotonic()
|
||||
entries: list[dict] = []
|
||||
async with _affinity_lock:
|
||||
for fp, (ep, mdl, expires_at) in list(_affinity_map.items()):
|
||||
remaining = expires_at - now
|
||||
if remaining <= 0:
|
||||
_affinity_map.pop(fp, None)
|
||||
continue
|
||||
entries.append({
|
||||
"endpoint": ep,
|
||||
"model": mdl,
|
||||
"remaining": round(remaining, 2),
|
||||
})
|
||||
return {
|
||||
"enabled": True,
|
||||
"ttl": config.conversation_affinity_ttl,
|
||||
"entries": entries,
|
||||
}
|
||||
|
||||
# -------------------------------------------------------------
|
||||
# 19. Proxy usage route – for monitoring
|
||||
# -------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue