feat: new helper to bridge llama.cpp's changed /v1/models status behaviour - auto-unload now correctly handles models reported as "sleeping" as well as "loaded"

Alpha Nerd 2026-05-07 11:34:09 +02:00
parent 353fadac48
commit e296ac19ba
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M


@@ -572,6 +572,19 @@ def _is_llama_model_loaded(item: dict) -> bool:
         return status == "loaded"
     return False
 
+def _is_llama_model_loaded_or_sleeping(item: dict) -> bool:
+    """Return True if status is 'loaded' or 'sleeping'.
+    Newer llama-server versions report 'sleeping' in /v1/models when a model is idle;
+    ps_details needs to include these so _fetch_llama_props can detect and unload them."""
+    status = item.get("status")
+    if status is None:
+        return True
+    if isinstance(status, dict):
+        return status.get("value") in ("loaded", "sleeping")
+    if isinstance(status, str):
+        return status in ("loaded", "sleeping")
+    return False
+
 def is_ext_openai_endpoint(endpoint: str) -> bool:
     """
     Determine if an endpoint is an external OpenAI-compatible endpoint (not Ollama or llama-server).
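
For reference, a minimal sketch of how the new helper classifies the three status shapes it accepts (missing, plain string, object-wrapped). The sample entries and the import path are illustrative assumptions, not captured llama-server output:

    # Illustrative only: exercises _is_llama_model_loaded_or_sleeping against
    # sample /v1/models entries. "proxy_server" is a hypothetical module name.
    from proxy_server import _is_llama_model_loaded_or_sleeping

    samples = [
        {"id": "model-a", "status": "loaded"},                # plain string status
        {"id": "model-a", "status": "sleeping"},              # idle model on newer llama-server
        {"id": "model-a", "status": {"value": "sleeping"}},   # status wrapped in an object
        {"id": "model-a"},                                    # no status field: treated as loaded
        {"id": "model-a", "status": "unloading"},             # anything else is excluded
    ]

    print([_is_llama_model_loaded_or_sleeping(item) for item in samples])
    # expected: [True, True, True, True, False]
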
@@ -2908,8 +2921,8 @@ async def ps_details_proxy(request: Request):
     llama_models_pending: list[dict] = []
     for (endpoint, modellist) in zip([ep for ep, _ in llama_tasks], llama_loaded):
-        # Filter for loaded models only
-        loaded_models = [item for item in modellist if _is_llama_model_loaded(item)]
+        # Include sleeping models too so _fetch_llama_props can unload them
+        loaded_models = [item for item in modellist if _is_llama_model_loaded_or_sleeping(item)]
         for item in loaded_models:
             if isinstance(item, dict) and item.get("id"):
                 raw_id = item["id"]
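
A small sketch of what the swapped filter changes inside ps_details_proxy, using made-up model lists; the ids and the import path are placeholders, not real deployment data:

    # Illustrative only: sleeping entries are now kept by the filter, so the
    # downstream id extraction (and later unload handling) sees them as well.
    from proxy_server import _is_llama_model_loaded_or_sleeping  # hypothetical module name

    modellist = [
        {"id": "llama-3.1-8b", "status": "loaded"},
        {"id": "mistral-7b", "status": "sleeping"},   # previously dropped, now kept
        {"object": "model"},                          # entry without an id is still skipped later
    ]

    loaded_models = [item for item in modellist if _is_llama_model_loaded_or_sleeping(item)]
    ids = [item["id"] for item in loaded_models if isinstance(item, dict) and item.get("id")]
    print(ids)  # expected: ['llama-3.1-8b', 'mistral-7b']
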