feat: new helper to bridge change of behaviour in llama.cpp v1/models status - now correctly reporting "sleeping" or "loaded" for auto-unload
commit e296ac19ba
parent 353fadac48
1 changed file with 15 additions and 2 deletions
router.py | 17 +++++++++++++++--
@@ -572,6 +572,19 @@ def _is_llama_model_loaded(item: dict) -> bool:
         return status == "loaded"
     return False
 
+def _is_llama_model_loaded_or_sleeping(item: dict) -> bool:
+    """Return True if status is 'loaded' or 'sleeping'.
+
+    Newer llama-server versions report 'sleeping' in /v1/models when a model is idle;
+    ps_details needs to include these so _fetch_llama_props can detect and unload them."""
+    status = item.get("status")
+    if status is None:
+        return True
+    if isinstance(status, dict):
+        return status.get("value") in ("loaded", "sleeping")
+    if isinstance(status, str):
+        return status in ("loaded", "sleeping")
+    return False
 
 def is_ext_openai_endpoint(endpoint: str) -> bool:
     """
     Determine if an endpoint is an external OpenAI-compatible endpoint (not Ollama or llama-server).
@@ -2908,8 +2921,8 @@ async def ps_details_proxy(request: Request):
     llama_models_pending: list[dict] = []
 
     for (endpoint, modellist) in zip([ep for ep, _ in llama_tasks], llama_loaded):
-        # Filter for loaded models only
-        loaded_models = [item for item in modellist if _is_llama_model_loaded(item)]
+        # Include sleeping models too so _fetch_llama_props can unload them
+        loaded_models = [item for item in modellist if _is_llama_model_loaded_or_sleeping(item)]
         for item in loaded_models:
             if isinstance(item, dict) and item.get("id"):
                 raw_id = item["id"]
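
For reference, a minimal sketch of how the new helper classifies the item shapes that can appear in a /v1/models response. The helper body is copied from the patch above so the example is self-contained; the sample payloads and the model id are illustrative assumptions, not captured llama-server output.

# Illustrative only: sample /v1/models items and how the helper classifies them.
def _is_llama_model_loaded_or_sleeping(item: dict) -> bool:
    # Copied from the patch above for a self-contained example.
    status = item.get("status")
    if status is None:
        return True
    if isinstance(status, dict):
        return status.get("value") in ("loaded", "sleeping")
    if isinstance(status, str):
        return status in ("loaded", "sleeping")
    return False

samples = [
    {"id": "example-model"},                                 # no status field (older servers) -> True
    {"id": "example-model", "status": "loaded"},             # plain string status -> True
    {"id": "example-model", "status": "sleeping"},           # idle model on newer servers -> True
    {"id": "example-model", "status": {"value": "sleeping"}},  # dict-shaped status -> True
    {"id": "example-model", "status": "unloading"},          # anything else -> False
]

for item in samples:
    print(item.get("status"), "->", _is_llama_model_loaded_or_sleeping(item))

Treating a missing status as loaded preserves the old behaviour for servers that predate the status field, while the dict and string branches cover both reporting formats and now admit "sleeping" so ps_details can surface idle models for unloading.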