feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
50
backends/control.py
Normal file
50
backends/control.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
"""Backend control operations (model unload).
|
||||
|
||||
llama-server and llama-swap evict a resident model through different routes:
|
||||
* llama-server → ``POST {base}/models/unload`` with body ``{"model": id}``
|
||||
* llama-swap → ``POST {base}/api/models/unload/{id}`` (path parameter)
|
||||
|
||||
``unload_model`` dispatches on the configured backend type so callers don't
|
||||
have to know which one they are talking to. Both routes live at the endpoint
|
||||
root, so any ``/v1`` suffix is stripped first.
|
||||
"""
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
from config import get_config
|
||||
from state import default_headers
|
||||
from backends.sessions import get_probe_session
|
||||
from backends.normalize import is_llama_swap
|
||||
from backends.health import _format_connection_issue
|
||||
|
||||
|
||||
async def unload_model(endpoint: str, model_id: str) -> bool:
    """Ask ``endpoint`` to unload ``model_id``.

    The route is chosen from ``is_llama_swap(endpoint)``:

    * llama-swap    → ``POST {base}/api/models/unload/{id}`` with no JSON body
    * anything else → ``POST {base}/models/unload`` with body ``{"model": id}``

    ``{base}`` is ``endpoint`` with trailing slashes and a trailing ``/v1``
    removed.

    Args:
        endpoint: Configured endpoint URL. It is also the key used to look up
            the API key in the config and to obtain the probe session.
        model_id: Must be the backend's native model identifier (the raw HF
            id for llama-server / llama-swap), not the router-normalized
            display name.

    Returns:
        True when the backend answers with a 2xx status. False for any other
        status, or when the request raises (the failure is printed, not
        propagated).
    """
    # Function-scope import so this block does not depend on a file-level change.
    from urllib.parse import quote

    cfg = get_config()
    base_url = endpoint.rstrip("/").removesuffix("/v1")

    headers = {"Referer": default_headers.get("HTTP-Referer", "https://nomyo.ai")}
    api_key: Optional[str] = cfg.api_keys.get(endpoint)
    if api_key is not None:
        headers["Authorization"] = "Bearer " + api_key

    if is_llama_swap(endpoint):
        # The id travels in the URL path here, so characters such as "?", "#"
        # or spaces would otherwise truncate or corrupt the path. "/" and ":"
        # stay literal so ``org/name`` and ``name:tag`` ids are sent unchanged.
        path_id = quote(model_id, safe="/:")
        url = f"{base_url}/api/models/unload/{path_id}"
        json_body = None
    else:
        # The id travels in the JSON body; no URL escaping is needed.
        url = f"{base_url}/models/unload"
        json_body = {"model": model_id}

    client: aiohttp.ClientSession = get_probe_session(endpoint)
    try:
        async with client.post(url, json=json_body, headers=headers) as resp:
            # Success is strictly 2xx, matching the documented contract.
            ok = 200 <= resp.status < 300
            print(f"[unload_model] {model_id} on {endpoint}: {resp.status}")
            return ok
    except Exception as e:
        # Best-effort operation: report the problem and signal failure
        # instead of raising into the caller.
        print(f"[unload_model] {_format_connection_issue(url, e)}")
        return False
|
||||
Loading…
Add table
Add a link
Reference in a new issue