feat: add llama-swap as a backend
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s

This commit is contained in:
Alpha Nerd 2026-06-14 16:34:31 +02:00
parent c8da58430a
commit aa8baebac5
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
17 changed files with 544 additions and 52 deletions

View file

@@ -27,7 +27,7 @@ from state import (
_affinity_lock,
)
from sse import subscribe, unsubscribe
-from backends.normalize import _normalize_llama_model_name
+from backends.normalize import _normalize_llama_model_name, is_llama_server, llama_endpoints
from backends.probe import _endpoint_health
@@ -127,7 +127,6 @@ async def affinity_stats(request: Request):
now = time.monotonic()
entries: list[dict] = []
-llama_eps = set(config.llama_server_endpoints)
async with _affinity_lock:
for fp, (ep, mdl, expires_at) in list(_affinity_map.items()):
remaining = expires_at - now
@@ -136,7 +135,7 @@ async def affinity_stats(request: Request):
continue
# Mirror the normalisation used by /api/ps_details so the dashboard
# can join affinity entries to PS rows by (endpoint, model).
-display_model = _normalize_llama_model_name(mdl) if ep in llama_eps else mdl
+display_model = _normalize_llama_model_name(mdl) if is_llama_server(ep) else mdl
entries.append({
"endpoint": ep,
"model": display_model,
@@ -175,9 +174,12 @@ async def config_proxy(request: Request):
ollama_results = await asyncio.gather(*[check(ep) for ep in config.endpoints])
llama_results = []
-if config.llama_server_endpoints:
+# llama-server and llama-swap render identically in the dashboard ("llama" rows),
+# so health-check both and merge them into one list.
+llama_eps = llama_endpoints(config)
+if llama_eps:
llama_results = await asyncio.gather(
-*[check(ep) for ep in config.llama_server_endpoints]
+*[check(ep) for ep in llama_eps]
)
return {
@@ -227,7 +229,7 @@ async def health_proxy(request: Request):
# purposes. Probing /api/version alone would miss the case where the
# Ollama process is up but /api/ps is failing — see issue #83.
all_endpoints = list(config.endpoints)
-llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
+llama_eps_extra = [ep for ep in llama_endpoints(config) if ep not in config.endpoints]
all_endpoints += llama_eps_extra
probe_results = await asyncio.gather(