feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
|
|
@ -27,7 +27,7 @@ from state import (
|
|||
_affinity_lock,
|
||||
)
|
||||
from sse import subscribe, unsubscribe
|
||||
from backends.normalize import _normalize_llama_model_name
|
||||
from backends.normalize import _normalize_llama_model_name, is_llama_server, llama_endpoints
|
||||
from backends.probe import _endpoint_health
|
||||
|
||||
|
||||
|
|
@ -127,7 +127,6 @@ async def affinity_stats(request: Request):
|
|||
|
||||
now = time.monotonic()
|
||||
entries: list[dict] = []
|
||||
llama_eps = set(config.llama_server_endpoints)
|
||||
async with _affinity_lock:
|
||||
for fp, (ep, mdl, expires_at) in list(_affinity_map.items()):
|
||||
remaining = expires_at - now
|
||||
|
|
@ -136,7 +135,7 @@ async def affinity_stats(request: Request):
|
|||
continue
|
||||
# Mirror the normalisation used by /api/ps_details so the dashboard
|
||||
# can join affinity entries to PS rows by (endpoint, model).
|
||||
display_model = _normalize_llama_model_name(mdl) if ep in llama_eps else mdl
|
||||
display_model = _normalize_llama_model_name(mdl) if is_llama_server(ep) else mdl
|
||||
entries.append({
|
||||
"endpoint": ep,
|
||||
"model": display_model,
|
||||
|
|
@ -175,9 +174,12 @@ async def config_proxy(request: Request):
|
|||
|
||||
ollama_results = await asyncio.gather(*[check(ep) for ep in config.endpoints])
|
||||
llama_results = []
|
||||
if config.llama_server_endpoints:
|
||||
# llama-server and llama-swap render identically in the dashboard ("llama" rows),
|
||||
# so health-check both and merge them into one list.
|
||||
llama_eps = llama_endpoints(config)
|
||||
if llama_eps:
|
||||
llama_results = await asyncio.gather(
|
||||
*[check(ep) for ep in config.llama_server_endpoints]
|
||||
*[check(ep) for ep in llama_eps]
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
@ -227,7 +229,7 @@ async def health_proxy(request: Request):
|
|||
# purposes. Probing /api/version alone would miss the case where the
|
||||
# Ollama process is up but /api/ps is failing — see issue #83.
|
||||
all_endpoints = list(config.endpoints)
|
||||
llama_eps_extra = [ep for ep in config.llama_server_endpoints if ep not in config.endpoints]
|
||||
llama_eps_extra = [ep for ep in llama_endpoints(config) if ep not in config.endpoints]
|
||||
all_endpoints += llama_eps_extra
|
||||
|
||||
probe_results = await asyncio.gather(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue