feat: add llama-swap as a backend
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s

This commit is contained in:
Alpha Nerd 2026-06-14 16:34:31 +02:00
parent c8da58430a
commit aa8baebac5
Signed by: alpha-nerd
SSH key fingerprint: SHA256:QkkAgVoYi9TQ0UKPkiKSfnerZy2h4qhi3SVPXJmBN+M
17 changed files with 544 additions and 52 deletions

View file

@ -12,10 +12,11 @@ EP3 = "http://ep3:11434"
LLAMA_EP = "http://llama:8080/v1"
def _make_cfg(endpoints, llama_eps=None, max_conn=2, endpoint_config=None, priority_routing=False):
def _make_cfg(endpoints, llama_eps=None, swap_eps=None, max_conn=2, endpoint_config=None, priority_routing=False):
cfg = MagicMock()
cfg.endpoints = endpoints
cfg.llama_server_endpoints = llama_eps or []
cfg.llama_swap_endpoints = swap_eps or []
cfg.api_keys = {}
cfg.max_concurrent_connections = max_conn
cfg.endpoint_config = endpoint_config or {}
@ -46,6 +47,27 @@ class TestChooseEndpointBasic:
assert ep == EP1
assert tracking == "llama3.2:latest"
async def test_llama_swap_endpoint_is_a_candidate(self):
    """An endpoint listed under ``swap_eps`` can be chosen by the router.

    The stubs below report the requested model only for the llama-swap
    endpoint, so ``choose_endpoint`` is expected to return that endpoint
    together with the tracking name ``"model"``.
    """
    swap_url = "http://swap:8080/v1"
    router_cfg = _make_cfg([EP1], swap_eps=[swap_url])

    async def fake_available(ep, *_unused):
        # Only the llama-swap backend advertises this model.
        if ep == swap_url:
            return {"org/model:Q4_K_M"}
        return set()

    async def fake_loaded(ep):
        # Likewise, only the llama-swap backend reports it as loaded.
        if ep == swap_url:
            return {"org/model:Q4_K_M"}
        return set()

    with (
        patch.object(router, "config", router_cfg),
        patch.object(router.fetch, "available_models", side_effect=fake_available),
        patch.object(router.fetch, "loaded_models", side_effect=fake_loaded),
    ):
        chosen, tracked_as = await router.choose_endpoint("org/model:Q4_K_M")

    assert chosen == swap_url
    # llama-swap models are tracked under their normalized name.
    assert tracked_as == "model"
async def test_raises_when_no_endpoint_has_model(self):
cfg = _make_cfg([EP1, EP2])
with (