feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
|
|
@ -12,10 +12,11 @@ EP3 = "http://ep3:11434"
|
|||
LLAMA_EP = "http://llama:8080/v1"
|
||||
|
||||
|
||||
def _make_cfg(endpoints, llama_eps=None, max_conn=2, endpoint_config=None, priority_routing=False):
|
||||
def _make_cfg(endpoints, llama_eps=None, swap_eps=None, max_conn=2, endpoint_config=None, priority_routing=False):
|
||||
cfg = MagicMock()
|
||||
cfg.endpoints = endpoints
|
||||
cfg.llama_server_endpoints = llama_eps or []
|
||||
cfg.llama_swap_endpoints = swap_eps or []
|
||||
cfg.api_keys = {}
|
||||
cfg.max_concurrent_connections = max_conn
|
||||
cfg.endpoint_config = endpoint_config or {}
|
||||
|
|
@ -46,6 +47,27 @@ class TestChooseEndpointBasic:
|
|||
assert ep == EP1
|
||||
assert tracking == "llama3.2:latest"
|
||||
|
||||
async def test_llama_swap_endpoint_is_a_candidate(self):
    """Check that a configured llama-swap endpoint can be chosen.

    The config lists one regular endpoint (``EP1``) plus one llama-swap
    endpoint. The patched fetch helpers report the requested model as
    available and loaded only on the llama-swap endpoint, so
    ``router.choose_endpoint`` is expected to return that endpoint.
    """
    swap_ep = "http://swap:8080/v1"
    requested_model = "org/model:Q4_K_M"
    cfg = _make_cfg([EP1], swap_eps=[swap_ep])

    def _models_on(endpoint):
        # Every endpoint other than the llama-swap one reports nothing.
        if endpoint != swap_ep:
            return set()
        return {requested_model}

    async def available(ep, *_):
        # Only the llama-swap backend advertises this model
        return _models_on(ep)

    async def loaded(ep):
        # ...and it is also the only backend with the model loaded.
        return _models_on(ep)

    with (
        patch.object(router, "config", cfg),
        patch.object(router.fetch, "available_models", side_effect=available),
        patch.object(router.fetch, "loaded_models", side_effect=loaded),
    ):
        chosen_ep, tracking_name = await router.choose_endpoint(requested_model)

    assert chosen_ep == swap_ep
    # llama-swap models are tracked under their normalized name
    assert tracking_name == "model"
|
||||
|
||||
async def test_raises_when_no_endpoint_has_model(self):
|
||||
cfg = _make_cfg([EP1, EP2])
|
||||
with (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue