feat: add llama-swap as a backend
This commit is contained in:
parent
c8da58430a
commit
aa8baebac5
17 changed files with 544 additions and 52 deletions
|
|
@ -20,10 +20,11 @@ MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
|||
MOCK_LLAMA_EP = "http://mock-llama:8080/v1"
|
||||
|
||||
|
||||
def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
|
||||
def _make_cfg(ollama_eps=None, llama_eps=None, swap_eps=None, api_keys=None):
|
||||
cfg = MagicMock()
|
||||
cfg.endpoints = ollama_eps or [MOCK_OLLAMA_EP]
|
||||
cfg.llama_server_endpoints = llama_eps or [MOCK_LLAMA_EP]
|
||||
cfg.llama_swap_endpoints = swap_eps or []
|
||||
cfg.api_keys = api_keys or {}
|
||||
cfg.max_concurrent_connections = 2
|
||||
cfg.router_api_key = None
|
||||
|
|
@ -228,6 +229,30 @@ class TestFetchLoadedModels:
|
|||
models = await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
||||
assert "always-on-model" in models
|
||||
|
||||
async def test_llama_swap_reads_running_state_ready(self):
    """Expect only the worker that /running reports as "ready" to be returned."""
    # llama-swap leaves the status field out of /v1/models, so the loaded
    # workers are read from /running instead. That is a root route, which is
    # why the probe below is registered without the endpoint's /v1 suffix.
    swap_ep = "http://mock-swap:8080/v1"
    running_payload = {
        "running": [
            {"model": "org/ready-model:Q4_K_M", "state": "ready"},
            {"model": "org/starting-model:Q8_0", "state": "starting"},
        ],
    }
    swap_cfg = _make_cfg(llama_eps=[], swap_eps=[swap_ep])
    with patch.object(router, "config", swap_cfg), mock_probe() as probe:
        probe.add_get("http://mock-swap:8080/running", payload=running_payload)
        loaded = await router.fetch.loaded_models(swap_ep)
        # The worker still in the "starting" state must not be listed.
        assert loaded == {"org/ready-model:Q4_K_M"}
|
||||
|
||||
async def test_llama_swap_records_error_on_failure(self):
    """Expect the endpoint in the error cache after /running answers 502."""
    swap_ep = "http://mock-swap:8080/v1"
    swap_cfg = _make_cfg(llama_eps=[], swap_eps=[swap_ep])
    with patch.object(router, "config", swap_cfg), mock_probe() as probe:
        # Answer the root-level /running probe with a gateway error.
        probe.add_get("http://mock-swap:8080/running", status=502, payload={})
        await router.fetch.loaded_models(swap_ep)
        assert swap_ep in router._loaded_error_cache
|
||||
|
||||
async def test_returns_empty_on_error(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue