"""Tests for llama-swap specific behavior: unload dispatch + /upstream resolution.

Also covers ctx-size parsing from a worker's launch command string.
"""
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

import router
import backends.control as control
import api.openai as openai_api
import api.ollama as ollama_api

SWAP_EP = "http://swap:8080/v1"
SERVER_EP = "http://server:8080/v1"

# NOTE(review): the async tests below carry no explicit marker, so they
# presumably rely on an async pytest plugin configured project-wide — confirm.


def _cfg(*, server=None, swap=None, api_keys=None):
    """Return a mock config exposing the endpoint lists and API keys.

    ``endpoints`` is always empty; each keyword that is omitted (or falsy)
    falls back to an empty container.
    """
    return MagicMock(
        endpoints=[],
        llama_server_endpoints=server or [],
        llama_swap_endpoints=swap or [],
        api_keys=api_keys or {},
    )


class _FixedResponseContext:
    """Async context manager that hands back a pre-built response object."""

    def __init__(self, response):
        self._response = response

    async def __aenter__(self):
        return self._response

    async def __aexit__(self, *exc_info):
        # False: never swallow an exception raised inside the ``async with``.
        return False


class _RecordingSession:
    """Fake session: logs every ``post`` call and answers with a fixed status.

    Each call is appended to ``calls`` as a ``(url, kwargs)`` tuple.
    """

    def __init__(self, status=200):
        self._status = status
        self.calls = []

    def post(self, url, **kwargs):
        """Record the call and return an async context yielding the response."""
        self.calls.append((url, kwargs))
        response = MagicMock()
        response.status = self._status
        return _FixedResponseContext(response)


class TestUnloadDispatch:
    """Request shape used by ``control.unload_model`` per backend kind."""

    async def test_llama_swap_uses_path_param(self):
        session = _RecordingSession()
        swap_config = _cfg(swap=[SWAP_EP])
        with (
            patch.object(router, "config", swap_config),
            patch.object(control, "get_probe_session", lambda ep: session),
        ):
            unloaded = await control.unload_model(SWAP_EP, "org/model:Q4_K_M")

        assert unloaded is True
        posted_url, post_kwargs = session.calls[0]
        # /v1 stripped, model id is a path param, no JSON body
        assert posted_url == "http://swap:8080/api/models/unload/org/model:Q4_K_M"
        assert post_kwargs.get("json") is None

    async def test_llama_server_uses_body(self):
        session = _RecordingSession()
        server_config = _cfg(server=[SERVER_EP])
        with (
            patch.object(router, "config", server_config),
            patch.object(control, "get_probe_session", lambda ep: session),
        ):
            unloaded = await control.unload_model(SERVER_EP, "org/model:Q4_K_M")

        assert unloaded is True
        posted_url, post_kwargs = session.calls[0]
        # Here the model id travels in the JSON body rather than the path.
        assert posted_url == "http://server:8080/models/unload"
        assert post_kwargs.get("json") == {"model": "org/model:Q4_K_M"}

    async def test_unload_failure_returns_false(self):
        failing_session = _RecordingSession(status=500)
        swap_config = _cfg(swap=[SWAP_EP])
        with (
            patch.object(router, "config", swap_config),
            patch.object(control, "get_probe_session", lambda ep: failing_session),
        ):
            unloaded = await control.unload_model(SWAP_EP, "m")

        assert unloaded is False


class TestUpstreamResolution:
    """Lookup of the llama-swap endpoint that serves a given model id."""

    async def test_resolves_endpoint_that_advertises_model(self):
        swap_config = _cfg(swap=[SWAP_EP])
        advertised = AsyncMock(return_value={"org/model:Q4_K_M"})
        with (
            patch.object(openai_api, "get_config", lambda: swap_config),
            patch.object(openai_api.fetch, "available_models", advertised),
        ):
            resolved = await openai_api._resolve_llama_swap_endpoint("org/model:Q4_K_M")

        assert resolved == SWAP_EP

    async def test_returns_none_when_unserved(self):
        swap_config = _cfg(swap=[SWAP_EP])
        nothing_advertised = AsyncMock(return_value=set())
        with (
            patch.object(openai_api, "get_config", lambda: swap_config),
            patch.object(openai_api.fetch, "available_models", nothing_advertised),
        ):
            resolved = await openai_api._resolve_llama_swap_endpoint("missing")

        assert resolved is None

    async def test_returns_none_without_swap_endpoints(self):
        empty_config = _cfg(swap=[])
        with patch.object(openai_api, "get_config", lambda: empty_config):
            resolved = await openai_api._resolve_llama_swap_endpoint("any")

        assert resolved is None


class TestCtxSizeFromCmd:
    """ctx-size parsing from a /running worker's launch `cmd` string."""

    def test_parses_long_flag(self):
        launch_cmd = (
            "llama-server --port 5818\n -hf unsloth/gpt-oss-20b-GGUF:F16\n"
            " --ctx-size 131072\n --temp 1.0\n"
        )
        parsed = ollama_api._ctx_size_from_cmd(launch_cmd)
        assert parsed == 131072

    def test_parses_short_flag(self):
        parsed = ollama_api._ctx_size_from_cmd("llama-server -c 8192 --port 1")
        assert parsed == 8192

    def test_parses_equals_form(self):
        parsed = ollama_api._ctx_size_from_cmd("llama-server --ctx-size=4096")
        assert parsed == 4096

    def test_returns_none_when_absent(self):
        parsed = ollama_api._ctx_size_from_cmd("llama-server --port 5818")
        assert parsed is None

    def test_returns_none_for_empty(self):
        parsed = ollama_api._ctx_size_from_cmd("")
        assert parsed is None