# Listing metadata (file-viewer residue): 131 lines, 4.4 KiB, Python.
"""Tests for llama-swap specific behavior: unload dispatch + /upstream resolution."""
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
import router
|
|
import backends.control as control
|
|
import api.openai as openai_api
|
|
import api.ollama as ollama_api
|
|
|
|
SWAP_EP = "http://swap:8080/v1"
|
|
SERVER_EP = "http://server:8080/v1"
|
|
|
|
|
|
def _cfg(*, server=None, swap=None, api_keys=None):
|
|
cfg = MagicMock()
|
|
cfg.endpoints = []
|
|
cfg.llama_server_endpoints = server or []
|
|
cfg.llama_swap_endpoints = swap or []
|
|
cfg.api_keys = api_keys or {}
|
|
return cfg
|
|
|
|
|
|
class _RecordingSession:
|
|
"""Captures the most recent ``post`` call and returns a 200 response."""
|
|
|
|
def __init__(self, status=200):
|
|
self.calls = []
|
|
self._status = status
|
|
|
|
def post(self, url, **kwargs):
|
|
self.calls.append((url, kwargs))
|
|
resp = MagicMock()
|
|
resp.status = self._status
|
|
|
|
class _Ctx:
|
|
async def __aenter__(self_):
|
|
return resp
|
|
|
|
async def __aexit__(self_, *exc):
|
|
return False
|
|
|
|
return _Ctx()
|
|
|
|
|
|
class TestUnloadDispatch:
    """``control.unload_model`` request shape per backend type, and its failure result."""

    async def test_llama_swap_uses_path_param(self):
        """A llama-swap endpoint gets the model id in the URL path, with no JSON body."""
        sess = _RecordingSession()
        cfg = _cfg(swap=[SWAP_EP])
        with (
            patch.object(router, "config", cfg),
            patch.object(control, "get_probe_session", lambda ep: sess),
        ):
            ok = await control.unload_model(SWAP_EP, "org/model:Q4_K_M")
        assert ok is True
        url, kwargs = sess.calls[0]
        # /v1 stripped, model id is a path param, no JSON body
        assert url == "http://swap:8080/api/models/unload/org/model:Q4_K_M"
        assert kwargs.get("json") is None

    async def test_llama_server_uses_body(self):
        """A llama-server endpoint gets the model id in a JSON body."""
        sess = _RecordingSession()
        cfg = _cfg(server=[SERVER_EP])
        with (
            patch.object(router, "config", cfg),
            patch.object(control, "get_probe_session", lambda ep: sess),
        ):
            ok = await control.unload_model(SERVER_EP, "org/model:Q4_K_M")
        assert ok is True
        url, kwargs = sess.calls[0]
        assert url == "http://server:8080/models/unload"
        assert kwargs.get("json") == {"model": "org/model:Q4_K_M"}

    async def test_unload_failure_returns_false(self):
        """A 500 response from the backend makes ``unload_model`` return ``False``."""
        sess = _RecordingSession(status=500)
        cfg = _cfg(swap=[SWAP_EP])
        with (
            patch.object(router, "config", cfg),
            patch.object(control, "get_probe_session", lambda ep: sess),
        ):
            ok = await control.unload_model(SWAP_EP, "m")
        assert ok is False
|
|
|
|
|
|
class TestUpstreamResolution:
    """``openai_api._resolve_llama_swap_endpoint`` lookups against patched config and fetch."""

    async def test_resolves_endpoint_that_advertises_model(self):
        """The swap endpoint is returned when its available models include the id."""
        cfg = _cfg(swap=[SWAP_EP])
        with (
            patch.object(openai_api, "get_config", lambda: cfg),
            patch.object(
                openai_api.fetch,
                "available_models",
                AsyncMock(return_value={"org/model:Q4_K_M"}),
            ),
        ):
            ep = await openai_api._resolve_llama_swap_endpoint("org/model:Q4_K_M")
        assert ep == SWAP_EP

    async def test_returns_none_when_unserved(self):
        """``None`` is returned when no swap endpoint lists the requested model."""
        cfg = _cfg(swap=[SWAP_EP])
        with (
            patch.object(openai_api, "get_config", lambda: cfg),
            patch.object(
                openai_api.fetch,
                "available_models",
                AsyncMock(return_value=set()),
            ),
        ):
            ep = await openai_api._resolve_llama_swap_endpoint("missing")
        assert ep is None

    async def test_returns_none_without_swap_endpoints(self):
        """``None`` is returned when the config has no llama-swap endpoints."""
        cfg = _cfg(swap=[])
        with patch.object(openai_api, "get_config", lambda: cfg):
            ep = await openai_api._resolve_llama_swap_endpoint("any")
        assert ep is None
|
|
|
|
|
|
class TestCtxSizeFromCmd:
    """ctx-size parsing from a /running worker's launch `cmd` string."""

    def test_parses_long_flag(self):
        """``--ctx-size N`` is found inside a multi-line command string."""
        cmd = (
            "llama-server --port 5818\n -hf unsloth/gpt-oss-20b-GGUF:F16\n"
            " --ctx-size 131072\n --temp 1.0\n"
        )
        assert ollama_api._ctx_size_from_cmd(cmd) == 131072

    def test_parses_short_flag(self):
        """The short ``-c N`` form is recognised."""
        assert ollama_api._ctx_size_from_cmd("llama-server -c 8192 --port 1") == 8192

    def test_parses_equals_form(self):
        """The ``--ctx-size=N`` form is recognised."""
        assert ollama_api._ctx_size_from_cmd("llama-server --ctx-size=4096") == 4096

    def test_returns_none_when_absent(self):
        """A command without a ctx-size flag yields ``None``."""
        assert ollama_api._ctx_size_from_cmd("llama-server --port 5818") is None

    def test_returns_none_for_empty(self):
        """An empty command string yields ``None``."""
        assert ollama_api._ctx_size_from_cmd("") is None
|