nomyo-router/test/test_llama_swap.py
alpha nerd aa8baebac5
All checks were successful
PR Tests / test (pull_request) Successful in 1m18s
NYX Security Scan / nyx-scan (pull_request) Successful in 6m19s
feat: add llama-swap as a backend
2026-06-14 16:34:31 +02:00

109 lines
3.6 KiB
Python

"""Tests for llama-swap specific behavior: unload dispatch + /upstream resolution."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
import router
import backends.control as control
import api.openai as openai_api
# Fixture base URLs used by the tests below: SWAP_EP is passed to ``_cfg`` as
# a llama-swap endpoint and SERVER_EP as a llama-server endpoint.
SWAP_EP = "http://swap:8080/v1"
SERVER_EP = "http://server:8080/v1"
def _cfg(*, server=None, swap=None, api_keys=None):
cfg = MagicMock()
cfg.endpoints = []
cfg.llama_server_endpoints = server or []
cfg.llama_swap_endpoints = swap or []
cfg.api_keys = api_keys or {}
return cfg
class _RecordingSession:
"""Captures the most recent ``post`` call and returns a 200 response."""
def __init__(self, status=200):
self.calls = []
self._status = status
def post(self, url, **kwargs):
self.calls.append((url, kwargs))
resp = MagicMock()
resp.status = self._status
class _Ctx:
async def __aenter__(self_):
return resp
async def __aexit__(self_, *exc):
return False
return _Ctx()
class TestUnloadDispatch:
    """Checks the request ``control.unload_model`` issues per backend type."""

    @staticmethod
    async def _unload(cfg, session, endpoint, model):
        """Run ``control.unload_model`` with config and probe session patched."""
        with patch.object(router, "config", cfg):
            with patch.object(control, "get_probe_session", lambda ep: session):
                return await control.unload_model(endpoint, model)

    async def test_llama_swap_uses_path_param(self):
        """llama-swap endpoint: model id travels in the URL, not in a JSON body."""
        recorder = _RecordingSession()
        unloaded = await self._unload(
            _cfg(swap=[SWAP_EP]), recorder, SWAP_EP, "org/model:Q4_K_M"
        )
        assert unloaded is True
        posted_url, posted_kwargs = recorder.calls[0]
        # /v1 stripped, model id is a path param, no JSON body
        assert posted_url == "http://swap:8080/api/models/unload/org/model:Q4_K_M"
        assert posted_kwargs.get("json") is None

    async def test_llama_server_uses_body(self):
        """llama-server endpoint: model id is sent as a JSON body to /models/unload."""
        recorder = _RecordingSession()
        unloaded = await self._unload(
            _cfg(server=[SERVER_EP]), recorder, SERVER_EP, "org/model:Q4_K_M"
        )
        assert unloaded is True
        posted_url, posted_kwargs = recorder.calls[0]
        assert posted_url == "http://server:8080/models/unload"
        assert posted_kwargs.get("json") == {"model": "org/model:Q4_K_M"}

    async def test_unload_failure_returns_false(self):
        """A 500 response from the endpoint makes ``unload_model`` return False."""
        failing = _RecordingSession(status=500)
        unloaded = await self._unload(_cfg(swap=[SWAP_EP]), failing, SWAP_EP, "m")
        assert unloaded is False
class TestUpstreamResolution:
    """Checks what ``openai_api._resolve_llama_swap_endpoint`` returns for a model id."""

    async def test_resolves_endpoint_that_advertises_model(self):
        """The configured swap endpoint is returned when it lists the model."""
        model_id = "org/model:Q4_K_M"
        swap_cfg = _cfg(swap=[SWAP_EP])
        advertised = AsyncMock(return_value={model_id})
        with patch.object(openai_api, "get_config", lambda: swap_cfg):
            with patch.object(openai_api.fetch, "available_models", advertised):
                resolved = await openai_api._resolve_llama_swap_endpoint(model_id)
        assert resolved == SWAP_EP

    async def test_returns_none_when_unserved(self):
        """None is returned when no swap endpoint lists the requested model."""
        swap_cfg = _cfg(swap=[SWAP_EP])
        nothing_served = AsyncMock(return_value=set())
        with patch.object(openai_api, "get_config", lambda: swap_cfg):
            with patch.object(openai_api.fetch, "available_models", nothing_served):
                resolved = await openai_api._resolve_llama_swap_endpoint("missing")
        assert resolved is None

    async def test_returns_none_without_swap_endpoints(self):
        """None is returned when the config has no llama-swap endpoints at all."""
        empty_cfg = _cfg(swap=[])
        with patch.object(openai_api, "get_config", lambda: empty_cfg):
            resolved = await openai_api._resolve_llama_swap_endpoint("any")
        assert resolved is None