nomyo-router/test/test_llama_swap.py

"""Tests for llama-swap specific behavior: unload dispatch + /upstream resolution."""
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

import router
import backends.control as control
import api.openai as openai_api

# Base URLs passed to the code under test as the llama-swap and llama-server
# endpoints respectively. Both carry a trailing ``/v1``; the unload tests
# below expect the request URL to be built without it.
SWAP_EP = "http://swap:8080/v1"
SERVER_EP = "http://server:8080/v1"


def _cfg(*, server=None, swap=None, api_keys=None):
    """Build a mock router config for the tests in this module.

    Args:
        server: value for ``llama_server_endpoints``; a falsy value
            becomes an empty list.
        swap: value for ``llama_swap_endpoints``; a falsy value becomes
            an empty list.
        api_keys: value for ``api_keys``; a falsy value becomes an
            empty dict.

    Returns:
        A ``MagicMock`` carrying those attributes plus an empty
        ``endpoints`` list.
    """
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(
        endpoints=[],
        llama_server_endpoints=server if server else [],
        llama_swap_endpoints=swap if swap else [],
        api_keys=api_keys if api_keys else {},
    )


class _RecordingSession:
    """Stand-in HTTP session that records every ``post`` call.

    Each call is appended to ``calls`` as a ``(url, kwargs)`` tuple. The
    object returned by ``post`` is an async context manager yielding a mock
    response whose ``status`` is the value given at construction time
    (200 by default).
    """

    class _ResponseContext:
        """Async context manager that yields a pre-built response object."""

        def __init__(self, response):
            self._response = response

        async def __aenter__(self):
            return self._response

        async def __aexit__(self, *exc_info):
            # Falsy result: an exception raised inside ``async with`` propagates.
            return False

    def __init__(self, status=200):
        self._status = status
        self.calls = []

    def post(self, url, **kwargs):
        """Record ``(url, kwargs)`` and return a context manager for the fake response."""
        self.calls.append((url, kwargs))
        response = MagicMock()
        response.status = self._status
        return self._ResponseContext(response)


class TestUnloadDispatch:
    """Checks the request ``control.unload_model`` issues per backend kind."""

    @staticmethod
    async def _unload_via(session, cfg, endpoint, model):
        """Call ``unload_model`` with ``router.config`` and the probe session patched.

        ``session`` is handed back by the patched ``get_probe_session`` for any
        endpoint; the return value of ``unload_model`` is passed through.
        """

        def fake_probe_session(ep):
            return session

        with patch.object(router, "config", cfg):
            with patch.object(control, "get_probe_session", fake_probe_session):
                return await control.unload_model(endpoint, model)

    async def test_llama_swap_uses_path_param(self):
        """A llama-swap endpoint gets the model id in the URL path, without a JSON body."""
        recorder = _RecordingSession()
        result = await self._unload_via(
            recorder, _cfg(swap=[SWAP_EP]), SWAP_EP, "org/model:Q4_K_M"
        )
        assert result is True
        posted_url, posted_kwargs = recorder.calls[0]
        # The /v1 suffix is dropped and the model id travels as a path segment.
        assert posted_url == "http://swap:8080/api/models/unload/org/model:Q4_K_M"
        assert posted_kwargs.get("json") is None

    async def test_llama_server_uses_body(self):
        """A llama-server endpoint gets the model id in a JSON body."""
        recorder = _RecordingSession()
        result = await self._unload_via(
            recorder, _cfg(server=[SERVER_EP]), SERVER_EP, "org/model:Q4_K_M"
        )
        assert result is True
        posted_url, posted_kwargs = recorder.calls[0]
        assert posted_url == "http://server:8080/models/unload"
        assert posted_kwargs.get("json") == {"model": "org/model:Q4_K_M"}

    async def test_unload_failure_returns_false(self):
        """A 500 response from the backend makes ``unload_model`` return False."""
        recorder = _RecordingSession(status=500)
        result = await self._unload_via(recorder, _cfg(swap=[SWAP_EP]), SWAP_EP, "m")
        assert result is False


class TestUpstreamResolution:
    """Checks which endpoint ``_resolve_llama_swap_endpoint`` picks for a model."""

    @staticmethod
    async def _resolve(cfg, advertised, model):
        """Resolve ``model`` with the config and ``fetch.available_models`` patched.

        ``advertised`` is the value the patched ``available_models`` coroutine
        returns on every call.
        """
        models_mock = AsyncMock(return_value=advertised)
        with patch.object(openai_api, "get_config", lambda: cfg):
            with patch.object(openai_api.fetch, "available_models", models_mock):
                return await openai_api._resolve_llama_swap_endpoint(model)

    async def test_resolves_endpoint_that_advertises_model(self):
        """The swap endpoint is returned when it lists the requested model."""
        resolved = await self._resolve(
            _cfg(swap=[SWAP_EP]), {"org/model:Q4_K_M"}, "org/model:Q4_K_M"
        )
        assert resolved == SWAP_EP

    async def test_returns_none_when_unserved(self):
        """None is returned when the swap endpoint lists no models."""
        resolved = await self._resolve(_cfg(swap=[SWAP_EP]), set(), "missing")
        assert resolved is None

    async def test_returns_none_without_swap_endpoints(self):
        """None is returned when no swap endpoints are configured."""
        no_swap_cfg = _cfg(swap=[])
        # Only the config is patched here; ``fetch.available_models`` is left as is.
        with patch.object(openai_api, "get_config", lambda: no_swap_cfg):
            resolved = await openai_api._resolve_llama_swap_endpoint("any")
        assert resolved is None