Compare commits

...

1 commit

Author SHA1 Message Date
d163fea154
fix: remove aioresponses
sec: bump aiohttp to 3.14

fix: tiktoken test issue by pre-caching the vocab file
2026-06-07 13:23:35 +02:00
5 changed files with 100381 additions and 40 deletions

View file

@ -7,6 +7,18 @@ fits inside (n_ctx - safety_margin).
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
exceed_context_size_error bodies and from finish_reason=="length" signals. exceed_context_size_error bodies and from finish_reason=="length" signals.
""" """
import os
# Point tiktoken at the vendored cl100k_base vocab (the vendor/tiktoken/ directory
# next to this file) so the encoding loads offline, without a network download.
# The download would otherwise fail anyway: this repo has a top-level `requests`
# package that shadows the pip `requests` that tiktoken's downloader imports, so
# get_encoding() would silently fall back to the char/4 estimate.
# setdefault only writes the variable when it is not already set, so an explicit
# TIKTOKEN_CACHE_DIR in the environment wins over the vendored path.
os.environ.setdefault(
"TIKTOKEN_CACHE_DIR",
os.path.join(os.path.dirname(os.path.abspath(__file__)), "vendor", "tiktoken"),
)
try: try:
import tiktoken as _tiktoken import tiktoken as _tiktoken
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base") _tiktoken_enc = _tiktoken.get_encoding("cl100k_base")

View file

@ -1,5 +1,5 @@
aiohappyeyeballs==2.6.1 aiohappyeyeballs==2.6.1
aiohttp==3.13.5 aiohttp==3.14.0
aiosignal==1.4.0 aiosignal==1.4.0
annotated-types==0.7.0 annotated-types==0.7.0
anyio==4.13.0 anyio==4.13.0

View file

@ -1,4 +1,3 @@
pytest>=8.0 pytest>=8.0
pytest-asyncio>=0.24 pytest-asyncio>=0.24
pytest-cov>=5.0 pytest-cov>=5.0
aioresponses>=0.7

View file

@ -1,11 +1,19 @@
"""Tests for fetch.available_models and fetch.loaded_models using aioresponses mocking.""" """Tests for fetch.available_models and fetch.loaded_models.
The backend probes obtain their HTTP client via ``backends.probe.get_probe_session``
and only ever call ``async with client.get(url, headers=...) as resp``. We patch that
seam with a tiny fake session instead of mocking aiohttp's internals (aioresponses),
so the suite stays independent of aiohttp's private ClientResponse/ConnectionKey
structure across version bumps.
"""
import time import time
from contextlib import contextmanager
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
import pytest import pytest
from aioresponses import aioresponses
import router import router
import backends.probe as probe
from conftest import TEST_OLLAMA, TEST_LLAMA from conftest import TEST_OLLAMA, TEST_LLAMA
MOCK_OLLAMA_EP = "http://mock-ollama:11434" MOCK_OLLAMA_EP = "http://mock-ollama:11434"
@ -22,6 +30,73 @@ def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
return cfg return cfg
# ── Fake probe session ────────────────────────────────────────────────────────
class _MockResponse:
    """Minimal stand-in for the aiohttp response used by the probes.

    The object is its own async context manager, so a session's ``get()`` can
    return it directly for use as ``async with client.get(...) as resp``.

    Args:
        status: Value exposed as ``resp.status``.
        payload: Object returned by ``await resp.json()``.
        text: Body returned by ``await resp.text()``. When omitted, the body is
            the JSON serialization of ``payload`` (or ``""`` if there is no
            payload), the way a real JSON response carries its payload as text.
    """

    def __init__(self, *, status=200, payload=None, text=None):
        self.status = status
        self._payload = payload
        self._text = text

    async def json(self, **kwargs):
        """Return the registered payload.

        aiohttp-style keyword options (``content_type=``, ``encoding=``,
        ``loads=``) are accepted and ignored so callers that pass them do not
        fail with ``TypeError`` against the fake.
        """
        return self._payload

    async def text(self, *args, **kwargs):
        """Return the registered body text.

        Positional/keyword options (``encoding``, ``errors``) are accepted and
        ignored. Without an explicit ``text`` the JSON-serialized payload is
        returned; serialization is done lazily so a payload that is only ever
        read through ``json()`` never has to be JSON-serializable.
        """
        if self._text is not None:
            return self._text
        if self._payload is None:
            return ""
        import json  # local import: keeps this block independent of file-level imports

        return json.dumps(self._payload)

    async def __aenter__(self):
        return self

    async def __aexit__(self, *exc):
        # False: never suppress an exception raised inside the ``async with`` body.
        return False
class _RaisingCtx:
    """Async context manager whose entry always fails.

    Used as the return value of ``client.get(...)`` to mimic a connection that
    could not be established: the stored exception is raised as soon as the
    ``async with`` statement tries to enter the context.
    """

    def __init__(self, exc):
        # Exception to raise on entry; kept untouched until then.
        self._error = exc

    async def __aenter__(self):
        raise self._error

    async def __aexit__(self, *exc_info):
        # Never swallow exceptions from the ``async with`` body.
        return False
class _MockProbeSession:
    """Stand-in for the aiohttp ClientSession returned by ``get_probe_session``.

    Routes are registered by exact URL via :meth:`add_get`. A registered exception
    is raised when the route is entered; otherwise a :class:`_MockResponse` is yielded.
    An unregistered GET fails loudly so tests can't silently pass on a wrong URL.
    """

    def __init__(self):
        # url -> _MockResponse, or the exception to raise on entry.
        self._routes = {}

    def add_get(self, url, *, status=200, payload=None, text=None, exception=None):
        """Register the outcome of ``GET url``.

        Args:
            url: Exact URL the probe is expected to request.
            status: Status of the fake response.
            payload: JSON payload of the fake response.
            text: Text body of the fake response.
            exception: When not ``None``, raised on entering the request
                context instead of yielding a response; ``status``,
                ``payload`` and ``text`` are then unused.
        """
        if exception is not None:
            self._routes[url] = exception
        else:
            self._routes[url] = _MockResponse(status=status, payload=payload, text=text)

    def get(self, url, **kwargs):
        """Return the async context manager registered for ``url``.

        Extra keyword arguments (e.g. ``headers=``) are accepted and ignored.

        Raises:
            AssertionError: if no route was registered for ``url``.
        """
        if url not in self._routes:
            raise AssertionError(f"unexpected probe GET {url}")
        entry = self._routes[url]
        # Anything that is not a response was registered through ``exception=``.
        # Testing for the response type (rather than ``isinstance(..., Exception)``)
        # keeps this in step with add_get's ``is not None`` check, so BaseException
        # instances such as asyncio.CancelledError and exception classes are
        # raised on entry too instead of being handed back as a context manager.
        if isinstance(entry, _MockResponse):
            return entry
        return _RaisingCtx(entry)
@contextmanager
def mock_probe():
    """Swap the probe's session factory for one that serves a fake session.

    While the context is active, ``probe.get_probe_session`` returns the same
    fresh :class:`_MockProbeSession` for every endpoint; that session is yielded
    so the test can register routes on it. The original factory is restored on exit.
    """
    fake_session = _MockProbeSession()

    def _factory(endpoint):
        # The endpoint is irrelevant: every probe shares the one fake session.
        return fake_session

    patcher = patch.object(probe, "get_probe_session", _factory)
    with patcher:
        yield fake_session
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def clear_caches(aio_session): def clear_caches(aio_session):
"""aio_session fixture already clears caches and sets up app_state.""" """aio_session fixture already clears caches and sets up app_state."""
@ -31,8 +106,8 @@ def clear_caches(aio_session):
class TestFetchAvailableModels: class TestFetchAvailableModels:
async def test_ollama_tags(self): async def test_ollama_tags(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/tags", f"{MOCK_OLLAMA_EP}/api/tags",
payload={"models": [ payload={"models": [
{"name": "llama3.2:latest"}, {"name": "llama3.2:latest"},
@ -44,8 +119,8 @@ class TestFetchAvailableModels:
async def test_openai_compatible_models_endpoint(self): async def test_openai_compatible_models_endpoint(self):
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP]) cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_LLAMA_EP}/models", f"{MOCK_LLAMA_EP}/models",
payload={"data": [{"id": "unsloth/model:Q8_0"}]}, payload={"data": [{"id": "unsloth/model:Q8_0"}]},
) )
@ -54,8 +129,8 @@ class TestFetchAvailableModels:
async def test_caches_successful_result(self): async def test_caches_successful_result(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/tags", f"{MOCK_OLLAMA_EP}/api/tags",
payload={"models": [{"name": "llama3.2:latest"}]}, payload={"models": [{"name": "llama3.2:latest"}]},
) )
@ -66,20 +141,19 @@ class TestFetchAvailableModels:
async def test_returns_empty_on_http_500(self): async def test_returns_empty_on_http_500(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"}) m.add_get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
models = await router.fetch.available_models(MOCK_OLLAMA_EP) models = await router.fetch.available_models(MOCK_OLLAMA_EP)
assert models == set() assert models == set()
async def test_returns_empty_on_connection_error(self): async def test_returns_empty_on_connection_error(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
import aiohttp import aiohttp
with patch.object(router, "config", cfg), aioresponses() as m: cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
m.get( with patch.object(router, "config", cfg), mock_probe() as m:
m.add_get(
f"{MOCK_OLLAMA_EP}/api/tags", f"{MOCK_OLLAMA_EP}/api/tags",
exception=aiohttp.ClientConnectorError( exception=aiohttp.ClientConnectionError(
connection_key=MagicMock(host="mock-ollama", port=11434), "Cannot connect to host mock-ollama:11434 [Connection refused]"
os_error=OSError(111, "refused"),
), ),
) )
models = await router.fetch.available_models(MOCK_OLLAMA_EP) models = await router.fetch.available_models(MOCK_OLLAMA_EP)
@ -87,8 +161,8 @@ class TestFetchAvailableModels:
async def test_stale_cache_returned_while_refresh_runs(self): async def test_stale_cache_returned_while_refresh_runs(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/tags", f"{MOCK_OLLAMA_EP}/api/tags",
payload={"models": [{"name": "llama3.2:latest"}]}, payload={"models": [{"name": "llama3.2:latest"}]},
) )
@ -99,8 +173,8 @@ class TestFetchAvailableModels:
models, _ = router._models_cache[MOCK_OLLAMA_EP] models, _ = router._models_cache[MOCK_OLLAMA_EP]
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400) router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/tags", f"{MOCK_OLLAMA_EP}/api/tags",
payload={"models": [{"name": "llama3.2:latest"}]}, payload={"models": [{"name": "llama3.2:latest"}]},
) )
@ -114,8 +188,8 @@ class TestFetchAvailableModels:
async with router._available_error_cache_lock: async with router._available_error_cache_lock:
router._available_error_cache[MOCK_OLLAMA_EP] = time.time() router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
with patch.object(router, "config", cfg), aioresponses(): with patch.object(router, "config", cfg), mock_probe():
# No HTTP mock registered — if a call happens it will raise # No route registered — if a call happens it raises AssertionError
models = await router.fetch.available_models(MOCK_OLLAMA_EP) models = await router.fetch.available_models(MOCK_OLLAMA_EP)
assert models == set() assert models == set()
@ -123,8 +197,8 @@ class TestFetchAvailableModels:
class TestFetchLoadedModels: class TestFetchLoadedModels:
async def test_ollama_ps(self): async def test_ollama_ps(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/ps", f"{MOCK_OLLAMA_EP}/api/ps",
payload={"models": [{"name": "llama3.2:latest"}]}, payload={"models": [{"name": "llama3.2:latest"}]},
) )
@ -133,8 +207,8 @@ class TestFetchLoadedModels:
async def test_llama_server_filters_loaded(self): async def test_llama_server_filters_loaded(self):
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP]) cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_LLAMA_EP}/models", f"{MOCK_LLAMA_EP}/models",
payload={"data": [ payload={"data": [
{"id": "model-a", "status": {"value": "loaded"}}, {"id": "model-a", "status": {"value": "loaded"}},
@ -146,8 +220,8 @@ class TestFetchLoadedModels:
async def test_llama_server_no_status_field_always_loaded(self): async def test_llama_server_no_status_field_always_loaded(self):
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP]) cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_LLAMA_EP}/models", f"{MOCK_LLAMA_EP}/models",
payload={"data": [{"id": "always-on-model"}]}, payload={"data": [{"id": "always-on-model"}]},
) )
@ -156,8 +230,8 @@ class TestFetchLoadedModels:
async def test_returns_empty_on_error(self): async def test_returns_empty_on_error(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={}) m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP) models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
assert models == set() assert models == set()
@ -170,8 +244,8 @@ class TestFetchLoadedModels:
async def test_caches_result(self): async def test_caches_result(self):
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/ps", f"{MOCK_OLLAMA_EP}/api/ps",
payload={"models": [{"name": "qwen:7b"}]}, payload={"models": [{"name": "qwen:7b"}]},
) )
@ -183,15 +257,15 @@ class TestFetchLoadedModels:
# Regression: issue #83 — /api/ps failures must be recorded so # Regression: issue #83 — /api/ps failures must be recorded so
# `choose_endpoint` can exclude unhealthy backends from routing. # `choose_endpoint` can exclude unhealthy backends from routing.
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[]) cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={}) m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
await router.fetch.loaded_models(MOCK_OLLAMA_EP) await router.fetch.loaded_models(MOCK_OLLAMA_EP)
assert MOCK_OLLAMA_EP in router._loaded_error_cache assert MOCK_OLLAMA_EP in router._loaded_error_cache
async def test_records_error_for_llama_server_on_failure(self): async def test_records_error_for_llama_server_on_failure(self):
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP]) cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={}) m.add_get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
await router.fetch.loaded_models(MOCK_LLAMA_EP) await router.fetch.loaded_models(MOCK_LLAMA_EP)
assert MOCK_LLAMA_EP in router._loaded_error_cache assert MOCK_LLAMA_EP in router._loaded_error_cache
@ -201,8 +275,8 @@ class TestFetchLoadedModels:
# network probe instead of short-circuiting on the error cache. # network probe instead of short-circuiting on the error cache.
async with router._loaded_error_cache_lock: async with router._loaded_error_cache_lock:
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301 router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
with patch.object(router, "config", cfg), aioresponses() as m: with patch.object(router, "config", cfg), mock_probe() as m:
m.get( m.add_get(
f"{MOCK_OLLAMA_EP}/api/ps", f"{MOCK_OLLAMA_EP}/api/ps",
payload={"models": [{"name": "qwen:7b"}]}, payload={"models": [{"name": "qwen:7b"}]},
) )

File diff suppressed because it is too large Load diff