fix: remove aioresponses

sec: bump aiohttp to 3.14; fix: tiktoken test issue by pre-caching the vocab file
2026-06-07 13:23:35 +02:00
5 changed files with 100381 additions and 40 deletions
--- a/context_window.py
+++ b/context_window.py
@ -7,6 +7,18 @@ fits inside (n_ctx - safety_margin).
 Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
 exceed_context_size_error bodies and from finish_reason=="length" signals.
 """
+import os
+
+# Point tiktoken at the vendored cl100k_base vocab so the encoding loads offline,
+# without a network download. The download would fail anyway: this repo has a
+# top-level `requests` package that shadows the pip-installed `requests` imported
+# by tiktoken's downloader, so get_encoding() would silently fall back to char/4.
+# See vendor/tiktoken/. setdefault lets an explicit env override win.
+os.environ.setdefault(
+    "TIKTOKEN_CACHE_DIR",
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "vendor", "tiktoken"),
+)
+
 try:
    import tiktoken as _tiktoken
    _tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 aiohappyeyeballs==2.6.1
-aiohttp==3.13.5
+aiohttp==3.14.0
 aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.13.0
--- a/test/requirements_test.txt
+++ b/test/requirements_test.txt
@ -1,4 +1,3 @@
 pytest>=8.0
 pytest-asyncio>=0.24
 pytest-cov>=5.0
-aioresponses>=0.7
--- a/test/test_fetch.py
+++ b/test/test_fetch.py
@ -1,11 +1,19 @@
-"""Tests for fetch.available_models and fetch.loaded_models using aioresponses mocking."""
+"""Tests for fetch.available_models and fetch.loaded_models.
+
+The backend probes obtain their HTTP client via ``backends.probe.get_probe_session``
+and only ever call ``async with client.get(url, headers=...) as resp``. We patch that
+seam with a tiny fake session instead of mocking aiohttp's internals (aioresponses),
+so the suite stays independent of aiohttp's private ClientResponse/ConnectionKey
+structure across version bumps.
+"""
 import time
+from contextlib import contextmanager
 from unittest.mock import patch, MagicMock

 import pytest
-from aioresponses import aioresponses

 import router
+import backends.probe as probe
 from conftest import TEST_OLLAMA, TEST_LLAMA

 MOCK_OLLAMA_EP = "http://mock-ollama:11434"
@ -22,6 +30,73 @@ def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
    return cfg


+# ── Fake probe session ────────────────────────────────────────────────────────
+
+class _MockResponse:
+    """Minimal stand-in for the aiohttp response used by the probes."""
+
+    def __init__(self, *, status=200, payload=None, text=None):
+        self.status = status
+        self._payload = payload
+        self._text = text if text is not None else ""
+
+    async def json(self):
+        return self._payload
+
+    async def text(self):
+        return self._text
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *exc):
+        return False
+
+
+class _RaisingCtx:
+    """``async with client.get(...)`` that raises on entry — mimics a failed connection."""
+
+    def __init__(self, exc):
+        self._exc = exc
+
+    async def __aenter__(self):
+        raise self._exc
+
+    async def __aexit__(self, *exc):
+        return False
+
+
+class _MockProbeSession:
+    """Stand-in for the aiohttp ClientSession returned by ``get_probe_session``.
+
+    Routes are registered by exact URL via :meth:`add_get`. A registered exception
+    is raised when the route is entered; otherwise a :class:`_MockResponse` is yielded.
+    An unregistered GET fails loudly so tests can't silently pass on a wrong URL.
+    """
+
+    def __init__(self):
+        self._routes = {}
+
+    def add_get(self, url, *, status=200, payload=None, text=None, exception=None):
+        self._routes[url] = exception if exception is not None else _MockResponse(
+            status=status, payload=payload, text=text
+        )
+
+    def get(self, url, **kwargs):
+        if url not in self._routes:
+            raise AssertionError(f"unexpected probe GET {url}")
+        entry = self._routes[url]
+        return _RaisingCtx(entry) if isinstance(entry, Exception) else entry
+
+
+@contextmanager
+def mock_probe():
+    """Patch the probe's session factory to return a fresh :class:`_MockProbeSession`."""
+    session = _MockProbeSession()
+    with patch.object(probe, "get_probe_session", lambda endpoint: session):
+        yield session
+
+
@pytest.fixture(autouse=True)
 def clear_caches(aio_session):
    """aio_session fixture already clears caches and sets up app_state."""
@ -31,8 +106,8 @@ def clear_caches(aio_session):
 class TestFetchAvailableModels:
    async def test_ollama_tags(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/tags",
                payload={"models": [
                    {"name": "llama3.2:latest"},
@ -44,8 +119,8 @@ class TestFetchAvailableModels:

    async def test_openai_compatible_models_endpoint(self):
        cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_LLAMA_EP}/models",
                payload={"data": [{"id": "unsloth/model:Q8_0"}]},
            )
@ -54,8 +129,8 @@ class TestFetchAvailableModels:

    async def test_caches_successful_result(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/tags",
                payload={"models": [{"name": "llama3.2:latest"}]},
            )
@ -66,20 +141,19 @@ class TestFetchAvailableModels:

    async def test_returns_empty_on_http_500(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
            models = await router.fetch.available_models(MOCK_OLLAMA_EP)
        assert models == set()

    async def test_returns_empty_on_connection_error(self):
-        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
        import aiohttp
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/tags",
-                exception=aiohttp.ClientConnectorError(
-                    connection_key=MagicMock(host="mock-ollama", port=11434),
-                    os_error=OSError(111, "refused"),
+                exception=aiohttp.ClientConnectionError(
+                    "Cannot connect to host mock-ollama:11434 [Connection refused]"
                ),
            )
            models = await router.fetch.available_models(MOCK_OLLAMA_EP)
@ -87,8 +161,8 @@ class TestFetchAvailableModels:

    async def test_stale_cache_returned_while_refresh_runs(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/tags",
                payload={"models": [{"name": "llama3.2:latest"}]},
            )
@ -99,8 +173,8 @@ class TestFetchAvailableModels:
            models, _ = router._models_cache[MOCK_OLLAMA_EP]
            router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)

-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/tags",
                payload={"models": [{"name": "llama3.2:latest"}]},
            )
@ -114,8 +188,8 @@ class TestFetchAvailableModels:
        async with router._available_error_cache_lock:
            router._available_error_cache[MOCK_OLLAMA_EP] = time.time()

-        with patch.object(router, "config", cfg), aioresponses():
-            # No HTTP mock registered — if a call happens it will raise
+        with patch.object(router, "config", cfg), mock_probe():
+            # No route registered — if a call happens it raises AssertionError
            models = await router.fetch.available_models(MOCK_OLLAMA_EP)
        assert models == set()

@ -123,8 +197,8 @@ class TestFetchAvailableModels:
 class TestFetchLoadedModels:
    async def test_ollama_ps(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/ps",
                payload={"models": [{"name": "llama3.2:latest"}]},
            )
@ -133,8 +207,8 @@ class TestFetchLoadedModels:

    async def test_llama_server_filters_loaded(self):
        cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_LLAMA_EP}/models",
                payload={"data": [
                    {"id": "model-a", "status": {"value": "loaded"}},
@ -146,8 +220,8 @@ class TestFetchLoadedModels:

    async def test_llama_server_no_status_field_always_loaded(self):
        cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_LLAMA_EP}/models",
                payload={"data": [{"id": "always-on-model"}]},
            )
@ -156,8 +230,8 @@ class TestFetchLoadedModels:

    async def test_returns_empty_on_error(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
            models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
        assert models == set()

@ -170,8 +244,8 @@ class TestFetchLoadedModels:

    async def test_caches_result(self):
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/ps",
                payload={"models": [{"name": "qwen:7b"}]},
            )
@ -183,15 +257,15 @@ class TestFetchLoadedModels:
        # Regression: issue #83 — /api/ps failures must be recorded so
        # `choose_endpoint` can exclude unhealthy backends from routing.
        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
            await router.fetch.loaded_models(MOCK_OLLAMA_EP)
        assert MOCK_OLLAMA_EP in router._loaded_error_cache

    async def test_records_error_for_llama_server_on_failure(self):
        cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
            await router.fetch.loaded_models(MOCK_LLAMA_EP)
        assert MOCK_LLAMA_EP in router._loaded_error_cache

@ -201,8 +275,8 @@ class TestFetchLoadedModels:
        # network probe instead of short-circuiting on the error cache.
        async with router._loaded_error_cache_lock:
            router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
-        with patch.object(router, "config", cfg), aioresponses() as m:
-            m.get(
+        with patch.object(router, "config", cfg), mock_probe() as m:
+            m.add_get(
                f"{MOCK_OLLAMA_EP}/api/ps",
                payload={"models": [{"name": "qwen:7b"}]},
            )
--- a/vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
+++ b/vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4