Compare commits
1 commit
v0.9.9-rc3
...
dev-0.9.x-
| Author | SHA1 | Date | |
|---|---|---|---|
| d163fea154 |
5 changed files with 100381 additions and 40 deletions
|
|
@ -7,6 +7,18 @@ fits inside (n_ctx - safety_margin).
|
||||||
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
|
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
|
||||||
exceed_context_size_error bodies and from finish_reason=="length" signals.
|
exceed_context_size_error bodies and from finish_reason=="length" signals.
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Point tiktoken at the vendored cl100k_base vocab so the encoding loads offline,
|
||||||
|
# without a network download. The download would otherwise fail anyway: this repo
|
||||||
|
# has a top-level `requests` package that shadows the pip `requests` tiktoken's
|
||||||
|
# downloader imports, so get_encoding() would silently fall back to char/4. See
|
||||||
|
# vendor/tiktoken/. setdefault lets an explicit env override win.
|
||||||
|
os.environ.setdefault(
|
||||||
|
"TIKTOKEN_CACHE_DIR",
|
||||||
|
os.path.join(os.path.dirname(os.path.abspath(__file__)), "vendor", "tiktoken"),
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tiktoken as _tiktoken
|
import tiktoken as _tiktoken
|
||||||
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
|
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
aiohappyeyeballs==2.6.1
|
aiohappyeyeballs==2.6.1
|
||||||
aiohttp==3.13.5
|
aiohttp==3.14.0
|
||||||
aiosignal==1.4.0
|
aiosignal==1.4.0
|
||||||
annotated-types==0.7.0
|
annotated-types==0.7.0
|
||||||
anyio==4.13.0
|
anyio==4.13.0
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
pytest>=8.0
|
pytest>=8.0
|
||||||
pytest-asyncio>=0.24
|
pytest-asyncio>=0.24
|
||||||
pytest-cov>=5.0
|
pytest-cov>=5.0
|
||||||
aioresponses>=0.7
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,19 @@
|
||||||
"""Tests for fetch.available_models and fetch.loaded_models using aioresponses mocking."""
|
"""Tests for fetch.available_models and fetch.loaded_models.
|
||||||
|
|
||||||
|
The backend probes obtain their HTTP client via ``backends.probe.get_probe_session``
|
||||||
|
and only ever call ``async with client.get(url, headers=...) as resp``. We patch that
|
||||||
|
seam with a tiny fake session instead of mocking aiohttp's internals (aioresponses),
|
||||||
|
so the suite stays independent of aiohttp's private ClientResponse/ConnectionKey
|
||||||
|
structure across version bumps.
|
||||||
|
"""
|
||||||
import time
|
import time
|
||||||
|
from contextlib import contextmanager
|
||||||
from unittest.mock import patch, MagicMock
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from aioresponses import aioresponses
|
|
||||||
|
|
||||||
import router
|
import router
|
||||||
|
import backends.probe as probe
|
||||||
from conftest import TEST_OLLAMA, TEST_LLAMA
|
from conftest import TEST_OLLAMA, TEST_LLAMA
|
||||||
|
|
||||||
MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
||||||
|
|
@ -22,6 +30,73 @@ def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
|
|
||||||
|
# ── Fake probe session ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
class _MockResponse:
|
||||||
|
"""Minimal stand-in for the aiohttp response used by the probes."""
|
||||||
|
|
||||||
|
def __init__(self, *, status=200, payload=None, text=None):
|
||||||
|
self.status = status
|
||||||
|
self._payload = payload
|
||||||
|
self._text = text if text is not None else ""
|
||||||
|
|
||||||
|
async def json(self):
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
async def text(self):
|
||||||
|
return self._text
|
||||||
|
|
||||||
|
async def __aenter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class _RaisingCtx:
|
||||||
|
"""``async with client.get(...)`` that raises on entry — mimics a failed connection."""
|
||||||
|
|
||||||
|
def __init__(self, exc):
|
||||||
|
self._exc = exc
|
||||||
|
|
||||||
|
async def __aenter__(self):
|
||||||
|
raise self._exc
|
||||||
|
|
||||||
|
async def __aexit__(self, *exc):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class _MockProbeSession:
|
||||||
|
"""Stand-in for the aiohttp ClientSession returned by ``get_probe_session``.
|
||||||
|
|
||||||
|
Routes are registered by exact URL via :meth:`add_get`. A registered exception
|
||||||
|
is raised when the route is entered; otherwise a :class:`_MockResponse` is yielded.
|
||||||
|
An unregistered GET fails loudly so tests can't silently pass on a wrong URL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self._routes = {}
|
||||||
|
|
||||||
|
def add_get(self, url, *, status=200, payload=None, text=None, exception=None):
|
||||||
|
self._routes[url] = exception if exception is not None else _MockResponse(
|
||||||
|
status=status, payload=payload, text=text
|
||||||
|
)
|
||||||
|
|
||||||
|
def get(self, url, **kwargs):
|
||||||
|
if url not in self._routes:
|
||||||
|
raise AssertionError(f"unexpected probe GET {url}")
|
||||||
|
entry = self._routes[url]
|
||||||
|
return _RaisingCtx(entry) if isinstance(entry, Exception) else entry
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def mock_probe():
|
||||||
|
"""Patch the probe's session factory to return a fresh :class:`_MockProbeSession`."""
|
||||||
|
session = _MockProbeSession()
|
||||||
|
with patch.object(probe, "get_probe_session", lambda endpoint: session):
|
||||||
|
yield session
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def clear_caches(aio_session):
|
def clear_caches(aio_session):
|
||||||
"""aio_session fixture already clears caches and sets up app_state."""
|
"""aio_session fixture already clears caches and sets up app_state."""
|
||||||
|
|
@ -31,8 +106,8 @@ def clear_caches(aio_session):
|
||||||
class TestFetchAvailableModels:
|
class TestFetchAvailableModels:
|
||||||
async def test_ollama_tags(self):
|
async def test_ollama_tags(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [
|
payload={"models": [
|
||||||
{"name": "llama3.2:latest"},
|
{"name": "llama3.2:latest"},
|
||||||
|
|
@ -44,8 +119,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_openai_compatible_models_endpoint(self):
|
async def test_openai_compatible_models_endpoint(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [{"id": "unsloth/model:Q8_0"}]},
|
payload={"data": [{"id": "unsloth/model:Q8_0"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -54,8 +129,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_caches_successful_result(self):
|
async def test_caches_successful_result(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -66,20 +141,19 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_returns_empty_on_http_500(self):
|
async def test_returns_empty_on_http_500(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
m.add_get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
async def test_returns_empty_on_connection_error(self):
|
async def test_returns_empty_on_connection_error(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
m.get(
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
exception=aiohttp.ClientConnectorError(
|
exception=aiohttp.ClientConnectionError(
|
||||||
connection_key=MagicMock(host="mock-ollama", port=11434),
|
"Cannot connect to host mock-ollama:11434 [Connection refused]"
|
||||||
os_error=OSError(111, "refused"),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
|
|
@ -87,8 +161,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_stale_cache_returned_while_refresh_runs(self):
|
async def test_stale_cache_returned_while_refresh_runs(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -99,8 +173,8 @@ class TestFetchAvailableModels:
|
||||||
models, _ = router._models_cache[MOCK_OLLAMA_EP]
|
models, _ = router._models_cache[MOCK_OLLAMA_EP]
|
||||||
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
|
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
|
||||||
|
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -114,8 +188,8 @@ class TestFetchAvailableModels:
|
||||||
async with router._available_error_cache_lock:
|
async with router._available_error_cache_lock:
|
||||||
router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
|
router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
|
||||||
|
|
||||||
with patch.object(router, "config", cfg), aioresponses():
|
with patch.object(router, "config", cfg), mock_probe():
|
||||||
# No HTTP mock registered — if a call happens it will raise
|
# No route registered — if a call happens it raises AssertionError
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
|
|
@ -123,8 +197,8 @@ class TestFetchAvailableModels:
|
||||||
class TestFetchLoadedModels:
|
class TestFetchLoadedModels:
|
||||||
async def test_ollama_ps(self):
|
async def test_ollama_ps(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -133,8 +207,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_llama_server_filters_loaded(self):
|
async def test_llama_server_filters_loaded(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [
|
payload={"data": [
|
||||||
{"id": "model-a", "status": {"value": "loaded"}},
|
{"id": "model-a", "status": {"value": "loaded"}},
|
||||||
|
|
@ -146,8 +220,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_llama_server_no_status_field_always_loaded(self):
|
async def test_llama_server_no_status_field_always_loaded(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [{"id": "always-on-model"}]},
|
payload={"data": [{"id": "always-on-model"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -156,8 +230,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_returns_empty_on_error(self):
|
async def test_returns_empty_on_error(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
||||||
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
|
|
@ -170,8 +244,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_caches_result(self):
|
async def test_caches_result(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "qwen:7b"}]},
|
payload={"models": [{"name": "qwen:7b"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -183,15 +257,15 @@ class TestFetchLoadedModels:
|
||||||
# Regression: issue #83 — /api/ps failures must be recorded so
|
# Regression: issue #83 — /api/ps failures must be recorded so
|
||||||
# `choose_endpoint` can exclude unhealthy backends from routing.
|
# `choose_endpoint` can exclude unhealthy backends from routing.
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
||||||
await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||||
assert MOCK_OLLAMA_EP in router._loaded_error_cache
|
assert MOCK_OLLAMA_EP in router._loaded_error_cache
|
||||||
|
|
||||||
async def test_records_error_for_llama_server_on_failure(self):
|
async def test_records_error_for_llama_server_on_failure(self):
|
||||||
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
m.add_get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
||||||
await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
||||||
assert MOCK_LLAMA_EP in router._loaded_error_cache
|
assert MOCK_LLAMA_EP in router._loaded_error_cache
|
||||||
|
|
||||||
|
|
@ -201,8 +275,8 @@ class TestFetchLoadedModels:
|
||||||
# network probe instead of short-circuiting on the error cache.
|
# network probe instead of short-circuiting on the error cache.
|
||||||
async with router._loaded_error_cache_lock:
|
async with router._loaded_error_cache_lock:
|
||||||
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
|
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
|
||||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||||
m.get(
|
m.add_get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "qwen:7b"}]},
|
payload={"models": [{"name": "qwen:7b"}]},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
Normal file
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue