Compare commits
No commits in common. "dev-0.9.x-ref" and "v0.9.9-rc3" have entirely different histories.
dev-0.9.x-
...
v0.9.9-rc3
5 changed files with 40 additions and 100381 deletions
|
|
@ -7,18 +7,6 @@ fits inside (n_ctx - safety_margin).
|
||||||
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
|
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
|
||||||
exceed_context_size_error bodies and from finish_reason=="length" signals.
|
exceed_context_size_error bodies and from finish_reason=="length" signals.
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
|
|
||||||
# Point tiktoken at the vendored cl100k_base vocab so the encoding loads offline,
|
|
||||||
# without a network download. The download would otherwise fail anyway: this repo
|
|
||||||
# has a top-level `requests` package that shadows the pip `requests` tiktoken's
|
|
||||||
# downloader imports, so get_encoding() would silently fall back to char/4. See
|
|
||||||
# vendor/tiktoken/. setdefault lets an explicit env override win.
|
|
||||||
os.environ.setdefault(
|
|
||||||
"TIKTOKEN_CACHE_DIR",
|
|
||||||
os.path.join(os.path.dirname(os.path.abspath(__file__)), "vendor", "tiktoken"),
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tiktoken as _tiktoken
|
import tiktoken as _tiktoken
|
||||||
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
|
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
aiohappyeyeballs==2.6.1
|
aiohappyeyeballs==2.6.1
|
||||||
aiohttp==3.14.0
|
aiohttp==3.13.5
|
||||||
aiosignal==1.4.0
|
aiosignal==1.4.0
|
||||||
annotated-types==0.7.0
|
annotated-types==0.7.0
|
||||||
anyio==4.13.0
|
anyio==4.13.0
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
pytest>=8.0
|
pytest>=8.0
|
||||||
pytest-asyncio>=0.24
|
pytest-asyncio>=0.24
|
||||||
pytest-cov>=5.0
|
pytest-cov>=5.0
|
||||||
|
aioresponses>=0.7
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,11 @@
|
||||||
"""Tests for fetch.available_models and fetch.loaded_models.
|
"""Tests for fetch.available_models and fetch.loaded_models using aioresponses mocking."""
|
||||||
|
|
||||||
The backend probes obtain their HTTP client via ``backends.probe.get_probe_session``
|
|
||||||
and only ever call ``async with client.get(url, headers=...) as resp``. We patch that
|
|
||||||
seam with a tiny fake session instead of mocking aiohttp's internals (aioresponses),
|
|
||||||
so the suite stays independent of aiohttp's private ClientResponse/ConnectionKey
|
|
||||||
structure across version bumps.
|
|
||||||
"""
|
|
||||||
import time
|
import time
|
||||||
from contextlib import contextmanager
|
|
||||||
from unittest.mock import patch, MagicMock
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from aioresponses import aioresponses
|
||||||
|
|
||||||
import router
|
import router
|
||||||
import backends.probe as probe
|
|
||||||
from conftest import TEST_OLLAMA, TEST_LLAMA
|
from conftest import TEST_OLLAMA, TEST_LLAMA
|
||||||
|
|
||||||
MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
||||||
|
|
@ -30,73 +22,6 @@ def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
|
|
||||||
# ── Fake probe session ────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
class _MockResponse:
|
|
||||||
"""Minimal stand-in for the aiohttp response used by the probes."""
|
|
||||||
|
|
||||||
def __init__(self, *, status=200, payload=None, text=None):
|
|
||||||
self.status = status
|
|
||||||
self._payload = payload
|
|
||||||
self._text = text if text is not None else ""
|
|
||||||
|
|
||||||
async def json(self):
|
|
||||||
return self._payload
|
|
||||||
|
|
||||||
async def text(self):
|
|
||||||
return self._text
|
|
||||||
|
|
||||||
async def __aenter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
async def __aexit__(self, *exc):
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
class _RaisingCtx:
|
|
||||||
"""``async with client.get(...)`` that raises on entry — mimics a failed connection."""
|
|
||||||
|
|
||||||
def __init__(self, exc):
|
|
||||||
self._exc = exc
|
|
||||||
|
|
||||||
async def __aenter__(self):
|
|
||||||
raise self._exc
|
|
||||||
|
|
||||||
async def __aexit__(self, *exc):
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
class _MockProbeSession:
|
|
||||||
"""Stand-in for the aiohttp ClientSession returned by ``get_probe_session``.
|
|
||||||
|
|
||||||
Routes are registered by exact URL via :meth:`add_get`. A registered exception
|
|
||||||
is raised when the route is entered; otherwise a :class:`_MockResponse` is yielded.
|
|
||||||
An unregistered GET fails loudly so tests can't silently pass on a wrong URL.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._routes = {}
|
|
||||||
|
|
||||||
def add_get(self, url, *, status=200, payload=None, text=None, exception=None):
|
|
||||||
self._routes[url] = exception if exception is not None else _MockResponse(
|
|
||||||
status=status, payload=payload, text=text
|
|
||||||
)
|
|
||||||
|
|
||||||
def get(self, url, **kwargs):
|
|
||||||
if url not in self._routes:
|
|
||||||
raise AssertionError(f"unexpected probe GET {url}")
|
|
||||||
entry = self._routes[url]
|
|
||||||
return _RaisingCtx(entry) if isinstance(entry, Exception) else entry
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def mock_probe():
|
|
||||||
"""Patch the probe's session factory to return a fresh :class:`_MockProbeSession`."""
|
|
||||||
session = _MockProbeSession()
|
|
||||||
with patch.object(probe, "get_probe_session", lambda endpoint: session):
|
|
||||||
yield session
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def clear_caches(aio_session):
|
def clear_caches(aio_session):
|
||||||
"""aio_session fixture already clears caches and sets up app_state."""
|
"""aio_session fixture already clears caches and sets up app_state."""
|
||||||
|
|
@ -106,8 +31,8 @@ def clear_caches(aio_session):
|
||||||
class TestFetchAvailableModels:
|
class TestFetchAvailableModels:
|
||||||
async def test_ollama_tags(self):
|
async def test_ollama_tags(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [
|
payload={"models": [
|
||||||
{"name": "llama3.2:latest"},
|
{"name": "llama3.2:latest"},
|
||||||
|
|
@ -119,8 +44,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_openai_compatible_models_endpoint(self):
|
async def test_openai_compatible_models_endpoint(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [{"id": "unsloth/model:Q8_0"}]},
|
payload={"data": [{"id": "unsloth/model:Q8_0"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -129,8 +54,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_caches_successful_result(self):
|
async def test_caches_successful_result(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -141,19 +66,20 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_returns_empty_on_http_500(self):
|
async def test_returns_empty_on_http_500(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
m.get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
async def test_returns_empty_on_connection_error(self):
|
async def test_returns_empty_on_connection_error(self):
|
||||||
import aiohttp
|
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
import aiohttp
|
||||||
m.add_get(
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
exception=aiohttp.ClientConnectionError(
|
exception=aiohttp.ClientConnectorError(
|
||||||
"Cannot connect to host mock-ollama:11434 [Connection refused]"
|
connection_key=MagicMock(host="mock-ollama", port=11434),
|
||||||
|
os_error=OSError(111, "refused"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
|
|
@ -161,8 +87,8 @@ class TestFetchAvailableModels:
|
||||||
|
|
||||||
async def test_stale_cache_returned_while_refresh_runs(self):
|
async def test_stale_cache_returned_while_refresh_runs(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -173,8 +99,8 @@ class TestFetchAvailableModels:
|
||||||
models, _ = router._models_cache[MOCK_OLLAMA_EP]
|
models, _ = router._models_cache[MOCK_OLLAMA_EP]
|
||||||
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
|
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
|
||||||
|
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -188,8 +114,8 @@ class TestFetchAvailableModels:
|
||||||
async with router._available_error_cache_lock:
|
async with router._available_error_cache_lock:
|
||||||
router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
|
router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
|
||||||
|
|
||||||
with patch.object(router, "config", cfg), mock_probe():
|
with patch.object(router, "config", cfg), aioresponses():
|
||||||
# No route registered — if a call happens it raises AssertionError
|
# No HTTP mock registered — if a call happens it will raise
|
||||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
|
|
@ -197,8 +123,8 @@ class TestFetchAvailableModels:
|
||||||
class TestFetchLoadedModels:
|
class TestFetchLoadedModels:
|
||||||
async def test_ollama_ps(self):
|
async def test_ollama_ps(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -207,8 +133,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_llama_server_filters_loaded(self):
|
async def test_llama_server_filters_loaded(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [
|
payload={"data": [
|
||||||
{"id": "model-a", "status": {"value": "loaded"}},
|
{"id": "model-a", "status": {"value": "loaded"}},
|
||||||
|
|
@ -220,8 +146,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_llama_server_no_status_field_always_loaded(self):
|
async def test_llama_server_no_status_field_always_loaded(self):
|
||||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_LLAMA_EP}/models",
|
f"{MOCK_LLAMA_EP}/models",
|
||||||
payload={"data": [{"id": "always-on-model"}]},
|
payload={"data": [{"id": "always-on-model"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -230,8 +156,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_returns_empty_on_error(self):
|
async def test_returns_empty_on_error(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
||||||
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||||
assert models == set()
|
assert models == set()
|
||||||
|
|
||||||
|
|
@ -244,8 +170,8 @@ class TestFetchLoadedModels:
|
||||||
|
|
||||||
async def test_caches_result(self):
|
async def test_caches_result(self):
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "qwen:7b"}]},
|
payload={"models": [{"name": "qwen:7b"}]},
|
||||||
)
|
)
|
||||||
|
|
@ -257,15 +183,15 @@ class TestFetchLoadedModels:
|
||||||
# Regression: issue #83 — /api/ps failures must be recorded so
|
# Regression: issue #83 — /api/ps failures must be recorded so
|
||||||
# `choose_endpoint` can exclude unhealthy backends from routing.
|
# `choose_endpoint` can exclude unhealthy backends from routing.
|
||||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
||||||
await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||||
assert MOCK_OLLAMA_EP in router._loaded_error_cache
|
assert MOCK_OLLAMA_EP in router._loaded_error_cache
|
||||||
|
|
||||||
async def test_records_error_for_llama_server_on_failure(self):
|
async def test_records_error_for_llama_server_on_failure(self):
|
||||||
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
|
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
||||||
await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
||||||
assert MOCK_LLAMA_EP in router._loaded_error_cache
|
assert MOCK_LLAMA_EP in router._loaded_error_cache
|
||||||
|
|
||||||
|
|
@ -275,8 +201,8 @@ class TestFetchLoadedModels:
|
||||||
# network probe instead of short-circuiting on the error cache.
|
# network probe instead of short-circuiting on the error cache.
|
||||||
async with router._loaded_error_cache_lock:
|
async with router._loaded_error_cache_lock:
|
||||||
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
|
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
|
||||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||||
m.add_get(
|
m.get(
|
||||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||||
payload={"models": [{"name": "qwen:7b"}]},
|
payload={"models": [{"name": "qwen:7b"}]},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue