Compare commits
1 commit
v0.9.9-rc3
...
dev-0.9.x-
| Author | SHA1 | Date | |
|---|---|---|---|
| d163fea154 |
5 changed files with 100381 additions and 40 deletions
|
|
@ -7,6 +7,18 @@ fits inside (n_ctx - safety_margin).
|
|||
Also owns the per-(endpoint, model) n_ctx cache that the routes populate from
|
||||
exceed_context_size_error bodies and from finish_reason=="length" signals.
|
||||
"""
|
||||
import os
|
||||
|
||||
# Load the cl100k_base vocab from the vendored copy under vendor/tiktoken/ so
# the encoding works offline. A network download is not an option here: the
# repo's top-level `requests` package shadows the pip `requests` that
# tiktoken's downloader imports, so get_encoding() would silently fall back to
# char/4. An explicitly exported TIKTOKEN_CACHE_DIR is left untouched.
if "TIKTOKEN_CACHE_DIR" not in os.environ:
    os.environ["TIKTOKEN_CACHE_DIR"] = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "vendor",
        "tiktoken",
    )
|
||||
|
||||
try:
|
||||
import tiktoken as _tiktoken
|
||||
_tiktoken_enc = _tiktoken.get_encoding("cl100k_base")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
aiohappyeyeballs==2.6.1
|
||||
aiohttp==3.13.5
|
||||
aiohttp==3.14.0
|
||||
aiosignal==1.4.0
|
||||
annotated-types==0.7.0
|
||||
anyio==4.13.0
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
pytest>=8.0
|
||||
pytest-asyncio>=0.24
|
||||
pytest-cov>=5.0
|
||||
aioresponses>=0.7
|
||||
|
|
|
|||
|
|
@ -1,11 +1,19 @@
|
|||
"""Tests for fetch.available_models and fetch.loaded_models using aioresponses mocking."""
|
||||
"""Tests for fetch.available_models and fetch.loaded_models.
|
||||
|
||||
The backend probes obtain their HTTP client via ``backends.probe.get_probe_session``
|
||||
and only ever call ``async with client.get(url, headers=...) as resp``. We patch that
|
||||
seam with a tiny fake session instead of mocking aiohttp's internals (aioresponses),
|
||||
so the suite stays independent of aiohttp's private ClientResponse/ConnectionKey
|
||||
structure across version bumps.
|
||||
"""
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
from aioresponses import aioresponses
|
||||
|
||||
import router
|
||||
import backends.probe as probe
|
||||
from conftest import TEST_OLLAMA, TEST_LLAMA
|
||||
|
||||
MOCK_OLLAMA_EP = "http://mock-ollama:11434"
|
||||
|
|
@ -22,6 +30,73 @@ def _make_cfg(ollama_eps=None, llama_eps=None, api_keys=None):
|
|||
return cfg
|
||||
|
||||
|
||||
# ── Fake probe session ────────────────────────────────────────────────────────
|
||||
|
||||
class _MockResponse:
    """Canned probe response usable directly in ``async with``.

    Exposes ``status`` plus awaitable ``json()`` / ``text()`` accessors that
    hand back the values given at construction time.
    """

    def __init__(self, *, status=200, payload=None, text=None):
        self.status = status
        self._payload = payload
        # A missing body reads back as the empty string, never None.
        self._text = "" if text is None else text

    async def json(self):
        """Return the payload passed to the constructor (``None`` if absent)."""
        return self._payload

    async def text(self):
        """Return the configured body text, or ``""`` when none was given."""
        return self._text

    async def __aenter__(self):
        # The response acts as its own async context manager.
        return self

    async def __aexit__(self, *exc_info):
        # False: exceptions raised inside the ``async with`` body propagate.
        return False
|
||||
|
||||
|
||||
class _RaisingCtx:
    """Async context manager whose entry raises the stored exception.

    Returned in place of a response so that ``async with client.get(...)``
    fails on entry, the way a refused connection would.
    """

    def __init__(self, exc):
        self._exc = exc

    async def __aenter__(self):
        # Fail before the ``async with`` body ever runs.
        raise self._exc

    async def __aexit__(self, *exc_info):
        # False: never suppress an exception.
        return False
|
||||
|
||||
|
||||
class _MockProbeSession:
    """Stand-in for the aiohttp ClientSession returned by ``get_probe_session``.

    Routes are registered by exact URL via :meth:`add_get`. A route registered
    with ``exception=...`` raises that exception when the returned context
    manager is entered; any other route yields a :class:`_MockResponse`.
    An unregistered GET fails loudly so tests can't silently pass on a wrong URL.
    """

    def __init__(self):
        # url -> _MockResponse, or the exception (instance or class) to raise.
        self._routes = {}

    def add_get(self, url, *, status=200, payload=None, text=None, exception=None):
        """Register the canned outcome for a GET to ``url``.

        Args:
            url: Exact URL the probe is expected to request.
            status: HTTP status reported by the fake response.
            payload: Value returned by the response's ``json()``.
            text: Value returned by the response's ``text()``.
            exception: If given, raised on entering ``async with get(url)``
                instead of yielding a response; ``status``/``payload``/``text``
                are then ignored. May be an exception instance or class.
        """
        if exception is not None:
            self._routes[url] = exception
        else:
            self._routes[url] = _MockResponse(status=status, payload=payload, text=text)

    def get(self, url, **kwargs):
        """Return the async context manager registered for ``url``.

        Extra keyword arguments (e.g. ``headers``) are accepted and ignored.

        Raises:
            AssertionError: if no route was registered for ``url``.
        """
        try:
            entry = self._routes[url]
        except KeyError:
            raise AssertionError(f"unexpected probe GET {url}") from None
        if isinstance(entry, _MockResponse):
            return entry
        # Anything registered via ``exception=`` is raised on entry. Deciding
        # on "is it a response?" rather than ``isinstance(entry, Exception)``
        # also covers BaseException subclasses (e.g. asyncio.CancelledError)
        # and exception classes, which would otherwise be handed back as if
        # they were a response and break confusingly at ``async with``.
        return _RaisingCtx(entry)
|
||||
|
||||
|
||||
@contextmanager
def mock_probe():
    """Swap ``probe.get_probe_session`` for a factory returning one fake session.

    Yields the fresh :class:`_MockProbeSession` so the test can register routes
    on it; the original factory is restored when the ``with`` block exits.
    """
    fake_session = _MockProbeSession()

    def _session_factory(endpoint):
        # Every endpoint shares the same fake session for this context.
        return fake_session

    with patch.object(probe, "get_probe_session", _session_factory):
        yield fake_session
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clear_caches(aio_session):
|
||||
"""aio_session fixture already clears caches and sets up app_state."""
|
||||
|
|
@ -31,8 +106,8 @@ def clear_caches(aio_session):
|
|||
class TestFetchAvailableModels:
|
||||
async def test_ollama_tags(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||
payload={"models": [
|
||||
{"name": "llama3.2:latest"},
|
||||
|
|
@ -44,8 +119,8 @@ class TestFetchAvailableModels:
|
|||
|
||||
async def test_openai_compatible_models_endpoint(self):
|
||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_LLAMA_EP}/models",
|
||||
payload={"data": [{"id": "unsloth/model:Q8_0"}]},
|
||||
)
|
||||
|
|
@ -54,8 +129,8 @@ class TestFetchAvailableModels:
|
|||
|
||||
async def test_caches_successful_result(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||
)
|
||||
|
|
@ -66,20 +141,19 @@ class TestFetchAvailableModels:
|
|||
|
||||
async def test_returns_empty_on_http_500(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/tags", status=500, payload={"error": "oops"})
|
||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||
assert models == set()
|
||||
|
||||
async def test_returns_empty_on_connection_error(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
import aiohttp
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||
exception=aiohttp.ClientConnectorError(
|
||||
connection_key=MagicMock(host="mock-ollama", port=11434),
|
||||
os_error=OSError(111, "refused"),
|
||||
exception=aiohttp.ClientConnectionError(
|
||||
"Cannot connect to host mock-ollama:11434 [Connection refused]"
|
||||
),
|
||||
)
|
||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||
|
|
@ -87,8 +161,8 @@ class TestFetchAvailableModels:
|
|||
|
||||
async def test_stale_cache_returned_while_refresh_runs(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||
)
|
||||
|
|
@ -99,8 +173,8 @@ class TestFetchAvailableModels:
|
|||
models, _ = router._models_cache[MOCK_OLLAMA_EP]
|
||||
router._models_cache[MOCK_OLLAMA_EP] = (models, time.time() - 400)
|
||||
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/tags",
|
||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||
)
|
||||
|
|
@ -114,8 +188,8 @@ class TestFetchAvailableModels:
|
|||
async with router._available_error_cache_lock:
|
||||
router._available_error_cache[MOCK_OLLAMA_EP] = time.time()
|
||||
|
||||
with patch.object(router, "config", cfg), aioresponses():
|
||||
# No HTTP mock registered — if a call happens it will raise
|
||||
with patch.object(router, "config", cfg), mock_probe():
|
||||
# No route registered — if a call happens it raises AssertionError
|
||||
models = await router.fetch.available_models(MOCK_OLLAMA_EP)
|
||||
assert models == set()
|
||||
|
||||
|
|
@ -123,8 +197,8 @@ class TestFetchAvailableModels:
|
|||
class TestFetchLoadedModels:
|
||||
async def test_ollama_ps(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||
payload={"models": [{"name": "llama3.2:latest"}]},
|
||||
)
|
||||
|
|
@ -133,8 +207,8 @@ class TestFetchLoadedModels:
|
|||
|
||||
async def test_llama_server_filters_loaded(self):
|
||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_LLAMA_EP}/models",
|
||||
payload={"data": [
|
||||
{"id": "model-a", "status": {"value": "loaded"}},
|
||||
|
|
@ -146,8 +220,8 @@ class TestFetchLoadedModels:
|
|||
|
||||
async def test_llama_server_no_status_field_always_loaded(self):
|
||||
cfg = _make_cfg(llama_eps=[MOCK_LLAMA_EP])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_LLAMA_EP}/models",
|
||||
payload={"data": [{"id": "always-on-model"}]},
|
||||
)
|
||||
|
|
@ -156,8 +230,8 @@ class TestFetchLoadedModels:
|
|||
|
||||
async def test_returns_empty_on_error(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=503, payload={})
|
||||
models = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||
assert models == set()
|
||||
|
||||
|
|
@ -170,8 +244,8 @@ class TestFetchLoadedModels:
|
|||
|
||||
async def test_caches_result(self):
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||
payload={"models": [{"name": "qwen:7b"}]},
|
||||
)
|
||||
|
|
@ -183,15 +257,15 @@ class TestFetchLoadedModels:
|
|||
# Regression: issue #83 — /api/ps failures must be recorded so
|
||||
# `choose_endpoint` can exclude unhealthy backends from routing.
|
||||
cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
|
||||
await router.fetch.loaded_models(MOCK_OLLAMA_EP)
|
||||
assert MOCK_OLLAMA_EP in router._loaded_error_cache
|
||||
|
||||
async def test_records_error_for_llama_server_on_failure(self):
|
||||
cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
|
||||
await router.fetch.loaded_models(MOCK_LLAMA_EP)
|
||||
assert MOCK_LLAMA_EP in router._loaded_error_cache
|
||||
|
||||
|
|
@ -201,8 +275,8 @@ class TestFetchLoadedModels:
|
|||
# network probe instead of short-circuiting on the error cache.
|
||||
async with router._loaded_error_cache_lock:
|
||||
router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
|
||||
with patch.object(router, "config", cfg), aioresponses() as m:
|
||||
m.get(
|
||||
with patch.object(router, "config", cfg), mock_probe() as m:
|
||||
m.add_get(
|
||||
f"{MOCK_OLLAMA_EP}/api/ps",
|
||||
payload={"models": [{"name": "qwen:7b"}]},
|
||||
)
|
||||
|
|
|
|||
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
Normal file
100256
vendor/tiktoken/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue