fix:

- _fetch_loaded_models_internal now writes _loaded_error_cache[endpoint] = time.time() on /api/ps or /v1/models failure, and clears the entry on success
- choose_endpoint now filters out candidates with a fresh (<300s) loaded-models error
- /health now probes both /api/version and /api/ps for Ollama endpoints
- dashboard adaptation

relates to #83
2026-05-18 13:45:06 +02:00 · 2026-05-18 13:45:06 +02:00 · db6aa73903
commit db6aa73903
parent 0b64a84e96
4 changed files with 251 additions and 90 deletions
--- a/test/test_fetch.py
+++ b/test/test_fetch.py
@@ -178,3 +178,33 @@ class TestFetchLoadedModels:
            first  = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
            second = await router.fetch.loaded_models(MOCK_OLLAMA_EP)
        assert first == second
+
+    async def test_records_error_in_loaded_error_cache_on_failure(self):
+        # Regression: issue #83 — /api/ps failures must be recorded so
+        # `choose_endpoint` can exclude unhealthy backends from routing.
+        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
+        with patch.object(router, "config", cfg), aioresponses() as m:
+            m.get(f"{MOCK_OLLAMA_EP}/api/ps", status=502, payload={})
+            await router.fetch.loaded_models(MOCK_OLLAMA_EP)
+        assert MOCK_OLLAMA_EP in router._loaded_error_cache
+
+    async def test_records_error_for_llama_server_on_failure(self):
+        cfg = _make_cfg(ollama_eps=[], llama_eps=[MOCK_LLAMA_EP])
+        with patch.object(router, "config", cfg), aioresponses() as m:
+            m.get(f"{MOCK_LLAMA_EP}/models", status=502, payload={})
+            await router.fetch.loaded_models(MOCK_LLAMA_EP)
+        assert MOCK_LLAMA_EP in router._loaded_error_cache
+
+    async def test_clears_error_cache_on_subsequent_success(self):
+        cfg = _make_cfg(ollama_eps=[MOCK_OLLAMA_EP], llama_eps=[])
+        # Pre-seed an old error so loaded_models() falls through to the
+        # network probe instead of short-circuiting on the error cache.
+        async with router._loaded_error_cache_lock:
+            router._loaded_error_cache[MOCK_OLLAMA_EP] = time.time() - 301
+        with patch.object(router, "config", cfg), aioresponses() as m:
+            m.get(
+                f"{MOCK_OLLAMA_EP}/api/ps",
+                payload={"models": [{"name": "qwen:7b"}]},
+            )
+            await router.fetch.loaded_models(MOCK_OLLAMA_EP)
+        assert MOCK_OLLAMA_EP not in router._loaded_error_cache