feat: cache backend clients per endpoint instead of building one (with a fresh SSL context) per request

2026-06-07 09:55:54 +02:00 · 2026-06-07 09:55:54 +02:00 · 3cd530586c
commit 3cd530586c
parent 1ce792c48b
5 changed files with 87 additions and 15 deletions
--- a/test/test_stream_errors.py
+++ b/test/test_stream_errors.py
@@ -80,8 +80,10 @@ def _patches(exc, mark_unhealthy):
    stack.enter_context(patch("api.ollama.is_openai_compatible", lambda ep: False))
    stack.enter_context(patch("api.ollama.decrement_usage", AsyncMock()))
    stack.enter_context(patch("api.ollama._mark_backend_unhealthy", mark_unhealthy))
+    # The native path now fetches a cached client via get_ollama_client() rather
+    # than constructing ollama.AsyncClient inline, so patch that seam.
    stack.enter_context(
-        patch("api.ollama.ollama.AsyncClient", lambda *a, **k: _FakeAsyncClient(exc))
+        patch("api.ollama.get_ollama_client", lambda *a, **k: _FakeAsyncClient(exc))
    )
    return stack