feat(middleware): scope model fallback to provider/network errors only

2026-05-07 14:52:39 +02:00 · 2026-05-05 18:04:47 +02:00 · 2026-05-05 18:04:47 +02:00 · 1745d7dccf
commit 1745d7dccf
parent f695298d30
6 changed files with 275 additions and 17 deletions
--- a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py
+++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py
@@ -31,12 +31,12 @@ from app.agents.multi_agent_chat.subagents.shared.subagent_builder import (
 )


-class _AlwaysFailingChatModel(BaseChatModel):
-    """Mimics a provider hard-failing on every call (rate limit / empty stream).
+class RateLimitError(Exception):
+    """Provider-style 429; matches the scoped-fallback eligibility allowlist by name."""

-    ``ModelFallbackMiddleware`` triggers on any ``Exception``, so the exact
-    error type doesn't matter for the contract under test.
-    """
+
+class _AlwaysFailingChatModel(BaseChatModel):
+    """Mimics a provider hard-failing on every call (rate limit / empty stream)."""

    @property
    def _llm_type(self) -> str:
@@ -50,7 +50,7 @@ class _AlwaysFailingChatModel(BaseChatModel):
        **kwargs: Any,
    ) -> ChatResult:
        msg = "primary llm exploded"
-        raise RuntimeError(msg)
+        raise RateLimitError(msg)

    async def _agenerate(
        self,
@@ -60,17 +60,17 @@ class _AlwaysFailingChatModel(BaseChatModel):
        **kwargs: Any,
    ) -> ChatResult:
        msg = "primary llm exploded"
-        raise RuntimeError(msg)
+        raise RateLimitError(msg)

    def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGeneration]:
        msg = "primary llm exploded"
-        raise RuntimeError(msg)
+        raise RateLimitError(msg)

    async def _astream(
        self, *args: Any, **kwargs: Any
    ) -> AsyncIterator[ChatGeneration]:
        msg = "primary llm exploded"
-        raise RuntimeError(msg)
+        raise RateLimitError(msg)
        yield  # pragma: no cover - unreachable, satisfies async generator typing