diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py
new file mode 100644
index 000000000..92596b771
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py
@@ -0,0 +1,7 @@
+"""Resilience middleware shared as the same instances across parent / general-purpose / registry."""
+
+from __future__ import annotations
+
+from .bundle import ResilienceBundle, build_resilience_bundle
+
+__all__ = ["ResilienceBundle", "build_resilience_bundle"]
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py
new file mode 100644
index 000000000..45f76a6f3
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py
@@ -0,0 +1,51 @@
+"""Construct each resilience middleware once; same instances flow into every consumer."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from langchain.agents.middleware import (
+    ModelCallLimitMiddleware,
+    ToolCallLimitMiddleware,
+)
+
+from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.new_chat.middleware import RetryAfterMiddleware
+from app.agents.new_chat.middleware.scoped_model_fallback import (
+    ScopedModelFallbackMiddleware,
+)
+
+from .fallback import build_fallback_mw
+from .model_call_limit import build_model_call_limit_mw
+from .retry import build_retry_mw
+from .tool_call_limit import build_tool_call_limit_mw
+
+
+@dataclass(frozen=True)
+class ResilienceBundle:
+    retry: RetryAfterMiddleware | None
+    fallback: ScopedModelFallbackMiddleware | None
+    model_call_limit: ModelCallLimitMiddleware | None
+    tool_call_limit: ToolCallLimitMiddleware | None
+
+    def as_list(self) -> list[Any]:
+        return [
+            m
+            for m in (
+                self.retry,
+                self.fallback,
+                self.model_call_limit,
+                self.tool_call_limit,
+            )
+            if m is not None
+        ]
+
+
+def build_resilience_bundle(flags: AgentFeatureFlags) -> ResilienceBundle:
+    return ResilienceBundle(
+        retry=build_retry_mw(flags),
+        fallback=build_fallback_mw(flags),
+        model_call_limit=build_model_call_limit_mw(flags),
+        tool_call_limit=build_tool_call_limit_mw(flags),
+    )
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py
new file mode 100644
index 000000000..ea68a764e
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py
@@ -0,0 +1,27 @@
+"""Switch to a fallback model on provider/network errors only."""
+
+from __future__ import annotations
+
+import logging
+
+from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.new_chat.middleware.scoped_model_fallback import (
+    ScopedModelFallbackMiddleware,
+)
+
+from ..flags import enabled
+
+
+def build_fallback_mw(
+    flags: AgentFeatureFlags,
+) -> ScopedModelFallbackMiddleware | None:
+    if not enabled(flags, "enable_model_fallback"):
+        return None
+    try:
+        return ScopedModelFallbackMiddleware(
+            "openai:gpt-4o-mini",
+            "anthropic:claude-3-5-haiku-20241022",
+        )
+    except Exception:
+        logging.warning("ScopedModelFallbackMiddleware init failed; skipping.")
+        return None
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py
new file mode 100644
index 000000000..85707a385
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py
@@ -0,0 +1,21 @@
+"""Cap model calls per thread / per run to prevent runaway cost."""
+
+from __future__ import annotations
+
+from langchain.agents.middleware import ModelCallLimitMiddleware
+
+from app.agents.new_chat.feature_flags import AgentFeatureFlags
+
+from ..flags import enabled
+
+
+def build_model_call_limit_mw(
+    flags: AgentFeatureFlags,
+) -> ModelCallLimitMiddleware | None:
+    if not enabled(flags, "enable_model_call_limit"):
+        return None
+    return ModelCallLimitMiddleware(
+        thread_limit=120,
+        run_limit=80,
+        exit_behavior="end",
+    )
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py
new file mode 100644
index 000000000..c98fc4083
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py
@@ -0,0 +1,16 @@
+"""Retry on transient model errors (e.g. Retry-After-bearing 429s)."""
+
+from __future__ import annotations
+
+from app.agents.new_chat.feature_flags import AgentFeatureFlags
+from app.agents.new_chat.middleware import RetryAfterMiddleware
+
+from ..flags import enabled
+
+
+def build_retry_mw(flags: AgentFeatureFlags) -> RetryAfterMiddleware | None:
+    return (
+        RetryAfterMiddleware(max_retries=3)
+        if enabled(flags, "enable_retry_after")
+        else None
+    )
diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py
new file mode 100644
index 000000000..dcde81f37
--- /dev/null
+++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py
@@ -0,0 +1,21 @@
+"""Cap tool calls per thread / per run to bound infinite-loop blast radius."""
+
+from __future__ import annotations
+
+from langchain.agents.middleware import ToolCallLimitMiddleware
+
+from app.agents.new_chat.feature_flags import AgentFeatureFlags
+
+from ..flags import enabled
+
+
+def build_tool_call_limit_mw(
+    flags: AgentFeatureFlags,
+) -> ToolCallLimitMiddleware | None:
+    if not enabled(flags, "enable_tool_call_limit"):
+        return None
+    return ToolCallLimitMiddleware(
+        thread_limit=300,
+        run_limit=80,
+        exit_behavior="continue",
+    )