From 6c4ede5f9a9580618e24a85200809c34006043ce Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 02:00:34 +0200 Subject: [PATCH 01/29] fix(test): pin enable_kb_planner_runnable=false for KB-search planner tests --- .../tests/unit/middleware/test_knowledge_search.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py index 2933a0504..3529a946b 100644 --- a/surfsense_backend/tests/unit/middleware/test_knowledge_search.py +++ b/surfsense_backend/tests/unit/middleware/test_knowledge_search.py @@ -202,6 +202,15 @@ class FakeBudgetLLM: class TestKnowledgeBaseSearchMiddlewarePlanner: + @pytest.fixture(autouse=True) + def _disable_planner_runnable(self, monkeypatch): + # ``FakeLLM`` is a duck-typed mock; ``create_agent`` (used when the + # planner Runnable path is enabled) calls ``.bind()`` on the LLM, + # which the mock does not implement. Pin the flag off so the + # planner falls through to the legacy ``self.llm.ainvoke`` path + # these tests assert against (``llm.calls[0]["config"]``). 
+ monkeypatch.setenv("SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", "false") + def test_render_recent_conversation_prefers_latest_messages_under_budget(self): messages = [ HumanMessage(content="old user context " * 40), From 102f77ab7f22e69d83b66a7201b4218b495199de Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 17:04:19 +0200 Subject: [PATCH 02/29] test(hitl): regression net for surfsense_resume_value pop-on-read --- .../unit/agents/multi_agent_chat/__init__.py | 0 .../multi_agent_chat/middleware/__init__.py | 0 .../__init__.py | 0 .../test_resume_helpers.py | 84 +++++++++++++++++++ 4 files changed, 84 insertions(+) create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/__init__.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/__init__.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/__init__.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py 
b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py new file mode 100644 index 000000000..2060051a2 --- /dev/null +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py @@ -0,0 +1,84 @@ +"""Pure-function tests for the HITL resume side-channel helpers. + +Tests the invariant that backs the bridge: a queued resume value must be +read exactly once per turn. A second read returns ``None`` so the +parent ``task`` tool falls through to its fail-loud guard rather than +replaying the same resume payload (which would re-fire the interrupt). +""" + +from __future__ import annotations + +from langchain.tools import ToolRuntime + +from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subagent_middleware.config import ( + consume_surfsense_resume, + has_surfsense_resume, +) + + +def _runtime_with_config(config: dict) -> ToolRuntime: + """Real ToolRuntime; only ``.config`` is exercised by the helpers.""" + return ToolRuntime( + state=None, + context=None, + config=config, + stream_writer=None, + tool_call_id="tcid-test", + store=None, + ) + + +class TestConsumeSurfsenseResume: + def test_pops_value_on_first_call(self): + runtime = _runtime_with_config( + {"configurable": {"surfsense_resume_value": {"decisions": ["approve"]}}} + ) + + assert consume_surfsense_resume(runtime) == {"decisions": ["approve"]} + + def test_second_call_returns_none(self): + # Regression guard: a second read must not replay the queued + # resume. If it did, the subagent would re-invoke with the + # same Command and the user-facing interrupt would fire twice. 
+ configurable: dict = {"surfsense_resume_value": {"decisions": ["approve"]}} + runtime = _runtime_with_config({"configurable": configurable}) + + consume_surfsense_resume(runtime) + + assert consume_surfsense_resume(runtime) is None + assert "surfsense_resume_value" not in configurable + + def test_returns_none_when_no_payload_queued(self): + runtime = _runtime_with_config({"configurable": {}}) + + assert consume_surfsense_resume(runtime) is None + + def test_returns_none_when_configurable_missing(self): + runtime = _runtime_with_config({}) + + assert consume_surfsense_resume(runtime) is None + + +class TestHasSurfsenseResume: + def test_true_when_payload_queued(self): + runtime = _runtime_with_config( + {"configurable": {"surfsense_resume_value": "approve"}} + ) + + assert has_surfsense_resume(runtime) is True + + def test_does_not_consume_payload(self): + # The fail-loud guard in ``task_tool`` calls ``has_surfsense_resume`` + # *before* deciding to consume; the check itself must leave the + # payload queued for the matching ``consume_surfsense_resume`` call. 
+ configurable = {"surfsense_resume_value": "approve"} + runtime = _runtime_with_config({"configurable": configurable}) + + has_surfsense_resume(runtime) + + assert configurable == {"surfsense_resume_value": "approve"} + + def test_false_when_payload_absent(self): + runtime = _runtime_with_config({"configurable": {}}) + + assert has_surfsense_resume(runtime) is False From 681895d28df0aa100159a981b38188e7dcdb4dd3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 17:13:13 +0200 Subject: [PATCH 03/29] test(hitl): regression net for end-to-end resume bridge dispatch --- .../test_hitl_bridge.py | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py new file mode 100644 index 000000000..8e9f0a96c --- /dev/null +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py @@ -0,0 +1,132 @@ +"""End-to-end resume-bridge tests against a real LangGraph subagent. + +Builds a minimal Pregel subagent that calls ``interrupt(...)`` and drives the +``task`` tool directly with a hand-crafted ``ToolRuntime``. Exercises the only +runtime contract we own: parent stashes a decision in +``config["configurable"]["surfsense_resume_value"]`` -> bridge forwards it as +``Command(resume={interrupt_id: value})`` -> subagent completes -> return value +reflects the decision. + +We pause the subagent **outside** the parent task tool (calling +``subagent.ainvoke`` directly) to skip the ``_lg_interrupt`` re-raise path, +which requires a parent runnable context. 
The bridge logic under test is the +*resume* dispatch, not the propagation; propagation is exercised separately in +its own module's tests. +""" + +from __future__ import annotations + +import pytest +from langchain.tools import ToolRuntime +from langchain_core.messages import HumanMessage +from langgraph.checkpoint.memory import InMemorySaver +from langgraph.graph import END, START, StateGraph +from langgraph.types import Command, interrupt +from typing_extensions import TypedDict + +from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subagent_middleware.task_tool import ( + build_task_tool_with_parent_config, +) + + +class _SubagentState(TypedDict, total=False): + messages: list + decision_text: str + + +def _build_single_interrupt_subagent(): + """Subagent that interrupts once, then echoes the resume decision into state.""" + + def approve_node(state): + from langchain_core.messages import AIMessage + + decision = interrupt( + { + "action_requests": [ + { + "name": "do_thing", + "args": {"x": 1}, + "description": "test action", + } + ], + "review_configs": [{}], + } + ) + # Capture the resume payload verbatim so the test can assert the + # bridge forwarded it intact (no reshape, no scalar broadcast). 
+ return { + "messages": [AIMessage(content="done")], + "decision_text": repr(decision), + } + + graph = StateGraph(_SubagentState) + graph.add_node("approve", approve_node) + graph.add_edge(START, "approve") + graph.add_edge("approve", END) + return graph.compile(checkpointer=InMemorySaver()) + + +def _make_runtime(config: dict) -> ToolRuntime: + return ToolRuntime( + state={"messages": [HumanMessage(content="seed")]}, + context=None, + config=config, + stream_writer=None, + tool_call_id="parent-tcid-1", + store=None, + ) + + +@pytest.mark.asyncio +async def test_resume_bridge_dispatches_decision_into_pending_subagent(): + """Side-channel decision -> targeted Command(resume) -> subagent completes.""" + subagent = _build_single_interrupt_subagent() + task_tool = build_task_tool_with_parent_config( + [ + { + "name": "approver", + "description": "approves things", + "runnable": subagent, + } + ] + ) + + # 1. Pause the subagent directly so we can test only the resume path. + parent_config: dict = { + "configurable": {"thread_id": "shared-thread"}, + "recursion_limit": 100, + } + await subagent.ainvoke({"messages": [HumanMessage(content="seed")]}, parent_config) + snap = await subagent.aget_state(parent_config) + assert snap.tasks and snap.tasks[0].interrupts, ( + "fixture broken: subagent should be paused on its interrupt" + ) + + # 2. Stash the user's decision on the side-channel — this is what + # ``stream_resume_chat`` does in production. + parent_config["configurable"]["surfsense_resume_value"] = { + "decisions": ["APPROVED"] + } + runtime = _make_runtime(parent_config) + + # 3. Drive the bridge. Subagent has no remaining interrupt after resume, + # so propagation will not call ``_lg_interrupt`` (no parent ctx needed). 
+ result = await task_tool.coroutine( + description="please approve", + subagent_type="approver", + runtime=runtime, + ) + + assert isinstance(result, Command) + update = result.update + # Bridge forwards the side-channel payload **verbatim** to the + # subagent's ``interrupt()``. A scalar broadcast or accidental + # unwrap would change this shape and we want to catch that. + assert update["decision_text"] == repr({"decisions": ["APPROVED"]}) + + # 4. Side-channel was consumed; a stale replay would re-prompt the user. + assert "surfsense_resume_value" not in parent_config["configurable"] + + # 5. Subagent moved past the interrupt (no pending tasks remain). + final = await subagent.aget_state(parent_config) + assert not final.tasks or all(not t.interrupts for t in final.tasks) From 0c556356032d536dc57d83057595dfd669b90bb3 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 17:15:28 +0200 Subject: [PATCH 04/29] test(hitl): regression net for fail-loud guard on missing resume value --- .../test_hitl_bridge.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py index 8e9f0a96c..2ba95dfef 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py @@ -130,3 +130,41 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent(): # 5. Subagent moved past the interrupt (no pending tasks remain). 
final = await subagent.aget_state(parent_config) assert not final.tasks or all(not t.interrupts for t in final.tasks) + + +@pytest.mark.asyncio +async def test_pending_interrupt_without_resume_value_raises_runtime_error(): + """Bridge must fail loud if a paused subagent has no decision queued. + + The fail-open alternative (silently re-invoking) would re-fire the + same interrupt to the user. The error surfaces a real broken bridge + instead of confusing duplicate approval cards. + """ + subagent = _build_single_interrupt_subagent() + task_tool = build_task_tool_with_parent_config( + [ + { + "name": "approver", + "description": "approves things", + "runnable": subagent, + } + ] + ) + + parent_config: dict = { + "configurable": {"thread_id": "guard-thread"}, + "recursion_limit": 100, + } + await subagent.ainvoke({"messages": [HumanMessage(content="seed")]}, parent_config) + snap = await subagent.aget_state(parent_config) + assert snap.tasks and snap.tasks[0].interrupts, "fixture broken" + + # No surfsense_resume_value injected — bridge must refuse to proceed. 
+ runtime = _make_runtime(parent_config) + + with pytest.raises(RuntimeError, match="resume bridge is broken"): + await task_tool.coroutine( + description="please approve", + subagent_type="approver", + runtime=runtime, + ) From fa6f3015a996640fb74fd7b483171d1f747c4302 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 17:19:39 +0200 Subject: [PATCH 05/29] test(hitl): regression net for bundle decision shape preservation --- .../test_hitl_bridge.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py index 2ba95dfef..188224074 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py @@ -16,6 +16,8 @@ its own module's tests. 
from __future__ import annotations +import ast + import pytest from langchain.tools import ToolRuntime from langchain_core.messages import HumanMessage @@ -168,3 +170,84 @@ async def test_pending_interrupt_without_resume_value_raises_runtime_error(): subagent_type="approver", runtime=runtime, ) + + +def _build_bundle_subagent(): + """Subagent that raises a 3-action HITL bundle on its only node.""" + + def bundle_node(state): + from langchain_core.messages import AIMessage + + decision = interrupt( + { + "action_requests": [ + {"name": "create_a", "args": {}, "description": ""}, + {"name": "create_b", "args": {}, "description": ""}, + {"name": "create_c", "args": {}, "description": ""}, + ], + "review_configs": [{}, {}, {}], + } + ) + return { + "messages": [AIMessage(content="bundle-done")], + "decision_text": repr(decision), + } + + graph = StateGraph(_SubagentState) + graph.add_node("bundle", bundle_node) + graph.add_edge(START, "bundle") + graph.add_edge("bundle", END) + return graph.compile(checkpointer=InMemorySaver()) + + +@pytest.mark.asyncio +async def test_bundle_three_mixed_decisions_arrive_in_order(): + """Approve / edit / reject for a 3-action bundle land at ordinals 0/1/2. + + Catches reshape regressions: truncation, decision collapse, order + scrambling, and the legacy single-decision broadcast that would + fan-out one verdict to every action. 
+ """ + subagent = _build_bundle_subagent() + task_tool = build_task_tool_with_parent_config( + [ + { + "name": "bundler", + "description": "creates a bundle", + "runnable": subagent, + } + ] + ) + + parent_config: dict = { + "configurable": {"thread_id": "bundle-thread"}, + "recursion_limit": 100, + } + await subagent.ainvoke({"messages": [HumanMessage(content="seed")]}, parent_config) + + decisions_payload = { + "decisions": [ + {"type": "approve", "args": {}}, + {"type": "edit", "args": {"args": {"name": "edited-b"}}}, + {"type": "reject", "args": {"message": "no thanks"}}, + ] + } + parent_config["configurable"]["surfsense_resume_value"] = decisions_payload + runtime = _make_runtime(parent_config) + + result = await task_tool.coroutine( + description="run bundle", + subagent_type="bundler", + runtime=runtime, + ) + + assert isinstance(result, Command) + decision_text = result.update["decision_text"] + received = ast.literal_eval(decision_text) + assert received == decisions_payload, "bundle decisions must arrive verbatim" + # Cross-checks for the regressions this test exists to catch. 
+ assert len(received["decisions"]) == 3 + assert received["decisions"][0]["type"] == "approve" + assert received["decisions"][1]["type"] == "edit" + assert received["decisions"][1]["args"] == {"args": {"name": "edited-b"}} + assert received["decisions"][2]["type"] == "reject" From f695298d30f6e6d50bb701e3e613fb207c650d30 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 17:39:27 +0200 Subject: [PATCH 06/29] feat(multi-agent): wire model fallback and retry into subagent middleware --- .../graph/middleware/deepagent_stack.py | 39 ++++--- .../multi_agent_chat/subagents/__init__.py | 0 .../subagents/shared/__init__.py | 0 .../subagents/shared/test_subagent_builder.py | 105 ++++++++++++++++++ 4 files changed, 129 insertions(+), 15 deletions(-) create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index 74e47cfab..8b7e3d0b0 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -208,6 +208,26 @@ def build_main_agent_deepagent_middleware( ) gp_middleware.insert(_patch_idx, subagent_deny_permission_mw) + # Defined here (instead of further down with the other ``wrap_model_call`` + # middlewares) so subagents share the same instances as the parent — + # otherwise a connector subagent would die on the first provider hiccup + # while the parent stays resilient. 
+ retry_mw = ( + RetryAfterMiddleware(max_retries=3) + if flags.enable_retry_after and not flags.disable_new_agent_stack + else None + ) + fallback_mw: ModelFallbackMiddleware | None = None + if flags.enable_model_fallback and not flags.disable_new_agent_stack: + try: + fallback_mw = ModelFallbackMiddleware( + "openai:gpt-4o-mini", + "anthropic:claude-3-5-haiku-20241022", + ) + except Exception: + logging.warning("ModelFallbackMiddleware init failed; skipping.") + fallback_mw = None + registry_subagents: list[SubAgent] = [] try: subagent_extra_middleware: list[Any] = [ @@ -222,6 +242,10 @@ def build_main_agent_deepagent_middleware( ] if subagent_deny_permission_mw is not None: subagent_extra_middleware.append(subagent_deny_permission_mw) + if retry_mw is not None: + subagent_extra_middleware.append(retry_mw) + if fallback_mw is not None: + subagent_extra_middleware.append(fallback_mw) registry_subagents = build_subagents( dependencies=subagent_dependencies, model=llm, @@ -268,21 +292,6 @@ def build_main_agent_deepagent_middleware( backend_resolver=backend_resolver, ) - retry_mw = ( - RetryAfterMiddleware(max_retries=3) - if flags.enable_retry_after and not flags.disable_new_agent_stack - else None - ) - fallback_mw: ModelFallbackMiddleware | None = None - if flags.enable_model_fallback and not flags.disable_new_agent_stack: - try: - fallback_mw = ModelFallbackMiddleware( - "openai:gpt-4o-mini", - "anthropic:claude-3-5-haiku-20241022", - ) - except Exception: - logging.warning("ModelFallbackMiddleware init failed; skipping.") - fallback_mw = None model_call_limit_mw = ( ModelCallLimitMiddleware( thread_limit=120, diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/__init__.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/__init__.py 
b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py new file mode 100644 index 000000000..82f66891a --- /dev/null +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py @@ -0,0 +1,105 @@ +"""Resilience contract for subagents built via ``pack_subagent``. + +Subagents (jira, linear, notion, ...) run on the same LLM as the parent. When +the provider rate-limits or returns an empty stream, a single hiccup must not +abort the user's HITL flow — the connector subagent has to keep moving. This +relies on ``ModelFallbackMiddleware`` being usable as a subagent +``extra_middleware`` so the production builder can wire it in. +""" + +from __future__ import annotations + +from collections.abc import AsyncIterator, Iterator +from typing import Any + +import pytest +from langchain.agents import create_agent +from langchain.agents.middleware import ModelFallbackMiddleware +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.language_models.fake_chat_models import ( + FakeMessagesListChatModel, +) +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage +from langchain_core.outputs import ChatGeneration, ChatResult + +from app.agents.multi_agent_chat.subagents.shared.subagent_builder import ( + pack_subagent, +) + + +class _AlwaysFailingChatModel(BaseChatModel): + """Mimics a provider hard-failing on every call (rate limit / empty stream). + + ``ModelFallbackMiddleware`` triggers on any ``Exception``, so the exact + error type doesn't matter for the contract under test. 
+ """ + + @property + def _llm_type(self) -> str: + return "always-failing-test-model" + + def _generate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + msg = "primary llm exploded" + raise RuntimeError(msg) + + async def _agenerate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: AsyncCallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + msg = "primary llm exploded" + raise RuntimeError(msg) + + def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGeneration]: + msg = "primary llm exploded" + raise RuntimeError(msg) + + async def _astream( + self, *args: Any, **kwargs: Any + ) -> AsyncIterator[ChatGeneration]: + msg = "primary llm exploded" + raise RuntimeError(msg) + yield # pragma: no cover - unreachable, satisfies async generator typing + + +@pytest.mark.asyncio +async def test_subagent_recovers_when_primary_llm_fails(): + """Primary blows up → fallback in extra_middleware finishes the turn.""" + primary = _AlwaysFailingChatModel() + fallback = FakeMessagesListChatModel( + responses=[AIMessage(content="recovered via fallback")] + ) + + spec = pack_subagent( + name="resilience_test", + description="test subagent", + system_prompt="be helpful", + tools=[], + model=primary, + extra_middleware=[ModelFallbackMiddleware(fallback)], + ) + + agent = create_agent( + model=spec["model"], + tools=spec["tools"], + middleware=spec["middleware"], + system_prompt=spec["system_prompt"], + ) + + result = await agent.ainvoke({"messages": [HumanMessage(content="hi")]}) + + final = result["messages"][-1] + assert isinstance(final, AIMessage) + assert final.content == "recovered via fallback" From 1745d7dccf250fe489c1f9ff422491ca11661d4d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 18:04:47 +0200 Subject: [PATCH 07/29] feat(middleware): scope model fallback to provider/network 
errors only --- .../graph/middleware/deepagent_stack.py | 10 +- .../app/agents/new_chat/chat_deepagent.py | 10 +- .../middleware/scoped_model_fallback.py | 106 +++++++++++++ .../subagents/shared/test_subagent_builder.py | 18 +-- .../agents/new_chat/middleware/__init__.py | 0 .../middleware/test_scoped_model_fallback.py | 148 ++++++++++++++++++ 6 files changed, 275 insertions(+), 17 deletions(-) create mode 100644 surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py create mode 100644 surfsense_backend/tests/unit/agents/new_chat/middleware/__init__.py create mode 100644 surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index 8b7e3d0b0..e490b6b47 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -14,7 +14,6 @@ from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT from langchain.agents.middleware import ( LLMToolSelectorMiddleware, ModelCallLimitMiddleware, - ModelFallbackMiddleware, TodoListMiddleware, ToolCallLimitMiddleware, ) @@ -56,6 +55,9 @@ from app.agents.new_chat.middleware import ( create_surfsense_compaction_middleware, default_skills_sources, ) +from app.agents.new_chat.middleware.scoped_model_fallback import ( + ScopedModelFallbackMiddleware, +) from app.agents.new_chat.permissions import Rule, Ruleset from app.agents.new_chat.plugin_loader import ( PluginContext, @@ -217,15 +219,15 @@ def build_main_agent_deepagent_middleware( if flags.enable_retry_after and not flags.disable_new_agent_stack else None ) - fallback_mw: ModelFallbackMiddleware | None = None + fallback_mw: ScopedModelFallbackMiddleware | None = None if 
flags.enable_model_fallback and not flags.disable_new_agent_stack: try: - fallback_mw = ModelFallbackMiddleware( + fallback_mw = ScopedModelFallbackMiddleware( "openai:gpt-4o-mini", "anthropic:claude-3-5-haiku-20241022", ) except Exception: - logging.warning("ModelFallbackMiddleware init failed; skipping.") + logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None registry_subagents: list[SubAgent] = [] diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 1f4024d9d..605c31416 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -31,7 +31,6 @@ from langchain.agents import create_agent from langchain.agents.middleware import ( LLMToolSelectorMiddleware, ModelCallLimitMiddleware, - ModelFallbackMiddleware, TodoListMiddleware, ToolCallLimitMiddleware, ) @@ -77,6 +76,9 @@ from app.agents.new_chat.middleware import ( create_surfsense_compaction_middleware, default_skills_sources, ) +from app.agents.new_chat.middleware.scoped_model_fallback import ( + ScopedModelFallbackMiddleware, +) from app.agents.new_chat.permissions import Rule, Ruleset from app.agents.new_chat.plugin_loader import ( PluginContext, @@ -792,15 +794,15 @@ def _build_compiled_agent_blocking( # Fallback chain — primary is the agent's own model; we add cheap # alternatives. Off by default; only the first call site that # configures the chain via env should enable it. 
- fallback_mw: ModelFallbackMiddleware | None = None + fallback_mw: ScopedModelFallbackMiddleware | None = None if flags.enable_model_fallback and not flags.disable_new_agent_stack: try: - fallback_mw = ModelFallbackMiddleware( + fallback_mw = ScopedModelFallbackMiddleware( "openai:gpt-4o-mini", "anthropic:claude-3-5-haiku-20241022", ) except Exception: - logging.warning("ModelFallbackMiddleware init failed; skipping.") + logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None model_call_limit_mw = ( ModelCallLimitMiddleware( diff --git a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py new file mode 100644 index 000000000..de367fda9 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py @@ -0,0 +1,106 @@ +"""Fallback only on provider/network errors; let programming bugs raise. + +Upstream :class:`langchain.agents.middleware.ModelFallbackMiddleware` catches +every ``Exception``. With a non-provider bug (``KeyError``, ``TypeError``, +``AttributeError`` from middleware/state), every fallback model in the chain +hits the same bug — burning latency and tokens before the real cause finally +surfaces. Scoping the catch to provider-style exception types lets bugs fail +fast with clean tracebacks. + +Class-name matching (instead of ``isinstance`` against imported provider +types) keeps the dependency surface flat: openai, anthropic, google, +mistral, etc. all ship their own ``RateLimitError`` and we don't want to +import them all. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from langchain.agents.middleware import ModelFallbackMiddleware + +if TYPE_CHECKING: + from collections.abc import Awaitable, Callable + + from langchain.agents.middleware.types import ModelRequest, ModelResponse + from langchain_core.messages import AIMessage + + +_FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset( + { + # Rate / quota + "RateLimitError", + # Server-side + "APIStatusError", + "InternalServerError", + "ServiceUnavailableError", + "BadGatewayError", + "GatewayTimeoutError", + # Network + "APIConnectionError", + "APITimeoutError", + "ConnectError", + "ConnectTimeout", + "ReadTimeout", + "RemoteProtocolError", + "TimeoutError", + "TimeoutException", + } +) + + +def _is_fallback_eligible(exc: BaseException) -> bool: + """Eligible if the exception or any base in its MRO matches by class name.""" + return any(cls.__name__ in _FALLBACK_ELIGIBLE_NAMES for cls in type(exc).__mro__) + + +class ScopedModelFallbackMiddleware(ModelFallbackMiddleware): + """``ModelFallbackMiddleware`` that re-raises non-provider exceptions.""" + + def wrap_model_call( # type: ignore[override] + self, + request: ModelRequest[Any], + handler: Callable[[ModelRequest[Any]], ModelResponse[Any]], + ) -> ModelResponse[Any] | AIMessage: + last_exception: Exception + try: + return handler(request) + except Exception as e: + if not _is_fallback_eligible(e): + raise + last_exception = e + + for fallback_model in self.models: + try: + return handler(request.override(model=fallback_model)) + except Exception as e: + if not _is_fallback_eligible(e): + raise + last_exception = e + continue + + raise last_exception + + async def awrap_model_call( # type: ignore[override] + self, + request: ModelRequest[Any], + handler: Callable[[ModelRequest[Any]], Awaitable[ModelResponse[Any]]], + ) -> ModelResponse[Any] | AIMessage: + last_exception: Exception + try: + return await handler(request) + except Exception as 
e: + if not _is_fallback_eligible(e): + raise + last_exception = e + + for fallback_model in self.models: + try: + return await handler(request.override(model=fallback_model)) + except Exception as e: + if not _is_fallback_eligible(e): + raise + last_exception = e + continue + + raise last_exception diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py index 82f66891a..859833f1c 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py @@ -31,12 +31,12 @@ from app.agents.multi_agent_chat.subagents.shared.subagent_builder import ( ) -class _AlwaysFailingChatModel(BaseChatModel): - """Mimics a provider hard-failing on every call (rate limit / empty stream). +class RateLimitError(Exception): + """Provider-style 429; matches the scoped-fallback eligibility allowlist by name.""" - ``ModelFallbackMiddleware`` triggers on any ``Exception``, so the exact - error type doesn't matter for the contract under test. 
- """ + +class _AlwaysFailingChatModel(BaseChatModel): + """Mimics a provider hard-failing on every call (rate limit / empty stream).""" @property def _llm_type(self) -> str: @@ -50,7 +50,7 @@ class _AlwaysFailingChatModel(BaseChatModel): **kwargs: Any, ) -> ChatResult: msg = "primary llm exploded" - raise RuntimeError(msg) + raise RateLimitError(msg) async def _agenerate( self, @@ -60,17 +60,17 @@ class _AlwaysFailingChatModel(BaseChatModel): **kwargs: Any, ) -> ChatResult: msg = "primary llm exploded" - raise RuntimeError(msg) + raise RateLimitError(msg) def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGeneration]: msg = "primary llm exploded" - raise RuntimeError(msg) + raise RateLimitError(msg) async def _astream( self, *args: Any, **kwargs: Any ) -> AsyncIterator[ChatGeneration]: msg = "primary llm exploded" - raise RuntimeError(msg) + raise RateLimitError(msg) yield # pragma: no cover - unreachable, satisfies async generator typing diff --git a/surfsense_backend/tests/unit/agents/new_chat/middleware/__init__.py b/surfsense_backend/tests/unit/agents/new_chat/middleware/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py new file mode 100644 index 000000000..af464d1dc --- /dev/null +++ b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py @@ -0,0 +1,148 @@ +"""Exception-scope contract for ``ScopedModelFallbackMiddleware``. + +Upstream ``ModelFallbackMiddleware`` catches every ``Exception`` and walks +the fallback chain. That means a programming bug (``KeyError`` from a +botched tool config, ``TypeError`` from middleware, ...) burns 1+N model +round-trips and ~Nx tokens before its real cause surfaces. The scoped +variant only falls back on provider/network exception types so bugs fail +fast, with clean tracebacks. 
+""" + +from __future__ import annotations + +from collections.abc import AsyncIterator, Iterator +from typing import Any + +import pytest +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import AIMessage, BaseMessage +from langchain_core.outputs import ChatGeneration, ChatResult + + +class _RaisingChatModel(BaseChatModel): + """LLM that raises a configurable exception on every invocation.""" + + exc_to_raise: Any + + @property + def _llm_type(self) -> str: + return "raising-test-model" + + def _generate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + raise self.exc_to_raise + + async def _agenerate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: AsyncCallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + raise self.exc_to_raise + + def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGeneration]: + raise self.exc_to_raise + + async def _astream( + self, *args: Any, **kwargs: Any + ) -> AsyncIterator[ChatGeneration]: + raise self.exc_to_raise + yield # pragma: no cover - unreachable + + +class _RecordingChatModel(BaseChatModel): + """Returns a fixed message and counts how often it was called.""" + + response_text: str = "fallback-ok" + call_count: int = 0 + + @property + def _llm_type(self) -> str: + return "recording-test-model" + + def _generate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + self.call_count += 1 + return ChatResult( + generations=[ + ChatGeneration(message=AIMessage(content=self.response_text)) + ] + ) + + async def _agenerate( + self, + messages: list[BaseMessage], + stop: list[str] | None = 
None, + run_manager: AsyncCallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + return self._generate(messages, stop, None, **kwargs) + + +# Locally defined provider-style error: importing openai/anthropic/etc. +# would couple the test to provider SDKs the contract intentionally avoids. +class RateLimitError(Exception): + """Mimics ``openai.RateLimitError`` for name-based eligibility.""" + + +def _build_agent(primary: BaseChatModel, fallback: BaseChatModel): + """Compile a no-tools agent with the scoped fallback wired in.""" + from langchain.agents import create_agent + + from app.agents.new_chat.middleware.scoped_model_fallback import ( + ScopedModelFallbackMiddleware, + ) + + return create_agent( + model=primary, + tools=[], + middleware=[ScopedModelFallbackMiddleware(fallback)], + system_prompt="be helpful", + ) + + +@pytest.mark.asyncio +async def test_provider_errors_trigger_fallback(): + """Class names matching the provider allowlist drive the fallback chain.""" + primary = _RaisingChatModel(exc_to_raise=RateLimitError("429 from provider")) + fallback = _RecordingChatModel(response_text="recovered") + + agent = _build_agent(primary, fallback) + result = await agent.ainvoke({"messages": [("user", "hi")]}) + + final = result["messages"][-1] + assert isinstance(final, AIMessage) + assert final.content == "recovered" + assert fallback.call_count == 1 + + +@pytest.mark.asyncio +async def test_programming_errors_propagate_without_invoking_fallback(): + """``KeyError`` from agent-side bugs must surface immediately, no fallback retry.""" + primary = _RaisingChatModel(exc_to_raise=KeyError("missing_state_field")) + fallback = _RecordingChatModel(response_text="should-never-arrive") + + agent = _build_agent(primary, fallback) + + with pytest.raises(KeyError, match="missing_state_field"): + await agent.ainvoke({"messages": [("user", "hi")]}) + + assert fallback.call_count == 0, ( + "fallback was invoked for a programming error; " + "scoping rule 
is broken" + ) From b394dc71c717ad42d73fea4aa874ee0dace866aa Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 18:12:38 +0200 Subject: [PATCH 08/29] feat(multi-agent): extend model fallback to general-purpose subagent --- .../graph/middleware/deepagent_stack.py | 16 ++++++++++++++++ .../new_chat/middleware/scoped_model_fallback.py | 1 + 2 files changed, 17 insertions(+) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index e490b6b47..af7fceffa 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -230,6 +230,22 @@ def build_main_agent_deepagent_middleware( logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None + # Mirror the parent's ordering: retry / fallback wrap caching, which wraps + # the model. ``gp_middleware`` is held by reference inside + # ``general_purpose_spec`` so this insertion propagates into the spec. 
+ _gp_resilience: list[Any] = [m for m in (retry_mw, fallback_mw) if m is not None] + if _gp_resilience: + _cache_idx = next( + ( + i + for i, m in enumerate(gp_middleware) + if isinstance(m, AnthropicPromptCachingMiddleware) + ), + len(gp_middleware), + ) + for offset, mw in enumerate(_gp_resilience): + gp_middleware.insert(_cache_idx + offset, mw) + registry_subagents: list[SubAgent] = [] try: subagent_extra_middleware: list[Any] = [ diff --git a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py index de367fda9..bde8edeba 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py +++ b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py @@ -45,6 +45,7 @@ _FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset( "RemoteProtocolError", "TimeoutError", "TimeoutException", + # Can be extended to other exceptions in the future } ) From 309c69553123483a2f1a23b6c6cb544f38699203 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 18:16:00 +0200 Subject: [PATCH 09/29] feat(multi-agent): cap subagent model and tool call counts --- .../graph/middleware/deepagent_stack.py | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index af7fceffa..b76b54c27 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -230,10 +230,34 @@ def build_main_agent_deepagent_middleware( logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None - # Mirror the parent's ordering: retry / fallback wrap caching, which wraps - # the model. 
``gp_middleware`` is held by reference inside + # Cost / loop ceiling shared with subagents. ``state_schema`` of these + # middlewares is per-agent; counts are not summed across parent + sub — + # the cap acts as a safety net per agent, not a global budget. + model_call_limit_mw = ( + ModelCallLimitMiddleware( + thread_limit=120, + run_limit=80, + exit_behavior="end", + ) + if flags.enable_model_call_limit and not flags.disable_new_agent_stack + else None + ) + tool_call_limit_mw = ( + ToolCallLimitMiddleware( + thread_limit=300, run_limit=80, exit_behavior="continue" + ) + if flags.enable_tool_call_limit and not flags.disable_new_agent_stack + else None + ) + + # Mirror the parent's ordering: retry / fallback / limits wrap caching, + # which wraps the model. ``gp_middleware`` is held by reference inside # ``general_purpose_spec`` so this insertion propagates into the spec. - _gp_resilience: list[Any] = [m for m in (retry_mw, fallback_mw) if m is not None] + _gp_resilience: list[Any] = [ + m + for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw) + if m is not None + ] if _gp_resilience: _cache_idx = next( ( @@ -260,10 +284,14 @@ def build_main_agent_deepagent_middleware( ] if subagent_deny_permission_mw is not None: subagent_extra_middleware.append(subagent_deny_permission_mw) - if retry_mw is not None: - subagent_extra_middleware.append(retry_mw) - if fallback_mw is not None: - subagent_extra_middleware.append(fallback_mw) + for _resilience_mw in ( + retry_mw, + fallback_mw, + model_call_limit_mw, + tool_call_limit_mw, + ): + if _resilience_mw is not None: + subagent_extra_middleware.append(_resilience_mw) registry_subagents = build_subagents( dependencies=subagent_dependencies, model=llm, @@ -310,23 +338,6 @@ def build_main_agent_deepagent_middleware( backend_resolver=backend_resolver, ) - model_call_limit_mw = ( - ModelCallLimitMiddleware( - thread_limit=120, - run_limit=80, - exit_behavior="end", - ) - if flags.enable_model_call_limit and 
not flags.disable_new_agent_stack - else None - ) - tool_call_limit_mw = ( - ToolCallLimitMiddleware( - thread_limit=300, run_limit=80, exit_behavior="continue" - ) - if flags.enable_tool_call_limit and not flags.disable_new_agent_stack - else None - ) - noop_mw = ( NoopInjectionMiddleware() if flags.enable_compaction_v2 and not flags.disable_new_agent_stack From 9a4ee5d16bedff12f76810c6b432d76c1a9f615d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 18:27:46 +0200 Subject: [PATCH 10/29] chore: trim narrative comments and docstrings --- .../graph/middleware/deepagent_stack.py | 15 ++--- .../middleware/scoped_model_fallback.py | 24 ++------ .../test_hitl_bridge.py | 57 ++----------------- .../test_resume_helpers.py | 15 +---- .../subagents/shared/test_subagent_builder.py | 14 +---- .../middleware/test_scoped_model_fallback.py | 28 ++------- 6 files changed, 24 insertions(+), 129 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index b76b54c27..1d6a8763e 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -210,10 +210,8 @@ def build_main_agent_deepagent_middleware( ) gp_middleware.insert(_patch_idx, subagent_deny_permission_mw) - # Defined here (instead of further down with the other ``wrap_model_call`` - # middlewares) so subagents share the same instances as the parent — - # otherwise a connector subagent would die on the first provider hiccup - # while the parent stays resilient. + # Defined early so the same instances reach both gp_middleware and + # subagent_extra_middleware below. 
retry_mw = ( RetryAfterMiddleware(max_retries=3) if flags.enable_retry_after and not flags.disable_new_agent_stack @@ -230,9 +228,7 @@ def build_main_agent_deepagent_middleware( logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None - # Cost / loop ceiling shared with subagents. ``state_schema`` of these - # middlewares is per-agent; counts are not summed across parent + sub — - # the cap acts as a safety net per agent, not a global budget. + # Per-agent caps; counts are not summed across parent + subagents. model_call_limit_mw = ( ModelCallLimitMiddleware( thread_limit=120, @@ -250,9 +246,8 @@ def build_main_agent_deepagent_middleware( else None ) - # Mirror the parent's ordering: retry / fallback / limits wrap caching, - # which wraps the model. ``gp_middleware`` is held by reference inside - # ``general_purpose_spec`` so this insertion propagates into the spec. + # gp_middleware is held by reference inside general_purpose_spec, so + # mutating it here propagates into the spec. _gp_resilience: list[Any] = [ m for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw) diff --git a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py index bde8edeba..99eb2d74a 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py +++ b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py @@ -1,17 +1,4 @@ -"""Fallback only on provider/network errors; let programming bugs raise. - -Upstream :class:`langchain.agents.middleware.ModelFallbackMiddleware` catches -every ``Exception``. With a non-provider bug (``KeyError``, ``TypeError``, -``AttributeError`` from middleware/state), every fallback model in the chain -hits the same bug — burning latency and tokens before the real cause finally -surfaces. 
Scoping the catch to provider-style exception types lets bugs fail -fast with clean tracebacks. - -Class-name matching (instead of ``isinstance`` against imported provider -types) keeps the dependency surface flat: openai, anthropic, google, -mistral, etc. all ship their own ``RateLimitError`` and we don't want to -import them all. -""" +"""Fallback only on provider/network errors; let programming bugs raise.""" from __future__ import annotations @@ -26,17 +13,16 @@ if TYPE_CHECKING: from langchain_core.messages import AIMessage +# Matched by class name across the MRO so we don't have to import every +# provider SDK (openai/anthropic/google/...). Extend as new providers ship. _FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset( { - # Rate / quota "RateLimitError", - # Server-side "APIStatusError", "InternalServerError", "ServiceUnavailableError", "BadGatewayError", "GatewayTimeoutError", - # Network "APIConnectionError", "APITimeoutError", "ConnectError", @@ -45,18 +31,16 @@ _FALLBACK_ELIGIBLE_NAMES: frozenset[str] = frozenset( "RemoteProtocolError", "TimeoutError", "TimeoutException", - # Can be extended to other exceptions in the future } ) def _is_fallback_eligible(exc: BaseException) -> bool: - """Eligible if the exception or any base in its MRO matches by class name.""" return any(cls.__name__ in _FALLBACK_ELIGIBLE_NAMES for cls in type(exc).__mro__) class ScopedModelFallbackMiddleware(ModelFallbackMiddleware): - """``ModelFallbackMiddleware`` that re-raises non-provider exceptions.""" + """Re-raise non-provider exceptions instead of walking the fallback chain.""" def wrap_model_call( # type: ignore[override] self, diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py index 188224074..ab6a644b5 100644 --- 
a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py @@ -1,18 +1,4 @@ -"""End-to-end resume-bridge tests against a real LangGraph subagent. - -Builds a minimal Pregel subagent that calls ``interrupt(...)`` and drives the -``task`` tool directly with a hand-crafted ``ToolRuntime``. Exercises the only -runtime contract we own: parent stashes a decision in -``config["configurable"]["surfsense_resume_value"]`` -> bridge forwards it as -``Command(resume={interrupt_id: value})`` -> subagent completes -> return value -reflects the decision. - -We pause the subagent **outside** the parent task tool (calling -``subagent.ainvoke`` directly) to skip the ``_lg_interrupt`` re-raise path, -which requires a parent runnable context. The bridge logic under test is the -*resume* dispatch, not the propagation; propagation is exercised separately in -its own module's tests. -""" +"""End-to-end resume-bridge tests against a real LangGraph subagent.""" from __future__ import annotations @@ -37,8 +23,6 @@ class _SubagentState(TypedDict, total=False): def _build_single_interrupt_subagent(): - """Subagent that interrupts once, then echoes the resume decision into state.""" - def approve_node(state): from langchain_core.messages import AIMessage @@ -54,8 +38,6 @@ def _build_single_interrupt_subagent(): "review_configs": [{}], } ) - # Capture the resume payload verbatim so the test can assert the - # bridge forwarded it intact (no reshape, no scalar broadcast). 
return { "messages": [AIMessage(content="done")], "decision_text": repr(decision), @@ -81,7 +63,7 @@ def _make_runtime(config: dict) -> ToolRuntime: @pytest.mark.asyncio async def test_resume_bridge_dispatches_decision_into_pending_subagent(): - """Side-channel decision -> targeted Command(resume) -> subagent completes.""" + """Side-channel decision must reach the subagent's pending interrupt verbatim.""" subagent = _build_single_interrupt_subagent() task_tool = build_task_tool_with_parent_config( [ @@ -93,7 +75,6 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent(): ] ) - # 1. Pause the subagent directly so we can test only the resume path. parent_config: dict = { "configurable": {"thread_id": "shared-thread"}, "recursion_limit": 100, @@ -104,15 +85,11 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent(): "fixture broken: subagent should be paused on its interrupt" ) - # 2. Stash the user's decision on the side-channel — this is what - # ``stream_resume_chat`` does in production. parent_config["configurable"]["surfsense_resume_value"] = { "decisions": ["APPROVED"] } runtime = _make_runtime(parent_config) - # 3. Drive the bridge. Subagent has no remaining interrupt after resume, - # so propagation will not call ``_lg_interrupt`` (no parent ctx needed). result = await task_tool.coroutine( description="please approve", subagent_type="approver", @@ -121,27 +98,16 @@ async def test_resume_bridge_dispatches_decision_into_pending_subagent(): assert isinstance(result, Command) update = result.update - # Bridge forwards the side-channel payload **verbatim** to the - # subagent's ``interrupt()``. A scalar broadcast or accidental - # unwrap would change this shape and we want to catch that. assert update["decision_text"] == repr({"decisions": ["APPROVED"]}) - - # 4. Side-channel was consumed; a stale replay would re-prompt the user. assert "surfsense_resume_value" not in parent_config["configurable"] - # 5. 
Subagent moved past the interrupt (no pending tasks remain). final = await subagent.aget_state(parent_config) assert not final.tasks or all(not t.interrupts for t in final.tasks) @pytest.mark.asyncio async def test_pending_interrupt_without_resume_value_raises_runtime_error(): - """Bridge must fail loud if a paused subagent has no decision queued. - - The fail-open alternative (silently re-invoking) would re-fire the - same interrupt to the user. The error surfaces a real broken bridge - instead of confusing duplicate approval cards. - """ + """Bridge must fail loud rather than silently replay the user's interrupt.""" subagent = _build_single_interrupt_subagent() task_tool = build_task_tool_with_parent_config( [ @@ -161,7 +127,6 @@ async def test_pending_interrupt_without_resume_value_raises_runtime_error(): snap = await subagent.aget_state(parent_config) assert snap.tasks and snap.tasks[0].interrupts, "fixture broken" - # No surfsense_resume_value injected — bridge must refuse to proceed. runtime = _make_runtime(parent_config) with pytest.raises(RuntimeError, match="resume bridge is broken"): @@ -173,8 +138,6 @@ async def test_pending_interrupt_without_resume_value_raises_runtime_error(): def _build_bundle_subagent(): - """Subagent that raises a 3-action HITL bundle on its only node.""" - def bundle_node(state): from langchain_core.messages import AIMessage @@ -202,12 +165,7 @@ def _build_bundle_subagent(): @pytest.mark.asyncio async def test_bundle_three_mixed_decisions_arrive_in_order(): - """Approve / edit / reject for a 3-action bundle land at ordinals 0/1/2. - - Catches reshape regressions: truncation, decision collapse, order - scrambling, and the legacy single-decision broadcast that would - fan-out one verdict to every action. 
- """ + """Approve / edit / reject for a 3-action bundle must land at ordinals 0/1/2.""" subagent = _build_bundle_subagent() task_tool = build_task_tool_with_parent_config( [ @@ -242,11 +200,8 @@ async def test_bundle_three_mixed_decisions_arrive_in_order(): ) assert isinstance(result, Command) - decision_text = result.update["decision_text"] - received = ast.literal_eval(decision_text) - assert received == decisions_payload, "bundle decisions must arrive verbatim" - # Cross-checks for the regressions this test exists to catch. - assert len(received["decisions"]) == 3 + received = ast.literal_eval(result.update["decision_text"]) + assert received == decisions_payload assert received["decisions"][0]["type"] == "approve" assert received["decisions"][1]["type"] == "edit" assert received["decisions"][1]["args"] == {"args": {"name": "edited-b"}} diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py index 2060051a2..e73fb2823 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py @@ -1,10 +1,4 @@ -"""Pure-function tests for the HITL resume side-channel helpers. - -Tests the invariant that backs the bridge: a queued resume value must be -read exactly once per turn. A second read returns ``None`` so the -parent ``task`` tool falls through to its fail-loud guard rather than -replaying the same resume payload (which would re-fire the interrupt). 
-""" +"""Resume side-channel must be read exactly once per turn.""" from __future__ import annotations @@ -17,7 +11,6 @@ from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subage def _runtime_with_config(config: dict) -> ToolRuntime: - """Real ToolRuntime; only ``.config`` is exercised by the helpers.""" return ToolRuntime( state=None, context=None, @@ -37,9 +30,6 @@ class TestConsumeSurfsenseResume: assert consume_surfsense_resume(runtime) == {"decisions": ["approve"]} def test_second_call_returns_none(self): - # Regression guard: a second read must not replay the queued - # resume. If it did, the subagent would re-invoke with the - # same Command and the user-facing interrupt would fire twice. configurable: dict = {"surfsense_resume_value": {"decisions": ["approve"]}} runtime = _runtime_with_config({"configurable": configurable}) @@ -68,9 +58,6 @@ class TestHasSurfsenseResume: assert has_surfsense_resume(runtime) is True def test_does_not_consume_payload(self): - # The fail-loud guard in ``task_tool`` calls ``has_surfsense_resume`` - # *before* deciding to consume; the check itself must leave the - # payload queued for the matching ``consume_surfsense_resume`` call. configurable = {"surfsense_resume_value": "approve"} runtime = _runtime_with_config({"configurable": configurable}) diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py index 859833f1c..5cd62ed36 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/subagents/shared/test_subagent_builder.py @@ -1,11 +1,4 @@ -"""Resilience contract for subagents built via ``pack_subagent``. - -Subagents (jira, linear, notion, ...) run on the same LLM as the parent. 
When -the provider rate-limits or returns an empty stream, a single hiccup must not -abort the user's HITL flow — the connector subagent has to keep moving. This -relies on ``ModelFallbackMiddleware`` being usable as a subagent -``extra_middleware`` so the production builder can wire it in. -""" +"""Subagent resilience contract: ``extra_middleware`` reaches the agent chain.""" from __future__ import annotations @@ -32,11 +25,10 @@ from app.agents.multi_agent_chat.subagents.shared.subagent_builder import ( class RateLimitError(Exception): - """Provider-style 429; matches the scoped-fallback eligibility allowlist by name.""" + """Name matches the scoped-fallback eligibility allowlist.""" class _AlwaysFailingChatModel(BaseChatModel): - """Mimics a provider hard-failing on every call (rate limit / empty stream).""" @property def _llm_type(self) -> str: @@ -76,7 +68,7 @@ class _AlwaysFailingChatModel(BaseChatModel): @pytest.mark.asyncio async def test_subagent_recovers_when_primary_llm_fails(): - """Primary blows up → fallback in extra_middleware finishes the turn.""" + """Fallback in ``extra_middleware`` must finish the turn when primary raises.""" primary = _AlwaysFailingChatModel() fallback = FakeMessagesListChatModel( responses=[AIMessage(content="recovered via fallback")] diff --git a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py index af464d1dc..69f6fe6b7 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py +++ b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py @@ -1,12 +1,4 @@ -"""Exception-scope contract for ``ScopedModelFallbackMiddleware``. - -Upstream ``ModelFallbackMiddleware`` catches every ``Exception`` and walks -the fallback chain. That means a programming bug (``KeyError`` from a -botched tool config, ``TypeError`` from middleware, ...) 
burns 1+N model -round-trips and ~Nx tokens before its real cause surfaces. The scoped -variant only falls back on provider/network exception types so bugs fail -fast, with clean tracebacks. -""" +"""``ScopedModelFallbackMiddleware`` triggers fallback only on provider errors.""" from __future__ import annotations @@ -24,8 +16,6 @@ from langchain_core.outputs import ChatGeneration, ChatResult class _RaisingChatModel(BaseChatModel): - """LLM that raises a configurable exception on every invocation.""" - exc_to_raise: Any @property @@ -61,8 +51,6 @@ class _RaisingChatModel(BaseChatModel): class _RecordingChatModel(BaseChatModel): - """Returns a fixed message and counts how often it was called.""" - response_text: str = "fallback-ok" call_count: int = 0 @@ -94,14 +82,11 @@ class _RecordingChatModel(BaseChatModel): return self._generate(messages, stop, None, **kwargs) -# Locally defined provider-style error: importing openai/anthropic/etc. -# would couple the test to provider SDKs the contract intentionally avoids. 
class RateLimitError(Exception): - """Mimics ``openai.RateLimitError`` for name-based eligibility.""" + """Name matches the scoped-fallback eligibility allowlist.""" def _build_agent(primary: BaseChatModel, fallback: BaseChatModel): - """Compile a no-tools agent with the scoped fallback wired in.""" from langchain.agents import create_agent from app.agents.new_chat.middleware.scoped_model_fallback import ( @@ -118,7 +103,7 @@ def _build_agent(primary: BaseChatModel, fallback: BaseChatModel): @pytest.mark.asyncio async def test_provider_errors_trigger_fallback(): - """Class names matching the provider allowlist drive the fallback chain.""" + """Eligible exception names must drive the fallback chain.""" primary = _RaisingChatModel(exc_to_raise=RateLimitError("429 from provider")) fallback = _RecordingChatModel(response_text="recovered") @@ -133,7 +118,7 @@ async def test_provider_errors_trigger_fallback(): @pytest.mark.asyncio async def test_programming_errors_propagate_without_invoking_fallback(): - """``KeyError`` from agent-side bugs must surface immediately, no fallback retry.""" + """Non-eligible exceptions must propagate; fallback must not be invoked.""" primary = _RaisingChatModel(exc_to_raise=KeyError("missing_state_field")) fallback = _RecordingChatModel(response_text="should-never-arrive") @@ -142,7 +127,4 @@ async def test_programming_errors_propagate_without_invoking_fallback(): with pytest.raises(KeyError, match="missing_state_field"): await agent.ainvoke({"messages": [("user", "hi")]}) - assert fallback.call_count == 0, ( - "fallback was invoked for a programming error; " - "scoping rule is broken" - ) + assert fallback.call_count == 0 From a6df944247219a1e2ad6f1d3f0ee06dd6c17ae44 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:49:47 +0200 Subject: [PATCH 11/29] refactor(multi-agent): introduce shared flags helper and permissions package --- .../middleware/shared/__init__.py | 0 .../middleware/shared/flags.py | 10 ++ 
.../middleware/shared/permissions/__init__.py | 12 ++ .../middleware/shared/permissions/context.py | 109 ++++++++++++++++++ .../shared/permissions/middleware.py | 10 ++ 5 files changed, 141 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/flags.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/context.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/middleware.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/flags.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/flags.py new file mode 100644 index 000000000..69994ae00 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/flags.py @@ -0,0 +1,10 @@ +"""Single source of truth for the feature-flag predicate.""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags + + +def enabled(flags: AgentFeatureFlags, attr: str) -> bool: + """``flags.<attr>`` is on AND the new-agent-stack kill switch is off.""" + return getattr(flags, attr) and not flags.disable_new_agent_stack diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/__init__.py new file mode 100644 index 000000000..4f2228170 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/__init__.py @@ -0,0 +1,12 @@ +"""Permission 
rulesets fanned out to parent / general-purpose / subagent stacks.""" + +from __future__ import annotations + +from .context import PermissionContext, build_permission_context +from .middleware import build_full_permission_mw + +__all__ = [ + "PermissionContext", + "build_full_permission_mw", + "build_permission_context", +] diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/context.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/context.py new file mode 100644 index 000000000..f14d52714 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/context.py @@ -0,0 +1,109 @@ +"""Derive shared permission context once; fan out to all three stack layers. + +The context carries: +- ``rulesets``: full ask/deny/allow rules for the main-agent permission middleware. +- ``general_purpose_interrupt_on``: ``ask`` rules mirrored as deepagents + ``interrupt_on`` so HITL still triggers from inside ``task`` runs (subagents + bypass the main-agent permission middleware). +- ``subagent_deny_mw``: a deny-only ``PermissionMiddleware`` instance shared + across the general-purpose and registry subagent stacks. 
+""" + +from __future__ import annotations + +from collections.abc import Sequence +from dataclasses import dataclass + +from langchain_core.tools import BaseTool + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import PermissionMiddleware +from app.agents.new_chat.permissions import Rule, Ruleset +from app.agents.new_chat.tools.registry import BUILTIN_TOOLS + +from ..flags import enabled + + +@dataclass(frozen=True) +class PermissionContext: + rulesets: list[Ruleset] + general_purpose_interrupt_on: dict[str, bool] + subagent_deny_mw: PermissionMiddleware | None + + +def build_permission_context( + *, + flags: AgentFeatureFlags, + filesystem_mode: FilesystemMode, + tools: Sequence[BaseTool], + available_connectors: list[str] | None, +) -> PermissionContext: + is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER + permission_enabled = enabled(flags, "enable_permission") + + rulesets: list[Ruleset] = [] + if permission_enabled or is_desktop_fs: + rulesets.append( + Ruleset( + rules=[Rule(permission="*", pattern="*", action="allow")], + origin="surfsense_defaults", + ) + ) + if is_desktop_fs: + rulesets.append( + Ruleset( + rules=[ + Rule(permission="rm", pattern="*", action="ask"), + Rule(permission="rmdir", pattern="*", action="ask"), + Rule(permission="move_file", pattern="*", action="ask"), + Rule(permission="edit_file", pattern="*", action="ask"), + Rule(permission="write_file", pattern="*", action="ask"), + ], + origin="desktop_safety", + ) + ) + + tool_names_in_use = {t.name for t in tools} + + if permission_enabled: + available_set = set(available_connectors or []) + synthesized: list[Rule] = [] + for tool_def in BUILTIN_TOOLS: + if tool_def.name not in tool_names_in_use: + continue + rc = tool_def.required_connector + if rc and rc not in available_set: + synthesized.append( + Rule(permission=tool_def.name, pattern="*", 
action="deny") + ) + if synthesized: + rulesets.append( + Ruleset(rules=synthesized, origin="connector_synthesized") + ) + + general_purpose_interrupt_on: dict[str, bool] = { + rule.permission: True + for rs in rulesets + for rule in rs.rules + if rule.action == "ask" and rule.permission in tool_names_in_use + } + + deny_rulesets = [ + Ruleset( + rules=[r for r in rs.rules if r.action == "deny"], + origin=rs.origin, + ) + for rs in rulesets + ] + deny_rulesets = [rs for rs in deny_rulesets if rs.rules] + + subagent_deny_mw: PermissionMiddleware | None = ( + PermissionMiddleware(rulesets=deny_rulesets) if deny_rulesets else None + ) + + return PermissionContext( + rulesets=rulesets, + general_purpose_interrupt_on=general_purpose_interrupt_on, + subagent_deny_mw=subagent_deny_mw, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/middleware.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/middleware.py new file mode 100644 index 000000000..704a26fb3 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/permissions/middleware.py @@ -0,0 +1,10 @@ +"""Main-agent permission middleware (full ask/deny/allow rules).""" + +from __future__ import annotations + +from app.agents.new_chat.middleware import PermissionMiddleware +from app.agents.new_chat.permissions import Ruleset + + +def build_full_permission_mw(rulesets: list[Ruleset]) -> PermissionMiddleware | None: + return PermissionMiddleware(rulesets=rulesets) if rulesets else None From 91701bb49afe93578c99f25cb5b019b2b261ceb8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:51:02 +0200 Subject: [PATCH 12/29] refactor(multi-agent): split shared resilience bundle into per-concept files --- .../middleware/shared/resilience/__init__.py | 7 +++ .../middleware/shared/resilience/bundle.py | 51 +++++++++++++++++++ .../middleware/shared/resilience/fallback.py | 27 ++++++++++ 
.../shared/resilience/model_call_limit.py | 21 ++++++++ .../middleware/shared/resilience/retry.py | 16 ++++++ .../shared/resilience/tool_call_limit.py | 21 ++++++++ 6 files changed, 143 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py new file mode 100644 index 000000000..92596b771 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/__init__.py @@ -0,0 +1,7 @@ +"""Resilience middleware shared as the same instances across parent / general-purpose / registry.""" + +from __future__ import annotations + +from .bundle import ResilienceBundle, build_resilience_bundle + +__all__ = ["ResilienceBundle", "build_resilience_bundle"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py new file mode 100644 index 000000000..45f76a6f3 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/bundle.py @@ -0,0 +1,51 @@ +"""Construct each resilience middleware once; same instances flow into every consumer.""" + +from __future__ import annotations + +from dataclasses import dataclass +from 
typing import Any + +from langchain.agents.middleware import ( + ModelCallLimitMiddleware, + ToolCallLimitMiddleware, +) + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import RetryAfterMiddleware +from app.agents.new_chat.middleware.scoped_model_fallback import ( + ScopedModelFallbackMiddleware, +) + +from .fallback import build_fallback_mw +from .model_call_limit import build_model_call_limit_mw +from .retry import build_retry_mw +from .tool_call_limit import build_tool_call_limit_mw + + +@dataclass(frozen=True) +class ResilienceBundle: + retry: RetryAfterMiddleware | None + fallback: ScopedModelFallbackMiddleware | None + model_call_limit: ModelCallLimitMiddleware | None + tool_call_limit: ToolCallLimitMiddleware | None + + def as_list(self) -> list[Any]: + return [ + m + for m in ( + self.retry, + self.fallback, + self.model_call_limit, + self.tool_call_limit, + ) + if m is not None + ] + + +def build_resilience_bundle(flags: AgentFeatureFlags) -> ResilienceBundle: + return ResilienceBundle( + retry=build_retry_mw(flags), + fallback=build_fallback_mw(flags), + model_call_limit=build_model_call_limit_mw(flags), + tool_call_limit=build_tool_call_limit_mw(flags), + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py new file mode 100644 index 000000000..ea68a764e --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/fallback.py @@ -0,0 +1,27 @@ +"""Switch to a fallback model on provider/network errors only.""" + +from __future__ import annotations + +import logging + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware.scoped_model_fallback import ( + ScopedModelFallbackMiddleware, +) + +from ..flags import enabled + + +def build_fallback_mw( + flags: AgentFeatureFlags, +) -> 
ScopedModelFallbackMiddleware | None: + if not enabled(flags, "enable_model_fallback"): + return None + try: + return ScopedModelFallbackMiddleware( + "openai:gpt-4o-mini", + "anthropic:claude-3-5-haiku-20241022", + ) + except Exception: + logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") + return None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py new file mode 100644 index 000000000..85707a385 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/model_call_limit.py @@ -0,0 +1,21 @@ +"""Cap model calls per thread / per run to prevent runaway cost.""" + +from __future__ import annotations + +from langchain.agents.middleware import ModelCallLimitMiddleware + +from app.agents.new_chat.feature_flags import AgentFeatureFlags + +from ..flags import enabled + + +def build_model_call_limit_mw( + flags: AgentFeatureFlags, +) -> ModelCallLimitMiddleware | None: + if not enabled(flags, "enable_model_call_limit"): + return None + return ModelCallLimitMiddleware( + thread_limit=120, + run_limit=80, + exit_behavior="end", + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py new file mode 100644 index 000000000..c98fc4083 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/retry.py @@ -0,0 +1,16 @@ +"""Retry on transient model errors (e.g. 
Retry-After-bearing 429s).""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import RetryAfterMiddleware + +from ..flags import enabled + + +def build_retry_mw(flags: AgentFeatureFlags) -> RetryAfterMiddleware | None: + return ( + RetryAfterMiddleware(max_retries=3) + if enabled(flags, "enable_retry_after") + else None + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py new file mode 100644 index 000000000..dcde81f37 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/resilience/tool_call_limit.py @@ -0,0 +1,21 @@ +"""Cap tool calls per thread / per run to bound infinite-loop blast radius.""" + +from __future__ import annotations + +from langchain.agents.middleware import ToolCallLimitMiddleware + +from app.agents.new_chat.feature_flags import AgentFeatureFlags + +from ..flags import enabled + + +def build_tool_call_limit_mw( + flags: AgentFeatureFlags, +) -> ToolCallLimitMiddleware | None: + if not enabled(flags, "enable_tool_call_limit"): + return None + return ToolCallLimitMiddleware( + thread_limit=300, + run_limit=80, + exit_behavior="continue", + ) From 67036448f9ae42ae788780e2893e13eb86e2f842 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:51:17 +0200 Subject: [PATCH 13/29] refactor(multi-agent): add shared middleware factory per concept --- .../middleware/shared/anthropic_cache.py | 9 +++++++ .../middleware/shared/compaction.py | 14 +++++++++++ .../middleware/shared/file_intent.py | 11 ++++++++ .../middleware/shared/filesystem.py | 25 +++++++++++++++++++ .../middleware/shared/memory.py | 19 ++++++++++++++ .../middleware/shared/patch_tool_calls.py | 9 +++++++ .../middleware/shared/todos.py | 9 +++++++ 7 files changed, 96 insertions(+) create mode 100644 
surfsense_backend/app/agents/multi_agent_chat/middleware/shared/anthropic_cache.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/compaction.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/file_intent.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/memory.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/patch_tool_calls.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/anthropic_cache.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/anthropic_cache.py new file mode 100644 index 000000000..f99fb9c7f --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/anthropic_cache.py @@ -0,0 +1,9 @@ +"""Anthropic prompt caching annotations on system/tool/message blocks.""" + +from __future__ import annotations + +from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware + + +def build_anthropic_cache_mw() -> AnthropicPromptCachingMiddleware: + return AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore") diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/compaction.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/compaction.py new file mode 100644 index 000000000..b59e7d2c4 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/compaction.py @@ -0,0 +1,14 @@ +"""Context-window summarization with SurfSense protected sections.""" + +from __future__ import annotations + +from typing import Any + +from deepagents.backends import StateBackend +from langchain_core.language_models import BaseChatModel + +from app.agents.new_chat.middleware import 
create_surfsense_compaction_middleware + + +def build_compaction_mw(llm: BaseChatModel) -> Any: + return create_surfsense_compaction_middleware(llm, StateBackend) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/file_intent.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/file_intent.py new file mode 100644 index 000000000..5ff65aa12 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/file_intent.py @@ -0,0 +1,11 @@ +"""File-intent classifier that gates strict write contracts.""" + +from __future__ import annotations + +from langchain_core.language_models import BaseChatModel + +from app.agents.new_chat.middleware import FileIntentMiddleware + + +def build_file_intent_mw(llm: BaseChatModel) -> FileIntentMiddleware: + return FileIntentMiddleware(llm=llm) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem.py new file mode 100644 index 000000000..9481f5167 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/filesystem.py @@ -0,0 +1,25 @@ +"""SurfSense filesystem tools/middleware.""" + +from __future__ import annotations + +from typing import Any + +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import SurfSenseFilesystemMiddleware + + +def build_filesystem_mw( + *, + backend_resolver: Any, + filesystem_mode: FilesystemMode, + search_space_id: int, + user_id: str | None, + thread_id: int | None, +) -> SurfSenseFilesystemMiddleware: + return SurfSenseFilesystemMiddleware( + backend=backend_resolver, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + created_by_id=user_id, + thread_id=thread_id, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/memory.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/memory.py new file 
mode 100644 index 000000000..9316b3e21 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/memory.py @@ -0,0 +1,19 @@ +"""User/team memory injection prepended to the conversation.""" + +from __future__ import annotations + +from app.agents.new_chat.middleware import MemoryInjectionMiddleware +from app.db import ChatVisibility + + +def build_memory_mw( + *, + user_id: str | None, + search_space_id: int, + visibility: ChatVisibility, +) -> MemoryInjectionMiddleware: + return MemoryInjectionMiddleware( + user_id=user_id, + search_space_id=search_space_id, + thread_visibility=visibility, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/patch_tool_calls.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/patch_tool_calls.py new file mode 100644 index 000000000..50036dbbe --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/patch_tool_calls.py @@ -0,0 +1,9 @@ +"""Repair dangling tool-call sequences before each agent turn.""" + +from __future__ import annotations + +from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware + + +def build_patch_tool_calls_mw() -> PatchToolCallsMiddleware: + return PatchToolCallsMiddleware() diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py new file mode 100644 index 000000000..ea9173a1d --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/todos.py @@ -0,0 +1,9 @@ +"""Todo-list middleware (each consumer needs its own instance).""" + +from __future__ import annotations + +from langchain.agents.middleware import TodoListMiddleware + + +def build_todos_mw() -> TodoListMiddleware: + return TodoListMiddleware() From 6a4dacda72bce83c6ce072e07ed928bdfe851240 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:53:49 +0200 Subject: [PATCH 14/29] refactor(multi-agent): 
add main-agent observability and lifecycle middleware factories --- .../middleware/main_agent/__init__.py | 0 .../middleware/main_agent/action_log.py | 36 ++++++++++++++ .../middleware/main_agent/anonymous_doc.py | 16 ++++++ .../middleware/main_agent/busy_mutex.py | 12 +++++ .../middleware/main_agent/otel.py | 12 +++++ .../middleware/main_agent/plugins.py | 49 +++++++++++++++++++ 6 files changed, 125 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/action_log.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/anonymous_doc.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/busy_mutex.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/otel.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/plugins.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/action_log.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/action_log.py new file mode 100644 index 000000000..c9f893d97 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/action_log.py @@ -0,0 +1,36 @@ +"""Audit row per tool call (reversibility metadata).""" + +from __future__ import annotations + +import logging + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import ActionLogMiddleware +from app.agents.new_chat.tools.registry import BUILTIN_TOOLS + +from ..shared.flags import enabled + + +def build_action_log_mw( + *, + flags: AgentFeatureFlags, + 
thread_id: int | None, + search_space_id: int, + user_id: str | None, +) -> ActionLogMiddleware | None: + if not enabled(flags, "enable_action_log") or thread_id is None: + return None + try: + tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS} + return ActionLogMiddleware( + thread_id=thread_id, + search_space_id=search_space_id, + user_id=user_id, + tool_definitions=tool_defs_by_name, + ) + except Exception: # pragma: no cover - defensive + logging.warning( + "ActionLogMiddleware init failed; running without it.", + exc_info=True, + ) + return None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/anonymous_doc.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/anonymous_doc.py new file mode 100644 index 000000000..afd54a2d3 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/anonymous_doc.py @@ -0,0 +1,16 @@ +"""Anonymous document hydration from Redis (cloud only).""" + +from __future__ import annotations + +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import AnonymousDocumentMiddleware + + +def build_anonymous_doc_mw( + *, + filesystem_mode: FilesystemMode, + anon_session_id: str | None, +) -> AnonymousDocumentMiddleware | None: + if filesystem_mode != FilesystemMode.CLOUD: + return None + return AnonymousDocumentMiddleware(anon_session_id=anon_session_id) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/busy_mutex.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/busy_mutex.py new file mode 100644 index 000000000..0ea53bf16 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/busy_mutex.py @@ -0,0 +1,12 @@ +"""Per-thread cooperative lock around the whole turn.""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import 
BusyMutexMiddleware + +from ..shared.flags import enabled + + +def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None: + return BusyMutexMiddleware() if enabled(flags, "enable_busy_mutex") else None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/otel.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/otel.py new file mode 100644 index 000000000..bd7516e65 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/otel.py @@ -0,0 +1,12 @@ +"""OTel spans on model and tool calls.""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import OtelSpanMiddleware + +from ..shared.flags import enabled + + +def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None: + return OtelSpanMiddleware() if enabled(flags, "enable_otel") else None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/plugins.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/plugins.py new file mode 100644 index 000000000..4418e3806 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/plugins.py @@ -0,0 +1,49 @@ +"""Tail-of-stack plugin slot driven by env allowlist.""" + +from __future__ import annotations + +import logging +from typing import Any + +from langchain_core.language_models import BaseChatModel + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.plugin_loader import ( + PluginContext, + load_allowed_plugin_names_from_env, + load_plugin_middlewares, +) +from app.db import ChatVisibility + +from ..shared.flags import enabled + + +def build_plugin_middlewares( + *, + flags: AgentFeatureFlags, + search_space_id: int, + user_id: str | None, + visibility: ChatVisibility, + llm: BaseChatModel, +) -> list[Any]: + if not enabled(flags, "enable_plugin_loader"): + 
return [] + try: + allowed_names = load_allowed_plugin_names_from_env() + if not allowed_names: + return [] + return load_plugin_middlewares( + PluginContext.build( + search_space_id=search_space_id, + user_id=user_id, + thread_visibility=visibility, + llm=llm, + ), + allowed_plugin_names=allowed_names, + ) + except Exception: # pragma: no cover - defensive + logging.warning( + "Plugin loader failed; continuing without plugins.", + exc_info=True, + ) + return [] From 390dc9307fac6733df8848b043cac7d7fabd1365 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:53:59 +0200 Subject: [PATCH 15/29] refactor(multi-agent): add main-agent knowledge middleware factories --- .../middleware/main_agent/kb_persistence.py | 23 ++++++++++++++++ .../main_agent/knowledge_priority.py | 27 +++++++++++++++++++ .../middleware/main_agent/knowledge_tree.py | 23 ++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/kb_persistence.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_tree.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/kb_persistence.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/kb_persistence.py new file mode 100644 index 000000000..4b27581e7 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/kb_persistence.py @@ -0,0 +1,23 @@ +"""Commit staged cloud filesystem mutations to Postgres at end of turn.""" + +from __future__ import annotations + +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware + + +def build_kb_persistence_mw( + *, + filesystem_mode: FilesystemMode, + search_space_id: int, + user_id: str | None, + thread_id: int | None, 
+) -> KnowledgeBasePersistenceMiddleware | None: + if filesystem_mode != FilesystemMode.CLOUD: + return None + return KnowledgeBasePersistenceMiddleware( + search_space_id=search_space_id, + created_by_id=user_id, + filesystem_mode=filesystem_mode, + thread_id=thread_id, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py new file mode 100644 index 000000000..395d2a7af --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py @@ -0,0 +1,27 @@ +"""KB priority planner: priority-plan injection.""" + +from __future__ import annotations + +from langchain_core.language_models import BaseChatModel + +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import KnowledgePriorityMiddleware + + +def build_knowledge_priority_mw( + *, + llm: BaseChatModel, + search_space_id: int, + filesystem_mode: FilesystemMode, + available_connectors: list[str] | None, + available_document_types: list[str] | None, + mentioned_document_ids: list[int] | None, +) -> KnowledgePriorityMiddleware: + return KnowledgePriorityMiddleware( + llm=llm, + search_space_id=search_space_id, + filesystem_mode=filesystem_mode, + available_connectors=available_connectors, + available_document_types=available_document_types, + mentioned_document_ids=mentioned_document_ids, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_tree.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_tree.py new file mode 100644 index 000000000..404082401 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_tree.py @@ -0,0 +1,23 @@ +"""Knowledge tree injection (cloud only).""" + +from __future__ import annotations + +from langchain_core.language_models import BaseChatModel + +from 
app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import KnowledgeTreeMiddleware + + +def build_knowledge_tree_mw( + *, + filesystem_mode: FilesystemMode, + search_space_id: int, + llm: BaseChatModel, +) -> KnowledgeTreeMiddleware | None: + if filesystem_mode != FilesystemMode.CLOUD: + return None + return KnowledgeTreeMiddleware( + search_space_id=search_space_id, + filesystem_mode=filesystem_mode, + llm=llm, + ) From b0ee44b2f15adfdf379c2b7f1967733a66a8e090 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:54:13 +0200 Subject: [PATCH 16/29] refactor(multi-agent): add main-agent safety and llm-shaping middleware factories --- .../middleware/main_agent/context_editing.py | 50 +++++++++++++++++++ .../middleware/main_agent/dedup_hitl.py | 13 +++++ .../middleware/main_agent/doom_loop.py | 12 +++++ .../middleware/main_agent/noop_injection.py | 12 +++++ .../middleware/main_agent/repair.py | 50 +++++++++++++++++++ .../middleware/main_agent/selector.py | 39 +++++++++++++++ .../middleware/main_agent/skills.py | 39 +++++++++++++++ 7 files changed, 215 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/context_editing.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/dedup_hitl.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/doom_loop.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/noop_injection.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/repair.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/selector.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/skills.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/context_editing.py 
b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/context_editing.py new file mode 100644 index 000000000..e8f99933e --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/context_editing.py @@ -0,0 +1,50 @@ +"""Spill + clear-tool-uses passes to keep payloads under budget.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + +from langchain_core.tools import BaseTool + +from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import ( + safe_exclude_tools, +) +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import ( + ClearToolUsesEdit, + SpillingContextEditingMiddleware, + SpillToBackendEdit, +) + +from ..shared.flags import enabled + + +def build_context_editing_mw( + *, + flags: AgentFeatureFlags, + max_input_tokens: int | None, + tools: Sequence[BaseTool], + backend_resolver: Any, +) -> SpillingContextEditingMiddleware | None: + if not enabled(flags, "enable_context_editing") or not max_input_tokens: + return None + spill_edit = SpillToBackendEdit( + trigger=int(max_input_tokens * 0.55), + clear_at_least=int(max_input_tokens * 0.15), + keep=5, + exclude_tools=safe_exclude_tools(tools), + clear_tool_inputs=True, + ) + clear_edit = ClearToolUsesEdit( + trigger=int(max_input_tokens * 0.55), + clear_at_least=int(max_input_tokens * 0.15), + keep=5, + exclude_tools=safe_exclude_tools(tools), + clear_tool_inputs=True, + placeholder="[cleared - older tool output trimmed for context]", + ) + return SpillingContextEditingMiddleware( + edits=[spill_edit, clear_edit], + backend_resolver=backend_resolver, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/dedup_hitl.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/dedup_hitl.py new file mode 100644 index 000000000..66cae300b --- /dev/null +++ 
b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/dedup_hitl.py @@ -0,0 +1,13 @@ +"""Drop duplicate HITL tool calls before execution.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from langchain_core.tools import BaseTool + +from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware + + +def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware: + return DedupHITLToolCallsMiddleware(agent_tools=list(tools)) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/doom_loop.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/doom_loop.py new file mode 100644 index 000000000..a0b294092 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/doom_loop.py @@ -0,0 +1,12 @@ +"""Stop N identical tool calls in a row via interrupt.""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import DoomLoopMiddleware + +from ..shared.flags import enabled + + +def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None: + return DoomLoopMiddleware(threshold=3) if enabled(flags, "enable_doom_loop") else None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/noop_injection.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/noop_injection.py new file mode 100644 index 000000000..6e6467ad0 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/noop_injection.py @@ -0,0 +1,12 @@ +"""Provider-compat: append a `_noop` tool when tools=[] but history has tool calls.""" + +from __future__ import annotations + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import NoopInjectionMiddleware + +from ..shared.flags import enabled + + +def build_noop_injection_mw(flags: 
AgentFeatureFlags) -> NoopInjectionMiddleware | None: + return NoopInjectionMiddleware() if enabled(flags, "enable_compaction_v2") else None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/repair.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/repair.py new file mode 100644 index 000000000..378b61be1 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/repair.py @@ -0,0 +1,50 @@ +"""Repair miscased / unknown tool names to the registered set or invalid_tool.""" + +from __future__ import annotations + +from collections.abc import Sequence + +from langchain_core.tools import BaseTool + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware + +from ..shared.flags import enabled + +# deepagents-built-in tool names the repair pass treats as known. +_DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset( + { + "write_todos", + "ls", + "read_file", + "write_file", + "edit_file", + "glob", + "grep", + "execute", + "task", + "mkdir", + "cd", + "pwd", + "move_file", + "rm", + "rmdir", + "list_tree", + "execute_code", + } +) + + +def build_repair_mw( + *, + flags: AgentFeatureFlags, + tools: Sequence[BaseTool], +) -> ToolCallNameRepairMiddleware | None: + if not enabled(flags, "enable_tool_call_repair"): + return None + registered_names: set[str] = {t.name for t in tools} + registered_names |= _DEEPAGENT_BUILTIN_TOOL_NAMES + return ToolCallNameRepairMiddleware( + registered_tool_names=registered_names, + fuzzy_match_threshold=None, + ) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/selector.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/selector.py new file mode 100644 index 000000000..8e7a32be8 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/selector.py @@ -0,0 +1,39 @@ +"""LLM-based tool subset 
selection (only when >30 tools).""" + +from __future__ import annotations + +import logging +from collections.abc import Sequence + +from langchain.agents.middleware import LLMToolSelectorMiddleware +from langchain_core.tools import BaseTool + +from app.agents.new_chat.feature_flags import AgentFeatureFlags + +from ..shared.flags import enabled + + +def build_selector_mw( + *, + flags: AgentFeatureFlags, + tools: Sequence[BaseTool], +) -> LLMToolSelectorMiddleware | None: + if not enabled(flags, "enable_llm_tool_selector") or len(tools) <= 30: + return None + try: + return LLMToolSelectorMiddleware( + model="openai:gpt-4o-mini", + max_tools=12, + always_include=[ + name + for name in ( + "update_memory", + "get_connected_accounts", + "scrape_webpage", + ) + if name in {t.name for t in tools} + ], + ) + except Exception: + logging.warning("LLMToolSelectorMiddleware init failed; skipping.") + return None diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/skills.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/skills.py new file mode 100644 index 000000000..63a57c5a0 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/skills.py @@ -0,0 +1,39 @@ +"""Skill discovery + injection.""" + +from __future__ import annotations + +import logging + +from deepagents.middleware.skills import SkillsMiddleware + +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import ( + build_skills_backend_factory, + default_skills_sources, +) + +from ..shared.flags import enabled + + +def build_skills_mw( + *, + flags: AgentFeatureFlags, + filesystem_mode: FilesystemMode, + search_space_id: int, +) -> SkillsMiddleware | None: + if not enabled(flags, "enable_skills"): + return None + try: + skills_factory = build_skills_backend_factory( + search_space_id=search_space_id + if filesystem_mode 
== FilesystemMode.CLOUD + else None, + ) + return SkillsMiddleware( + backend=skills_factory, + sources=default_skills_sources(), + ) + except Exception as exc: # pragma: no cover - defensive + logging.warning("SkillsMiddleware init failed; skipping: %s", exc) + return None From 611fef8666990b4994d8f990b8b796a36027c6fd Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:54:27 +0200 Subject: [PATCH 17/29] refactor(multi-agent): add subagent extras builder and drop filesystem from registry subagents --- .../middleware/subagent/__init__.py | 0 .../middleware/subagent/extras.py | 28 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/extras.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/extras.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/extras.py new file mode 100644 index 000000000..46dca8a81 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/subagent/extras.py @@ -0,0 +1,28 @@ +"""Extra middleware threaded into every registry subagent's stack. + +Registry subagents are scoped to one domain (deliverables, research, memory, +connectors, MCP) and never read or write the SurfSense filesystem — that +capability belongs to the main agent and is delegated to the general-purpose +subagent as an escape hatch. Keeping FS off the registry stacks avoids +polluting their tool surface with FS tools they never act on. 
+""" + +from __future__ import annotations + +from typing import Any + +from ..shared.permissions import PermissionContext +from ..shared.resilience import ResilienceBundle +from ..shared.todos import build_todos_mw + + +def build_subagent_extras( + *, + permissions: PermissionContext, + resilience: ResilienceBundle, +) -> list[Any]: + extras: list[Any] = [build_todos_mw()] + if permissions.subagent_deny_mw is not None: + extras.append(permissions.subagent_deny_mw) + extras.extend(resilience.as_list()) + return extras From 7690e8b278e525cae61291ac9bec6cfa773b06db Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:54:45 +0200 Subject: [PATCH 18/29] refactor(multi-agent): relocate general-purpose subagent next to other builtins --- .../builtins/general_purpose/__init__.py | 0 .../builtins/general_purpose/agent.py | 105 ++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/agent.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/agent.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/agent.py new file mode 100644 index 000000000..1c3c44f12 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/general_purpose/agent.py @@ -0,0 +1,105 @@ +"""General-purpose subagent for the multi-agent main agent.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, cast + +from deepagents import SubAgent +from deepagents.middleware.patch_tool_calls import 
PatchToolCallsMiddleware +from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT +from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware +from langchain_core.language_models import BaseChatModel +from langchain_core.tools import BaseTool + +from app.agents.multi_agent_chat.middleware.shared.anthropic_cache import ( + build_anthropic_cache_mw, +) +from app.agents.multi_agent_chat.middleware.shared.compaction import ( + build_compaction_mw, +) +from app.agents.multi_agent_chat.middleware.shared.file_intent import ( + build_file_intent_mw, +) +from app.agents.multi_agent_chat.middleware.shared.filesystem import ( + build_filesystem_mw, +) +from app.agents.multi_agent_chat.middleware.shared.patch_tool_calls import ( + build_patch_tool_calls_mw, +) +from app.agents.multi_agent_chat.middleware.shared.permissions import ( + PermissionContext, +) +from app.agents.multi_agent_chat.middleware.shared.resilience import ( + ResilienceBundle, +) +from app.agents.multi_agent_chat.middleware.shared.todos import build_todos_mw +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.agents.new_chat.middleware import MemoryInjectionMiddleware + +NAME = "general-purpose" + + +def build_subagent( + *, + llm: BaseChatModel, + tools: Sequence[BaseTool], + backend_resolver: Any, + filesystem_mode: FilesystemMode, + search_space_id: int, + user_id: str | None, + thread_id: int | None, + permissions: PermissionContext, + resilience: ResilienceBundle, + memory_mw: MemoryInjectionMiddleware, +) -> SubAgent: + """Deny + resilience inserts encapsulated here so the orchestrator never mutates the list.""" + middleware: list[Any] = [ + build_todos_mw(), + memory_mw, + build_file_intent_mw(llm), + build_filesystem_mw( + backend_resolver=backend_resolver, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + user_id=user_id, + thread_id=thread_id, + ), + build_compaction_mw(llm), + build_patch_tool_calls_mw(), + 
build_anthropic_cache_mw(), + ] + + if permissions.subagent_deny_mw is not None: + patch_idx = next( + ( + i + for i, m in enumerate(middleware) + if isinstance(m, PatchToolCallsMiddleware) + ), + len(middleware), + ) + middleware.insert(patch_idx, permissions.subagent_deny_mw) + + resilience_mws = resilience.as_list() + if resilience_mws: + cache_idx = next( + ( + i + for i, m in enumerate(middleware) + if isinstance(m, AnthropicPromptCachingMiddleware) + ), + len(middleware), + ) + for offset, mw in enumerate(resilience_mws): + middleware.insert(cache_idx + offset, mw) + + spec: dict[str, Any] = { + **GENERAL_PURPOSE_SUBAGENT, + "model": llm, + "tools": tools, + "middleware": middleware, + } + if permissions.general_purpose_interrupt_on: + spec["interrupt_on"] = permissions.general_purpose_interrupt_on + return cast(SubAgent, spec) From 5abae09435eaae8885978d12f48898eed83751dc Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:55:03 +0200 Subject: [PATCH 19/29] refactor(multi-agent): add slimmed orchestrator at middleware/stack.py --- .../multi_agent_chat/middleware/__init__.py | 7 + .../multi_agent_chat/middleware/stack.py | 212 ++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/__init__.py create mode 100644 surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/__init__.py new file mode 100644 index 000000000..e6eed9fbe --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/__init__.py @@ -0,0 +1,7 @@ +"""Multi-agent middleware stack assembly.""" + +from __future__ import annotations + +from .stack import build_main_agent_deepagent_middleware + +__all__ = ["build_main_agent_deepagent_middleware"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py 
b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py new file mode 100644 index 000000000..563332986 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py @@ -0,0 +1,212 @@ +"""Main-agent middleware list assembly: one line per slot.""" + +from __future__ import annotations + +import logging +from collections.abc import Sequence +from typing import Any + +from deepagents import SubAgent +from deepagents.backends import StateBackend +from langchain_core.language_models import BaseChatModel +from langchain_core.tools import BaseTool +from langgraph.types import Checkpointer + +from app.agents.multi_agent_chat.subagents import ( + build_subagents, + get_subagents_to_exclude, +) +from app.agents.multi_agent_chat.subagents.builtins.general_purpose.agent import ( + build_subagent as build_general_purpose_subagent, +) +from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.db import ChatVisibility + +from .main_agent.action_log import build_action_log_mw +from .main_agent.anonymous_doc import build_anonymous_doc_mw +from .main_agent.busy_mutex import build_busy_mutex_mw +from .main_agent.checkpointed_subagent_middleware import ( + SurfSenseCheckpointedSubAgentMiddleware, +) +from .main_agent.context_editing import build_context_editing_mw +from .main_agent.dedup_hitl import build_dedup_hitl_mw +from .main_agent.doom_loop import build_doom_loop_mw +from .main_agent.kb_persistence import build_kb_persistence_mw +from .main_agent.knowledge_priority import build_knowledge_priority_mw +from .main_agent.knowledge_tree import build_knowledge_tree_mw +from .main_agent.noop_injection import build_noop_injection_mw +from .main_agent.otel import build_otel_mw +from .main_agent.plugins import build_plugin_middlewares +from .main_agent.repair import build_repair_mw 
+from .main_agent.selector import build_selector_mw +from .main_agent.skills import build_skills_mw +from .shared.anthropic_cache import build_anthropic_cache_mw +from .shared.compaction import build_compaction_mw +from .shared.file_intent import build_file_intent_mw +from .shared.filesystem import build_filesystem_mw +from .shared.memory import build_memory_mw +from .shared.patch_tool_calls import build_patch_tool_calls_mw +from .shared.permissions import ( + build_full_permission_mw, + build_permission_context, +) +from .shared.resilience import build_resilience_bundle +from .shared.todos import build_todos_mw +from .subagent.extras import build_subagent_extras + + +def build_main_agent_deepagent_middleware( + *, + llm: BaseChatModel, + tools: Sequence[BaseTool], + backend_resolver: Any, + filesystem_mode: FilesystemMode, + search_space_id: int, + user_id: str | None, + thread_id: int | None, + visibility: ChatVisibility, + anon_session_id: str | None, + available_connectors: list[str] | None, + available_document_types: list[str] | None, + mentioned_document_ids: list[int] | None, + max_input_tokens: int | None, + flags: AgentFeatureFlags, + subagent_dependencies: dict[str, Any], + checkpointer: Checkpointer, + mcp_tools_by_agent: dict[str, ToolsPermissions] | None = None, + disabled_tools: list[str] | None = None, +) -> list[Any]: + """Ordered middleware for ``create_agent`` (None entries already stripped).""" + permissions = build_permission_context( + flags=flags, + filesystem_mode=filesystem_mode, + tools=tools, + available_connectors=available_connectors, + ) + resilience = build_resilience_bundle(flags) + + # Single instance threaded into both the main-agent stack and the general-purpose subagent. 
+ memory_mw = build_memory_mw( + user_id=user_id, + search_space_id=search_space_id, + visibility=visibility, + ) + + general_purpose_subagent = build_general_purpose_subagent( + llm=llm, + tools=tools, + backend_resolver=backend_resolver, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + user_id=user_id, + thread_id=thread_id, + permissions=permissions, + resilience=resilience, + memory_mw=memory_mw, + ) + + subagents_registry: list[SubAgent] = [] + try: + subagent_extras = build_subagent_extras( + permissions=permissions, + resilience=resilience, + ) + subagents_registry = build_subagents( + dependencies=subagent_dependencies, + model=llm, + extra_middleware=subagent_extras, + mcp_tools_by_agent=mcp_tools_by_agent or {}, + exclude=get_subagents_to_exclude(available_connectors), + disabled_tools=disabled_tools, + ) + logging.info( + "Subagents registry: %s", + [s["name"] for s in subagents_registry], + ) + except Exception: + logging.exception("Subagents registry build failed") + raise + + subagents: list[SubAgent] = [general_purpose_subagent, *subagents_registry] + + stack: list[Any] = [ + build_busy_mutex_mw(flags), + build_otel_mw(flags), + build_todos_mw(), + memory_mw, + build_anonymous_doc_mw( + filesystem_mode=filesystem_mode, anon_session_id=anon_session_id + ), + build_knowledge_tree_mw( + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + llm=llm, + ), + build_knowledge_priority_mw( + llm=llm, + search_space_id=search_space_id, + filesystem_mode=filesystem_mode, + available_connectors=available_connectors, + available_document_types=available_document_types, + mentioned_document_ids=mentioned_document_ids, + ), + build_file_intent_mw(llm), + build_filesystem_mw( + backend_resolver=backend_resolver, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + user_id=user_id, + thread_id=thread_id, + ), + build_kb_persistence_mw( + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + 
user_id=user_id, + thread_id=thread_id, + ), + build_skills_mw( + flags=flags, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + ), + SurfSenseCheckpointedSubAgentMiddleware( + checkpointer=checkpointer, + backend=StateBackend, + subagents=subagents, + ), + build_selector_mw(flags=flags, tools=tools), + resilience.model_call_limit, + resilience.tool_call_limit, + build_context_editing_mw( + flags=flags, + max_input_tokens=max_input_tokens, + tools=tools, + backend_resolver=backend_resolver, + ), + build_compaction_mw(llm), + build_noop_injection_mw(flags), + resilience.retry, + resilience.fallback, + build_repair_mw(flags=flags, tools=tools), + build_full_permission_mw(permissions.rulesets), + build_doom_loop_mw(flags), + build_action_log_mw( + flags=flags, + thread_id=thread_id, + search_space_id=search_space_id, + user_id=user_id, + ), + build_patch_tool_calls_mw(), + build_dedup_hitl_mw(tools), + *build_plugin_middlewares( + flags=flags, + search_space_id=search_space_id, + user_id=user_id, + visibility=visibility, + llm=llm, + ), + build_anthropic_cache_mw(), + ] + return [m for m in stack if m is not None] From 73272ce348e5da1dd92ae4c301a5dff8eb215fef Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 20:55:38 +0200 Subject: [PATCH 20/29] refactor(multi-agent): switch compile graph to new orchestrator and drop deepagent_stack --- .../main_agent/graph/compile_graph_sync.py | 5 +- .../main_agent/graph/middleware/__init__.py | 7 - .../graph/middleware/deepagent_stack.py | 539 ------------------ .../__init__.py | 0 .../config.py | 0 .../constants.py | 0 .../middleware.py | 0 .../propagation.py | 0 .../resume.py | 0 .../task_tool.py | 0 .../test_hitl_bridge.py | 2 +- .../test_resume_helpers.py | 2 +- 12 files changed, 5 insertions(+), 550 deletions(-) delete mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/__init__.py delete mode 100644 
surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/__init__.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/config.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/constants.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/middleware.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/propagation.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/resume.py (100%) rename surfsense_backend/app/agents/multi_agent_chat/{main_agent/graph/middleware => middleware/main_agent}/checkpointed_subagent_middleware/task_tool.py (100%) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py index 7afa30a31..4ed94bf7b 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/compile_graph_sync.py @@ -11,6 +11,9 @@ from langchain_core.language_models import BaseChatModel from langchain_core.tools import BaseTool from langgraph.types import Checkpointer +from app.agents.multi_agent_chat.middleware import ( + build_main_agent_deepagent_middleware, +) from app.agents.multi_agent_chat.subagents.shared.permissions import ( ToolsPermissions, ) @@ -19,8 +22,6 @@ from 
app.agents.new_chat.feature_flags import AgentFeatureFlags from app.agents.new_chat.filesystem_selection import FilesystemMode from app.db import ChatVisibility -from .middleware import build_main_agent_deepagent_middleware - def build_compiled_agent_graph_sync( *, diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/__init__.py deleted file mode 100644 index 757ee02f8..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Main-agent graph middleware assembly (SurfSense + LangChain + deepagents).""" - -from __future__ import annotations - -from .deepagent_stack import build_main_agent_deepagent_middleware - -__all__ = ["build_main_agent_deepagent_middleware"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py deleted file mode 100644 index 1d6a8763e..000000000 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ /dev/null @@ -1,539 +0,0 @@ -"""Assemble the main-agent deep-agent middleware list (LangChain + SurfSense + deepagents).""" - -from __future__ import annotations - -import logging -from collections.abc import Sequence -from typing import Any - -from deepagents import SubAgent -from deepagents.backends import StateBackend -from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware -from deepagents.middleware.skills import SkillsMiddleware -from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT -from langchain.agents.middleware import ( - LLMToolSelectorMiddleware, - ModelCallLimitMiddleware, - TodoListMiddleware, - ToolCallLimitMiddleware, -) -from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware -from 
langchain_core.language_models import BaseChatModel -from langchain_core.tools import BaseTool -from langgraph.types import Checkpointer - -from app.agents.multi_agent_chat.subagents import ( - build_subagents, - get_subagents_to_exclude, -) -from app.agents.multi_agent_chat.subagents.shared.permissions import ( - ToolsPermissions, -) -from app.agents.new_chat.feature_flags import AgentFeatureFlags -from app.agents.new_chat.filesystem_selection import FilesystemMode -from app.agents.new_chat.middleware import ( - ActionLogMiddleware, - AnonymousDocumentMiddleware, - BusyMutexMiddleware, - ClearToolUsesEdit, - DedupHITLToolCallsMiddleware, - DoomLoopMiddleware, - FileIntentMiddleware, - KnowledgeBasePersistenceMiddleware, - KnowledgePriorityMiddleware, - KnowledgeTreeMiddleware, - MemoryInjectionMiddleware, - NoopInjectionMiddleware, - OtelSpanMiddleware, - PermissionMiddleware, - RetryAfterMiddleware, - SpillingContextEditingMiddleware, - SpillToBackendEdit, - SurfSenseFilesystemMiddleware, - ToolCallNameRepairMiddleware, - build_skills_backend_factory, - create_surfsense_compaction_middleware, - default_skills_sources, -) -from app.agents.new_chat.middleware.scoped_model_fallback import ( - ScopedModelFallbackMiddleware, -) -from app.agents.new_chat.permissions import Rule, Ruleset -from app.agents.new_chat.plugin_loader import ( - PluginContext, - load_allowed_plugin_names_from_env, - load_plugin_middlewares, -) -from app.agents.new_chat.tools.registry import BUILTIN_TOOLS -from app.db import ChatVisibility - -from ...context_prune.prune_tool_names import safe_exclude_tools -from .checkpointed_subagent_middleware import SurfSenseCheckpointedSubAgentMiddleware - - -def build_main_agent_deepagent_middleware( - *, - llm: BaseChatModel, - tools: Sequence[BaseTool], - backend_resolver: Any, - filesystem_mode: FilesystemMode, - search_space_id: int, - user_id: str | None, - thread_id: int | None, - visibility: ChatVisibility, - anon_session_id: str | None, - 
available_connectors: list[str] | None, - available_document_types: list[str] | None, - mentioned_document_ids: list[int] | None, - max_input_tokens: int | None, - flags: AgentFeatureFlags, - subagent_dependencies: dict[str, Any], - checkpointer: Checkpointer, - mcp_tools_by_agent: dict[str, ToolsPermissions] | None = None, - disabled_tools: list[str] | None = None, -) -> list[Any]: - """Build ordered middleware for ``create_agent`` (Nones already stripped).""" - _memory_middleware = MemoryInjectionMiddleware( - user_id=user_id, - search_space_id=search_space_id, - thread_visibility=visibility, - ) - - gp_middleware = [ - TodoListMiddleware(), - _memory_middleware, - FileIntentMiddleware(llm=llm), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - create_surfsense_compaction_middleware(llm, StateBackend), - PatchToolCallsMiddleware(), - AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), - ] - - # Build permission rulesets up front so the GP subagent can mirror ``ask`` - # rules into ``interrupt_on``: tool calls emitted from within ``task`` runs - # never reach the parent's ``PermissionMiddleware``. 
- is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER - permission_enabled = flags.enable_permission and not flags.disable_new_agent_stack - permission_rulesets: list[Ruleset] = [] - if permission_enabled or is_desktop_fs: - permission_rulesets.append( - Ruleset( - rules=[Rule(permission="*", pattern="*", action="allow")], - origin="surfsense_defaults", - ) - ) - if is_desktop_fs: - permission_rulesets.append( - Ruleset( - rules=[ - Rule(permission="rm", pattern="*", action="ask"), - Rule(permission="rmdir", pattern="*", action="ask"), - Rule(permission="move_file", pattern="*", action="ask"), - Rule(permission="edit_file", pattern="*", action="ask"), - Rule(permission="write_file", pattern="*", action="ask"), - ], - origin="desktop_safety", - ) - ) - - # Tools that self-prompt via ``request_approval`` must not also appear - # as ``ask`` rules — that would double-prompt the user for one call. - _tool_names_in_use = {t.name for t in tools} - - # Deny parent-bound tools whose ``required_connector`` is missing. - # No-op today (connector subagents are pruned upstream); guards future - # additions to the parent's tool list. 
- if permission_enabled: - _available_set = set(available_connectors or []) - _synthesized: list[Rule] = [] - for tool_def in BUILTIN_TOOLS: - if tool_def.name not in _tool_names_in_use: - continue - rc = tool_def.required_connector - if rc and rc not in _available_set: - _synthesized.append( - Rule(permission=tool_def.name, pattern="*", action="deny") - ) - if _synthesized: - permission_rulesets.append( - Ruleset(rules=_synthesized, origin="connector_synthesized") - ) - gp_interrupt_on: dict[str, bool] = { - rule.permission: True - for rs in permission_rulesets - for rule in rs.rules - if rule.action == "ask" and rule.permission in _tool_names_in_use - } - - general_purpose_spec: SubAgent = { # type: ignore[typeddict-unknown-key] - **GENERAL_PURPOSE_SUBAGENT, - "model": llm, - "tools": tools, - "middleware": gp_middleware, - } - if gp_interrupt_on: - general_purpose_spec["interrupt_on"] = gp_interrupt_on - - # Deny-only on subagents: ``task`` runs bypass the parent's - # PermissionMiddleware, while bucket-based ask gates own the ask path. - subagent_deny_rulesets: list[Ruleset] = [ - Ruleset( - rules=[r for r in rs.rules if r.action == "deny"], - origin=rs.origin, - ) - for rs in permission_rulesets - ] - subagent_deny_rulesets = [rs for rs in subagent_deny_rulesets if rs.rules] - - subagent_deny_permission_mw: PermissionMiddleware | None = ( - PermissionMiddleware(rulesets=subagent_deny_rulesets) - if subagent_deny_rulesets - else None - ) - - if subagent_deny_permission_mw is not None: - # Run deny check on already-repaired tool calls; insert before - # PatchToolCallsMiddleware (append if the slot moves). - _patch_idx = next( - ( - i - for i, m in enumerate(gp_middleware) - if isinstance(m, PatchToolCallsMiddleware) - ), - len(gp_middleware), - ) - gp_middleware.insert(_patch_idx, subagent_deny_permission_mw) - - # Defined early so the same instances reach both gp_middleware and - # subagent_extra_middleware below. 
- retry_mw = ( - RetryAfterMiddleware(max_retries=3) - if flags.enable_retry_after and not flags.disable_new_agent_stack - else None - ) - fallback_mw: ScopedModelFallbackMiddleware | None = None - if flags.enable_model_fallback and not flags.disable_new_agent_stack: - try: - fallback_mw = ScopedModelFallbackMiddleware( - "openai:gpt-4o-mini", - "anthropic:claude-3-5-haiku-20241022", - ) - except Exception: - logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") - fallback_mw = None - - # Per-agent caps; counts are not summed across parent + subagents. - model_call_limit_mw = ( - ModelCallLimitMiddleware( - thread_limit=120, - run_limit=80, - exit_behavior="end", - ) - if flags.enable_model_call_limit and not flags.disable_new_agent_stack - else None - ) - tool_call_limit_mw = ( - ToolCallLimitMiddleware( - thread_limit=300, run_limit=80, exit_behavior="continue" - ) - if flags.enable_tool_call_limit and not flags.disable_new_agent_stack - else None - ) - - # gp_middleware is held by reference inside general_purpose_spec, so - # mutating it here propagates into the spec. 
- _gp_resilience: list[Any] = [ - m - for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw) - if m is not None - ] - if _gp_resilience: - _cache_idx = next( - ( - i - for i, m in enumerate(gp_middleware) - if isinstance(m, AnthropicPromptCachingMiddleware) - ), - len(gp_middleware), - ) - for offset, mw in enumerate(_gp_resilience): - gp_middleware.insert(_cache_idx + offset, mw) - - registry_subagents: list[SubAgent] = [] - try: - subagent_extra_middleware: list[Any] = [ - TodoListMiddleware(), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - ] - if subagent_deny_permission_mw is not None: - subagent_extra_middleware.append(subagent_deny_permission_mw) - for _resilience_mw in ( - retry_mw, - fallback_mw, - model_call_limit_mw, - tool_call_limit_mw, - ): - if _resilience_mw is not None: - subagent_extra_middleware.append(_resilience_mw) - registry_subagents = build_subagents( - dependencies=subagent_dependencies, - model=llm, - extra_middleware=subagent_extra_middleware, - mcp_tools_by_agent=mcp_tools_by_agent or {}, - exclude=get_subagents_to_exclude(available_connectors), - disabled_tools=disabled_tools, - ) - logging.info( - "Registry subagents: %s", - [s["name"] for s in registry_subagents], - ) - except Exception: - logging.exception("Registry subagent build failed") - raise - - subagent_specs: list[SubAgent] = [general_purpose_spec, *registry_subagents] - - summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend) - - context_edit_mw = None - if ( - flags.enable_context_editing - and not flags.disable_new_agent_stack - and max_input_tokens - ): - spill_edit = SpillToBackendEdit( - trigger=int(max_input_tokens * 0.55), - clear_at_least=int(max_input_tokens * 0.15), - keep=5, - exclude_tools=safe_exclude_tools(tools), - clear_tool_inputs=True, - ) - clear_edit = ClearToolUsesEdit( - 
trigger=int(max_input_tokens * 0.55), - clear_at_least=int(max_input_tokens * 0.15), - keep=5, - exclude_tools=safe_exclude_tools(tools), - clear_tool_inputs=True, - placeholder="[cleared - older tool output trimmed for context]", - ) - context_edit_mw = SpillingContextEditingMiddleware( - edits=[spill_edit, clear_edit], - backend_resolver=backend_resolver, - ) - - noop_mw = ( - NoopInjectionMiddleware() - if flags.enable_compaction_v2 and not flags.disable_new_agent_stack - else None - ) - - repair_mw = None - if flags.enable_tool_call_repair and not flags.disable_new_agent_stack: - registered_names: set[str] = {t.name for t in tools} - registered_names |= { - "write_todos", - "ls", - "read_file", - "write_file", - "edit_file", - "glob", - "grep", - "execute", - "task", - "mkdir", - "cd", - "pwd", - "move_file", - "rm", - "rmdir", - "list_tree", - "execute_code", - } - repair_mw = ToolCallNameRepairMiddleware( - registered_tool_names=registered_names, - fuzzy_match_threshold=None, - ) - - doom_loop_mw = ( - DoomLoopMiddleware(threshold=3) - if flags.enable_doom_loop and not flags.disable_new_agent_stack - else None - ) - - permission_mw: PermissionMiddleware | None = ( - PermissionMiddleware(rulesets=permission_rulesets) - if permission_rulesets - else None - ) - - action_log_mw: ActionLogMiddleware | None = None - if ( - flags.enable_action_log - and not flags.disable_new_agent_stack - and thread_id is not None - ): - try: - tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS} - action_log_mw = ActionLogMiddleware( - thread_id=thread_id, - search_space_id=search_space_id, - user_id=user_id, - tool_definitions=tool_defs_by_name, - ) - except Exception: # pragma: no cover - defensive - logging.warning( - "ActionLogMiddleware init failed; running without it.", - exc_info=True, - ) - action_log_mw = None - - busy_mutex_mw: BusyMutexMiddleware | None = ( - BusyMutexMiddleware() - if flags.enable_busy_mutex and not flags.disable_new_agent_stack - else None - ) - - 
otel_mw: OtelSpanMiddleware | None = ( - OtelSpanMiddleware() - if flags.enable_otel and not flags.disable_new_agent_stack - else None - ) - - plugin_middlewares: list[Any] = [] - if flags.enable_plugin_loader and not flags.disable_new_agent_stack: - try: - allowed_names = load_allowed_plugin_names_from_env() - if allowed_names: - plugin_middlewares = load_plugin_middlewares( - PluginContext.build( - search_space_id=search_space_id, - user_id=user_id, - thread_visibility=visibility, - llm=llm, - ), - allowed_plugin_names=allowed_names, - ) - except Exception: # pragma: no cover - defensive - logging.warning( - "Plugin loader failed; continuing without plugins.", - exc_info=True, - ) - plugin_middlewares = [] - - skills_mw: SkillsMiddleware | None = None - if flags.enable_skills and not flags.disable_new_agent_stack: - try: - skills_factory = build_skills_backend_factory( - search_space_id=search_space_id - if filesystem_mode == FilesystemMode.CLOUD - else None, - ) - skills_mw = SkillsMiddleware( - backend=skills_factory, - sources=default_skills_sources(), - ) - except Exception as exc: # pragma: no cover - defensive - logging.warning("SkillsMiddleware init failed; skipping: %s", exc) - skills_mw = None - - selector_mw: LLMToolSelectorMiddleware | None = None - if ( - flags.enable_llm_tool_selector - and not flags.disable_new_agent_stack - and len(tools) > 30 - ): - try: - selector_mw = LLMToolSelectorMiddleware( - model="openai:gpt-4o-mini", - max_tools=12, - always_include=[ - name - for name in ( - "update_memory", - "get_connected_accounts", - "scrape_webpage", - ) - if name in {t.name for t in tools} - ], - ) - except Exception: - logging.warning("LLMToolSelectorMiddleware init failed; skipping.") - selector_mw = None - - deepagent_middleware = [ - busy_mutex_mw, - otel_mw, - TodoListMiddleware(), - _memory_middleware, - AnonymousDocumentMiddleware( - anon_session_id=anon_session_id, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - 
KnowledgeTreeMiddleware( - search_space_id=search_space_id, - filesystem_mode=filesystem_mode, - llm=llm, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - KnowledgePriorityMiddleware( - llm=llm, - search_space_id=search_space_id, - filesystem_mode=filesystem_mode, - available_connectors=available_connectors, - available_document_types=available_document_types, - mentioned_document_ids=mentioned_document_ids, - ), - FileIntentMiddleware(llm=llm), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - KnowledgeBasePersistenceMiddleware( - search_space_id=search_space_id, - created_by_id=user_id, - filesystem_mode=filesystem_mode, - thread_id=thread_id, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - skills_mw, - SurfSenseCheckpointedSubAgentMiddleware( - checkpointer=checkpointer, - backend=StateBackend, - subagents=subagent_specs, - ), - selector_mw, - model_call_limit_mw, - tool_call_limit_mw, - context_edit_mw, - summarization_mw, - noop_mw, - retry_mw, - fallback_mw, - repair_mw, - permission_mw, - doom_loop_mw, - action_log_mw, - PatchToolCallsMiddleware(), - DedupHITLToolCallsMiddleware(agent_tools=list(tools)), - *plugin_middlewares, - AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), - ] - return [m for m in deepagent_middleware if m is not None] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/__init__.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/__init__.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/__init__.py diff --git 
a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/config.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/config.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/config.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/config.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/constants.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/constants.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/constants.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/constants.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/middleware.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/middleware.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/propagation.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/propagation.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/propagation.py rename to 
surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/propagation.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/resume.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/resume.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/resume.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/resume.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py similarity index 100% rename from surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/checkpointed_subagent_middleware/task_tool.py rename to surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py index ab6a644b5..dbc2c9c00 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_hitl_bridge.py @@ -12,7 +12,7 @@ from langgraph.graph import END, START, StateGraph from langgraph.types import Command, interrupt from typing_extensions import TypedDict -from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subagent_middleware.task_tool import ( +from 
app.agents.multi_agent_chat.middleware.main_agent.checkpointed_subagent_middleware.task_tool import ( build_task_tool_with_parent_config, ) diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py index e73fb2823..347b32dbd 100644 --- a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_resume_helpers.py @@ -4,7 +4,7 @@ from __future__ import annotations from langchain.tools import ToolRuntime -from app.agents.multi_agent_chat.main_agent.graph.middleware.checkpointed_subagent_middleware.config import ( +from app.agents.multi_agent_chat.middleware.main_agent.checkpointed_subagent_middleware.config import ( consume_surfsense_resume, has_surfsense_resume, ) From bba5fb1db830aa4a95e2011d237726f45d69e9b4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:22:57 +0200 Subject: [PATCH 21/29] fix(multi-agent): fail closed when connector discovery raises --- .../multi_agent_chat/main_agent/runtime/factory.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 6a6fd39b7..81123d450 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -85,7 +85,18 @@ async def create_surfsense_deep_agent( ) except Exception as e: - logging.warning("Failed to discover available connectors/document types: %s", e) + logging.warning( + "Connector/doc-type discovery failed; excluding connector subagents this turn: 
%s", + e, + ) + + # Fail closed: a None list short-circuits ``get_subagents_to_exclude`` to "exclude + # nothing", which would silently advertise every connector specialist on a flaky + # discovery call. Empty list excludes connector-gated subagents while keeping builtins. + if available_connectors is None: + available_connectors = [] + if available_document_types is None: + available_document_types = [] _perf_log.info( "[create_agent] Connector/doc-type discovery in %.3fs", time.perf_counter() - _t0, From 744ad92971d179af234b06fec0e57fde435e4cf2 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:23:17 +0200 Subject: [PATCH 22/29] chore(multi-agent): demote subagent registry listing log to debug --- .../app/agents/multi_agent_chat/middleware/stack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py index 563332986..f894acc7e 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py @@ -121,7 +121,7 @@ def build_main_agent_deepagent_middleware( exclude=get_subagents_to_exclude(available_connectors), disabled_tools=disabled_tools, ) - logging.info( + logging.debug( "Subagents registry: %s", [s["name"] for s in subagents_registry], ) From c8ed70a26c5726339fe5b5451efdc6f520e1a1a8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:30:08 +0200 Subject: [PATCH 23/29] fix(multi-agent): layer per-thread prompt cache key onto LLM at agent build --- .../app/agents/multi_agent_chat/main_agent/runtime/factory.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 81123d450..86da53a1a 100644 --- 
a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -26,6 +26,7 @@ from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags from app.agents.new_chat.filesystem_backends import build_backend_resolver from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.llm_config import AgentConfig +from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool from app.agents.new_chat.tools.registry import build_tools_async from app.db import ChatVisibility @@ -62,6 +63,9 @@ async def create_surfsense_deep_agent( ): """Deep agent with SurfSense tools/middleware; registry route subagents behind ``task`` when enabled.""" _t_agent_total = time.perf_counter() + + apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id) + filesystem_selection = filesystem_selection or FilesystemSelection() backend_resolver = build_backend_resolver( filesystem_selection, From 07a84d1a41051f3a3c6068b8deb3c5cf674b7de0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:50:02 +0200 Subject: [PATCH 24/29] fix(multi-agent): cache compiled agent graph keyed on per-request inputs --- .../main_agent/runtime/agent_cache.py | 117 ++++++++++++++++++ .../main_agent/runtime/factory.py | 9 +- 2 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py new file mode 100644 index 000000000..42f984b79 --- /dev/null +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py @@ -0,0 +1,117 @@ +"""Compiled agent graph 
caching for the multi-agent path.""" + +from __future__ import annotations + +import asyncio +from collections.abc import Sequence +from typing import Any + +from langchain_core.language_models import BaseChatModel +from langchain_core.tools import BaseTool +from langgraph.types import Checkpointer + +from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions +from app.agents.new_chat.agent_cache import ( + flags_signature, + get_cache, + stable_hash, + system_prompt_hash, + tools_signature, +) +from app.agents.new_chat.feature_flags import AgentFeatureFlags +from app.agents.new_chat.filesystem_selection import FilesystemMode +from app.db import ChatVisibility + +from ..graph.compile_graph_sync import build_compiled_agent_graph_sync + + +def mcp_signature(mcp_tools_by_agent: dict[str, ToolsPermissions]) -> str: + """Hash the per-agent MCP tool surface so a change rotates the cache key.""" + rows = [] + for agent_name in sorted(mcp_tools_by_agent.keys()): + perms = mcp_tools_by_agent[agent_name] + allow_names = sorted(item.get("name", "") for item in perms.get("allow", [])) + ask_names = sorted(item.get("name", "") for item in perms.get("ask", [])) + rows.append((agent_name, allow_names, ask_names)) + return stable_hash(rows) + + +async def build_agent_with_cache( + *, + llm: BaseChatModel, + tools: Sequence[BaseTool], + final_system_prompt: str, + backend_resolver: Any, + filesystem_mode: FilesystemMode, + search_space_id: int, + user_id: str | None, + thread_id: int | None, + visibility: ChatVisibility, + anon_session_id: str | None, + available_connectors: list[str], + available_document_types: list[str], + mentioned_document_ids: list[int] | None, + max_input_tokens: int | None, + flags: AgentFeatureFlags, + checkpointer: Checkpointer, + subagent_dependencies: dict[str, Any], + mcp_tools_by_agent: dict[str, ToolsPermissions], + disabled_tools: list[str] | None, + config_id: str | None, +) -> Any: + """Compile the multi-agent graph, 
serving from cache when key components are stable.""" + + async def _build() -> Any: + return await asyncio.to_thread( + build_compiled_agent_graph_sync, + llm=llm, + tools=tools, + final_system_prompt=final_system_prompt, + backend_resolver=backend_resolver, + filesystem_mode=filesystem_mode, + search_space_id=search_space_id, + user_id=user_id, + thread_id=thread_id, + visibility=visibility, + anon_session_id=anon_session_id, + available_connectors=available_connectors, + available_document_types=available_document_types, + mentioned_document_ids=mentioned_document_ids, + max_input_tokens=max_input_tokens, + flags=flags, + checkpointer=checkpointer, + subagent_dependencies=subagent_dependencies, + mcp_tools_by_agent=mcp_tools_by_agent, + disabled_tools=disabled_tools, + ) + + if not (flags.enable_agent_cache and not flags.disable_new_agent_stack): + return await _build() + + # Every per-request value any middleware closes over at __init__ must be in + # the key, otherwise a hit will leak state across threads. Bump the schema + # version when the component list changes shape. 
+ cache_key = stable_hash( + "multi-agent-v1", + config_id, + thread_id, + user_id, + search_space_id, + visibility, + filesystem_mode, + anon_session_id, + tools_signature( + tools, + available_connectors=available_connectors, + available_document_types=available_document_types, + ), + mcp_signature(mcp_tools_by_agent), + flags_signature(flags), + system_prompt_hash(final_system_prompt), + max_input_tokens, + sorted(disabled_tools) if disabled_tools else None, + ) + return await get_cache().get_or_build(cache_key, builder=_build) + + +__all__ = ["build_agent_with_cache", "mcp_signature"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 86da53a1a..9e3c8eab4 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -2,7 +2,6 @@ from __future__ import annotations -import asyncio import logging import time from collections.abc import Sequence @@ -33,12 +32,12 @@ from app.db import ChatVisibility from app.services.connector_service import ConnectorService from app.utils.perf import get_perf_logger -from ..graph.compile_graph_sync import build_compiled_agent_graph_sync from ..system_prompt import build_main_agent_system_prompt from ..tools import ( MAIN_AGENT_SURFSENSE_TOOL_NAMES, MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED, ) +from .agent_cache import build_agent_with_cache _perf_log = get_perf_logger() @@ -210,9 +209,10 @@ async def create_surfsense_deep_agent( final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT + config_id = agent_config.config_id if agent_config is not None else None + _t0 = time.perf_counter() - agent = await asyncio.to_thread( - build_compiled_agent_graph_sync, + agent = await build_agent_with_cache( llm=llm, tools=tools, final_system_prompt=final_system_prompt, @@ -232,6 +232,7 @@ async def 
create_surfsense_deep_agent( subagent_dependencies=dependencies, mcp_tools_by_agent=mcp_tools_by_agent, disabled_tools=disabled_tools, + config_id=config_id, ) _perf_log.info( "[create_agent] Middleware stack + graph compiled in %.3fs", From 997d86079046ed6f02bedb4c4a16adba5d1c4ed9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:52:03 +0200 Subject: [PATCH 25/29] fix(multi-agent): defensive message extraction on subagent task return --- .../checkpointed_subagent_middleware/task_tool.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py index d23dc33a9..5668f8ddb 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py @@ -69,9 +69,16 @@ def build_task_tool_with_parent_config( raise ValueError(msg) state_update = {k: v for k, v in result.items() if k not in EXCLUDED_STATE_KEYS} - message_text = ( - result["messages"][-1].text.rstrip() if result["messages"][-1].text else "" - ) + messages = result["messages"] + if not messages: + msg = ( + "CompiledSubAgent returned an empty 'messages' list. " + "Subagents must produce at least one message so the parent has " + "output to forward back to the user." 
+ ) + raise ValueError(msg) + last_text = getattr(messages[-1], "text", None) or "" + message_text = last_text.rstrip() return Command( update={ **state_update, From a3c3db2a18d270f73dbc19e6b502a6d0b01c2431 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 22:57:35 +0200 Subject: [PATCH 26/29] test(multi-agent): pin first-wins assumption on pending subagent interrupts --- .../test_pending_interrupt.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_pending_interrupt.py diff --git a/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_pending_interrupt.py b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_pending_interrupt.py new file mode 100644 index 000000000..75242689d --- /dev/null +++ b/surfsense_backend/tests/unit/agents/multi_agent_chat/middleware/checkpointed_subagent_middleware/test_pending_interrupt.py @@ -0,0 +1,55 @@ +"""Pins the first-wins assumption of ``get_first_pending_subagent_interrupt``. + +The bridge currently relies on at-most-one pending interrupt per snapshot +(sequential tool nodes). If parallel tool calls are ever enabled, the bridge +needs an id-aware lookup; these tests will need to be revisited at that point. 
+""" + +from __future__ import annotations + +from types import SimpleNamespace + +from app.agents.multi_agent_chat.middleware.main_agent.checkpointed_subagent_middleware.resume import ( + get_first_pending_subagent_interrupt, +) + + +class TestGetFirstPendingSubagentInterrupt: + def test_returns_first_when_multiple_top_level_interrupts_pending(self): + first = SimpleNamespace(id="i-1", value={"decision": "approve"}) + second = SimpleNamespace(id="i-2", value={"decision": "reject"}) + state = SimpleNamespace(interrupts=(first, second), tasks=()) + + assert get_first_pending_subagent_interrupt(state) == ( + "i-1", + {"decision": "approve"}, + ) + + def test_returns_first_when_multiple_subtask_interrupts_pending(self): + first = SimpleNamespace(id="i-A", value="approve") + second = SimpleNamespace(id="i-B", value="reject") + sub_task = SimpleNamespace(interrupts=(first, second)) + state = SimpleNamespace(interrupts=(), tasks=(sub_task,)) + + assert get_first_pending_subagent_interrupt(state) == ("i-A", "approve") + + def test_returns_none_when_no_interrupts(self): + state = SimpleNamespace(interrupts=(), tasks=()) + + assert get_first_pending_subagent_interrupt(state) == (None, None) + + def test_returns_none_when_state_is_none(self): + assert get_first_pending_subagent_interrupt(None) == (None, None) + + def test_skips_interrupts_with_none_value(self): + empty = SimpleNamespace(id="i-empty", value=None) + real = SimpleNamespace(id="i-real", value="approve") + state = SimpleNamespace(interrupts=(empty, real), tasks=()) + + assert get_first_pending_subagent_interrupt(state) == ("i-real", "approve") + + def test_normalizes_non_string_id_to_none(self): + interrupt = SimpleNamespace(id=12345, value="approve") + state = SimpleNamespace(interrupts=(interrupt,), tasks=()) + + assert get_first_pending_subagent_interrupt(state) == (None, "approve") From 657c31fdf47ecbd141ea1d64ab71b7e3e0727388 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 23:01:24 +0200 
Subject: [PATCH 27/29] refactor(stream): rename multi-agent factory alias for clarity --- surfsense_backend/app/tasks/chat/stream_new_chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 3ba3912eb..2ee3f075a 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -29,7 +29,7 @@ from sqlalchemy.future import select from sqlalchemy.orm import selectinload from app.agents.multi_agent_chat import ( - create_surfsense_deep_agent as create_registry_deep_agent, + create_surfsense_deep_agent as create_multi_agent_chat, ) from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer @@ -2767,7 +2767,7 @@ async def stream_new_chat( _t0 = time.perf_counter() agent_factory = ( - create_registry_deep_agent + create_multi_agent_chat if use_multi_agent else create_surfsense_deep_agent ) @@ -4130,7 +4130,7 @@ async def stream_resume_chat( _t0 = time.perf_counter() agent_factory = ( - create_registry_deep_agent + create_multi_agent_chat if _app_config.MULTI_AGENT_CHAT_ENABLED else create_surfsense_deep_agent ) From 3cb2c3056ea2df059122250f7084ff7258930603 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 23:35:23 +0200 Subject: [PATCH 28/29] fix(stream): route every agent (re)build through one helper to prevent factory drift --- .../app/agents/multi_agent_chat/__init__.py | 4 +- .../multi_agent_chat/main_agent/__init__.py | 4 +- .../main_agent/runtime/__init__.py | 4 +- .../main_agent/runtime/factory.py | 2 +- .../app/tasks/chat/stream_new_chat.py | 65 +++++++++++++++---- 5 files changed, 60 insertions(+), 19 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/__init__.py index f568dc6b2..6c7d79eb8 100644 --- 
a/surfsense_backend/app/agents/multi_agent_chat/__init__.py +++ b/surfsense_backend/app/agents/multi_agent_chat/__init__.py @@ -2,6 +2,6 @@ from __future__ import annotations -from .main_agent import create_surfsense_deep_agent +from .main_agent import create_multi_agent_chat_deep_agent -__all__ = ["create_surfsense_deep_agent"] +__all__ = ["create_multi_agent_chat_deep_agent"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/__init__.py index b9a18fe53..f74ca0cd0 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/__init__.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/__init__.py @@ -2,6 +2,6 @@ from __future__ import annotations -from .runtime import create_surfsense_deep_agent +from .runtime import create_multi_agent_chat_deep_agent -__all__ = ["create_surfsense_deep_agent"] +__all__ = ["create_multi_agent_chat_deep_agent"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/__init__.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/__init__.py index 3d4ae977d..593e8da20 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/__init__.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/__init__.py @@ -2,6 +2,6 @@ from __future__ import annotations -from .factory import create_surfsense_deep_agent +from .factory import create_multi_agent_chat_deep_agent -__all__ = ["create_surfsense_deep_agent"] +__all__ = ["create_multi_agent_chat_deep_agent"] diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 9e3c8eab4..630455694 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -42,7 +42,7 @@ from .agent_cache import 
build_agent_with_cache _perf_log = get_perf_logger() -async def create_surfsense_deep_agent( +async def create_multi_agent_chat_deep_agent( llm: BaseChatModel, search_space_id: int, db_session: AsyncSession, diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 2ee3f075a..1a2f38077 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -28,9 +28,7 @@ from langchain_core.messages import HumanMessage from sqlalchemy.future import select from sqlalchemy.orm import selectinload -from app.agents.multi_agent_chat import ( - create_surfsense_deep_agent as create_multi_agent_chat, -) +from app.agents.multi_agent_chat import create_multi_agent_chat_deep_agent from app.agents.new_chat.chat_deepagent import create_surfsense_deep_agent from app.agents.new_chat.checkpointer import get_checkpointer from app.agents.new_chat.context import SurfSenseContextSchema @@ -577,6 +575,43 @@ async def _preflight_llm(llm: Any) -> None: ) +async def _build_main_agent_for_thread( + agent_factory: Any, + *, + llm: Any, + search_space_id: int, + db_session: Any, + connector_service: ConnectorService, + checkpointer: Any, + user_id: str | None, + thread_id: int | None, + agent_config: AgentConfig | None, + firecrawl_api_key: str | None, + thread_visibility: ChatVisibility | None, + filesystem_selection: FilesystemSelection | None, + disabled_tools: list[str] | None = None, + mentioned_document_ids: list[int] | None = None, +) -> Any: + """Single (re)build path so the agent factory cannot drift across + initial build, preflight repin, and mid-stream 429 recovery for one + ``thread_id``: a graph swap mid-turn would corrupt checkpointer state.""" + return await agent_factory( + llm=llm, + search_space_id=search_space_id, + db_session=db_session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=thread_id, + 
agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=thread_visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, + ) + + async def _settle_speculative_agent_build(task: asyncio.Task[Any]) -> None: """Wait for a discarded speculative agent build to release shared state. @@ -2767,7 +2802,7 @@ async def stream_new_chat( _t0 = time.perf_counter() agent_factory = ( - create_multi_agent_chat + create_multi_agent_chat_deep_agent if use_multi_agent else create_surfsense_deep_agent ) @@ -2776,7 +2811,8 @@ async def stream_new_chat( # if preflight reports 429 we will discard this future and rebuild # against the freshly pinned config below. agent_build_task = asyncio.create_task( - agent_factory( + _build_main_agent_for_thread( + agent_factory, llm=llm, search_space_id=search_space_id, db_session=session, @@ -2787,9 +2823,9 @@ async def stream_new_chat( agent_config=agent_config, firecrawl_api_key=firecrawl_api_key, thread_visibility=visibility, + filesystem_selection=filesystem_selection, disabled_tools=disabled_tools, mentioned_document_ids=mentioned_document_ids, - filesystem_selection=filesystem_selection, ), name="agent_build:stream_new_chat", ) @@ -3466,7 +3502,8 @@ async def stream_new_chat( title_task = None _t0 = time.perf_counter() - agent = await create_surfsense_deep_agent( + agent = await _build_main_agent_for_thread( + agent_factory, llm=llm, search_space_id=search_space_id, db_session=session, @@ -3477,9 +3514,9 @@ async def stream_new_chat( agent_config=agent_config, firecrawl_api_key=firecrawl_api_key, thread_visibility=visibility, + filesystem_selection=filesystem_selection, disabled_tools=disabled_tools, mentioned_document_ids=mentioned_document_ids, - filesystem_selection=filesystem_selection, ) _perf_log.info( "[stream_new_chat] Runtime rate-limit recovery repinned " @@ -4130,12 +4167,13 @@ async def stream_resume_chat( _t0 = 
time.perf_counter() agent_factory = ( - create_multi_agent_chat + create_multi_agent_chat_deep_agent if _app_config.MULTI_AGENT_CHAT_ENABLED else create_surfsense_deep_agent ) agent_build_task = asyncio.create_task( - agent_factory( + _build_main_agent_for_thread( + agent_factory, llm=llm, search_space_id=search_space_id, db_session=session, @@ -4224,7 +4262,8 @@ async def stream_resume_chat( "fallback_config_id": llm_config_id, }, ) - agent = await agent_factory( + agent = await _build_main_agent_for_thread( + agent_factory, llm=llm, search_space_id=search_space_id, db_session=session, @@ -4409,7 +4448,8 @@ async def stream_resume_chat( raise stream_exc _t0 = time.perf_counter() - agent = await create_surfsense_deep_agent( + agent = await _build_main_agent_for_thread( + agent_factory, llm=llm, search_space_id=search_space_id, db_session=session, @@ -4421,6 +4461,7 @@ async def stream_resume_chat( firecrawl_api_key=firecrawl_api_key, thread_visibility=visibility, filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, ) _perf_log.info( "[stream_resume] Runtime rate-limit recovery repinned " From a421e7d792ca7dcdd77c6c2ebdc4944042bb0646 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 23:42:11 +0200 Subject: [PATCH 29/29] fix(multi-agent): degrade to builtins-only when MCP or subagent registry build fails --- .../multi_agent_chat/main_agent/runtime/factory.py | 11 ++++++++++- .../app/agents/multi_agent_chat/middleware/stack.py | 8 ++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 630455694..d0354aca3 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -129,7 +129,16 @@ async def create_multi_agent_chat_deep_agent( } _t0 = 
time.perf_counter() - mcp_tools_by_agent = await load_mcp_tools_by_connector(db_session, search_space_id) + try: + mcp_tools_by_agent = await load_mcp_tools_by_connector(db_session, search_space_id) + except Exception as e: + # Degrade to builtins-only rather than aborting the turn: a transient + # DB or MCP-server hiccup should not deny the user a response. + logging.warning( + "MCP tool discovery failed; subagents will run without MCP tools this turn: %s", + e, + ) + mcp_tools_by_agent = {} _perf_log.info( "[create_agent] load_mcp_tools_by_connector in %.3fs (%d buckets)", time.perf_counter() - _t0, diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py index f894acc7e..6d8faa3f4 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/stack.py @@ -126,8 +126,12 @@ def build_main_agent_deepagent_middleware( [s["name"] for s in subagents_registry], ) except Exception: - logging.exception("Subagents registry build failed") - raise + # Degrade to general-purpose-only rather than aborting the turn: + # one bad subagent dep should not deny the user a response. + logging.exception( + "Subagents registry build failed; falling back to general-purpose only" + ) + subagents_registry = [] subagents: list[SubAgent] = [general_purpose_subagent, *subagents_registry]