From 309c69553123483a2f1a23b6c6cb544f38699203 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Tue, 5 May 2026 18:16:00 +0200 Subject: [PATCH] feat(multi-agent): cap subagent model and tool call counts --- .../graph/middleware/deepagent_stack.py | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py index af7fceffa..b76b54c27 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/graph/middleware/deepagent_stack.py @@ -230,10 +230,34 @@ def build_main_agent_deepagent_middleware( logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") fallback_mw = None - # Mirror the parent's ordering: retry / fallback wrap caching, which wraps - # the model. ``gp_middleware`` is held by reference inside + # Cost / loop ceiling shared with subagents. ``state_schema`` of these + # middlewares is per-agent; counts are not summed across parent + sub — + # the cap acts as a safety net per agent, not a global budget. + model_call_limit_mw = ( + ModelCallLimitMiddleware( + thread_limit=120, + run_limit=80, + exit_behavior="end", + ) + if flags.enable_model_call_limit and not flags.disable_new_agent_stack + else None + ) + tool_call_limit_mw = ( + ToolCallLimitMiddleware( + thread_limit=300, run_limit=80, exit_behavior="continue" + ) + if flags.enable_tool_call_limit and not flags.disable_new_agent_stack + else None + ) + + # Mirror the parent's ordering: retry / fallback / limits wrap caching, + # which wraps the model. ``gp_middleware`` is held by reference inside # ``general_purpose_spec`` so this insertion propagates into the spec. - _gp_resilience: list[Any] = [m for m in (retry_mw, fallback_mw) if m is not None] + _gp_resilience: list[Any] = [ + m + for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw) + if m is not None + ] if _gp_resilience: _cache_idx = next( ( @@ -260,10 +284,14 @@ def build_main_agent_deepagent_middleware( ] if subagent_deny_permission_mw is not None: subagent_extra_middleware.append(subagent_deny_permission_mw) - if retry_mw is not None: - subagent_extra_middleware.append(retry_mw) - if fallback_mw is not None: - subagent_extra_middleware.append(fallback_mw) + for _resilience_mw in ( + retry_mw, + fallback_mw, + model_call_limit_mw, + tool_call_limit_mw, + ): + if _resilience_mw is not None: + subagent_extra_middleware.append(_resilience_mw) registry_subagents = build_subagents( dependencies=subagent_dependencies, model=llm, @@ -310,23 +338,6 @@ def build_main_agent_deepagent_middleware( backend_resolver=backend_resolver, ) - model_call_limit_mw = ( - ModelCallLimitMiddleware( - thread_limit=120, - run_limit=80, - exit_behavior="end", - ) - if flags.enable_model_call_limit and not flags.disable_new_agent_stack - else None - ) - tool_call_limit_mw = ( - ToolCallLimitMiddleware( - thread_limit=300, run_limit=80, exit_behavior="continue" - ) - if flags.enable_tool_call_limit and not flags.disable_new_agent_stack - else None - ) - noop_mw = ( NoopInjectionMiddleware() if flags.enable_compaction_v2 and not flags.disable_new_agent_stack