feat(multi-agent): cap subagent model and tool call counts

This commit is contained in:
CREDO23 2026-05-05 18:16:00 +02:00
parent b394dc71c7
commit 309c695531

View file

@ -230,10 +230,34 @@ def build_main_agent_deepagent_middleware(
logging.warning("ScopedModelFallbackMiddleware init failed; skipping.")
fallback_mw = None
# Mirror the parent's ordering: retry / fallback wrap caching, which wraps
# the model. ``gp_middleware`` is held by reference inside
# Cost / loop ceilings shared with subagents: the same limit middleware
# instances are reused, but their ``state_schema`` is per-agent, so counts
# are not summed across the parent and its subagents — each cap acts as a
# per-agent safety net, not a global budget.
model_call_limit_mw = (
ModelCallLimitMiddleware(
thread_limit=120,
run_limit=80,
exit_behavior="end",
)
if flags.enable_model_call_limit and not flags.disable_new_agent_stack
else None
)
tool_call_limit_mw = (
ToolCallLimitMiddleware(
thread_limit=300, run_limit=80, exit_behavior="continue"
)
if flags.enable_tool_call_limit and not flags.disable_new_agent_stack
else None
)
# Mirror the parent's ordering: retry / fallback / limits wrap caching,
# which wraps the model. ``gp_middleware`` is held by reference inside
# ``general_purpose_spec`` so this insertion propagates into the spec.
_gp_resilience: list[Any] = [m for m in (retry_mw, fallback_mw) if m is not None]
_gp_resilience: list[Any] = [
m
for m in (retry_mw, fallback_mw, model_call_limit_mw, tool_call_limit_mw)
if m is not None
]
if _gp_resilience:
_cache_idx = next(
(
@ -260,10 +284,14 @@ def build_main_agent_deepagent_middleware(
]
if subagent_deny_permission_mw is not None:
subagent_extra_middleware.append(subagent_deny_permission_mw)
if retry_mw is not None:
subagent_extra_middleware.append(retry_mw)
if fallback_mw is not None:
subagent_extra_middleware.append(fallback_mw)
for _resilience_mw in (
retry_mw,
fallback_mw,
model_call_limit_mw,
tool_call_limit_mw,
):
if _resilience_mw is not None:
subagent_extra_middleware.append(_resilience_mw)
registry_subagents = build_subagents(
dependencies=subagent_dependencies,
model=llm,
@ -310,23 +338,6 @@ def build_main_agent_deepagent_middleware(
backend_resolver=backend_resolver,
)
model_call_limit_mw = (
ModelCallLimitMiddleware(
thread_limit=120,
run_limit=80,
exit_behavior="end",
)
if flags.enable_model_call_limit and not flags.disable_new_agent_stack
else None
)
tool_call_limit_mw = (
ToolCallLimitMiddleware(
thread_limit=300, run_limit=80, exit_behavior="continue"
)
if flags.enable_tool_call_limit and not flags.disable_new_agent_stack
else None
)
noop_mw = (
NoopInjectionMiddleware()
if flags.enable_compaction_v2 and not flags.disable_new_agent_stack