mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat: updated agent harness
This commit is contained in:
parent
9ec9b64348
commit
31a372bb84
139 changed files with 12583 additions and 1111 deletions
|
|
@ -23,9 +23,16 @@ from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_v
|
|||
from deepagents.backends import StateBackend
|
||||
from deepagents.graph import BASE_AGENT_PROMPT
|
||||
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
|
||||
from deepagents.middleware.skills import SkillsMiddleware
|
||||
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
|
||||
from langchain.agents import create_agent
|
||||
from langchain.agents.middleware import TodoListMiddleware
|
||||
from langchain.agents.middleware import (
|
||||
LLMToolSelectorMiddleware,
|
||||
ModelCallLimitMiddleware,
|
||||
ModelFallbackMiddleware,
|
||||
TodoListMiddleware,
|
||||
ToolCallLimitMiddleware,
|
||||
)
|
||||
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.tools import BaseTool
|
||||
|
|
@ -33,27 +40,51 @@ from langgraph.types import Checkpointer
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
||||
from app.agents.new_chat.filesystem_backends import build_backend_resolver
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
|
||||
from app.agents.new_chat.llm_config import AgentConfig
|
||||
from app.agents.new_chat.middleware import (
|
||||
ActionLogMiddleware,
|
||||
AnonymousDocumentMiddleware,
|
||||
BusyMutexMiddleware,
|
||||
ClearToolUsesEdit,
|
||||
DedupHITLToolCallsMiddleware,
|
||||
DoomLoopMiddleware,
|
||||
FileIntentMiddleware,
|
||||
KnowledgeBasePersistenceMiddleware,
|
||||
KnowledgePriorityMiddleware,
|
||||
KnowledgeTreeMiddleware,
|
||||
MemoryInjectionMiddleware,
|
||||
NoopInjectionMiddleware,
|
||||
OtelSpanMiddleware,
|
||||
PermissionMiddleware,
|
||||
RetryAfterMiddleware,
|
||||
SpillingContextEditingMiddleware,
|
||||
SpillToBackendEdit,
|
||||
SurfSenseFilesystemMiddleware,
|
||||
ToolCallNameRepairMiddleware,
|
||||
build_skills_backend_factory,
|
||||
create_surfsense_compaction_middleware,
|
||||
default_skills_sources,
|
||||
)
|
||||
from app.agents.new_chat.middleware.safe_summarization import (
|
||||
create_safe_summarization_middleware,
|
||||
from app.agents.new_chat.permissions import Rule, Ruleset
|
||||
from app.agents.new_chat.plugin_loader import (
|
||||
PluginContext,
|
||||
load_allowed_plugin_names_from_env,
|
||||
load_plugin_middlewares,
|
||||
)
|
||||
from app.agents.new_chat.subagents import build_specialized_subagents
|
||||
from app.agents.new_chat.system_prompt import (
|
||||
build_configurable_system_prompt,
|
||||
build_surfsense_system_prompt,
|
||||
)
|
||||
from app.agents.new_chat.tools.invalid_tool import (
|
||||
INVALID_TOOL_NAME,
|
||||
invalid_tool,
|
||||
)
|
||||
from app.agents.new_chat.tools.registry import (
|
||||
BUILTIN_TOOLS,
|
||||
build_tools_async,
|
||||
get_connector_gated_tools,
|
||||
)
|
||||
|
|
@ -321,6 +352,17 @@ async def create_surfsense_deep_agent(
|
|||
disabled_tools=modified_disabled_tools,
|
||||
additional_tools=list(additional_tools) if additional_tools else None,
|
||||
)
|
||||
|
||||
# Tier 1.6: register `invalid` tool. It is dispatched only when
|
||||
# ToolCallNameRepairMiddleware rewrites a malformed call. We
|
||||
# intentionally append it AFTER ``build_tools_async`` so it never
|
||||
# appears in the system-prompt tool list (which is built from the
|
||||
# registry, not the bound tool list).
|
||||
_flags: AgentFeatureFlags = get_flags()
|
||||
if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
|
||||
t.name for t in tools
|
||||
}:
|
||||
tools = [*list(tools), invalid_tool]
|
||||
_perf_log.info(
|
||||
"[create_agent] build_tools_async in %.3fs (%d tools)",
|
||||
time.perf_counter() - _t0,
|
||||
|
|
@ -397,6 +439,8 @@ async def create_surfsense_deep_agent(
|
|||
available_connectors=available_connectors,
|
||||
available_document_types=available_document_types,
|
||||
mentioned_document_ids=mentioned_document_ids,
|
||||
max_input_tokens=_max_input_tokens,
|
||||
flags=_flags,
|
||||
checkpointer=checkpointer,
|
||||
)
|
||||
_perf_log.info(
|
||||
|
|
@ -411,6 +455,71 @@ async def create_surfsense_deep_agent(
|
|||
return agent
|
||||
|
||||
|
||||
# Tier 1.1: tools whose output is too costly / lossy to discard. Keep
|
||||
# this conservative — anything listed here is *never* pruned by
|
||||
# ContextEditingMiddleware. The list is filtered against actually-bound
|
||||
# tool names so disabled connectors don't show up here.
|
||||
_PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
|
||||
{
|
||||
"generate_report",
|
||||
"generate_resume",
|
||||
"generate_podcast",
|
||||
"generate_video_presentation",
|
||||
"generate_image",
|
||||
# Read-heavy connector reads — recomputing them is expensive
|
||||
"read_email",
|
||||
"search_emails",
|
||||
# The fallback for malformed tool calls — keep its replies visible
|
||||
"invalid",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
|
||||
"""Return ``exclude_tools`` derived from the actually-bound tool list.
|
||||
|
||||
Filters :data:`_PRUNE_PROTECTED_TOOL_NAMES` against the bound tools
|
||||
so we never list tools that don't exist (would be a silent no-op).
|
||||
"""
|
||||
enabled = {t.name for t in tools}
|
||||
return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled)
|
||||
|
||||
|
||||
# Tier 2.1 / cleanup: opencode `Permission.disabled` parity. Replaces the
|
||||
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE``-based gating with a
|
||||
# declarative pass over :data:`BUILTIN_TOOLS`. Each tool that declares a
|
||||
# ``required_connector`` not present in ``available_connectors`` gets a
|
||||
# deny rule so any execution attempt short-circuits with permission_denied.
|
||||
def _synthesize_connector_deny_rules(
|
||||
*,
|
||||
available_connectors: list[str] | None,
|
||||
enabled_tool_names: set[str],
|
||||
) -> list[Rule]:
|
||||
"""Build deny rules for tools whose required connector is not enabled.
|
||||
|
||||
Source of truth is ``ToolDefinition.required_connector`` in
|
||||
:data:`BUILTIN_TOOLS`. A tool only gets a deny rule when:
|
||||
|
||||
1. It is currently bound (``enabled_tool_names``).
|
||||
2. It declares a ``required_connector``.
|
||||
3. That connector is *not* in ``available_connectors``.
|
||||
|
||||
This expresses the OpenCode ``Permission.disabled`` semantics
|
||||
declaratively, replacing the substring-heuristic binary gating
|
||||
that used to consult the hardcoded ``_CONNECTOR_TYPE_TO_SEARCHABLE``
|
||||
map.
|
||||
"""
|
||||
available = set(available_connectors or [])
|
||||
deny: list[Rule] = []
|
||||
for tool_def in BUILTIN_TOOLS:
|
||||
if tool_def.name not in enabled_tool_names:
|
||||
continue
|
||||
rc = tool_def.required_connector
|
||||
if rc and rc not in available:
|
||||
deny.append(Rule(permission=tool_def.name, pattern="*", action="deny"))
|
||||
return deny
|
||||
|
||||
|
||||
def _build_compiled_agent_blocking(
|
||||
*,
|
||||
llm: BaseChatModel,
|
||||
|
|
@ -426,6 +535,8 @@ def _build_compiled_agent_blocking(
|
|||
available_connectors: list[str] | None,
|
||||
available_document_types: list[str] | None,
|
||||
mentioned_document_ids: list[int] | None,
|
||||
max_input_tokens: int | None,
|
||||
flags: AgentFeatureFlags,
|
||||
checkpointer: Checkpointer,
|
||||
):
|
||||
"""Build the middleware stack and compile the agent graph synchronously.
|
||||
|
|
@ -458,7 +569,7 @@ def _build_compiled_agent_blocking(
|
|||
created_by_id=user_id,
|
||||
thread_id=thread_id,
|
||||
),
|
||||
create_safe_summarization_middleware(llm, StateBackend),
|
||||
create_surfsense_compaction_middleware(llm, StateBackend),
|
||||
PatchToolCallsMiddleware(),
|
||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||
]
|
||||
|
|
@ -470,13 +581,319 @@ def _build_compiled_agent_blocking(
|
|||
"middleware": gp_middleware,
|
||||
}
|
||||
|
||||
# Tier 4.3: specialized user-facing subagents (explore, report_writer,
|
||||
# connector_negotiator). Registered through SubAgentMiddleware alongside
|
||||
# the general-purpose spec so the parent's `task` tool can address them
|
||||
# by name. Off by default until the flag flips so existing deployments
|
||||
# don't see new agent types in the task tool description.
|
||||
specialized_subagents: list[SubAgent] = []
|
||||
if (
|
||||
flags.enable_specialized_subagents
|
||||
and not flags.disable_new_agent_stack
|
||||
):
|
||||
try:
|
||||
# Specialized subagents share the parent's filesystem +
|
||||
# todo view so their system prompts (which promise
|
||||
# ``read_file``, ``ls``, ``grep``, ``glob``, ``write_todos``)
|
||||
# actually match runtime behavior. Build *fresh* instances
|
||||
# rather than aliasing the parent's GP middleware to avoid
|
||||
# subtle state coupling across compiled graphs.
|
||||
subagent_extra_middleware: list = [
|
||||
TodoListMiddleware(),
|
||||
SurfSenseFilesystemMiddleware(
|
||||
backend=backend_resolver,
|
||||
filesystem_mode=filesystem_mode,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=user_id,
|
||||
thread_id=thread_id,
|
||||
),
|
||||
]
|
||||
specialized_subagents = build_specialized_subagents(
|
||||
tools=tools,
|
||||
model=llm,
|
||||
extra_middleware=subagent_extra_middleware,
|
||||
)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logging.warning(
|
||||
"Specialized subagent build failed; running without them: %s",
|
||||
exc,
|
||||
)
|
||||
specialized_subagents = []
|
||||
|
||||
subagent_specs: list[SubAgent] = [general_purpose_spec, *specialized_subagents]
|
||||
|
||||
# Main agent middleware
|
||||
# Order: AnonDoc -> Tree -> Priority -> FileIntent -> Filesystem -> Persistence -> ...
|
||||
# before_agent hooks run in declared order; later injections sit closer to
|
||||
# the latest human turn. Tree (large + cacheable) is injected earliest so
|
||||
# provider-side prefix caching has more material to hit; FileIntent (most
|
||||
# actionable per-turn contract) is injected closest to the user message.
|
||||
#
|
||||
# ``wrap_model_call`` ordering: the FIRST middleware in the list is the
|
||||
# OUTERMOST wrapper. To ensure prune executes before summarization,
|
||||
# place ``SpillingContextEditingMiddleware`` before
|
||||
# ``SurfSenseCompactionMiddleware`` (Tier 1.1 + 1.3).
|
||||
# Compaction is the canonical token-budget defense after the
|
||||
# cleanup tier removed ``SafeSummarizationMiddleware``. The Bedrock
|
||||
# buffer-empty defense is folded into ``SurfSenseCompactionMiddleware``.
|
||||
summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend)
|
||||
_ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity
|
||||
|
||||
# Tier 1.1: ContextEditing prune. Trigger at 55% of model_max_input,
|
||||
# earlier than summarization (~85%). When disabled, no edit runs.
|
||||
context_edit_mw = None
|
||||
if (
|
||||
flags.enable_context_editing
|
||||
and not flags.disable_new_agent_stack
|
||||
and max_input_tokens
|
||||
):
|
||||
spill_edit = SpillToBackendEdit(
|
||||
trigger=int(max_input_tokens * 0.55),
|
||||
clear_at_least=int(max_input_tokens * 0.15),
|
||||
keep=5,
|
||||
exclude_tools=_safe_exclude_tools(tools),
|
||||
clear_tool_inputs=True,
|
||||
)
|
||||
clear_edit = ClearToolUsesEdit(
|
||||
trigger=int(max_input_tokens * 0.55),
|
||||
clear_at_least=int(max_input_tokens * 0.15),
|
||||
keep=5,
|
||||
exclude_tools=_safe_exclude_tools(tools),
|
||||
clear_tool_inputs=True,
|
||||
placeholder="[cleared - older tool output trimmed for context]",
|
||||
)
|
||||
context_edit_mw = SpillingContextEditingMiddleware(
|
||||
edits=[spill_edit, clear_edit],
|
||||
backend_resolver=backend_resolver,
|
||||
)
|
||||
|
||||
# Tier 1.4 / 1.8 / 1.9 / 1.10: built-in retry/fallback/limits.
|
||||
retry_mw = (
|
||||
RetryAfterMiddleware(max_retries=3)
|
||||
if flags.enable_retry_after and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
# Fallback chain — primary is the agent's own model; we add cheap
|
||||
# alternatives. Off by default; only the first call site that
|
||||
# configures the chain via env should enable it.
|
||||
fallback_mw: ModelFallbackMiddleware | None = None
|
||||
if flags.enable_model_fallback and not flags.disable_new_agent_stack:
|
||||
try:
|
||||
fallback_mw = ModelFallbackMiddleware(
|
||||
"openai:gpt-4o-mini",
|
||||
"anthropic:claude-3-5-haiku-20241022",
|
||||
)
|
||||
except Exception:
|
||||
logging.warning("ModelFallbackMiddleware init failed; skipping.")
|
||||
fallback_mw = None
|
||||
model_call_limit_mw = (
|
||||
ModelCallLimitMiddleware(
|
||||
thread_limit=120,
|
||||
run_limit=80,
|
||||
exit_behavior="end",
|
||||
)
|
||||
if flags.enable_model_call_limit and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
tool_call_limit_mw = (
|
||||
ToolCallLimitMiddleware(thread_limit=300, run_limit=80, exit_behavior="continue")
|
||||
if flags.enable_tool_call_limit and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 1.5: provider-compat _noop injection.
|
||||
noop_mw = (
|
||||
NoopInjectionMiddleware()
|
||||
if flags.enable_compaction_v2 and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 1.7: tool-call name repair (lowercase + invalid fallback).
|
||||
#
|
||||
# ``registered_tool_names`` MUST cover every tool the model can legitimately
|
||||
# call. That includes the bound ``tools`` list AND every tool provided by
|
||||
# middleware in the stack — ``FilesystemMiddleware`` (read_file, ls, grep,
|
||||
# glob, edit_file, write_file, execute), ``TodoListMiddleware``
|
||||
# (write_todos), ``SubAgentMiddleware`` (task), ``SkillsMiddleware`` (skill
|
||||
# loaders), etc. If we only inspect ``tools`` here, every call to
|
||||
# ``read_file`` / ``ls`` / ``grep`` from the model will be rewritten to
|
||||
# ``invalid`` because the repair middleware doesn't recognize them. The
|
||||
# built-in deepagents middleware aren't in scope yet at this point of the
|
||||
# function but they're added unconditionally below, so we hard-code their
|
||||
# canonical names alongside the dynamic ``tools`` set.
|
||||
repair_mw = None
|
||||
if flags.enable_tool_call_repair and not flags.disable_new_agent_stack:
|
||||
registered_names: set[str] = {t.name for t in tools}
|
||||
# Tools owned by the standard deepagents middleware stack.
|
||||
registered_names |= {
|
||||
"write_todos",
|
||||
"ls",
|
||||
"read_file",
|
||||
"write_file",
|
||||
"edit_file",
|
||||
"glob",
|
||||
"grep",
|
||||
"execute",
|
||||
"task",
|
||||
}
|
||||
repair_mw = ToolCallNameRepairMiddleware(
|
||||
registered_tool_names=registered_names,
|
||||
fuzzy_match_threshold=None, # opencode parity: no fuzzy step
|
||||
)
|
||||
|
||||
# Tier 1.11: doom-loop detector. Off by default until UI handles.
|
||||
doom_loop_mw = (
|
||||
DoomLoopMiddleware(threshold=3)
|
||||
if flags.enable_doom_loop and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 2.1: PermissionMiddleware. Layers, earliest -> latest (last
|
||||
# match wins per opencode):
|
||||
#
|
||||
# 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense
|
||||
# already runs per-tool HITL (see ``tools/hitl.py``) for mutating
|
||||
# connector tools, so we only want PermissionMiddleware to *deny*
|
||||
# things the user has gated off; the default fallback in
|
||||
# ``permissions.evaluate`` is ``ask``, which would double-prompt
|
||||
# on every safe read-only call (``ls``, ``read_file``, ``grep``,
|
||||
# ``glob``, ``web_search`` …) and, on resume, replay the previous
|
||||
# reject decision into innocent calls.
|
||||
# 2. ``connector_synthesized`` — deny rules for tools whose required
|
||||
# connector is not connected to this space. Overrides #1.
|
||||
# 3. (future) user-defined rules from ``agent_permission_rules`` table
|
||||
# via the Agent Permissions UI. Loaded last so they override both.
|
||||
permission_mw: PermissionMiddleware | None = None
|
||||
if flags.enable_permission and not flags.disable_new_agent_stack:
|
||||
synthesized = _synthesize_connector_deny_rules(
|
||||
available_connectors=available_connectors,
|
||||
enabled_tool_names={t.name for t in tools},
|
||||
)
|
||||
permission_mw = PermissionMiddleware(
|
||||
rulesets=[
|
||||
Ruleset(
|
||||
rules=[Rule(permission="*", pattern="*", action="allow")],
|
||||
origin="surfsense_defaults",
|
||||
),
|
||||
Ruleset(rules=synthesized, origin="connector_synthesized"),
|
||||
],
|
||||
)
|
||||
|
||||
# Tier 5.2: ActionLogMiddleware. Off by default until the
|
||||
# ``agent_action_log`` table is migrated. When enabled, persists one
|
||||
# row per tool call with optional reverse_descriptor for
|
||||
# /api/threads/{thread_id}/revert/{action_id}. Sits inside permission
|
||||
# so denied calls aren't logged as completions.
|
||||
action_log_mw: ActionLogMiddleware | None = None
|
||||
if (
|
||||
flags.enable_action_log
|
||||
and not flags.disable_new_agent_stack
|
||||
and thread_id is not None
|
||||
):
|
||||
try:
|
||||
tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
|
||||
action_log_mw = ActionLogMiddleware(
|
||||
thread_id=thread_id,
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
tool_definitions=tool_defs_by_name,
|
||||
)
|
||||
except Exception: # pragma: no cover - defensive
|
||||
logging.warning(
|
||||
"ActionLogMiddleware init failed; running without it.",
|
||||
exc_info=True,
|
||||
)
|
||||
action_log_mw = None
|
||||
|
||||
# Tier 2.2: per-thread busy mutex.
|
||||
busy_mutex_mw: BusyMutexMiddleware | None = (
|
||||
BusyMutexMiddleware()
|
||||
if flags.enable_busy_mutex and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 3b: OpenTelemetry spans (model.call + tool.call). Lives just
|
||||
# inside BusyMutex so it spans every retry/fallback attempt of the
|
||||
# current turn but never wraps a queued/blocked turn.
|
||||
otel_mw: OtelSpanMiddleware | None = (
|
||||
OtelSpanMiddleware()
|
||||
if flags.enable_otel and not flags.disable_new_agent_stack
|
||||
else None
|
||||
)
|
||||
|
||||
# Tier 6: plugin entry-point loader. Off by default; opt-in via the
|
||||
# ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from
|
||||
# the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future
|
||||
# PR can wire it through ``global_llm_config.yaml``.
|
||||
plugin_middlewares: list[Any] = []
|
||||
if flags.enable_plugin_loader and not flags.disable_new_agent_stack:
|
||||
try:
|
||||
allowed_names = load_allowed_plugin_names_from_env()
|
||||
if allowed_names:
|
||||
plugin_middlewares = load_plugin_middlewares(
|
||||
PluginContext.build(
|
||||
search_space_id=search_space_id,
|
||||
user_id=user_id,
|
||||
thread_visibility=visibility,
|
||||
llm=llm,
|
||||
),
|
||||
allowed_plugin_names=allowed_names,
|
||||
)
|
||||
except Exception: # pragma: no cover - defensive
|
||||
logging.warning(
|
||||
"Plugin loader failed; continuing without plugins.",
|
||||
exc_info=True,
|
||||
)
|
||||
plugin_middlewares = []
|
||||
|
||||
# Tier 4.1: SkillsMiddleware. Loads built-in + space-authored skills
|
||||
# via a CompositeBackend. Sources are layered: built-in first, space
|
||||
# last, so a search-space-authored skill of the same name overrides
|
||||
# the bundled one.
|
||||
skills_mw: SkillsMiddleware | None = None
|
||||
if flags.enable_skills and not flags.disable_new_agent_stack:
|
||||
try:
|
||||
skills_factory = build_skills_backend_factory(
|
||||
search_space_id=search_space_id
|
||||
if filesystem_mode == FilesystemMode.CLOUD
|
||||
else None,
|
||||
)
|
||||
skills_mw = SkillsMiddleware(
|
||||
backend=skills_factory,
|
||||
sources=default_skills_sources(),
|
||||
)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
|
||||
skills_mw = None
|
||||
|
||||
# Tier 2.5: LLM-driven tool selection for >30 tools.
|
||||
selector_mw: LLMToolSelectorMiddleware | None = None
|
||||
if (
|
||||
flags.enable_llm_tool_selector
|
||||
and not flags.disable_new_agent_stack
|
||||
and len(tools) > 30
|
||||
):
|
||||
try:
|
||||
selector_mw = LLMToolSelectorMiddleware(
|
||||
model="openai:gpt-4o-mini",
|
||||
max_tools=12,
|
||||
always_include=[
|
||||
name
|
||||
for name in ("update_memory", "get_connected_accounts", "scrape_webpage")
|
||||
if name in {t.name for t in tools}
|
||||
],
|
||||
)
|
||||
except Exception:
|
||||
logging.warning("LLMToolSelectorMiddleware init failed; skipping.")
|
||||
selector_mw = None
|
||||
|
||||
deepagent_middleware = [
|
||||
# BusyMutex is OUTERMOST: it must wrap the entire stream so no
|
||||
# other turn can sneak in while this one is mid-flight.
|
||||
busy_mutex_mw,
|
||||
# OTel spans sit just inside BusyMutex so each retry attempt
|
||||
# gets its own model.call / tool.call span.
|
||||
otel_mw,
|
||||
TodoListMiddleware(),
|
||||
_memory_middleware,
|
||||
AnonymousDocumentMiddleware(
|
||||
|
|
@ -514,10 +931,40 @@ def _build_compiled_agent_blocking(
|
|||
)
|
||||
if filesystem_mode == FilesystemMode.CLOUD
|
||||
else None,
|
||||
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
|
||||
create_safe_summarization_middleware(llm, StateBackend),
|
||||
# Tier 4.1: skill loader. Placed before SubAgentMiddleware so
|
||||
# subagents inherit the same skill metadata (subagent specs reference
|
||||
# the same source paths via `default_skills_sources()`).
|
||||
skills_mw,
|
||||
SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs),
|
||||
# Tier 2.5: tool selection (only when >30 tools and flag on).
|
||||
selector_mw,
|
||||
# Defensive caps, then prune, then summarize.
|
||||
model_call_limit_mw,
|
||||
tool_call_limit_mw,
|
||||
context_edit_mw,
|
||||
summarization_mw,
|
||||
# Provider compatibility + retry chain — placed after prune/compact
|
||||
# so retries happen on the already-trimmed payload.
|
||||
noop_mw,
|
||||
retry_mw,
|
||||
fallback_mw,
|
||||
# Tool-call repair must run after model emits but before
|
||||
# permission / dedup / doom-loop interpret the calls.
|
||||
repair_mw,
|
||||
# Tier 2.1: deny/ask BEFORE the calls are forwarded to tool nodes.
|
||||
permission_mw,
|
||||
doom_loop_mw,
|
||||
# Tier 5.2: action log sits inside permission so denied calls
|
||||
# don't appear as completions, and outside dedup so each unique
|
||||
# tool invocation gets its own row.
|
||||
action_log_mw,
|
||||
PatchToolCallsMiddleware(),
|
||||
DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
|
||||
# Tier 6: plugin slot — sits just before AnthropicCache so plugin-side
|
||||
# transforms see the final tool result and run before any caching
|
||||
# heuristics. Multiple plugins in declared order; loader filtered by
|
||||
# the admin allowlist already.
|
||||
*plugin_middlewares,
|
||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||
]
|
||||
deepagent_middleware = [m for m in deepagent_middleware if m is not None]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue