""" SurfSense deep agent implementation. This module provides the factory function for creating SurfSense deep agents with configurable tools via the tools registry and configurable prompts via NewLLMConfig. We use ``create_agent`` (from langchain) rather than ``create_deep_agent`` (from deepagents) so that the middleware stack is fully under our control. This lets us swap in ``SurfSenseFilesystemMiddleware`` — a customisable subclass of the default ``FilesystemMiddleware`` — while preserving every other behaviour that ``create_deep_agent`` provides (todo-list, subagents, summarisation, prompt-caching, etc.). """ import asyncio import logging import time from collections.abc import Sequence from typing import Any from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_version from deepagents.backends import StateBackend from deepagents.graph import BASE_AGENT_PROMPT from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware from deepagents.middleware.skills import SkillsMiddleware from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT from langchain.agents import create_agent from langchain.agents.middleware import ( LLMToolSelectorMiddleware, ModelCallLimitMiddleware, ModelFallbackMiddleware, TodoListMiddleware, ToolCallLimitMiddleware, ) from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware from langchain_core.language_models import BaseChatModel from langchain_core.tools import BaseTool from langgraph.types import Checkpointer from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.context import SurfSenseContextSchema from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags from app.agents.new_chat.filesystem_backends import build_backend_resolver from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection from app.agents.new_chat.llm_config import AgentConfig from app.agents.new_chat.middleware import ( ActionLogMiddleware, AnonymousDocumentMiddleware, BusyMutexMiddleware, ClearToolUsesEdit, DedupHITLToolCallsMiddleware, DoomLoopMiddleware, FileIntentMiddleware, KnowledgeBasePersistenceMiddleware, KnowledgePriorityMiddleware, KnowledgeTreeMiddleware, MemoryInjectionMiddleware, NoopInjectionMiddleware, OtelSpanMiddleware, PermissionMiddleware, RetryAfterMiddleware, SpillingContextEditingMiddleware, SpillToBackendEdit, SurfSenseFilesystemMiddleware, ToolCallNameRepairMiddleware, build_skills_backend_factory, create_surfsense_compaction_middleware, default_skills_sources, ) from app.agents.new_chat.permissions import Rule, Ruleset from app.agents.new_chat.plugin_loader import ( PluginContext, load_allowed_plugin_names_from_env, load_plugin_middlewares, ) from app.agents.new_chat.subagents import build_specialized_subagents from app.agents.new_chat.system_prompt import ( build_configurable_system_prompt, build_surfsense_system_prompt, ) from app.agents.new_chat.tools.invalid_tool import ( INVALID_TOOL_NAME, invalid_tool, ) from app.agents.new_chat.tools.registry import ( BUILTIN_TOOLS, build_tools_async, get_connector_gated_tools, ) from app.db import ChatVisibility from app.services.connector_service import ConnectorService from app.utils.perf import get_perf_logger _perf_log = get_perf_logger() # ============================================================================= # Connector Type Mapping # ============================================================================= # Maps SearchSourceConnectorType enum values to the searchable document/connector types # used by pre-search middleware and web_search. # Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to # the web_search tool; all others are considered local/indexed data. _CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = { # Live search connectors (handled by web_search tool) "TAVILY_API": "TAVILY_API", "LINKUP_API": "LINKUP_API", "BAIDU_SEARCH_API": "BAIDU_SEARCH_API", # Local/indexed connectors (handled by KB pre-search middleware) "SLACK_CONNECTOR": "SLACK_CONNECTOR", "TEAMS_CONNECTOR": "TEAMS_CONNECTOR", "NOTION_CONNECTOR": "NOTION_CONNECTOR", "GITHUB_CONNECTOR": "GITHUB_CONNECTOR", "LINEAR_CONNECTOR": "LINEAR_CONNECTOR", "DISCORD_CONNECTOR": "DISCORD_CONNECTOR", "JIRA_CONNECTOR": "JIRA_CONNECTOR", "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR", "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR", "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR", "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR", "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR", "LUMA_CONNECTOR": "LUMA_CONNECTOR", "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR", "WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR", "CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR", "DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type # Composio connectors (unified to native document types). # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db. "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR", "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR", } # Document types that don't come from SearchSourceConnector but should always be searchable _ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [ "EXTENSION", # Browser extension data "FILE", # Uploaded files "NOTE", # User notes "YOUTUBE_VIDEO", # YouTube videos ] def _map_connectors_to_searchable_types( connector_types: list[Any], ) -> list[str]: """ Map SearchSourceConnectorType enums to searchable document/connector types. This function: 1. Converts connector type enums to their searchable counterparts 2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO) 3. Deduplicates while preserving order Args: connector_types: List of SearchSourceConnectorType enum values Returns: List of searchable connector/document type strings """ result_set: set[str] = set() result_list: list[str] = [] # Add always-available document types first for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES: if doc_type not in result_set: result_set.add(doc_type) result_list.append(doc_type) # Map each connector type to its searchable equivalent for ct in connector_types: # Handle both enum and string types ct_str = ct.value if hasattr(ct, "value") else str(ct) searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str) if searchable and searchable not in result_set: result_set.add(searchable) result_list.append(searchable) return result_list # ============================================================================= # Deep Agent Factory # ============================================================================= async def create_surfsense_deep_agent( llm: BaseChatModel, search_space_id: int, db_session: AsyncSession, connector_service: ConnectorService, checkpointer: Checkpointer, user_id: str | None = None, thread_id: int | None = None, agent_config: AgentConfig | None = None, enabled_tools: list[str] | None = None, disabled_tools: list[str] | None = None, additional_tools: Sequence[BaseTool] | None = None, firecrawl_api_key: str | None = None, thread_visibility: ChatVisibility | None = None, mentioned_document_ids: list[int] | None = None, anon_session_id: str | None = None, filesystem_selection: FilesystemSelection | None = None, ): """ Create a SurfSense deep agent with configurable tools and prompts. The agent comes with built-in tools that can be configured: - generate_podcast: Generate audio podcasts from content - generate_image: Generate images from text descriptions using AI models - scrape_webpage: Extract content from webpages - update_memory: Update the user's personal or team memory document The agent also includes TodoListMiddleware by default (via create_deep_agent) which provides: - write_todos: Create and update planning/todo lists for complex tasks The system prompt can be configured via agent_config: - Custom system instructions (or use defaults) - Citation toggle (enable/disable citation requirements) Args: llm: ChatLiteLLM instance for the agent's language model search_space_id: The user's search space ID db_session: Database session for tools that need DB access connector_service: Initialized connector service for knowledge base search checkpointer: LangGraph checkpointer for conversation state persistence. Use AsyncPostgresSaver for production or MemorySaver for testing. user_id: The current user's UUID string (required for memory tools) agent_config: Optional AgentConfig from NewLLMConfig for prompt configuration. If None, uses default system prompt with citations enabled. enabled_tools: Explicit list of tool names to enable. If None, all default tools are enabled. Use this to limit which tools are available. disabled_tools: List of tool names to disable. Applied after enabled_tools. Use this to exclude specific tools from the defaults. additional_tools: Extra custom tools to add beyond the built-in ones. These are always added regardless of enabled/disabled settings. firecrawl_api_key: Optional Firecrawl API key for premium web scraping. Falls back to Chromium/Trafilatura if not provided. Returns: CompiledStateGraph: The configured deep agent Examples: # Create agent with all default tools and default prompt agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...) # Create agent with custom prompt configuration agent = create_surfsense_deep_agent( llm, search_space_id, db_session, ..., agent_config=AgentConfig( provider="OPENAI", model_name="gpt-4", api_key="...", system_instructions="Custom instructions...", citations_enabled=False, ) ) # Create agent with only specific tools agent = create_surfsense_deep_agent( llm, search_space_id, db_session, ..., enabled_tools=["scrape_webpage"] ) # Create agent without podcast generation agent = create_surfsense_deep_agent( llm, search_space_id, db_session, ..., disabled_tools=["generate_podcast"] ) # Add custom tools agent = create_surfsense_deep_agent( llm, search_space_id, db_session, ..., additional_tools=[my_custom_tool] ) """ _t_agent_total = time.perf_counter() filesystem_selection = filesystem_selection or FilesystemSelection() backend_resolver = build_backend_resolver( filesystem_selection, search_space_id=search_space_id if filesystem_selection.mode == FilesystemMode.CLOUD else None, ) # Discover available connectors and document types for this search space available_connectors: list[str] | None = None available_document_types: list[str] | None = None _t0 = time.perf_counter() try: connector_types = await connector_service.get_available_connectors( search_space_id ) if connector_types: available_connectors = _map_connectors_to_searchable_types(connector_types) available_document_types = await connector_service.get_available_document_types( search_space_id ) except Exception as e: logging.warning(f"Failed to discover available connectors/document types: {e}") _perf_log.info( "[create_agent] Connector/doc-type discovery in %.3fs", time.perf_counter() - _t0, ) # Build dependencies dict for the tools registry visibility = thread_visibility or ChatVisibility.PRIVATE # Extract the model's context window so tools can size their output. _model_profile = getattr(llm, "profile", None) _max_input_tokens: int | None = ( _model_profile.get("max_input_tokens") if isinstance(_model_profile, dict) else None ) dependencies = { "search_space_id": search_space_id, "db_session": db_session, "connector_service": connector_service, "firecrawl_api_key": firecrawl_api_key, "user_id": user_id, "thread_id": thread_id, "thread_visibility": visibility, "available_connectors": available_connectors, "available_document_types": available_document_types, "max_input_tokens": _max_input_tokens, "llm": llm, } modified_disabled_tools = list(disabled_tools) if disabled_tools else [] modified_disabled_tools.extend(get_connector_gated_tools(available_connectors)) # Remove direct KB search tool; KnowledgePriorityMiddleware now runs hybrid # search per turn and surfaces hits as a hint plus # `` markers inside lazy-loaded XML. if "search_knowledge_base" not in modified_disabled_tools: modified_disabled_tools.append("search_knowledge_base") # Build tools using the async registry (includes MCP tools) _t0 = time.perf_counter() tools = await build_tools_async( dependencies=dependencies, enabled_tools=enabled_tools, disabled_tools=modified_disabled_tools, additional_tools=list(additional_tools) if additional_tools else None, ) # Register the ``invalid`` tool only when tool-call repair is on. It # is dispatched only when :class:`ToolCallNameRepairMiddleware` # rewrites a malformed call. We intentionally append it AFTER # ``build_tools_async`` so it never appears in the system-prompt # tool list (which is built from the registry, not the bound tool # list). _flags: AgentFeatureFlags = get_flags() if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in { t.name for t in tools }: tools = [*list(tools), invalid_tool] _perf_log.info( "[create_agent] build_tools_async in %.3fs (%d tools)", time.perf_counter() - _t0, len(tools), ) # Build system prompt based on agent_config, scoped to the tools actually enabled _t0 = time.perf_counter() _enabled_tool_names = {t.name for t in tools} _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set() # Collect generic MCP connector info so the system prompt can route queries # to their tools instead of falling back to "not in knowledge base". _mcp_connector_tools: dict[str, list[str]] = {} for t in tools: meta = getattr(t, "metadata", None) or {} if meta.get("mcp_is_generic") and meta.get("mcp_connector_name"): _mcp_connector_tools.setdefault( meta["mcp_connector_name"], [], ).append(t.name) if _mcp_connector_tools: _perf_log.info("MCP connector tool routing: %s", _mcp_connector_tools) if agent_config is not None: system_prompt = build_configurable_system_prompt( custom_system_instructions=agent_config.system_instructions, use_default_system_instructions=agent_config.use_default_system_instructions, citations_enabled=agent_config.citations_enabled, thread_visibility=thread_visibility, enabled_tool_names=_enabled_tool_names, disabled_tool_names=_user_disabled_tool_names, mcp_connector_tools=_mcp_connector_tools, ) else: system_prompt = build_surfsense_system_prompt( thread_visibility=thread_visibility, enabled_tool_names=_enabled_tool_names, disabled_tool_names=_user_disabled_tool_names, mcp_connector_tools=_mcp_connector_tools, ) _perf_log.info( "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0 ) # Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent) final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT # The middleware stack — and especially ``SubAgentMiddleware`` — is *not* # cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent`` # synchronously to compile the general-purpose subagent's full state graph # (every tool + every middleware → pydantic schemas + langgraph compile). # On gpt-5.x agents that's roughly 1.5-2s of pure CPU work. If we run it # directly here it blocks the asyncio event loop for the whole streaming # task (and any other coroutine sharing this loop), which is why # "agent creation" wall-clock time used to stretch to ~3-4s. Move the # entire middleware build + main-graph compile into a single # ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the # event loop stays responsive. _t0 = time.perf_counter() agent = await asyncio.to_thread( _build_compiled_agent_blocking, llm=llm, tools=tools, final_system_prompt=final_system_prompt, backend_resolver=backend_resolver, filesystem_mode=filesystem_selection.mode, search_space_id=search_space_id, user_id=user_id, thread_id=thread_id, visibility=visibility, anon_session_id=anon_session_id, available_connectors=available_connectors, available_document_types=available_document_types, mentioned_document_ids=mentioned_document_ids, max_input_tokens=_max_input_tokens, flags=_flags, checkpointer=checkpointer, ) _perf_log.info( "[create_agent] Middleware stack + graph compiled in %.3fs", time.perf_counter() - _t0, ) _perf_log.info( "[create_agent] Total agent creation in %.3fs", time.perf_counter() - _t_agent_total, ) return agent # Tools whose output is too costly / lossy to discard. Keep this # conservative — anything listed here is *never* pruned by # :class:`ContextEditingMiddleware`. The list is filtered against # actually-bound tool names so disabled connectors don't show up here. _PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset( { "generate_report", "generate_resume", "generate_podcast", "generate_video_presentation", "generate_image", # Read-heavy connector reads — recomputing them is expensive "read_email", "search_emails", # The fallback for malformed tool calls — keep its replies visible "invalid", } ) def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]: """Return ``exclude_tools`` derived from the actually-bound tool list. Filters :data:`_PRUNE_PROTECTED_TOOL_NAMES` against the bound tools so we never list tools that don't exist (would be a silent no-op). """ enabled = {t.name for t in tools} return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled) # Connector gating: any tool whose ``ToolDefinition.required_connector`` # isn't actually wired up gets a synthesized permission deny rule so # execution attempts short-circuit with ``permission_denied`` instead of # bubbling up provider-specific 401/404 errors. Mirrors OpenCode's # ``Permission.disabled`` (declarative, per-tool gating) — replaces the # legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic. def _synthesize_connector_deny_rules( *, available_connectors: list[str] | None, enabled_tool_names: set[str], ) -> list[Rule]: """Build deny rules for tools whose required connector is not enabled. Source of truth is ``ToolDefinition.required_connector`` in :data:`BUILTIN_TOOLS`. A tool only gets a deny rule when: 1. It is currently bound (``enabled_tool_names``). 2. It declares a ``required_connector``. 3. That connector is *not* in ``available_connectors``. """ available = set(available_connectors or []) deny: list[Rule] = [] for tool_def in BUILTIN_TOOLS: if tool_def.name not in enabled_tool_names: continue rc = tool_def.required_connector if rc and rc not in available: deny.append(Rule(permission=tool_def.name, pattern="*", action="deny")) return deny def _build_compiled_agent_blocking( *, llm: BaseChatModel, tools: Sequence[BaseTool], final_system_prompt: str, backend_resolver: Any, filesystem_mode: FilesystemMode, search_space_id: int, user_id: str | None, thread_id: int | None, visibility: ChatVisibility, anon_session_id: str | None, available_connectors: list[str] | None, available_document_types: list[str] | None, mentioned_document_ids: list[int] | None, max_input_tokens: int | None, flags: AgentFeatureFlags, checkpointer: Checkpointer, ): """Build the middleware stack and compile the agent graph synchronously. Runs in a worker thread (see ``asyncio.to_thread`` call site) so the heavy CPU work — most notably ``SubAgentMiddleware.__init__`` eagerly calling ``create_agent`` to compile the general-purpose subagent — does not block the event loop. """ _memory_middleware = MemoryInjectionMiddleware( user_id=user_id, search_space_id=search_space_id, thread_visibility=visibility, ) # General-purpose subagent middleware # Subagent omits AnonymousDocumentMiddleware, KnowledgeTreeMiddleware, # KnowledgePriorityMiddleware, and KnowledgeBasePersistenceMiddleware - it # inherits state and tools from the parent, but should not (a) re-load # anon docs / re-render the tree / re-run hybrid search, or (b) commit at # its own completion (only the top-level agent's aafter_agent commits). gp_middleware = [ TodoListMiddleware(), _memory_middleware, FileIntentMiddleware(llm=llm), SurfSenseFilesystemMiddleware( backend=backend_resolver, filesystem_mode=filesystem_mode, search_space_id=search_space_id, created_by_id=user_id, thread_id=thread_id, ), create_surfsense_compaction_middleware(llm, StateBackend), PatchToolCallsMiddleware(), AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), ] general_purpose_spec: SubAgent = { # type: ignore[typeddict-unknown-key] **GENERAL_PURPOSE_SUBAGENT, "model": llm, "tools": tools, "middleware": gp_middleware, } # Specialized user-facing subagents (explore, report_writer, # connector_negotiator). Registered through SubAgentMiddleware alongside # the general-purpose spec so the parent's `task` tool can address them # by name. Off by default until the flag flips so existing deployments # don't see new agent types in the task tool description. specialized_subagents: list[SubAgent] = [] if flags.enable_specialized_subagents and not flags.disable_new_agent_stack: try: # Specialized subagents share the parent's filesystem + # todo view so their system prompts (which promise # ``read_file``, ``ls``, ``grep``, ``glob``, ``write_todos``) # actually match runtime behavior. Build *fresh* instances # rather than aliasing the parent's GP middleware to avoid # subtle state coupling across compiled graphs. subagent_extra_middleware: list = [ TodoListMiddleware(), SurfSenseFilesystemMiddleware( backend=backend_resolver, filesystem_mode=filesystem_mode, search_space_id=search_space_id, created_by_id=user_id, thread_id=thread_id, ), ] specialized_subagents = build_specialized_subagents( tools=tools, model=llm, extra_middleware=subagent_extra_middleware, ) except Exception as exc: # pragma: no cover - defensive logging.warning( "Specialized subagent build failed; running without them: %s", exc, ) specialized_subagents = [] subagent_specs: list[SubAgent] = [general_purpose_spec, *specialized_subagents] # Main agent middleware # Order: AnonDoc -> Tree -> Priority -> FileIntent -> Filesystem -> Persistence -> ... # before_agent hooks run in declared order; later injections sit closer to # the latest human turn. Tree (large + cacheable) is injected earliest so # provider-side prefix caching has more material to hit; FileIntent (most # actionable per-turn contract) is injected closest to the user message. # # ``wrap_model_call`` ordering: the FIRST middleware in the list is the # OUTERMOST wrapper. To ensure prune executes before summarization, # place ``SpillingContextEditingMiddleware`` before # ``SurfSenseCompactionMiddleware``. Compaction is the canonical # token-budget defense; the Bedrock buffer-empty defense is folded # into ``SurfSenseCompactionMiddleware``. summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend) _ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity # ContextEditing prune. Trigger at 55% of ``max_input_tokens``, # earlier than summarization (~85%). When disabled, no edit runs. context_edit_mw = None if ( flags.enable_context_editing and not flags.disable_new_agent_stack and max_input_tokens ): spill_edit = SpillToBackendEdit( trigger=int(max_input_tokens * 0.55), clear_at_least=int(max_input_tokens * 0.15), keep=5, exclude_tools=_safe_exclude_tools(tools), clear_tool_inputs=True, ) clear_edit = ClearToolUsesEdit( trigger=int(max_input_tokens * 0.55), clear_at_least=int(max_input_tokens * 0.15), keep=5, exclude_tools=_safe_exclude_tools(tools), clear_tool_inputs=True, placeholder="[cleared - older tool output trimmed for context]", ) context_edit_mw = SpillingContextEditingMiddleware( edits=[spill_edit, clear_edit], backend_resolver=backend_resolver, ) # Resilience knobs: header-aware retry, model fallback, and # per-thread / per-run call-count limits. The fallback / limit # middlewares are vanilla LangChain primitives; ``RetryAfter`` is # SurfSense's header-aware variant (see its module docstring). retry_mw = ( RetryAfterMiddleware(max_retries=3) if flags.enable_retry_after and not flags.disable_new_agent_stack else None ) # Fallback chain — primary is the agent's own model; we add cheap # alternatives. Off by default; only the first call site that # configures the chain via env should enable it. fallback_mw: ModelFallbackMiddleware | None = None if flags.enable_model_fallback and not flags.disable_new_agent_stack: try: fallback_mw = ModelFallbackMiddleware( "openai:gpt-4o-mini", "anthropic:claude-3-5-haiku-20241022", ) except Exception: logging.warning("ModelFallbackMiddleware init failed; skipping.") fallback_mw = None model_call_limit_mw = ( ModelCallLimitMiddleware( thread_limit=120, run_limit=80, exit_behavior="end", ) if flags.enable_model_call_limit and not flags.disable_new_agent_stack else None ) tool_call_limit_mw = ( ToolCallLimitMiddleware( thread_limit=300, run_limit=80, exit_behavior="continue" ) if flags.enable_tool_call_limit and not flags.disable_new_agent_stack else None ) # Provider-compat ``_noop`` injection (mirrors OpenCode's # ``llm.ts`` workaround for providers that reject empty assistant # turns or alternating-role constraints). noop_mw = ( NoopInjectionMiddleware() if flags.enable_compaction_v2 and not flags.disable_new_agent_stack else None ) # Tool-call name repair (lowercase + ``invalid`` fallback). # # ``registered_tool_names`` MUST cover every tool the model can legitimately # call. That includes the bound ``tools`` list AND every tool provided by # middleware in the stack — ``FilesystemMiddleware`` (read_file, ls, grep, # glob, edit_file, write_file, execute), ``TodoListMiddleware`` # (write_todos), ``SubAgentMiddleware`` (task), ``SkillsMiddleware`` (skill # loaders), etc. If we only inspect ``tools`` here, every call to # ``read_file`` / ``ls`` / ``grep`` from the model will be rewritten to # ``invalid`` because the repair middleware doesn't recognize them. The # built-in deepagents middleware aren't in scope yet at this point of the # function but they're added unconditionally below, so we hard-code their # canonical names alongside the dynamic ``tools`` set. repair_mw = None if flags.enable_tool_call_repair and not flags.disable_new_agent_stack: registered_names: set[str] = {t.name for t in tools} # Tools owned by the standard deepagents middleware stack. registered_names |= { "write_todos", "ls", "read_file", "write_file", "edit_file", "glob", "grep", "execute", "task", } repair_mw = ToolCallNameRepairMiddleware( registered_tool_names=registered_names, # Disable fuzzy matching to avoid silent rewrites; the # lowercase + ``invalid`` fallback alone covers >95% of # observed model errors. fuzzy_match_threshold=None, ) # Doom-loop detector. Off by default until the frontend handles # ``permission == "doom_loop"`` interrupts. doom_loop_mw = ( DoomLoopMiddleware(threshold=3) if flags.enable_doom_loop and not flags.disable_new_agent_stack else None ) # PermissionMiddleware. Layers, earliest -> latest (last match wins, # same evaluation order as OpenCode's ``permission/index.ts``): # # 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense # already runs per-tool HITL (see ``tools/hitl.py``) for mutating # connector tools, so we only want PermissionMiddleware to *deny* # things the user has gated off; the default fallback in # ``permissions.evaluate`` is ``ask``, which would double-prompt # on every safe read-only call (``ls``, ``read_file``, ``grep``, # ``glob``, ``web_search`` …) and, on resume, replay the previous # reject decision into innocent calls. # 2. ``connector_synthesized`` — deny rules for tools whose required # connector is not connected to this space. Overrides #1. # 3. (future) user-defined rules from ``agent_permission_rules`` table # via the Agent Permissions UI. Loaded last so they override both. permission_mw: PermissionMiddleware | None = None if flags.enable_permission and not flags.disable_new_agent_stack: synthesized = _synthesize_connector_deny_rules( available_connectors=available_connectors, enabled_tool_names={t.name for t in tools}, ) permission_mw = PermissionMiddleware( rulesets=[ Ruleset( rules=[Rule(permission="*", pattern="*", action="allow")], origin="surfsense_defaults", ), Ruleset(rules=synthesized, origin="connector_synthesized"), ], ) # ActionLogMiddleware. Off by default until the ``agent_action_log`` # table is migrated. When enabled, persists one row per tool call # with optional reverse_descriptor for # ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside # ``permission`` so denied calls aren't logged as completions. action_log_mw: ActionLogMiddleware | None = None if ( flags.enable_action_log and not flags.disable_new_agent_stack and thread_id is not None ): try: tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS} action_log_mw = ActionLogMiddleware( thread_id=thread_id, search_space_id=search_space_id, user_id=user_id, tool_definitions=tool_defs_by_name, ) except Exception: # pragma: no cover - defensive logging.warning( "ActionLogMiddleware init failed; running without it.", exc_info=True, ) action_log_mw = None # Per-thread busy mutex (refuse a second concurrent turn on the same # thread; see :class:`BusyMutexMiddleware` docstring). busy_mutex_mw: BusyMutexMiddleware | None = ( BusyMutexMiddleware() if flags.enable_busy_mutex and not flags.disable_new_agent_stack else None ) # OpenTelemetry spans (model.call + tool.call). Lives just inside # BusyMutex so it spans every retry/fallback attempt of the current # turn but never wraps a queued/blocked turn. otel_mw: OtelSpanMiddleware | None = ( OtelSpanMiddleware() if flags.enable_otel and not flags.disable_new_agent_stack else None ) # Plugin entry-point loader. Off by default; opt-in via the # ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from # the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future # PR can wire it through ``global_llm_config.yaml``. plugin_middlewares: list[Any] = [] if flags.enable_plugin_loader and not flags.disable_new_agent_stack: try: allowed_names = load_allowed_plugin_names_from_env() if allowed_names: plugin_middlewares = load_plugin_middlewares( PluginContext.build( search_space_id=search_space_id, user_id=user_id, thread_visibility=visibility, llm=llm, ), allowed_plugin_names=allowed_names, ) except Exception: # pragma: no cover - defensive logging.warning( "Plugin loader failed; continuing without plugins.", exc_info=True, ) plugin_middlewares = [] # SkillsMiddleware (deepagents) loads built-in + space-authored # skills via a CompositeBackend. Sources are layered: built-in first, # space last, so a search-space-authored skill of the same name # overrides the bundled one. skills_mw: SkillsMiddleware | None = None if flags.enable_skills and not flags.disable_new_agent_stack: try: skills_factory = build_skills_backend_factory( search_space_id=search_space_id if filesystem_mode == FilesystemMode.CLOUD else None, ) skills_mw = SkillsMiddleware( backend=skills_factory, sources=default_skills_sources(), ) except Exception as exc: # pragma: no cover - defensive logging.warning("SkillsMiddleware init failed; skipping: %s", exc) skills_mw = None # LangChain's LLM-driven tool selection — only enabled for stacks # large enough to need narrowing (>30 tools). selector_mw: LLMToolSelectorMiddleware | None = None if ( flags.enable_llm_tool_selector and not flags.disable_new_agent_stack and len(tools) > 30 ): try: selector_mw = LLMToolSelectorMiddleware( model="openai:gpt-4o-mini", max_tools=12, always_include=[ name for name in ( "update_memory", "get_connected_accounts", "scrape_webpage", ) if name in {t.name for t in tools} ], ) except Exception: logging.warning("LLMToolSelectorMiddleware init failed; skipping.") selector_mw = None deepagent_middleware = [ # BusyMutex is OUTERMOST: it must wrap the entire stream so no # other turn can sneak in while this one is mid-flight. busy_mutex_mw, # OTel spans sit just inside BusyMutex so each retry attempt # gets its own model.call / tool.call span. otel_mw, TodoListMiddleware(), _memory_middleware, AnonymousDocumentMiddleware( anon_session_id=anon_session_id, ) if filesystem_mode == FilesystemMode.CLOUD else None, KnowledgeTreeMiddleware( search_space_id=search_space_id, filesystem_mode=filesystem_mode, llm=llm, ) if filesystem_mode == FilesystemMode.CLOUD else None, KnowledgePriorityMiddleware( llm=llm, search_space_id=search_space_id, filesystem_mode=filesystem_mode, available_connectors=available_connectors, available_document_types=available_document_types, mentioned_document_ids=mentioned_document_ids, ), FileIntentMiddleware(llm=llm), SurfSenseFilesystemMiddleware( backend=backend_resolver, filesystem_mode=filesystem_mode, search_space_id=search_space_id, created_by_id=user_id, thread_id=thread_id, ), KnowledgeBasePersistenceMiddleware( search_space_id=search_space_id, created_by_id=user_id, filesystem_mode=filesystem_mode, ) if filesystem_mode == FilesystemMode.CLOUD else None, # Skill loader. Placed before SubAgentMiddleware so subagents # inherit the same skill metadata (subagent specs reference the # same source paths via ``default_skills_sources()``). skills_mw, SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs), # Tool selection (only when >30 tools and flag on). selector_mw, # Defensive caps, then prune, then summarize. model_call_limit_mw, tool_call_limit_mw, context_edit_mw, summarization_mw, # Provider compatibility + retry chain — placed after prune/compact # so retries happen on the already-trimmed payload. noop_mw, retry_mw, fallback_mw, # Tool-call repair must run after model emits but before # permission / dedup / doom-loop interpret the calls. repair_mw, # Permission deny/ask BEFORE the calls are forwarded to tool nodes. permission_mw, doom_loop_mw, # Action log sits inside permission so denied calls don't appear # as completions, and outside dedup so each unique tool invocation # gets its own row. action_log_mw, PatchToolCallsMiddleware(), DedupHITLToolCallsMiddleware(agent_tools=list(tools)), # Plugin slot — sits just before AnthropicCache so plugin-side # transforms see the final tool result and run before any # caching heuristics. Multiple plugins in declared order; loader # filtered by the admin allowlist already. *plugin_middlewares, AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), ] deepagent_middleware = [m for m in deepagent_middleware if m is not None] agent = create_agent( llm, system_prompt=final_system_prompt, tools=list(tools), middleware=deepagent_middleware, context_schema=SurfSenseContextSchema, checkpointer=checkpointer, ) return agent.with_config( { "recursion_limit": 10_000, "metadata": { "ls_integration": "deepagents", "versions": {"deepagents": deepagents_version}, }, } )