SurfSense/surfsense_backend/app/agents/new_chat/chat_deepagent.py

"""
SurfSense deep agent implementation.

This module provides the factory function for creating SurfSense deep agents
with configurable tools via the tools registry and configurable prompts
via NewLLMConfig.

We use ``create_agent`` (from langchain) rather than ``create_deep_agent``
(from deepagents) so that the middleware stack is fully under our control.
This lets us swap in ``SurfSenseFilesystemMiddleware`` — a customisable
subclass of the default ``FilesystemMiddleware`` — while preserving every
other behaviour that ``create_deep_agent`` provides (todo-list, subagents,
summarisation, etc.). Prompt caching is configured at LLM-build time via
``apply_litellm_prompt_caching`` (LiteLLM-native, multi-provider) rather
than as a middleware.
"""

import asyncio
import logging
import time
from collections.abc import Sequence
from typing import Any

from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_version
from deepagents.backends import StateBackend
from deepagents.graph import BASE_AGENT_PROMPT
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.skills import SkillsMiddleware
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
from langchain.agents import create_agent
from langchain.agents.middleware import (
    LLMToolSelectorMiddleware,
    ModelCallLimitMiddleware,
    ModelFallbackMiddleware,
    TodoListMiddleware,
    ToolCallLimitMiddleware,
)
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession

from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
from app.agents.new_chat.filesystem_backends import build_backend_resolver
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.middleware import (
    ActionLogMiddleware,
    AnonymousDocumentMiddleware,
    BusyMutexMiddleware,
    ClearToolUsesEdit,
    DedupHITLToolCallsMiddleware,
    DoomLoopMiddleware,
    FileIntentMiddleware,
    KnowledgeBasePersistenceMiddleware,
    KnowledgePriorityMiddleware,
    KnowledgeTreeMiddleware,
    MemoryInjectionMiddleware,
    NoopInjectionMiddleware,
    OtelSpanMiddleware,
    PermissionMiddleware,
    RetryAfterMiddleware,
    SpillingContextEditingMiddleware,
    SpillToBackendEdit,
    SurfSenseFilesystemMiddleware,
    ToolCallNameRepairMiddleware,
    build_skills_backend_factory,
    create_surfsense_compaction_middleware,
    default_skills_sources,
)
from app.agents.new_chat.permissions import Rule, Ruleset
from app.agents.new_chat.plugin_loader import (
    PluginContext,
    load_allowed_plugin_names_from_env,
    load_plugin_middlewares,
)
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
from app.agents.new_chat.subagents import build_specialized_subagents
from app.agents.new_chat.system_prompt import (
    build_configurable_system_prompt,
    build_surfsense_system_prompt,
)
from app.agents.new_chat.tools.invalid_tool import (
    INVALID_TOOL_NAME,
    invalid_tool,
)
from app.agents.new_chat.tools.registry import (
    BUILTIN_TOOLS,
    build_tools_async,
    get_connector_gated_tools,
)
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.utils.perf import get_perf_logger

_perf_log = get_perf_logger()


def _resolve_prompt_model_name(
    agent_config: AgentConfig | None,
    llm: BaseChatModel,
) -> str | None:
    """Resolve the model id to feed to provider-variant detection.

    Preference order (matches the established idiom in
    ``llm_router_service.py`` — see ``params.get("base_model") or
    params.get("model", "")`` usages there):

    1. ``agent_config.litellm_params["base_model"]`` — required for Azure
       deployments where ``model_name`` is the deployment slug, not the
       underlying family. Without this, a deployment named e.g.
       ``"prod-chat-001"`` would silently miss every provider regex.
    2. ``agent_config.model_name`` — the user's configured model id.
    3. ``getattr(llm, "model", None)`` — fallback for direct callers that
       don't supply an ``AgentConfig`` (currently a defensive path; all
       production callers pass ``agent_config``).

    Returns ``None`` when nothing is available; ``compose_system_prompt``
    treats that as the ``"default"`` variant (no provider block emitted).
    """
    if agent_config is not None:
        params = agent_config.litellm_params or {}
        base_model = params.get("base_model")
        if isinstance(base_model, str) and base_model.strip():
            return base_model
        if agent_config.model_name:
            return agent_config.model_name
    return getattr(llm, "model", None)


# =============================================================================
# Connector Type Mapping
# =============================================================================

# Maps SearchSourceConnectorType enum values to the searchable document/connector types
# used by pre-search middleware and web_search.
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
# the web_search tool; all others are considered local/indexed data.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # Live search connectors (handled by web_search tool)
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # Local/indexed connectors (handled by KB pre-search middleware)
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
    # Composio connectors (unified to native document types).
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}

# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # Browser extension data
    "FILE",  # Uploaded files
    "NOTE",  # User notes
    "YOUTUBE_VIDEO",  # YouTube videos
]


def _map_connectors_to_searchable_types(
    connector_types: list[Any],
) -> list[str]:
    """
    Map SearchSourceConnectorType enums to searchable document/connector types.

    This function:
    1. Converts connector type enums to their searchable counterparts
    2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
    3. Deduplicates while preserving order

    Args:
        connector_types: List of SearchSourceConnectorType enum values

    Returns:
        List of searchable connector/document type strings
    """
    result_set: set[str] = set()
    result_list: list[str] = []

    # Add always-available document types first
    for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
        if doc_type not in result_set:
            result_set.add(doc_type)
            result_list.append(doc_type)

    # Map each connector type to its searchable equivalent
    for ct in connector_types:
        # Handle both enum and string types
        ct_str = ct.value if hasattr(ct, "value") else str(ct)
        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
        if searchable and searchable not in result_set:
            result_set.add(searchable)
            result_list.append(searchable)

    return result_list


# =============================================================================
# Deep Agent Factory
# =============================================================================


async def create_surfsense_deep_agent(
    llm: BaseChatModel,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
    mentioned_document_ids: list[int] | None = None,
    anon_session_id: str | None = None,
    filesystem_selection: FilesystemSelection | None = None,
):
    """
    Create a SurfSense deep agent with configurable tools and prompts.

    The agent comes with built-in tools that can be configured:
    - generate_podcast: Generate audio podcasts from content
    - generate_image: Generate images from text descriptions using AI models
    - scrape_webpage: Extract content from webpages
    - update_memory: Update the user's personal or team memory document

    The agent also includes TodoListMiddleware by default (via create_deep_agent) which provides:
    - write_todos: Create and update planning/todo lists for complex tasks

    The system prompt can be configured via agent_config:
    - Custom system instructions (or use defaults)
    - Citation toggle (enable/disable citation requirements)

    Args:
        llm: ChatLiteLLM instance for the agent's language model
        search_space_id: The user's search space ID
        db_session: Database session for tools that need DB access
        connector_service: Initialized connector service for knowledge base search
        checkpointer: LangGraph checkpointer for conversation state persistence.
                      Use AsyncPostgresSaver for production or MemorySaver for testing.
        user_id: The current user's UUID string (required for memory tools)
        agent_config: Optional AgentConfig from NewLLMConfig for prompt configuration.
                     If None, uses default system prompt with citations enabled.
        enabled_tools: Explicit list of tool names to enable. If None, all default tools
                      are enabled. Use this to limit which tools are available.
        disabled_tools: List of tool names to disable. Applied after enabled_tools.
                       Use this to exclude specific tools from the defaults.
        additional_tools: Extra custom tools to add beyond the built-in ones.
                         These are always added regardless of enabled/disabled settings.
        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
                          Falls back to Chromium/Trafilatura if not provided.

    Returns:
        CompiledStateGraph: The configured deep agent

    Examples:
        # Create agent with all default tools and default prompt
        agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...)

        # Create agent with custom prompt configuration
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            agent_config=AgentConfig(
                provider="OPENAI",
                model_name="gpt-4",
                api_key="...",
                system_instructions="Custom instructions...",
                citations_enabled=False,
            )
        )

        # Create agent with only specific tools
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            enabled_tools=["scrape_webpage"]
        )

        # Create agent without podcast generation
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            disabled_tools=["generate_podcast"]
        )

        # Add custom tools
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            additional_tools=[my_custom_tool]
        )
    """
    _t_agent_total = time.perf_counter()

    # Layer thread-aware prompt caching onto the LLM. Idempotent with the
    # build-time call in ``llm_config.py``; this run merely adds
    # ``prompt_cache_key=f"surfsense-thread-{thread_id}"`` for OpenAI-family
    # configs now that ``thread_id`` is known. No-op when ``thread_id`` is
    # None or the provider is non-OpenAI-family.
    apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id)

    filesystem_selection = filesystem_selection or FilesystemSelection()
    backend_resolver = build_backend_resolver(
        filesystem_selection,
        search_space_id=search_space_id
        if filesystem_selection.mode == FilesystemMode.CLOUD
        else None,
    )

    # Discover available connectors and document types for this search space
    available_connectors: list[str] | None = None
    available_document_types: list[str] | None = None

    _t0 = time.perf_counter()
    try:
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
        if connector_types:
            available_connectors = _map_connectors_to_searchable_types(connector_types)

        available_document_types = await connector_service.get_available_document_types(
            search_space_id
        )

    except Exception as e:
        logging.warning(f"Failed to discover available connectors/document types: {e}")
    _perf_log.info(
        "[create_agent] Connector/doc-type discovery in %.3fs",
        time.perf_counter() - _t0,
    )

    # Build dependencies dict for the tools registry
    visibility = thread_visibility or ChatVisibility.PRIVATE

    # Extract the model's context window so tools can size their output.
    _model_profile = getattr(llm, "profile", None)
    _max_input_tokens: int | None = (
        _model_profile.get("max_input_tokens")
        if isinstance(_model_profile, dict)
        else None
    )

    dependencies = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,
        "thread_id": thread_id,
        "thread_visibility": visibility,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "max_input_tokens": _max_input_tokens,
        "llm": llm,
    }

    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    modified_disabled_tools.extend(get_connector_gated_tools(available_connectors))

    # Remove direct KB search tool; KnowledgePriorityMiddleware now runs hybrid
    # search per turn and surfaces hits as a <priority_documents> hint plus
    # `<chunk_index matched="true">` markers inside lazy-loaded XML.
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")

    # Build tools using the async registry (includes MCP tools)
    _t0 = time.perf_counter()
    tools = await build_tools_async(
        dependencies=dependencies,
        enabled_tools=enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
    )

    # Register the ``invalid`` tool only when tool-call repair is on. It
    # is dispatched only when :class:`ToolCallNameRepairMiddleware`
    # rewrites a malformed call. We intentionally append it AFTER
    # ``build_tools_async`` so it never appears in the system-prompt
    # tool list (which is built from the registry, not the bound tool
    # list).
    _flags: AgentFeatureFlags = get_flags()
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
    }:
        tools = [*list(tools), invalid_tool]
    _perf_log.info(
        "[create_agent] build_tools_async in %.3fs (%d tools)",
        time.perf_counter() - _t0,
        len(tools),
    )

    # Build system prompt based on agent_config, scoped to the tools actually enabled
    _t0 = time.perf_counter()
    _enabled_tool_names = {t.name for t in tools}
    _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()

    # Collect generic MCP connector info so the system prompt can route queries
    # to their tools instead of falling back to "not in knowledge base".
    _mcp_connector_tools: dict[str, list[str]] = {}
    for t in tools:
        meta = getattr(t, "metadata", None) or {}
        if meta.get("mcp_is_generic") and meta.get("mcp_connector_name"):
            _mcp_connector_tools.setdefault(
                meta["mcp_connector_name"],
                [],
            ).append(t.name)

    if _mcp_connector_tools:
        _perf_log.info("MCP connector tool routing: %s", _mcp_connector_tools)

    if agent_config is not None:
        system_prompt = build_configurable_system_prompt(
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
            model_name=_resolve_prompt_model_name(agent_config, llm),
        )
    else:
        system_prompt = build_surfsense_system_prompt(
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
            model_name=_resolve_prompt_model_name(agent_config, llm),
        )
    _perf_log.info(
        "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
    )

    # Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT

    # The middleware stack — and especially ``SubAgentMiddleware`` — is *not*
    # cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent``
    # synchronously to compile the general-purpose subagent's full state graph
    # (every tool + every middleware → pydantic schemas + langgraph compile).
    # On gpt-5.x agents that's roughly 1.5-2s of pure CPU work. If we run it
    # directly here it blocks the asyncio event loop for the whole streaming
    # task (and any other coroutine sharing this loop), which is why
    # "agent creation" wall-clock time used to stretch to ~3-4s. Move the
    # entire middleware build + main-graph compile into a single
    # ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
    # event loop stays responsive.
    _t0 = time.perf_counter()
    agent = await asyncio.to_thread(
        _build_compiled_agent_blocking,
        llm=llm,
        tools=tools,
        final_system_prompt=final_system_prompt,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_selection.mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=_max_input_tokens,
        flags=_flags,
        checkpointer=checkpointer,
    )
    _perf_log.info(
        "[create_agent] Middleware stack + graph compiled in %.3fs",
        time.perf_counter() - _t0,
    )

    _perf_log.info(
        "[create_agent] Total agent creation in %.3fs",
        time.perf_counter() - _t_agent_total,
    )
    return agent


# Tools whose output is too costly / lossy to discard. Keep this
# conservative — anything listed here is *never* pruned by
# :class:`ContextEditingMiddleware`. The list is filtered against
# actually-bound tool names so disabled connectors don't show up here.
_PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "generate_report",
        "generate_resume",
        "generate_podcast",
        "generate_video_presentation",
        "generate_image",
        # Read-heavy connector reads — recomputing them is expensive
        "read_email",
        "search_emails",
        # The fallback for malformed tool calls — keep its replies visible
        "invalid",
    }
)


def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
    """Return ``exclude_tools`` derived from the actually-bound tool list.

    Filters :data:`_PRUNE_PROTECTED_TOOL_NAMES` against the bound tools
    so we never list tools that don't exist (would be a silent no-op).
    """
    enabled = {t.name for t in tools}
    return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled)


# Connector gating: any tool whose ``ToolDefinition.required_connector``
# isn't actually wired up gets a synthesized permission deny rule so
# execution attempts short-circuit with ``permission_denied`` instead of
# bubbling up provider-specific 401/404 errors. Mirrors OpenCode's
# ``Permission.disabled`` (declarative, per-tool gating) — replaces the
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic.
def _synthesize_connector_deny_rules(
    *,
    available_connectors: list[str] | None,
    enabled_tool_names: set[str],
) -> list[Rule]:
    """Build deny rules for tools whose required connector is not enabled.

    Source of truth is ``ToolDefinition.required_connector`` in
    :data:`BUILTIN_TOOLS`. A tool only gets a deny rule when:

    1. It is currently bound (``enabled_tool_names``).
    2. It declares a ``required_connector``.
    3. That connector is *not* in ``available_connectors``.
    """
    available = set(available_connectors or [])
    deny: list[Rule] = []
    for tool_def in BUILTIN_TOOLS:
        if tool_def.name not in enabled_tool_names:
            continue
        rc = tool_def.required_connector
        if rc and rc not in available:
            deny.append(Rule(permission=tool_def.name, pattern="*", action="deny"))
    return deny


def _build_compiled_agent_blocking(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    final_system_prompt: str,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    checkpointer: Checkpointer,
):
    """Build the middleware stack and compile the agent graph synchronously.

    Runs in a worker thread (see ``asyncio.to_thread`` call site) so the heavy
    CPU work — most notably ``SubAgentMiddleware.__init__`` eagerly calling
    ``create_agent`` to compile the general-purpose subagent — does not block
    the event loop.
    """
    _memory_middleware = MemoryInjectionMiddleware(
        user_id=user_id,
        search_space_id=search_space_id,
        thread_visibility=visibility,
    )

    # General-purpose subagent middleware
    # Subagent omits AnonymousDocumentMiddleware, KnowledgeTreeMiddleware,
    # KnowledgePriorityMiddleware, and KnowledgeBasePersistenceMiddleware - it
    # inherits state and tools from the parent, but should not (a) re-load
    # anon docs / re-render the tree / re-run hybrid search, or (b) commit at
    # its own completion (only the top-level agent's aafter_agent commits).
    gp_middleware = [
        TodoListMiddleware(),
        _memory_middleware,
        FileIntentMiddleware(llm=llm),
        SurfSenseFilesystemMiddleware(
            backend=backend_resolver,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            created_by_id=user_id,
            thread_id=thread_id,
        ),
        create_surfsense_compaction_middleware(llm, StateBackend),
        PatchToolCallsMiddleware(),
    ]

    general_purpose_spec: SubAgent = {  # type: ignore[typeddict-unknown-key]
        **GENERAL_PURPOSE_SUBAGENT,
        "model": llm,
        "tools": tools,
        "middleware": gp_middleware,
    }

    # Specialized user-facing subagents (explore, report_writer,
    # connector_negotiator). Registered through SubAgentMiddleware alongside
    # the general-purpose spec so the parent's `task` tool can address them
    # by name. Off by default until the flag flips so existing deployments
    # don't see new agent types in the task tool description.
    specialized_subagents: list[SubAgent] = []
    if flags.enable_specialized_subagents and not flags.disable_new_agent_stack:
        try:
            # Specialized subagents share the parent's filesystem +
            # todo view so their system prompts (which promise
            # ``read_file``, ``ls``, ``grep``, ``glob``, ``write_todos``)
            # actually match runtime behavior. Build *fresh* instances
            # rather than aliasing the parent's GP middleware to avoid
            # subtle state coupling across compiled graphs.
            subagent_extra_middleware: list = [
                TodoListMiddleware(),
                SurfSenseFilesystemMiddleware(
                    backend=backend_resolver,
                    filesystem_mode=filesystem_mode,
                    search_space_id=search_space_id,
                    created_by_id=user_id,
                    thread_id=thread_id,
                ),
            ]
            specialized_subagents = build_specialized_subagents(
                tools=tools,
                model=llm,
                extra_middleware=subagent_extra_middleware,
            )
        except Exception as exc:  # pragma: no cover - defensive
            logging.warning(
                "Specialized subagent build failed; running without them: %s",
                exc,
            )
            specialized_subagents = []

    subagent_specs: list[SubAgent] = [general_purpose_spec, *specialized_subagents]

    # Main agent middleware
    # Order: AnonDoc -> Tree -> Priority -> FileIntent -> Filesystem -> Persistence -> ...
    # before_agent hooks run in declared order; later injections sit closer to
    # the latest human turn. Tree (large + cacheable) is injected earliest so
    # provider-side prefix caching has more material to hit; FileIntent (most
    # actionable per-turn contract) is injected closest to the user message.
    #
    # ``wrap_model_call`` ordering: the FIRST middleware in the list is the
    # OUTERMOST wrapper. To ensure prune executes before summarization,
    # place ``SpillingContextEditingMiddleware`` before
    # ``SurfSenseCompactionMiddleware``. Compaction is the canonical
    # token-budget defense; the Bedrock buffer-empty defense is folded
    # into ``SurfSenseCompactionMiddleware``.
    summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend)
    _ = flags.enable_compaction_v2  # historical flag; retained for telemetry parity

    # ContextEditing prune. Trigger at 55% of ``max_input_tokens``,
    # earlier than summarization (~85%). When disabled, no edit runs.
    context_edit_mw = None
    if (
        flags.enable_context_editing
        and not flags.disable_new_agent_stack
        and max_input_tokens
    ):
        spill_edit = SpillToBackendEdit(
            trigger=int(max_input_tokens * 0.55),
            clear_at_least=int(max_input_tokens * 0.15),
            keep=5,
            exclude_tools=_safe_exclude_tools(tools),
            clear_tool_inputs=True,
        )
        clear_edit = ClearToolUsesEdit(
            trigger=int(max_input_tokens * 0.55),
            clear_at_least=int(max_input_tokens * 0.15),
            keep=5,
            exclude_tools=_safe_exclude_tools(tools),
            clear_tool_inputs=True,
            placeholder="[cleared - older tool output trimmed for context]",
        )
        context_edit_mw = SpillingContextEditingMiddleware(
            edits=[spill_edit, clear_edit],
            backend_resolver=backend_resolver,
        )

    # Resilience knobs: header-aware retry, model fallback, and
    # per-thread / per-run call-count limits. The fallback / limit
    # middlewares are vanilla LangChain primitives; ``RetryAfter`` is
    # SurfSense's header-aware variant (see its module docstring).
    retry_mw = (
        RetryAfterMiddleware(max_retries=3)
        if flags.enable_retry_after and not flags.disable_new_agent_stack
        else None
    )
    # Fallback chain — primary is the agent's own model; we add cheap
    # alternatives. Off by default; only the first call site that
    # configures the chain via env should enable it.
    fallback_mw: ModelFallbackMiddleware | None = None
    if flags.enable_model_fallback and not flags.disable_new_agent_stack:
        try:
            fallback_mw = ModelFallbackMiddleware(
                "openai:gpt-4o-mini",
                "anthropic:claude-3-5-haiku-20241022",
            )
        except Exception:
            logging.warning("ModelFallbackMiddleware init failed; skipping.")
            fallback_mw = None
    model_call_limit_mw = (
        ModelCallLimitMiddleware(
            thread_limit=120,
            run_limit=80,
            exit_behavior="end",
        )
        if flags.enable_model_call_limit and not flags.disable_new_agent_stack
        else None
    )
    tool_call_limit_mw = (
        ToolCallLimitMiddleware(
            thread_limit=300, run_limit=80, exit_behavior="continue"
        )
        if flags.enable_tool_call_limit and not flags.disable_new_agent_stack
        else None
    )

    # Provider-compat ``_noop`` injection (mirrors OpenCode's
    # ``llm.ts`` workaround for providers that reject empty assistant
    # turns or alternating-role constraints).
    noop_mw = (
        NoopInjectionMiddleware()
        if flags.enable_compaction_v2 and not flags.disable_new_agent_stack
        else None
    )

    # Tool-call name repair (lowercase + ``invalid`` fallback).
    #
    # ``registered_tool_names`` MUST cover every tool the model can legitimately
    # call. That includes the bound ``tools`` list AND every tool provided by
    # middleware in the stack — ``FilesystemMiddleware`` (read_file, ls, grep,
    # glob, edit_file, write_file, execute), ``TodoListMiddleware``
    # (write_todos), ``SubAgentMiddleware`` (task), ``SkillsMiddleware`` (skill
    # loaders), etc. If we only inspect ``tools`` here, every call to
    # ``read_file`` / ``ls`` / ``grep`` from the model will be rewritten to
    # ``invalid`` because the repair middleware doesn't recognize them. The
    # built-in deepagents middleware aren't in scope yet at this point of the
    # function but they're added unconditionally below, so we hard-code their
    # canonical names alongside the dynamic ``tools`` set.
    repair_mw = None
    if flags.enable_tool_call_repair and not flags.disable_new_agent_stack:
        registered_names: set[str] = {t.name for t in tools}
        # Tools owned by the standard deepagents middleware stack and the
        # SurfSense filesystem extension.
        registered_names |= {
            "write_todos",
            "ls",
            "read_file",
            "write_file",
            "edit_file",
            "glob",
            "grep",
            "execute",
            "task",
            "mkdir",
            "cd",
            "pwd",
            "move_file",
            "rm",
            "rmdir",
            "list_tree",
            "execute_code",
        }
        repair_mw = ToolCallNameRepairMiddleware(
            registered_tool_names=registered_names,
            # Disable fuzzy matching to avoid silent rewrites; the
            # lowercase + ``invalid`` fallback alone covers >95% of
            # observed model errors.
            fuzzy_match_threshold=None,
        )

    # Doom-loop detector. Off by default until the frontend handles
    # ``permission == "doom_loop"`` interrupts.
    doom_loop_mw = (
        DoomLoopMiddleware(threshold=3)
        if flags.enable_doom_loop and not flags.disable_new_agent_stack
        else None
    )

    # PermissionMiddleware. Layers, earliest -> latest (last match wins,
    # same evaluation order as OpenCode's ``permission/index.ts``):
    #
    # 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense
    #    already runs per-tool HITL (see ``tools/hitl.py``) for mutating
    #    connector tools, so we only want PermissionMiddleware to *deny*
    #    things the user has gated off; the default fallback in
    #    ``permissions.evaluate`` is ``ask``, which would double-prompt
    #    on every safe read-only call (``ls``, ``read_file``, ``grep``,
    #    ``glob``, ``web_search`` …) and, on resume, replay the previous
    #    reject decision into innocent calls.
    # 2. ``desktop_safety`` — ``ask`` for destructive filesystem ops when
    #    the agent is operating against the user's real disk. Cloud mode
    #    has full revision-based revert via ``revert_service``, but
    #    desktop mode hits disk immediately with no undo, so an
    #    accidental ``rm`` / ``rmdir`` / ``move_file`` / ``edit_file`` /
    #    ``write_file`` is unrecoverable. This layer is forced on in
    #    desktop mode regardless of ``enable_permission`` because the
    #    safety net is non-negotiable.
    # 3. ``connector_synthesized`` — deny rules for tools whose required
    #    connector is not connected to this space. Overrides #1/#2.
    # 4. (future) user-defined rules from ``agent_permission_rules`` table
    #    via the Agent Permissions UI. Loaded last so they override all.
    permission_mw: PermissionMiddleware | None = None
    is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER
    permission_enabled = flags.enable_permission and not flags.disable_new_agent_stack
    # Build the middleware whenever it has work to do: either the user
    # opted into the rule engine, OR we're in desktop mode and need the
    # safety rules unconditionally.
    if permission_enabled or is_desktop_fs:
        rulesets: list[Ruleset] = [
            Ruleset(
                rules=[Rule(permission="*", pattern="*", action="allow")],
                origin="surfsense_defaults",
            ),
        ]
        if is_desktop_fs:
            rulesets.append(
                Ruleset(
                    rules=[
                        Rule(permission="rm", pattern="*", action="ask"),
                        Rule(permission="rmdir", pattern="*", action="ask"),
                        Rule(permission="move_file", pattern="*", action="ask"),
                        Rule(permission="edit_file", pattern="*", action="ask"),
                        Rule(permission="write_file", pattern="*", action="ask"),
                    ],
                    origin="desktop_safety",
                )
            )
        if permission_enabled:
            synthesized = _synthesize_connector_deny_rules(
                available_connectors=available_connectors,
                enabled_tool_names={t.name for t in tools},
            )
            rulesets.append(Ruleset(rules=synthesized, origin="connector_synthesized"))
        permission_mw = PermissionMiddleware(rulesets=rulesets)

    # ActionLogMiddleware. Off by default until the ``agent_action_log``
    # table is migrated. When enabled, persists one row per tool call
    # with optional reverse_descriptor for
    # ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside
    # ``permission`` so denied calls aren't logged as completions.
    action_log_mw: ActionLogMiddleware | None = None
    if (
        flags.enable_action_log
        and not flags.disable_new_agent_stack
        and thread_id is not None
    ):
        try:
            tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
            action_log_mw = ActionLogMiddleware(
                thread_id=thread_id,
                search_space_id=search_space_id,
                user_id=user_id,
                tool_definitions=tool_defs_by_name,
            )
        except Exception:  # pragma: no cover - defensive
            logging.warning(
                "ActionLogMiddleware init failed; running without it.",
                exc_info=True,
            )
            action_log_mw = None

    # Per-thread busy mutex (refuse a second concurrent turn on the same
    # thread; see :class:`BusyMutexMiddleware` docstring).
    busy_mutex_mw: BusyMutexMiddleware | None = (
        BusyMutexMiddleware()
        if flags.enable_busy_mutex and not flags.disable_new_agent_stack
        else None
    )

    # OpenTelemetry spans (model.call + tool.call). Lives just inside
    # BusyMutex so it spans every retry/fallback attempt of the current
    # turn but never wraps a queued/blocked turn.
    otel_mw: OtelSpanMiddleware | None = (
        OtelSpanMiddleware()
        if flags.enable_otel and not flags.disable_new_agent_stack
        else None
    )

    # Plugin entry-point loader. Off by default; opt-in via the
    # ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from
    # the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future
    # PR can wire it through ``global_llm_config.yaml``.
    plugin_middlewares: list[Any] = []
    if flags.enable_plugin_loader and not flags.disable_new_agent_stack:
        try:
            allowed_names = load_allowed_plugin_names_from_env()
            if allowed_names:
                plugin_middlewares = load_plugin_middlewares(
                    PluginContext.build(
                        search_space_id=search_space_id,
                        user_id=user_id,
                        thread_visibility=visibility,
                        llm=llm,
                    ),
                    allowed_plugin_names=allowed_names,
                )
        except Exception:  # pragma: no cover - defensive
            logging.warning(
                "Plugin loader failed; continuing without plugins.",
                exc_info=True,
            )
            plugin_middlewares = []

    # SkillsMiddleware (deepagents) loads built-in + space-authored
    # skills via a CompositeBackend. Sources are layered: built-in first,
    # space last, so a search-space-authored skill of the same name
    # overrides the bundled one.
    skills_mw: SkillsMiddleware | None = None
    if flags.enable_skills and not flags.disable_new_agent_stack:
        try:
            skills_factory = build_skills_backend_factory(
                search_space_id=search_space_id
                if filesystem_mode == FilesystemMode.CLOUD
                else None,
            )
            skills_mw = SkillsMiddleware(
                backend=skills_factory,
                sources=default_skills_sources(),
            )
        except Exception as exc:  # pragma: no cover - defensive
            logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
            skills_mw = None

    # LangChain's LLM-driven tool selection — only enabled for stacks
    # large enough to need narrowing (>30 tools).
    selector_mw: LLMToolSelectorMiddleware | None = None
    if (
        flags.enable_llm_tool_selector
        and not flags.disable_new_agent_stack
        and len(tools) > 30
    ):
        try:
            selector_mw = LLMToolSelectorMiddleware(
                model="openai:gpt-4o-mini",
                max_tools=12,
                always_include=[
                    name
                    for name in (
                        "update_memory",
                        "get_connected_accounts",
                        "scrape_webpage",
                    )
                    if name in {t.name for t in tools}
                ],
            )
        except Exception:
            logging.warning("LLMToolSelectorMiddleware init failed; skipping.")
            selector_mw = None

    deepagent_middleware = [
        # BusyMutex is OUTERMOST: it must wrap the entire stream so no
        # other turn can sneak in while this one is mid-flight.
        busy_mutex_mw,
        # OTel spans sit just inside BusyMutex so each retry attempt
        # gets its own model.call / tool.call span.
        otel_mw,
        TodoListMiddleware(),
        _memory_middleware,
        AnonymousDocumentMiddleware(
            anon_session_id=anon_session_id,
        )
        if filesystem_mode == FilesystemMode.CLOUD
        else None,
        KnowledgeTreeMiddleware(
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            llm=llm,
        )
        if filesystem_mode == FilesystemMode.CLOUD
        else None,
        KnowledgePriorityMiddleware(
            llm=llm,
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
        ),
        FileIntentMiddleware(llm=llm),
        SurfSenseFilesystemMiddleware(
            backend=backend_resolver,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            created_by_id=user_id,
            thread_id=thread_id,
        ),
        KnowledgeBasePersistenceMiddleware(
            search_space_id=search_space_id,
            created_by_id=user_id,
            filesystem_mode=filesystem_mode,
            thread_id=thread_id,
        )
        if filesystem_mode == FilesystemMode.CLOUD
        else None,
        # Skill loader. Placed before SubAgentMiddleware so subagents
        # inherit the same skill metadata (subagent specs reference the
        # same source paths via ``default_skills_sources()``).
        skills_mw,
        SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs),
        # Tool selection (only when >30 tools and flag on).
        selector_mw,
        # Defensive caps, then prune, then summarize.
        model_call_limit_mw,
        tool_call_limit_mw,
        context_edit_mw,
        summarization_mw,
        # Provider compatibility + retry chain — placed after prune/compact
        # so retries happen on the already-trimmed payload.
        noop_mw,
        retry_mw,
        fallback_mw,
        # Tool-call repair must run after model emits but before
        # permission / dedup / doom-loop interpret the calls.
        repair_mw,
        # Permission deny/ask BEFORE the calls are forwarded to tool nodes.
        permission_mw,
        doom_loop_mw,
        # Action log sits inside permission so denied calls don't appear
        # as completions, and outside dedup so each unique tool invocation
        # gets its own row.
        action_log_mw,
        PatchToolCallsMiddleware(),
        DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
        # Plugin slot — sits at the tail so plugin-side transforms see the
        # final tool result. Prompt caching is now applied at LLM build time
        # via ``apply_litellm_prompt_caching`` (see prompt_caching.py), so no
        # caching middleware is needed here. Multiple plugins run in declared
        # order; loader filtered by the admin allowlist already.
        *plugin_middlewares,
    ]
    deepagent_middleware = [m for m in deepagent_middleware if m is not None]

    agent = create_agent(
        llm,
        system_prompt=final_system_prompt,
        tools=list(tools),
        middleware=deepagent_middleware,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )
    return agent.with_config(
        {
            "recursion_limit": 10_000,
            "metadata": {
                "ls_integration": "deepagents",
                "versions": {"deepagents": deepagents_version},
            },
        }
    )