Merge pull request #1352 from MODSetter/dev
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions

feat: Add multi-agent orchestration
This commit is contained in:
Rohan Verma 2026-05-05 19:28:28 -07:00 committed by GitHub
commit 83ee58016e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
345 changed files with 21459 additions and 363 deletions

View file

@ -1 +1 @@
0.0.22
0.0.23

View file

@ -282,6 +282,9 @@ LANGSMITH_PROJECT=surfsense
# =============================================================================
# OPTIONAL: New-chat agent feature flags
# =============================================================================
# Multi-agent orchestrator switch for authenticated chat streaming.
# MULTI_AGENT_CHAT_ENABLED=false
# Master kill-switch — when true, every flag below is forced OFF.
# SURFSENSE_DISABLE_NEW_AGENT_STACK=false

View file

@ -0,0 +1,7 @@
"""Deepagents-backed routes: ``subagents/``; main-agent graph under ``main_agent/`` (SRP subpackages)."""
from __future__ import annotations
from .main_agent import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,43 @@
"""Connector-type to subagent name; subagent name to availability tokens for build_subagents."""
from __future__ import annotations
# Maps a raw connector-type token (as stored on the workspace) to the name of
# the subagent that services it. Several tokens (native + Composio variants)
# may fan in to the same subagent.
CONNECTOR_TYPE_TO_CONNECTOR_AGENT_MAPS: dict[str, str] = {
    "GOOGLE_GMAIL_CONNECTOR": "gmail",
    "COMPOSIO_GMAIL_CONNECTOR": "gmail",
    "GOOGLE_CALENDAR_CONNECTOR": "calendar",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "calendar",
    "DISCORD_CONNECTOR": "discord",
    "TEAMS_CONNECTOR": "teams",
    "LUMA_CONNECTOR": "luma",
    "LINEAR_CONNECTOR": "linear",
    "JIRA_CONNECTOR": "jira",
    "CLICKUP_CONNECTOR": "clickup",
    "SLACK_CONNECTOR": "slack",
    "AIRTABLE_CONNECTOR": "airtable",
    "NOTION_CONNECTOR": "notion",
    "CONFLUENCE_CONNECTOR": "confluence",
    "GOOGLE_DRIVE_CONNECTOR": "google_drive",
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "google_drive",
    "DROPBOX_CONNECTOR": "dropbox",
    "ONEDRIVE_CONNECTOR": "onedrive",
}
# Availability tokens that must be present for a subagent to be offered; an
# empty frozenset means the subagent is always available ("deliverables").
# NOTE(review): dropbox/google_drive/onedrive gate on "*_FILE" tokens while
# the map above uses "*_CONNECTOR" tokens — presumably these keys come from a
# different vocabulary (searchable document types rather than connector
# types); confirm against the producer of the availability list.
# NOTE(review): gmail/calendar list only the native token, not the COMPOSIO_*
# variants — verify a Composio-only workspace should still get the subagent.
SUBAGENT_TO_REQUIRED_CONNECTOR_MAP: dict[str, frozenset[str]] = {
    "deliverables": frozenset(),
    "airtable": frozenset({"AIRTABLE_CONNECTOR"}),
    "calendar": frozenset({"GOOGLE_CALENDAR_CONNECTOR"}),
    "clickup": frozenset({"CLICKUP_CONNECTOR"}),
    "confluence": frozenset({"CONFLUENCE_CONNECTOR"}),
    "discord": frozenset({"DISCORD_CONNECTOR"}),
    "dropbox": frozenset({"DROPBOX_FILE"}),
    "gmail": frozenset({"GOOGLE_GMAIL_CONNECTOR"}),
    "google_drive": frozenset({"GOOGLE_DRIVE_FILE"}),
    "jira": frozenset({"JIRA_CONNECTOR"}),
    "linear": frozenset({"LINEAR_CONNECTOR"}),
    "luma": frozenset({"LUMA_CONNECTOR"}),
    "notion": frozenset({"NOTION_CONNECTOR"}),
    "onedrive": frozenset({"ONEDRIVE_FILE"}),
    "slack": frozenset({"SLACK_CONNECTOR"}),
    "teams": frozenset({"TEAMS_CONNECTOR"}),
}

View file

@ -0,0 +1,7 @@
"""Main-agent deep agent: ``runtime/`` (factory), ``graph/`` (compile), ``system_prompt/``, etc."""
from __future__ import annotations
from .runtime import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,7 @@
"""Tool-name pruning for context editing (exclude lists without dropping protected tools)."""
from __future__ import annotations
from .prune_tool_names import PRUNE_PROTECTED_TOOL_NAMES, safe_exclude_tools
__all__ = ["PRUNE_PROTECTED_TOOL_NAMES", "safe_exclude_tools"]

View file

@ -0,0 +1,26 @@
"""Tool names excluded from context-editing prune when bound."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
# Tool names that must never be pruned from context once bound: deliverable
# generators (their outputs anchor the turn), email read/search, and the
# tool-call-repair sink "invalid".
PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "generate_report",
        "generate_resume",
        "generate_podcast",
        "generate_video_presentation",
        "generate_image",
        "read_email",
        "search_emails",
        "invalid",
    },
)


def safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
    """Names from ``PRUNE_PROTECTED_TOOL_NAMES`` that appear in ``tools``.

    Returns the intersection in sorted order. Sorting matters: iterating the
    frozenset directly would yield a different order per process under string
    hash randomization, making the result nondeterministic for anything that
    hashes or compares it (e.g. cache keys or exclude-list signatures).
    """
    enabled = {t.name for t in tools}
    return tuple(sorted(PRUNE_PROTECTED_TOOL_NAMES & enabled))

View file

@ -0,0 +1,7 @@
"""Sync compile of the main-agent LangGraph graph (middleware + ``create_agent``)."""
from __future__ import annotations
from .compile_graph_sync import build_compiled_agent_graph_sync
__all__ = ["build_compiled_agent_graph_sync"]

View file

@ -0,0 +1,86 @@
"""Synchronous graph compile (middleware + ``create_agent``)."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from deepagents import __version__ as deepagents_version
from langchain.agents import create_agent
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware import (
build_main_agent_deepagent_middleware,
)
from app.agents.multi_agent_chat.subagents.shared.permissions import (
ToolsPermissions,
)
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
def build_compiled_agent_graph_sync(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    final_system_prompt: str,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    checkpointer: Checkpointer,
    subagent_dependencies: dict[str, Any],
    mcp_tools_by_agent: dict[str, ToolsPermissions] | None = None,
    disabled_tools: list[str] | None = None,
):
    """Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``).

    Builds the full main-agent middleware stack, compiles the LangGraph agent,
    and attaches run-level config. This is CPU-bound, so callers push it off
    the event loop (see ``runtime.agent_cache``).

    Every keyword argument is forwarded verbatim to
    ``build_main_agent_deepagent_middleware``; ``llm``, ``tools``,
    ``final_system_prompt``, and ``checkpointer`` are additionally consumed by
    ``create_agent`` itself. Returns the compiled agent wrapped with run
    config (not a plain graph).
    """
    main_agent_middleware = build_main_agent_deepagent_middleware(
        llm=llm,
        tools=tools,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=max_input_tokens,
        flags=flags,
        subagent_dependencies=subagent_dependencies,
        checkpointer=checkpointer,
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
    )
    agent = create_agent(
        llm,
        system_prompt=final_system_prompt,
        tools=list(tools),
        middleware=main_agent_middleware,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )
    # High recursion limit: deep-agent delegation loops legitimately take many
    # graph steps. Metadata tags traces with the deepagents version in use.
    return agent.with_config(
        {
            "recursion_limit": 10_000,
            "metadata": {
                "ls_integration": "deepagents",
                "versions": {"deepagents": deepagents_version},
            },
        }
    )

View file

@ -0,0 +1,7 @@
"""Async factory: wiring tools, prompts, MCP buckets, then graph compile."""
from __future__ import annotations
from .factory import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,117 @@
"""Compiled agent graph caching for the multi-agent path."""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
from typing import Any
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions
from app.agents.new_chat.agent_cache import (
flags_signature,
get_cache,
stable_hash,
system_prompt_hash,
tools_signature,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
def mcp_signature(mcp_tools_by_agent: dict[str, ToolsPermissions]) -> str:
    """Hash the per-agent MCP tool surface so a change rotates the cache key.

    The fingerprint is order-independent: agents are visited in sorted-name
    order, and the allow/ask tool names within each bucket are sorted.
    """
    fingerprint = [
        (
            agent_name,
            sorted(entry.get("name", "") for entry in perms.get("allow", [])),
            sorted(entry.get("name", "") for entry in perms.get("ask", [])),
        )
        for agent_name, perms in sorted(mcp_tools_by_agent.items())
    ]
    return stable_hash(fingerprint)
async def build_agent_with_cache(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    final_system_prompt: str,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str],
    available_document_types: list[str],
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    checkpointer: Checkpointer,
    subagent_dependencies: dict[str, Any],
    mcp_tools_by_agent: dict[str, ToolsPermissions],
    disabled_tools: list[str] | None,
    config_id: str | None,
) -> Any:
    """Compile the multi-agent graph, serving from cache when key components are stable.

    The compile is CPU-bound, so the cold path runs
    ``build_compiled_agent_graph_sync`` in a worker thread via
    ``asyncio.to_thread``. Caching is bypassed entirely unless
    ``flags.enable_agent_cache`` is on and the new agent stack is not
    kill-switched by ``flags.disable_new_agent_stack``.
    """

    async def _build() -> Any:
        # Cold path: full synchronous compile off the event loop.
        return await asyncio.to_thread(
            build_compiled_agent_graph_sync,
            llm=llm,
            tools=tools,
            final_system_prompt=final_system_prompt,
            backend_resolver=backend_resolver,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
            visibility=visibility,
            anon_session_id=anon_session_id,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
            max_input_tokens=max_input_tokens,
            flags=flags,
            checkpointer=checkpointer,
            subagent_dependencies=subagent_dependencies,
            mcp_tools_by_agent=mcp_tools_by_agent,
            disabled_tools=disabled_tools,
        )

    if not (flags.enable_agent_cache and not flags.disable_new_agent_stack):
        return await _build()
    # Every per-request value any middleware closes over at __init__ must be in
    # the key, otherwise a hit will leak state across threads. Bump the schema
    # version when the component list changes shape.
    cache_key = stable_hash(
        "multi-agent-v1",  # schema version of this key layout
        config_id,
        thread_id,
        user_id,
        search_space_id,
        visibility,
        filesystem_mode,
        anon_session_id,
        tools_signature(
            tools,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
        ),
        mcp_signature(mcp_tools_by_agent),
        flags_signature(flags),
        system_prompt_hash(final_system_prompt),
        max_input_tokens,
        # Sorted so key stability does not depend on caller-supplied order.
        sorted(disabled_tools) if disabled_tools else None,
    )
    return await get_cache().get_or_build(cache_key, builder=_build)
__all__ = ["build_agent_with_cache", "mcp_signature"]

View file

@ -0,0 +1,257 @@
"""Async factory: tools, system prompt, MCP buckets for subagents, then sync graph compile."""
from __future__ import annotations
import logging
import time
from collections.abc import Sequence
from typing import Any
from deepagents.graph import BASE_AGENT_PROMPT
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents import (
get_subagents_to_exclude,
main_prompt_registry_subagent_lines,
)
from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
load_mcp_tools_by_connector,
)
from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
from app.agents.new_chat.filesystem_backends import build_backend_resolver
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.utils.perf import get_perf_logger
from ..system_prompt import build_main_agent_system_prompt
from ..tools import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)
from .agent_cache import build_agent_with_cache
# Module-level perf logger shared by the factory below.
_perf_log = get_perf_logger()


async def create_multi_agent_chat_deep_agent(
    llm: BaseChatModel,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
    mentioned_document_ids: list[int] | None = None,
    anon_session_id: str | None = None,
    filesystem_selection: FilesystemSelection | None = None,
):
    """Deep agent with SurfSense tools/middleware; registry route subagents behind ``task`` when enabled.

    Pipeline (each stage perf-logged via ``_perf_log``):
      1. connector/doc-type discovery (fails closed to empty lists),
      2. MCP tool discovery per subagent bucket (degrades to no MCP tools),
      3. main-agent tool construction via ``build_tools_async``,
      4. system-prompt assembly (custom or default instructions),
      5. cached graph compile via ``build_agent_with_cache``.

    Returns the compiled, config-wrapped agent graph.
    """
    _t_agent_total = time.perf_counter()
    # Enable provider-side prompt caching hints before any prompt assembly.
    apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id)
    filesystem_selection = filesystem_selection or FilesystemSelection()
    backend_resolver = build_backend_resolver(
        filesystem_selection,
        # Cloud filesystem mode scopes the backend to the workspace.
        search_space_id=search_space_id
        if filesystem_selection.mode == FilesystemMode.CLOUD
        else None,
    )
    available_connectors: list[str] | None = None
    available_document_types: list[str] | None = None
    _t0 = time.perf_counter()
    try:
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
        available_connectors = _map_connectors_to_searchable_types(connector_types)
        available_document_types = await connector_service.get_available_document_types(
            search_space_id
        )
    except Exception as e:
        logging.warning(
            "Connector/doc-type discovery failed; excluding connector subagents this turn: %s",
            e,
        )
    # Fail closed: a None list short-circuits ``get_subagents_to_exclude`` to "exclude
    # nothing", which would silently advertise every connector specialist on a flaky
    # discovery call. Empty list excludes connector-gated subagents while keeping builtins.
    if available_connectors is None:
        available_connectors = []
    if available_document_types is None:
        available_document_types = []
    _perf_log.info(
        "[create_agent] Connector/doc-type discovery in %.3fs",
        time.perf_counter() - _t0,
    )
    visibility = thread_visibility or ChatVisibility.PRIVATE
    # Model "profile" (when the provider exposes one as a dict) carries the
    # context-window size used for token budgeting downstream.
    _model_profile = getattr(llm, "profile", None)
    _max_input_tokens: int | None = (
        _model_profile.get("max_input_tokens")
        if isinstance(_model_profile, dict)
        else None
    )
    # Shared dependency bag handed to tool builders and to every subagent.
    dependencies: dict[str, Any] = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,
        "thread_id": thread_id,
        "thread_visibility": visibility,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "max_input_tokens": _max_input_tokens,
        "llm": llm,
    }
    _t0 = time.perf_counter()
    try:
        mcp_tools_by_agent = await load_mcp_tools_by_connector(
            db_session, search_space_id
        )
    except Exception as e:
        # Degrade to builtins-only rather than aborting the turn: a transient
        # DB or MCP-server hiccup should not deny the user a response.
        logging.warning(
            "MCP tool discovery failed; subagents will run without MCP tools this turn: %s",
            e,
        )
        mcp_tools_by_agent = {}
    _perf_log.info(
        "[create_agent] load_mcp_tools_by_connector in %.3fs (%d buckets)",
        time.perf_counter() - _t0,
        len(mcp_tools_by_agent),
    )
    # Force-disable direct KB search on the main agent regardless of caller
    # input; the original ``disabled_tools`` is kept intact for prompt copy
    # and cache-key purposes below.
    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")
    if enabled_tools is not None:
        # Restrict the caller's enable-list to tools the main agent may bind.
        main_agent_enabled_tools = [
            n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
        ]
    else:
        main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
    _t0 = time.perf_counter()
    tools = await build_tools_async(
        dependencies=dependencies,
        enabled_tools=main_agent_enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
        include_mcp_tools=False,
    )
    _flags: AgentFeatureFlags = get_flags()
    # Tool-call repair needs the catch-all "invalid" tool bound exactly once.
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
    }:
        tools = [*list(tools), invalid_tool]
    _perf_log.info(
        "[create_agent] build_tools_async in %.3fs (%d tools)",
        time.perf_counter() - _t0,
        len(tools),
    )
    _t0 = time.perf_counter()
    _enabled_tool_names = {t.name for t in tools}
    # Prompt copy reflects only user-disabled tools, not the forced KB disable.
    _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()
    _model_name: str | None = None
    prof = getattr(llm, "model_name", None) or getattr(llm, "model", None)
    if isinstance(prof, str):
        _model_name = prof
    _connector_exclude = get_subagents_to_exclude(available_connectors)
    _registry_subagent_prompt_lines = main_prompt_registry_subagent_lines(
        _connector_exclude
    )
    if agent_config is not None:
        system_prompt = build_main_agent_system_prompt(
            today=None,
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            model_name=_model_name or getattr(agent_config, "model_name", None),
            registry_subagent_prompt_lines=_registry_subagent_prompt_lines,
        )
    else:
        system_prompt = build_main_agent_system_prompt(
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            citations_enabled=True,
            model_name=_model_name,
            registry_subagent_prompt_lines=_registry_subagent_prompt_lines,
        )
    _perf_log.info(
        "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
    )
    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
    config_id = agent_config.config_id if agent_config is not None else None
    _t0 = time.perf_counter()
    agent = await build_agent_with_cache(
        llm=llm,
        tools=tools,
        final_system_prompt=final_system_prompt,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_selection.mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=_max_input_tokens,
        flags=_flags,
        checkpointer=checkpointer,
        subagent_dependencies=dependencies,
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
        config_id=config_id,
    )
    _perf_log.info(
        "[create_agent] Middleware stack + graph compiled in %.3fs",
        time.perf_counter() - _t0,
    )
    _perf_log.info(
        "[create_agent] Total agent creation in %.3fs",
        time.perf_counter() - _t_agent_total,
    )
    return agent

View file

@ -0,0 +1,7 @@
"""Main-agent system prompt — not shared verbatim with single-agent ``new_chat``."""
from __future__ import annotations
from .builder import build_main_agent_system_prompt
__all__ = ["build_main_agent_system_prompt"]

View file

@ -0,0 +1,7 @@
"""Assemble the main-agent system prompt from ``markdown/*.md`` fragments."""
from __future__ import annotations
from .compose import build_main_agent_system_prompt
__all__ = ["build_main_agent_system_prompt"]

View file

@ -0,0 +1,53 @@
"""Assemble the **main-agent** deep-agent system string only.
Sections (order matters): core instructions → provider hints → citations → dynamic
``<registry_subagents>`` → SurfSense ``<tools>``.
"""
from __future__ import annotations
from datetime import UTC, datetime
from app.db import ChatVisibility
from .sections.citations import build_citations_section
from .sections.provider import build_provider_section
from .sections.registry_subagents import build_registry_subagents_section
from .sections.system_instruction import build_default_system_instruction_xml
from .sections.tools import build_tools_section
def build_main_agent_system_prompt(
    *,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    model_name: str | None = None,
    registry_subagent_prompt_lines: list[tuple[str, str]] | None = None,
) -> str:
    """Assemble the main-agent system string.

    Section order matters: system instruction (custom or default), provider
    hints, citations, dynamic ``<registry_subagents>``, then the SurfSense
    ``<tools>`` block.

    Args:
        today: Override for "today"; defaults to the current UTC date.
        thread_visibility: Defaults to ``ChatVisibility.PRIVATE`` when None.
        custom_system_instructions: User-authored instructions; when non-blank
            they replace the default instruction block. May contain a literal
            ``{resolved_today}`` placeholder.
        use_default_system_instructions: When no custom instructions are set,
            controls whether the default block is emitted at all.
    """
    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
    visibility = thread_visibility or ChatVisibility.PRIVATE
    if custom_system_instructions and custom_system_instructions.strip():
        # Custom instructions are user-authored and may legitimately contain
        # braces (code or JSON samples). str.format() would raise
        # KeyError/ValueError on those, so substitute only the documented
        # {resolved_today} placeholder.
        system_block = custom_system_instructions.replace(
            "{resolved_today}", resolved_today
        )
    elif use_default_system_instructions:
        system_block = build_default_system_instruction_xml(
            visibility=visibility,
            resolved_today=resolved_today,
        )
    else:
        system_block = ""
    system_block += build_provider_section(model_name=model_name)
    system_block += build_citations_section(citations_enabled=citations_enabled)
    system_block += build_registry_subagents_section(registry_subagent_prompt_lines)
    system_block += build_tools_section(
        visibility=visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
    )
    return system_block

View file

@ -0,0 +1,16 @@
"""Load main-agent-only markdown from ``system_prompt/markdown/`` (``importlib.resources``)."""
from __future__ import annotations
from importlib import resources
# Package that holds the main-agent-only markdown fragments.
_PROMPTS_PACKAGE = "app.agents.multi_agent_chat.main_agent.system_prompt.markdown"


def read_prompt_md(filename: str) -> str:
    """Return the text of ``markdown/<filename>`` (e.g. ``agent_private.md`` or
    ``tools/_preamble.md``).

    A single trailing newline is stripped; a missing fragment yields "".
    """
    resource = resources.files(_PROMPTS_PACKAGE).joinpath(filename)
    if not resource.is_file():
        return ""
    content = resource.read_text(encoding="utf-8")
    return content.removesuffix("\n")

View file

@ -0,0 +1,50 @@
"""Provider-specific style hints from ``markdown/providers/`` (main agent only)."""
from __future__ import annotations
import re
from .load_md import read_prompt_md
# Variant identifier for provider style hints (plain string alias).
ProviderVariant = str
_OPENAI_CODEX_RE = re.compile(
    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE
)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
# Checked in order; first match wins. Codex must precede the broader OpenAI
# reasoning/classic patterns so "gpt-5.1-codex" lands on the codex variant.
_VARIANT_RULES: tuple[tuple[re.Pattern[str], ProviderVariant], ...] = (
    (_OPENAI_CODEX_RE, "openai_codex"),
    (_OPENAI_REASONING_RE, "openai_reasoning"),
    (_OPENAI_CLASSIC_RE, "openai_classic"),
    (_ANTHROPIC_RE, "anthropic"),
    (_GOOGLE_RE, "google"),
    (_KIMI_RE, "kimi"),
    (_GROK_RE, "grok"),
    (_DEEPSEEK_RE, "deepseek"),
)


def detect_provider_variant(model_name: str | None) -> ProviderVariant:
    """Map a model name to its provider style-hint variant ("default" when unknown)."""
    if not model_name:
        return "default"
    candidate = model_name.strip()
    for pattern, variant in _VARIANT_RULES:
        if pattern.search(candidate):
            return variant
    return "default"
def build_provider_hint_block(provider_variant: ProviderVariant) -> str:
    """Return the provider's style-hint fragment framed in newlines.

    Yields "" for the default/unknown variant or a missing fragment file.
    """
    if provider_variant and provider_variant != "default":
        text = read_prompt_md(f"providers/{provider_variant}.md")
        if text:
            return f"\n{text}\n"
    return ""

View file

@ -0,0 +1 @@
"""Rendered slices of the main-agent system prompt."""

View file

@ -0,0 +1,11 @@
"""Citation fragment for the main agent (chunk-tagged context only)."""
from __future__ import annotations
from ..load_md import read_prompt_md
def build_citations_section(*, citations_enabled: bool) -> str:
    """Citation-policy fragment, newline-framed; "" when the fragment is missing."""
    fragment_name = "citations_on.md" if citations_enabled else "citations_off.md"
    text = read_prompt_md(fragment_name)
    if not text:
        return ""
    return f"\n{text}\n"

View file

@ -0,0 +1,9 @@
"""Provider-specific style hints."""
from __future__ import annotations
from ..provider_hints import build_provider_hint_block, detect_provider_variant
def build_provider_section(*, model_name: str | None) -> str:
    """Provider style-hint section: detect the variant, then load its fragment."""
    variant = detect_provider_variant(model_name)
    return build_provider_hint_block(variant)

View file

@ -0,0 +1,27 @@
"""Dynamic ``<registry_subagents>`` block: **task** specialists actually built for this workspace."""
from __future__ import annotations
def build_registry_subagents_section(
    registry_subagent_lines: list[tuple[str, str]] | None,
) -> str:
    """Render the dynamic ``<registry_subagents>`` block.

    ``None`` means the caller computed no registry lines at all (emit nothing);
    an empty list means the registry ran but no specialist matched this
    workspace (emit an explicit "none listed" block).
    """
    if registry_subagent_lines is None:
        return ""
    if not registry_subagent_lines:
        return (
            "\n<registry_subagents>\n"
            "No registry specialists are listed for **task** in this workspace.\n"
            "</registry_subagents>\n"
        )
    header = (
        "\n<registry_subagents>\n"
        "These specialists are registered for **task** (routes without a matching connector are omitted).\n"
    )
    footer = (
        "The runtime may also offer a general-purpose **task** helper with your tools in a separate context.\n"
        "Pick the specialist by **name**. Put full instructions in the task prompt; they do not see this thread.\n"
        "</registry_subagents>\n"
    )
    bullets = "\n".join(
        f"- **{name}** — {desc}" for name, desc in registry_subagent_lines
    )
    return header + bullets + "\n" + footer

View file

@ -0,0 +1,35 @@
"""Default ``<system_instruction>`` block for the main agent only."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
# Fragment order for private (single-user) threads.
_PRIVATE_ORDER = (
    "agent_private.md",
    "kb_only_policy_private.md",
    "main_agent_tool_routing.md",
    "parameter_resolution.md",
    "memory_protocol_private.md",
)
# Fragment order for shared team-space threads.
_TEAM_ORDER = (
    "agent_team.md",
    "kb_only_policy_team.md",
    "main_agent_tool_routing.md",
    "parameter_resolution.md",
    "memory_protocol_team.md",
)


def build_default_system_instruction_xml(
    *,
    visibility: ChatVisibility,
    resolved_today: str,
) -> str:
    """Render the default ``<system_instruction>`` block for the main agent.

    Joins the visibility-appropriate markdown fragments (missing ones are
    skipped) and fills any literal ``{resolved_today}`` placeholder they
    contain with the pre-formatted ISO date string.
    """
    order = _TEAM_ORDER if visibility == ChatVisibility.SEARCH_SPACE else _PRIVATE_ORDER
    parts = [read_prompt_md(name) for name in order]
    body = "\n\n".join(p for p in parts if p)
    # Substitute only the known placeholder. Calling str.format() on the
    # joined body would raise KeyError/ValueError on any other brace a
    # fragment happens to contain (e.g. a JSON or code sample), so use a
    # targeted replace instead.
    body = body.replace("{resolved_today}", resolved_today)
    return f"\n<system_instruction>\n{body}\n\n</system_instruction>\n"

View file

@ -0,0 +1,20 @@
"""Main-agent ``<tools>`` block (memory + research builtins only; see ``main_agent.tools``)."""
from __future__ import annotations
from app.db import ChatVisibility
from ..tool_instruction_block import build_tools_instruction_block
def build_tools_section(
    *,
    visibility: ChatVisibility,
    enabled_tool_names: set[str] | None,
    disabled_tool_names: set[str] | None,
) -> str:
    """Main-agent ``<tools>`` section: a thin pass-through to the shared builder.

    Kept as its own section module so ``compose`` treats every prompt section
    uniformly; all rendering logic lives in ``tool_instruction_block``.
    """
    return build_tools_instruction_block(
        visibility=visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
    )

View file

@ -0,0 +1,86 @@
"""``<tools>`` + ``<tool_call_examples>`` from ``system_prompt/markdown/{tools,examples}/``.
Only documents the tools the main agent actually binds, not the full ``new_chat`` set.
"""
from __future__ import annotations
from app.db import ChatVisibility
from ...tools import MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED
from .load_md import read_prompt_md
# Tools whose prompt fragments differ per thread visibility (team vs private).
_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})


def _tool_fragment_path(tool_name: str, variant: str) -> str:
    """Relative path of the tool's instruction fragment under ``markdown/``."""
    suffix = f"_{variant}" if tool_name in _MEMORY_VARIANT_TOOLS else ""
    return f"tools/{tool_name}{suffix}.md"


def _example_fragment_path(tool_name: str, variant: str) -> str:
    """Relative path of the tool's example fragment under ``markdown/``."""
    suffix = f"_{variant}" if tool_name in _MEMORY_VARIANT_TOOLS else ""
    return f"examples/{tool_name}{suffix}.md"


def _format_tool_label(tool_name: str) -> str:
    """Human-readable label for prompt copy: ``search_docs`` -> ``Search Docs``."""
    return " ".join(part.title() for part in tool_name.split("_"))
def build_tools_instruction_block(
    *,
    visibility: ChatVisibility,
    enabled_tool_names: set[str] | None,
    disabled_tool_names: set[str] | None,
) -> str:
    """Render the main agent's tool instructions plus ``<tool_call_examples>``.

    Walks ``MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED`` — that tuple fixes the
    prompt order — loading one instruction fragment and one optional example
    fragment per enabled tool, then appends a notice covering user-disabled
    tools.

    NOTE(review): this function emits the closing ``</tools>`` tag only; the
    opening ``<tools>`` tag is presumably inside ``tools/_preamble.md`` —
    confirm that fragment actually opens it.
    """
    # Memory fragments carry visibility-specific wording.
    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
    parts: list[str] = []
    preamble = read_prompt_md("tools/_preamble.md")
    if preamble:
        parts.append(preamble + "\n")
    examples: list[str] = []
    for tool_name in MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED:
        # ``None`` means "no filter": document every known main-agent tool.
        if enabled_tool_names is not None and tool_name not in enabled_tool_names:
            continue
        instruction = read_prompt_md(_tool_fragment_path(tool_name, variant))
        if instruction:
            parts.append(instruction + "\n")
        example = read_prompt_md(_example_fragment_path(tool_name, variant))
        if example:
            examples.append(example + "\n")
    # Mention only disabled tools the main agent would otherwise document.
    known_disabled = (
        set(disabled_tool_names) & set(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
        if disabled_tool_names
        else set()
    )
    if known_disabled:
        # Iterate the ordered tuple so the listing order is deterministic.
        disabled_list = ", ".join(
            _format_tool_label(n)
            for n in MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED
            if n in known_disabled
        )
        parts.append(
            "\n"
            "DISABLED TOOLS (by user, main-agent scope):\n"
            f"These SurfSense tools were disabled on the main agent for this session: {disabled_list}.\n"
            "You do NOT have access to them and MUST NOT claim you can use them.\n"
            "If the user still needs that capability, delegate with **task** if a subagent covers it,\n"
            "otherwise explain it is disabled on the main agent for this session.\n"
        )
    parts.append("\n</tools>\n")
    if examples:
        parts.append("<tool_call_examples>")
        parts.extend(examples)
        parts.append("</tool_call_examples>\n")
    return "".join(parts)

View file

@ -0,0 +1 @@
"""Markdown fragments for the **main-agent** system prompt only (`importlib.resources`)."""

View file

@ -0,0 +1,9 @@
You are SurfSense's **main agent**: you answer using the user's knowledge context,
lightweight research tools, and memory — and you **delegate** integrations and
specialized work via **task** (see `<tool_routing>` in this prompt).
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -0,0 +1,11 @@
You are SurfSense's **main agent** for this team space: you answer using shared
knowledge context, lightweight research tools, and memory — and you **delegate**
integrations and specialized work via **task** (see `<tool_routing>` in this prompt).
In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -0,0 +1,15 @@
<citation_instructions>
IMPORTANT: Citations are DISABLED for this configuration.
DO NOT include `[citation:…]` markers anywhere — even if tool descriptions or examples
mention them. Ignore citation-format reminders elsewhere in this prompt when they conflict
with this block.
Instead:
1. Answer in plain prose; optional markdown links to public URLs when sources are URLs.
2. Do NOT expose raw chunk IDs, document IDs, or internal IDs to the user.
3. Present indexed or doc-search facts naturally without attribution markers.
When answering from workspace or docs context: integrate facts cleanly without claiming
“this comes from chunk X”.
</citation_instructions>

View file

@ -0,0 +1,15 @@
<citation_instructions>
This block appears **before** `<tools>` so it wins over any tool-example wording below.
Apply chunk citations **only** when the runtime injects `<document>` / `<chunk id='…'>` blocks
(e.g. from SurfSense docs search or priority documents).
1. For each factual statement taken from those chunks, add `[citation:chunk_id]` using the **exact** `chunk_id` string from `<chunk id='…'>`.
2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated).
3. Never invent or normalize ids; if unsure, omit the citation.
4. Plain brackets only — no markdown links, no `([citation:…](url))`, no footnote numbering.
Chunk ids may be numeric, prefixed (e.g. `doc-45`), or URLs when the source is web-shaped — copy verbatim.
If no chunk-tagged documents appear in context this turn, do not fabricate citations.
</citation_instructions>

View file

@ -0,0 +1,13 @@
- User: "Check out https://dev.to/some-article"
- Call: `scrape_webpage(url="https://dev.to/some-article")`
- Respond with a structured analysis — key points, takeaways.
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
- Respond with a thorough summary using headings and bullet points.
- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
- Call: `scrape_webpage(url="https://example.com/stats")`
- IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
- User: "https://example.com/blog/weekend-recipes"
- Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
- When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.

View file

@ -0,0 +1,9 @@
- User: "How do I install SurfSense?"
- Call: `search_surfsense_docs(query="installation setup")`
- User: "What connectors does SurfSense support?"
- Call: `search_surfsense_docs(query="available connectors integrations")`
- User: "How do I set up the Notion connector?"
- Call: `search_surfsense_docs(query="Notion connector setup configuration")` (how-to docs). Changing data inside Notion itself → **task**.
- User: "How do I use Docker to run SurfSense?"
- Call: `search_surfsense_docs(query="Docker installation setup")`

View file

@ -0,0 +1,16 @@
- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact. Use their first name in the entry, short neutral heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. Merge with existing memory, add a new heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## Interests & background\n...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info under a fitting heading:
update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n")
- User: "Always respond in bullet points"
- Standing instruction:
update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n")

View file

@ -0,0 +1,7 @@
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...")

View file

@ -0,0 +1,8 @@
- User: "What's the current USD to INR exchange rate?"
- Call: `web_search(query="current USD to INR exchange rate")`
- Answer from returned snippets or scrape a top URL if needed; use markdown links to sources.
- User: "What's the latest news about AI?"
- Call: `web_search(query="latest AI news today")`
- User: "What's the weather in New York?"
- Call: `web_search(query="weather New York today")`

View file

@ -0,0 +1,19 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected workspace
documents (when present), **search_surfsense_docs**, **web_search**, **scrape_webpage**,
or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual or informational questions from general knowledge unless the user
explicitly grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in their workspace/docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense (e.g. "what can you do?")
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions ("bullet-point this paragraph")
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -0,0 +1,19 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected shared
workspace documents (when present), **search_surfsense_docs**, **web_search**,
**scrape_webpage**, or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual questions from general knowledge unless a team member explicitly
grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in shared docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -0,0 +1,27 @@
<tool_routing>
Use **task** for anything beyond your direct SurfSense tools: calendar, mail,
chat, tickets, documents in third-party systems, connector-specific discovery,
deliverables (reports, podcasts, images, etc.), and other specialized routes.
The live list of specialists you may target with **task** for this workspace is in
`<registry_subagents>` (later in this prompt).
Your **direct** SurfSense tools are only: **update_memory**, **web_search**,
**scrape_webpage**, and **search_surfsense_docs**. The runtime may also attach
deep-agent helpers (e.g. todos, filesystem, **task** itself). Use **task** whenever
the user needs capabilities **not** listed in the `<tools>` section (that section appears
later in this system prompt, after citation rules).
Do not treat live third-party state as if it were already in the indexed knowledge
base; reach it via **task**.
Never emit more than one **task** tool call in the same turn. Bundle related work
for the same specialist into a single **task** invocation (the subagent itself can
call its own tools in parallel inside that one run). Parallel **task** calls would
fan out into multiple concurrent subagent runs whose human-approval interrupts
cannot be coordinated; one **task** at a time is required.
</tool_routing>
<!-- TODO: lift the single-task constraint once the runtime supports parallel task
interrupts end-to-end (multi-interrupt SSE + interrupt-id-keyed Command(resume)
+ keyed surfsense_resume_value side-channel). Until then this nudge is the only
guard; the parent graph's resume cannot address multiple pending interrupts. -->

View file

@ -0,0 +1,6 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the user (role, interests, preferences, projects,
background, or standing instructions)? If yes, you MUST call update_memory
alongside your normal response — do not defer this to a later turn.
</memory_protocol>

View file

@ -0,0 +1,6 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the team (decisions, conventions, architecture, processes,
or key facts)? If yes, you MUST call update_memory alongside your normal response —
do not defer this to a later turn.
</memory_protocol>

View file

@ -0,0 +1,15 @@
<parameter_resolution>
You do **not** call connector-specific discovery tools yourself (accounts, channels,
Jira cloud IDs, Airtable bases, Slack channels, etc.). Those tools exist only on
**task** subagents.
When the user needs work inside a connected product, delegate with **task** and a
clear goal. If several Slack channels, Jira projects, calendar calendars, etc. could
match and only the integration can list them, **you must not** ask the human for
internal IDs (UUIDs, cloud IDs, opaque keys). The **task** subagent uses connector
tools to list candidates and either picks the only sensible match or asks the user
to choose using **normal labels** (e.g. channel display name, project title), not raw IDs.
If you already have plain-language choices from the user or from prior tool output,
you may pass them through to **task** without re-discovery.
</parameter_resolution>

View file

@ -0,0 +1,16 @@
<provider_hints>
You are running on an Anthropic Claude model (SurfSense **main agent**).
Structured reasoning:
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
Professional objectivity:
- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don't invent connector access.
Task management:
- For 3+ steps, use todo tooling; update statuses promptly.
Tool calls:
- Parallelise independent calls; sequence only when outputs chain.
- Never pretend you can run connector-specific tools directly — route through **task** when needed.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a DeepSeek model (SurfSense **main agent**).
Reasoning hygiene (R1-aware):
- Keep internal scratch separate from the user-facing answer; don't leak chain-of-thought into tool arguments.
Output style:
- Concise; lead with the answer or the next action; avoid sycophantic openers.
Attribution:
- When citations are **enabled** and facts come from chunk-tagged context, follow the citation block above.
- When citations are **disabled**, do not use `[citation:…]`.
Tool calls:
- Parallelise independent calls.
- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask.
- Don't invent paths, chunk ids, or URLs — only values from tools or the user.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a Google Gemini model (SurfSense **main agent**).
Output style:
- Concise & direct. Fewer than ~3 lines of prose when the task allows (excluding tool output and code).
- No filler openers/closers — move straight to the answer or the tool call.
- GitHub-flavoured Markdown; monospace-friendly.
Workflow (Understand → Plan → Act → Verify):
1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing.
2. **Plan:** for multi-step work, a short plan first.
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
4. **Verify:** re-read or re-search only when it materially reduces risk.
Discipline:
- Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
- Path arguments for filesystem tools must be exact strings from tool results — never invent paths.
</provider_hints>

View file

@ -0,0 +1,16 @@
<provider_hints>
You are running on an xAI Grok model (SurfSense **main agent**).
Maximum terseness:
- Fewer than 4 lines unless detail is requested; skip preamble/postamble.
Tool discipline:
- Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don't repeat identical calls.
Attribution:
- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
Style:
- No emojis unless asked; flat lists for short answers.
</provider_hints>

View file

@ -0,0 +1,21 @@
<provider_hints>
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+), SurfSense **main agent**.
Action bias:
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
Tool calls:
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
Language:
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
Discipline:
- Stay on track. Never give the user more than what they asked for.
- Fact-check with tools; don't fabricate chunk ids or connector outcomes.
- Keep it stupidly simple. Don't overcomplicate.
</provider_hints>

View file

@ -0,0 +1,20 @@
<provider_hints>
You are running on a classic OpenAI chat model (GPT-4 family), SurfSense **main agent**.
Persistence:
- Finish the user's request in the same turn when tools allow — don't stop at intent only.
- If a tool errors, fix arguments and retry once before giving up.
Planning:
- For 3+ steps, use the todo / planning tool; mark `in_progress` / `completed` promptly.
- One short sentence before non-trivial tool use is fine.
Output style:
- Conversational but professional; bullets for findings; fenced code with language tags when needed.
- Summarize tool output — don't paste walls of text.
Tool calls:
- Parallelise independent calls in one turn.
- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage**
for fresh public facts; integrations and heavy workflows → **task**.
</provider_hints>

View file

@ -0,0 +1,13 @@
<provider_hints>
You are running on an OpenAI Codex-class model (SurfSense **main agent**).
Output style:
- Concise; don't paste huge fetch blobs — summarize.
- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
- Numbered lists work well when the user should reply with a single option index.
- No emojis; single-level bullets.
Tool calls:
- Parallelise independent calls; chain only when required.
- Don't ask permission for obvious safe defaults — state what you did.
</provider_hints>

View file

@ -0,0 +1,22 @@
<provider_hints>
You are running on an OpenAI reasoning model (GPT-5+ / o-series), SurfSense **main agent**.
Output style:
- Be terse and direct. Don't restate the user's request before answering.
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
Channels (for clients that support them):
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
Tool calls:
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
- Connector or integration execution belongs in **task**, not invented main-agent tools.
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
Autonomy:
- Persist until the task is fully resolved within the current turn whenever feasible — within tools you actually have; delegate the rest via **task**.
</provider_hints>

View file

@ -0,0 +1,9 @@
<tools>
You have access to the following **SurfSense** tools (main-agent scope only):
IMPORTANT: You can ONLY use the tools listed below. Anything else — connectors,
deliverables, or multi-step integration work — goes through **task**, not as a
tool in this list.
Do NOT claim you can use a capability if it is not listed here.

View file

@ -0,0 +1,10 @@
- scrape_webpage: Fetch and extract readable content from a single HTTP(S) URL.
- Use when the user wants the *actual page body* (article, table, dashboard snapshot), not just search snippets.
- Try the tool when a URL is given or referenced; don't refuse without attempting unless the URL is clearly unsafe/invalid.
- Args:
- url: Page to fetch
- max_length: Cap on returned characters (default: 50000)
- Returns: Title, metadata, and markdown-ish body.
- Summarize clearly afterward; link back with `[label](url)`.
- If indexed workspace material is insufficient and the user points at a public URL, scraping is appropriate — still not a substitute for **task** on private connectors.

View file

@ -0,0 +1,9 @@
- search_surfsense_docs: Search official SurfSense documentation (product help).
- Use when the user asks how SurfSense works, setup, connectors at a high level, configuration, etc.
- Not a substitute for **task** when they need actions inside Gmail/Slack/Jira/etc.
- Args:
- query: What to look up in SurfSense docs
- top_k: Number of chunks to retrieve (default: 10)
- Returns: Doc excerpts; chunk ids may appear for attribution — follow the **citation**
instructions block above when citations are enabled; otherwise summarize without `[citation:…]`.

View file

@ -0,0 +1,12 @@
- update_memory: Curate the **personal** long-term memory document for this user.
- Current memory (if any) appears in `<user_memory>` with usage vs limit.
- Call when the user asks to remember/forget, or shares durable facts/preferences/instructions.
- Use the first name from `<user_name>` when writing entries — write “Alex prefers…” not “The user prefers…”.
Do not store the name alone as a memory entry.
- Skip ephemeral chat noise (one-off q/a, greetings, session logistics).
- Args:
- updated_memory: FULL replacement markdown (merge and curate — don't only append).
- Formatting rules:
- Bullets: `- (YYYY-MM-DD) [marker] text` with markers `[fact]`, `[pref]`, `[instr]` (priority when trimming: instr > pref > fact).
- Each bullet under a short `##` heading; keep total size under the limit shown in `<user_memory>`.

View file

@ -0,0 +1,26 @@
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- NEVER store personal memory in team memory (e.g. personal bio, individual
preferences, or user-only standing instructions).
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
- Do not store short-lived or ephemeral info: one-off questions, greetings,
session logistics, or things that only matter for the current task.
- Args:
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text
Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory.
- Keep it concise and well under the character limit shown in <team_memory>.
- Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and
natural. Organize by context — e.g. what the team decided, current architecture,
active processes. Create, split, or merge headings freely as the memory grows.
- Each entry MUST be a single bullet point. Be descriptive but concise — include relevant
details and context rather than just a few words.
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.

View file

@ -0,0 +1,10 @@
- web_search: Live public-web search (whatever search backends the workspace configured).
- Use for current events, prices, weather, news, or anything needing fresh public web data.
- For those queries, call this tool rather than guessing from memory or claiming you lack network access.
- If results are thin, say so and offer to refine the query.
- Args:
- query: Specific search terms
- top_k: Max hits (default: 10, max: 50)
- If snippets are too shallow, follow up with **scrape_webpage** on the best URL.
- Present sources with readable markdown links `[label](url)` — never bare URLs.

View file

@ -0,0 +1,10 @@
"""Main-agent SurfSense tool allowlist."""
from __future__ import annotations
from .index import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)
__all__ = ["MAIN_AGENT_SURFSENSE_TOOL_NAMES", "MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED"]

View file

@ -0,0 +1,17 @@
"""Main-agent SurfSense builtin tool names (not full ``new_chat``).
Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subagents.
"""
from __future__ import annotations
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
"search_surfsense_docs",
"web_search",
"scrape_webpage",
"update_memory",
)
MAIN_AGENT_SURFSENSE_TOOL_NAMES: frozenset[str] = frozenset(
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)

View file

@ -0,0 +1,7 @@
"""Multi-agent middleware stack assembly."""
from __future__ import annotations
from .stack import build_main_agent_deepagent_middleware
__all__ = ["build_main_agent_deepagent_middleware"]

View file

@ -0,0 +1,36 @@
"""Audit row per tool call (reversibility metadata)."""
from __future__ import annotations
import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ActionLogMiddleware
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from ..shared.flags import enabled
def build_action_log_mw(
    *,
    flags: AgentFeatureFlags,
    thread_id: int | None,
    search_space_id: int,
    user_id: str | None,
) -> ActionLogMiddleware | None:
    """Build the per-tool-call audit middleware, or ``None`` when disabled.

    Returns ``None`` when the ``enable_action_log`` feature flag is off or
    when there is no persisted thread to attach audit rows to
    (``thread_id is None``). Construction failures are logged and swallowed
    so a broken audit layer never takes down the chat turn itself.

    Args:
        flags: Resolved agent feature flags for this request.
        thread_id: Persisted chat-thread id, or ``None`` for ephemeral runs.
        search_space_id: Workspace the audited tool calls belong to.
        user_id: Acting user, or ``None`` (e.g. anonymous sessions).
    """
    if not enabled(flags, "enable_action_log") or thread_id is None:
        return None
    try:
        # Map tool name -> definition so the middleware can attach
        # reversibility metadata to each logged call.
        tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
        return ActionLogMiddleware(
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
            tool_definitions=tool_defs_by_name,
        )
    except Exception:  # pragma: no cover - defensive
        # Module-scoped logger (not the root logger) so the warning is
        # attributable to this module and respects app logging config.
        logging.getLogger(__name__).warning(
            "ActionLogMiddleware init failed; running without it.",
            exc_info=True,
        )
        return None

View file

@ -0,0 +1,16 @@
"""Anonymous document hydration from Redis (cloud only)."""
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
def build_anonymous_doc_mw(
    *,
    filesystem_mode: FilesystemMode,
    anon_session_id: str | None,
) -> AnonymousDocumentMiddleware | None:
    """Return the anonymous-document hydration middleware, cloud mode only.

    Non-cloud filesystem modes have no Redis-backed anonymous documents, so
    the builder yields ``None`` for them.
    """
    if filesystem_mode == FilesystemMode.CLOUD:
        return AnonymousDocumentMiddleware(anon_session_id=anon_session_id)
    return None

View file

@ -0,0 +1,12 @@
"""Per-thread cooperative lock around the whole turn."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import BusyMutexMiddleware
from ..shared.flags import enabled
def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:
    """Return the per-thread turn lock when ``enable_busy_mutex`` is on, else ``None``."""
    if not enabled(flags, "enable_busy_mutex"):
        return None
    return BusyMutexMiddleware()

View file

@ -0,0 +1,26 @@
"""SubAgent ``task`` tool wiring required for HITL inside subagents.
Replaces upstream ``SubAgentMiddleware`` to:
- share the parent's checkpointer with each subagent,
- forward ``runtime.config`` (thread_id, recursion_limit, ) into nested invokes,
- bridge ``Command(resume=...)`` from the parent into the subagent via the
``config["configurable"]["surfsense_resume_value"]`` side-channel,
- target the resume at the captured interrupt id so a follow-up
``HumanInTheLoopMiddleware.after_model`` does not consume the same payload,
- re-raise any new subagent interrupt at the parent so the SSE stream surfaces it.
Module layout
-------------
- ``constants`` shared keys / limits.
- ``config`` RunnableConfig + side-channel resume read.
- ``resume`` pending-interrupt detection, fan-out, ``Command(resume=...)`` builder.
- ``propagation`` re-raise pending subagent interrupts at the parent.
- ``task_tool`` the ``task`` tool factory (sync + async).
- ``middleware`` :class:`SurfSenseCheckpointedSubAgentMiddleware` itself.
"""
from .middleware import SurfSenseCheckpointedSubAgentMiddleware
__all__ = ["SurfSenseCheckpointedSubAgentMiddleware"]

View file

@ -0,0 +1,44 @@
"""RunnableConfig wiring for nested subagent invocations.
Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and
exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
"""
from __future__ import annotations
from typing import Any
from langchain.tools import ToolRuntime
from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
    """Build the RunnableConfig for a nested subagent invoke.

    Copies the parent's ``runtime.config`` (thread_id, ...) and raises
    ``recursion_limit`` up to ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` when the
    inherited value is missing, malformed, or smaller than that budget.
    """
    parent_cfg = runtime.config
    merged: dict[str, Any] = dict(parent_cfg) if parent_cfg else {}
    raw_limit = merged.get("recursion_limit")
    effective = 0
    if raw_limit is not None:
        try:
            effective = int(raw_limit)
        except (TypeError, ValueError):
            # Non-numeric junk is treated the same as "no limit set".
            effective = 0
    if effective < DEFAULT_SUBAGENT_RECURSION_LIMIT:
        merged["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT
    return merged
def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
    """Pop and return the queued resume payload, or ``None`` when absent.

    The payload is *removed* on read: sibling runtimes share the same
    ``configurable`` mapping by reference, so popping prevents a second
    consumer from seeing the same value.
    """
    cfg = runtime.config
    if not isinstance(cfg, dict):
        return None
    configurable = cfg.get("configurable")
    if not isinstance(configurable, dict):
        return None
    return configurable.pop("surfsense_resume_value", None)
def has_surfsense_resume(runtime: ToolRuntime) -> bool:
    """True iff a resume payload is queued on this runtime (non-destructive)."""
    cfg = runtime.config
    if isinstance(cfg, dict):
        configurable = cfg.get("configurable")
        if isinstance(configurable, dict):
            return "surfsense_resume_value" in configurable
    return False

View file

@ -0,0 +1,18 @@
"""Constants shared by the checkpointed subagent middleware."""
from __future__ import annotations
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
EXCLUDED_STATE_KEYS = frozenset(
{
"messages",
"todos",
"structured_response",
"skills_metadata",
"memory_contents",
}
)
# Match the parent graph's budget; the LangGraph default of 25 trips on
# multi-step subagent runs.
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000

View file

@ -0,0 +1,103 @@
"""SubAgent middleware that compiles each subagent against the parent checkpointer."""
from __future__ import annotations
from typing import Any, cast
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.subagents import (
TASK_SYSTEM_PROMPT,
CompiledSubAgent,
SubAgent,
SubAgentMiddleware,
)
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langchain.chat_models import init_chat_model
from langgraph.types import Checkpointer
from .task_tool import build_task_tool_with_parent_config
class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
    """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer."""

    def __init__(
        self,
        *,
        checkpointer: Checkpointer,
        backend: BackendProtocol | BackendFactory,
        subagents: list[SubAgent | CompiledSubAgent],
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        task_description: str | None = None,
    ) -> None:
        """Wire the ``task`` tool over *subagents*, sharing *checkpointer* with each.

        Args:
            checkpointer: Parent graph's checkpointer; threaded into every
                compiled subagent so interrupts inside a subagent persist.
            backend: Deepagents backend (or factory); stored for parity with
                the upstream middleware.
            subagents: Declarative ``SubAgent`` specs and/or pre-compiled
                ``CompiledSubAgent`` entries. Must be non-empty.
            system_prompt: Base ``task`` system prompt; the roster of
                available subagents is appended when both are present.
            task_description: Optional override for the ``task`` tool's
                description.

        Raises:
            ValueError: If *subagents* is empty.
        """
        self._surf_checkpointer = checkpointer
        # Deliberately invoke the *grandparent* initializer, skipping
        # SubAgentMiddleware.__init__: its setup is re-done below so that the
        # task tool and compiled subagents carry the parent checkpointer.
        super(SubAgentMiddleware, self).__init__()
        if not subagents:
            raise ValueError(
                "At least one subagent must be specified when using the new API"
            )
        self._backend = backend
        self._subagents = subagents
        subagent_specs = self._surf_compile_subagent_graphs()
        task_tool = build_task_tool_with_parent_config(subagent_specs, task_description)
        if system_prompt and subagent_specs:
            # Advertise the delegable specialists inside the task prompt,
            # matching the upstream "Available subagent types" convention.
            agents_desc = "\n".join(
                f"- {s['name']}: {s['description']}" for s in subagent_specs
            )
            self.system_prompt = (
                system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
            )
        else:
            self.system_prompt = system_prompt
        self.tools = [task_tool]

    def _surf_compile_subagent_graphs(self) -> list[dict[str, Any]]:
        """Mirror of ``SubAgentMiddleware._get_subagents`` that threads the parent checkpointer.

        Returns:
            One ``{"name", "description", "runnable"}`` dict per subagent.

        Raises:
            ValueError: If a declarative spec omits ``model`` or ``tools``.
        """
        specs: list[dict[str, Any]] = []
        for spec in self._subagents:
            # A "runnable" key marks a pre-compiled subagent: pass it through
            # untouched (it manages its own graph and checkpointing).
            if "runnable" in spec:
                compiled = cast(CompiledSubAgent, spec)
                specs.append(
                    {
                        "name": compiled["name"],
                        "description": compiled["description"],
                        "runnable": compiled["runnable"],
                    }
                )
                continue
            if "model" not in spec:
                msg = f"SubAgent '{spec['name']}' must specify 'model'"
                raise ValueError(msg)
            if "tools" not in spec:
                msg = f"SubAgent '{spec['name']}' must specify 'tools'"
                raise ValueError(msg)
            model = spec["model"]
            if isinstance(model, str):
                # Model given as an identifier string — resolve it to a chat model.
                model = init_chat_model(model)
            middleware: list[Any] = list(spec.get("middleware", []))
            interrupt_on = spec.get("interrupt_on")
            if interrupt_on:
                # Per-tool human approval gates inside the subagent run.
                middleware.append(HumanInTheLoopMiddleware(interrupt_on=interrupt_on))
            specs.append(
                {
                    "name": spec["name"],
                    "description": spec["description"],
                    "runnable": create_agent(
                        model,
                        system_prompt=spec["system_prompt"],
                        tools=spec["tools"],
                        middleware=middleware,
                        name=spec["name"],
                        # Key difference from upstream: compile against the
                        # parent's checkpointer so nested interrupt/resume works.
                        checkpointer=self._surf_checkpointer,
                    ),
                }
            )
        return specs

View file

@ -0,0 +1,74 @@
"""Re-raise still-pending subagent interrupts at the parent graph level.
After ``subagent.[a]invoke(Command(resume=...))`` returns, the subagent may
still hold a pending interrupt (e.g. the LLM produced a follow-up tool call
that fired a fresh ``interrupt()``). The parent's pregel cannot see that
interrupt because it lives in a separate compiled graph; we re-raise it here
so the parent's SSE stream surfaces it as the next approval card.
"""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.runnables import Runnable
from langgraph.types import interrupt as _lg_interrupt
from .resume import get_first_pending_subagent_interrupt
logger = logging.getLogger(__name__)
def maybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Re-raise a still-pending subagent interrupt at the parent so the SSE stream surfaces it."""
    state_reader = getattr(subagent, "get_state", None)
    if not callable(state_reader):
        # Graphs without checkpointing expose no state API; nothing to do.
        return
    try:
        snapshot = state_reader(sub_config)
    except Exception:  # pragma: no cover - defensive
        logger.debug(
            "Subagent get_state failed during re-interrupt check",
            exc_info=True,
        )
        return
    _unused_id, pending = get_first_pending_subagent_interrupt(snapshot)
    if pending is None:
        return
    logger.info(
        "Re-raising subagent %r interrupt to parent (multi-step HITL)",
        subagent_type,
    )
    # Surface the subagent's pending interrupt in the parent pregel.
    _lg_interrupt(pending)
async def amaybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Async counterpart of :func:`maybe_propagate_subagent_interrupt`."""
    state_reader = getattr(subagent, "aget_state", None)
    if not callable(state_reader):
        # Graphs without checkpointing expose no state API; nothing to do.
        return
    try:
        snapshot = await state_reader(sub_config)
    except Exception:  # pragma: no cover - defensive
        logger.debug(
            "Subagent aget_state failed during re-interrupt check",
            exc_info=True,
        )
        return
    _unused_id, pending = get_first_pending_subagent_interrupt(snapshot)
    if pending is None:
        return
    logger.info(
        "Re-raising subagent %r interrupt to parent (multi-step HITL)",
        subagent_type,
    )
    # Surface the subagent's pending interrupt in the parent pregel.
    _lg_interrupt(pending)

View file

@ -0,0 +1,76 @@
"""Resume-payload shaping and pending-interrupt detection for subagents.
Splits the work of "given a state snapshot and a parent-stashed resume value,
produce the right ``Command(resume=...)`` for the subagent" into pure helpers.
"""
from __future__ import annotations
from typing import Any
from langgraph.types import Command
def hitlrequest_action_count(pending_value: Any) -> int:
    """Bundle size for a LangChain ``HITLRequest`` payload; ``0`` for non-bundle interrupts."""
    if isinstance(pending_value, dict):
        actions = pending_value.get("action_requests")
        if isinstance(actions, list):
            return len(actions)
    # Scalar / non-dict interrupts are not bundles.
    return 0
def fan_out_decisions_to_match(resume_value: Any, expected_count: int) -> Any:
    """Legacy fallback: pad a 1-decision resume to N for an ``action_requests=N`` bundle.

    Modern frontend submits N decisions per bundle (one per action_request) so
    this is a no-op; kept for backwards compatibility with old in-flight
    threads or non-bundle clients that send a single decision.
    """
    if expected_count <= 1 or not isinstance(resume_value, dict):
        return resume_value
    decisions = resume_value.get("decisions")
    if not isinstance(decisions, list) or not decisions:
        return resume_value
    if len(decisions) >= expected_count:
        return resume_value
    # Repeat the last decision until the bundle size is matched.
    filler = [decisions[-1]] * (expected_count - len(decisions))
    return {**resume_value, "decisions": list(decisions) + filler}
def get_first_pending_subagent_interrupt(state: Any) -> tuple[str | None, Any]:
    """First pending ``(interrupt_id, value)``; ``(None, None)`` if no interrupt.

    Assumes at most one pending interrupt per snapshot (sequential tool nodes).
    Parallel tool nodes would need an id-aware lookup instead of first-wins.
    """
    if state is None:
        return None, None

    def _first_with_value(interrupts: Any) -> tuple[str | None, Any] | None:
        # First interrupt carrying a non-None value; id only kept when it's a str.
        for candidate in interrupts or ():
            value = getattr(candidate, "value", None)
            if value is None:
                continue
            raw_id = getattr(candidate, "id", None)
            return (raw_id if isinstance(raw_id, str) else None, value)
        return None

    found = _first_with_value(getattr(state, "interrupts", None))
    if found is not None:
        return found
    # Fall back to interrupts attached to pending sub-tasks.
    for sub_task in getattr(state, "tasks", None) or ():
        found = _first_with_value(getattr(sub_task, "interrupts", None))
        if found is not None:
            return found
    return None, None
def build_resume_command(resume_value: Any, pending_id: str | None) -> Command:
    """``Command(resume={id: value})`` when ``id`` is known, else fall back to scalar."""
    if pending_id is not None:
        # Target the specific pending interrupt by id.
        return Command(resume={pending_id: resume_value})
    return Command(resume=resume_value)

View file

@ -0,0 +1,238 @@
"""Build the ``task`` tool that invokes subagents with HITL bridging.
The tool's body is the only place where the parent and the subagent meet at
runtime: it reads the parent's stashed resume value, decides whether to send
fresh state or a targeted ``Command(resume=...)`` to the subagent, then
re-raises any new pending interrupt back to the parent.
"""
from __future__ import annotations
import logging
from typing import Annotated, Any
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
from langchain.tools import BaseTool, ToolRuntime
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import StructuredTool
from langgraph.types import Command
from .config import (
consume_surfsense_resume,
has_surfsense_resume,
subagent_invoke_config,
)
from .constants import EXCLUDED_STATE_KEYS
from .propagation import (
amaybe_propagate_subagent_interrupt,
maybe_propagate_subagent_interrupt,
)
from .resume import (
build_resume_command,
fan_out_decisions_to_match,
get_first_pending_subagent_interrupt,
hitlrequest_action_count,
)
logger = logging.getLogger(__name__)
def build_task_tool_with_parent_config(
    subagents: list[dict[str, Any]],
    task_description: str | None = None,
) -> BaseTool:
    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.

    Builds a ``task`` StructuredTool (sync + async bodies) over the given
    subagent specs. Each invocation either starts the selected subagent fresh
    or, when the subagent holds a pending interrupt, forwards the parent's
    stashed resume decision via ``Command(resume=...)``.
    """
    # name -> compiled graph lookup shared by both the sync and async bodies.
    subagent_graphs: dict[str, Runnable] = {
        spec["name"]: spec["runnable"] for spec in subagents
    }
    subagent_description_str = "\n".join(
        f"- {s['name']}: {s['description']}" for s in subagents
    )
    # Three description modes: default template, caller-supplied template with
    # an {available_agents} placeholder, or a fully literal caller description.
    if task_description is None:
        description = TASK_TOOL_DESCRIPTION.format(
            available_agents=subagent_description_str
        )
    elif "{available_agents}" in task_description:
        description = task_description.format(available_agents=subagent_description_str)
    else:
        description = task_description

    def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
        """Fold the subagent's final state into a parent ``Command`` update.

        The last message's text becomes a ToolMessage answering the parent's
        ``task`` tool call; the rest of the state (minus excluded keys) is
        merged into the parent state.
        """
        if "messages" not in result:
            msg = (
                "CompiledSubAgent must return a state containing a 'messages' key. "
                "Custom StateGraphs used with CompiledSubAgent should include 'messages' "
                "in their state schema to communicate results back to the main agent."
            )
            raise ValueError(msg)
        state_update = {k: v for k, v in result.items() if k not in EXCLUDED_STATE_KEYS}
        messages = result["messages"]
        if not messages:
            msg = (
                "CompiledSubAgent returned an empty 'messages' list. "
                "Subagents must produce at least one message so the parent has "
                "output to forward back to the user."
            )
            raise ValueError(msg)
        # ``or ""`` guards a message whose ``text`` is None/empty.
        last_text = getattr(messages[-1], "text", None) or ""
        message_text = last_text.rstrip()
        return Command(
            update={
                **state_update,
                "messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
            }
        )

    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
        """Resolve the subagent graph and seed its state from the parent's (minus excluded keys)."""
        subagent = subagent_graphs[subagent_type]
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
        subagent_state["messages"] = [HumanMessage(content=description)]
        return subagent, subagent_state

    def task(
        description: Annotated[
            str,
            "A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
        ],
        subagent_type: Annotated[
            str,
            "The type of subagent to use. Must be one of the available agent types listed in the tool description.",
        ],
        runtime: ToolRuntime,
    ) -> str | Command:
        """Sync tool body: invoke (or resume) the selected subagent and fold its state back."""
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
            )
        if not runtime.tool_call_id:
            raise ValueError("Tool call ID is required for subagent invocation")
        subagent, subagent_state = _validate_and_prepare_state(
            subagent_type, description, runtime
        )
        sub_config = subagent_invoke_config(runtime)
        # Resume bridge: forward the parent's stashed decision into the
        # subagent's pending ``interrupt()``, targeted by id.
        pending_id: str | None = None
        pending_value: Any = None
        get_state = getattr(subagent, "get_state", None)
        if callable(get_state):
            try:
                snapshot = get_state(sub_config)
                pending_id, pending_value = get_first_pending_subagent_interrupt(
                    snapshot
                )
            except Exception:
                # Fail loud if a resume is queued: silent fallback would
                # replay the original interrupt to the user.
                if has_surfsense_resume(runtime):
                    logger.exception(
                        "Subagent %r get_state raised with resume queued; re-raising.",
                        subagent_type,
                    )
                    raise
                logger.debug(
                    "Subagent get_state failed; falling back to fresh invoke",
                    exc_info=True,
                )
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
                # Bridge invariant: a queued resume must accompany any pending
                # subagent interrupt. Fall-through replay would silently re-prompt
                # the user; raise so the streaming layer surfaces a clear error.
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            # Legacy clients may send one decision for an N-action bundle.
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            result = subagent.invoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
            )
        else:
            result = subagent.invoke(subagent_state, config=sub_config)
        # A follow-up interrupt raised inside the subagent must surface at
        # the parent as the next approval card.
        maybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
        return _return_command_with_state_update(result, runtime.tool_call_id)

    async def atask(
        description: Annotated[
            str,
            "A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
        ],
        subagent_type: Annotated[
            str,
            "The type of subagent to use. Must be one of the available agent types listed in the tool description.",
        ],
        runtime: ToolRuntime,
    ) -> str | Command:
        """Async twin of ``task`` (same flow via ``aget_state`` / ``ainvoke``)."""
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
            )
        if not runtime.tool_call_id:
            raise ValueError("Tool call ID is required for subagent invocation")
        subagent, subagent_state = _validate_and_prepare_state(
            subagent_type, description, runtime
        )
        sub_config = subagent_invoke_config(runtime)
        # Resume bridge — see ``task`` above.
        pending_id: str | None = None
        pending_value: Any = None
        aget_state = getattr(subagent, "aget_state", None)
        if callable(aget_state):
            try:
                snapshot = await aget_state(sub_config)
                pending_id, pending_value = get_first_pending_subagent_interrupt(
                    snapshot
                )
            except Exception:
                if has_surfsense_resume(runtime):
                    logger.exception(
                        "Subagent %r aget_state raised with resume queued; re-raising.",
                        subagent_type,
                    )
                    raise
                logger.debug(
                    "Subagent aget_state failed; falling back to fresh ainvoke",
                    exc_info=True,
                )
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            result = await subagent.ainvoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
            )
        else:
            result = await subagent.ainvoke(subagent_state, config=sub_config)
        await amaybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
        return _return_command_with_state_update(result, runtime.tool_call_id)

    return StructuredTool.from_function(
        name="task",
        func=task,
        coroutine=atask,
        description=description,
    )

View file

@ -0,0 +1,50 @@
"""Spill + clear-tool-uses passes to keep payloads under budget."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from langchain_core.tools import BaseTool
from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
safe_exclude_tools,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import (
ClearToolUsesEdit,
SpillingContextEditingMiddleware,
SpillToBackendEdit,
)
from ..shared.flags import enabled
def build_context_editing_mw(
    *,
    flags: AgentFeatureFlags,
    max_input_tokens: int | None,
    tools: Sequence[BaseTool],
    backend_resolver: Any,
) -> SpillingContextEditingMiddleware | None:
    """Spill + clear-tool-uses passes to keep payloads under the token budget.

    Returns ``None`` when the feature flag is off or no input-token budget is
    known (``max_input_tokens`` falsy).
    """
    if not enabled(flags, "enable_context_editing") or not max_input_tokens:
        return None
    # Both edits share the same thresholds and exclusion list; compute them
    # once instead of duplicating the arithmetic and re-walking the tool list.
    trigger = int(max_input_tokens * 0.55)
    clear_at_least = int(max_input_tokens * 0.15)
    excluded = safe_exclude_tools(tools)
    spill_edit = SpillToBackendEdit(
        trigger=trigger,
        clear_at_least=clear_at_least,
        keep=5,
        exclude_tools=excluded,
        clear_tool_inputs=True,
    )
    clear_edit = ClearToolUsesEdit(
        trigger=trigger,
        clear_at_least=clear_at_least,
        keep=5,
        exclude_tools=excluded,
        clear_tool_inputs=True,
        placeholder="[cleared - older tool output trimmed for context]",
    )
    return SpillingContextEditingMiddleware(
        edits=[spill_edit, clear_edit],
        backend_resolver=backend_resolver,
    )

View file

@ -0,0 +1,13 @@
"""Drop duplicate HITL tool calls before execution."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware
def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
    """Middleware that drops duplicate HITL tool calls before execution."""
    agent_tools = list(tools)
    return DedupHITLToolCallsMiddleware(agent_tools=agent_tools)

View file

@ -0,0 +1,14 @@
"""Stop N identical tool calls in a row via interrupt."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import DoomLoopMiddleware
from ..shared.flags import enabled
def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:
    """Interrupt after N identical tool calls in a row; ``None`` when the flag is off."""
    if not enabled(flags, "enable_doom_loop"):
        return None
    return DoomLoopMiddleware(threshold=3)

View file

@ -0,0 +1,23 @@
"""Commit staged cloud filesystem mutations to Postgres at end of turn."""
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
def build_kb_persistence_mw(
    *,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
) -> KnowledgeBasePersistenceMiddleware | None:
    """End-of-turn commit of staged cloud filesystem mutations (cloud mode only)."""
    if filesystem_mode == FilesystemMode.CLOUD:
        return KnowledgeBasePersistenceMiddleware(
            search_space_id=search_space_id,
            created_by_id=user_id,
            filesystem_mode=filesystem_mode,
            thread_id=thread_id,
        )
    # Non-cloud filesystems have nothing to persist.
    return None

View file

@ -0,0 +1,27 @@
"""KB priority planner: <priority_documents> injection."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
def build_knowledge_priority_mw(
    *,
    llm: BaseChatModel,
    search_space_id: int,
    filesystem_mode: FilesystemMode,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
) -> KnowledgePriorityMiddleware:
    """KB priority planner middleware that injects <priority_documents>."""
    config = {
        "llm": llm,
        "search_space_id": search_space_id,
        "filesystem_mode": filesystem_mode,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "mentioned_document_ids": mentioned_document_ids,
    }
    return KnowledgePriorityMiddleware(**config)

View file

@ -0,0 +1,23 @@
"""<workspace_tree> injection (cloud only)."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
def build_knowledge_tree_mw(
    *,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    llm: BaseChatModel,
) -> KnowledgeTreeMiddleware | None:
    """<workspace_tree> injection middleware; only built for the cloud filesystem."""
    if filesystem_mode == FilesystemMode.CLOUD:
        return KnowledgeTreeMiddleware(
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            llm=llm,
        )
    return None

View file

@ -0,0 +1,12 @@
"""Provider-compat: append a `_noop` tool when tools=[] but history has tool calls."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import NoopInjectionMiddleware
from ..shared.flags import enabled
def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:
    """Provider-compat ``_noop`` tool injection; gated on the compaction-v2 flag."""
    if not enabled(flags, "enable_compaction_v2"):
        return None
    return NoopInjectionMiddleware()

View file

@ -0,0 +1,12 @@
"""OTel spans on model and tool calls."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import OtelSpanMiddleware
from ..shared.flags import enabled
def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:
    """OTel spans on model and tool calls; ``None`` when the flag is off."""
    if not enabled(flags, "enable_otel"):
        return None
    return OtelSpanMiddleware()

View file

@ -0,0 +1,49 @@
"""Tail-of-stack plugin slot driven by env allowlist."""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.plugin_loader import (
PluginContext,
load_allowed_plugin_names_from_env,
load_plugin_middlewares,
)
from app.db import ChatVisibility
from ..shared.flags import enabled
def build_plugin_middlewares(
    *,
    flags: AgentFeatureFlags,
    search_space_id: int,
    user_id: str | None,
    visibility: ChatVisibility,
    llm: BaseChatModel,
) -> list[Any]:
    """Load env-allowlisted plugin middlewares; empty list when disabled or on failure."""
    if not enabled(flags, "enable_plugin_loader"):
        return []
    try:
        names = load_allowed_plugin_names_from_env()
        if not names:
            # Empty allowlist means the plugin slot is effectively off.
            return []
        context = PluginContext.build(
            search_space_id=search_space_id,
            user_id=user_id,
            thread_visibility=visibility,
            llm=llm,
        )
        return load_plugin_middlewares(context, allowed_plugin_names=names)
    except Exception:  # pragma: no cover - defensive
        logging.warning(
            "Plugin loader failed; continuing without plugins.",
            exc_info=True,
        )
        return []

View file

@ -0,0 +1,50 @@
"""Repair miscased / unknown tool names to the registered set or invalid_tool."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware
from ..shared.flags import enabled
# deepagents-built-in tool names the repair pass treats as known.
_DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "write_todos",
        "ls",
        "read_file",
        "write_file",
        "edit_file",
        "glob",
        "grep",
        "execute",
        "task",
        "mkdir",
        "cd",
        "pwd",
        "move_file",
        "rm",
        "rmdir",
        "list_tree",
        "execute_code",
    }
)


def build_repair_mw(
    *,
    flags: AgentFeatureFlags,
    tools: Sequence[BaseTool],
) -> ToolCallNameRepairMiddleware | None:
    """Repair miscased / unknown tool names; ``None`` when the flag is off."""
    if not enabled(flags, "enable_tool_call_repair"):
        return None
    # Known names = tools registered on this agent plus deepagents built-ins.
    known_names = {t.name for t in tools} | _DEEPAGENT_BUILTIN_TOOL_NAMES
    return ToolCallNameRepairMiddleware(
        registered_tool_names=known_names,
        fuzzy_match_threshold=None,
    )

View file

@ -0,0 +1,39 @@
"""LLM-based tool subset selection (only when >30 tools)."""
from __future__ import annotations
import logging
from collections.abc import Sequence
from langchain.agents.middleware import LLMToolSelectorMiddleware
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from ..shared.flags import enabled
def build_selector_mw(
    *,
    flags: AgentFeatureFlags,
    tools: Sequence[BaseTool],
) -> LLMToolSelectorMiddleware | None:
    """LLM-based tool subset selection, engaged only for large toolsets (>30).

    Returns ``None`` when the flag is off, the toolset is small, or the
    middleware cannot be constructed.
    """
    if not enabled(flags, "enable_llm_tool_selector") or len(tools) <= 30:
        return None
    # Hoisted: the original rebuilt {t.name for t in tools} once per
    # candidate name inside the comprehension.
    tool_names = {t.name for t in tools}
    always_include = [
        name
        for name in (
            "update_memory",
            "get_connected_accounts",
            "scrape_webpage",
        )
        if name in tool_names
    ]
    try:
        return LLMToolSelectorMiddleware(
            model="openai:gpt-4o-mini",
            max_tools=12,
            always_include=always_include,
        )
    except Exception:
        logging.warning("LLMToolSelectorMiddleware init failed; skipping.")
        return None

View file

@ -0,0 +1,39 @@
"""Skill discovery + injection."""
from __future__ import annotations
import logging
from deepagents.middleware.skills import SkillsMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import (
build_skills_backend_factory,
default_skills_sources,
)
from ..shared.flags import enabled
def build_skills_mw(
    *,
    flags: AgentFeatureFlags,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
) -> SkillsMiddleware | None:
    """Skill discovery + injection middleware; ``None`` when disabled or init fails."""
    if not enabled(flags, "enable_skills"):
        return None
    # Cloud mode scopes the skills backend to the search space; other modes
    # get an unscoped backend.
    scoped_space_id = (
        search_space_id if filesystem_mode == FilesystemMode.CLOUD else None
    )
    try:
        backend_factory = build_skills_backend_factory(search_space_id=scoped_space_id)
        return SkillsMiddleware(
            backend=backend_factory,
            sources=default_skills_sources(),
        )
    except Exception as exc:  # pragma: no cover - defensive
        logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
        return None

View file

@ -0,0 +1,9 @@
"""Anthropic prompt caching annotations on system/tool/message blocks."""
from __future__ import annotations
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
def build_anthropic_cache_mw() -> AnthropicPromptCachingMiddleware:
    """Prompt-caching annotations; non-Anthropic models are ignored (``unsupported_model_behavior="ignore"``)."""
    middleware = AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore")
    return middleware

View file

@ -0,0 +1,14 @@
"""Context-window summarization with SurfSense protected sections."""
from __future__ import annotations
from typing import Any
from deepagents.backends import StateBackend
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.middleware import create_surfsense_compaction_middleware
def build_compaction_mw(llm: BaseChatModel) -> Any:
    """Context-window summarization middleware backed by the in-state backend class."""
    backend_cls = StateBackend
    return create_surfsense_compaction_middleware(llm, backend_cls)

View file

@ -0,0 +1,11 @@
"""File-intent classifier that gates strict write contracts."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.middleware import FileIntentMiddleware
def build_file_intent_mw(llm: BaseChatModel) -> FileIntentMiddleware:
    """File-intent classifier middleware gating strict write contracts."""
    classifier = FileIntentMiddleware(llm=llm)
    return classifier

View file

@ -0,0 +1,25 @@
"""SurfSense filesystem tools/middleware."""
from __future__ import annotations
from typing import Any
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import SurfSenseFilesystemMiddleware
def build_filesystem_mw(
    *,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
) -> SurfSenseFilesystemMiddleware:
    """SurfSense filesystem tools/middleware bound to the resolved backend."""
    config = {
        "backend": backend_resolver,
        "filesystem_mode": filesystem_mode,
        "search_space_id": search_space_id,
        "created_by_id": user_id,
        "thread_id": thread_id,
    }
    return SurfSenseFilesystemMiddleware(**config)

View file

@ -0,0 +1,10 @@
"""Single source of truth for the feature-flag predicate."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
def enabled(flags: AgentFeatureFlags, attr: str) -> bool:
    """``flags.<attr>`` is on AND the new-agent-stack kill switch is off."""
    feature_on = getattr(flags, attr)
    kill_switch = flags.disable_new_agent_stack
    return feature_on and not kill_switch

View file

@ -0,0 +1,19 @@
"""User/team memory injection prepended to the conversation."""
from __future__ import annotations
from app.agents.new_chat.middleware import MemoryInjectionMiddleware
from app.db import ChatVisibility
def build_memory_mw(
    *,
    user_id: str | None,
    search_space_id: int,
    visibility: ChatVisibility,
) -> MemoryInjectionMiddleware:
    """Middleware that prepends user/team memory to the conversation."""
    config = {
        "user_id": user_id,
        "search_space_id": search_space_id,
        "thread_visibility": visibility,
    }
    return MemoryInjectionMiddleware(**config)

View file

@ -0,0 +1,9 @@
"""Repair dangling tool-call sequences before each agent turn."""
from __future__ import annotations
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
def build_patch_tool_calls_mw() -> PatchToolCallsMiddleware:
    """Middleware repairing dangling tool-call sequences before each agent turn."""
    patcher = PatchToolCallsMiddleware()
    return patcher

View file

@ -0,0 +1,12 @@
"""Permission rulesets fanned out to parent / general-purpose / subagent stacks."""
from __future__ import annotations
from .context import PermissionContext, build_permission_context
from .middleware import build_full_permission_mw
# Public API of the permissions subpackage.
__all__ = [
    "PermissionContext",
    "build_full_permission_mw",
    "build_permission_context",
]

View file

@ -0,0 +1,107 @@
"""Derive shared permission context once; fan out to all three stack layers.
The context carries:
- ``rulesets``: full ask/deny/allow rules for the main-agent permission middleware.
- ``general_purpose_interrupt_on``: ``ask`` rules mirrored as deepagents
``interrupt_on`` so HITL still triggers from inside ``task`` runs (subagents
bypass the main-agent permission middleware).
- ``subagent_deny_mw``: a deny-only ``PermissionMiddleware`` instance shared
across the general-purpose and registry subagent stacks.
"""
from __future__ import annotations
from collections.abc import Sequence
from dataclasses import dataclass
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import PermissionMiddleware
from app.agents.new_chat.permissions import Rule, Ruleset
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from ..flags import enabled
@dataclass(frozen=True)
class PermissionContext:
    """Shared permission artifacts fanned out to parent / general-purpose / subagent stacks."""

    # Full ask/deny/allow rules for the main-agent permission middleware.
    rulesets: list[Ruleset]
    # ``ask`` rules mirrored as deepagents ``interrupt_on`` so HITL still
    # triggers inside ``task`` runs.
    general_purpose_interrupt_on: dict[str, bool]
    # Deny-only middleware shared by general-purpose and registry subagents.
    subagent_deny_mw: PermissionMiddleware | None
def build_permission_context(
    *,
    flags: AgentFeatureFlags,
    filesystem_mode: FilesystemMode,
    tools: Sequence[BaseTool],
    available_connectors: list[str] | None,
) -> PermissionContext:
    """Derive the shared permission context once for all three stack layers."""
    is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER
    permission_enabled = enabled(flags, "enable_permission")
    rulesets: list[Ruleset] = []
    # Base allow-all so the later ask/deny rules act as carve-outs.
    if permission_enabled or is_desktop_fs:
        rulesets.append(
            Ruleset(
                rules=[Rule(permission="*", pattern="*", action="allow")],
                origin="surfsense_defaults",
            )
        )
    # Desktop filesystems always require human confirmation for mutations.
    if is_desktop_fs:
        rulesets.append(
            Ruleset(
                rules=[
                    Rule(permission="rm", pattern="*", action="ask"),
                    Rule(permission="rmdir", pattern="*", action="ask"),
                    Rule(permission="move_file", pattern="*", action="ask"),
                    Rule(permission="edit_file", pattern="*", action="ask"),
                    Rule(permission="write_file", pattern="*", action="ask"),
                ],
                origin="desktop_safety",
            )
        )
    tool_names_in_use = {t.name for t in tools}
    # Synthesize deny rules for built-in tools whose required connector is
    # not available to this user.
    if permission_enabled:
        available_set = set(available_connectors or [])
        synthesized: list[Rule] = []
        for tool_def in BUILTIN_TOOLS:
            if tool_def.name not in tool_names_in_use:
                continue
            rc = tool_def.required_connector
            if rc and rc not in available_set:
                synthesized.append(
                    Rule(permission=tool_def.name, pattern="*", action="deny")
                )
        if synthesized:
            rulesets.append(Ruleset(rules=synthesized, origin="connector_synthesized"))
    # Mirror ``ask`` rules as deepagents ``interrupt_on`` so HITL still fires
    # inside ``task`` runs (subagents bypass the main permission middleware).
    general_purpose_interrupt_on: dict[str, bool] = {
        rule.permission: True
        for rs in rulesets
        for rule in rs.rules
        if rule.action == "ask" and rule.permission in tool_names_in_use
    }
    # Subagents only get the deny subset; ask behavior comes via interrupt_on.
    deny_rulesets = [
        Ruleset(
            rules=[r for r in rs.rules if r.action == "deny"],
            origin=rs.origin,
        )
        for rs in rulesets
    ]
    deny_rulesets = [rs for rs in deny_rulesets if rs.rules]
    subagent_deny_mw: PermissionMiddleware | None = (
        PermissionMiddleware(rulesets=deny_rulesets) if deny_rulesets else None
    )
    return PermissionContext(
        rulesets=rulesets,
        general_purpose_interrupt_on=general_purpose_interrupt_on,
        subagent_deny_mw=subagent_deny_mw,
    )

View file

@ -0,0 +1,10 @@
"""Main-agent permission middleware (full ask/deny/allow rules)."""
from __future__ import annotations
from app.agents.new_chat.middleware import PermissionMiddleware
from app.agents.new_chat.permissions import Ruleset
def build_full_permission_mw(rulesets: list[Ruleset]) -> PermissionMiddleware | None:
    """Main-agent permission middleware from the full rules; ``None`` when no rules exist."""
    if not rulesets:
        return None
    return PermissionMiddleware(rulesets=rulesets)

View file

@ -0,0 +1,7 @@
"""Resilience middleware shared as the same instances across parent / general-purpose / registry."""
from __future__ import annotations
from .bundle import ResilienceBundle, build_resilience_bundle
__all__ = ["ResilienceBundle", "build_resilience_bundle"]

View file

@ -0,0 +1,51 @@
"""Construct each resilience middleware once; same instances flow into every consumer."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from langchain.agents.middleware import (
ModelCallLimitMiddleware,
ToolCallLimitMiddleware,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import RetryAfterMiddleware
from app.agents.new_chat.middleware.scoped_model_fallback import (
ScopedModelFallbackMiddleware,
)
from .fallback import build_fallback_mw
from .model_call_limit import build_model_call_limit_mw
from .retry import build_retry_mw
from .tool_call_limit import build_tool_call_limit_mw
@dataclass(frozen=True)
class ResilienceBundle:
    """Resilience middleware instances built once; ``None`` marks a disabled slot."""

    retry: RetryAfterMiddleware | None
    fallback: ScopedModelFallbackMiddleware | None
    model_call_limit: ModelCallLimitMiddleware | None
    tool_call_limit: ToolCallLimitMiddleware | None

    def as_list(self) -> list[Any]:
        """Enabled middlewares in stack order, skipping ``None`` slots."""
        candidates = (
            self.retry,
            self.fallback,
            self.model_call_limit,
            self.tool_call_limit,
        )
        return [mw for mw in candidates if mw is not None]
def build_resilience_bundle(flags: AgentFeatureFlags) -> ResilienceBundle:
    """Build each resilience middleware exactly once from the feature flags."""
    retry = build_retry_mw(flags)
    fallback = build_fallback_mw(flags)
    model_cap = build_model_call_limit_mw(flags)
    tool_cap = build_tool_call_limit_mw(flags)
    return ResilienceBundle(
        retry=retry,
        fallback=fallback,
        model_call_limit=model_cap,
        tool_call_limit=tool_cap,
    )

View file

@ -0,0 +1,27 @@
"""Switch to a fallback model on provider/network errors only."""
from __future__ import annotations
import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware.scoped_model_fallback import (
ScopedModelFallbackMiddleware,
)
from ..flags import enabled
def build_fallback_mw(
    flags: AgentFeatureFlags,
) -> ScopedModelFallbackMiddleware | None:
    """Fallback-model middleware for provider/network errors; ``None`` when off or init fails."""
    if not enabled(flags, "enable_model_fallback"):
        return None
    try:
        fallback = ScopedModelFallbackMiddleware(
            "openai:gpt-4o-mini",
            "anthropic:claude-3-5-haiku-20241022",
        )
    except Exception:
        logging.warning("ScopedModelFallbackMiddleware init failed; skipping.")
        return None
    return fallback

View file

@ -0,0 +1,21 @@
"""Cap model calls per thread / per run to prevent runaway cost."""
from __future__ import annotations
from langchain.agents.middleware import ModelCallLimitMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from ..flags import enabled
def build_model_call_limit_mw(
    flags: AgentFeatureFlags,
) -> ModelCallLimitMiddleware | None:
    """Per-thread / per-run model-call caps; ``None`` when the flag is off."""
    if enabled(flags, "enable_model_call_limit"):
        return ModelCallLimitMiddleware(
            thread_limit=120,
            run_limit=80,
            exit_behavior="end",
        )
    return None

View file

@ -0,0 +1,16 @@
"""Retry on transient model errors (e.g. Retry-After-bearing 429s)."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import RetryAfterMiddleware
from ..flags import enabled
def build_retry_mw(flags: AgentFeatureFlags) -> RetryAfterMiddleware | None:
    """Transient-error retry middleware; ``None`` when the flag is off."""
    if not enabled(flags, "enable_retry_after"):
        return None
    return RetryAfterMiddleware(max_retries=3)

Some files were not shown because too many files have changed in this diff Show more