refactor(agents): move connector_searchable_types, agent_cache, system_prompt + prompts to app/agents/shared (slice 7b)

Three live shared leaves were discovered while taking stock after slice 7. All of them are consumed by the multi-agent stack and/or live routes, not only by the single-agent stack:

- connector_searchable_types -> shared + shim (the multi-agent factory uses it)
- agent_cache -> shared + shim (the multi-agent runtime/agent_cache uses it)
- system_prompt + prompts/ (42 .md fragments) -> shared together + shim

The composer's _PROMPTS_PACKAGE is repointed to app.agents.shared.prompts so that importlib.resources fragment loading keeps working; system_prompt's relative ".prompts.composer" import is preserved by moving both as a unit. Each moved module keeps a re-export shim for the frozen chat_deepagent. After this slice, new_chat/ holds only the frozen single-agent stack (chat_deepagent, subagents/, __init__) plus shims.
2026-07-22 23:31:12 +02:00 · 2026-06-04 13:21:45 +02:00 · 2026-06-04 13:21:45 +02:00 · a019f18d1c
commit a019f18d1c
parent 13a96851ef
60 changed files with 627 additions and 564 deletions
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py
@@ -10,7 +10,7 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer

-from app.agents.new_chat.agent_cache import (
+from app.agents.shared.agent_cache import (
    flags_signature,
    get_cache,
    stable_hash,
--- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py
+++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py
@@ -19,7 +19,7 @@ from app.agents.multi_agent_chat.subagents import (
 from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
    load_mcp_tools_by_connector,
 )
-from app.agents.new_chat.connector_searchable_types import (
+from app.agents.shared.connector_searchable_types import (
    map_connectors_to_searchable_types,
 )
 from app.agents.shared.feature_flags import AgentFeatureFlags, get_flags
--- a/surfsense_backend/app/agents/new_chat/agent_cache.py
+++ b/surfsense_backend/app/agents/new_chat/agent_cache.py
@@ -1,351 +1,17 @@
-"""TTL-LRU cache for compiled SurfSense deep agents.
+"""Backward-compatible shim.

-Why this exists
---------------
-
-``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
-turn:
-
-1. Discover connectors & document types from Postgres (~50-200ms)
-2. Build the tool list (built-in + MCP) (~200ms-1.7s)
-3. Compose the system prompt
-4. Construct ~15 middleware instances (CPU)
-5. Eagerly compile the general-purpose subagent
-   (``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
-   which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure
-   CPU work)
-6. Compile the outer LangGraph
-
-For a single thread, all six steps produce the SAME object on every turn
-unless the user has changed their LLM config, toggled a feature flag,
-added a connector, etc. The right answer is to compile ONCE per
-"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
-every subsequent turn on the same thread.
-
-Why a per-thread key (not a global pool)
----------------------------------------
-
-Most middleware in the SurfSense stack captures per-thread state in
-``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
-``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
-would silently leak state across users and threads. Keying the cache on
-``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
-turns on the same thread without changing any middleware's behavior.
-
-Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
-(read via ``runtime.context``) so the cache can collapse to a single
-``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
-then, per-thread keying is the only safe option.
-
-Cache shape
-----------
-
-* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
-  minutes — matches a typical chat session). ``maxsize`` (default 256)
-  caps memory; LRU evicts least-recently-used on overflow.
-* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
-  cold misses on the same key wait for the first build instead of
-  building N times.
-* Process-local: this is an in-memory cache. Multi-replica deployments
-  pay the build cost once per replica per key. That's fine; the working
-  set per replica is small (one entry per active thread on that replica).
-
-Telemetry
---------
-
-Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
-
-  * ``hit`` — cache hit, microseconds-fast
-  * ``miss`` — first build for this key, includes build duration
-  * ``stale`` — entry was found but expired; rebuilt
-  * ``evict`` — LRU eviction (size-limited)
-  * ``size`` — current cache occupancy at lookup time
+Moved to ``app.agents.shared.agent_cache``. Re-exported here for the frozen
+single-agent stack (``chat_deepagent``) until that stack is retired.
 """

-from __future__ import annotations
-
-import asyncio
-import hashlib
-import logging
-import os
-import time
-from collections import OrderedDict
-from collections.abc import Awaitable, Callable
-from dataclasses import dataclass
-from typing import Any
-
-from app.utils.perf import get_perf_logger
-
-logger = logging.getLogger(__name__)
-_perf_log = get_perf_logger()
-
-
-# ---------------------------------------------------------------------------
-# Public API: signature helpers (cache key components)
-# ---------------------------------------------------------------------------
-
-
-def stable_hash(*parts: Any) -> str:
-    """Compute a deterministic SHA1 of the str repr of ``parts``.
-
-    Used for cache key components that need a fixed-width representation
-    (system prompt, tool list, etc.). SHA1 is fine here — this is not a
-    security boundary, just a content fingerprint.
-    """
-    h = hashlib.sha1(usedforsecurity=False)
-    for p in parts:
-        h.update(repr(p).encode("utf-8", errors="replace"))
-        h.update(b"\x1f")  # ASCII unit separator between parts
-    return h.hexdigest()
-
-
-def tools_signature(
-    tools: list[Any] | tuple[Any, ...],
-    *,
-    available_connectors: list[str] | None,
-    available_document_types: list[str] | None,
-) -> str:
-    """Hash the bound-tool surface for cache-key purposes.
-
-    The signature changes whenever:
-
-    * A tool is added or removed from the bound list (built-in toggles,
-      MCP tools loaded for the user changes, gating rules flip, etc.).
-    * The available connectors / document types for the search space
-      change (new connector added, last connector removed, new document
-      type indexed). Because :func:`get_connector_gated_tools` derives
-      ``modified_disabled_tools`` from ``available_connectors``, the
-      tool surface is technically already covered — but we hash the
-      connector list separately so an empty-list "no tools changed"
-      situation still rotates the key when, say, the user re-adds a
-      connector that gates a tool we were already not exposing.
-
-    Stays stable across:
-
-    * Process restarts (tool names + descriptions are static).
-    * Different replicas (everyone gets the same hash for the same
-      inputs).
-    """
-    tool_descriptors = sorted(
-        (getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools
-    )
-    connectors = sorted(available_connectors or [])
-    doc_types = sorted(available_document_types or [])
-    return stable_hash(tool_descriptors, connectors, doc_types)
-
-
-def flags_signature(flags: Any) -> str:
-    """Hash the resolved :class:`AgentFeatureFlags` dataclass.
-
-    Frozen dataclasses are deterministically reprable, so a SHA1 of their
-    repr is a stable fingerprint. Restart safe (flags are read once at
-    process boot).
-    """
-    return stable_hash(repr(flags))
-
-
-def system_prompt_hash(system_prompt: str) -> str:
-    """Hash a system prompt string. Cheap, ~30µs for typical prompts."""
-    return hashlib.sha1(
-        system_prompt.encode("utf-8", errors="replace"),
-        usedforsecurity=False,
-    ).hexdigest()
-
-
-# ---------------------------------------------------------------------------
-# Cache implementation
-# ---------------------------------------------------------------------------
-
-
-@dataclass
-class _Entry:
-    value: Any
-    created_at: float
-    last_used_at: float
-
-
-class _AgentCache:
-    """In-process TTL-LRU cache with per-key in-flight de-duplication.
-
-    NOT THREAD-SAFE in the multithreading sense — designed for a single
-    asyncio event loop. Uvicorn runs one event loop per worker process,
-    so this is fine; multi-worker deployments simply each maintain their
-    own cache.
-    """
-
-    def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
-        self._maxsize = maxsize
-        self._ttl = ttl_seconds
-        self._entries: OrderedDict[str, _Entry] = OrderedDict()
-        # One lock per key — guards "build" so concurrent cold misses on
-        # the same key wait for the first build instead of all racing.
-        self._locks: dict[str, asyncio.Lock] = {}
-
-    def _now(self) -> float:
-        return time.monotonic()
-
-    def _is_fresh(self, entry: _Entry) -> bool:
-        return (self._now() - entry.created_at) < self._ttl
-
-    def _evict_if_full(self) -> None:
-        while len(self._entries) >= self._maxsize:
-            evicted_key, _ = self._entries.popitem(last=False)
-            self._locks.pop(evicted_key, None)
-            _perf_log.info(
-                "[agent_cache] evict key=%s reason=lru size=%d",
-                _short(evicted_key),
-                len(self._entries),
-            )
-
-    def _touch(self, key: str, entry: _Entry) -> None:
-        entry.last_used_at = self._now()
-        self._entries.move_to_end(key, last=True)
-
-    async def get_or_build(
-        self,
-        key: str,
-        *,
-        builder: Callable[[], Awaitable[Any]],
-    ) -> Any:
-        """Return the cached value for ``key`` or call ``builder()`` to make it.
-
-        ``builder`` MUST be idempotent — concurrent cold misses on the
-        same key collapse to a single ``builder()`` call (the others
-        wait on the in-flight lock and observe the populated entry on
-        wake).
-        """
-        # Fast path: hot hit.
-        entry = self._entries.get(key)
-        if entry is not None and self._is_fresh(entry):
-            self._touch(key, entry)
-            _perf_log.info(
-                "[agent_cache] hit key=%s age=%.1fs size=%d",
-                _short(key),
-                self._now() - entry.created_at,
-                len(self._entries),
-            )
-            return entry.value
-
-        # Stale entry — drop it; rebuild below.
-        if entry is not None and not self._is_fresh(entry):
-            _perf_log.info(
-                "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
-                _short(key),
-                self._now() - entry.created_at,
-                self._ttl,
-            )
-            self._entries.pop(key, None)
-
-        # Slow path: serialize concurrent misses for the same key.
-        lock = self._locks.setdefault(key, asyncio.Lock())
-        async with lock:
-            # Double-check after acquiring the lock — another waiter may
-            # have populated the entry while we slept.
-            entry = self._entries.get(key)
-            if entry is not None and self._is_fresh(entry):
-                self._touch(key, entry)
-                _perf_log.info(
-                    "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
-                    _short(key),
-                    self._now() - entry.created_at,
-                    len(self._entries),
-                )
-                return entry.value
-
-            t0 = time.perf_counter()
-            try:
-                value = await builder()
-            except BaseException:
-                # Don't cache failed builds; let the next caller retry.
-                _perf_log.warning(
-                    "[agent_cache] build_failed key=%s elapsed=%.3fs",
-                    _short(key),
-                    time.perf_counter() - t0,
-                )
-                raise
-            elapsed = time.perf_counter() - t0
-
-            # Insert + evict.
-            self._evict_if_full()
-            now = self._now()
-            self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
-            self._entries.move_to_end(key, last=True)
-            _perf_log.info(
-                "[agent_cache] miss key=%s build=%.3fs size=%d",
-                _short(key),
-                elapsed,
-                len(self._entries),
-            )
-            return value
-
-    def invalidate(self, key: str) -> bool:
-        """Drop a single entry; return True if anything was removed."""
-        removed = self._entries.pop(key, None) is not None
-        self._locks.pop(key, None)
-        if removed:
-            _perf_log.info(
-                "[agent_cache] invalidate key=%s size=%d",
-                _short(key),
-                len(self._entries),
-            )
-        return removed
-
-    def invalidate_prefix(self, prefix: str) -> int:
-        """Drop every entry whose key starts with ``prefix``. Returns count."""
-        keys = [k for k in self._entries if k.startswith(prefix)]
-        for k in keys:
-            self._entries.pop(k, None)
-            self._locks.pop(k, None)
-        if keys:
-            _perf_log.info(
-                "[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
-                _short(prefix),
-                len(keys),
-                len(self._entries),
-            )
-        return len(keys)
-
-    def clear(self) -> None:
-        n = len(self._entries)
-        self._entries.clear()
-        self._locks.clear()
-        if n:
-            _perf_log.info("[agent_cache] clear removed=%d", n)
-
-    def stats(self) -> dict[str, Any]:
-        return {
-            "size": len(self._entries),
-            "maxsize": self._maxsize,
-            "ttl_seconds": self._ttl,
-        }
-
-
-def _short(key: str, n: int = 16) -> str:
-    """Truncate keys for log lines so they don't blow up log volume."""
-    return key if len(key) <= n else f"{key[:n]}..."
-
-
-# ---------------------------------------------------------------------------
-# Module-level singleton
-# ---------------------------------------------------------------------------
-
-_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
-_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
-
-_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
-
-
-def get_cache() -> _AgentCache:
-    """Return the process-wide compiled-agent cache singleton."""
-    return _cache
-
-
-def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
-    """Replace the singleton with a fresh cache. Tests only."""
-    global _cache
-    _cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
-    return _cache
-
+from app.agents.shared.agent_cache import (
+    flags_signature,
+    get_cache,
+    reload_for_tests,
+    stable_hash,
+    system_prompt_hash,
+    tools_signature,
+)

 __all__ = [
    "flags_signature",
--- a/surfsense_backend/app/agents/new_chat/connector_searchable_types.py
+++ b/surfsense_backend/app/agents/new_chat/connector_searchable_types.py
@@ -1,100 +1,11 @@
-"""Map configured connectors to the searchable document/connector types.
+"""Backward-compatible shim.

-This is agent-agnostic infrastructure shared by every agent factory (single-
-and multi-agent). It translates the connectors a search space has enabled into
-the set of searchable type strings that pre-search middleware and ``web_search``
-understand, and always layers in the document types that exist independently of
-any connector (uploads, notes, extension captures, YouTube).
-
-It lives in its own module — rather than inside a specific agent factory — so
-that retiring or moving any single agent never disturbs the others' access to
-this mapping.
+Moved to ``app.agents.shared.connector_searchable_types``. Re-exported here for
+the frozen single-agent stack (``chat_deepagent``) until that stack is retired.
 """

-from __future__ import annotations
+from app.agents.shared.connector_searchable_types import (
+    map_connectors_to_searchable_types,
+)

-from typing import Any
-
-# Maps SearchSourceConnectorType enum values to the searchable document/connector types
-# used by pre-search middleware and web_search.
-# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
-# the web_search tool; all others are considered local/indexed data.
-_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
-    # Live search connectors (handled by web_search tool)
-    "TAVILY_API": "TAVILY_API",
-    "LINKUP_API": "LINKUP_API",
-    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
-    # Local/indexed connectors (handled by KB pre-search middleware)
-    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
-    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
-    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
-    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
-    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
-    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
-    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
-    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
-    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
-    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
-    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
-    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
-    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
-    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
-    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
-    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
-    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
-    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
-    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
-    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
-    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
-    # Composio connectors (unified to native document types).
-    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
-    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
-    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
-    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
-}
-
-# Document types that don't come from SearchSourceConnector but should always be searchable
-_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
-    "EXTENSION",  # Browser extension data
-    "FILE",  # Uploaded files
-    "NOTE",  # User notes
-    "YOUTUBE_VIDEO",  # YouTube videos
-]
-
-
-def map_connectors_to_searchable_types(
-    connector_types: list[Any],
-) -> list[str]:
-    """
-    Map SearchSourceConnectorType enums to searchable document/connector types.
-
-    This function:
-    1. Converts connector type enums to their searchable counterparts
-    2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
-    3. Deduplicates while preserving order
-
-    Args:
-        connector_types: List of SearchSourceConnectorType enum values
-
-    Returns:
-        List of searchable connector/document type strings
-    """
-    result_set: set[str] = set()
-    result_list: list[str] = []
-
-    # Add always-available document types first
-    for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
-        if doc_type not in result_set:
-            result_set.add(doc_type)
-            result_list.append(doc_type)
-
-    # Map each connector type to its searchable equivalent
-    for ct in connector_types:
-        # Handle both enum and string types
-        ct_str = ct.value if hasattr(ct, "value") else str(ct)
-        searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
-        if searchable and searchable not in result_set:
-            result_set.add(searchable)
-            result_list.append(searchable)
-
-    return result_list
+__all__ = ["map_connectors_to_searchable_types"]
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -1,125 +1,21 @@
-"""
-Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`.
+"""Backward-compatible shim.

-The composer split the previous monolithic prompt string into a fragment
-tree under ``prompts/`` plus a model-family dispatch step (see the
-composer module docstring for credits). This module preserves the public
-function surface (``build_surfsense_system_prompt`` /
-``build_configurable_system_prompt`` /
-``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
-that existing call sites — `chat_deepagent.py`, anonymous chat routes,
-and the configurable-prompt admin path — keep working without churn.
-
-For new call sites prefer importing ``compose_system_prompt`` directly
-from :mod:`app.agents.new_chat.prompts.composer`.
+Moved to ``app.agents.shared.system_prompt``. Re-exported here for the frozen
+single-agent stack (``chat_deepagent``) until that stack is retired.
 """

-from __future__ import annotations
-
-from datetime import UTC, datetime
-
-from app.db import ChatVisibility
-
-from .prompts.composer import (
-    _read_fragment,
+from app.agents.shared.system_prompt import (
+    SURFSENSE_CITATION_INSTRUCTIONS,
+    SURFSENSE_NO_CITATION_INSTRUCTIONS,
+    SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE,
+    SURFSENSE_SYSTEM_PROMPT,
+    build_configurable_system_prompt,
+    build_surfsense_system_prompt,
    compose_system_prompt,
    detect_provider_variant,
+    get_default_system_instructions,
 )

-# Optional routing fragments under ``prompts/routing/`` (see composer).
-_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
-
-# Public re-exports for backwards compatibility (some legacy code reads the
-# raw default-instructions text directly).
-SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
-    "<system_instruction>\nDefault SurfSense agent system instructions are now\n"
-    "composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
-    "</system_instruction>"
-)
-
-# Citation block re-exposed for legacy importers that referenced this constant
-# directly. The composer is the canonical source; this is a frozen snapshot
-# loaded at module-init time.
-SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
-SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
-
-
-def build_surfsense_system_prompt(
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build the default SurfSense system prompt (citations on, defaults).
-
-    See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        citations_enabled=True,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def build_configurable_system_prompt(
-    custom_system_instructions: str | None = None,
-    use_default_system_instructions: bool = True,
-    citations_enabled: bool = True,
-    today: datetime | None = None,
-    thread_visibility: ChatVisibility | None = None,
-    enabled_tool_names: set[str] | None = None,
-    disabled_tool_names: set[str] | None = None,
-    mcp_connector_tools: dict[str, list[str]] | None = None,
-    *,
-    model_name: str | None = None,
-) -> str:
-    """Build a configurable SurfSense system prompt (NewLLMConfig path).
-
-    See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt`
-    for full parameter docs.
-    """
-    return compose_system_prompt(
-        today=today,
-        thread_visibility=thread_visibility,
-        enabled_tool_names=enabled_tool_names,
-        disabled_tool_names=disabled_tool_names,
-        mcp_connector_tools=mcp_connector_tools,
-        custom_system_instructions=custom_system_instructions,
-        use_default_system_instructions=use_default_system_instructions,
-        citations_enabled=citations_enabled,
-        model_name=model_name,
-        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
-    )
-
-
-def get_default_system_instructions() -> str:
-    """Return the default ``<system_instruction>`` block (no tools / citations).
-
-    Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``.
-    The output reflects the current fragment tree, not a baked-in constant.
-    """
-    resolved_today = datetime.now(UTC).date().isoformat()
-    from .prompts.composer import _build_system_instructions  # local import
-
-    return _build_system_instructions(
-        visibility=ChatVisibility.PRIVATE,
-        resolved_today=resolved_today,
-    ).strip()
-
-
-# Backwards compatibility — some modules import the constant directly.
-SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
-
-
 __all__ = [
    "SURFSENSE_CITATION_INSTRUCTIONS",
    "SURFSENSE_NO_CITATION_INSTRUCTIONS",
--- a/surfsense_backend/app/agents/shared/agent_cache.py
+++ b/surfsense_backend/app/agents/shared/agent_cache.py
@ -0,0 +1,357 @@
+"""TTL-LRU cache for compiled SurfSense deep agents.
+
+Why this exists
+---------------
+
+``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
+turn:
+
+1. Discover connectors & document types from Postgres (~50-200ms)
+2. Build the tool list (built-in + MCP) (~200ms-1.7s)
+3. Compose the system prompt
+4. Construct ~15 middleware instances (CPU)
+5. Eagerly compile the general-purpose subagent
+   (``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
+   which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure
+   CPU work)
+6. Compile the outer LangGraph
+
+For a single thread, all six steps produce the SAME object on every turn
+unless the user has changed their LLM config, toggled a feature flag,
+added a connector, etc. The right answer is to compile ONCE per
+"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
+every subsequent turn on the same thread.
+
+Why a per-thread key (not a global pool)
+----------------------------------------
+
+Most middleware in the SurfSense stack captures per-thread state in
+``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
+``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
+would silently leak state across users and threads. Keying the cache on
+``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
+turns on the same thread without changing any middleware's behavior.
+
+Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
+(read via ``runtime.context``) so the cache can collapse to a single
+``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
+then, per-thread keying is the only safe option.
+
+Cache shape
+-----------
+
+* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
+  minutes — matches a typical chat session). ``maxsize`` (default 256)
+  caps memory; LRU evicts least-recently-used on overflow.
+* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
+  cold misses on the same key wait for the first build instead of
+  building N times.
+* Process-local: this is an in-memory cache. Multi-replica deployments
+  pay the build cost once per replica per key. That's fine; the working
+  set per replica is small (one entry per active thread on that replica).
+
+Telemetry
+---------
+
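+Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``. The
+event name follows the tag:
+
+  * ``hit`` — cache hit, microseconds-fast (``coalesced=true`` when the
+    caller waited on another caller's in-flight build)
+  * ``miss`` — first build for this key, includes build duration
+  * ``stale`` — entry was found but expired; rebuilt
+  * ``evict`` — LRU eviction (size-limited)
+  * ``build_failed`` — the builder raised; nothing was cached
+
+Most lines also carry a ``size=`` field: the cache occupancy at the time
+the line was logged.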
+"""
+
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import logging
+import os
+import time
+from collections import OrderedDict
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass
+from typing import Any
+
+from app.utils.perf import get_perf_logger
+
+logger = logging.getLogger(__name__)
+_perf_log = get_perf_logger()
+
+
+# ---------------------------------------------------------------------------
+# Public API: signature helpers (cache key components)
+# ---------------------------------------------------------------------------
+
+
+def stable_hash(*parts: Any) -> str:
+    """Fingerprint ``parts`` as the hex SHA1 digest of their ``repr`` strings.
+
+    Each part is rendered with ``repr``, encoded as UTF-8 (unencodable
+    characters are replaced) and terminated with an ASCII unit separator
+    (``0x1f``) so that adjacent parts cannot run together. The digest is
+    only as stable as the ``repr`` of the inputs.
+
+    SHA1 is fine here — this is a content fingerprint for cache keys
+    (system prompt, tool list, etc.), not a security boundary.
+    """
+    separator = b"\x1f"  # ASCII unit separator between parts
+    payload = b"".join(
+        repr(part).encode("utf-8", errors="replace") + separator for part in parts
+    )
+    return hashlib.sha1(payload, usedforsecurity=False).hexdigest()
+
+
+def tools_signature(
+    tools: list[Any] | tuple[Any, ...],
+    *,
+    available_connectors: list[str] | None,
+    available_document_types: list[str] | None,
+) -> str:
+    """Hash the bound-tool surface for cache-key purposes.
+
+    The signature changes whenever:
+
+    * A tool is added or removed from the bound list (built-in toggles,
+      MCP tools loaded for the user changes, gating rules flip, etc.).
+    * The available connectors / document types for the search space
+      change (new connector added, last connector removed, new document
+      type indexed). Because :func:`get_connector_gated_tools` derives
+      ``modified_disabled_tools`` from ``available_connectors``, the
+      tool surface is technically already covered — but we hash the
+      connector list separately so an empty-list "no tools changed"
+      situation still rotates the key when, say, the user re-adds a
+      connector that gates a tool we were already not exposing.
+
+    Stays stable across:
+
+    * Process restarts (tool names + descriptions are static).
+    * Different replicas (everyone gets the same hash for the same
+      inputs).
+
+    Args:
+        tools: Bound tools. Each contributes ``(name, description)``; an
+            object without a ``name`` attribute contributes its ``repr``,
+            and one without a ``description`` contributes ``""``.
+        available_connectors: Connector names; ``None`` is treated as empty.
+        available_document_types: Document type names; ``None`` is treated
+            as empty.
+
+    Returns:
+        Hex digest produced by :func:`stable_hash` over the sorted tool
+        descriptors, sorted connectors and sorted document types. Input
+        order therefore does not affect the result.
+    """
+    # Sort with a normalising key rather than comparing the raw tuples:
+    # two tools sharing a name would otherwise fall through to comparing
+    # descriptions, and ``None < "text"`` raises TypeError. The descriptors
+    # themselves are hashed unmodified, so existing signatures are unchanged.
+    tool_descriptors = sorted(
+        ((getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools),
+        key=lambda d: (str(d[0]), "" if d[1] is None else str(d[1])),
+    )
+    connectors = sorted(available_connectors or [])
+    doc_types = sorted(available_document_types or [])
+    return stable_hash(tool_descriptors, connectors, doc_types)
+
+
+def flags_signature(flags: Any) -> str:
+    """Fingerprint the resolved :class:`AgentFeatureFlags` value.
+
+    The flags object is rendered with ``repr`` and that string is handed to
+    :func:`stable_hash`, so the result is stable exactly as long as the
+    ``repr`` is. Dataclass reprs list every field by value, which makes
+    them suitable here.
+    """
+    flags_repr = repr(flags)
+    return stable_hash(flags_repr)
+
+
+def system_prompt_hash(system_prompt: str) -> str:
+    """Return the hex SHA1 digest of the UTF-8 encoded system prompt.
+
+    Unlike :func:`stable_hash`, the raw text is hashed directly (no
+    ``repr``, no separator). Characters that cannot be encoded are
+    replaced rather than raising.
+    """
+    encoded = system_prompt.encode("utf-8", errors="replace")
+    digest = hashlib.sha1(encoded, usedforsecurity=False)
+    return digest.hexdigest()
+
+
+# ---------------------------------------------------------------------------
+# Cache implementation
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _Entry:
+    """One cached value plus the timestamps kept alongside it."""
+
+    # The cached object handed back to callers of ``get_or_build``.
+    value: Any
+    # ``time.monotonic()`` reading taken when the entry was inserted; the
+    # TTL freshness check is measured from this.
+    created_at: float
+    # ``time.monotonic()`` reading of the most recent hit, refreshed by
+    # ``_AgentCache._touch``. Not consulted by the freshness check.
+    last_used_at: float
+
+
+class _AgentCache:
+    """In-process TTL-LRU cache with per-key in-flight de-duplication.
+
+    Entries live in an :class:`~collections.OrderedDict` ordered from
+    least- to most-recently used. Freshness is measured from an entry's
+    creation time (not its last use) on the monotonic clock. Expiry is
+    lazy: a stale entry is only dropped when it is looked up again or
+    pushed out by LRU pressure.
+
+    NOT THREAD-SAFE in the multithreading sense — designed for a single
+    asyncio event loop. Uvicorn runs one event loop per worker process,
+    so this is fine; multi-worker deployments simply each maintain their
+    own cache.
+    """
+
+    def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
+        """Create an empty cache.
+
+        Args:
+            maxsize: Maximum number of entries retained. Values below 1
+                behave like 1 (a single-slot cache) instead of failing.
+            ttl_seconds: Age, counted from insertion, after which an entry
+                is treated as stale and rebuilt on its next lookup.
+        """
+        self._maxsize = maxsize
+        self._ttl = ttl_seconds
+        self._entries: OrderedDict[str, _Entry] = OrderedDict()
+        # One lock per key — guards "build" so concurrent cold misses on
+        # the same key wait for the first build instead of all racing.
+        self._locks: dict[str, asyncio.Lock] = {}
+
+    def _now(self) -> float:
+        """Return the monotonic clock reading used for all age arithmetic."""
+        return time.monotonic()
+
+    def _is_fresh(self, entry: _Entry) -> bool:
+        """Return True while ``entry`` is younger than the configured TTL."""
+        return (self._now() - entry.created_at) < self._ttl
+
+    def _evict_if_full(self) -> None:
+        """Evict least-recently-used entries until one more can be inserted.
+
+        The lock registered for each evicted key is discarded with it.
+        """
+        # The emptiness check matters: with ``maxsize <= 0`` the size test
+        # alone is always true and ``popitem`` on an empty OrderedDict
+        # raises KeyError.
+        while self._entries and len(self._entries) >= self._maxsize:
+            evicted_key, _ = self._entries.popitem(last=False)
+            self._locks.pop(evicted_key, None)
+            _perf_log.info(
+                "[agent_cache] evict key=%s reason=lru size=%d",
+                _short(evicted_key),
+                len(self._entries),
+            )
+
+    def _touch(self, key: str, entry: _Entry) -> None:
+        """Record a hit: stamp ``last_used_at`` and mark ``key`` most recent."""
+        entry.last_used_at = self._now()
+        self._entries.move_to_end(key, last=True)
+
+    async def get_or_build(
+        self,
+        key: str,
+        *,
+        builder: Callable[[], Awaitable[Any]],
+    ) -> Any:
+        """Return the cached value for ``key`` or call ``builder()`` to make it.
+
+        ``builder`` MUST be idempotent — concurrent cold misses on the
+        same key collapse to a single ``builder()`` call (the others
+        wait on the in-flight lock and observe the populated entry on
+        wake).
+
+        Args:
+            key: Cache key identifying the value.
+            builder: Zero-argument coroutine function producing the value
+                on a miss.
+
+        Returns:
+            The cached or freshly built value.
+
+        Raises:
+            BaseException: Whatever ``builder()`` raises is logged and
+                re-raised unchanged; nothing is cached for the key. The
+                key's lock stays registered until it is invalidated or
+                the cache is cleared.
+        """
+        # Fast path: hot hit.
+        entry = self._entries.get(key)
+        if entry is not None and self._is_fresh(entry):
+            self._touch(key, entry)
+            _perf_log.info(
+                "[agent_cache] hit key=%s age=%.1fs size=%d",
+                _short(key),
+                self._now() - entry.created_at,
+                len(self._entries),
+            )
+            return entry.value
+
+        # Stale entry — drop it; rebuild below. (A fresh entry returned
+        # above, so any entry still in hand here has expired.)
+        if entry is not None:
+            _perf_log.info(
+                "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
+                _short(key),
+                self._now() - entry.created_at,
+                self._ttl,
+            )
+            self._entries.pop(key, None)
+
+        # Slow path: serialize concurrent misses for the same key.
+        lock = self._locks.setdefault(key, asyncio.Lock())
+        async with lock:
+            # Double-check after acquiring the lock — another waiter may
+            # have populated the entry while we slept.
+            entry = self._entries.get(key)
+            if entry is not None and self._is_fresh(entry):
+                self._touch(key, entry)
+                _perf_log.info(
+                    "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
+                    _short(key),
+                    self._now() - entry.created_at,
+                    len(self._entries),
+                )
+                return entry.value
+
+            t0 = time.perf_counter()
+            try:
+                value = await builder()
+            except BaseException:
+                # Don't cache failed builds; let the next caller retry.
+                # BaseException is caught only to log; it is always re-raised.
+                _perf_log.warning(
+                    "[agent_cache] build_failed key=%s elapsed=%.3fs",
+                    _short(key),
+                    time.perf_counter() - t0,
+                )
+                raise
+            elapsed = time.perf_counter() - t0
+
+            # Insert + evict. Remove any leftover entry for this key first
+            # (it can only be stale here) so that replacing it does not
+            # count toward capacity and push out an unrelated live entry.
+            self._entries.pop(key, None)
+            self._evict_if_full()
+            now = self._now()
+            self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
+            self._entries.move_to_end(key, last=True)
+            _perf_log.info(
+                "[agent_cache] miss key=%s build=%.3fs size=%d",
+                _short(key),
+                elapsed,
+                len(self._entries),
+            )
+            return value
+
+    def invalidate(self, key: str) -> bool:
+        """Drop a single entry; return True if anything was removed."""
+        removed = self._entries.pop(key, None) is not None
+        self._locks.pop(key, None)
+        if removed:
+            _perf_log.info(
+                "[agent_cache] invalidate key=%s size=%d",
+                _short(key),
+                len(self._entries),
+            )
+        return removed
+
+    def invalidate_prefix(self, prefix: str) -> int:
+        """Drop every entry whose key starts with ``prefix``. Returns count."""
+        # Materialise the matches first; the dict is mutated in the loop.
+        keys = [k for k in self._entries if k.startswith(prefix)]
+        for k in keys:
+            self._entries.pop(k, None)
+            self._locks.pop(k, None)
+        if keys:
+            _perf_log.info(
+                "[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
+                _short(prefix),
+                len(keys),
+                len(self._entries),
+            )
+        return len(keys)
+
+    def clear(self) -> None:
+        """Remove every entry and every per-key lock."""
+        n = len(self._entries)
+        self._entries.clear()
+        self._locks.clear()
+        if n:
+            _perf_log.info("[agent_cache] clear removed=%d", n)
+
+    def stats(self) -> dict[str, Any]:
+        """Return current occupancy together with the configured limits."""
+        return {
+            "size": len(self._entries),
+            "maxsize": self._maxsize,
+            "ttl_seconds": self._ttl,
+        }
+
+
+def _short(key: str, n: int = 16) -> str:
+    """Abbreviate ``key`` for log lines so long keys don't inflate log volume.
+
+    Keys of at most ``n`` characters are returned untouched; longer ones
+    are cut to their first ``n`` characters followed by ``...``.
+    """
+    if len(key) <= n:
+        return key
+    head = key[:n]
+    return f"{head}..."
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton
+# ---------------------------------------------------------------------------
+
+# Sizing knobs, read from the environment once when this module is imported.
+# A value that does not parse as int / float raises ``ValueError`` at import.
+_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
+_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
+
+# The singleton instance; ``reload_for_tests`` rebinds this name.
+_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
+
+
+def get_cache() -> _AgentCache:
+    """Return the process-wide compiled-agent cache singleton.
+
+    The module global is looked up on every call, so a cache installed by
+    ``reload_for_tests`` is what later calls return.
+    """
+    return _cache
+
+
+def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
+    """Install and return a brand-new, empty cache singleton. Tests only.
+
+    The previous singleton is not cleared, merely unreferenced by this
+    module; anything still holding it keeps using the old instance.
+    """
+    global _cache
+    replacement = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
+    _cache = replacement
+    return replacement
+
+
+__all__ = [
+    "flags_signature",
+    "get_cache",
+    "reload_for_tests",
+    "stable_hash",
+    "system_prompt_hash",
+    "tools_signature",
+]
--- a/surfsense_backend/app/agents/shared/connector_searchable_types.py
+++ b/surfsense_backend/app/agents/shared/connector_searchable_types.py
@ -0,0 +1,100 @@
+"""Map configured connectors to the searchable document/connector types.
+
+This is agent-agnostic infrastructure shared by every agent factory (single-
+and multi-agent). It translates the connectors a search space has enabled into
+the set of searchable type strings that pre-search middleware and ``web_search``
+understand, and always layers in the document types that exist independently of
+any connector (uploads, notes, extension captures, YouTube).
+
+It lives in its own module — rather than inside a specific agent factory — so
+that retiring or moving any single agent never disturbs the others' access to
+this mapping.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+# Maps SearchSourceConnectorType enum values to the searchable document/connector types
+# used by pre-search middleware and web_search.
+# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
+# the web_search tool; all others are considered local/indexed data.
+_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
+    # Live search connectors (handled by web_search tool)
+    "TAVILY_API": "TAVILY_API",
+    "LINKUP_API": "LINKUP_API",
+    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
+    # Local/indexed connectors (handled by KB pre-search middleware)
+    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
+    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
+    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
+    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
+    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
+    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
+    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
+    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
+    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
+    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
+    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
+    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
+    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
+    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
+    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
+    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
+    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
+    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
+    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
+    # Composio connectors (unified to native document types).
+    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
+    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
+    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
+    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
+}
+
+# Document types that don't come from SearchSourceConnector but should always be searchable
+_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
+    "EXTENSION",  # Browser extension data
+    "FILE",  # Uploaded files
+    "NOTE",  # User notes
+    "YOUTUBE_VIDEO",  # YouTube videos
+]
+
+
+def map_connectors_to_searchable_types(
+    connector_types: list[Any],
+) -> list[str]:
+    """
+    Translate connector types into the ordered list of searchable types.
+
+    The result always starts with the always-available document types
+    (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO), followed by the searchable type
+    of each given connector in input order. Every type appears once, at the
+    position of its first occurrence. Connectors with no entry in the
+    mapping table are ignored.
+
+    Args:
+        connector_types: SearchSourceConnectorType enum members, or plain
+            values; anything without a ``.value`` attribute is stringified.
+
+    Returns:
+        De-duplicated list of searchable connector/document type strings.
+    """
+    # A dict preserves insertion order and de-duplicates in one structure.
+    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)
+
+    for connector in connector_types:
+        # Enum members expose ``.value``; anything else is stringified.
+        type_name = connector.value if hasattr(connector, "value") else str(connector)
+        mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(type_name)
+        if mapped:
+            ordered.setdefault(mapped, None)
+
+    return list(ordered)
--- a/surfsense_backend/app/agents/new_chat/prompts/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/base/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/base/agent_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/agent_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/agent_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/agent_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/citations_off.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/citations_off.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/parameter_resolution.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/parameter_resolution.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/composer.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/composer.py
@ -2,7 +2,7 @@
 Prompt composer for the SurfSense ``new_chat`` agent.

 This module assembles the agent's system prompt from the markdown fragments
-under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic
+under :mod:`app.agents.shared.prompts`. It replaces the monolithic
 ``system_prompt.py`` with a clean, fragment-based composition:

 ::
@ -119,7 +119,7 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
 # -----------------------------------------------------------------------------


-_PROMPTS_PACKAGE = "app.agents.new_chat.prompts"
+_PROMPTS_PACKAGE = "app.agents.shared.prompts"


 def _read_fragment(subpath: str) -> str:
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_image.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/generate_image.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_podcast.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/generate_podcast.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_report.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/generate_report.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_resume.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/generate_resume.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_video_presentation.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/generate_video_presentation.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/scrape_webpage.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/scrape_webpage.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/examples/web_search.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/examples/web_search.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/default.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/default.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/google.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/google.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md
--- a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md
--- a/surfsense_backend/app/agents/new_chat/prompts/routing/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/routing/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/routing/jira.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/routing/jira.md
--- a/surfsense_backend/app/agents/new_chat/prompts/routing/linear.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/routing/linear.md
--- a/surfsense_backend/app/agents/new_chat/prompts/routing/slack.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/routing/slack.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/__init__.py
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/_preamble.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/_preamble.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_image.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/generate_image.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_podcast.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/generate_podcast.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_report.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/generate_report.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_resume.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/generate_resume.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_video_presentation.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/generate_video_presentation.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/scrape_webpage.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/scrape_webpage.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md
--- a/surfsense_backend/app/agents/new_chat/prompts/tools/web_search.md
+++ b/surfsense_backend/app/agents/new_chat/prompts/tools/web_search.md
--- a/surfsense_backend/app/agents/shared/system_prompt.py
+++ b/surfsense_backend/app/agents/shared/system_prompt.py
@ -0,0 +1,133 @@
+"""
+Thin compatibility wrapper around :mod:`app.agents.shared.prompts.composer`.
+
+The composer split the previous monolithic prompt string into a fragment
+tree under ``prompts/`` plus a model-family dispatch step (see the
+composer module docstring for credits). This module preserves the public
+function surface (``build_surfsense_system_prompt`` /
+``build_configurable_system_prompt`` /
+``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
+that existing call sites — `chat_deepagent.py`, anonymous chat routes,
+and the configurable-prompt admin path — keep working without churn.
+
+For new call sites prefer importing ``compose_system_prompt`` directly
+from :mod:`app.agents.shared.prompts.composer`.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from app.db import ChatVisibility
+
+from .prompts.composer import (
+    _read_fragment,
+    compose_system_prompt,
+    detect_provider_variant,
+)
+
+# Optional routing fragments under ``prompts/routing/`` (see composer).
+_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
+
+# Public re-exports for backwards compatibility (some legacy code reads the
+# raw default-instructions text directly).
+SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
+    "<system_instruction>\nDefault SurfSense agent system instructions are now\n"
+    "composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
+    "</system_instruction>"
+)
+
+# Citation block re-exposed for legacy importers that referenced this constant
+# directly. The composer is the canonical source; this is a frozen snapshot
+# loaded at module-init time.
+SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
+SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
+
+
+def build_surfsense_system_prompt(
+    today: datetime | None = None,
+    thread_visibility: ChatVisibility | None = None,
+    enabled_tool_names: set[str] | None = None,
+    disabled_tool_names: set[str] | None = None,
+    mcp_connector_tools: dict[str, list[str]] | None = None,
+    *,
+    model_name: str | None = None,
+) -> str:
+    """Compose the stock SurfSense system prompt with citations switched on.
+
+    Every argument is forwarded unchanged to
+    :func:`app.agents.shared.prompts.composer.compose_system_prompt`, which
+    documents the parameters. This wrapper only pins
+    ``citations_enabled=True`` and the default connector-routing fragments.
+    """
+    prompt = compose_system_prompt(
+        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
+        citations_enabled=True,
+        model_name=model_name,
+        today=today,
+        thread_visibility=thread_visibility,
+        enabled_tool_names=enabled_tool_names,
+        disabled_tool_names=disabled_tool_names,
+        mcp_connector_tools=mcp_connector_tools,
+    )
+    return prompt
+
+
+def build_configurable_system_prompt(
+    custom_system_instructions: str | None = None,
+    use_default_system_instructions: bool = True,
+    citations_enabled: bool = True,
+    today: datetime | None = None,
+    thread_visibility: ChatVisibility | None = None,
+    enabled_tool_names: set[str] | None = None,
+    disabled_tool_names: set[str] | None = None,
+    mcp_connector_tools: dict[str, list[str]] | None = None,
+    *,
+    model_name: str | None = None,
+) -> str:
+    """Compose a SurfSense system prompt from per-config settings (NewLLMConfig path).
+
+    Every argument is forwarded unchanged to
+    :func:`app.agents.shared.prompts.composer.compose_system_prompt`, which
+    documents the parameters. The only value added here is the default
+    connector-routing fragment list.
+    """
+    prompt = compose_system_prompt(
+        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
+        custom_system_instructions=custom_system_instructions,
+        use_default_system_instructions=use_default_system_instructions,
+        citations_enabled=citations_enabled,
+        model_name=model_name,
+        today=today,
+        thread_visibility=thread_visibility,
+        enabled_tool_names=enabled_tool_names,
+        disabled_tool_names=disabled_tool_names,
+        mcp_connector_tools=mcp_connector_tools,
+    )
+    return prompt
+
+
+def get_default_system_instructions() -> str:
+    """Render the default ``<system_instruction>`` block (no tools / citations).
+
+    Handy for pre-filling ``NewLLMConfig.system_instructions`` in the UI.
+    The text is built on each call for private visibility and today's UTC
+    date, so it follows the current fragment tree rather than a baked-in
+    constant. Surrounding whitespace is stripped.
+    """
+    from .prompts.composer import _build_system_instructions  # local import
+
+    today_iso = datetime.now(UTC).date().isoformat()
+    instructions = _build_system_instructions(
+        visibility=ChatVisibility.PRIVATE,
+        resolved_today=today_iso,
+    )
+    return instructions.strip()
+
+
+# Backwards compatibility — some modules import the constant directly.
+SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
+
+
+__all__ = [
+    "SURFSENSE_CITATION_INSTRUCTIONS",
+    "SURFSENSE_NO_CITATION_INSTRUCTIONS",
+    "SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
+    "SURFSENSE_SYSTEM_PROMPT",
+    "build_configurable_system_prompt",
+    "build_surfsense_system_prompt",
+    "compose_system_prompt",
+    "detect_provider_variant",
+    "get_default_system_instructions",
+]
--- a/surfsense_backend/app/routes/new_llm_config_routes.py
+++ b/surfsense_backend/app/routes/new_llm_config_routes.py
@ -13,7 +13,7 @@ from fastapi import APIRouter, Depends, HTTPException
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.future import select

-from app.agents.new_chat.system_prompt import get_default_system_instructions
+from app.agents.shared.system_prompt import get_default_system_instructions
 from app.config import config
 from app.db import (
    NewLLMConfig,
--- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py
@ -6,7 +6,7 @@ from datetime import UTC, datetime

 import pytest

-from app.agents.new_chat.prompts.composer import (
+from app.agents.shared.prompts.composer import (
    ALL_TOOL_NAMES_ORDERED,
    compose_system_prompt,
    detect_provider_variant,
@ -64,7 +64,7 @@ class TestProviderVariantDetection:
        ``gpt-5`` reasoning regex first. Codex is the more specialised
        prompt and mirrors OpenCode's dispatch order.
        """
-        from app.agents.new_chat.prompts.composer import detect_provider_variant
+        from app.agents.shared.prompts.composer import detect_provider_variant

        assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
        assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
--- a/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py
+++ b/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py
@ -16,7 +16,7 @@ from dataclasses import dataclass

 import pytest

-from app.agents.new_chat.agent_cache import (
+from app.agents.shared.agent_cache import (
    flags_signature,
    reload_for_tests,
    stable_hash,