diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py index dac8c0631..4d726abb6 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/agent_cache.py @@ -10,7 +10,7 @@ from langchain_core.language_models import BaseChatModel from langchain_core.tools import BaseTool from langgraph.types import Checkpointer -from app.agents.new_chat.agent_cache import ( +from app.agents.shared.agent_cache import ( flags_signature, get_cache, stable_hash, diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py index 27f17b0db..cac59ae97 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/runtime/factory.py @@ -19,7 +19,7 @@ from app.agents.multi_agent_chat.subagents import ( from app.agents.multi_agent_chat.subagents.mcp_tools.index import ( load_mcp_tools_by_connector, ) -from app.agents.new_chat.connector_searchable_types import ( +from app.agents.shared.connector_searchable_types import ( map_connectors_to_searchable_types, ) from app.agents.shared.feature_flags import AgentFeatureFlags, get_flags diff --git a/surfsense_backend/app/agents/new_chat/agent_cache.py b/surfsense_backend/app/agents/new_chat/agent_cache.py index fa8e6fb72..d0500ce2b 100644 --- a/surfsense_backend/app/agents/new_chat/agent_cache.py +++ b/surfsense_backend/app/agents/new_chat/agent_cache.py @@ -1,351 +1,17 @@ -"""TTL-LRU cache for compiled SurfSense deep agents. +"""Backward-compatible shim. -Why this exists ---------------- - -``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat -turn: - -1. 
Discover connectors & document types from Postgres (~50-200ms) -2. Build the tool list (built-in + MCP) (~200ms-1.7s) -3. Compose the system prompt -4. Construct ~15 middleware instances (CPU) -5. Eagerly compile the general-purpose subagent - (``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously, - which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure - CPU work) -6. Compile the outer LangGraph - -For a single thread, all six steps produce the SAME object on every turn -unless the user has changed their LLM config, toggled a feature flag, -added a connector, etc. The right answer is to compile ONCE per -"agent shape" and reuse the resulting :class:`CompiledStateGraph` for -every subsequent turn on the same thread. - -Why a per-thread key (not a global pool) ----------------------------------------- - -Most middleware in the SurfSense stack captures per-thread state in -``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``, -``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse -would silently leak state across users and threads. Keying the cache on -``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated -turns on the same thread without changing any middleware's behavior. - -Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema` -(read via ``runtime.context``) so the cache can collapse to a single -``(llm_config_id, search_space_id, ...)`` key shared across threads. Until -then, per-thread keying is the only safe option. - -Cache shape ------------ - -* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30 - minutes — matches a typical chat session). ``maxsize`` (default 256) - caps memory; LRU evicts least-recently-used on overflow. -* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent - cold misses on the same key wait for the first build instead of - building N times. -* Process-local: this is an in-memory cache. 
Multi-replica deployments - pay the build cost once per replica per key. That's fine; the working - set per replica is small (one entry per active thread on that replica). - -Telemetry ---------- - -Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``: - - * ``hit`` — cache hit, microseconds-fast - * ``miss`` — first build for this key, includes build duration - * ``stale`` — entry was found but expired; rebuilt - * ``evict`` — LRU eviction (size-limited) - * ``size`` — current cache occupancy at lookup time +Moved to ``app.agents.shared.agent_cache``. Re-exported here for the frozen +single-agent stack (``chat_deepagent``) until that stack is retired. """ -from __future__ import annotations - -import asyncio -import hashlib -import logging -import os -import time -from collections import OrderedDict -from collections.abc import Awaitable, Callable -from dataclasses import dataclass -from typing import Any - -from app.utils.perf import get_perf_logger - -logger = logging.getLogger(__name__) -_perf_log = get_perf_logger() - - -# --------------------------------------------------------------------------- -# Public API: signature helpers (cache key components) -# --------------------------------------------------------------------------- - - -def stable_hash(*parts: Any) -> str: - """Compute a deterministic SHA1 of the str repr of ``parts``. - - Used for cache key components that need a fixed-width representation - (system prompt, tool list, etc.). SHA1 is fine here — this is not a - security boundary, just a content fingerprint. - """ - h = hashlib.sha1(usedforsecurity=False) - for p in parts: - h.update(repr(p).encode("utf-8", errors="replace")) - h.update(b"\x1f") # ASCII unit separator between parts - return h.hexdigest() - - -def tools_signature( - tools: list[Any] | tuple[Any, ...], - *, - available_connectors: list[str] | None, - available_document_types: list[str] | None, -) -> str: - """Hash the bound-tool surface for cache-key purposes. 
- - The signature changes whenever: - - * A tool is added or removed from the bound list (built-in toggles, - MCP tools loaded for the user changes, gating rules flip, etc.). - * The available connectors / document types for the search space - change (new connector added, last connector removed, new document - type indexed). Because :func:`get_connector_gated_tools` derives - ``modified_disabled_tools`` from ``available_connectors``, the - tool surface is technically already covered — but we hash the - connector list separately so an empty-list "no tools changed" - situation still rotates the key when, say, the user re-adds a - connector that gates a tool we were already not exposing. - - Stays stable across: - - * Process restarts (tool names + descriptions are static). - * Different replicas (everyone gets the same hash for the same - inputs). - """ - tool_descriptors = sorted( - (getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools - ) - connectors = sorted(available_connectors or []) - doc_types = sorted(available_document_types or []) - return stable_hash(tool_descriptors, connectors, doc_types) - - -def flags_signature(flags: Any) -> str: - """Hash the resolved :class:`AgentFeatureFlags` dataclass. - - Frozen dataclasses are deterministically reprable, so a SHA1 of their - repr is a stable fingerprint. Restart safe (flags are read once at - process boot). - """ - return stable_hash(repr(flags)) - - -def system_prompt_hash(system_prompt: str) -> str: - """Hash a system prompt string. 
Cheap, ~30µs for typical prompts.""" - return hashlib.sha1( - system_prompt.encode("utf-8", errors="replace"), - usedforsecurity=False, - ).hexdigest() - - -# --------------------------------------------------------------------------- -# Cache implementation -# --------------------------------------------------------------------------- - - -@dataclass -class _Entry: - value: Any - created_at: float - last_used_at: float - - -class _AgentCache: - """In-process TTL-LRU cache with per-key in-flight de-duplication. - - NOT THREAD-SAFE in the multithreading sense — designed for a single - asyncio event loop. Uvicorn runs one event loop per worker process, - so this is fine; multi-worker deployments simply each maintain their - own cache. - """ - - def __init__(self, *, maxsize: int, ttl_seconds: float) -> None: - self._maxsize = maxsize - self._ttl = ttl_seconds - self._entries: OrderedDict[str, _Entry] = OrderedDict() - # One lock per key — guards "build" so concurrent cold misses on - # the same key wait for the first build instead of all racing. - self._locks: dict[str, asyncio.Lock] = {} - - def _now(self) -> float: - return time.monotonic() - - def _is_fresh(self, entry: _Entry) -> bool: - return (self._now() - entry.created_at) < self._ttl - - def _evict_if_full(self) -> None: - while len(self._entries) >= self._maxsize: - evicted_key, _ = self._entries.popitem(last=False) - self._locks.pop(evicted_key, None) - _perf_log.info( - "[agent_cache] evict key=%s reason=lru size=%d", - _short(evicted_key), - len(self._entries), - ) - - def _touch(self, key: str, entry: _Entry) -> None: - entry.last_used_at = self._now() - self._entries.move_to_end(key, last=True) - - async def get_or_build( - self, - key: str, - *, - builder: Callable[[], Awaitable[Any]], - ) -> Any: - """Return the cached value for ``key`` or call ``builder()`` to make it. 
- - ``builder`` MUST be idempotent — concurrent cold misses on the - same key collapse to a single ``builder()`` call (the others - wait on the in-flight lock and observe the populated entry on - wake). - """ - # Fast path: hot hit. - entry = self._entries.get(key) - if entry is not None and self._is_fresh(entry): - self._touch(key, entry) - _perf_log.info( - "[agent_cache] hit key=%s age=%.1fs size=%d", - _short(key), - self._now() - entry.created_at, - len(self._entries), - ) - return entry.value - - # Stale entry — drop it; rebuild below. - if entry is not None and not self._is_fresh(entry): - _perf_log.info( - "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs", - _short(key), - self._now() - entry.created_at, - self._ttl, - ) - self._entries.pop(key, None) - - # Slow path: serialize concurrent misses for the same key. - lock = self._locks.setdefault(key, asyncio.Lock()) - async with lock: - # Double-check after acquiring the lock — another waiter may - # have populated the entry while we slept. - entry = self._entries.get(key) - if entry is not None and self._is_fresh(entry): - self._touch(key, entry) - _perf_log.info( - "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true", - _short(key), - self._now() - entry.created_at, - len(self._entries), - ) - return entry.value - - t0 = time.perf_counter() - try: - value = await builder() - except BaseException: - # Don't cache failed builds; let the next caller retry. - _perf_log.warning( - "[agent_cache] build_failed key=%s elapsed=%.3fs", - _short(key), - time.perf_counter() - t0, - ) - raise - elapsed = time.perf_counter() - t0 - - # Insert + evict. 
- self._evict_if_full() - now = self._now() - self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now) - self._entries.move_to_end(key, last=True) - _perf_log.info( - "[agent_cache] miss key=%s build=%.3fs size=%d", - _short(key), - elapsed, - len(self._entries), - ) - return value - - def invalidate(self, key: str) -> bool: - """Drop a single entry; return True if anything was removed.""" - removed = self._entries.pop(key, None) is not None - self._locks.pop(key, None) - if removed: - _perf_log.info( - "[agent_cache] invalidate key=%s size=%d", - _short(key), - len(self._entries), - ) - return removed - - def invalidate_prefix(self, prefix: str) -> int: - """Drop every entry whose key starts with ``prefix``. Returns count.""" - keys = [k for k in self._entries if k.startswith(prefix)] - for k in keys: - self._entries.pop(k, None) - self._locks.pop(k, None) - if keys: - _perf_log.info( - "[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d", - _short(prefix), - len(keys), - len(self._entries), - ) - return len(keys) - - def clear(self) -> None: - n = len(self._entries) - self._entries.clear() - self._locks.clear() - if n: - _perf_log.info("[agent_cache] clear removed=%d", n) - - def stats(self) -> dict[str, Any]: - return { - "size": len(self._entries), - "maxsize": self._maxsize, - "ttl_seconds": self._ttl, - } - - -def _short(key: str, n: int = 16) -> str: - """Truncate keys for log lines so they don't blow up log volume.""" - return key if len(key) <= n else f"{key[:n]}..." 
- - -# --------------------------------------------------------------------------- -# Module-level singleton -# --------------------------------------------------------------------------- - -_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256")) -_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800")) - -_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL) - - -def get_cache() -> _AgentCache: - """Return the process-wide compiled-agent cache singleton.""" - return _cache - - -def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache: - """Replace the singleton with a fresh cache. Tests only.""" - global _cache - _cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds) - return _cache - +from app.agents.shared.agent_cache import ( + flags_signature, + get_cache, + reload_for_tests, + stable_hash, + system_prompt_hash, + tools_signature, +) __all__ = [ "flags_signature", diff --git a/surfsense_backend/app/agents/new_chat/connector_searchable_types.py b/surfsense_backend/app/agents/new_chat/connector_searchable_types.py index be193be04..c6efb1a68 100644 --- a/surfsense_backend/app/agents/new_chat/connector_searchable_types.py +++ b/surfsense_backend/app/agents/new_chat/connector_searchable_types.py @@ -1,100 +1,11 @@ -"""Map configured connectors to the searchable document/connector types. +"""Backward-compatible shim. -This is agent-agnostic infrastructure shared by every agent factory (single- -and multi-agent). It translates the connectors a search space has enabled into -the set of searchable type strings that pre-search middleware and ``web_search`` -understand, and always layers in the document types that exist independently of -any connector (uploads, notes, extension captures, YouTube). 
- -It lives in its own module — rather than inside a specific agent factory — so -that retiring or moving any single agent never disturbs the others' access to -this mapping. +Moved to ``app.agents.shared.connector_searchable_types``. Re-exported here for +the frozen single-agent stack (``chat_deepagent``) until that stack is retired. """ -from __future__ import annotations +from app.agents.shared.connector_searchable_types import ( + map_connectors_to_searchable_types, +) -from typing import Any - -# Maps SearchSourceConnectorType enum values to the searchable document/connector types -# used by pre-search middleware and web_search. -# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to -# the web_search tool; all others are considered local/indexed data. -_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = { - # Live search connectors (handled by web_search tool) - "TAVILY_API": "TAVILY_API", - "LINKUP_API": "LINKUP_API", - "BAIDU_SEARCH_API": "BAIDU_SEARCH_API", - # Local/indexed connectors (handled by KB pre-search middleware) - "SLACK_CONNECTOR": "SLACK_CONNECTOR", - "TEAMS_CONNECTOR": "TEAMS_CONNECTOR", - "NOTION_CONNECTOR": "NOTION_CONNECTOR", - "GITHUB_CONNECTOR": "GITHUB_CONNECTOR", - "LINEAR_CONNECTOR": "LINEAR_CONNECTOR", - "DISCORD_CONNECTOR": "DISCORD_CONNECTOR", - "JIRA_CONNECTOR": "JIRA_CONNECTOR", - "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR", - "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR", - "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR", - "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR", - "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type - "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR", - "LUMA_CONNECTOR": "LUMA_CONNECTOR", - "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR", - "WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type - "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR", - "CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type - 
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR", - "DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type - "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type - # Composio connectors (unified to native document types). - # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db. - "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", - "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR", - "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR", -} - -# Document types that don't come from SearchSourceConnector but should always be searchable -_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [ - "EXTENSION", # Browser extension data - "FILE", # Uploaded files - "NOTE", # User notes - "YOUTUBE_VIDEO", # YouTube videos -] - - -def map_connectors_to_searchable_types( - connector_types: list[Any], -) -> list[str]: - """ - Map SearchSourceConnectorType enums to searchable document/connector types. - - This function: - 1. Converts connector type enums to their searchable counterparts - 2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO) - 3. 
Deduplicates while preserving order - - Args: - connector_types: List of SearchSourceConnectorType enum values - - Returns: - List of searchable connector/document type strings - """ - result_set: set[str] = set() - result_list: list[str] = [] - - # Add always-available document types first - for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES: - if doc_type not in result_set: - result_set.add(doc_type) - result_list.append(doc_type) - - # Map each connector type to its searchable equivalent - for ct in connector_types: - # Handle both enum and string types - ct_str = ct.value if hasattr(ct, "value") else str(ct) - searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str) - if searchable and searchable not in result_set: - result_set.add(searchable) - result_list.append(searchable) - - return result_list +__all__ = ["map_connectors_to_searchable_types"] diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index 70634c65d..363cf5507 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -1,125 +1,21 @@ -""" -Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`. +"""Backward-compatible shim. -The composer split the previous monolithic prompt string into a fragment -tree under ``prompts/`` plus a model-family dispatch step (see the -composer module docstring for credits). This module preserves the public -function surface (``build_surfsense_system_prompt`` / -``build_configurable_system_prompt`` / -``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so -that existing call sites — `chat_deepagent.py`, anonymous chat routes, -and the configurable-prompt admin path — keep working without churn. - -For new call sites prefer importing ``compose_system_prompt`` directly -from :mod:`app.agents.new_chat.prompts.composer`. +Moved to ``app.agents.shared.system_prompt``. 
Re-exported here for the frozen +single-agent stack (``chat_deepagent``) until that stack is retired. """ -from __future__ import annotations - -from datetime import UTC, datetime - -from app.db import ChatVisibility - -from .prompts.composer import ( - _read_fragment, +from app.agents.shared.system_prompt import ( + SURFSENSE_CITATION_INSTRUCTIONS, + SURFSENSE_NO_CITATION_INSTRUCTIONS, + SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE, + SURFSENSE_SYSTEM_PROMPT, + build_configurable_system_prompt, + build_surfsense_system_prompt, compose_system_prompt, detect_provider_variant, + get_default_system_instructions, ) -# Optional routing fragments under ``prompts/routing/`` (see composer). -_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack") - -# Public re-exports for backwards compatibility (some legacy code reads the -# raw default-instructions text directly). -SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = ( - "\nDefault SurfSense agent system instructions are now\n" - "composed from prompts/base/*.md. See compose_system_prompt() for details.\n" - "" -) - -# Citation block re-exposed for legacy importers that referenced this constant -# directly. The composer is the canonical source; this is a frozen snapshot -# loaded at module-init time. -SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md") -SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md") - - -def build_surfsense_system_prompt( - today: datetime | None = None, - thread_visibility: ChatVisibility | None = None, - enabled_tool_names: set[str] | None = None, - disabled_tool_names: set[str] | None = None, - mcp_connector_tools: dict[str, list[str]] | None = None, - *, - model_name: str | None = None, -) -> str: - """Build the default SurfSense system prompt (citations on, defaults). - - See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt` - for full parameter docs. 
- """ - return compose_system_prompt( - today=today, - thread_visibility=thread_visibility, - enabled_tool_names=enabled_tool_names, - disabled_tool_names=disabled_tool_names, - mcp_connector_tools=mcp_connector_tools, - citations_enabled=True, - model_name=model_name, - connector_routing=_DEFAULT_CONNECTOR_ROUTING, - ) - - -def build_configurable_system_prompt( - custom_system_instructions: str | None = None, - use_default_system_instructions: bool = True, - citations_enabled: bool = True, - today: datetime | None = None, - thread_visibility: ChatVisibility | None = None, - enabled_tool_names: set[str] | None = None, - disabled_tool_names: set[str] | None = None, - mcp_connector_tools: dict[str, list[str]] | None = None, - *, - model_name: str | None = None, -) -> str: - """Build a configurable SurfSense system prompt (NewLLMConfig path). - - See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt` - for full parameter docs. - """ - return compose_system_prompt( - today=today, - thread_visibility=thread_visibility, - enabled_tool_names=enabled_tool_names, - disabled_tool_names=disabled_tool_names, - mcp_connector_tools=mcp_connector_tools, - custom_system_instructions=custom_system_instructions, - use_default_system_instructions=use_default_system_instructions, - citations_enabled=citations_enabled, - model_name=model_name, - connector_routing=_DEFAULT_CONNECTOR_ROUTING, - ) - - -def get_default_system_instructions() -> str: - """Return the default ```` block (no tools / citations). - - Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``. - The output reflects the current fragment tree, not a baked-in constant. 
- """ - resolved_today = datetime.now(UTC).date().isoformat() - from .prompts.composer import _build_system_instructions # local import - - return _build_system_instructions( - visibility=ChatVisibility.PRIVATE, - resolved_today=resolved_today, - ).strip() - - -# Backwards compatibility — some modules import the constant directly. -SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() - - __all__ = [ "SURFSENSE_CITATION_INSTRUCTIONS", "SURFSENSE_NO_CITATION_INSTRUCTIONS", diff --git a/surfsense_backend/app/agents/shared/agent_cache.py b/surfsense_backend/app/agents/shared/agent_cache.py new file mode 100644 index 000000000..fa8e6fb72 --- /dev/null +++ b/surfsense_backend/app/agents/shared/agent_cache.py @@ -0,0 +1,357 @@ +"""TTL-LRU cache for compiled SurfSense deep agents. + +Why this exists +--------------- + +``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat +turn: + +1. Discover connectors & document types from Postgres (~50-200ms) +2. Build the tool list (built-in + MCP) (~200ms-1.7s) +3. Compose the system prompt +4. Construct ~15 middleware instances (CPU) +5. Eagerly compile the general-purpose subagent + (``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously, + which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure + CPU work) +6. Compile the outer LangGraph + +For a single thread, all six steps produce the SAME object on every turn +unless the user has changed their LLM config, toggled a feature flag, +added a connector, etc. The right answer is to compile ONCE per +"agent shape" and reuse the resulting :class:`CompiledStateGraph` for +every subsequent turn on the same thread. + +Why a per-thread key (not a global pool) +---------------------------------------- + +Most middleware in the SurfSense stack captures per-thread state in +``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``, +``filesystem_mode``, ``mentioned_document_ids``). 
Cross-thread reuse +would silently leak state across users and threads. Keying the cache on +``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated +turns on the same thread without changing any middleware's behavior. + +Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema` +(read via ``runtime.context``) so the cache can collapse to a single +``(llm_config_id, search_space_id, ...)`` key shared across threads. Until +then, per-thread keying is the only safe option. + +Cache shape +----------- + +* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30 + minutes — matches a typical chat session). ``maxsize`` (default 256) + caps memory; LRU evicts least-recently-used on overflow. +* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent + cold misses on the same key wait for the first build instead of + building N times. +* Process-local: this is an in-memory cache. Multi-replica deployments + pay the build cost once per replica per key. That's fine; the working + set per replica is small (one entry per active thread on that replica). 
+ +Telemetry +--------- + +Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``: + + * ``hit`` — cache hit, microseconds-fast + * ``miss`` — first build for this key, includes build duration + * ``stale`` — entry was found but expired; rebuilt + * ``evict`` — LRU eviction (size-limited) + * ``size`` — current cache occupancy at lookup time +""" + +from __future__ import annotations + +import asyncio +import hashlib +import logging +import os +import time +from collections import OrderedDict +from collections.abc import Awaitable, Callable +from dataclasses import dataclass +from typing import Any + +from app.utils.perf import get_perf_logger + +logger = logging.getLogger(__name__) +_perf_log = get_perf_logger() + + +# --------------------------------------------------------------------------- +# Public API: signature helpers (cache key components) +# --------------------------------------------------------------------------- + + +def stable_hash(*parts: Any) -> str: + """Compute a deterministic SHA1 of the str repr of ``parts``. + + Used for cache key components that need a fixed-width representation + (system prompt, tool list, etc.). SHA1 is fine here — this is not a + security boundary, just a content fingerprint. + """ + h = hashlib.sha1(usedforsecurity=False) + for p in parts: + h.update(repr(p).encode("utf-8", errors="replace")) + h.update(b"\x1f") # ASCII unit separator between parts + return h.hexdigest() + + +def tools_signature( + tools: list[Any] | tuple[Any, ...], + *, + available_connectors: list[str] | None, + available_document_types: list[str] | None, +) -> str: + """Hash the bound-tool surface for cache-key purposes. + + The signature changes whenever: + + * A tool is added or removed from the bound list (built-in toggles, + MCP tools loaded for the user changes, gating rules flip, etc.). 
+ * The available connectors / document types for the search space + change (new connector added, last connector removed, new document + type indexed). Because :func:`get_connector_gated_tools` derives + ``modified_disabled_tools`` from ``available_connectors``, the + tool surface is technically already covered — but we hash the + connector list separately so an empty-list "no tools changed" + situation still rotates the key when, say, the user re-adds a + connector that gates a tool we were already not exposing. + + Stays stable across: + + * Process restarts (tool names + descriptions are static). + * Different replicas (everyone gets the same hash for the same + inputs). + """ + tool_descriptors = sorted( + (getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools + ) + connectors = sorted(available_connectors or []) + doc_types = sorted(available_document_types or []) + return stable_hash(tool_descriptors, connectors, doc_types) + + +def flags_signature(flags: Any) -> str: + """Hash the resolved :class:`AgentFeatureFlags` dataclass. + + Frozen dataclasses are deterministically reprable, so a SHA1 of their + repr is a stable fingerprint. Restart safe (flags are read once at + process boot). + """ + return stable_hash(repr(flags)) + + +def system_prompt_hash(system_prompt: str) -> str: + """Hash a system prompt string. Cheap, ~30µs for typical prompts.""" + return hashlib.sha1( + system_prompt.encode("utf-8", errors="replace"), + usedforsecurity=False, + ).hexdigest() + + +# --------------------------------------------------------------------------- +# Cache implementation +# --------------------------------------------------------------------------- + + +@dataclass +class _Entry: + value: Any + created_at: float + last_used_at: float + + +class _AgentCache: + """In-process TTL-LRU cache with per-key in-flight de-duplication. + + NOT THREAD-SAFE in the multithreading sense — designed for a single + asyncio event loop. 
Uvicorn runs one event loop per worker process, + so this is fine; multi-worker deployments simply each maintain their + own cache. + """ + + def __init__(self, *, maxsize: int, ttl_seconds: float) -> None: + self._maxsize = maxsize + self._ttl = ttl_seconds + self._entries: OrderedDict[str, _Entry] = OrderedDict() + # One lock per key — guards "build" so concurrent cold misses on + # the same key wait for the first build instead of all racing. + self._locks: dict[str, asyncio.Lock] = {} + + def _now(self) -> float: + return time.monotonic() + + def _is_fresh(self, entry: _Entry) -> bool: + return (self._now() - entry.created_at) < self._ttl + + def _evict_if_full(self) -> None: + while len(self._entries) >= self._maxsize: + evicted_key, _ = self._entries.popitem(last=False) + self._locks.pop(evicted_key, None) + _perf_log.info( + "[agent_cache] evict key=%s reason=lru size=%d", + _short(evicted_key), + len(self._entries), + ) + + def _touch(self, key: str, entry: _Entry) -> None: + entry.last_used_at = self._now() + self._entries.move_to_end(key, last=True) + + async def get_or_build( + self, + key: str, + *, + builder: Callable[[], Awaitable[Any]], + ) -> Any: + """Return the cached value for ``key`` or call ``builder()`` to make it. + + ``builder`` MUST be idempotent — concurrent cold misses on the + same key collapse to a single ``builder()`` call (the others + wait on the in-flight lock and observe the populated entry on + wake). + """ + # Fast path: hot hit. + entry = self._entries.get(key) + if entry is not None and self._is_fresh(entry): + self._touch(key, entry) + _perf_log.info( + "[agent_cache] hit key=%s age=%.1fs size=%d", + _short(key), + self._now() - entry.created_at, + len(self._entries), + ) + return entry.value + + # Stale entry — drop it; rebuild below. 
+ if entry is not None and not self._is_fresh(entry): + _perf_log.info( + "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs", + _short(key), + self._now() - entry.created_at, + self._ttl, + ) + self._entries.pop(key, None) + + # Slow path: serialize concurrent misses for the same key. + lock = self._locks.setdefault(key, asyncio.Lock()) + async with lock: + # Double-check after acquiring the lock — another waiter may + # have populated the entry while we slept. + entry = self._entries.get(key) + if entry is not None and self._is_fresh(entry): + self._touch(key, entry) + _perf_log.info( + "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true", + _short(key), + self._now() - entry.created_at, + len(self._entries), + ) + return entry.value + + t0 = time.perf_counter() + try: + value = await builder() + except BaseException: + # Don't cache failed builds; let the next caller retry. + _perf_log.warning( + "[agent_cache] build_failed key=%s elapsed=%.3fs", + _short(key), + time.perf_counter() - t0, + ) + raise + elapsed = time.perf_counter() - t0 + + # Insert + evict. + self._evict_if_full() + now = self._now() + self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now) + self._entries.move_to_end(key, last=True) + _perf_log.info( + "[agent_cache] miss key=%s build=%.3fs size=%d", + _short(key), + elapsed, + len(self._entries), + ) + return value + + def invalidate(self, key: str) -> bool: + """Drop a single entry; return True if anything was removed.""" + removed = self._entries.pop(key, None) is not None + self._locks.pop(key, None) + if removed: + _perf_log.info( + "[agent_cache] invalidate key=%s size=%d", + _short(key), + len(self._entries), + ) + return removed + + def invalidate_prefix(self, prefix: str) -> int: + """Drop every entry whose key starts with ``prefix``. 
def _short(key: str, n: int = 16) -> str:
    """Abbreviate ``key`` for log lines.

    Keys of at most ``n`` characters are returned unchanged; longer keys
    are cut to their first ``n`` characters followed by ``...``.
    """
    if len(key) > n:
        return key[:n] + "..."
    return key
Tests only.""" + global _cache + _cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds) + return _cache + + +__all__ = [ + "flags_signature", + "get_cache", + "reload_for_tests", + "stable_hash", + "system_prompt_hash", + "tools_signature", +] diff --git a/surfsense_backend/app/agents/shared/connector_searchable_types.py b/surfsense_backend/app/agents/shared/connector_searchable_types.py new file mode 100644 index 000000000..be193be04 --- /dev/null +++ b/surfsense_backend/app/agents/shared/connector_searchable_types.py @@ -0,0 +1,100 @@ +"""Map configured connectors to the searchable document/connector types. + +This is agent-agnostic infrastructure shared by every agent factory (single- +and multi-agent). It translates the connectors a search space has enabled into +the set of searchable type strings that pre-search middleware and ``web_search`` +understand, and always layers in the document types that exist independently of +any connector (uploads, notes, extension captures, YouTube). + +It lives in its own module — rather than inside a specific agent factory — so +that retiring or moving any single agent never disturbs the others' access to +this mapping. +""" + +from __future__ import annotations + +from typing import Any + +# Maps SearchSourceConnectorType enum values to the searchable document/connector types +# used by pre-search middleware and web_search. +# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to +# the web_search tool; all others are considered local/indexed data. 
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # Live search connectors (handled by web_search tool)
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # Local/indexed connectors (handled by KB pre-search middleware)
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",  # Connector type differs from document type
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",  # Maps to document type
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",  # Connector type differs from document type
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",  # Connector type differs from document type
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",  # Connector type differs from document type
    # Composio connectors (unified to native document types).
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}

# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # Browser extension data
    "FILE",  # Uploaded files
    "NOTE",  # User notes
    "YOUTUBE_VIDEO",  # YouTube videos
]


def map_connectors_to_searchable_types(
    connector_types: list[Any],
) -> list[str]:
    """Translate connector types into the searchable type strings they yield.

    The result always begins with the entries of
    ``_ALWAYS_AVAILABLE_DOC_TYPES``. Each connector is then looked up in
    ``_CONNECTOR_TYPE_TO_SEARCHABLE``: connectors with no mapping are
    skipped, and a searchable type is listed once, at the position where
    it first appears.

    Args:
        connector_types: Connector identifiers. Items exposing a ``value``
            attribute (such as enum members) are looked up by that value;
            anything else is looked up by its ``str()`` form.

    Returns:
        Ordered, duplicate-free list of searchable type strings.
    """
    # A dict keeps insertion order and ignores repeated keys, which gives
    # "de-duplicate while preserving first occurrence" in one structure.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for connector in connector_types:
        if hasattr(connector, "value"):
            lookup_key = connector.value
        else:
            lookup_key = str(connector)
        mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(lookup_key)
        if mapped:
            ordered.setdefault(mapped, None)

    return list(ordered)
b/surfsense_backend/app/agents/shared/prompts/base/__init__.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/__init__.py rename to surfsense_backend/app/agents/shared/prompts/base/__init__.py diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/agent_private.md b/surfsense_backend/app/agents/shared/prompts/base/agent_private.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/agent_private.md rename to surfsense_backend/app/agents/shared/prompts/base/agent_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/agent_team.md b/surfsense_backend/app/agents/shared/prompts/base/agent_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/agent_team.md rename to surfsense_backend/app/agents/shared/prompts/base/agent_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/citations_off.md b/surfsense_backend/app/agents/shared/prompts/base/citations_off.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/citations_off.md rename to surfsense_backend/app/agents/shared/prompts/base/citations_off.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md b/surfsense_backend/app/agents/shared/prompts/base/citations_on.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/citations_on.md rename to surfsense_backend/app/agents/shared/prompts/base/citations_on.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md b/surfsense_backend/app/agents/shared/prompts/base/kb_only_policy_private.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_private.md rename to surfsense_backend/app/agents/shared/prompts/base/kb_only_policy_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md 
b/surfsense_backend/app/agents/shared/prompts/base/kb_only_policy_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/kb_only_policy_team.md rename to surfsense_backend/app/agents/shared/prompts/base/kb_only_policy_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md b/surfsense_backend/app/agents/shared/prompts/base/memory_protocol_private.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md rename to surfsense_backend/app/agents/shared/prompts/base/memory_protocol_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md b/surfsense_backend/app/agents/shared/prompts/base/memory_protocol_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md rename to surfsense_backend/app/agents/shared/prompts/base/memory_protocol_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/parameter_resolution.md b/surfsense_backend/app/agents/shared/prompts/base/parameter_resolution.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/parameter_resolution.md rename to surfsense_backend/app/agents/shared/prompts/base/parameter_resolution.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md b/surfsense_backend/app/agents/shared/prompts/base/tool_routing_private.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_private.md rename to surfsense_backend/app/agents/shared/prompts/base/tool_routing_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md b/surfsense_backend/app/agents/shared/prompts/base/tool_routing_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/base/tool_routing_team.md rename to 
surfsense_backend/app/agents/shared/prompts/base/tool_routing_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/composer.py b/surfsense_backend/app/agents/shared/prompts/composer.py similarity index 99% rename from surfsense_backend/app/agents/new_chat/prompts/composer.py rename to surfsense_backend/app/agents/shared/prompts/composer.py index 412665813..26640a864 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/composer.py +++ b/surfsense_backend/app/agents/shared/prompts/composer.py @@ -2,7 +2,7 @@ Prompt composer for the SurfSense ``new_chat`` agent. This module assembles the agent's system prompt from the markdown fragments -under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic +under :mod:`app.agents.shared.prompts`. It replaces the monolithic ``system_prompt.py`` with a clean, fragment-based composition: :: @@ -119,7 +119,7 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant: # ----------------------------------------------------------------------------- -_PROMPTS_PACKAGE = "app.agents.new_chat.prompts" +_PROMPTS_PACKAGE = "app.agents.shared.prompts" def _read_fragment(subpath: str) -> str: diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/__init__.py b/surfsense_backend/app/agents/shared/prompts/examples/__init__.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/__init__.py rename to surfsense_backend/app/agents/shared/prompts/examples/__init__.py diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_image.md b/surfsense_backend/app/agents/shared/prompts/examples/generate_image.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/generate_image.md rename to surfsense_backend/app/agents/shared/prompts/examples/generate_image.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_podcast.md 
b/surfsense_backend/app/agents/shared/prompts/examples/generate_podcast.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/generate_podcast.md rename to surfsense_backend/app/agents/shared/prompts/examples/generate_podcast.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_report.md b/surfsense_backend/app/agents/shared/prompts/examples/generate_report.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/generate_report.md rename to surfsense_backend/app/agents/shared/prompts/examples/generate_report.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_resume.md b/surfsense_backend/app/agents/shared/prompts/examples/generate_resume.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/generate_resume.md rename to surfsense_backend/app/agents/shared/prompts/examples/generate_resume.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/generate_video_presentation.md b/surfsense_backend/app/agents/shared/prompts/examples/generate_video_presentation.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/generate_video_presentation.md rename to surfsense_backend/app/agents/shared/prompts/examples/generate_video_presentation.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/scrape_webpage.md b/surfsense_backend/app/agents/shared/prompts/examples/scrape_webpage.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/scrape_webpage.md rename to surfsense_backend/app/agents/shared/prompts/examples/scrape_webpage.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md b/surfsense_backend/app/agents/shared/prompts/examples/update_memory_private.md similarity index 100% rename from 
surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md rename to surfsense_backend/app/agents/shared/prompts/examples/update_memory_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md b/surfsense_backend/app/agents/shared/prompts/examples/update_memory_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md rename to surfsense_backend/app/agents/shared/prompts/examples/update_memory_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/web_search.md b/surfsense_backend/app/agents/shared/prompts/examples/web_search.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/examples/web_search.md rename to surfsense_backend/app/agents/shared/prompts/examples/web_search.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/__init__.py b/surfsense_backend/app/agents/shared/prompts/providers/__init__.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/__init__.py rename to surfsense_backend/app/agents/shared/prompts/providers/__init__.py diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md b/surfsense_backend/app/agents/shared/prompts/providers/anthropic.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/anthropic.md rename to surfsense_backend/app/agents/shared/prompts/providers/anthropic.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md b/surfsense_backend/app/agents/shared/prompts/providers/deepseek.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/deepseek.md rename to surfsense_backend/app/agents/shared/prompts/providers/deepseek.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/default.md b/surfsense_backend/app/agents/shared/prompts/providers/default.md 
similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/default.md rename to surfsense_backend/app/agents/shared/prompts/providers/default.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/google.md b/surfsense_backend/app/agents/shared/prompts/providers/google.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/google.md rename to surfsense_backend/app/agents/shared/prompts/providers/google.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/grok.md b/surfsense_backend/app/agents/shared/prompts/providers/grok.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/grok.md rename to surfsense_backend/app/agents/shared/prompts/providers/grok.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md b/surfsense_backend/app/agents/shared/prompts/providers/kimi.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/kimi.md rename to surfsense_backend/app/agents/shared/prompts/providers/kimi.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md b/surfsense_backend/app/agents/shared/prompts/providers/openai_classic.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/openai_classic.md rename to surfsense_backend/app/agents/shared/prompts/providers/openai_classic.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md b/surfsense_backend/app/agents/shared/prompts/providers/openai_codex.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/providers/openai_codex.md rename to surfsense_backend/app/agents/shared/prompts/providers/openai_codex.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md b/surfsense_backend/app/agents/shared/prompts/providers/openai_reasoning.md similarity index 100% 
rename from surfsense_backend/app/agents/new_chat/prompts/providers/openai_reasoning.md rename to surfsense_backend/app/agents/shared/prompts/providers/openai_reasoning.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/routing/__init__.py b/surfsense_backend/app/agents/shared/prompts/routing/__init__.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/routing/__init__.py rename to surfsense_backend/app/agents/shared/prompts/routing/__init__.py diff --git a/surfsense_backend/app/agents/new_chat/prompts/routing/jira.md b/surfsense_backend/app/agents/shared/prompts/routing/jira.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/routing/jira.md rename to surfsense_backend/app/agents/shared/prompts/routing/jira.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/routing/linear.md b/surfsense_backend/app/agents/shared/prompts/routing/linear.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/routing/linear.md rename to surfsense_backend/app/agents/shared/prompts/routing/linear.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/routing/slack.md b/surfsense_backend/app/agents/shared/prompts/routing/slack.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/routing/slack.md rename to surfsense_backend/app/agents/shared/prompts/routing/slack.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/__init__.py b/surfsense_backend/app/agents/shared/prompts/tools/__init__.py similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/__init__.py rename to surfsense_backend/app/agents/shared/prompts/tools/__init__.py diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/_preamble.md b/surfsense_backend/app/agents/shared/prompts/tools/_preamble.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/_preamble.md rename to 
surfsense_backend/app/agents/shared/prompts/tools/_preamble.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_image.md b/surfsense_backend/app/agents/shared/prompts/tools/generate_image.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/generate_image.md rename to surfsense_backend/app/agents/shared/prompts/tools/generate_image.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_podcast.md b/surfsense_backend/app/agents/shared/prompts/tools/generate_podcast.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/generate_podcast.md rename to surfsense_backend/app/agents/shared/prompts/tools/generate_podcast.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_report.md b/surfsense_backend/app/agents/shared/prompts/tools/generate_report.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/generate_report.md rename to surfsense_backend/app/agents/shared/prompts/tools/generate_report.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_resume.md b/surfsense_backend/app/agents/shared/prompts/tools/generate_resume.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/generate_resume.md rename to surfsense_backend/app/agents/shared/prompts/tools/generate_resume.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/generate_video_presentation.md b/surfsense_backend/app/agents/shared/prompts/tools/generate_video_presentation.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/generate_video_presentation.md rename to surfsense_backend/app/agents/shared/prompts/tools/generate_video_presentation.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/scrape_webpage.md b/surfsense_backend/app/agents/shared/prompts/tools/scrape_webpage.md similarity index 100% rename from 
surfsense_backend/app/agents/new_chat/prompts/tools/scrape_webpage.md rename to surfsense_backend/app/agents/shared/prompts/tools/scrape_webpage.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md b/surfsense_backend/app/agents/shared/prompts/tools/update_memory_private.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md rename to surfsense_backend/app/agents/shared/prompts/tools/update_memory_private.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md b/surfsense_backend/app/agents/shared/prompts/tools/update_memory_team.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md rename to surfsense_backend/app/agents/shared/prompts/tools/update_memory_team.md diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/web_search.md b/surfsense_backend/app/agents/shared/prompts/tools/web_search.md similarity index 100% rename from surfsense_backend/app/agents/new_chat/prompts/tools/web_search.md rename to surfsense_backend/app/agents/shared/prompts/tools/web_search.md diff --git a/surfsense_backend/app/agents/shared/system_prompt.py b/surfsense_backend/app/agents/shared/system_prompt.py new file mode 100644 index 000000000..ea717e74d --- /dev/null +++ b/surfsense_backend/app/agents/shared/system_prompt.py @@ -0,0 +1,133 @@ +""" +Thin compatibility wrapper around :mod:`app.agents.shared.prompts.composer`. + +The composer split the previous monolithic prompt string into a fragment +tree under ``prompts/`` plus a model-family dispatch step (see the +composer module docstring for credits). 
def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose the stock SurfSense system prompt.

    Every argument is forwarded unchanged to ``compose_system_prompt``.
    The two settings this entry point does not expose are pinned here:
    citations are always enabled, and connector routing uses
    ``_DEFAULT_CONNECTOR_ROUTING``.

    See :func:`app.agents.shared.prompts.composer.compose_system_prompt`
    for the meaning of each parameter.
    """
    composer_kwargs = {
        "today": today,
        "thread_visibility": thread_visibility,
        "enabled_tool_names": enabled_tool_names,
        "disabled_tool_names": disabled_tool_names,
        "mcp_connector_tools": mcp_connector_tools,
        # Fixed for the default prompt; callers cannot override these two.
        "citations_enabled": True,
        "model_name": model_name,
        "connector_routing": _DEFAULT_CONNECTOR_ROUTING,
    }
    return compose_system_prompt(**composer_kwargs)
+ """ + resolved_today = datetime.now(UTC).date().isoformat() + from .prompts.composer import _build_system_instructions # local import + + return _build_system_instructions( + visibility=ChatVisibility.PRIVATE, + resolved_today=resolved_today, + ).strip() + + +# Backwards compatibility — some modules import the constant directly. +SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt() + + +__all__ = [ + "SURFSENSE_CITATION_INSTRUCTIONS", + "SURFSENSE_NO_CITATION_INSTRUCTIONS", + "SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE", + "SURFSENSE_SYSTEM_PROMPT", + "build_configurable_system_prompt", + "build_surfsense_system_prompt", + "compose_system_prompt", + "detect_provider_variant", + "get_default_system_instructions", +] diff --git a/surfsense_backend/app/routes/new_llm_config_routes.py b/surfsense_backend/app/routes/new_llm_config_routes.py index e090a1a7c..7d9af7b0c 100644 --- a/surfsense_backend/app/routes/new_llm_config_routes.py +++ b/surfsense_backend/app/routes/new_llm_config_routes.py @@ -13,7 +13,7 @@ from fastapi import APIRouter, Depends, HTTPException from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from app.agents.new_chat.system_prompt import get_default_system_instructions +from app.agents.shared.system_prompt import get_default_system_instructions from app.config import config from app.db import ( NewLLMConfig, diff --git a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py index 36fe04aa2..e2cfaf610 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py +++ b/surfsense_backend/tests/unit/agents/new_chat/prompts/test_composer.py @@ -6,7 +6,7 @@ from datetime import UTC, datetime import pytest -from app.agents.new_chat.prompts.composer import ( +from app.agents.shared.prompts.composer import ( ALL_TOOL_NAMES_ORDERED, compose_system_prompt, detect_provider_variant, @@ -64,7 +64,7 @@ class 
TestProviderVariantDetection: ``gpt-5`` reasoning regex first. Codex is the more specialised prompt and mirrors OpenCode's dispatch order. """ - from app.agents.new_chat.prompts.composer import detect_provider_variant + from app.agents.shared.prompts.composer import detect_provider_variant assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex" assert detect_provider_variant("openai:gpt-5") == "openai_reasoning" diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py b/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py index 9b3de2db7..48eb86a2c 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_agent_cache.py @@ -16,7 +16,7 @@ from dataclasses import dataclass import pytest -from app.agents.new_chat.agent_cache import ( +from app.agents.shared.agent_cache import ( flags_signature, reload_for_tests, stable_hash,