refactor(agents): move connector_searchable_types, agent_cache, system_prompt + prompts to app/agents/shared (slice 7b)

Three live shared leaves discovered while taking stock after slice 7 (all are
consumed by the multi-agent stack and/or live routes, not single-agent-only):

- connector_searchable_types -> shared + shim (multi-agent factory uses it)
- agent_cache -> shared + shim (multi-agent runtime/agent_cache uses it)
- system_prompt + prompts/ (42 .md fragments) -> shared together + shim.
  Repointed composer's _PROMPTS_PACKAGE to app.agents.shared.prompts so
  importlib.resources fragment loading keeps working; system_prompt's relative
  ".prompts.composer" import is preserved by moving both as a unit.

Each keeps a re-export shim for the frozen chat_deepagent. After this slice,
new_chat/ holds only the frozen single-agent stack (chat_deepagent, subagents/,
__init__) plus shims.
This commit is contained in:
CREDO23 2026-06-04 13:21:45 +02:00
parent 13a96851ef
commit a019f18d1c
60 changed files with 627 additions and 564 deletions

View file

@ -10,7 +10,7 @@ from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.new_chat.agent_cache import (
from app.agents.shared.agent_cache import (
flags_signature,
get_cache,
stable_hash,

View file

@ -19,7 +19,7 @@ from app.agents.multi_agent_chat.subagents import (
from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
load_mcp_tools_by_connector,
)
from app.agents.new_chat.connector_searchable_types import (
from app.agents.shared.connector_searchable_types import (
map_connectors_to_searchable_types,
)
from app.agents.shared.feature_flags import AgentFeatureFlags, get_flags

View file

@ -1,351 +1,17 @@
"""TTL-LRU cache for compiled SurfSense deep agents.
"""Backward-compatible shim.
Why this exists
---------------
``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
turn:
1. Discover connectors & document types from Postgres (~50-200ms)
2. Build the tool list (built-in + MCP) (~200ms-1.7s)
3. Compose the system prompt
4. Construct ~15 middleware instances (CPU)
5. Eagerly compile the general-purpose subagent
(``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
which builds a second LangGraph + Pydantic schemas ~1.5-2s of pure
CPU work)
6. Compile the outer LangGraph
For a single thread, all six steps produce the SAME object on every turn
unless the user has changed their LLM config, toggled a feature flag,
added a connector, etc. The right answer is to compile ONCE per
"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
every subsequent turn on the same thread.
Why a per-thread key (not a global pool)
----------------------------------------
Most middleware in the SurfSense stack captures per-thread state in
``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
would silently leak state across users and threads. Keying the cache on
``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
turns on the same thread without changing any middleware's behavior.
Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
(read via ``runtime.context``) so the cache can collapse to a single
``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
then, per-thread keying is the only safe option.
Cache shape
-----------
* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
minutes matches a typical chat session). ``maxsize`` (default 256)
caps memory; LRU evicts least-recently-used on overflow.
* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
cold misses on the same key wait for the first build instead of
building N times.
* Process-local: this is an in-memory cache. Multi-replica deployments
pay the build cost once per replica per key. That's fine; the working
set per replica is small (one entry per active thread on that replica).
Telemetry
---------
Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
* ``hit`` cache hit, microseconds-fast
* ``miss`` first build for this key, includes build duration
* ``stale`` entry was found but expired; rebuilt
* ``evict`` LRU eviction (size-limited)
* ``size`` current cache occupancy at lookup time
Moved to ``app.agents.shared.agent_cache``. Re-exported here for the frozen
single-agent stack (``chat_deepagent``) until that stack is retired.
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import os
import time
from collections import OrderedDict
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from app.utils.perf import get_perf_logger
logger = logging.getLogger(__name__)
_perf_log = get_perf_logger()
# ---------------------------------------------------------------------------
# Public API: signature helpers (cache key components)
# ---------------------------------------------------------------------------
def stable_hash(*parts: Any) -> str:
"""Compute a deterministic SHA1 of the str repr of ``parts``.
Used for cache key components that need a fixed-width representation
(system prompt, tool list, etc.). SHA1 is fine here this is not a
security boundary, just a content fingerprint.
"""
h = hashlib.sha1(usedforsecurity=False)
for p in parts:
h.update(repr(p).encode("utf-8", errors="replace"))
h.update(b"\x1f") # ASCII unit separator between parts
return h.hexdigest()
def tools_signature(
tools: list[Any] | tuple[Any, ...],
*,
available_connectors: list[str] | None,
available_document_types: list[str] | None,
) -> str:
"""Hash the bound-tool surface for cache-key purposes.
The signature changes whenever:
* A tool is added or removed from the bound list (built-in toggles,
MCP tools loaded for the user changes, gating rules flip, etc.).
* The available connectors / document types for the search space
change (new connector added, last connector removed, new document
type indexed). Because :func:`get_connector_gated_tools` derives
``modified_disabled_tools`` from ``available_connectors``, the
tool surface is technically already covered but we hash the
connector list separately so an empty-list "no tools changed"
situation still rotates the key when, say, the user re-adds a
connector that gates a tool we were already not exposing.
Stays stable across:
* Process restarts (tool names + descriptions are static).
* Different replicas (everyone gets the same hash for the same
inputs).
"""
tool_descriptors = sorted(
(getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools
)
connectors = sorted(available_connectors or [])
doc_types = sorted(available_document_types or [])
return stable_hash(tool_descriptors, connectors, doc_types)
def flags_signature(flags: Any) -> str:
"""Hash the resolved :class:`AgentFeatureFlags` dataclass.
Frozen dataclasses are deterministically reprable, so a SHA1 of their
repr is a stable fingerprint. Restart safe (flags are read once at
process boot).
"""
return stable_hash(repr(flags))
def system_prompt_hash(system_prompt: str) -> str:
"""Hash a system prompt string. Cheap, ~30µs for typical prompts."""
return hashlib.sha1(
system_prompt.encode("utf-8", errors="replace"),
usedforsecurity=False,
).hexdigest()
# ---------------------------------------------------------------------------
# Cache implementation
# ---------------------------------------------------------------------------
@dataclass
class _Entry:
value: Any
created_at: float
last_used_at: float
class _AgentCache:
"""In-process TTL-LRU cache with per-key in-flight de-duplication.
NOT THREAD-SAFE in the multithreading sense designed for a single
asyncio event loop. Uvicorn runs one event loop per worker process,
so this is fine; multi-worker deployments simply each maintain their
own cache.
"""
def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
self._maxsize = maxsize
self._ttl = ttl_seconds
self._entries: OrderedDict[str, _Entry] = OrderedDict()
# One lock per key — guards "build" so concurrent cold misses on
# the same key wait for the first build instead of all racing.
self._locks: dict[str, asyncio.Lock] = {}
def _now(self) -> float:
return time.monotonic()
def _is_fresh(self, entry: _Entry) -> bool:
return (self._now() - entry.created_at) < self._ttl
def _evict_if_full(self) -> None:
while len(self._entries) >= self._maxsize:
evicted_key, _ = self._entries.popitem(last=False)
self._locks.pop(evicted_key, None)
_perf_log.info(
"[agent_cache] evict key=%s reason=lru size=%d",
_short(evicted_key),
len(self._entries),
)
def _touch(self, key: str, entry: _Entry) -> None:
entry.last_used_at = self._now()
self._entries.move_to_end(key, last=True)
async def get_or_build(
self,
key: str,
*,
builder: Callable[[], Awaitable[Any]],
) -> Any:
"""Return the cached value for ``key`` or call ``builder()`` to make it.
``builder`` MUST be idempotent concurrent cold misses on the
same key collapse to a single ``builder()`` call (the others
wait on the in-flight lock and observe the populated entry on
wake).
"""
# Fast path: hot hit.
entry = self._entries.get(key)
if entry is not None and self._is_fresh(entry):
self._touch(key, entry)
_perf_log.info(
"[agent_cache] hit key=%s age=%.1fs size=%d",
_short(key),
self._now() - entry.created_at,
len(self._entries),
)
return entry.value
# Stale entry — drop it; rebuild below.
if entry is not None and not self._is_fresh(entry):
_perf_log.info(
"[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
_short(key),
self._now() - entry.created_at,
self._ttl,
)
self._entries.pop(key, None)
# Slow path: serialize concurrent misses for the same key.
lock = self._locks.setdefault(key, asyncio.Lock())
async with lock:
# Double-check after acquiring the lock — another waiter may
# have populated the entry while we slept.
entry = self._entries.get(key)
if entry is not None and self._is_fresh(entry):
self._touch(key, entry)
_perf_log.info(
"[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
_short(key),
self._now() - entry.created_at,
len(self._entries),
)
return entry.value
t0 = time.perf_counter()
try:
value = await builder()
except BaseException:
# Don't cache failed builds; let the next caller retry.
_perf_log.warning(
"[agent_cache] build_failed key=%s elapsed=%.3fs",
_short(key),
time.perf_counter() - t0,
)
raise
elapsed = time.perf_counter() - t0
# Insert + evict.
self._evict_if_full()
now = self._now()
self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
self._entries.move_to_end(key, last=True)
_perf_log.info(
"[agent_cache] miss key=%s build=%.3fs size=%d",
_short(key),
elapsed,
len(self._entries),
)
return value
def invalidate(self, key: str) -> bool:
"""Drop a single entry; return True if anything was removed."""
removed = self._entries.pop(key, None) is not None
self._locks.pop(key, None)
if removed:
_perf_log.info(
"[agent_cache] invalidate key=%s size=%d",
_short(key),
len(self._entries),
)
return removed
def invalidate_prefix(self, prefix: str) -> int:
"""Drop every entry whose key starts with ``prefix``. Returns count."""
keys = [k for k in self._entries if k.startswith(prefix)]
for k in keys:
self._entries.pop(k, None)
self._locks.pop(k, None)
if keys:
_perf_log.info(
"[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
_short(prefix),
len(keys),
len(self._entries),
)
return len(keys)
def clear(self) -> None:
n = len(self._entries)
self._entries.clear()
self._locks.clear()
if n:
_perf_log.info("[agent_cache] clear removed=%d", n)
def stats(self) -> dict[str, Any]:
return {
"size": len(self._entries),
"maxsize": self._maxsize,
"ttl_seconds": self._ttl,
}
def _short(key: str, n: int = 16) -> str:
"""Truncate keys for log lines so they don't blow up log volume."""
return key if len(key) <= n else f"{key[:n]}..."
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
def get_cache() -> _AgentCache:
"""Return the process-wide compiled-agent cache singleton."""
return _cache
def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
"""Replace the singleton with a fresh cache. Tests only."""
global _cache
_cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
return _cache
from app.agents.shared.agent_cache import (
flags_signature,
get_cache,
reload_for_tests,
stable_hash,
system_prompt_hash,
tools_signature,
)
__all__ = [
"flags_signature",

View file

@ -1,100 +1,11 @@
"""Map configured connectors to the searchable document/connector types.
"""Backward-compatible shim.
This is agent-agnostic infrastructure shared by every agent factory (single-
and multi-agent). It translates the connectors a search space has enabled into
the set of searchable type strings that pre-search middleware and ``web_search``
understand, and always layers in the document types that exist independently of
any connector (uploads, notes, extension captures, YouTube).
It lives in its own module rather than inside a specific agent factory so
that retiring or moving any single agent never disturbs the others' access to
this mapping.
Moved to ``app.agents.shared.connector_searchable_types``. Re-exported here for
the frozen single-agent stack (``chat_deepagent``) until that stack is retired.
"""
from __future__ import annotations
from app.agents.shared.connector_searchable_types import (
map_connectors_to_searchable_types,
)
from typing import Any
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
# used by pre-search middleware and web_search.
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
# the web_search tool; all others are considered local/indexed data.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
# Live search connectors (handled by web_search tool)
"TAVILY_API": "TAVILY_API",
"LINKUP_API": "LINKUP_API",
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
# Local/indexed connectors (handled by KB pre-search middleware)
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
"DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type
"ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type
# Composio connectors (unified to native document types).
# Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
"COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}
# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
"EXTENSION", # Browser extension data
"FILE", # Uploaded files
"NOTE", # User notes
"YOUTUBE_VIDEO", # YouTube videos
]
def map_connectors_to_searchable_types(
connector_types: list[Any],
) -> list[str]:
"""
Map SearchSourceConnectorType enums to searchable document/connector types.
This function:
1. Converts connector type enums to their searchable counterparts
2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
3. Deduplicates while preserving order
Args:
connector_types: List of SearchSourceConnectorType enum values
Returns:
List of searchable connector/document type strings
"""
result_set: set[str] = set()
result_list: list[str] = []
# Add always-available document types first
for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
if doc_type not in result_set:
result_set.add(doc_type)
result_list.append(doc_type)
# Map each connector type to its searchable equivalent
for ct in connector_types:
# Handle both enum and string types
ct_str = ct.value if hasattr(ct, "value") else str(ct)
searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
if searchable and searchable not in result_set:
result_set.add(searchable)
result_list.append(searchable)
return result_list
__all__ = ["map_connectors_to_searchable_types"]

View file

@ -1,125 +1,21 @@
"""
Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`.
"""Backward-compatible shim.
The composer split the previous monolithic prompt string into a fragment
tree under ``prompts/`` plus a model-family dispatch step (see the
composer module docstring for credits). This module preserves the public
function surface (``build_surfsense_system_prompt`` /
``build_configurable_system_prompt`` /
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
that existing call sites `chat_deepagent.py`, anonymous chat routes,
and the configurable-prompt admin path keep working without churn.
For new call sites prefer importing ``compose_system_prompt`` directly
from :mod:`app.agents.new_chat.prompts.composer`.
Moved to ``app.agents.shared.system_prompt``. Re-exported here for the frozen
single-agent stack (``chat_deepagent``) until that stack is retired.
"""
from __future__ import annotations
from datetime import UTC, datetime
from app.db import ChatVisibility
from .prompts.composer import (
_read_fragment,
from app.agents.shared.system_prompt import (
SURFSENSE_CITATION_INSTRUCTIONS,
SURFSENSE_NO_CITATION_INSTRUCTIONS,
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE,
SURFSENSE_SYSTEM_PROMPT,
build_configurable_system_prompt,
build_surfsense_system_prompt,
compose_system_prompt,
detect_provider_variant,
get_default_system_instructions,
)
# Optional routing fragments under ``prompts/routing/`` (see composer).
_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
# Public re-exports for backwards compatibility (some legacy code reads the
# raw default-instructions text directly).
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
"<system_instruction>\nDefault SurfSense agent system instructions are now\n"
"composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
"</system_instruction>"
)
# Citation block re-exposed for legacy importers that referenced this constant
# directly. The composer is the canonical source; this is a frozen snapshot
# loaded at module-init time.
SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
def build_surfsense_system_prompt(
today: datetime | None = None,
thread_visibility: ChatVisibility | None = None,
enabled_tool_names: set[str] | None = None,
disabled_tool_names: set[str] | None = None,
mcp_connector_tools: dict[str, list[str]] | None = None,
*,
model_name: str | None = None,
) -> str:
"""Build the default SurfSense system prompt (citations on, defaults).
See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt`
for full parameter docs.
"""
return compose_system_prompt(
today=today,
thread_visibility=thread_visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
mcp_connector_tools=mcp_connector_tools,
citations_enabled=True,
model_name=model_name,
connector_routing=_DEFAULT_CONNECTOR_ROUTING,
)
def build_configurable_system_prompt(
custom_system_instructions: str | None = None,
use_default_system_instructions: bool = True,
citations_enabled: bool = True,
today: datetime | None = None,
thread_visibility: ChatVisibility | None = None,
enabled_tool_names: set[str] | None = None,
disabled_tool_names: set[str] | None = None,
mcp_connector_tools: dict[str, list[str]] | None = None,
*,
model_name: str | None = None,
) -> str:
"""Build a configurable SurfSense system prompt (NewLLMConfig path).
See :func:`app.agents.new_chat.prompts.composer.compose_system_prompt`
for full parameter docs.
"""
return compose_system_prompt(
today=today,
thread_visibility=thread_visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
mcp_connector_tools=mcp_connector_tools,
custom_system_instructions=custom_system_instructions,
use_default_system_instructions=use_default_system_instructions,
citations_enabled=citations_enabled,
model_name=model_name,
connector_routing=_DEFAULT_CONNECTOR_ROUTING,
)
def get_default_system_instructions() -> str:
"""Return the default ``<system_instruction>`` block (no tools / citations).
Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``.
The output reflects the current fragment tree, not a baked-in constant.
"""
resolved_today = datetime.now(UTC).date().isoformat()
from .prompts.composer import _build_system_instructions # local import
return _build_system_instructions(
visibility=ChatVisibility.PRIVATE,
resolved_today=resolved_today,
).strip()
# Backwards compatibility — some modules import the constant directly.
SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
__all__ = [
"SURFSENSE_CITATION_INSTRUCTIONS",
"SURFSENSE_NO_CITATION_INSTRUCTIONS",

View file

@ -0,0 +1,357 @@
"""TTL-LRU cache for compiled SurfSense deep agents.
Why this exists
---------------
``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
turn:
1. Discover connectors & document types from Postgres (~50-200ms)
2. Build the tool list (built-in + MCP) (~200ms-1.7s)
3. Compose the system prompt
4. Construct ~15 middleware instances (CPU)
5. Eagerly compile the general-purpose subagent
(``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
which builds a second LangGraph + Pydantic schemas ~1.5-2s of pure
CPU work)
6. Compile the outer LangGraph
For a single thread, all six steps produce the SAME object on every turn
unless the user has changed their LLM config, toggled a feature flag,
added a connector, etc. The right answer is to compile ONCE per
"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
every subsequent turn on the same thread.
Why a per-thread key (not a global pool)
----------------------------------------
Most middleware in the SurfSense stack captures per-thread state in
``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
would silently leak state across users and threads. Keying the cache on
``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
turns on the same thread without changing any middleware's behavior.
Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
(read via ``runtime.context``) so the cache can collapse to a single
``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
then, per-thread keying is the only safe option.
Cache shape
-----------
* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
minutes matches a typical chat session). ``maxsize`` (default 256)
caps memory; LRU evicts least-recently-used on overflow.
* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
cold misses on the same key wait for the first build instead of
building N times.
* Process-local: this is an in-memory cache. Multi-replica deployments
pay the build cost once per replica per key. That's fine; the working
set per replica is small (one entry per active thread on that replica).
Telemetry
---------
Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
* ``hit`` cache hit, microseconds-fast
* ``miss`` first build for this key, includes build duration
* ``stale`` entry was found but expired; rebuilt
* ``evict`` LRU eviction (size-limited)
* ``size`` current cache occupancy at lookup time
"""
from __future__ import annotations
import asyncio
import hashlib
import logging
import os
import time
from collections import OrderedDict
from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from typing import Any
from app.utils.perf import get_perf_logger
logger = logging.getLogger(__name__)
_perf_log = get_perf_logger()
# ---------------------------------------------------------------------------
# Public API: signature helpers (cache key components)
# ---------------------------------------------------------------------------
def stable_hash(*parts: Any) -> str:
"""Compute a deterministic SHA1 of the str repr of ``parts``.
Used for cache key components that need a fixed-width representation
(system prompt, tool list, etc.). SHA1 is fine here this is not a
security boundary, just a content fingerprint.
"""
h = hashlib.sha1(usedforsecurity=False)
for p in parts:
h.update(repr(p).encode("utf-8", errors="replace"))
h.update(b"\x1f") # ASCII unit separator between parts
return h.hexdigest()
def tools_signature(
tools: list[Any] | tuple[Any, ...],
*,
available_connectors: list[str] | None,
available_document_types: list[str] | None,
) -> str:
"""Hash the bound-tool surface for cache-key purposes.
The signature changes whenever:
* A tool is added or removed from the bound list (built-in toggles,
MCP tools loaded for the user changes, gating rules flip, etc.).
* The available connectors / document types for the search space
change (new connector added, last connector removed, new document
type indexed). Because :func:`get_connector_gated_tools` derives
``modified_disabled_tools`` from ``available_connectors``, the
tool surface is technically already covered but we hash the
connector list separately so an empty-list "no tools changed"
situation still rotates the key when, say, the user re-adds a
connector that gates a tool we were already not exposing.
Stays stable across:
* Process restarts (tool names + descriptions are static).
* Different replicas (everyone gets the same hash for the same
inputs).
"""
tool_descriptors = sorted(
(getattr(t, "name", repr(t)), getattr(t, "description", "")) for t in tools
)
connectors = sorted(available_connectors or [])
doc_types = sorted(available_document_types or [])
return stable_hash(tool_descriptors, connectors, doc_types)
def flags_signature(flags: Any) -> str:
"""Hash the resolved :class:`AgentFeatureFlags` dataclass.
Frozen dataclasses are deterministically reprable, so a SHA1 of their
repr is a stable fingerprint. Restart safe (flags are read once at
process boot).
"""
return stable_hash(repr(flags))
def system_prompt_hash(system_prompt: str) -> str:
"""Hash a system prompt string. Cheap, ~30µs for typical prompts."""
return hashlib.sha1(
system_prompt.encode("utf-8", errors="replace"),
usedforsecurity=False,
).hexdigest()
# ---------------------------------------------------------------------------
# Cache implementation
# ---------------------------------------------------------------------------
@dataclass
class _Entry:
value: Any
created_at: float
last_used_at: float
class _AgentCache:
"""In-process TTL-LRU cache with per-key in-flight de-duplication.
NOT THREAD-SAFE in the multithreading sense designed for a single
asyncio event loop. Uvicorn runs one event loop per worker process,
so this is fine; multi-worker deployments simply each maintain their
own cache.
"""
def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
self._maxsize = maxsize
self._ttl = ttl_seconds
self._entries: OrderedDict[str, _Entry] = OrderedDict()
# One lock per key — guards "build" so concurrent cold misses on
# the same key wait for the first build instead of all racing.
self._locks: dict[str, asyncio.Lock] = {}
def _now(self) -> float:
return time.monotonic()
def _is_fresh(self, entry: _Entry) -> bool:
return (self._now() - entry.created_at) < self._ttl
def _evict_if_full(self) -> None:
while len(self._entries) >= self._maxsize:
evicted_key, _ = self._entries.popitem(last=False)
self._locks.pop(evicted_key, None)
_perf_log.info(
"[agent_cache] evict key=%s reason=lru size=%d",
_short(evicted_key),
len(self._entries),
)
def _touch(self, key: str, entry: _Entry) -> None:
entry.last_used_at = self._now()
self._entries.move_to_end(key, last=True)
async def get_or_build(
self,
key: str,
*,
builder: Callable[[], Awaitable[Any]],
) -> Any:
"""Return the cached value for ``key`` or call ``builder()`` to make it.
``builder`` MUST be idempotent concurrent cold misses on the
same key collapse to a single ``builder()`` call (the others
wait on the in-flight lock and observe the populated entry on
wake).
"""
# Fast path: hot hit.
entry = self._entries.get(key)
if entry is not None and self._is_fresh(entry):
self._touch(key, entry)
_perf_log.info(
"[agent_cache] hit key=%s age=%.1fs size=%d",
_short(key),
self._now() - entry.created_at,
len(self._entries),
)
return entry.value
# Stale entry — drop it; rebuild below.
if entry is not None and not self._is_fresh(entry):
_perf_log.info(
"[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
_short(key),
self._now() - entry.created_at,
self._ttl,
)
self._entries.pop(key, None)
# Slow path: serialize concurrent misses for the same key.
lock = self._locks.setdefault(key, asyncio.Lock())
async with lock:
# Double-check after acquiring the lock — another waiter may
# have populated the entry while we slept.
entry = self._entries.get(key)
if entry is not None and self._is_fresh(entry):
self._touch(key, entry)
_perf_log.info(
"[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
_short(key),
self._now() - entry.created_at,
len(self._entries),
)
return entry.value
t0 = time.perf_counter()
try:
value = await builder()
except BaseException:
# Don't cache failed builds; let the next caller retry.
_perf_log.warning(
"[agent_cache] build_failed key=%s elapsed=%.3fs",
_short(key),
time.perf_counter() - t0,
)
raise
elapsed = time.perf_counter() - t0
# Insert + evict.
self._evict_if_full()
now = self._now()
self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
self._entries.move_to_end(key, last=True)
_perf_log.info(
"[agent_cache] miss key=%s build=%.3fs size=%d",
_short(key),
elapsed,
len(self._entries),
)
return value
def invalidate(self, key: str) -> bool:
"""Drop a single entry; return True if anything was removed."""
removed = self._entries.pop(key, None) is not None
self._locks.pop(key, None)
if removed:
_perf_log.info(
"[agent_cache] invalidate key=%s size=%d",
_short(key),
len(self._entries),
)
return removed
def invalidate_prefix(self, prefix: str) -> int:
"""Drop every entry whose key starts with ``prefix``. Returns count."""
keys = [k for k in self._entries if k.startswith(prefix)]
for k in keys:
self._entries.pop(k, None)
self._locks.pop(k, None)
if keys:
_perf_log.info(
"[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
_short(prefix),
len(keys),
len(self._entries),
)
return len(keys)
def clear(self) -> None:
n = len(self._entries)
self._entries.clear()
self._locks.clear()
if n:
_perf_log.info("[agent_cache] clear removed=%d", n)
def stats(self) -> dict[str, Any]:
return {
"size": len(self._entries),
"maxsize": self._maxsize,
"ttl_seconds": self._ttl,
}
def _short(key: str, n: int = 16) -> str:
"""Truncate keys for log lines so they don't blow up log volume."""
return key if len(key) <= n else f"{key[:n]}..."
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
def get_cache() -> _AgentCache:
"""Return the process-wide compiled-agent cache singleton."""
return _cache
def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
"""Replace the singleton with a fresh cache. Tests only."""
global _cache
_cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
return _cache
__all__ = [
"flags_signature",
"get_cache",
"reload_for_tests",
"stable_hash",
"system_prompt_hash",
"tools_signature",
]

View file

@ -0,0 +1,100 @@
"""Map configured connectors to the searchable document/connector types.
This is agent-agnostic infrastructure shared by every agent factory (single-
and multi-agent). It translates the connectors a search space has enabled into
the set of searchable type strings that pre-search middleware and ``web_search``
understand, and always layers in the document types that exist independently of
any connector (uploads, notes, extension captures, YouTube).
It lives in its own module rather than inside a specific agent factory so
that retiring or moving any single agent never disturbs the others' access to
this mapping.
"""
from __future__ import annotations
from typing import Any
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
# used by pre-search middleware and web_search.
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
# the web_search tool; all others are considered local/indexed data.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
# Live search connectors (handled by web_search tool)
"TAVILY_API": "TAVILY_API",
"LINKUP_API": "LINKUP_API",
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
# Local/indexed connectors (handled by KB pre-search middleware)
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
"DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type
"ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type
# Composio connectors (unified to native document types).
# Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
"COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}
# Document types that don't come from SearchSourceConnector but should always be searchable
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
"EXTENSION", # Browser extension data
"FILE", # Uploaded files
"NOTE", # User notes
"YOUTUBE_VIDEO", # YouTube videos
]
def map_connectors_to_searchable_types(
connector_types: list[Any],
) -> list[str]:
"""
Map SearchSourceConnectorType enums to searchable document/connector types.
This function:
1. Converts connector type enums to their searchable counterparts
2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
3. Deduplicates while preserving order
Args:
connector_types: List of SearchSourceConnectorType enum values
Returns:
List of searchable connector/document type strings
"""
result_set: set[str] = set()
result_list: list[str] = []
# Add always-available document types first
for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
if doc_type not in result_set:
result_set.add(doc_type)
result_list.append(doc_type)
# Map each connector type to its searchable equivalent
for ct in connector_types:
# Handle both enum and string types
ct_str = ct.value if hasattr(ct, "value") else str(ct)
searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
if searchable and searchable not in result_set:
result_set.add(searchable)
result_list.append(searchable)
return result_list

View file

@ -2,7 +2,7 @@
Prompt composer for the SurfSense ``new_chat`` agent.
This module assembles the agent's system prompt from the markdown fragments
under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic
under :mod:`app.agents.shared.prompts`. It replaces the monolithic
``system_prompt.py`` with a clean, fragment-based composition:
::
@ -119,7 +119,7 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
# -----------------------------------------------------------------------------
_PROMPTS_PACKAGE = "app.agents.new_chat.prompts"
_PROMPTS_PACKAGE = "app.agents.shared.prompts"
def _read_fragment(subpath: str) -> str:

View file

@ -0,0 +1,133 @@
"""
Thin compatibility wrapper around :mod:`app.agents.shared.prompts.composer`.
The composer split the previous monolithic prompt string into a fragment
tree under ``prompts/`` plus a model-family dispatch step (see the
composer module docstring for credits). This module preserves the public
function surface (``build_surfsense_system_prompt`` /
``build_configurable_system_prompt`` /
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
that existing call sites `chat_deepagent.py`, anonymous chat routes,
and the configurable-prompt admin path keep working without churn.
For new call sites prefer importing ``compose_system_prompt`` directly
from :mod:`app.agents.shared.prompts.composer`.
"""
from __future__ import annotations
from datetime import UTC, datetime
from app.db import ChatVisibility
from .prompts.composer import (
_read_fragment,
compose_system_prompt,
detect_provider_variant,
)
# Optional routing fragments under ``prompts/routing/`` (see composer).
_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
# Public re-exports for backwards compatibility (some legacy code reads the
# raw default-instructions text directly).
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
"<system_instruction>\nDefault SurfSense agent system instructions are now\n"
"composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
"</system_instruction>"
)
# Citation block re-exposed for legacy importers that referenced this constant
# directly. The composer is the canonical source; this is a frozen snapshot
# loaded at module-init time.
SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
def build_surfsense_system_prompt(
today: datetime | None = None,
thread_visibility: ChatVisibility | None = None,
enabled_tool_names: set[str] | None = None,
disabled_tool_names: set[str] | None = None,
mcp_connector_tools: dict[str, list[str]] | None = None,
*,
model_name: str | None = None,
) -> str:
"""Build the default SurfSense system prompt (citations on, defaults).
See :func:`app.agents.shared.prompts.composer.compose_system_prompt`
for full parameter docs.
"""
return compose_system_prompt(
today=today,
thread_visibility=thread_visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
mcp_connector_tools=mcp_connector_tools,
citations_enabled=True,
model_name=model_name,
connector_routing=_DEFAULT_CONNECTOR_ROUTING,
)
def build_configurable_system_prompt(
custom_system_instructions: str | None = None,
use_default_system_instructions: bool = True,
citations_enabled: bool = True,
today: datetime | None = None,
thread_visibility: ChatVisibility | None = None,
enabled_tool_names: set[str] | None = None,
disabled_tool_names: set[str] | None = None,
mcp_connector_tools: dict[str, list[str]] | None = None,
*,
model_name: str | None = None,
) -> str:
"""Build a configurable SurfSense system prompt (NewLLMConfig path).
See :func:`app.agents.shared.prompts.composer.compose_system_prompt`
for full parameter docs.
"""
return compose_system_prompt(
today=today,
thread_visibility=thread_visibility,
enabled_tool_names=enabled_tool_names,
disabled_tool_names=disabled_tool_names,
mcp_connector_tools=mcp_connector_tools,
custom_system_instructions=custom_system_instructions,
use_default_system_instructions=use_default_system_instructions,
citations_enabled=citations_enabled,
model_name=model_name,
connector_routing=_DEFAULT_CONNECTOR_ROUTING,
)
def get_default_system_instructions() -> str:
"""Return the default ``<system_instruction>`` block (no tools / citations).
Useful for populating the UI when seeding ``NewLLMConfig.system_instructions``.
The output reflects the current fragment tree, not a baked-in constant.
"""
resolved_today = datetime.now(UTC).date().isoformat()
from .prompts.composer import _build_system_instructions # local import
return _build_system_instructions(
visibility=ChatVisibility.PRIVATE,
resolved_today=resolved_today,
).strip()
# Backwards compatibility — some modules import the constant directly.
SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
__all__ = [
"SURFSENSE_CITATION_INSTRUCTIONS",
"SURFSENSE_NO_CITATION_INSTRUCTIONS",
"SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
"SURFSENSE_SYSTEM_PROMPT",
"build_configurable_system_prompt",
"build_surfsense_system_prompt",
"compose_system_prompt",
"detect_provider_variant",
"get_default_system_instructions",
]

View file

@ -13,7 +13,7 @@ from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from app.agents.new_chat.system_prompt import get_default_system_instructions
from app.agents.shared.system_prompt import get_default_system_instructions
from app.config import config
from app.db import (
NewLLMConfig,

View file

@ -6,7 +6,7 @@ from datetime import UTC, datetime
import pytest
from app.agents.new_chat.prompts.composer import (
from app.agents.shared.prompts.composer import (
ALL_TOOL_NAMES_ORDERED,
compose_system_prompt,
detect_provider_variant,
@ -64,7 +64,7 @@ class TestProviderVariantDetection:
``gpt-5`` reasoning regex first. Codex is the more specialised
prompt and mirrors OpenCode's dispatch order.
"""
from app.agents.new_chat.prompts.composer import detect_provider_variant
from app.agents.shared.prompts.composer import detect_provider_variant
assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"

View file

@ -16,7 +16,7 @@ from dataclasses import dataclass
import pytest
from app.agents.new_chat.agent_cache import (
from app.agents.shared.agent_cache import (
flags_signature,
reload_for_tests,
stable_hash,