mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
refactor(agents): move connector_searchable_types, agent_cache, system_prompt + prompts to app/agents/shared (slice 7b)
Three live shared leaves were discovered while taking stock after slice 7 (all are consumed by the multi-agent stack and/or live routes, not only by the single-agent stack): - connector_searchable_types -> shared + shim (the multi-agent factory uses it) - agent_cache -> shared + shim (multi-agent runtime/agent_cache uses it) - system_prompt + prompts/ (42 .md fragments) -> moved to shared together + shim. Repointed the composer's _PROMPTS_PACKAGE to app.agents.shared.prompts so importlib.resources fragment loading keeps working; system_prompt's relative ".prompts.composer" import is preserved by moving both as a unit. Each module keeps a re-export shim for the frozen chat_deepagent. After this slice, new_chat/ holds only the frozen single-agent stack (chat_deepagent, subagents/, __init__) plus shims.
This commit is contained in:
parent
13a96851ef
commit
a019f18d1c
60 changed files with 627 additions and 564 deletions
|
|
@ -10,7 +10,7 @@ from langchain_core.language_models import BaseChatModel
|
|||
from langchain_core.tools import BaseTool
|
||||
from langgraph.types import Checkpointer
|
||||
|
||||
from app.agents.new_chat.agent_cache import (
|
||||
from app.agents.shared.agent_cache import (
|
||||
flags_signature,
|
||||
get_cache,
|
||||
stable_hash,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ from app.agents.multi_agent_chat.subagents import (
|
|||
from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
|
||||
load_mcp_tools_by_connector,
|
||||
)
|
||||
from app.agents.new_chat.connector_searchable_types import (
|
||||
from app.agents.shared.connector_searchable_types import (
|
||||
map_connectors_to_searchable_types,
|
||||
)
|
||||
from app.agents.shared.feature_flags import AgentFeatureFlags, get_flags
|
||||
|
|
|
|||
|
|
@ -1,351 +1,17 @@
|
|||
"""TTL-LRU cache for compiled SurfSense deep agents.
|
||||
"""Backward-compatible shim.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
|
||||
``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
|
||||
turn:
|
||||
|
||||
1. Discover connectors & document types from Postgres (~50-200ms)
|
||||
2. Build the tool list (built-in + MCP) (~200ms-1.7s)
|
||||
3. Compose the system prompt
|
||||
4. Construct ~15 middleware instances (CPU)
|
||||
5. Eagerly compile the general-purpose subagent
|
||||
(``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
|
||||
which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure
|
||||
CPU work)
|
||||
6. Compile the outer LangGraph
|
||||
|
||||
For a single thread, all six steps produce the SAME object on every turn
|
||||
unless the user has changed their LLM config, toggled a feature flag,
|
||||
added a connector, etc. The right answer is to compile ONCE per
|
||||
"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
|
||||
every subsequent turn on the same thread.
|
||||
|
||||
Why a per-thread key (not a global pool)
|
||||
----------------------------------------
|
||||
|
||||
Most middleware in the SurfSense stack captures per-thread state in
|
||||
``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
|
||||
``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
|
||||
would silently leak state across users and threads. Keying the cache on
|
||||
``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
|
||||
turns on the same thread without changing any middleware's behavior.
|
||||
|
||||
Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
|
||||
(read via ``runtime.context``) so the cache can collapse to a single
|
||||
``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
|
||||
then, per-thread keying is the only safe option.
|
||||
|
||||
Cache shape
|
||||
-----------
|
||||
|
||||
* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
|
||||
minutes — matches a typical chat session). ``maxsize`` (default 256)
|
||||
caps memory; LRU evicts least-recently-used on overflow.
|
||||
* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
|
||||
cold misses on the same key wait for the first build instead of
|
||||
building N times.
|
||||
* Process-local: this is an in-memory cache. Multi-replica deployments
|
||||
pay the build cost once per replica per key. That's fine; the working
|
||||
set per replica is small (one entry per active thread on that replica).
|
||||
|
||||
Telemetry
|
||||
---------
|
||||
|
||||
Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
|
||||
|
||||
* ``hit`` — cache hit, microseconds-fast
|
||||
* ``miss`` — first build for this key, includes build duration
|
||||
* ``stale`` — entry was found but expired; rebuilt
|
||||
* ``evict`` — LRU eviction (size-limited)
|
||||
* ``size`` — current cache occupancy at lookup time
|
||||
Moved to ``app.agents.shared.agent_cache``. Re-exported here for the frozen
|
||||
single-agent stack (``chat_deepagent``) until that stack is retired.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_perf_log = get_perf_logger()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: signature helpers (cache key components)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def stable_hash(*parts: Any) -> str:
    """Return a deterministic SHA1 hex digest over the ``repr`` of each part.

    Every part is rendered with ``repr``, UTF-8 encoded (unencodable
    characters are replaced), and terminated with the ASCII unit separator
    byte (0x1F) so that adjacent parts cannot run together. The digest is
    used as a content fingerprint only, not as a security primitive.

    Args:
        *parts: Arbitrary objects to fingerprint, in order.

    Returns:
        The 40-character hexadecimal SHA1 digest.
    """
    unit_separator = b"\x1f"
    payload = b"".join(
        repr(part).encode("utf-8", errors="replace") + unit_separator
        for part in parts
    )
    return hashlib.sha1(payload, usedforsecurity=False).hexdigest()
|
||||
|
||||
|
||||
def tools_signature(
    tools: list[Any] | tuple[Any, ...],
    *,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
) -> str:
    """Fingerprint the bound tool surface for use in a cache key.

    Three components are hashed together:

    * the sorted ``(name, description)`` pair of every tool (a tool without
      a ``name`` falls back to its ``repr``; a missing ``description``
      becomes ``""``),
    * the sorted list of available connectors,
    * the sorted list of available document types.

    Sorting makes the result independent of input order. ``None`` for
    either list is treated as an empty list.

    Args:
        tools: The tools bound to the agent.
        available_connectors: Connector identifiers, or ``None``.
        available_document_types: Document-type identifiers, or ``None``.

    Returns:
        Hex digest produced by :func:`stable_hash`.
    """
    descriptors = [
        (getattr(tool, "name", repr(tool)), getattr(tool, "description", ""))
        for tool in tools
    ]
    descriptors.sort()

    connector_names = list(available_connectors or [])
    connector_names.sort()

    document_types = list(available_document_types or [])
    document_types.sort()

    return stable_hash(descriptors, connector_names, document_types)
|
||||
|
||||
|
||||
def flags_signature(flags: Any) -> str:
    """Fingerprint a feature-flags object through its ``repr``.

    The ``repr`` string is handed to :func:`stable_hash`, which applies
    ``repr`` once more to the string before hashing, so the digest is only
    as stable as ``repr(flags)`` itself.

    Args:
        flags: The resolved flags object (presumably a frozen
            ``AgentFeatureFlags`` dataclass; verify against callers).

    Returns:
        Hex digest identifying this flag configuration.
    """
    flags_repr = repr(flags)
    return stable_hash(flags_repr)
|
||||
|
||||
|
||||
def system_prompt_hash(system_prompt: str) -> str:
    """Return the SHA1 hex digest of a system prompt.

    The prompt is UTF-8 encoded with unencodable characters replaced, so
    hashing never raises on lone surrogates. The digest is a content
    fingerprint, not a security primitive.

    Args:
        system_prompt: The full system prompt text.

    Returns:
        The 40-character hexadecimal SHA1 digest.
    """
    digest = hashlib.sha1(usedforsecurity=False)
    digest.update(system_prompt.encode("utf-8", errors="replace"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache implementation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class _Entry:
    """A cached value together with the timestamps the cache tracks for it."""

    # The cached object handed back to callers on a hit.
    value: Any
    # Monotonic-clock time of insertion; the TTL check compares against it.
    created_at: float
    # Monotonic-clock time of the most recent hit.
    last_used_at: float
|
||||
|
||||
|
||||
class _AgentCache:
    """In-process TTL-LRU cache with per-key in-flight de-duplication.

    Entries expire ``ttl_seconds`` after they were *built* (hits do not
    extend the lifetime) and the least-recently-used entry is evicted when
    an insert would exceed ``maxsize``.

    Not thread-safe: state is mutated without any threading lock, so the
    cache is meant to be used from a single asyncio event loop.
    """

    def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
        """Create an empty cache.

        Args:
            maxsize: Number of entries at which inserts start evicting.
                Values below 1 behave like 1 (the entry being inserted is
                always kept).
            ttl_seconds: Age, in seconds, after which an entry is stale.
        """
        self._maxsize = maxsize
        self._ttl = ttl_seconds
        # Ordered oldest-used first; hits move an entry to the end.
        self._entries: OrderedDict[str, _Entry] = OrderedDict()
        # One lock per key — guards "build" so concurrent cold misses on
        # the same key wait for the first build instead of all racing.
        self._locks: dict[str, asyncio.Lock] = {}

    def _now(self) -> float:
        """Return the current monotonic-clock time in seconds."""
        return time.monotonic()

    def _is_fresh(self, entry: _Entry) -> bool:
        """Return True while ``entry`` is younger than the TTL."""
        return (self._now() - entry.created_at) < self._ttl

    def _evict_if_full(self) -> None:
        """Evict least-recently-used entries until there is room for one more.

        The loop also stops once the cache is empty: with ``maxsize <= 0``
        the size condition alone would stay true and ``popitem`` would
        raise ``KeyError`` on the empty mapping.
        """
        while self._entries and len(self._entries) >= self._maxsize:
            evicted_key, _ = self._entries.popitem(last=False)
            self._locks.pop(evicted_key, None)
            _perf_log.info(
                "[agent_cache] evict key=%s reason=lru size=%d",
                _short(evicted_key),
                len(self._entries),
            )

    def _touch(self, key: str, entry: _Entry) -> None:
        """Record a hit: refresh ``last_used_at`` and mark ``key`` most recent."""
        entry.last_used_at = self._now()
        self._entries.move_to_end(key, last=True)

    async def get_or_build(
        self,
        key: str,
        *,
        builder: Callable[[], Awaitable[Any]],
    ) -> Any:
        """Return the cached value for ``key`` or call ``builder()`` to make it.

        ``builder`` MUST be idempotent — concurrent cold misses on the
        same key collapse to a single ``builder()`` call (the others
        wait on the in-flight lock and observe the populated entry on
        wake).

        Args:
            key: Cache key.
            builder: Zero-argument coroutine function producing the value.

        Returns:
            The cached or freshly built value.

        Raises:
            BaseException: Whatever ``builder()`` raises is logged and
                re-raised unchanged; nothing is cached in that case.
        """
        # Fast path: hot hit.
        entry = self._entries.get(key)
        if entry is not None and self._is_fresh(entry):
            self._touch(key, entry)
            _perf_log.info(
                "[agent_cache] hit key=%s age=%.1fs size=%d",
                _short(key),
                self._now() - entry.created_at,
                len(self._entries),
            )
            return entry.value

        # Stale entry — drop it; rebuild below.
        if entry is not None and not self._is_fresh(entry):
            _perf_log.info(
                "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
                _short(key),
                self._now() - entry.created_at,
                self._ttl,
            )
            self._entries.pop(key, None)

        # Slow path: serialize concurrent misses for the same key.
        lock = self._locks.setdefault(key, asyncio.Lock())
        async with lock:
            # Double-check after acquiring the lock — another waiter may
            # have populated the entry while we slept.
            entry = self._entries.get(key)
            if entry is not None and self._is_fresh(entry):
                self._touch(key, entry)
                _perf_log.info(
                    "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
                    _short(key),
                    self._now() - entry.created_at,
                    len(self._entries),
                )
                return entry.value

            t0 = time.perf_counter()
            try:
                value = await builder()
            except BaseException:
                # Don't cache failed builds; let the next caller retry.
                # BaseException so cancellations are logged too; always
                # re-raised.
                _perf_log.warning(
                    "[agent_cache] build_failed key=%s elapsed=%.3fs",
                    _short(key),
                    time.perf_counter() - t0,
                )
                raise
            elapsed = time.perf_counter() - t0

            # Insert + evict.
            self._evict_if_full()
            now = self._now()
            self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
            self._entries.move_to_end(key, last=True)
            _perf_log.info(
                "[agent_cache] miss key=%s build=%.3fs size=%d",
                _short(key),
                elapsed,
                len(self._entries),
            )
            return value

    def invalidate(self, key: str) -> bool:
        """Drop a single entry; return True if anything was removed."""
        removed = self._entries.pop(key, None) is not None
        self._locks.pop(key, None)
        if removed:
            _perf_log.info(
                "[agent_cache] invalidate key=%s size=%d",
                _short(key),
                len(self._entries),
            )
        return removed

    def invalidate_prefix(self, prefix: str) -> int:
        """Drop every entry whose key starts with ``prefix``. Returns count."""
        # Snapshot the matches first so the mapping is not mutated mid-iteration.
        keys = [k for k in self._entries if k.startswith(prefix)]
        for k in keys:
            self._entries.pop(k, None)
            self._locks.pop(k, None)
        if keys:
            _perf_log.info(
                "[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
                _short(prefix),
                len(keys),
                len(self._entries),
            )
        return len(keys)

    def clear(self) -> None:
        """Remove every entry and every per-key lock."""
        n = len(self._entries)
        self._entries.clear()
        self._locks.clear()
        if n:
            _perf_log.info("[agent_cache] clear removed=%d", n)

    def stats(self) -> dict[str, Any]:
        """Return current occupancy and the configured limits."""
        return {
            "size": len(self._entries),
            "maxsize": self._maxsize,
            "ttl_seconds": self._ttl,
        }
|
||||
|
||||
|
||||
def _short(key: str, n: int = 16) -> str:
    """Abbreviate ``key`` for log lines.

    Args:
        key: The string to abbreviate.
        n: Maximum number of characters kept before the ellipsis.

    Returns:
        ``key`` unchanged when it has at most ``n`` characters, otherwise
        its first ``n`` characters followed by ``...``.
    """
    if len(key) > n:
        return f"{key[:n]}..."
    return key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Sizing knobs are read from the environment once, at import time.
_DEFAULT_MAXSIZE = int(os.environ.get("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.environ.get("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))

# Process-wide singleton handed out by get_cache().
_cache: _AgentCache = _AgentCache(
    maxsize=_DEFAULT_MAXSIZE,
    ttl_seconds=_DEFAULT_TTL,
)
|
||||
|
||||
|
||||
def get_cache() -> _AgentCache:
    """Return the module-level compiled-agent cache.

    The instance is looked up on every call, so a cache swapped in by
    ``reload_for_tests`` is picked up by later callers.
    """
    return _cache
|
||||
|
||||
|
||||
def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
    """Swap the module-level cache for a brand-new, empty one. Tests only.

    Args:
        maxsize: Capacity of the replacement cache.
        ttl_seconds: Entry lifetime of the replacement cache.

    Returns:
        The newly installed cache instance.
    """
    global _cache
    fresh_cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
    _cache = fresh_cache
    return fresh_cache
|
||||
|
||||
from app.agents.shared.agent_cache import (
|
||||
flags_signature,
|
||||
get_cache,
|
||||
reload_for_tests,
|
||||
stable_hash,
|
||||
system_prompt_hash,
|
||||
tools_signature,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"flags_signature",
|
||||
|
|
|
|||
|
|
@ -1,100 +1,11 @@
|
|||
"""Map configured connectors to the searchable document/connector types.
|
||||
"""Backward-compatible shim.
|
||||
|
||||
This is agent-agnostic infrastructure shared by every agent factory (single-
|
||||
and multi-agent). It translates the connectors a search space has enabled into
|
||||
the set of searchable type strings that pre-search middleware and ``web_search``
|
||||
understand, and always layers in the document types that exist independently of
|
||||
any connector (uploads, notes, extension captures, YouTube).
|
||||
|
||||
It lives in its own module — rather than inside a specific agent factory — so
|
||||
that retiring or moving any single agent never disturbs the others' access to
|
||||
this mapping.
|
||||
Moved to ``app.agents.shared.connector_searchable_types``. Re-exported here for
|
||||
the frozen single-agent stack (``chat_deepagent``) until that stack is retired.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from app.agents.shared.connector_searchable_types import (
|
||||
map_connectors_to_searchable_types,
|
||||
)
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Lookup from a SearchSourceConnectorType value to the searchable
# document/connector type string understood by pre-search middleware and
# web_search. Most connectors map to themselves; the exceptions are marked.
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
    # --- Live search connectors (served by the web_search tool) ---
    "TAVILY_API": "TAVILY_API",
    "LINKUP_API": "LINKUP_API",
    "BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
    # --- Local/indexed connectors (served by KB pre-search middleware) ---
    "SLACK_CONNECTOR": "SLACK_CONNECTOR",
    "TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
    "NOTION_CONNECTOR": "NOTION_CONNECTOR",
    "GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
    "LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
    "DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
    "JIRA_CONNECTOR": "JIRA_CONNECTOR",
    "CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
    "CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
    "GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
    "GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    # Document type name differs from the connector type name.
    "GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
    "LUMA_CONNECTOR": "LUMA_CONNECTOR",
    "ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
    # Document type name differs from the connector type name.
    "WEBCRAWLER_CONNECTOR": "CRAWLED_URL",
    "BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
    # Document type name differs from the connector type name.
    "CIRCLEBACK_CONNECTOR": "CIRCLEBACK",
    "OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
    # Document type name differs from the connector type name.
    "DROPBOX_CONNECTOR": "DROPBOX_FILE",
    # Document type name differs from the connector type name.
    "ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE",
    # --- Composio connectors (unified to native document types) ---
    # Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
    "COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
}

# Document types that exist without any SearchSourceConnector and are
# therefore always searchable. Order is preserved in the mapping output.
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
    "EXTENSION",  # browser extension captures
    "FILE",  # uploaded files
    "NOTE",  # user notes
    "YOUTUBE_VIDEO",  # YouTube videos
]
|
||||
|
||||
|
||||
def map_connectors_to_searchable_types(
    connector_types: list[Any],
) -> list[str]:
    """Translate connector types into searchable document/connector types.

    The result always starts with the always-available document types
    (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO), followed by the mapped type of
    each connector in input order. Duplicates are dropped, keeping the
    first occurrence; connectors without a mapping are ignored.

    Args:
        connector_types: Connector types, either enum members (anything
            exposing ``.value``) or plain values convertible with ``str``.

    Returns:
        Ordered, de-duplicated list of searchable type strings.
    """
    # A dict gives ordered de-duplication: keys keep first-insertion order.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for connector in connector_types:
        # Accept both enum members and raw strings.
        raw_name = connector.value if hasattr(connector, "value") else str(connector)
        mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(raw_name)
        if mapped:
            ordered.setdefault(mapped, None)

    return list(ordered)
|
||||
__all__ = ["map_connectors_to_searchable_types"]
|
||||
|
|
|
|||
|
|
@ -1,125 +1,21 @@
|
|||
"""
|
||||
Thin compatibility wrapper around :mod:`app.agents.new_chat.prompts.composer`.
|
||||
"""Backward-compatible shim.
|
||||
|
||||
The composer split the previous monolithic prompt string into a fragment
|
||||
tree under ``prompts/`` plus a model-family dispatch step (see the
|
||||
composer module docstring for credits). This module preserves the public
|
||||
function surface (``build_surfsense_system_prompt`` /
|
||||
``build_configurable_system_prompt`` /
|
||||
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
|
||||
that existing call sites — `chat_deepagent.py`, anonymous chat routes,
|
||||
and the configurable-prompt admin path — keep working without churn.
|
||||
|
||||
For new call sites prefer importing ``compose_system_prompt`` directly
|
||||
from :mod:`app.agents.new_chat.prompts.composer`.
|
||||
Moved to ``app.agents.shared.system_prompt``. Re-exported here for the frozen
|
||||
single-agent stack (``chat_deepagent``) until that stack is retired.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.db import ChatVisibility
|
||||
|
||||
from .prompts.composer import (
|
||||
_read_fragment,
|
||||
from app.agents.shared.system_prompt import (
|
||||
SURFSENSE_CITATION_INSTRUCTIONS,
|
||||
SURFSENSE_NO_CITATION_INSTRUCTIONS,
|
||||
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE,
|
||||
SURFSENSE_SYSTEM_PROMPT,
|
||||
build_configurable_system_prompt,
|
||||
build_surfsense_system_prompt,
|
||||
compose_system_prompt,
|
||||
detect_provider_variant,
|
||||
get_default_system_instructions,
|
||||
)
|
||||
|
||||
# Optional routing fragments under ``prompts/routing/`` (see composer).
|
||||
_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
|
||||
|
||||
# Public re-exports for backwards compatibility (some legacy code reads the
|
||||
# raw default-instructions text directly).
|
||||
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
|
||||
"<system_instruction>\nDefault SurfSense agent system instructions are now\n"
|
||||
"composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
|
||||
"</system_instruction>"
|
||||
)
|
||||
|
||||
# Citation block re-exposed for legacy importers that referenced this constant
|
||||
# directly. The composer is the canonical source; this is a frozen snapshot
|
||||
# loaded at module-init time.
|
||||
SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
|
||||
SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
|
||||
|
||||
|
||||
def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose the default SurfSense system prompt.

    Forwards every argument to ``compose_system_prompt`` with citations
    forced on and the default connector-routing fragments selected. See
    ``compose_system_prompt`` for the meaning of the individual parameters.

    Returns:
        The composed system prompt text.
    """
    compose_kwargs = {
        "today": today,
        "thread_visibility": thread_visibility,
        "enabled_tool_names": enabled_tool_names,
        "disabled_tool_names": disabled_tool_names,
        "mcp_connector_tools": mcp_connector_tools,
        "citations_enabled": True,  # this entry point never disables citations
        "model_name": model_name,
        "connector_routing": _DEFAULT_CONNECTOR_ROUTING,
    }
    return compose_system_prompt(**compose_kwargs)
|
||||
|
||||
|
||||
def build_configurable_system_prompt(
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose a SurfSense system prompt with caller-controlled options.

    Unlike ``build_surfsense_system_prompt``, the custom instructions, the
    use-defaults switch and the citation switch are passed through as
    given. The default connector-routing fragments are always selected.
    See ``compose_system_prompt`` for the meaning of the individual
    parameters.

    Returns:
        The composed system prompt text.
    """
    compose_kwargs = {
        "today": today,
        "thread_visibility": thread_visibility,
        "enabled_tool_names": enabled_tool_names,
        "disabled_tool_names": disabled_tool_names,
        "mcp_connector_tools": mcp_connector_tools,
        "custom_system_instructions": custom_system_instructions,
        "use_default_system_instructions": use_default_system_instructions,
        "citations_enabled": citations_enabled,
        "model_name": model_name,
        "connector_routing": _DEFAULT_CONNECTOR_ROUTING,
    }
    return compose_system_prompt(**compose_kwargs)
|
||||
|
||||
|
||||
def get_default_system_instructions() -> str:
    """Return the default system-instruction block, stripped of outer whitespace.

    The block is built for a private thread and stamped with today's UTC
    date in ISO format. It is produced by the composer at call time rather
    than read from a stored constant.

    Returns:
        The default system instructions text.
    """
    today_iso = datetime.now(UTC).date().isoformat()

    # Imported here rather than at module top, as in the original.
    from .prompts.composer import _build_system_instructions  # local import

    instructions = _build_system_instructions(
        visibility=ChatVisibility.PRIVATE,
        resolved_today=today_iso,
    )
    return instructions.strip()
|
||||
|
||||
|
||||
# Backwards compatibility — some modules import the constant directly.
|
||||
SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SURFSENSE_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_NO_CITATION_INSTRUCTIONS",
|
||||
|
|
|
|||
357
surfsense_backend/app/agents/shared/agent_cache.py
Normal file
357
surfsense_backend/app/agents/shared/agent_cache.py
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
"""TTL-LRU cache for compiled SurfSense deep agents.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
|
||||
``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
|
||||
turn:
|
||||
|
||||
1. Discover connectors & document types from Postgres (~50-200ms)
|
||||
2. Build the tool list (built-in + MCP) (~200ms-1.7s)
|
||||
3. Compose the system prompt
|
||||
4. Construct ~15 middleware instances (CPU)
|
||||
5. Eagerly compile the general-purpose subagent
|
||||
(``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
|
||||
which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure
|
||||
CPU work)
|
||||
6. Compile the outer LangGraph
|
||||
|
||||
For a single thread, all six steps produce the SAME object on every turn
|
||||
unless the user has changed their LLM config, toggled a feature flag,
|
||||
added a connector, etc. The right answer is to compile ONCE per
|
||||
"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
|
||||
every subsequent turn on the same thread.
|
||||
|
||||
Why a per-thread key (not a global pool)
|
||||
----------------------------------------
|
||||
|
||||
Most middleware in the SurfSense stack captures per-thread state in
|
||||
``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
|
||||
``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
|
||||
would silently leak state across users and threads. Keying the cache on
|
||||
``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
|
||||
turns on the same thread without changing any middleware's behavior.
|
||||
|
||||
Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
|
||||
(read via ``runtime.context``) so the cache can collapse to a single
|
||||
``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
|
||||
then, per-thread keying is the only safe option.
|
||||
|
||||
Cache shape
|
||||
-----------
|
||||
|
||||
* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
|
||||
minutes — matches a typical chat session). ``maxsize`` (default 256)
|
||||
caps memory; LRU evicts least-recently-used on overflow.
|
||||
* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
|
||||
cold misses on the same key wait for the first build instead of
|
||||
building N times.
|
||||
* Process-local: this is an in-memory cache. Multi-replica deployments
|
||||
pay the build cost once per replica per key. That's fine; the working
|
||||
set per replica is small (one entry per active thread on that replica).
|
||||
|
||||
Telemetry
|
||||
---------
|
||||
|
||||
Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
|
||||
|
||||
* ``hit`` — cache hit, microseconds-fast
|
||||
* ``miss`` — first build for this key, includes build duration
|
||||
* ``stale`` — entry was found but expired; rebuilt
|
||||
* ``evict`` — LRU eviction (size-limited)
|
||||
* ``size`` — current cache occupancy at lookup time
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
from app.utils.perf import get_perf_logger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_perf_log = get_perf_logger()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API: signature helpers (cache key components)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def stable_hash(*parts: Any) -> str:
    """Return a deterministic SHA1 hex digest over the ``repr`` of each part.

    Every part is rendered with ``repr``, UTF-8 encoded (unencodable
    characters are replaced), and terminated with the ASCII unit separator
    byte (0x1F) so that adjacent parts cannot run together. The digest is
    used as a content fingerprint only, not as a security primitive.

    Args:
        *parts: Arbitrary objects to fingerprint, in order.

    Returns:
        The 40-character hexadecimal SHA1 digest.
    """
    unit_separator = b"\x1f"
    payload = b"".join(
        repr(part).encode("utf-8", errors="replace") + unit_separator
        for part in parts
    )
    return hashlib.sha1(payload, usedforsecurity=False).hexdigest()
|
||||
|
||||
|
||||
def tools_signature(
    tools: list[Any] | tuple[Any, ...],
    *,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
) -> str:
    """Fingerprint the bound tool surface for use in a cache key.

    Three components are hashed together:

    * the sorted ``(name, description)`` pair of every tool (a tool without
      a ``name`` falls back to its ``repr``; a missing ``description``
      becomes ``""``),
    * the sorted list of available connectors,
    * the sorted list of available document types.

    Sorting makes the result independent of input order. ``None`` for
    either list is treated as an empty list.

    Args:
        tools: The tools bound to the agent.
        available_connectors: Connector identifiers, or ``None``.
        available_document_types: Document-type identifiers, or ``None``.

    Returns:
        Hex digest produced by :func:`stable_hash`.
    """
    descriptors = [
        (getattr(tool, "name", repr(tool)), getattr(tool, "description", ""))
        for tool in tools
    ]
    descriptors.sort()

    connector_names = list(available_connectors or [])
    connector_names.sort()

    document_types = list(available_document_types or [])
    document_types.sort()

    return stable_hash(descriptors, connector_names, document_types)
|
||||
|
||||
|
||||
def flags_signature(flags: Any) -> str:
    """Fingerprint the resolved :class:`AgentFeatureFlags` dataclass.

    The ``repr`` of a frozen dataclass is deterministic, so hashing it gives
    a stable fingerprint. Restart safe (flags are read once at process boot).
    """
    rendered = repr(flags)
    return stable_hash(rendered)
|
||||
|
||||
|
||||
def system_prompt_hash(system_prompt: str) -> str:
    """Return the SHA-1 hex digest of the UTF-8 encoded system prompt.

    Unencodable characters are replaced rather than raising. Cheap, ~30µs
    for typical prompts.
    """
    payload = system_prompt.encode("utf-8", errors="replace")
    digest = hashlib.sha1(usedforsecurity=False)
    digest.update(payload)
    return digest.hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache implementation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Entry:
|
||||
value: Any
|
||||
created_at: float
|
||||
last_used_at: float
|
||||
|
||||
|
||||
class _AgentCache:
|
||||
"""In-process TTL-LRU cache with per-key in-flight de-duplication.
|
||||
|
||||
NOT THREAD-SAFE in the multithreading sense — designed for a single
|
||||
asyncio event loop. Uvicorn runs one event loop per worker process,
|
||||
so this is fine; multi-worker deployments simply each maintain their
|
||||
own cache.
|
||||
"""
|
||||
|
||||
def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
|
||||
self._maxsize = maxsize
|
||||
self._ttl = ttl_seconds
|
||||
self._entries: OrderedDict[str, _Entry] = OrderedDict()
|
||||
# One lock per key — guards "build" so concurrent cold misses on
|
||||
# the same key wait for the first build instead of all racing.
|
||||
self._locks: dict[str, asyncio.Lock] = {}
|
||||
|
||||
def _now(self) -> float:
|
||||
return time.monotonic()
|
||||
|
||||
def _is_fresh(self, entry: _Entry) -> bool:
|
||||
return (self._now() - entry.created_at) < self._ttl
|
||||
|
||||
def _evict_if_full(self) -> None:
|
||||
while len(self._entries) >= self._maxsize:
|
||||
evicted_key, _ = self._entries.popitem(last=False)
|
||||
self._locks.pop(evicted_key, None)
|
||||
_perf_log.info(
|
||||
"[agent_cache] evict key=%s reason=lru size=%d",
|
||||
_short(evicted_key),
|
||||
len(self._entries),
|
||||
)
|
||||
|
||||
def _touch(self, key: str, entry: _Entry) -> None:
|
||||
entry.last_used_at = self._now()
|
||||
self._entries.move_to_end(key, last=True)
|
||||
|
||||
async def get_or_build(
|
||||
self,
|
||||
key: str,
|
||||
*,
|
||||
builder: Callable[[], Awaitable[Any]],
|
||||
) -> Any:
|
||||
"""Return the cached value for ``key`` or call ``builder()`` to make it.
|
||||
|
||||
``builder`` MUST be idempotent — concurrent cold misses on the
|
||||
same key collapse to a single ``builder()`` call (the others
|
||||
wait on the in-flight lock and observe the populated entry on
|
||||
wake).
|
||||
"""
|
||||
# Fast path: hot hit.
|
||||
entry = self._entries.get(key)
|
||||
if entry is not None and self._is_fresh(entry):
|
||||
self._touch(key, entry)
|
||||
_perf_log.info(
|
||||
"[agent_cache] hit key=%s age=%.1fs size=%d",
|
||||
_short(key),
|
||||
self._now() - entry.created_at,
|
||||
len(self._entries),
|
||||
)
|
||||
return entry.value
|
||||
|
||||
# Stale entry — drop it; rebuild below.
|
||||
if entry is not None and not self._is_fresh(entry):
|
||||
_perf_log.info(
|
||||
"[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
|
||||
_short(key),
|
||||
self._now() - entry.created_at,
|
||||
self._ttl,
|
||||
)
|
||||
self._entries.pop(key, None)
|
||||
|
||||
# Slow path: serialize concurrent misses for the same key.
|
||||
lock = self._locks.setdefault(key, asyncio.Lock())
|
||||
async with lock:
|
||||
# Double-check after acquiring the lock — another waiter may
|
||||
# have populated the entry while we slept.
|
||||
entry = self._entries.get(key)
|
||||
if entry is not None and self._is_fresh(entry):
|
||||
self._touch(key, entry)
|
||||
_perf_log.info(
|
||||
"[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
|
||||
_short(key),
|
||||
self._now() - entry.created_at,
|
||||
len(self._entries),
|
||||
)
|
||||
return entry.value
|
||||
|
||||
t0 = time.perf_counter()
|
||||
try:
|
||||
value = await builder()
|
||||
except BaseException:
|
||||
# Don't cache failed builds; let the next caller retry.
|
||||
_perf_log.warning(
|
||||
"[agent_cache] build_failed key=%s elapsed=%.3fs",
|
||||
_short(key),
|
||||
time.perf_counter() - t0,
|
||||
)
|
||||
raise
|
||||
elapsed = time.perf_counter() - t0
|
||||
|
||||
# Insert + evict.
|
||||
self._evict_if_full()
|
||||
now = self._now()
|
||||
self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
|
||||
self._entries.move_to_end(key, last=True)
|
||||
_perf_log.info(
|
||||
"[agent_cache] miss key=%s build=%.3fs size=%d",
|
||||
_short(key),
|
||||
elapsed,
|
||||
len(self._entries),
|
||||
)
|
||||
return value
|
||||
|
||||
def invalidate(self, key: str) -> bool:
|
||||
"""Drop a single entry; return True if anything was removed."""
|
||||
removed = self._entries.pop(key, None) is not None
|
||||
self._locks.pop(key, None)
|
||||
if removed:
|
||||
_perf_log.info(
|
||||
"[agent_cache] invalidate key=%s size=%d",
|
||||
_short(key),
|
||||
len(self._entries),
|
||||
)
|
||||
return removed
|
||||
|
||||
def invalidate_prefix(self, prefix: str) -> int:
|
||||
"""Drop every entry whose key starts with ``prefix``. Returns count."""
|
||||
keys = [k for k in self._entries if k.startswith(prefix)]
|
||||
for k in keys:
|
||||
self._entries.pop(k, None)
|
||||
self._locks.pop(k, None)
|
||||
if keys:
|
||||
_perf_log.info(
|
||||
"[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
|
||||
_short(prefix),
|
||||
len(keys),
|
||||
len(self._entries),
|
||||
)
|
||||
return len(keys)
|
||||
|
||||
def clear(self) -> None:
|
||||
n = len(self._entries)
|
||||
self._entries.clear()
|
||||
self._locks.clear()
|
||||
if n:
|
||||
_perf_log.info("[agent_cache] clear removed=%d", n)
|
||||
|
||||
def stats(self) -> dict[str, Any]:
|
||||
return {
|
||||
"size": len(self._entries),
|
||||
"maxsize": self._maxsize,
|
||||
"ttl_seconds": self._ttl,
|
||||
}
|
||||
|
||||
|
||||
def _short(key: str, n: int = 16) -> str:
|
||||
"""Truncate keys for log lines so they don't blow up log volume."""
|
||||
return key if len(key) <= n else f"{key[:n]}..."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DEFAULT_MAXSIZE = int(os.getenv("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
|
||||
_DEFAULT_TTL = float(os.getenv("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))
|
||||
|
||||
_cache: _AgentCache = _AgentCache(maxsize=_DEFAULT_MAXSIZE, ttl_seconds=_DEFAULT_TTL)
|
||||
|
||||
|
||||
def get_cache() -> _AgentCache:
    """Return the module-level compiled-agent cache (one per process).

    The object returned is whatever ``_cache`` is bound to at call time, so
    a replacement installed by :func:`reload_for_tests` is picked up.
    """
    return _cache
|
||||
|
||||
|
||||
def reload_for_tests(*, maxsize: int = 256, ttl_seconds: float = 1800.0) -> _AgentCache:
    """Swap the module singleton for a brand-new, empty cache. Tests only.

    Args:
        maxsize: Capacity of the replacement cache.
        ttl_seconds: Entry lifetime of the replacement cache.

    Returns:
        The newly installed cache instance.
    """
    global _cache
    fresh_cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
    _cache = fresh_cache
    return fresh_cache
|
||||
|
||||
|
||||
__all__ = [
|
||||
"flags_signature",
|
||||
"get_cache",
|
||||
"reload_for_tests",
|
||||
"stable_hash",
|
||||
"system_prompt_hash",
|
||||
"tools_signature",
|
||||
]
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
"""Map configured connectors to the searchable document/connector types.
|
||||
|
||||
This is agent-agnostic infrastructure shared by every agent factory (single-
|
||||
and multi-agent). It translates the connectors a search space has enabled into
|
||||
the set of searchable type strings that pre-search middleware and ``web_search``
|
||||
understand, and always layers in the document types that exist independently of
|
||||
any connector (uploads, notes, extension captures, YouTube).
|
||||
|
||||
It lives in its own module — rather than inside a specific agent factory — so
|
||||
that retiring or moving any single agent never disturbs the others' access to
|
||||
this mapping.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
|
||||
# used by pre-search middleware and web_search.
|
||||
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
|
||||
# the web_search tool; all others are considered local/indexed data.
|
||||
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
|
||||
# Live search connectors (handled by web_search tool)
|
||||
"TAVILY_API": "TAVILY_API",
|
||||
"LINKUP_API": "LINKUP_API",
|
||||
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
|
||||
# Local/indexed connectors (handled by KB pre-search middleware)
|
||||
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
|
||||
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
|
||||
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
|
||||
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
|
||||
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
|
||||
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
|
||||
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
|
||||
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
|
||||
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
|
||||
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
|
||||
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
|
||||
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
|
||||
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
|
||||
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
|
||||
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
|
||||
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
|
||||
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
|
||||
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
|
||||
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
|
||||
"DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type
|
||||
"ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type
|
||||
# Composio connectors (unified to native document types).
|
||||
# Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
|
||||
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
|
||||
"COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
|
||||
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
|
||||
}
|
||||
|
||||
# Document types that don't come from SearchSourceConnector but should always be searchable
|
||||
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
|
||||
"EXTENSION", # Browser extension data
|
||||
"FILE", # Uploaded files
|
||||
"NOTE", # User notes
|
||||
"YOUTUBE_VIDEO", # YouTube videos
|
||||
]
|
||||
|
||||
|
||||
def map_connectors_to_searchable_types(
    connector_types: list[Any],
) -> list[str]:
    """Translate connector types into searchable document/connector types.

    The result starts with the always-available document types (EXTENSION,
    FILE, NOTE, YOUTUBE_VIDEO), followed by the searchable counterpart of
    each connector in input order. Duplicates are dropped, keeping the
    first occurrence, and connector types with no known mapping are
    skipped.

    Args:
        connector_types: SearchSourceConnectorType enum members or plain
            strings. Objects with a ``value`` attribute contribute that
            value; anything else is converted with ``str``.

    Returns:
        Ordered, de-duplicated list of searchable type strings.
    """
    # A dict doubles as an insertion-ordered set: keys are the result, values unused.
    ordered: dict[str, None] = dict.fromkeys(_ALWAYS_AVAILABLE_DOC_TYPES)

    for connector in connector_types:
        # Handle both enum and string types
        type_name = connector.value if hasattr(connector, "value") else str(connector)
        mapped = _CONNECTOR_TYPE_TO_SEARCHABLE.get(type_name)
        if mapped:
            ordered.setdefault(mapped)

    return list(ordered)
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
Prompt composer for the SurfSense ``new_chat`` agent.
|
||||
|
||||
This module assembles the agent's system prompt from the markdown fragments
|
||||
under :mod:`app.agents.new_chat.prompts`. It replaces the monolithic
|
||||
under :mod:`app.agents.shared.prompts`. It replaces the monolithic
|
||||
``system_prompt.py`` with a clean, fragment-based composition:
|
||||
|
||||
::
|
||||
|
|
@ -119,7 +119,7 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
_PROMPTS_PACKAGE = "app.agents.new_chat.prompts"
|
||||
_PROMPTS_PACKAGE = "app.agents.shared.prompts"
|
||||
|
||||
|
||||
def _read_fragment(subpath: str) -> str:
|
||||
133
surfsense_backend/app/agents/shared/system_prompt.py
Normal file
133
surfsense_backend/app/agents/shared/system_prompt.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""
|
||||
Thin compatibility wrapper around :mod:`app.agents.shared.prompts.composer`.
|
||||
|
||||
The composer split the previous monolithic prompt string into a fragment
|
||||
tree under ``prompts/`` plus a model-family dispatch step (see the
|
||||
composer module docstring for credits). This module preserves the public
|
||||
function surface (``build_surfsense_system_prompt`` /
|
||||
``build_configurable_system_prompt`` /
|
||||
``get_default_system_instructions`` / ``SURFSENSE_SYSTEM_PROMPT``) so
|
||||
that existing call sites — `chat_deepagent.py`, anonymous chat routes,
|
||||
and the configurable-prompt admin path — keep working without churn.
|
||||
|
||||
For new call sites prefer importing ``compose_system_prompt`` directly
|
||||
from :mod:`app.agents.shared.prompts.composer`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from app.db import ChatVisibility
|
||||
|
||||
from .prompts.composer import (
|
||||
_read_fragment,
|
||||
compose_system_prompt,
|
||||
detect_provider_variant,
|
||||
)
|
||||
|
||||
# Optional routing fragments under ``prompts/routing/`` (see composer).
|
||||
_DEFAULT_CONNECTOR_ROUTING: tuple[str, ...] = ("linear", "slack")
|
||||
|
||||
# Public re-exports for backwards compatibility (some legacy code reads the
|
||||
# raw default-instructions text directly).
|
||||
SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE = (
|
||||
"<system_instruction>\nDefault SurfSense agent system instructions are now\n"
|
||||
"composed from prompts/base/*.md. See compose_system_prompt() for details.\n"
|
||||
"</system_instruction>"
|
||||
)
|
||||
|
||||
# Citation block re-exposed for legacy importers that referenced this constant
|
||||
# directly. The composer is the canonical source; this is a frozen snapshot
|
||||
# loaded at module-init time.
|
||||
SURFSENSE_CITATION_INSTRUCTIONS = _read_fragment("base/citations_on.md")
|
||||
SURFSENSE_NO_CITATION_INSTRUCTIONS = _read_fragment("base/citations_off.md")
|
||||
|
||||
|
||||
def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose the default SurfSense system prompt.

    Every argument is forwarded unchanged to
    :func:`app.agents.shared.prompts.composer.compose_system_prompt`, which
    documents the parameters in full. This wrapper pins two settings:
    citations are always enabled and the module's default connector routing
    is used.
    """
    prompt = compose_system_prompt(
        today=today,
        thread_visibility=thread_visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
        mcp_connector_tools=mcp_connector_tools,
        citations_enabled=True,  # the default prompt always cites
        model_name=model_name,
        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
    )
    return prompt
|
||||
|
||||
|
||||
def build_configurable_system_prompt(
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    mcp_connector_tools: dict[str, list[str]] | None = None,
    *,
    model_name: str | None = None,
) -> str:
    """Compose a SurfSense system prompt from caller-supplied settings.

    This is the NewLLMConfig path. Every argument is forwarded unchanged to
    :func:`app.agents.shared.prompts.composer.compose_system_prompt`, which
    documents the parameters in full; only the connector routing is fixed
    to the module default.
    """
    prompt = compose_system_prompt(
        today=today,
        thread_visibility=thread_visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
        mcp_connector_tools=mcp_connector_tools,
        custom_system_instructions=custom_system_instructions,
        use_default_system_instructions=use_default_system_instructions,
        citations_enabled=citations_enabled,
        model_name=model_name,
        connector_routing=_DEFAULT_CONNECTOR_ROUTING,
    )
    return prompt
|
||||
|
||||
|
||||
def get_default_system_instructions() -> str:
    """Return the default ``<system_instruction>`` block (no tools / citations).

    Handy for pre-filling ``NewLLMConfig.system_instructions`` in the UI.
    The text is rendered from the current fragment tree on every call —
    using private visibility and today's UTC date — rather than read from a
    baked-in constant, and is stripped of surrounding whitespace.
    """
    from .prompts.composer import _build_system_instructions  # local import

    today_iso = datetime.now(UTC).date().isoformat()
    instructions = _build_system_instructions(
        visibility=ChatVisibility.PRIVATE,
        resolved_today=today_iso,
    )
    return instructions.strip()
|
||||
|
||||
|
||||
# Backwards compatibility — some modules import the constant directly.
|
||||
SURFSENSE_SYSTEM_PROMPT = build_surfsense_system_prompt()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SURFSENSE_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_NO_CITATION_INSTRUCTIONS",
|
||||
"SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE",
|
||||
"SURFSENSE_SYSTEM_PROMPT",
|
||||
"build_configurable_system_prompt",
|
||||
"build_surfsense_system_prompt",
|
||||
"compose_system_prompt",
|
||||
"detect_provider_variant",
|
||||
"get_default_system_instructions",
|
||||
]
|
||||
|
|
@ -13,7 +13,7 @@ from fastapi import APIRouter, Depends, HTTPException
|
|||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.future import select
|
||||
|
||||
from app.agents.new_chat.system_prompt import get_default_system_instructions
|
||||
from app.agents.shared.system_prompt import get_default_system_instructions
|
||||
from app.config import config
|
||||
from app.db import (
|
||||
NewLLMConfig,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from datetime import UTC, datetime
|
|||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.prompts.composer import (
|
||||
from app.agents.shared.prompts.composer import (
|
||||
ALL_TOOL_NAMES_ORDERED,
|
||||
compose_system_prompt,
|
||||
detect_provider_variant,
|
||||
|
|
@ -64,7 +64,7 @@ class TestProviderVariantDetection:
|
|||
``gpt-5`` reasoning regex first. Codex is the more specialised
|
||||
prompt and mirrors OpenCode's dispatch order.
|
||||
"""
|
||||
from app.agents.new_chat.prompts.composer import detect_provider_variant
|
||||
from app.agents.shared.prompts.composer import detect_provider_variant
|
||||
|
||||
assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
|
||||
assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ from dataclasses import dataclass
|
|||
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.agent_cache import (
|
||||
from app.agents.shared.agent_cache import (
|
||||
flags_signature,
|
||||
reload_for_tests,
|
||||
stable_hash,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue