feat: add performance logging and caching across agent creation, sandbox management, and MCP tooling

- Introduced slow-callback logging in the FastAPI app to surface calls that block the event loop (see the sketch below).
- Added performance logging for agent creation and tool loading processes.
- Implemented caching for MCP tools to reduce redundant server calls.
- Enhanced sandbox management with in-process caching so repeat messages in a thread reuse the sandbox without a Daytona API call.
- Refactored several functions for better readability and performance tracking.
- Updated tests to cover the new caching and logging behavior.
DESKTOP-RTLN3BA\$punk 2026-02-26 13:00:31 -08:00
parent 2e99f1e853
commit aabc24f82c
22 changed files with 637 additions and 200 deletions
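
The slow-callback logging mentioned in the first bullet has no hunk in this view. A minimal sketch of the standard way to get it, assuming asyncio's built-in debug mode is what the commit enables (the lifespan hook and the 100 ms threshold are assumptions, not taken from the diff):

    # Hypothetical sketch: asyncio debug mode logs any callback that holds
    # the event loop longer than slow_callback_duration seconds.
    import asyncio
    from contextlib import asynccontextmanager

    from fastapi import FastAPI

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        loop = asyncio.get_running_loop()
        loop.set_debug(True)               # enables slow-callback detection
        loop.slow_callback_duration = 0.1  # log callbacks blocking > 100 ms
        yield

    app = FastAPI(lifespan=lifespan)

Debug mode adds measurable overhead, so in practice it is usually gated behind a settings flag.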

View file

@@ -6,6 +6,9 @@ with configurable tools via the tools registry and configurable prompts
via NewLLMConfig.
"""
import asyncio
import logging
import time
from collections.abc import Sequence
from typing import Any
@@ -26,6 +29,8 @@ from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
_perf_log = logging.getLogger("surfsense.perf")
# =============================================================================
# Connector Type Mapping
# =============================================================================
@@ -210,29 +215,29 @@ async def create_surfsense_deep_agent(
additional_tools=[my_custom_tool]
)
"""
_t_agent_total = time.perf_counter()
# Discover available connectors and document types for this search space
# This enables dynamic tool docstrings that inform the LLM about what's actually available
available_connectors: list[str] | None = None
available_document_types: list[str] | None = None
_t0 = time.perf_counter()
try:
# Get enabled search source connectors for this search space
connector_types = await connector_service.get_available_connectors(
search_space_id
)
if connector_types:
# Convert enum values to strings and also include mapped document types
available_connectors = _map_connectors_to_searchable_types(connector_types)
# Get document types that have at least one document indexed
available_document_types = await connector_service.get_available_document_types(
search_space_id
)
except Exception as e:
# Log but don't fail - fall back to all connectors if discovery fails
import logging
logging.warning(f"Failed to discover available connectors/document types: {e}")
_perf_log.info(
"[create_agent] Connector/doc-type discovery in %.3fs",
time.perf_counter() - _t0,
)
# Build dependencies dict for the tools registry
visibility = thread_visibility or ChatVisibility.PRIVATE
@@ -274,14 +279,21 @@ async def create_surfsense_deep_agent(
modified_disabled_tools.extend(linear_tools)
# Build tools using the async registry (includes MCP tools)
_t0 = time.perf_counter()
tools = await build_tools_async(
dependencies=dependencies,
enabled_tools=enabled_tools,
disabled_tools=modified_disabled_tools,
additional_tools=list(additional_tools) if additional_tools else None,
)
_perf_log.info(
"[create_agent] build_tools_async in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(tools),
)
# Build system prompt based on agent_config
_t0 = time.perf_counter()
_sandbox_enabled = sandbox_backend is not None
if agent_config is not None:
system_prompt = build_configurable_system_prompt(
@@ -296,15 +308,18 @@ async def create_surfsense_deep_agent(
thread_visibility=thread_visibility,
sandbox_enabled=_sandbox_enabled,
)
_perf_log.info(
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
)
# Build optional kwargs for the deep agent
deep_agent_kwargs: dict[str, Any] = {}
if sandbox_backend is not None:
deep_agent_kwargs["backend"] = sandbox_backend
# Create the deep agent with system prompt and checkpointer
# Note: TodoListMiddleware (write_todos) is included by default in create_deep_agent
agent = create_deep_agent(
_t0 = time.perf_counter()
agent = await asyncio.to_thread(
create_deep_agent,
model=llm,
tools=tools,
system_prompt=system_prompt,
@@ -312,5 +327,13 @@ async def create_surfsense_deep_agent(
checkpointer=checkpointer,
**deep_agent_kwargs,
)
_perf_log.info(
"[create_agent] Graph compiled (create_deep_agent) in %.3fs",
time.perf_counter() - _t0,
)
_perf_log.info(
"[create_agent] Total agent creation in %.3fs",
time.perf_counter() - _t_agent_total,
)
return agent
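
The _t0 = time.perf_counter() / _perf_log.info(...) pairs above repeat once per phase. A hypothetical helper (not part of this commit) that would collapse each pair to one line:

    # Hypothetical helper: wraps the repeated perf_counter/_perf_log.info
    # pattern used throughout create_surfsense_deep_agent.
    import logging
    import time
    from contextlib import contextmanager

    _perf_log = logging.getLogger("surfsense.perf")

    @contextmanager
    def timed(label: str):
        t0 = time.perf_counter()
        try:
            yield
        finally:
            _perf_log.info("[%s] %.3fs", label, time.perf_counter() - t0)

    # Usage:
    # with timed("create_agent.build_tools_async"):
    #     tools = await build_tools_async(...)

Note also that create_deep_agent now runs under asyncio.to_thread, so graph compilation no longer blocks the event loop while it is being timed.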

View file

@@ -12,6 +12,7 @@ the sandbox is deleted so they remain downloadable after cleanup.
from __future__ import annotations
import asyncio
import contextlib
import logging
import os
import shutil
@@ -56,6 +57,7 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
_daytona_client: Daytona | None = None
_sandbox_cache: dict[str, _TimeoutAwareSandbox] = {}
THREAD_LABEL_KEY = "surfsense_thread"
@@ -126,8 +128,8 @@ def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
"""Get or create a sandbox for a conversation thread.
Uses the thread_id as a label so the same sandbox persists
across multiple messages within the same conversation.
Uses an in-process cache keyed by thread_id so subsequent messages
in the same conversation reuse the sandbox object without an API call.
Args:
thread_id: The conversation thread identifier.
@@ -135,11 +137,19 @@ async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
Returns:
DaytonaSandbox connected to the sandbox.
"""
return await asyncio.to_thread(_find_or_create, str(thread_id))
key = str(thread_id)
cached = _sandbox_cache.get(key)
if cached is not None:
logger.info("Reusing cached sandbox for thread %s", key)
return cached
sandbox = await asyncio.to_thread(_find_or_create, key)
_sandbox_cache[key] = sandbox
return sandbox
async def delete_sandbox(thread_id: int | str) -> None:
"""Delete the sandbox for a conversation thread."""
_sandbox_cache.pop(str(thread_id), None)
def _delete() -> None:
client = _get_client()
@@ -147,7 +157,9 @@ async def delete_sandbox(thread_id: int | str) -> None:
try:
sandbox = client.find_one(labels=labels)
except DaytonaError:
logger.debug("No sandbox to delete for thread %s (already removed)", thread_id)
logger.debug(
"No sandbox to delete for thread %s (already removed)", thread_id
)
return
try:
client.delete(sandbox)
@@ -166,6 +178,7 @@ async def delete_sandbox(thread_id: int | str) -> None:
# Local file persistence
# ---------------------------------------------------------------------------
def _get_sandbox_files_dir() -> Path:
return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files"))
@@ -206,6 +219,7 @@ async def persist_and_delete_sandbox(
Per-file errors are logged but do **not** prevent the sandbox from
being deleted; freeing Daytona storage is the priority.
"""
_sandbox_cache.pop(str(thread_id), None)
def _persist_and_delete() -> None:
client = _get_client()
@@ -229,10 +243,8 @@ async def persist_and_delete_sandbox(
sandbox.id,
exc_info=True,
)
try:
with contextlib.suppress(Exception):
client.delete(sandbox)
except Exception:
pass
return
for path in sandbox_file_paths:
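
One caveat with the new in-process sandbox cache: two concurrent messages on the same thread can both miss the cache and race into _find_or_create, potentially creating duplicate sandboxes. A hedged sketch of per-key locking that would close the gap (hypothetical, not in this commit; it reuses _sandbox_cache, _find_or_create, and _TimeoutAwareSandbox from this module):

    # Hypothetical guard: serialize cache misses per thread_id so
    # concurrent requests don't each look up / create a sandbox.
    import asyncio

    _sandbox_locks: dict[str, asyncio.Lock] = {}

    async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
        key = str(thread_id)
        lock = _sandbox_locks.setdefault(key, asyncio.Lock())
        async with lock:
            cached = _sandbox_cache.get(key)
            if cached is not None:
                return cached
            sandbox = await asyncio.to_thread(_find_or_create, key)
            _sandbox_cache[key] = sandbox
            return sandbox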

View file

@@ -11,6 +11,7 @@ This implements real MCP protocol support similar to Cursor's implementation.
"""
import logging
import time
from typing import Any
from langchain_core.tools import StructuredTool
@@ -25,6 +26,9 @@ from app.db import SearchSourceConnector, SearchSourceConnectorType
logger = logging.getLogger(__name__)
_MCP_CACHE_TTL_SECONDS = 300 # 5 minutes
_mcp_tools_cache: dict[int, tuple[float, list[StructuredTool]]] = {}
def _create_dynamic_input_model_from_schema(
tool_name: str,
@@ -355,6 +359,19 @@ async def _load_http_mcp_tools(
return tools
def invalidate_mcp_tools_cache(search_space_id: int | None = None) -> None:
"""Invalidate cached MCP tools.
Args:
search_space_id: If provided, only invalidate for this search space.
If None, invalidate all cached MCP tools.
"""
if search_space_id is not None:
_mcp_tools_cache.pop(search_space_id, None)
else:
_mcp_tools_cache.clear()
async def load_mcp_tools(
session: AsyncSession,
search_space_id: int,
@@ -364,6 +381,9 @@ async def load_mcp_tools(
This discovers tools dynamically from MCP servers using the protocol.
Supports both stdio (local process) and HTTP (remote server) transports.
Results are cached per search space for up to 5 minutes to avoid
re-spawning MCP server processes on every chat message.
Args:
session: Database session
search_space_id: User's search space ID
@@ -372,8 +392,20 @@ async def load_mcp_tools(
List of LangChain StructuredTool instances
"""
now = time.monotonic()
cached = _mcp_tools_cache.get(search_space_id)
if cached is not None:
cached_at, cached_tools = cached
if now - cached_at < _MCP_CACHE_TTL_SECONDS:
logger.info(
"Using cached MCP tools for search space %s (%d tools, age=%.0fs)",
search_space_id,
len(cached_tools),
now - cached_at,
)
return list(cached_tools)
try:
# Fetch all MCP connectors for this search space
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.connector_type
@@ -385,27 +417,22 @@ async def load_mcp_tools(
tools: list[StructuredTool] = []
for connector in result.scalars():
try:
# Early validation: Extract and validate connector config
config = connector.config or {}
server_config = config.get("server_config", {})
# Validate server_config exists and is a dict
if not server_config or not isinstance(server_config, dict):
logger.warning(
f"MCP connector {connector.id} (name: '{connector.name}') has invalid or missing server_config, skipping"
)
continue
# Determine transport type
transport = server_config.get("transport", "stdio")
if transport in ("streamable-http", "http", "sse"):
# HTTP-based MCP server
connector_tools = await _load_http_mcp_tools(
connector.id, connector.name, server_config
)
else:
# stdio-based MCP server (default)
connector_tools = await _load_stdio_mcp_tools(
connector.id, connector.name, server_config
)
@@ -417,6 +444,7 @@ async def load_mcp_tools(
f"Failed to load tools from MCP connector {connector.id}: {e!s}"
)
_mcp_tools_cache[search_space_id] = (now, tools)
logger.info(f"Loaded {len(tools)} MCP tools for search space {search_space_id}")
return tools
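
For the 5-minute cache to stay correct, connector writes need to call the new invalidation hook. A hedged sketch of a call site (the function name, import path, and session handling are assumptions, not shown in this commit):

    # Hypothetical call site: invalidate the MCP tool cache whenever a
    # connector changes, so edits apply before the TTL expires.
    from app.agents.new_chat.tools.mcp_tools import invalidate_mcp_tools_cache  # path assumed

    async def update_mcp_connector(session, connector_id: int, search_space_id: int, new_config: dict) -> None:
        ...  # persist the updated connector config via the session
        invalidate_mcp_tools_cache(search_space_id)  # next message re-discovers tools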

View file

@@ -444,8 +444,18 @@ async def build_tools_async(
List of configured tool instances ready for the agent, including MCP tools.
"""
# Build standard tools
import time
_perf_log = logging.getLogger("surfsense.perf")
_perf_log.setLevel(logging.DEBUG)
_t0 = time.perf_counter()
tools = build_tools(dependencies, enabled_tools, disabled_tools, additional_tools)
_perf_log.info(
"[build_tools_async] Built-in tools in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(tools),
)
# Load MCP tools if requested and dependencies are available
if (
@@ -454,10 +464,16 @@ async def build_tools_async(
and "search_space_id" in dependencies
):
try:
_t0 = time.perf_counter()
mcp_tools = await load_mcp_tools(
dependencies["db_session"],
dependencies["search_space_id"],
)
_perf_log.info(
"[build_tools_async] MCP tools loaded in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(mcp_tools),
)
tools.extend(mcp_tools)
logging.info(
f"Registered {len(mcp_tools)} MCP tools: {[t.name for t in mcp_tools]}",