feat: add performance logging and caching across agent creation, sandbox management, and MCP tooling

- Introduced slow-callback logging in the FastAPI app to surface calls that block the event loop (see the sketch below).
- Added performance logging for agent creation and tool loading processes.
- Implemented caching for MCP tools to reduce redundant server calls.
- Enhanced sandbox management with in-process caching so repeat messages in a thread reuse the sandbox without a Daytona API call.
- Refactored several functions for better readability and performance tracking.
- Updated tests to cover the new caching and logging behavior.
DESKTOP-RTLN3BA\$punk 2026-02-26 13:00:31 -08:00
parent 2e99f1e853
commit aabc24f82c
22 changed files with 637 additions and 200 deletions
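
The slow-callback logging mentioned in the first bullet has no hunk in this view. A minimal sketch of the standard way to get it, assuming asyncio's built-in debug mode is what the commit enables (the lifespan hook and the 100 ms threshold are assumptions, not taken from the diff):

    # Hypothetical sketch: asyncio debug mode logs any callback that holds
    # the event loop longer than slow_callback_duration seconds.
    import asyncio
    from contextlib import asynccontextmanager

    from fastapi import FastAPI

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        loop = asyncio.get_running_loop()
        loop.set_debug(True)               # enables slow-callback detection
        loop.slow_callback_duration = 0.1  # log callbacks blocking > 100 ms
        yield

    app = FastAPI(lifespan=lifespan)

Debug mode adds measurable overhead, so in practice it is usually gated behind a settings flag.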

View file

@@ -6,6 +6,9 @@ with configurable tools via the tools registry and configurable prompts
via NewLLMConfig.
"""
import asyncio
import logging
import time
from collections.abc import Sequence
from typing import Any
@@ -26,6 +29,8 @@ from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
_perf_log = logging.getLogger("surfsense.perf")
# =============================================================================
# Connector Type Mapping
# =============================================================================
@@ -210,29 +215,29 @@ async def create_surfsense_deep_agent(
additional_tools=[my_custom_tool]
)
"""
_t_agent_total = time.perf_counter()
# Discover available connectors and document types for this search space
# This enables dynamic tool docstrings that inform the LLM about what's actually available
available_connectors: list[str] | None = None
available_document_types: list[str] | None = None
_t0 = time.perf_counter()
try:
# Get enabled search source connectors for this search space
connector_types = await connector_service.get_available_connectors(
search_space_id
)
if connector_types:
# Convert enum values to strings and also include mapped document types
available_connectors = _map_connectors_to_searchable_types(connector_types)
# Get document types that have at least one document indexed
available_document_types = await connector_service.get_available_document_types(
search_space_id
)
except Exception as e:
# Log but don't fail - fall back to all connectors if discovery fails
import logging
logging.warning(f"Failed to discover available connectors/document types: {e}")
_perf_log.info(
"[create_agent] Connector/doc-type discovery in %.3fs",
time.perf_counter() - _t0,
)
# Build dependencies dict for the tools registry
visibility = thread_visibility or ChatVisibility.PRIVATE
@@ -274,14 +279,21 @@ async def create_surfsense_deep_agent(
modified_disabled_tools.extend(linear_tools)
# Build tools using the async registry (includes MCP tools)
_t0 = time.perf_counter()
tools = await build_tools_async(
dependencies=dependencies,
enabled_tools=enabled_tools,
disabled_tools=modified_disabled_tools,
additional_tools=list(additional_tools) if additional_tools else None,
)
_perf_log.info(
"[create_agent] build_tools_async in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(tools),
)
# Build system prompt based on agent_config
_t0 = time.perf_counter()
_sandbox_enabled = sandbox_backend is not None
if agent_config is not None:
system_prompt = build_configurable_system_prompt(
@@ -296,15 +308,18 @@ async def create_surfsense_deep_agent(
thread_visibility=thread_visibility,
sandbox_enabled=_sandbox_enabled,
)
_perf_log.info(
"[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
)
# Build optional kwargs for the deep agent
deep_agent_kwargs: dict[str, Any] = {}
if sandbox_backend is not None:
deep_agent_kwargs["backend"] = sandbox_backend
# Create the deep agent with system prompt and checkpointer
# Note: TodoListMiddleware (write_todos) is included by default in create_deep_agent
agent = create_deep_agent(
_t0 = time.perf_counter()
agent = await asyncio.to_thread(
create_deep_agent,
model=llm,
tools=tools,
system_prompt=system_prompt,
@@ -312,5 +327,13 @@ async def create_surfsense_deep_agent(
checkpointer=checkpointer,
**deep_agent_kwargs,
)
_perf_log.info(
"[create_agent] Graph compiled (create_deep_agent) in %.3fs",
time.perf_counter() - _t0,
)
_perf_log.info(
"[create_agent] Total agent creation in %.3fs",
time.perf_counter() - _t_agent_total,
)
return agent
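
The _t0 = time.perf_counter() / _perf_log.info(...) pairs above repeat once per phase. A hypothetical helper (not part of this commit) that would collapse each pair to one line:

    # Hypothetical helper: wraps the repeated perf_counter/_perf_log.info
    # pattern used throughout create_surfsense_deep_agent.
    import logging
    import time
    from contextlib import contextmanager

    _perf_log = logging.getLogger("surfsense.perf")

    @contextmanager
    def timed(label: str):
        t0 = time.perf_counter()
        try:
            yield
        finally:
            _perf_log.info("[%s] %.3fs", label, time.perf_counter() - t0)

    # Usage:
    # with timed("create_agent.build_tools_async"):
    #     tools = await build_tools_async(...)

Note also that create_deep_agent now runs under asyncio.to_thread, so graph compilation no longer blocks the event loop while it is being timed.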

View file

@@ -12,6 +12,7 @@ the sandbox is deleted so they remain downloadable after cleanup.
from __future__ import annotations
import asyncio
import contextlib
import logging
import os
import shutil
@@ -56,6 +57,7 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
_daytona_client: Daytona | None = None
_sandbox_cache: dict[str, _TimeoutAwareSandbox] = {}
THREAD_LABEL_KEY = "surfsense_thread"
@@ -126,8 +128,8 @@ def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
"""Get or create a sandbox for a conversation thread.
Uses the thread_id as a label so the same sandbox persists
across multiple messages within the same conversation.
Uses an in-process cache keyed by thread_id so subsequent messages
in the same conversation reuse the sandbox object without an API call.
Args:
thread_id: The conversation thread identifier.
@@ -135,11 +137,19 @@ async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
Returns:
DaytonaSandbox connected to the sandbox.
"""
return await asyncio.to_thread(_find_or_create, str(thread_id))
key = str(thread_id)
cached = _sandbox_cache.get(key)
if cached is not None:
logger.info("Reusing cached sandbox for thread %s", key)
return cached
sandbox = await asyncio.to_thread(_find_or_create, key)
_sandbox_cache[key] = sandbox
return sandbox
async def delete_sandbox(thread_id: int | str) -> None:
"""Delete the sandbox for a conversation thread."""
_sandbox_cache.pop(str(thread_id), None)
def _delete() -> None:
client = _get_client()
@@ -147,7 +157,9 @@ async def delete_sandbox(thread_id: int | str) -> None:
try:
sandbox = client.find_one(labels=labels)
except DaytonaError:
logger.debug("No sandbox to delete for thread %s (already removed)", thread_id)
logger.debug(
"No sandbox to delete for thread %s (already removed)", thread_id
)
return
try:
client.delete(sandbox)
@@ -166,6 +178,7 @@ async def delete_sandbox(thread_id: int | str) -> None:
# Local file persistence
# ---------------------------------------------------------------------------
def _get_sandbox_files_dir() -> Path:
return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files"))
@@ -206,6 +219,7 @@ async def persist_and_delete_sandbox(
Per-file errors are logged but do **not** prevent the sandbox from
being deleted; freeing Daytona storage is the priority.
"""
_sandbox_cache.pop(str(thread_id), None)
def _persist_and_delete() -> None:
client = _get_client()
@@ -229,10 +243,8 @@ async def persist_and_delete_sandbox(
sandbox.id,
exc_info=True,
)
try:
with contextlib.suppress(Exception):
client.delete(sandbox)
except Exception:
pass
return
for path in sandbox_file_paths:
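
One caveat with the new in-process sandbox cache: two concurrent messages on the same thread can both miss the cache and race into _find_or_create, potentially creating duplicate sandboxes. A hedged sketch of per-key locking that would close the gap (hypothetical, not in this commit; it reuses _sandbox_cache, _find_or_create, and _TimeoutAwareSandbox from this module):

    # Hypothetical guard: serialize cache misses per thread_id so
    # concurrent requests don't each look up / create a sandbox.
    import asyncio

    _sandbox_locks: dict[str, asyncio.Lock] = {}

    async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
        key = str(thread_id)
        lock = _sandbox_locks.setdefault(key, asyncio.Lock())
        async with lock:
            cached = _sandbox_cache.get(key)
            if cached is not None:
                return cached
            sandbox = await asyncio.to_thread(_find_or_create, key)
            _sandbox_cache[key] = sandbox
            return sandbox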

View file

@@ -11,6 +11,7 @@ This implements real MCP protocol support similar to Cursor's implementation.
"""
import logging
import time
from typing import Any
from langchain_core.tools import StructuredTool
@@ -25,6 +26,9 @@ from app.db import SearchSourceConnector, SearchSourceConnectorType
logger = logging.getLogger(__name__)
_MCP_CACHE_TTL_SECONDS = 300 # 5 minutes
_mcp_tools_cache: dict[int, tuple[float, list[StructuredTool]]] = {}
def _create_dynamic_input_model_from_schema(
tool_name: str,
@@ -355,6 +359,19 @@ async def _load_http_mcp_tools(
return tools
def invalidate_mcp_tools_cache(search_space_id: int | None = None) -> None:
"""Invalidate cached MCP tools.
Args:
search_space_id: If provided, only invalidate for this search space.
If None, invalidate all cached MCP tools.
"""
if search_space_id is not None:
_mcp_tools_cache.pop(search_space_id, None)
else:
_mcp_tools_cache.clear()
async def load_mcp_tools(
session: AsyncSession,
search_space_id: int,
@@ -364,6 +381,9 @@ async def load_mcp_tools(
This discovers tools dynamically from MCP servers using the protocol.
Supports both stdio (local process) and HTTP (remote server) transports.
Results are cached per search space for up to 5 minutes to avoid
re-spawning MCP server processes on every chat message.
Args:
session: Database session
search_space_id: User's search space ID
@@ -372,8 +392,20 @@ async def load_mcp_tools(
List of LangChain StructuredTool instances
"""
now = time.monotonic()
cached = _mcp_tools_cache.get(search_space_id)
if cached is not None:
cached_at, cached_tools = cached
if now - cached_at < _MCP_CACHE_TTL_SECONDS:
logger.info(
"Using cached MCP tools for search space %s (%d tools, age=%.0fs)",
search_space_id,
len(cached_tools),
now - cached_at,
)
return list(cached_tools)
try:
# Fetch all MCP connectors for this search space
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.connector_type
@@ -385,27 +417,22 @@ async def load_mcp_tools(
tools: list[StructuredTool] = []
for connector in result.scalars():
try:
# Early validation: Extract and validate connector config
config = connector.config or {}
server_config = config.get("server_config", {})
# Validate server_config exists and is a dict
if not server_config or not isinstance(server_config, dict):
logger.warning(
f"MCP connector {connector.id} (name: '{connector.name}') has invalid or missing server_config, skipping"
)
continue
# Determine transport type
transport = server_config.get("transport", "stdio")
if transport in ("streamable-http", "http", "sse"):
# HTTP-based MCP server
connector_tools = await _load_http_mcp_tools(
connector.id, connector.name, server_config
)
else:
# stdio-based MCP server (default)
connector_tools = await _load_stdio_mcp_tools(
connector.id, connector.name, server_config
)
@@ -417,6 +444,7 @@ async def load_mcp_tools(
f"Failed to load tools from MCP connector {connector.id}: {e!s}"
)
_mcp_tools_cache[search_space_id] = (now, tools)
logger.info(f"Loaded {len(tools)} MCP tools for search space {search_space_id}")
return tools
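
For the 5-minute cache to stay correct, connector writes need to call the new invalidation hook. A hedged sketch of a call site (the function name, import path, and session handling are assumptions, not shown in this commit):

    # Hypothetical call site: invalidate the MCP tool cache whenever a
    # connector changes, so edits apply before the TTL expires.
    from app.agents.new_chat.tools.mcp_tools import invalidate_mcp_tools_cache  # path assumed

    async def update_mcp_connector(session, connector_id: int, search_space_id: int, new_config: dict) -> None:
        ...  # persist the updated connector config via the session
        invalidate_mcp_tools_cache(search_space_id)  # next message re-discovers tools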

View file

@@ -444,8 +444,18 @@ async def build_tools_async(
List of configured tool instances ready for the agent, including MCP tools.
"""
# Build standard tools
import time
_perf_log = logging.getLogger("surfsense.perf")
_perf_log.setLevel(logging.DEBUG)
_t0 = time.perf_counter()
tools = build_tools(dependencies, enabled_tools, disabled_tools, additional_tools)
_perf_log.info(
"[build_tools_async] Built-in tools in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(tools),
)
# Load MCP tools if requested and dependencies are available
if (
@@ -454,10 +464,16 @@ async def build_tools_async(
and "search_space_id" in dependencies
):
try:
_t0 = time.perf_counter()
mcp_tools = await load_mcp_tools(
dependencies["db_session"],
dependencies["search_space_id"],
)
_perf_log.info(
"[build_tools_async] MCP tools loaded in %.3fs (%d tools)",
time.perf_counter() - _t0,
len(mcp_tools),
)
tools.extend(mcp_tools)
logging.info(
f"Registered {len(mcp_tools)} MCP tools: {[t.name for t in mcp_tools]}",