mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 16:56:22 +02:00
- Introduced a mechanism to identify degenerate queries that lack meaningful search signals, improving search accuracy. - Implemented a fallback method for browsing recent documents when queries are degenerate, ensuring relevant results are returned. - Added limits on the number of chunks fetched per document to optimize performance and prevent excessive data loading. - Updated the ConnectorService to allow for reusable query embeddings, enhancing efficiency in search operations. - Enhanced LLM router service to support context window fallbacks, improving robustness during context window limitations.
493 lines
19 KiB
Python
493 lines
19 KiB
Python
"""Tools registry for SurfSense deep agent.

This module provides a registry pattern for managing tools in the SurfSense agent.
It makes it easy for OSS contributors to add new tools by:
1. Creating a tool factory function in a new file in this directory
2. Registering the tool in the BUILTIN_TOOLS list below

Example of adding a new tool:
------------------------------
1. Create your tool file (e.g., `tools/my_tool.py`):

    from langchain_core.tools import tool
    from sqlalchemy.ext.asyncio import AsyncSession

    def create_my_tool(search_space_id: int, db_session: AsyncSession):
        @tool
        async def my_tool(param: str) -> dict:
            '''My tool description.'''
            # Your implementation
            return {"result": "success"}
        return my_tool

2. Import and register in this file:

    from .my_tool import create_my_tool

    # Add to BUILTIN_TOOLS list:
    ToolDefinition(
        name="my_tool",
        description="Description of what your tool does",
        factory=lambda deps: create_my_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        ),
        requires=["search_space_id", "db_session"],
    ),
"""
|
|
|
|
import logging
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any

from langchain_core.tools import BaseTool

from app.db import ChatVisibility

from .display_image import create_display_image_tool
from .generate_image import create_generate_image_tool
from .google_drive import (
    create_create_google_drive_file_tool,
    create_delete_google_drive_file_tool,
)
from .knowledge_base import create_search_knowledge_base_tool
from .linear import (
    create_create_linear_issue_tool,
    create_delete_linear_issue_tool,
    create_update_linear_issue_tool,
)
from .link_preview import create_link_preview_tool
from .mcp_tool import load_mcp_tools
from .notion import (
    create_create_notion_page_tool,
    create_delete_notion_page_tool,
    create_update_notion_page_tool,
)
from .podcast import create_generate_podcast_tool
from .report import create_generate_report_tool
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .shared_memory import (
    create_recall_shared_memory_tool,
    create_save_shared_memory_tool,
)
from .user_memory import create_recall_memory_tool, create_save_memory_tool
|
|
|
|
# =============================================================================
# Tool Definition
# =============================================================================
|
|
|
|
|
|
@dataclass
class ToolDefinition:
    """Registry entry describing how to build one agent tool.

    Attributes:
        name: Unique identifier for the tool. The registry helpers in this
            module match on it when looking tools up and when applying the
            enabled/disabled lists.
        description: Human-readable summary of what the tool does.
        factory: Callable that receives the dependency dict and returns the
            constructed tool.
        requires: Dependency keys that must be present in the dependency dict
            before ``factory`` is invoked (e.g. "search_space_id",
            "db_session"). Defaults to a fresh empty list per instance.
        enabled_by_default: Whether the tool is selected when the caller does
            not pass an explicit list of enabled tools.
    """

    name: str
    description: str
    factory: Callable[[dict[str, Any]], BaseTool]
    requires: list[str] = field(default_factory=list)
    enabled_by_default: bool = True
|
|
|
|
|
|
# =============================================================================
# Built-in Tools Registry
# =============================================================================
|
|
|
|
def _make_save_memory_tool(deps: dict[str, Any]) -> BaseTool:
    """Build the ``save_memory`` tool for the current thread visibility.

    Threads whose visibility is ``ChatVisibility.SEARCH_SPACE`` get the shared
    (team) memory writer; every other visibility gets the per-user writer.
    """
    if deps["thread_visibility"] == ChatVisibility.SEARCH_SPACE:
        return create_save_shared_memory_tool(
            search_space_id=deps["search_space_id"],
            created_by_id=deps["user_id"],
            db_session=deps["db_session"],
        )
    return create_save_memory_tool(
        user_id=deps["user_id"],
        search_space_id=deps["search_space_id"],
        db_session=deps["db_session"],
    )


def _make_recall_memory_tool(deps: dict[str, Any]) -> BaseTool:
    """Build the ``recall_memory`` tool for the current thread visibility.

    Threads whose visibility is ``ChatVisibility.SEARCH_SPACE`` get the shared
    (team) memory reader; every other visibility gets the per-user reader.
    """
    if deps["thread_visibility"] == ChatVisibility.SEARCH_SPACE:
        return create_recall_shared_memory_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        )
    return create_recall_memory_tool(
        user_id=deps["user_id"],
        search_space_id=deps["search_space_id"],
        db_session=deps["db_session"],
    )


# Registry of all built-in tools.
# Contributors: add your new tools here!
#
# Each factory receives the full dependency dict. Keys read with ``deps[...]``
# are mandatory and must also be listed in ``requires``; keys read with
# ``deps.get(...)`` are optional and fall back to ``None`` when absent.
BUILTIN_TOOLS: list[ToolDefinition] = [
    # ------------------------------------------------------------------
    # Knowledge base search (core tool). The connector / document-type
    # lists are optional and, when supplied, are discovered dynamically
    # by the caller.
    # ------------------------------------------------------------------
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
        factory=lambda deps: create_search_knowledge_base_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
            # Optional extras
            available_connectors=deps.get("available_connectors"),
            available_document_types=deps.get("available_document_types"),
            max_input_tokens=deps.get("max_input_tokens"),
        ),
        requires=["search_space_id", "db_session", "connector_service"],
    ),
    # ------------------------------------------------------------------
    # Podcast generation
    # ------------------------------------------------------------------
    ToolDefinition(
        name="generate_podcast",
        description="Generate an audio podcast from provided content",
        factory=lambda deps: create_generate_podcast_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            thread_id=deps["thread_id"],
        ),
        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # ------------------------------------------------------------------
    # Report generation. No db_session is passed in here. The connector
    # service and the connector / document-type lists are optional; per
    # the upstream note, source_strategy="kb_search" degrades gracefully
    # to "provided" when they are missing.
    # ------------------------------------------------------------------
    ToolDefinition(
        name="generate_report",
        description="Generate a structured Markdown report from provided content",
        factory=lambda deps: create_generate_report_tool(
            search_space_id=deps["search_space_id"],
            thread_id=deps["thread_id"],
            connector_service=deps.get("connector_service"),
            available_connectors=deps.get("available_connectors"),
            available_document_types=deps.get("available_document_types"),
        ),
        requires=["search_space_id", "thread_id"],
    ),
    # ------------------------------------------------------------------
    # Link preview and image display: neither needs any dependency
    # ------------------------------------------------------------------
    ToolDefinition(
        name="link_preview",
        description="Fetch metadata for a URL to display a rich preview card",
        factory=lambda deps: create_link_preview_tool(),
        requires=[],
    ),
    ToolDefinition(
        name="display_image",
        description="Display an image in the chat with metadata",
        factory=lambda deps: create_display_image_tool(),
        requires=[],
    ),
    # ------------------------------------------------------------------
    # Image generation
    # ------------------------------------------------------------------
    ToolDefinition(
        name="generate_image",
        description="Generate images from text descriptions using AI image models",
        factory=lambda deps: create_generate_image_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        ),
        requires=["search_space_id", "db_session"],
    ),
    # ------------------------------------------------------------------
    # Web scraping: the Firecrawl API key is optional, so nothing is
    # listed in ``requires``
    # ------------------------------------------------------------------
    ToolDefinition(
        name="scrape_webpage",
        description="Scrape and extract the main content from a webpage",
        factory=lambda deps: create_scrape_webpage_tool(
            firecrawl_api_key=deps.get("firecrawl_api_key"),
        ),
        requires=[],
    ),
    # Note: write_todos is now provided by TodoListMiddleware from deepagents
    # ------------------------------------------------------------------
    # SurfSense documentation search
    # ------------------------------------------------------------------
    ToolDefinition(
        name="search_surfsense_docs",
        description="Search Surfsense documentation for help with using the application",
        factory=lambda deps: create_search_surfsense_docs_tool(
            db_session=deps["db_session"],
        ),
        requires=["db_session"],
    ),
    # =========================================================================
    # USER MEMORY TOOLS - private or team store, chosen by thread_visibility
    # =========================================================================
    ToolDefinition(
        name="save_memory",
        description="Save facts, preferences, or context for personalized or team responses",
        factory=_make_save_memory_tool,
        requires=["user_id", "search_space_id", "db_session", "thread_visibility"],
    ),
    ToolDefinition(
        name="recall_memory",
        description="Recall relevant memories (personal or team) for context",
        factory=_make_recall_memory_tool,
        requires=["user_id", "search_space_id", "db_session", "thread_visibility"],
    ),
    # =========================================================================
    # LINEAR TOOLS - create, update, delete issues
    # =========================================================================
    ToolDefinition(
        name="create_linear_issue",
        description="Create a new issue in the user's Linear workspace",
        factory=lambda deps: create_create_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    ToolDefinition(
        name="update_linear_issue",
        description="Update an existing indexed Linear issue",
        factory=lambda deps: create_update_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    ToolDefinition(
        name="delete_linear_issue",
        description="Archive (delete) an existing indexed Linear issue",
        factory=lambda deps: create_delete_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    # =========================================================================
    # NOTION TOOLS - create, update, delete pages
    # =========================================================================
    ToolDefinition(
        name="create_notion_page",
        description="Create a new page in the user's Notion workspace",
        factory=lambda deps: create_create_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    ToolDefinition(
        name="update_notion_page",
        description="Append new content to an existing Notion page",
        factory=lambda deps: create_update_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    ToolDefinition(
        name="delete_notion_page",
        description="Delete an existing Notion page",
        factory=lambda deps: create_delete_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    # =========================================================================
    # GOOGLE DRIVE TOOLS - create files, delete files
    # =========================================================================
    ToolDefinition(
        name="create_google_drive_file",
        description="Create a new Google Doc or Google Sheet in Google Drive",
        factory=lambda deps: create_create_google_drive_file_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
    ToolDefinition(
        name="delete_google_drive_file",
        description="Move an indexed Google Drive file to trash",
        factory=lambda deps: create_delete_google_drive_file_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
    ),
]
|
|
|
|
|
|
# =============================================================================
# Registry Functions
# =============================================================================
|
|
|
|
|
|
def get_tool_by_name(name: str) -> ToolDefinition | None:
    """Look up a built-in tool definition by name.

    Args:
        name: The tool name to search for.

    Returns:
        The first entry of ``BUILTIN_TOOLS`` whose ``name`` equals ``name``,
        or ``None`` when no entry matches.
    """
    matches = (definition for definition in BUILTIN_TOOLS if definition.name == name)
    return next(matches, None)
|
|
|
|
|
|
def get_all_tool_names() -> list[str]:
    """Return the name of every entry in ``BUILTIN_TOOLS``, in registry order."""
    return [definition.name for definition in BUILTIN_TOOLS]
|
|
|
|
|
|
def get_default_enabled_tools() -> list[str]:
    """Return the names of built-in tools flagged ``enabled_by_default``.

    The result keeps the order in which the tools appear in ``BUILTIN_TOOLS``.
    """
    return [
        definition.name
        for definition in BUILTIN_TOOLS
        if definition.enabled_by_default
    ]
|
|
|
|
|
|
def build_tools(
    dependencies: dict[str, Any],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: list[BaseTool] | None = None,
) -> list[BaseTool]:
    """Instantiate the selected built-in tools and append any extra tools.

    Args:
        dependencies: Mapping handed unchanged to every tool factory. Keys the
            registered factories read include, among others:
            - search_space_id: The search space ID
            - db_session: Database session
            - connector_service: Connector service instance
            - firecrawl_api_key: Optional Firecrawl API key
        enabled_tools: Explicit tool names to enable. When ``None``, the tools
            flagged ``enabled_by_default`` are used. Names that match no
            registry entry are silently ignored.
        disabled_tools: Tool names to remove from the selection; applied after
            ``enabled_tools``.
        additional_tools: Ready-made tools appended after the built-in ones
            (e.g. custom tools that are not in the registry).

    Returns:
        The built-in tools in ``BUILTIN_TOOLS`` order, followed by
        ``additional_tools``.

    Raises:
        ValueError: If a selected tool lists a dependency key in ``requires``
            that is absent from ``dependencies``.

    Example:
        # Use all default tools
        tools = build_tools(deps)

        # Use only specific tools
        tools = build_tools(deps, enabled_tools=["search_knowledge_base", "link_preview"])

        # Use defaults but disable podcast
        tools = build_tools(deps, disabled_tools=["generate_podcast"])

        # Add custom tools
        tools = build_tools(deps, additional_tools=[my_custom_tool])
    """
    # Start from the explicit list when given, otherwise from the defaults,
    # then subtract whatever the caller disabled.
    selected = set(
        get_default_enabled_tools() if enabled_tools is None else enabled_tools
    )
    selected.difference_update(disabled_tools or ())

    built: list[BaseTool] = []
    for definition in BUILTIN_TOOLS:
        if definition.name not in selected:
            continue

        # Fail before calling the factory so the error names the tool and
        # the missing keys.
        absent = [key for key in definition.requires if key not in dependencies]
        if absent:
            raise ValueError(
                f"Tool '{definition.name}' requires dependencies: {absent}"
            )

        built.append(definition.factory(dependencies))

    # Custom tools always go after the built-in ones.
    built.extend(additional_tools or [])
    return built
|
|
|
|
|
|
async def build_tools_async(
    dependencies: dict[str, Any],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: list[BaseTool] | None = None,
    include_mcp_tools: bool = True,
) -> list[BaseTool]:
    """Build the built-in tools, then append the MCP tools loaded via the database session.

    Design Note:
        Loading MCP tools is an awaited call that takes the database session,
        whereas the built-in tools are created synchronously. Keeping
        ``build_tools()`` synchronous and layering this coroutine on top
        avoids forcing async onto callers that only need the built-in tools.

    Args:
        dependencies: Dict containing all possible dependencies.
        enabled_tools: Explicit list of tool names to enable. If None, uses defaults.
        disabled_tools: List of tool names to disable (applied after enabled_tools).
        additional_tools: Extra tools to add (e.g., custom tools not in registry).
        include_mcp_tools: Whether to load MCP tools. They are only loaded when
            ``dependencies`` also contains both "db_session" and
            "search_space_id".

    Returns:
        The tools produced by ``build_tools()`` followed by any MCP tools that
        loaded successfully.

    Raises:
        ValueError: Propagated from ``build_tools()`` when a selected built-in
            tool is missing a required dependency. Failures while loading MCP
            tools are logged and do not raise.
    """
    perf_log = logging.getLogger("surfsense.perf")
    # NOTE(review): forcing the level on every call overrides whatever the
    # application configured for "surfsense.perf" - confirm this is intended.
    perf_log.setLevel(logging.DEBUG)

    started = time.perf_counter()
    tools = build_tools(dependencies, enabled_tools, disabled_tools, additional_tools)
    perf_log.info(
        "[build_tools_async] Built-in tools in %.3fs (%d tools)",
        time.perf_counter() - started,
        len(tools),
    )

    # MCP tools need both the session and the search space; skip quietly
    # when either is unavailable.
    can_load_mcp = (
        include_mcp_tools
        and "db_session" in dependencies
        and "search_space_id" in dependencies
    )
    if can_load_mcp:
        # Deliberately broad best-effort block: any failure while loading or
        # registering MCP tools is logged and the built-in tools are still
        # returned.
        try:
            started = time.perf_counter()
            mcp_tools = await load_mcp_tools(
                dependencies["db_session"],
                dependencies["search_space_id"],
            )
            perf_log.info(
                "[build_tools_async] MCP tools loaded in %.3fs (%d tools)",
                time.perf_counter() - started,
                len(mcp_tools),
            )
            tools.extend(mcp_tools)
            logging.info(
                "Registered %d MCP tools: %s",
                len(mcp_tools),
                [t.name for t in mcp_tools],
            )
        except Exception as exc:
            # logging.exception attaches the traceback; the message text is
            # kept identical to the previous f-string form.
            logging.exception("Failed to load MCP tools: %s", exc)

    # Log all tools being returned to the agent.
    logging.info(
        "Total tools for agent: %d - %s",
        len(tools),
        [t.name for t in tools],
    )

    return tools
|