mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
532 lines
20 KiB
Python
532 lines
20 KiB
Python
"""Tools registry for SurfSense deep agent.
|
|
|
|
This module provides a registry pattern for managing tools in the SurfSense agent.
|
|
It makes it easy for OSS contributors to add new tools by:
|
|
1. Creating a tool factory function in a new file in this directory
|
|
2. Registering the tool in the BUILTIN_TOOLS list below
|
|
|
|
Example of adding a new tool:
|
|
------------------------------
|
|
1. Create your tool file (e.g., `tools/my_tool.py`):
|
|
|
|
from langchain_core.tools import tool
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
def create_my_tool(search_space_id: int, db_session: AsyncSession):
|
|
@tool
|
|
async def my_tool(param: str) -> dict:
|
|
'''My tool description.'''
|
|
# Your implementation
|
|
return {"result": "success"}
|
|
return my_tool
|
|
|
|
2. Import and register in this file:
|
|
|
|
from .my_tool import create_my_tool
|
|
|
|
# Add to BUILTIN_TOOLS list:
|
|
ToolDefinition(
|
|
name="my_tool",
|
|
description="Description of what your tool does",
|
|
factory=lambda deps: create_my_tool(
|
|
search_space_id=deps["search_space_id"],
|
|
db_session=deps["db_session"],
|
|
),
|
|
requires=["search_space_id", "db_session"],
|
|
),
|
|
"""
|
|
|
|
import logging
|
|
from collections.abc import Callable
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from langchain_core.tools import BaseTool
|
|
|
|
from app.db import ChatVisibility
|
|
|
|
from .display_image import create_display_image_tool
|
|
from .generate_image import create_generate_image_tool
|
|
from .google_drive import (
|
|
create_create_google_drive_file_tool,
|
|
create_delete_google_drive_file_tool,
|
|
)
|
|
from .knowledge_base import create_search_knowledge_base_tool
|
|
from .linear import (
|
|
create_create_linear_issue_tool,
|
|
create_delete_linear_issue_tool,
|
|
create_update_linear_issue_tool,
|
|
)
|
|
from .link_preview import create_link_preview_tool
|
|
from .mcp_tool import load_mcp_tools
|
|
from .notion import (
|
|
create_create_notion_page_tool,
|
|
create_delete_notion_page_tool,
|
|
create_update_notion_page_tool,
|
|
)
|
|
from .podcast import create_generate_podcast_tool
|
|
from .report import create_generate_report_tool
|
|
from .scrape_webpage import create_scrape_webpage_tool
|
|
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
|
from .shared_memory import (
|
|
create_recall_shared_memory_tool,
|
|
create_save_shared_memory_tool,
|
|
)
|
|
from .user_memory import create_recall_memory_tool, create_save_memory_tool
|
|
from .video_presentation import create_generate_video_presentation_tool
|
|
from .web_search import create_web_search_tool
|
|
|
|
# =============================================================================
|
|
# Tool Definition
|
|
# =============================================================================
|
|
|
|
|
|
@dataclass
class ToolDefinition:
    """Registry entry describing one tool the agent can be given.

    An entry does not hold a tool instance; it holds a ``factory`` that
    creates the tool on demand from a dependency dict.

    Attributes:
        name: Unique identifier used to look up, enable and disable the tool.
        description: Human-readable summary of what the tool does.
        factory: Callable that receives the dependency dict and returns the
            tool instance.
        requires: Keys that must be present in the dependency dict before
            ``factory`` is called (e.g., "search_space_id", "db_session").
            Defaults to an empty list.
        enabled_by_default: Whether the tool is selected when the caller does
            not pass an explicit list of enabled tools.
        hidden: When True, ``build_tools`` skips the tool unconditionally,
            even if it is named in ``enabled_tools`` (used for WIP tools).
    """

    name: str
    description: str
    factory: Callable[[dict[str, Any]], BaseTool]
    requires: list[str] = field(default_factory=list)
    enabled_by_default: bool = True
    hidden: bool = False
|
|
|
|
|
|
# =============================================================================
|
|
# Built-in Tools Registry
|
|
# =============================================================================
|
|
|
|
def _memory_save_factory(deps: dict[str, Any]) -> BaseTool:
    """Create the save-memory tool matching the thread's visibility.

    Threads visible to the whole search space write to the shared (team)
    store; every other visibility writes to the user's private store.
    """
    if deps["thread_visibility"] == ChatVisibility.SEARCH_SPACE:
        return create_save_shared_memory_tool(
            search_space_id=deps["search_space_id"],
            created_by_id=deps["user_id"],
            db_session=deps["db_session"],
        )
    return create_save_memory_tool(
        user_id=deps["user_id"],
        search_space_id=deps["search_space_id"],
        db_session=deps["db_session"],
    )


def _memory_recall_factory(deps: dict[str, Any]) -> BaseTool:
    """Create the recall-memory tool matching the thread's visibility.

    Threads visible to the whole search space read from the shared (team)
    store; every other visibility reads from the user's private store.
    """
    if deps["thread_visibility"] == ChatVisibility.SEARCH_SPACE:
        return create_recall_shared_memory_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        )
    return create_recall_memory_tool(
        user_id=deps["user_id"],
        search_space_id=deps["search_space_id"],
        db_session=deps["db_session"],
    )


# Registry of all built-in tools.
# Contributors: add your new tools here!
#
# Conventions used below:
#   * deps["key"]     -> mandatory dependency; must also be listed in `requires`
#   * deps.get("key") -> optional dependency; the factory receives None if absent
BUILTIN_TOOLS: list[ToolDefinition] = [
    # -------------------------------------------------------------------------
    # Knowledge base search (core tool).
    # Connectors / document types may be discovered dynamically and passed in.
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="search_knowledge_base",
        description="Search the user's personal knowledge base for relevant information",
        factory=lambda deps: create_search_knowledge_base_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            connector_service=deps["connector_service"],
            # Optional values below are not listed in `requires`.
            available_connectors=deps.get("available_connectors"),
            available_document_types=deps.get("available_document_types"),
            max_input_tokens=deps.get("max_input_tokens"),
        ),
        requires=["search_space_id", "db_session", "connector_service"],
    ),
    # -------------------------------------------------------------------------
    # Podcast generation
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="generate_podcast",
        description="Generate an audio podcast from provided content",
        factory=lambda deps: create_generate_podcast_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            thread_id=deps["thread_id"],
        ),
        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # -------------------------------------------------------------------------
    # Video presentation generation
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="generate_video_presentation",
        description="Generate a video presentation with slides and narration from provided content",
        factory=lambda deps: create_generate_video_presentation_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
            thread_id=deps["thread_id"],
        ),
        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # -------------------------------------------------------------------------
    # Report generation (inline, short-lived sessions for DB ops).
    # Supports internal KB search via source_strategy so the agent does not
    # have to call search_knowledge_base separately before generating.
    # connector_service, available_connectors and available_document_types are
    # optional; when missing, source_strategy="kb_search" degrades gracefully
    # to "provided".
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="generate_report",
        description="Generate a structured report from provided content and export it",
        factory=lambda deps: create_generate_report_tool(
            search_space_id=deps["search_space_id"],
            thread_id=deps["thread_id"],
            connector_service=deps.get("connector_service"),
            available_connectors=deps.get("available_connectors"),
            available_document_types=deps.get("available_document_types"),
        ),
        requires=["search_space_id", "thread_id"],
    ),
    # -------------------------------------------------------------------------
    # Link preview - fetches Open Graph metadata for URLs
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="link_preview",
        description="Fetch metadata for a URL to display a rich preview card",
        factory=lambda deps: create_link_preview_tool(),
        requires=[],
    ),
    # -------------------------------------------------------------------------
    # Display image - shows images in the chat
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="display_image",
        description="Display an image in the chat with metadata",
        factory=lambda deps: create_display_image_tool(),
        requires=[],
    ),
    # -------------------------------------------------------------------------
    # Generate image - creates images using AI models (DALL-E, GPT Image, etc.)
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="generate_image",
        description="Generate images from text descriptions using AI image models",
        factory=lambda deps: create_generate_image_tool(
            search_space_id=deps["search_space_id"],
            db_session=deps["db_session"],
        ),
        requires=["search_space_id", "db_session"],
    ),
    # -------------------------------------------------------------------------
    # Web scraping - extracts content from webpages
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="scrape_webpage",
        description="Scrape and extract the main content from a webpage",
        factory=lambda deps: create_scrape_webpage_tool(
            firecrawl_api_key=deps.get("firecrawl_api_key"),
        ),
        requires=[],  # firecrawl_api_key is optional
    ),
    # -------------------------------------------------------------------------
    # Web search — real-time web search via SearXNG + user-configured engines
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="web_search",
        description="Search the web for real-time information using configured search engines",
        factory=lambda deps: create_web_search_tool(
            search_space_id=deps.get("search_space_id"),
            available_connectors=deps.get("available_connectors"),
        ),
        requires=[],
    ),
    # -------------------------------------------------------------------------
    # Surfsense documentation search
    # -------------------------------------------------------------------------
    ToolDefinition(
        name="search_surfsense_docs",
        description="Search Surfsense documentation for help with using the application",
        factory=lambda deps: create_search_surfsense_docs_tool(
            db_session=deps["db_session"],
        ),
        requires=["db_session"],
    ),
    # =========================================================================
    # USER MEMORY TOOLS - private or team store, chosen by thread_visibility
    # =========================================================================
    ToolDefinition(
        name="save_memory",
        description="Save facts, preferences, or context for personalized or team responses",
        factory=_memory_save_factory,
        requires=["user_id", "search_space_id", "db_session", "thread_visibility"],
    ),
    ToolDefinition(
        name="recall_memory",
        description="Recall relevant memories (personal or team) for context",
        factory=_memory_recall_factory,
        requires=["user_id", "search_space_id", "db_session", "thread_visibility"],
    ),
    # =========================================================================
    # LINEAR TOOLS - create, update, delete issues (WIP - hidden from UI)
    # =========================================================================
    ToolDefinition(
        name="create_linear_issue",
        description="Create a new issue in the user's Linear workspace",
        factory=lambda deps: create_create_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    ToolDefinition(
        name="update_linear_issue",
        description="Update an existing indexed Linear issue",
        factory=lambda deps: create_update_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    ToolDefinition(
        name="delete_linear_issue",
        description="Archive (delete) an existing indexed Linear issue",
        factory=lambda deps: create_delete_linear_issue_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    # =========================================================================
    # NOTION TOOLS - create, update, delete pages (WIP - hidden from UI)
    # =========================================================================
    ToolDefinition(
        name="create_notion_page",
        description="Create a new page in the user's Notion workspace",
        factory=lambda deps: create_create_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    ToolDefinition(
        name="update_notion_page",
        description="Append new content to an existing Notion page",
        factory=lambda deps: create_update_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    ToolDefinition(
        name="delete_notion_page",
        description="Delete an existing Notion page",
        factory=lambda deps: create_delete_notion_page_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    # =========================================================================
    # GOOGLE DRIVE TOOLS - create files, delete files (WIP - hidden from UI)
    # =========================================================================
    ToolDefinition(
        name="create_google_drive_file",
        description="Create a new Google Doc or Google Sheet in Google Drive",
        factory=lambda deps: create_create_google_drive_file_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
    ToolDefinition(
        name="delete_google_drive_file",
        description="Move an indexed Google Drive file to trash",
        factory=lambda deps: create_delete_google_drive_file_tool(
            db_session=deps["db_session"],
            search_space_id=deps["search_space_id"],
            user_id=deps["user_id"],
        ),
        requires=["db_session", "search_space_id", "user_id"],
        enabled_by_default=False,
        hidden=True,
    ),
]
|
|
|
|
|
|
# =============================================================================
|
|
# Registry Functions
|
|
# =============================================================================
|
|
|
|
|
|
def get_tool_by_name(name: str) -> ToolDefinition | None:
    """Look up a registry entry by its exact name.

    Args:
        name: Tool name to search for.

    Returns:
        The first entry in BUILTIN_TOOLS whose ``name`` equals ``name``,
        or None when no entry matches.
    """
    matches = (definition for definition in BUILTIN_TOOLS if definition.name == name)
    return next(matches, None)
|
|
|
|
|
|
def get_all_tool_names() -> list[str]:
    """Return the name of every entry in BUILTIN_TOOLS, in registry order.

    No filtering is applied: hidden and disabled-by-default tools are included.
    """
    return [definition.name for definition in BUILTIN_TOOLS]
|
|
|
|
|
|
def get_default_enabled_tools() -> list[str]:
    """Get names of tools that are enabled by default (excludes hidden tools).

    Returns:
        Names, in registry order, of the BUILTIN_TOOLS entries that have
        ``enabled_by_default`` set and are not ``hidden``.
    """
    # The hidden check makes the documented contract hold even if a hidden
    # entry is ever registered with enabled_by_default=True.
    return [
        definition.name
        for definition in BUILTIN_TOOLS
        if definition.enabled_by_default and not definition.hidden
    ]
|
|
|
|
|
|
def build_tools(
    dependencies: dict[str, Any],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: list[BaseTool] | None = None,
) -> list[BaseTool]:
    """Instantiate the built-in tools selected for the agent.

    Args:
        dependencies: Dict handed to every tool factory. Commonly used keys:
            - search_space_id: The search space ID
            - db_session: Database session
            - connector_service: Connector service instance
            - firecrawl_api_key: Optional Firecrawl API key
            Individual tools may require further keys (see each registry
            entry's ``requires`` list).
        enabled_tools: Explicit list of tool names to enable. If None, the
            default-enabled tools are used.
        disabled_tools: Tool names to remove from the selection (applied
            after ``enabled_tools``).
        additional_tools: Ready-made tools appended after the built-in ones
            (e.g., custom tools not in the registry).

    Returns:
        Built-in tool instances in registry order, followed by
        ``additional_tools``.

    Raises:
        ValueError: If a selected tool lists a dependency in ``requires``
            that is not a key of ``dependencies``.

    Example:
        # Use all default tools
        tools = build_tools(deps)

        # Use only specific tools
        tools = build_tools(deps, enabled_tools=["search_knowledge_base", "link_preview"])

        # Use defaults but disable podcast
        tools = build_tools(deps, disabled_tools=["generate_podcast"])

        # Add custom tools
        tools = build_tools(deps, additional_tools=[my_custom_tool])
    """
    # Start from the explicit selection when given, otherwise the defaults.
    selected_names = set(
        get_default_enabled_tools() if enabled_tools is None else enabled_tools
    )
    if disabled_tools:
        selected_names.difference_update(disabled_tools)

    built: list[BaseTool] = []
    for definition in BUILTIN_TOOLS:
        # Hidden/WIP tools are never built, even when explicitly selected.
        if definition.hidden or definition.name not in selected_names:
            continue

        # Fail before calling the factory if a declared dependency is absent.
        absent = [key for key in definition.requires if key not in dependencies]
        if absent:
            msg = f"Tool '{definition.name}' requires dependencies: {absent}"
            raise ValueError(msg)

        built.append(definition.factory(dependencies))

    if additional_tools:
        built.extend(additional_tools)

    return built
|
|
|
|
|
|
async def build_tools_async(
    dependencies: dict[str, Any],
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: list[BaseTool] | None = None,
    include_mcp_tools: bool = True,
) -> list[BaseTool]:
    """Build the built-in tools, then append MCP tools loaded asynchronously.

    Design Note:
        Built-in tools are created synchronously by ``build_tools``. MCP tools
        are loaded through an awaited call that receives the database session
        and search space ID, so they need an async entry point. Keeping
        ``build_tools`` synchronous avoids forcing async on callers that only
        use built-in tools.

    Args:
        dependencies: Dict containing all possible dependencies.
        enabled_tools: Explicit list of tool names to enable. If None, uses defaults.
        disabled_tools: List of tool names to disable (applied after enabled_tools).
        additional_tools: Extra tools to add (e.g., custom tools not in registry).
        include_mcp_tools: Whether to load MCP tools. They are only loaded when
            ``dependencies`` also contains "db_session" and "search_space_id".

    Returns:
        The tools from ``build_tools`` followed by any MCP tools that loaded.
        A failure while loading MCP tools is logged and otherwise ignored.
    """
    import time

    perf_logger = logging.getLogger("surfsense.perf")
    perf_logger.setLevel(logging.DEBUG)

    builtin_started = time.perf_counter()
    tools = build_tools(dependencies, enabled_tools, disabled_tools, additional_tools)
    perf_logger.info(
        "[build_tools_async] Built-in tools in %.3fs (%d tools)",
        time.perf_counter() - builtin_started,
        len(tools),
    )

    # MCP loading needs both the session and the search space to be available.
    can_load_mcp = (
        include_mcp_tools
        and "db_session" in dependencies
        and "search_space_id" in dependencies
    )
    if can_load_mcp:
        try:
            mcp_started = time.perf_counter()
            mcp_tools = await load_mcp_tools(
                dependencies["db_session"],
                dependencies["search_space_id"],
            )
            perf_logger.info(
                "[build_tools_async] MCP tools loaded in %.3fs (%d tools)",
                time.perf_counter() - mcp_started,
                len(mcp_tools),
            )
            tools.extend(mcp_tools)
            logging.info(
                f"Registered {len(mcp_tools)} MCP tools: {[t.name for t in mcp_tools]}",
            )
        except Exception as e:
            # Best effort: log the failure and carry on without MCP tools.
            logging.exception(f"Failed to load MCP tools: {e!s}")

    # Record the final tool set handed to the agent.
    logging.info(
        f"Total tools for agent: {len(tools)} - {[t.name for t in tools]}",
    )

    return tools
|