SurfSense/surfsense_backend/app/agents/new_chat/chat_deepagent.py

154 lines
6.1 KiB
Python
Raw Normal View History

2025-12-18 23:57:57 -08:00
"""
2025-12-19 20:40:10 +02:00
SurfSense deep agent implementation.

This module provides the factory function for creating SurfSense deep agents
with configurable tools via the tools registry and configurable prompts
via NewLLMConfig.
2025-12-18 23:57:57 -08:00
"""
2025-12-19 20:21:39 +02:00
from collections.abc import Sequence
2025-12-18 23:57:57 -08:00
from deepagents import create_deep_agent
2025-12-19 20:21:39 +02:00
from langchain_core.tools import BaseTool
2025-12-18 23:57:57 -08:00
from langchain_litellm import ChatLiteLLM
from langgraph.types import Checkpointer
2025-12-18 23:57:57 -08:00
from sqlalchemy.ext.asyncio import AsyncSession
2025-12-19 20:40:10 +02:00
from app.agents.new_chat.context import SurfSenseContextSchema
2025-12-23 01:16:25 -08:00
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.system_prompt import (
build_configurable_system_prompt,
build_surfsense_system_prompt,
)
from app.agents.new_chat.tools.registry import build_tools_async
2025-12-18 23:57:57 -08:00
from app.services.connector_service import ConnectorService
# =============================================================================
# Deep Agent Factory
# =============================================================================
async def create_surfsense_deep_agent(
    llm: ChatLiteLLM,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
):
    """
    Build a SurfSense deep agent with a configurable tool set and system prompt.

    Built-in tools (selectable through ``enabled_tools`` / ``disabled_tools``):
        - search_knowledge_base: Search the user's personal knowledge base
        - generate_podcast: Generate audio podcasts from content
        - link_preview: Fetch rich previews for URLs
        - display_image: Display images in chat
        - scrape_webpage: Extract content from webpages

    In addition, ``create_deep_agent`` ships TodoListMiddleware by default,
    which contributes:
        - write_todos: Create and update planning/todo lists for complex tasks

    Prompt behaviour is driven by ``agent_config``:
        - Custom system instructions (or the defaults)
        - Citation toggle (enable/disable citation requirements)

    Args:
        llm: ChatLiteLLM instance used as the agent's language model.
        search_space_id: The user's search space ID.
        db_session: Database session for tools that need DB access.
        connector_service: Initialized connector service for knowledge base
            search.
        checkpointer: LangGraph checkpointer for conversation state
            persistence. Use AsyncPostgresSaver for production or MemorySaver
            for testing.
        agent_config: Optional AgentConfig from NewLLMConfig for prompt
            configuration. When None, the default system prompt (citations
            enabled) is used.
        enabled_tools: Explicit list of tool names to enable. When None, all
            default tools are enabled.
        disabled_tools: Tool names to disable; applied after ``enabled_tools``.
        additional_tools: Extra custom tools added on top of the built-in
            ones, regardless of the enabled/disabled settings. An empty
            sequence is forwarded to the registry as None.
        firecrawl_api_key: Optional Firecrawl API key for premium web
            scraping. Falls back to Chromium/Trafilatura if not provided.

    Returns:
        CompiledStateGraph: The configured deep agent.

    Examples:
        This is a coroutine function, so every call must be awaited.

        # All default tools, default prompt
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...
        )

        # Custom prompt configuration
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            agent_config=AgentConfig(
                provider="OPENAI",
                model_name="gpt-4",
                api_key="...",
                system_instructions="Custom instructions...",
                citations_enabled=False,
            )
        )

        # Only specific tools
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            enabled_tools=["search_knowledge_base", "link_preview"]
        )

        # Everything except podcast generation
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            disabled_tools=["generate_podcast"]
        )

        # Add custom tools
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            additional_tools=[my_custom_tool]
        )
    """
    # Shared dependencies handed to the tools registry.
    tool_dependencies = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
    }

    # Normalise the optional extras: a missing or empty sequence becomes None,
    # anything else is copied into a plain list.
    extra_tools = None
    if additional_tools:
        extra_tools = list(additional_tools)

    # Resolve the tool set through the async registry (includes MCP tools).
    agent_tools = await build_tools_async(
        dependencies=tool_dependencies,
        enabled_tools=enabled_tools,
        disabled_tools=disabled_tools,
        additional_tools=extra_tools,
    )

    # Choose the system prompt: the default one (citations enabled) when no
    # config is supplied, otherwise one assembled from the NewLLMConfig settings.
    if agent_config is None:
        system_prompt = build_surfsense_system_prompt()
    else:
        system_prompt = build_configurable_system_prompt(
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
        )

    # Assemble the deep agent. TodoListMiddleware (write_todos) is included by
    # default in create_deep_agent, so it is not registered here.
    return create_deep_agent(
        model=llm,
        tools=agent_tools,
        system_prompt=system_prompt,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )