mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-03 21:02:40 +02:00
991 lines
40 KiB
Python
991 lines
40 KiB
Python
"""
|
|
SurfSense deep agent implementation.
|
|
|
|
This module provides the factory function for creating SurfSense deep agents
|
|
with configurable tools via the tools registry and configurable prompts
|
|
via NewLLMConfig.
|
|
|
|
We use ``create_agent`` (from langchain) rather than ``create_deep_agent``
|
|
(from deepagents) so that the middleware stack is fully under our control.
|
|
This lets us swap in ``SurfSenseFilesystemMiddleware`` — a customisable
|
|
subclass of the default ``FilesystemMiddleware`` — while preserving every
|
|
other behaviour that ``create_deep_agent`` provides (todo-list, subagents,
|
|
summarisation, prompt-caching, etc.).
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from collections.abc import Sequence
|
|
from typing import Any
|
|
|
|
from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_version
|
|
from deepagents.backends import StateBackend
|
|
from deepagents.graph import BASE_AGENT_PROMPT
|
|
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
|
|
from deepagents.middleware.skills import SkillsMiddleware
|
|
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
|
|
from langchain.agents import create_agent
|
|
from langchain.agents.middleware import (
|
|
LLMToolSelectorMiddleware,
|
|
ModelCallLimitMiddleware,
|
|
ModelFallbackMiddleware,
|
|
TodoListMiddleware,
|
|
ToolCallLimitMiddleware,
|
|
)
|
|
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
|
|
from langchain_core.language_models import BaseChatModel
|
|
from langchain_core.tools import BaseTool
|
|
from langgraph.types import Checkpointer
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.agents.new_chat.context import SurfSenseContextSchema
|
|
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
|
from app.agents.new_chat.filesystem_backends import build_backend_resolver
|
|
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
|
|
from app.agents.new_chat.llm_config import AgentConfig
|
|
from app.agents.new_chat.middleware import (
|
|
ActionLogMiddleware,
|
|
AnonymousDocumentMiddleware,
|
|
BusyMutexMiddleware,
|
|
ClearToolUsesEdit,
|
|
DedupHITLToolCallsMiddleware,
|
|
DoomLoopMiddleware,
|
|
FileIntentMiddleware,
|
|
KnowledgeBasePersistenceMiddleware,
|
|
KnowledgePriorityMiddleware,
|
|
KnowledgeTreeMiddleware,
|
|
MemoryInjectionMiddleware,
|
|
NoopInjectionMiddleware,
|
|
OtelSpanMiddleware,
|
|
PermissionMiddleware,
|
|
RetryAfterMiddleware,
|
|
SpillingContextEditingMiddleware,
|
|
SpillToBackendEdit,
|
|
SurfSenseFilesystemMiddleware,
|
|
ToolCallNameRepairMiddleware,
|
|
build_skills_backend_factory,
|
|
create_surfsense_compaction_middleware,
|
|
default_skills_sources,
|
|
)
|
|
from app.agents.new_chat.permissions import Rule, Ruleset
|
|
from app.agents.new_chat.plugin_loader import (
|
|
PluginContext,
|
|
load_allowed_plugin_names_from_env,
|
|
load_plugin_middlewares,
|
|
)
|
|
from app.agents.new_chat.subagents import build_specialized_subagents
|
|
from app.agents.new_chat.system_prompt import (
|
|
build_configurable_system_prompt,
|
|
build_surfsense_system_prompt,
|
|
)
|
|
from app.agents.new_chat.tools.invalid_tool import (
|
|
INVALID_TOOL_NAME,
|
|
invalid_tool,
|
|
)
|
|
from app.agents.new_chat.tools.registry import (
|
|
BUILTIN_TOOLS,
|
|
build_tools_async,
|
|
get_connector_gated_tools,
|
|
)
|
|
from app.db import ChatVisibility
|
|
from app.services.connector_service import ConnectorService
|
|
from app.utils.perf import get_perf_logger
|
|
|
|
_perf_log = get_perf_logger()
|
|
|
|
# =============================================================================
|
|
# Connector Type Mapping
|
|
# =============================================================================
|
|
|
|
# Maps SearchSourceConnectorType enum values to the searchable document/connector types
|
|
# used by pre-search middleware and web_search.
|
|
# Live search connectors (TAVILY_API, LINKUP_API, BAIDU_SEARCH_API) are routed to
|
|
# the web_search tool; all others are considered local/indexed data.
|
|
_CONNECTOR_TYPE_TO_SEARCHABLE: dict[str, str] = {
|
|
# Live search connectors (handled by web_search tool)
|
|
"TAVILY_API": "TAVILY_API",
|
|
"LINKUP_API": "LINKUP_API",
|
|
"BAIDU_SEARCH_API": "BAIDU_SEARCH_API",
|
|
# Local/indexed connectors (handled by KB pre-search middleware)
|
|
"SLACK_CONNECTOR": "SLACK_CONNECTOR",
|
|
"TEAMS_CONNECTOR": "TEAMS_CONNECTOR",
|
|
"NOTION_CONNECTOR": "NOTION_CONNECTOR",
|
|
"GITHUB_CONNECTOR": "GITHUB_CONNECTOR",
|
|
"LINEAR_CONNECTOR": "LINEAR_CONNECTOR",
|
|
"DISCORD_CONNECTOR": "DISCORD_CONNECTOR",
|
|
"JIRA_CONNECTOR": "JIRA_CONNECTOR",
|
|
"CONFLUENCE_CONNECTOR": "CONFLUENCE_CONNECTOR",
|
|
"CLICKUP_CONNECTOR": "CLICKUP_CONNECTOR",
|
|
"GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
|
|
"GOOGLE_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
|
|
"GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE", # Connector type differs from document type
|
|
"AIRTABLE_CONNECTOR": "AIRTABLE_CONNECTOR",
|
|
"LUMA_CONNECTOR": "LUMA_CONNECTOR",
|
|
"ELASTICSEARCH_CONNECTOR": "ELASTICSEARCH_CONNECTOR",
|
|
"WEBCRAWLER_CONNECTOR": "CRAWLED_URL", # Maps to document type
|
|
"BOOKSTACK_CONNECTOR": "BOOKSTACK_CONNECTOR",
|
|
"CIRCLEBACK_CONNECTOR": "CIRCLEBACK", # Connector type differs from document type
|
|
"OBSIDIAN_CONNECTOR": "OBSIDIAN_CONNECTOR",
|
|
"DROPBOX_CONNECTOR": "DROPBOX_FILE", # Connector type differs from document type
|
|
"ONEDRIVE_CONNECTOR": "ONEDRIVE_FILE", # Connector type differs from document type
|
|
# Composio connectors (unified to native document types).
|
|
# Reverse of NATIVE_TO_LEGACY_DOCTYPE in app.db.
|
|
"COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "GOOGLE_DRIVE_FILE",
|
|
"COMPOSIO_GMAIL_CONNECTOR": "GOOGLE_GMAIL_CONNECTOR",
|
|
"COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "GOOGLE_CALENDAR_CONNECTOR",
|
|
}
|
|
|
|
# Document types that don't come from SearchSourceConnector but should always be searchable
|
|
_ALWAYS_AVAILABLE_DOC_TYPES: list[str] = [
|
|
"EXTENSION", # Browser extension data
|
|
"FILE", # Uploaded files
|
|
"NOTE", # User notes
|
|
"YOUTUBE_VIDEO", # YouTube videos
|
|
]
|
|
|
|
|
|
def _map_connectors_to_searchable_types(
|
|
connector_types: list[Any],
|
|
) -> list[str]:
|
|
"""
|
|
Map SearchSourceConnectorType enums to searchable document/connector types.
|
|
|
|
This function:
|
|
1. Converts connector type enums to their searchable counterparts
|
|
2. Includes always-available document types (EXTENSION, FILE, NOTE, YOUTUBE_VIDEO)
|
|
3. Deduplicates while preserving order
|
|
|
|
Args:
|
|
connector_types: List of SearchSourceConnectorType enum values
|
|
|
|
Returns:
|
|
List of searchable connector/document type strings
|
|
"""
|
|
result_set: set[str] = set()
|
|
result_list: list[str] = []
|
|
|
|
# Add always-available document types first
|
|
for doc_type in _ALWAYS_AVAILABLE_DOC_TYPES:
|
|
if doc_type not in result_set:
|
|
result_set.add(doc_type)
|
|
result_list.append(doc_type)
|
|
|
|
# Map each connector type to its searchable equivalent
|
|
for ct in connector_types:
|
|
# Handle both enum and string types
|
|
ct_str = ct.value if hasattr(ct, "value") else str(ct)
|
|
searchable = _CONNECTOR_TYPE_TO_SEARCHABLE.get(ct_str)
|
|
if searchable and searchable not in result_set:
|
|
result_set.add(searchable)
|
|
result_list.append(searchable)
|
|
|
|
return result_list
|
|
|
|
|
|
# =============================================================================
|
|
# Deep Agent Factory
|
|
# =============================================================================
|
|
|
|
|
|
async def create_surfsense_deep_agent(
    llm: BaseChatModel,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
    mentioned_document_ids: list[int] | None = None,
    anon_session_id: str | None = None,
    filesystem_selection: FilesystemSelection | None = None,
):
    """
    Create a SurfSense deep agent with configurable tools and prompts.

    The agent comes with built-in tools that can be configured:
    - generate_podcast: Generate audio podcasts from content
    - generate_image: Generate images from text descriptions using AI models
    - scrape_webpage: Extract content from webpages
    - update_memory: Update the user's personal or team memory document

    The agent also includes TodoListMiddleware by default (via create_deep_agent) which provides:
    - write_todos: Create and update planning/todo lists for complex tasks

    The system prompt can be configured via agent_config:
    - Custom system instructions (or use defaults)
    - Citation toggle (enable/disable citation requirements)

    Args:
        llm: ChatLiteLLM instance for the agent's language model
        search_space_id: The user's search space ID
        db_session: Database session for tools that need DB access
        connector_service: Initialized connector service for knowledge base search
        checkpointer: LangGraph checkpointer for conversation state persistence.
            Use AsyncPostgresSaver for production or MemorySaver for testing.
        user_id: The current user's UUID string (required for memory tools)
        thread_id: Chat thread ID, forwarded to the tools registry and to the
            middleware build (e.g. filesystem/action-log middleware).
        agent_config: Optional AgentConfig from NewLLMConfig for prompt configuration.
            If None, uses default system prompt with citations enabled.
        enabled_tools: Explicit list of tool names to enable. If None, all default tools
            are enabled. Use this to limit which tools are available.
        disabled_tools: List of tool names to disable. Applied after enabled_tools.
            Use this to exclude specific tools from the defaults.
        additional_tools: Extra custom tools to add beyond the built-in ones.
            These are always added regardless of enabled/disabled settings.
        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
            Falls back to Chromium/Trafilatura if not provided.
        thread_visibility: Chat visibility; defaults to ChatVisibility.PRIVATE
            when None. Also influences system-prompt construction.
        mentioned_document_ids: Optional document IDs, forwarded to the
            middleware build.
        anon_session_id: Optional anonymous session identifier, forwarded to
            the middleware build.
        filesystem_selection: Optional filesystem selection; defaults to
            ``FilesystemSelection()``. Determines the backend resolver
            (CLOUD mode scopes the backend to this search space).

    Returns:
        CompiledStateGraph: The configured deep agent

    Examples:
        # Create agent with all default tools and default prompt
        agent = create_surfsense_deep_agent(llm, search_space_id, db_session, ...)

        # Create agent with custom prompt configuration
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            agent_config=AgentConfig(
                provider="OPENAI",
                model_name="gpt-4",
                api_key="...",
                system_instructions="Custom instructions...",
                citations_enabled=False,
            )
        )

        # Create agent with only specific tools
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            enabled_tools=["scrape_webpage"]
        )

        # Create agent without podcast generation
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            disabled_tools=["generate_podcast"]
        )

        # Add custom tools
        agent = create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            additional_tools=[my_custom_tool]
        )
    """
    _t_agent_total = time.perf_counter()
    filesystem_selection = filesystem_selection or FilesystemSelection()
    # Only CLOUD mode scopes the backend to the search space; other modes
    # get an unscoped resolver.
    backend_resolver = build_backend_resolver(
        filesystem_selection,
        search_space_id=search_space_id
        if filesystem_selection.mode == FilesystemMode.CLOUD
        else None,
    )

    # Discover available connectors and document types for this search space
    available_connectors: list[str] | None = None
    available_document_types: list[str] | None = None

    _t0 = time.perf_counter()
    try:
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
        if connector_types:
            available_connectors = _map_connectors_to_searchable_types(connector_types)

        available_document_types = await connector_service.get_available_document_types(
            search_space_id
        )

    except Exception as e:
        # Best-effort discovery: the agent still works without it, tools just
        # won't be connector-gated and prompts won't list available sources.
        logging.warning(f"Failed to discover available connectors/document types: {e}")
    _perf_log.info(
        "[create_agent] Connector/doc-type discovery in %.3fs",
        time.perf_counter() - _t0,
    )

    # Build dependencies dict for the tools registry
    visibility = thread_visibility or ChatVisibility.PRIVATE

    # Extract the model's context window so tools can size their output.
    # ``profile`` is optional on the model; when absent or not a dict we
    # fall back to None (tools then use their own defaults).
    _model_profile = getattr(llm, "profile", None)
    _max_input_tokens: int | None = (
        _model_profile.get("max_input_tokens")
        if isinstance(_model_profile, dict)
        else None
    )

    # Dependency-injection payload consumed by build_tools_async; keys are
    # looked up by name in the tools registry.
    dependencies = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,
        "thread_id": thread_id,
        "thread_visibility": visibility,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "max_input_tokens": _max_input_tokens,
        "llm": llm,
    }

    # Copy so we never mutate the caller's list; then add tools gated off
    # by missing connectors.
    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    modified_disabled_tools.extend(get_connector_gated_tools(available_connectors))

    # Remove direct KB search tool; KnowledgePriorityMiddleware now runs hybrid
    # search per turn and surfaces hits as a <priority_documents> hint plus
    # `<chunk_index matched="true">` markers inside lazy-loaded XML.
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")

    # Build tools using the async registry (includes MCP tools)
    _t0 = time.perf_counter()
    tools = await build_tools_async(
        dependencies=dependencies,
        enabled_tools=enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
    )

    # Tier 1.6: register `invalid` tool. It is dispatched only when
    # ToolCallNameRepairMiddleware rewrites a malformed call. We
    # intentionally append it AFTER ``build_tools_async`` so it never
    # appears in the system-prompt tool list (which is built from the
    # registry, not the bound tool list).
    _flags: AgentFeatureFlags = get_flags()
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
    }:
        tools = [*list(tools), invalid_tool]
    _perf_log.info(
        "[create_agent] build_tools_async in %.3fs (%d tools)",
        time.perf_counter() - _t0,
        len(tools),
    )

    # Build system prompt based on agent_config, scoped to the tools actually enabled
    _t0 = time.perf_counter()
    _enabled_tool_names = {t.name for t in tools}
    # Only the names the CALLER disabled (not connector-gated ones) are
    # surfaced to the prompt builder as user-disabled.
    _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()

    # Collect generic MCP connector info so the system prompt can route queries
    # to their tools instead of falling back to "not in knowledge base".
    _mcp_connector_tools: dict[str, list[str]] = {}
    for t in tools:
        meta = getattr(t, "metadata", None) or {}
        if meta.get("mcp_is_generic") and meta.get("mcp_connector_name"):
            _mcp_connector_tools.setdefault(
                meta["mcp_connector_name"],
                [],
            ).append(t.name)

    if _mcp_connector_tools:
        _perf_log.info("MCP connector tool routing: %s", _mcp_connector_tools)

    if agent_config is not None:
        system_prompt = build_configurable_system_prompt(
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
        )
    else:
        system_prompt = build_surfsense_system_prompt(
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
        )
    _perf_log.info(
        "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
    )

    # Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT

    # The middleware stack — and especially ``SubAgentMiddleware`` — is *not*
    # cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent``
    # synchronously to compile the general-purpose subagent's full state graph
    # (every tool + every middleware → pydantic schemas + langgraph compile).
    # On gpt-5.x agents that's roughly 1.5-2s of pure CPU work. If we run it
    # directly here it blocks the asyncio event loop for the whole streaming
    # task (and any other coroutine sharing this loop), which is why
    # "agent creation" wall-clock time used to stretch to ~3-4s. Move the
    # entire middleware build + main-graph compile into a single
    # ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
    # event loop stays responsive.
    _t0 = time.perf_counter()
    agent = await asyncio.to_thread(
        _build_compiled_agent_blocking,
        llm=llm,
        tools=tools,
        final_system_prompt=final_system_prompt,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_selection.mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=_max_input_tokens,
        flags=_flags,
        checkpointer=checkpointer,
    )
    _perf_log.info(
        "[create_agent] Middleware stack + graph compiled in %.3fs",
        time.perf_counter() - _t0,
    )

    _perf_log.info(
        "[create_agent] Total agent creation in %.3fs",
        time.perf_counter() - _t_agent_total,
    )
    return agent
|
|
|
|
|
|
# Tier 1.1: tools whose output is too costly / lossy to discard. Keep
|
|
# this conservative — anything listed here is *never* pruned by
|
|
# ContextEditingMiddleware. The list is filtered against actually-bound
|
|
# tool names so disabled connectors don't show up here.
|
|
_PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
|
|
{
|
|
"generate_report",
|
|
"generate_resume",
|
|
"generate_podcast",
|
|
"generate_video_presentation",
|
|
"generate_image",
|
|
# Read-heavy connector reads — recomputing them is expensive
|
|
"read_email",
|
|
"search_emails",
|
|
# The fallback for malformed tool calls — keep its replies visible
|
|
"invalid",
|
|
}
|
|
)
|
|
|
|
|
|
def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
|
|
"""Return ``exclude_tools`` derived from the actually-bound tool list.
|
|
|
|
Filters :data:`_PRUNE_PROTECTED_TOOL_NAMES` against the bound tools
|
|
so we never list tools that don't exist (would be a silent no-op).
|
|
"""
|
|
enabled = {t.name for t in tools}
|
|
return tuple(name for name in _PRUNE_PROTECTED_TOOL_NAMES if name in enabled)
|
|
|
|
|
|
# Tier 2.1 / cleanup: opencode `Permission.disabled` parity. Replaces the
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE``-based gating with a
# declarative pass over :data:`BUILTIN_TOOLS`. Each tool that declares a
# ``required_connector`` not present in ``available_connectors`` gets a
# deny rule so any execution attempt short-circuits with permission_denied.
def _synthesize_connector_deny_rules(
    *,
    available_connectors: list[str] | None,
    enabled_tool_names: set[str],
) -> list[Rule]:
    """Build deny rules for tools whose required connector is not enabled.

    Source of truth is ``ToolDefinition.required_connector`` in
    :data:`BUILTIN_TOOLS`. A tool only gets a deny rule when:

    1. It is currently bound (``enabled_tool_names``).
    2. It declares a ``required_connector``.
    3. That connector is *not* in ``available_connectors``.

    This expresses the OpenCode ``Permission.disabled`` semantics
    declaratively, replacing the substring-heuristic binary gating
    that used to consult the hardcoded ``_CONNECTOR_TYPE_TO_SEARCHABLE``
    map.
    """
    connected = set(available_connectors or [])
    return [
        Rule(permission=tool_def.name, pattern="*", action="deny")
        for tool_def in BUILTIN_TOOLS
        # Skip tools that aren't bound to this agent at all.
        if tool_def.name in enabled_tool_names
        # Deny only tools that require a connector this space lacks.
        and tool_def.required_connector
        and tool_def.required_connector not in connected
    ]
|
|
|
|
|
|
def _build_compiled_agent_blocking(
|
|
*,
|
|
llm: BaseChatModel,
|
|
tools: Sequence[BaseTool],
|
|
final_system_prompt: str,
|
|
backend_resolver: Any,
|
|
filesystem_mode: FilesystemMode,
|
|
search_space_id: int,
|
|
user_id: str | None,
|
|
thread_id: int | None,
|
|
visibility: ChatVisibility,
|
|
anon_session_id: str | None,
|
|
available_connectors: list[str] | None,
|
|
available_document_types: list[str] | None,
|
|
mentioned_document_ids: list[int] | None,
|
|
max_input_tokens: int | None,
|
|
flags: AgentFeatureFlags,
|
|
checkpointer: Checkpointer,
|
|
):
|
|
"""Build the middleware stack and compile the agent graph synchronously.
|
|
|
|
Runs in a worker thread (see ``asyncio.to_thread`` call site) so the heavy
|
|
CPU work — most notably ``SubAgentMiddleware.__init__`` eagerly calling
|
|
``create_agent`` to compile the general-purpose subagent — does not block
|
|
the event loop.
|
|
"""
|
|
_memory_middleware = MemoryInjectionMiddleware(
|
|
user_id=user_id,
|
|
search_space_id=search_space_id,
|
|
thread_visibility=visibility,
|
|
)
|
|
|
|
# General-purpose subagent middleware
|
|
# Subagent omits AnonymousDocumentMiddleware, KnowledgeTreeMiddleware,
|
|
# KnowledgePriorityMiddleware, and KnowledgeBasePersistenceMiddleware - it
|
|
# inherits state and tools from the parent, but should not (a) re-load
|
|
# anon docs / re-render the tree / re-run hybrid search, or (b) commit at
|
|
# its own completion (only the top-level agent's aafter_agent commits).
|
|
gp_middleware = [
|
|
TodoListMiddleware(),
|
|
_memory_middleware,
|
|
FileIntentMiddleware(llm=llm),
|
|
SurfSenseFilesystemMiddleware(
|
|
backend=backend_resolver,
|
|
filesystem_mode=filesystem_mode,
|
|
search_space_id=search_space_id,
|
|
created_by_id=user_id,
|
|
thread_id=thread_id,
|
|
),
|
|
create_surfsense_compaction_middleware(llm, StateBackend),
|
|
PatchToolCallsMiddleware(),
|
|
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
|
]
|
|
|
|
general_purpose_spec: SubAgent = { # type: ignore[typeddict-unknown-key]
|
|
**GENERAL_PURPOSE_SUBAGENT,
|
|
"model": llm,
|
|
"tools": tools,
|
|
"middleware": gp_middleware,
|
|
}
|
|
|
|
# Tier 4.3: specialized user-facing subagents (explore, report_writer,
|
|
# connector_negotiator). Registered through SubAgentMiddleware alongside
|
|
# the general-purpose spec so the parent's `task` tool can address them
|
|
# by name. Off by default until the flag flips so existing deployments
|
|
# don't see new agent types in the task tool description.
|
|
specialized_subagents: list[SubAgent] = []
|
|
if flags.enable_specialized_subagents and not flags.disable_new_agent_stack:
|
|
try:
|
|
# Specialized subagents share the parent's filesystem +
|
|
# todo view so their system prompts (which promise
|
|
# ``read_file``, ``ls``, ``grep``, ``glob``, ``write_todos``)
|
|
# actually match runtime behavior. Build *fresh* instances
|
|
# rather than aliasing the parent's GP middleware to avoid
|
|
# subtle state coupling across compiled graphs.
|
|
subagent_extra_middleware: list = [
|
|
TodoListMiddleware(),
|
|
SurfSenseFilesystemMiddleware(
|
|
backend=backend_resolver,
|
|
filesystem_mode=filesystem_mode,
|
|
search_space_id=search_space_id,
|
|
created_by_id=user_id,
|
|
thread_id=thread_id,
|
|
),
|
|
]
|
|
specialized_subagents = build_specialized_subagents(
|
|
tools=tools,
|
|
model=llm,
|
|
extra_middleware=subagent_extra_middleware,
|
|
)
|
|
except Exception as exc: # pragma: no cover - defensive
|
|
logging.warning(
|
|
"Specialized subagent build failed; running without them: %s",
|
|
exc,
|
|
)
|
|
specialized_subagents = []
|
|
|
|
subagent_specs: list[SubAgent] = [general_purpose_spec, *specialized_subagents]
|
|
|
|
# Main agent middleware
|
|
# Order: AnonDoc -> Tree -> Priority -> FileIntent -> Filesystem -> Persistence -> ...
|
|
# before_agent hooks run in declared order; later injections sit closer to
|
|
# the latest human turn. Tree (large + cacheable) is injected earliest so
|
|
# provider-side prefix caching has more material to hit; FileIntent (most
|
|
# actionable per-turn contract) is injected closest to the user message.
|
|
#
|
|
# ``wrap_model_call`` ordering: the FIRST middleware in the list is the
|
|
# OUTERMOST wrapper. To ensure prune executes before summarization,
|
|
# place ``SpillingContextEditingMiddleware`` before
|
|
# ``SurfSenseCompactionMiddleware`` (Tier 1.1 + 1.3).
|
|
# Compaction is the canonical token-budget defense after the
|
|
# cleanup tier removed ``SafeSummarizationMiddleware``. The Bedrock
|
|
# buffer-empty defense is folded into ``SurfSenseCompactionMiddleware``.
|
|
summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend)
|
|
_ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity
|
|
|
|
# Tier 1.1: ContextEditing prune. Trigger at 55% of model_max_input,
|
|
# earlier than summarization (~85%). When disabled, no edit runs.
|
|
context_edit_mw = None
|
|
if (
|
|
flags.enable_context_editing
|
|
and not flags.disable_new_agent_stack
|
|
and max_input_tokens
|
|
):
|
|
spill_edit = SpillToBackendEdit(
|
|
trigger=int(max_input_tokens * 0.55),
|
|
clear_at_least=int(max_input_tokens * 0.15),
|
|
keep=5,
|
|
exclude_tools=_safe_exclude_tools(tools),
|
|
clear_tool_inputs=True,
|
|
)
|
|
clear_edit = ClearToolUsesEdit(
|
|
trigger=int(max_input_tokens * 0.55),
|
|
clear_at_least=int(max_input_tokens * 0.15),
|
|
keep=5,
|
|
exclude_tools=_safe_exclude_tools(tools),
|
|
clear_tool_inputs=True,
|
|
placeholder="[cleared - older tool output trimmed for context]",
|
|
)
|
|
context_edit_mw = SpillingContextEditingMiddleware(
|
|
edits=[spill_edit, clear_edit],
|
|
backend_resolver=backend_resolver,
|
|
)
|
|
|
|
# Tier 1.4 / 1.8 / 1.9 / 1.10: built-in retry/fallback/limits.
|
|
retry_mw = (
|
|
RetryAfterMiddleware(max_retries=3)
|
|
if flags.enable_retry_after and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
# Fallback chain — primary is the agent's own model; we add cheap
|
|
# alternatives. Off by default; only the first call site that
|
|
# configures the chain via env should enable it.
|
|
fallback_mw: ModelFallbackMiddleware | None = None
|
|
if flags.enable_model_fallback and not flags.disable_new_agent_stack:
|
|
try:
|
|
fallback_mw = ModelFallbackMiddleware(
|
|
"openai:gpt-4o-mini",
|
|
"anthropic:claude-3-5-haiku-20241022",
|
|
)
|
|
except Exception:
|
|
logging.warning("ModelFallbackMiddleware init failed; skipping.")
|
|
fallback_mw = None
|
|
model_call_limit_mw = (
|
|
ModelCallLimitMiddleware(
|
|
thread_limit=120,
|
|
run_limit=80,
|
|
exit_behavior="end",
|
|
)
|
|
if flags.enable_model_call_limit and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
tool_call_limit_mw = (
|
|
ToolCallLimitMiddleware(
|
|
thread_limit=300, run_limit=80, exit_behavior="continue"
|
|
)
|
|
if flags.enable_tool_call_limit and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
|
|
# Tier 1.5: provider-compat _noop injection.
|
|
noop_mw = (
|
|
NoopInjectionMiddleware()
|
|
if flags.enable_compaction_v2 and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
|
|
# Tier 1.7: tool-call name repair (lowercase + invalid fallback).
|
|
#
|
|
# ``registered_tool_names`` MUST cover every tool the model can legitimately
|
|
# call. That includes the bound ``tools`` list AND every tool provided by
|
|
# middleware in the stack — ``FilesystemMiddleware`` (read_file, ls, grep,
|
|
# glob, edit_file, write_file, execute), ``TodoListMiddleware``
|
|
# (write_todos), ``SubAgentMiddleware`` (task), ``SkillsMiddleware`` (skill
|
|
# loaders), etc. If we only inspect ``tools`` here, every call to
|
|
# ``read_file`` / ``ls`` / ``grep`` from the model will be rewritten to
|
|
# ``invalid`` because the repair middleware doesn't recognize them. The
|
|
# built-in deepagents middleware aren't in scope yet at this point of the
|
|
# function but they're added unconditionally below, so we hard-code their
|
|
# canonical names alongside the dynamic ``tools`` set.
|
|
repair_mw = None
|
|
if flags.enable_tool_call_repair and not flags.disable_new_agent_stack:
|
|
registered_names: set[str] = {t.name for t in tools}
|
|
# Tools owned by the standard deepagents middleware stack.
|
|
registered_names |= {
|
|
"write_todos",
|
|
"ls",
|
|
"read_file",
|
|
"write_file",
|
|
"edit_file",
|
|
"glob",
|
|
"grep",
|
|
"execute",
|
|
"task",
|
|
}
|
|
repair_mw = ToolCallNameRepairMiddleware(
|
|
registered_tool_names=registered_names,
|
|
fuzzy_match_threshold=None, # opencode parity: no fuzzy step
|
|
)
|
|
|
|
# Tier 1.11: doom-loop detector. Off by default until UI handles.
|
|
doom_loop_mw = (
|
|
DoomLoopMiddleware(threshold=3)
|
|
if flags.enable_doom_loop and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
|
|
# Tier 2.1: PermissionMiddleware. Layers, earliest -> latest (last
|
|
# match wins per opencode):
|
|
#
|
|
# 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense
|
|
# already runs per-tool HITL (see ``tools/hitl.py``) for mutating
|
|
# connector tools, so we only want PermissionMiddleware to *deny*
|
|
# things the user has gated off; the default fallback in
|
|
# ``permissions.evaluate`` is ``ask``, which would double-prompt
|
|
# on every safe read-only call (``ls``, ``read_file``, ``grep``,
|
|
# ``glob``, ``web_search`` …) and, on resume, replay the previous
|
|
# reject decision into innocent calls.
|
|
# 2. ``connector_synthesized`` — deny rules for tools whose required
|
|
# connector is not connected to this space. Overrides #1.
|
|
# 3. (future) user-defined rules from ``agent_permission_rules`` table
|
|
# via the Agent Permissions UI. Loaded last so they override both.
|
|
permission_mw: PermissionMiddleware | None = None
|
|
if flags.enable_permission and not flags.disable_new_agent_stack:
|
|
synthesized = _synthesize_connector_deny_rules(
|
|
available_connectors=available_connectors,
|
|
enabled_tool_names={t.name for t in tools},
|
|
)
|
|
permission_mw = PermissionMiddleware(
|
|
rulesets=[
|
|
Ruleset(
|
|
rules=[Rule(permission="*", pattern="*", action="allow")],
|
|
origin="surfsense_defaults",
|
|
),
|
|
Ruleset(rules=synthesized, origin="connector_synthesized"),
|
|
],
|
|
)
|
|
|
|
# Tier 5.2: ActionLogMiddleware. Off by default until the
|
|
# ``agent_action_log`` table is migrated. When enabled, persists one
|
|
# row per tool call with optional reverse_descriptor for
|
|
# /api/threads/{thread_id}/revert/{action_id}. Sits inside permission
|
|
# so denied calls aren't logged as completions.
|
|
action_log_mw: ActionLogMiddleware | None = None
|
|
if (
|
|
flags.enable_action_log
|
|
and not flags.disable_new_agent_stack
|
|
and thread_id is not None
|
|
):
|
|
try:
|
|
tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
|
|
action_log_mw = ActionLogMiddleware(
|
|
thread_id=thread_id,
|
|
search_space_id=search_space_id,
|
|
user_id=user_id,
|
|
tool_definitions=tool_defs_by_name,
|
|
)
|
|
except Exception: # pragma: no cover - defensive
|
|
logging.warning(
|
|
"ActionLogMiddleware init failed; running without it.",
|
|
exc_info=True,
|
|
)
|
|
action_log_mw = None
|
|
|
|
# Tier 2.2: per-thread busy mutex.
|
|
busy_mutex_mw: BusyMutexMiddleware | None = (
|
|
BusyMutexMiddleware()
|
|
if flags.enable_busy_mutex and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
|
|
# Tier 3b: OpenTelemetry spans (model.call + tool.call). Lives just
|
|
# inside BusyMutex so it spans every retry/fallback attempt of the
|
|
# current turn but never wraps a queued/blocked turn.
|
|
otel_mw: OtelSpanMiddleware | None = (
|
|
OtelSpanMiddleware()
|
|
if flags.enable_otel and not flags.disable_new_agent_stack
|
|
else None
|
|
)
|
|
|
|
# Tier 6: plugin entry-point loader. Off by default; opt-in via the
|
|
# ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from
|
|
# the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future
|
|
# PR can wire it through ``global_llm_config.yaml``.
|
|
plugin_middlewares: list[Any] = []
|
|
if flags.enable_plugin_loader and not flags.disable_new_agent_stack:
|
|
try:
|
|
allowed_names = load_allowed_plugin_names_from_env()
|
|
if allowed_names:
|
|
plugin_middlewares = load_plugin_middlewares(
|
|
PluginContext.build(
|
|
search_space_id=search_space_id,
|
|
user_id=user_id,
|
|
thread_visibility=visibility,
|
|
llm=llm,
|
|
),
|
|
allowed_plugin_names=allowed_names,
|
|
)
|
|
except Exception: # pragma: no cover - defensive
|
|
logging.warning(
|
|
"Plugin loader failed; continuing without plugins.",
|
|
exc_info=True,
|
|
)
|
|
plugin_middlewares = []
|
|
|
|
# Tier 4.1: SkillsMiddleware. Loads built-in + space-authored skills
|
|
# via a CompositeBackend. Sources are layered: built-in first, space
|
|
# last, so a search-space-authored skill of the same name overrides
|
|
# the bundled one.
|
|
skills_mw: SkillsMiddleware | None = None
|
|
if flags.enable_skills and not flags.disable_new_agent_stack:
|
|
try:
|
|
skills_factory = build_skills_backend_factory(
|
|
search_space_id=search_space_id
|
|
if filesystem_mode == FilesystemMode.CLOUD
|
|
else None,
|
|
)
|
|
skills_mw = SkillsMiddleware(
|
|
backend=skills_factory,
|
|
sources=default_skills_sources(),
|
|
)
|
|
except Exception as exc: # pragma: no cover - defensive
|
|
logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
|
|
skills_mw = None
|
|
|
|
# Tier 2.5: LLM-driven tool selection for >30 tools.
|
|
selector_mw: LLMToolSelectorMiddleware | None = None
|
|
if (
|
|
flags.enable_llm_tool_selector
|
|
and not flags.disable_new_agent_stack
|
|
and len(tools) > 30
|
|
):
|
|
try:
|
|
selector_mw = LLMToolSelectorMiddleware(
|
|
model="openai:gpt-4o-mini",
|
|
max_tools=12,
|
|
always_include=[
|
|
name
|
|
for name in (
|
|
"update_memory",
|
|
"get_connected_accounts",
|
|
"scrape_webpage",
|
|
)
|
|
if name in {t.name for t in tools}
|
|
],
|
|
)
|
|
except Exception:
|
|
logging.warning("LLMToolSelectorMiddleware init failed; skipping.")
|
|
selector_mw = None
|
|
|
|
deepagent_middleware = [
|
|
# BusyMutex is OUTERMOST: it must wrap the entire stream so no
|
|
# other turn can sneak in while this one is mid-flight.
|
|
busy_mutex_mw,
|
|
# OTel spans sit just inside BusyMutex so each retry attempt
|
|
# gets its own model.call / tool.call span.
|
|
otel_mw,
|
|
TodoListMiddleware(),
|
|
_memory_middleware,
|
|
AnonymousDocumentMiddleware(
|
|
anon_session_id=anon_session_id,
|
|
)
|
|
if filesystem_mode == FilesystemMode.CLOUD
|
|
else None,
|
|
KnowledgeTreeMiddleware(
|
|
search_space_id=search_space_id,
|
|
filesystem_mode=filesystem_mode,
|
|
llm=llm,
|
|
)
|
|
if filesystem_mode == FilesystemMode.CLOUD
|
|
else None,
|
|
KnowledgePriorityMiddleware(
|
|
llm=llm,
|
|
search_space_id=search_space_id,
|
|
filesystem_mode=filesystem_mode,
|
|
available_connectors=available_connectors,
|
|
available_document_types=available_document_types,
|
|
mentioned_document_ids=mentioned_document_ids,
|
|
),
|
|
FileIntentMiddleware(llm=llm),
|
|
SurfSenseFilesystemMiddleware(
|
|
backend=backend_resolver,
|
|
filesystem_mode=filesystem_mode,
|
|
search_space_id=search_space_id,
|
|
created_by_id=user_id,
|
|
thread_id=thread_id,
|
|
),
|
|
KnowledgeBasePersistenceMiddleware(
|
|
search_space_id=search_space_id,
|
|
created_by_id=user_id,
|
|
filesystem_mode=filesystem_mode,
|
|
)
|
|
if filesystem_mode == FilesystemMode.CLOUD
|
|
else None,
|
|
# Tier 4.1: skill loader. Placed before SubAgentMiddleware so
|
|
# subagents inherit the same skill metadata (subagent specs reference
|
|
# the same source paths via `default_skills_sources()`).
|
|
skills_mw,
|
|
SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs),
|
|
# Tier 2.5: tool selection (only when >30 tools and flag on).
|
|
selector_mw,
|
|
# Defensive caps, then prune, then summarize.
|
|
model_call_limit_mw,
|
|
tool_call_limit_mw,
|
|
context_edit_mw,
|
|
summarization_mw,
|
|
# Provider compatibility + retry chain — placed after prune/compact
|
|
# so retries happen on the already-trimmed payload.
|
|
noop_mw,
|
|
retry_mw,
|
|
fallback_mw,
|
|
# Tool-call repair must run after model emits but before
|
|
# permission / dedup / doom-loop interpret the calls.
|
|
repair_mw,
|
|
# Tier 2.1: deny/ask BEFORE the calls are forwarded to tool nodes.
|
|
permission_mw,
|
|
doom_loop_mw,
|
|
# Tier 5.2: action log sits inside permission so denied calls
|
|
# don't appear as completions, and outside dedup so each unique
|
|
# tool invocation gets its own row.
|
|
action_log_mw,
|
|
PatchToolCallsMiddleware(),
|
|
DedupHITLToolCallsMiddleware(agent_tools=list(tools)),
|
|
# Tier 6: plugin slot — sits just before AnthropicCache so plugin-side
|
|
# transforms see the final tool result and run before any caching
|
|
# heuristics. Multiple plugins in declared order; loader filtered by
|
|
# the admin allowlist already.
|
|
*plugin_middlewares,
|
|
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
|
]
|
|
deepagent_middleware = [m for m in deepagent_middleware if m is not None]
|
|
|
|
agent = create_agent(
|
|
llm,
|
|
system_prompt=final_system_prompt,
|
|
tools=list(tools),
|
|
middleware=deepagent_middleware,
|
|
context_schema=SurfSenseContextSchema,
|
|
checkpointer=checkpointer,
|
|
)
|
|
return agent.with_config(
|
|
{
|
|
"recursion_limit": 10_000,
|
|
"metadata": {
|
|
"ls_integration": "deepagents",
|
|
"versions": {"deepagents": deepagents_version},
|
|
},
|
|
}
|
|
)
|