Merge pull request #1352 from MODSetter/dev
Some checks are pending
Build and Push Docker Images / tag_release (push) Waiting to run
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_backend, ./surfsense_backend/Dockerfile, backend, surfsense-backend, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-24.04-arm, linux/arm64, arm64) (push) Blocked by required conditions
Build and Push Docker Images / build (./surfsense_web, ./surfsense_web/Dockerfile, web, surfsense-web, ubuntu-latest, linux/amd64, amd64) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (backend, surfsense-backend) (push) Blocked by required conditions
Build and Push Docker Images / create_manifest (web, surfsense-web) (push) Blocked by required conditions

feat: Add multi-agent orchestration
This commit is contained in:
Rohan Verma 2026-05-05 19:28:28 -07:00 committed by GitHub
commit 83ee58016e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
345 changed files with 21459 additions and 363 deletions

View file

@ -1 +1 @@
0.0.22
0.0.23

View file

@ -282,6 +282,9 @@ LANGSMITH_PROJECT=surfsense
# =============================================================================
# OPTIONAL: New-chat agent feature flags
# =============================================================================
# Multi-agent orchestrator switch for authenticated chat streaming.
# MULTI_AGENT_CHAT_ENABLED=false
# Master kill-switch — when true, every flag below is forced OFF.
# SURFSENSE_DISABLE_NEW_AGENT_STACK=false

View file

@ -0,0 +1,7 @@
"""Deepagents-backed routes: ``subagents/``; main-agent graph under ``main_agent/`` (SRP subpackages)."""
from __future__ import annotations
from .main_agent import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,43 @@
"""Connector-type to subagent name; subagent name to availability tokens for build_subagents."""
from __future__ import annotations
# Maps a raw connector-type token (as stored on the workspace) to the name of
# the subagent that services it. Several tokens (native + Composio variants)
# may fan in to the same subagent.
CONNECTOR_TYPE_TO_CONNECTOR_AGENT_MAPS: dict[str, str] = {
    "GOOGLE_GMAIL_CONNECTOR": "gmail",
    "COMPOSIO_GMAIL_CONNECTOR": "gmail",
    "GOOGLE_CALENDAR_CONNECTOR": "calendar",
    "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR": "calendar",
    "DISCORD_CONNECTOR": "discord",
    "TEAMS_CONNECTOR": "teams",
    "LUMA_CONNECTOR": "luma",
    "LINEAR_CONNECTOR": "linear",
    "JIRA_CONNECTOR": "jira",
    "CLICKUP_CONNECTOR": "clickup",
    "SLACK_CONNECTOR": "slack",
    "AIRTABLE_CONNECTOR": "airtable",
    "NOTION_CONNECTOR": "notion",
    "CONFLUENCE_CONNECTOR": "confluence",
    "GOOGLE_DRIVE_CONNECTOR": "google_drive",
    "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": "google_drive",
    "DROPBOX_CONNECTOR": "dropbox",
    "ONEDRIVE_CONNECTOR": "onedrive",
}
# Availability tokens that must be present for a subagent to be offered; an
# empty frozenset means the subagent is always available ("deliverables").
# NOTE(review): dropbox/google_drive/onedrive gate on "*_FILE" tokens while
# the map above uses "*_CONNECTOR" tokens — presumably these keys come from a
# different vocabulary (searchable document types rather than connector
# types); confirm against the producer of the availability list.
# NOTE(review): gmail/calendar list only the native token, not the COMPOSIO_*
# variants — verify a Composio-only workspace should still get the subagent.
SUBAGENT_TO_REQUIRED_CONNECTOR_MAP: dict[str, frozenset[str]] = {
    "deliverables": frozenset(),
    "airtable": frozenset({"AIRTABLE_CONNECTOR"}),
    "calendar": frozenset({"GOOGLE_CALENDAR_CONNECTOR"}),
    "clickup": frozenset({"CLICKUP_CONNECTOR"}),
    "confluence": frozenset({"CONFLUENCE_CONNECTOR"}),
    "discord": frozenset({"DISCORD_CONNECTOR"}),
    "dropbox": frozenset({"DROPBOX_FILE"}),
    "gmail": frozenset({"GOOGLE_GMAIL_CONNECTOR"}),
    "google_drive": frozenset({"GOOGLE_DRIVE_FILE"}),
    "jira": frozenset({"JIRA_CONNECTOR"}),
    "linear": frozenset({"LINEAR_CONNECTOR"}),
    "luma": frozenset({"LUMA_CONNECTOR"}),
    "notion": frozenset({"NOTION_CONNECTOR"}),
    "onedrive": frozenset({"ONEDRIVE_FILE"}),
    "slack": frozenset({"SLACK_CONNECTOR"}),
    "teams": frozenset({"TEAMS_CONNECTOR"}),
}

View file

@ -0,0 +1,7 @@
"""Main-agent deep agent: ``runtime/`` (factory), ``graph/`` (compile), ``system_prompt/``, etc."""
from __future__ import annotations
from .runtime import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,7 @@
"""Tool-name pruning for context editing (exclude lists without dropping protected tools)."""
from __future__ import annotations
from .prune_tool_names import PRUNE_PROTECTED_TOOL_NAMES, safe_exclude_tools
__all__ = ["PRUNE_PROTECTED_TOOL_NAMES", "safe_exclude_tools"]

View file

@ -0,0 +1,26 @@
"""Tool names excluded from context-editing prune when bound."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
# Tool names that must never be pruned from context once bound: deliverable
# generators (their outputs anchor the turn), email read/search, and the
# tool-call-repair sink "invalid".
PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "generate_report",
        "generate_resume",
        "generate_podcast",
        "generate_video_presentation",
        "generate_image",
        "read_email",
        "search_emails",
        "invalid",
    },
)


def safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
    """Names from ``PRUNE_PROTECTED_TOOL_NAMES`` that appear in ``tools``.

    Returns the intersection in sorted order. Sorting matters: iterating the
    frozenset directly would yield a different order per process under string
    hash randomization, making the result nondeterministic for anything that
    hashes or compares it (e.g. cache keys or exclude-list signatures).
    """
    enabled = {t.name for t in tools}
    return tuple(sorted(PRUNE_PROTECTED_TOOL_NAMES & enabled))

View file

@ -0,0 +1,7 @@
"""Sync compile of the main-agent LangGraph graph (middleware + ``create_agent``)."""
from __future__ import annotations
from .compile_graph_sync import build_compiled_agent_graph_sync
__all__ = ["build_compiled_agent_graph_sync"]

View file

@ -0,0 +1,86 @@
"""Synchronous graph compile (middleware + ``create_agent``)."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from deepagents import __version__ as deepagents_version
from langchain.agents import create_agent
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.middleware import (
build_main_agent_deepagent_middleware,
)
from app.agents.multi_agent_chat.subagents.shared.permissions import (
ToolsPermissions,
)
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
def build_compiled_agent_graph_sync(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    final_system_prompt: str,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    checkpointer: Checkpointer,
    subagent_dependencies: dict[str, Any],
    mcp_tools_by_agent: dict[str, ToolsPermissions] | None = None,
    disabled_tools: list[str] | None = None,
):
    """Sync compile: middleware + ``create_agent`` (run via ``asyncio.to_thread``).

    Builds the full main-agent middleware stack, compiles the LangGraph agent,
    and attaches run-level config. This is CPU-bound, so callers push it off
    the event loop (see ``runtime.agent_cache``).

    Every keyword argument is forwarded verbatim to
    ``build_main_agent_deepagent_middleware``; ``llm``, ``tools``,
    ``final_system_prompt``, and ``checkpointer`` are additionally consumed by
    ``create_agent`` itself. Returns the compiled agent wrapped with run
    config (not a plain graph).
    """
    main_agent_middleware = build_main_agent_deepagent_middleware(
        llm=llm,
        tools=tools,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=max_input_tokens,
        flags=flags,
        subagent_dependencies=subagent_dependencies,
        checkpointer=checkpointer,
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
    )
    agent = create_agent(
        llm,
        system_prompt=final_system_prompt,
        tools=list(tools),
        middleware=main_agent_middleware,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
    )
    # High recursion limit: deep-agent delegation loops legitimately take many
    # graph steps. Metadata tags traces with the deepagents version in use.
    return agent.with_config(
        {
            "recursion_limit": 10_000,
            "metadata": {
                "ls_integration": "deepagents",
                "versions": {"deepagents": deepagents_version},
            },
        }
    )

View file

@ -0,0 +1,7 @@
"""Async factory: wiring tools, prompts, MCP buckets, then graph compile."""
from __future__ import annotations
from .factory import create_multi_agent_chat_deep_agent
__all__ = ["create_multi_agent_chat_deep_agent"]

View file

@ -0,0 +1,117 @@
"""Compiled agent graph caching for the multi-agent path."""
from __future__ import annotations
import asyncio
from collections.abc import Sequence
from typing import Any
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from app.agents.multi_agent_chat.subagents.shared.permissions import ToolsPermissions
from app.agents.new_chat.agent_cache import (
flags_signature,
get_cache,
stable_hash,
system_prompt_hash,
tools_signature,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.db import ChatVisibility
from ..graph.compile_graph_sync import build_compiled_agent_graph_sync
def mcp_signature(mcp_tools_by_agent: dict[str, ToolsPermissions]) -> str:
    """Hash the per-agent MCP tool surface so a change rotates the cache key.

    The fingerprint is order-independent: agents are visited in sorted-name
    order, and the allow/ask tool names within each bucket are sorted.
    """
    fingerprint = [
        (
            agent_name,
            sorted(entry.get("name", "") for entry in perms.get("allow", [])),
            sorted(entry.get("name", "") for entry in perms.get("ask", [])),
        )
        for agent_name, perms in sorted(mcp_tools_by_agent.items())
    ]
    return stable_hash(fingerprint)
async def build_agent_with_cache(
    *,
    llm: BaseChatModel,
    tools: Sequence[BaseTool],
    final_system_prompt: str,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
    visibility: ChatVisibility,
    anon_session_id: str | None,
    available_connectors: list[str],
    available_document_types: list[str],
    mentioned_document_ids: list[int] | None,
    max_input_tokens: int | None,
    flags: AgentFeatureFlags,
    checkpointer: Checkpointer,
    subagent_dependencies: dict[str, Any],
    mcp_tools_by_agent: dict[str, ToolsPermissions],
    disabled_tools: list[str] | None,
    config_id: str | None,
) -> Any:
    """Compile the multi-agent graph, serving from cache when key components are stable.

    The compile is CPU-bound, so the cold path runs
    ``build_compiled_agent_graph_sync`` in a worker thread via
    ``asyncio.to_thread``. Caching is bypassed entirely unless
    ``flags.enable_agent_cache`` is on and the new agent stack is not
    kill-switched by ``flags.disable_new_agent_stack``.
    """

    async def _build() -> Any:
        # Cold path: full synchronous compile off the event loop.
        return await asyncio.to_thread(
            build_compiled_agent_graph_sync,
            llm=llm,
            tools=tools,
            final_system_prompt=final_system_prompt,
            backend_resolver=backend_resolver,
            filesystem_mode=filesystem_mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
            visibility=visibility,
            anon_session_id=anon_session_id,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            mentioned_document_ids=mentioned_document_ids,
            max_input_tokens=max_input_tokens,
            flags=flags,
            checkpointer=checkpointer,
            subagent_dependencies=subagent_dependencies,
            mcp_tools_by_agent=mcp_tools_by_agent,
            disabled_tools=disabled_tools,
        )

    if not (flags.enable_agent_cache and not flags.disable_new_agent_stack):
        return await _build()
    # Every per-request value any middleware closes over at __init__ must be in
    # the key, otherwise a hit will leak state across threads. Bump the schema
    # version when the component list changes shape.
    cache_key = stable_hash(
        "multi-agent-v1",  # schema version of this key layout
        config_id,
        thread_id,
        user_id,
        search_space_id,
        visibility,
        filesystem_mode,
        anon_session_id,
        tools_signature(
            tools,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
        ),
        mcp_signature(mcp_tools_by_agent),
        flags_signature(flags),
        system_prompt_hash(final_system_prompt),
        max_input_tokens,
        # Sorted so key stability does not depend on caller-supplied order.
        sorted(disabled_tools) if disabled_tools else None,
    )
    return await get_cache().get_or_build(cache_key, builder=_build)
__all__ = ["build_agent_with_cache", "mcp_signature"]

View file

@ -0,0 +1,257 @@
"""Async factory: tools, system prompt, MCP buckets for subagents, then sync graph compile."""
from __future__ import annotations
import logging
import time
from collections.abc import Sequence
from typing import Any
from deepagents.graph import BASE_AGENT_PROMPT
from langchain_core.language_models import BaseChatModel
from langchain_core.tools import BaseTool
from langgraph.types import Checkpointer
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.multi_agent_chat.subagents import (
get_subagents_to_exclude,
main_prompt_registry_subagent_lines,
)
from app.agents.multi_agent_chat.subagents.mcp_tools.index import (
load_mcp_tools_by_connector,
)
from app.agents.new_chat.chat_deepagent import _map_connectors_to_searchable_types
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
from app.agents.new_chat.filesystem_backends import build_backend_resolver
from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection
from app.agents.new_chat.llm_config import AgentConfig
from app.agents.new_chat.prompt_caching import apply_litellm_prompt_caching
from app.agents.new_chat.tools.invalid_tool import INVALID_TOOL_NAME, invalid_tool
from app.agents.new_chat.tools.registry import build_tools_async
from app.db import ChatVisibility
from app.services.connector_service import ConnectorService
from app.utils.perf import get_perf_logger
from ..system_prompt import build_main_agent_system_prompt
from ..tools import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)
from .agent_cache import build_agent_with_cache
# Module-level perf logger shared by the factory below.
_perf_log = get_perf_logger()


async def create_multi_agent_chat_deep_agent(
    llm: BaseChatModel,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
    mentioned_document_ids: list[int] | None = None,
    anon_session_id: str | None = None,
    filesystem_selection: FilesystemSelection | None = None,
):
    """Deep agent with SurfSense tools/middleware; registry route subagents behind ``task`` when enabled.

    Pipeline (each stage perf-logged via ``_perf_log``):
      1. connector/doc-type discovery (fails closed to empty lists),
      2. MCP tool discovery per subagent bucket (degrades to no MCP tools),
      3. main-agent tool construction via ``build_tools_async``,
      4. system-prompt assembly (custom or default instructions),
      5. cached graph compile via ``build_agent_with_cache``.

    Returns the compiled, config-wrapped agent graph.
    """
    _t_agent_total = time.perf_counter()
    # Enable provider-side prompt caching hints before any prompt assembly.
    apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id)
    filesystem_selection = filesystem_selection or FilesystemSelection()
    backend_resolver = build_backend_resolver(
        filesystem_selection,
        # Cloud filesystem mode scopes the backend to the workspace.
        search_space_id=search_space_id
        if filesystem_selection.mode == FilesystemMode.CLOUD
        else None,
    )
    available_connectors: list[str] | None = None
    available_document_types: list[str] | None = None
    _t0 = time.perf_counter()
    try:
        connector_types = await connector_service.get_available_connectors(
            search_space_id
        )
        available_connectors = _map_connectors_to_searchable_types(connector_types)
        available_document_types = await connector_service.get_available_document_types(
            search_space_id
        )
    except Exception as e:
        logging.warning(
            "Connector/doc-type discovery failed; excluding connector subagents this turn: %s",
            e,
        )
    # Fail closed: a None list short-circuits ``get_subagents_to_exclude`` to "exclude
    # nothing", which would silently advertise every connector specialist on a flaky
    # discovery call. Empty list excludes connector-gated subagents while keeping builtins.
    if available_connectors is None:
        available_connectors = []
    if available_document_types is None:
        available_document_types = []
    _perf_log.info(
        "[create_agent] Connector/doc-type discovery in %.3fs",
        time.perf_counter() - _t0,
    )
    visibility = thread_visibility or ChatVisibility.PRIVATE
    # Model "profile" (when the provider exposes one as a dict) carries the
    # context-window size used for token budgeting downstream.
    _model_profile = getattr(llm, "profile", None)
    _max_input_tokens: int | None = (
        _model_profile.get("max_input_tokens")
        if isinstance(_model_profile, dict)
        else None
    )
    # Shared dependency bag handed to tool builders and to every subagent.
    dependencies: dict[str, Any] = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,
        "thread_id": thread_id,
        "thread_visibility": visibility,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "max_input_tokens": _max_input_tokens,
        "llm": llm,
    }
    _t0 = time.perf_counter()
    try:
        mcp_tools_by_agent = await load_mcp_tools_by_connector(
            db_session, search_space_id
        )
    except Exception as e:
        # Degrade to builtins-only rather than aborting the turn: a transient
        # DB or MCP-server hiccup should not deny the user a response.
        logging.warning(
            "MCP tool discovery failed; subagents will run without MCP tools this turn: %s",
            e,
        )
        mcp_tools_by_agent = {}
    _perf_log.info(
        "[create_agent] load_mcp_tools_by_connector in %.3fs (%d buckets)",
        time.perf_counter() - _t0,
        len(mcp_tools_by_agent),
    )
    # Force-disable direct KB search on the main agent regardless of caller
    # input; the original ``disabled_tools`` is kept intact for prompt copy
    # and cache-key purposes below.
    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")
    if enabled_tools is not None:
        # Restrict the caller's enable-list to tools the main agent may bind.
        main_agent_enabled_tools = [
            n for n in enabled_tools if n in MAIN_AGENT_SURFSENSE_TOOL_NAMES
        ]
    else:
        main_agent_enabled_tools = list(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
    _t0 = time.perf_counter()
    tools = await build_tools_async(
        dependencies=dependencies,
        enabled_tools=main_agent_enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
        include_mcp_tools=False,
    )
    _flags: AgentFeatureFlags = get_flags()
    # Tool-call repair needs the catch-all "invalid" tool bound exactly once.
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
    }:
        tools = [*list(tools), invalid_tool]
    _perf_log.info(
        "[create_agent] build_tools_async in %.3fs (%d tools)",
        time.perf_counter() - _t0,
        len(tools),
    )
    _t0 = time.perf_counter()
    _enabled_tool_names = {t.name for t in tools}
    # Prompt copy reflects only user-disabled tools, not the forced KB disable.
    _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()
    _model_name: str | None = None
    prof = getattr(llm, "model_name", None) or getattr(llm, "model", None)
    if isinstance(prof, str):
        _model_name = prof
    _connector_exclude = get_subagents_to_exclude(available_connectors)
    _registry_subagent_prompt_lines = main_prompt_registry_subagent_lines(
        _connector_exclude
    )
    if agent_config is not None:
        system_prompt = build_main_agent_system_prompt(
            today=None,
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            model_name=_model_name or getattr(agent_config, "model_name", None),
            registry_subagent_prompt_lines=_registry_subagent_prompt_lines,
        )
    else:
        system_prompt = build_main_agent_system_prompt(
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            citations_enabled=True,
            model_name=_model_name,
            registry_subagent_prompt_lines=_registry_subagent_prompt_lines,
        )
    _perf_log.info(
        "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
    )
    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT
    config_id = agent_config.config_id if agent_config is not None else None
    _t0 = time.perf_counter()
    agent = await build_agent_with_cache(
        llm=llm,
        tools=tools,
        final_system_prompt=final_system_prompt,
        backend_resolver=backend_resolver,
        filesystem_mode=filesystem_selection.mode,
        search_space_id=search_space_id,
        user_id=user_id,
        thread_id=thread_id,
        visibility=visibility,
        anon_session_id=anon_session_id,
        available_connectors=available_connectors,
        available_document_types=available_document_types,
        mentioned_document_ids=mentioned_document_ids,
        max_input_tokens=_max_input_tokens,
        flags=_flags,
        checkpointer=checkpointer,
        subagent_dependencies=dependencies,
        mcp_tools_by_agent=mcp_tools_by_agent,
        disabled_tools=disabled_tools,
        config_id=config_id,
    )
    _perf_log.info(
        "[create_agent] Middleware stack + graph compiled in %.3fs",
        time.perf_counter() - _t0,
    )
    _perf_log.info(
        "[create_agent] Total agent creation in %.3fs",
        time.perf_counter() - _t_agent_total,
    )
    return agent

View file

@ -0,0 +1,7 @@
"""Main-agent system prompt — not shared verbatim with single-agent ``new_chat``."""
from __future__ import annotations
from .builder import build_main_agent_system_prompt
__all__ = ["build_main_agent_system_prompt"]

View file

@ -0,0 +1,7 @@
"""Assemble the main-agent system prompt from ``markdown/*.md`` fragments."""
from __future__ import annotations
from .compose import build_main_agent_system_prompt
__all__ = ["build_main_agent_system_prompt"]

View file

@ -0,0 +1,53 @@
"""Assemble the **main-agent** deep-agent system string only.
Sections (order matters): core instructions → provider hints → citations → dynamic
``<registry_subagents>`` → SurfSense ``<tools>``.
"""
from __future__ import annotations
from datetime import UTC, datetime
from app.db import ChatVisibility
from .sections.citations import build_citations_section
from .sections.provider import build_provider_section
from .sections.registry_subagents import build_registry_subagents_section
from .sections.system_instruction import build_default_system_instruction_xml
from .sections.tools import build_tools_section
def build_main_agent_system_prompt(
    *,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
    enabled_tool_names: set[str] | None = None,
    disabled_tool_names: set[str] | None = None,
    custom_system_instructions: str | None = None,
    use_default_system_instructions: bool = True,
    citations_enabled: bool = True,
    model_name: str | None = None,
    registry_subagent_prompt_lines: list[tuple[str, str]] | None = None,
) -> str:
    """Assemble the main-agent system string.

    Section order matters: system instruction (custom or default), provider
    hints, citations, dynamic ``<registry_subagents>``, then the SurfSense
    ``<tools>`` block.

    Args:
        today: Override for "today"; defaults to the current UTC date.
        thread_visibility: Defaults to ``ChatVisibility.PRIVATE`` when None.
        custom_system_instructions: User-authored instructions; when non-blank
            they replace the default instruction block. May contain a literal
            ``{resolved_today}`` placeholder.
        use_default_system_instructions: When no custom instructions are set,
            controls whether the default block is emitted at all.
    """
    resolved_today = (today or datetime.now(UTC)).astimezone(UTC).date().isoformat()
    visibility = thread_visibility or ChatVisibility.PRIVATE
    if custom_system_instructions and custom_system_instructions.strip():
        # Custom instructions are user-authored and may legitimately contain
        # braces (code or JSON samples). str.format() would raise
        # KeyError/ValueError on those, so substitute only the documented
        # {resolved_today} placeholder.
        system_block = custom_system_instructions.replace(
            "{resolved_today}", resolved_today
        )
    elif use_default_system_instructions:
        system_block = build_default_system_instruction_xml(
            visibility=visibility,
            resolved_today=resolved_today,
        )
    else:
        system_block = ""
    system_block += build_provider_section(model_name=model_name)
    system_block += build_citations_section(citations_enabled=citations_enabled)
    system_block += build_registry_subagents_section(registry_subagent_prompt_lines)
    system_block += build_tools_section(
        visibility=visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
    )
    return system_block

View file

@ -0,0 +1,16 @@
"""Load main-agent-only markdown from ``system_prompt/markdown/`` (``importlib.resources``)."""
from __future__ import annotations
from importlib import resources
# Package that holds the main-agent-only markdown fragments.
_PROMPTS_PACKAGE = "app.agents.multi_agent_chat.main_agent.system_prompt.markdown"


def read_prompt_md(filename: str) -> str:
    """Return the text of ``markdown/<filename>`` (e.g. ``agent_private.md`` or
    ``tools/_preamble.md``).

    A single trailing newline is stripped; a missing fragment yields "".
    """
    resource = resources.files(_PROMPTS_PACKAGE).joinpath(filename)
    if not resource.is_file():
        return ""
    content = resource.read_text(encoding="utf-8")
    return content.removesuffix("\n")

View file

@ -0,0 +1,50 @@
"""Provider-specific style hints from ``markdown/providers/`` (main agent only)."""
from __future__ import annotations
import re
from .load_md import read_prompt_md
# Variant identifier for provider style hints (plain string alias).
ProviderVariant = str
_OPENAI_CODEX_RE = re.compile(
    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b", re.IGNORECASE
)
_OPENAI_REASONING_RE = re.compile(r"\b(gpt-5|o\d|o-)", re.IGNORECASE)
_OPENAI_CLASSIC_RE = re.compile(r"\bgpt-4", re.IGNORECASE)
_ANTHROPIC_RE = re.compile(r"\bclaude\b", re.IGNORECASE)
_GOOGLE_RE = re.compile(r"\bgemini\b", re.IGNORECASE)
_KIMI_RE = re.compile(r"\b(kimi[-\d.]*|moonshot)\b", re.IGNORECASE)
_GROK_RE = re.compile(r"\bgrok\b", re.IGNORECASE)
_DEEPSEEK_RE = re.compile(r"\bdeepseek\b", re.IGNORECASE)
# Checked in order; first match wins. Codex must precede the broader OpenAI
# reasoning/classic patterns so "gpt-5.1-codex" lands on the codex variant.
_VARIANT_RULES: tuple[tuple[re.Pattern[str], ProviderVariant], ...] = (
    (_OPENAI_CODEX_RE, "openai_codex"),
    (_OPENAI_REASONING_RE, "openai_reasoning"),
    (_OPENAI_CLASSIC_RE, "openai_classic"),
    (_ANTHROPIC_RE, "anthropic"),
    (_GOOGLE_RE, "google"),
    (_KIMI_RE, "kimi"),
    (_GROK_RE, "grok"),
    (_DEEPSEEK_RE, "deepseek"),
)


def detect_provider_variant(model_name: str | None) -> ProviderVariant:
    """Map a model name to its provider style-hint variant ("default" when unknown)."""
    if not model_name:
        return "default"
    candidate = model_name.strip()
    for pattern, variant in _VARIANT_RULES:
        if pattern.search(candidate):
            return variant
    return "default"
def build_provider_hint_block(provider_variant: ProviderVariant) -> str:
    """Return the provider's style-hint fragment framed in newlines.

    Yields "" for the default/unknown variant or a missing fragment file.
    """
    if provider_variant and provider_variant != "default":
        text = read_prompt_md(f"providers/{provider_variant}.md")
        if text:
            return f"\n{text}\n"
    return ""

View file

@ -0,0 +1 @@
"""Rendered slices of the main-agent system prompt."""

View file

@ -0,0 +1,11 @@
"""Citation fragment for the main agent (chunk-tagged context only)."""
from __future__ import annotations
from ..load_md import read_prompt_md
def build_citations_section(*, citations_enabled: bool) -> str:
    """Citation-policy fragment, newline-framed; "" when the fragment is missing."""
    fragment_name = "citations_on.md" if citations_enabled else "citations_off.md"
    text = read_prompt_md(fragment_name)
    if not text:
        return ""
    return f"\n{text}\n"

View file

@ -0,0 +1,9 @@
"""Provider-specific style hints."""
from __future__ import annotations
from ..provider_hints import build_provider_hint_block, detect_provider_variant
def build_provider_section(*, model_name: str | None) -> str:
    """Provider style-hint section: detect the variant, then load its fragment."""
    variant = detect_provider_variant(model_name)
    return build_provider_hint_block(variant)

View file

@ -0,0 +1,27 @@
"""Dynamic ``<registry_subagents>`` block: **task** specialists actually built for this workspace."""
from __future__ import annotations
def build_registry_subagents_section(
    registry_subagent_lines: list[tuple[str, str]] | None,
) -> str:
    """Render the dynamic ``<registry_subagents>`` block.

    ``None`` means the caller computed no registry lines at all (emit nothing);
    an empty list means the registry ran but no specialist matched this
    workspace (emit an explicit "none listed" block).
    """
    if registry_subagent_lines is None:
        return ""
    if not registry_subagent_lines:
        return (
            "\n<registry_subagents>\n"
            "No registry specialists are listed for **task** in this workspace.\n"
            "</registry_subagents>\n"
        )
    header = (
        "\n<registry_subagents>\n"
        "These specialists are registered for **task** (routes without a matching connector are omitted).\n"
    )
    footer = (
        "The runtime may also offer a general-purpose **task** helper with your tools in a separate context.\n"
        "Pick the specialist by **name**. Put full instructions in the task prompt; they do not see this thread.\n"
        "</registry_subagents>\n"
    )
    bullets = "\n".join(
        f"- **{name}** — {desc}" for name, desc in registry_subagent_lines
    )
    return header + bullets + "\n" + footer

View file

@ -0,0 +1,35 @@
"""Default ``<system_instruction>`` block for the main agent only."""
from __future__ import annotations
from app.db import ChatVisibility
from ..load_md import read_prompt_md
# Fragment order for private (single-user) threads.
_PRIVATE_ORDER = (
    "agent_private.md",
    "kb_only_policy_private.md",
    "main_agent_tool_routing.md",
    "parameter_resolution.md",
    "memory_protocol_private.md",
)
# Fragment order for shared team-space threads.
_TEAM_ORDER = (
    "agent_team.md",
    "kb_only_policy_team.md",
    "main_agent_tool_routing.md",
    "parameter_resolution.md",
    "memory_protocol_team.md",
)


def build_default_system_instruction_xml(
    *,
    visibility: ChatVisibility,
    resolved_today: str,
) -> str:
    """Render the default ``<system_instruction>`` block for the main agent.

    Joins the visibility-appropriate markdown fragments (missing ones are
    skipped) and fills any literal ``{resolved_today}`` placeholder they
    contain with the pre-formatted ISO date string.
    """
    order = _TEAM_ORDER if visibility == ChatVisibility.SEARCH_SPACE else _PRIVATE_ORDER
    parts = [read_prompt_md(name) for name in order]
    body = "\n\n".join(p for p in parts if p)
    # Substitute only the known placeholder. Calling str.format() on the
    # joined body would raise KeyError/ValueError on any other brace a
    # fragment happens to contain (e.g. a JSON or code sample), so use a
    # targeted replace instead.
    body = body.replace("{resolved_today}", resolved_today)
    return f"\n<system_instruction>\n{body}\n\n</system_instruction>\n"

View file

@ -0,0 +1,20 @@
"""Main-agent ``<tools>`` block (memory + research builtins only; see ``main_agent.tools``)."""
from __future__ import annotations
from app.db import ChatVisibility
from ..tool_instruction_block import build_tools_instruction_block
def build_tools_section(
    *,
    visibility: ChatVisibility,
    enabled_tool_names: set[str] | None,
    disabled_tool_names: set[str] | None,
) -> str:
    """Main-agent ``<tools>`` section: a thin pass-through to the shared builder.

    Kept as its own section module so ``compose`` treats every prompt section
    uniformly; all rendering logic lives in ``tool_instruction_block``.
    """
    return build_tools_instruction_block(
        visibility=visibility,
        enabled_tool_names=enabled_tool_names,
        disabled_tool_names=disabled_tool_names,
    )

View file

@ -0,0 +1,86 @@
"""``<tools>`` + ``<tool_call_examples>`` from ``system_prompt/markdown/{tools,examples}/``.
Only documents the tools the main agent actually binds, not the full ``new_chat`` set.
"""
from __future__ import annotations
from app.db import ChatVisibility
from ...tools import MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED
from .load_md import read_prompt_md
# Tools whose prompt fragments differ per thread visibility (team vs private).
_MEMORY_VARIANT_TOOLS: frozenset[str] = frozenset({"update_memory"})


def _tool_fragment_path(tool_name: str, variant: str) -> str:
    """Relative path of the tool's instruction fragment under ``markdown/``."""
    suffix = f"_{variant}" if tool_name in _MEMORY_VARIANT_TOOLS else ""
    return f"tools/{tool_name}{suffix}.md"


def _example_fragment_path(tool_name: str, variant: str) -> str:
    """Relative path of the tool's example fragment under ``markdown/``."""
    suffix = f"_{variant}" if tool_name in _MEMORY_VARIANT_TOOLS else ""
    return f"examples/{tool_name}{suffix}.md"


def _format_tool_label(tool_name: str) -> str:
    """Human-readable label for prompt copy: ``search_docs`` -> ``Search Docs``."""
    return " ".join(part.title() for part in tool_name.split("_"))
def build_tools_instruction_block(
    *,
    visibility: ChatVisibility,
    enabled_tool_names: set[str] | None,
    disabled_tool_names: set[str] | None,
) -> str:
    """Render the main agent's tool instructions plus ``<tool_call_examples>``.

    Walks ``MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED`` — that tuple fixes the
    prompt order — loading one instruction fragment and one optional example
    fragment per enabled tool, then appends a notice covering user-disabled
    tools.

    NOTE(review): this function emits the closing ``</tools>`` tag only; the
    opening ``<tools>`` tag is presumably inside ``tools/_preamble.md`` —
    confirm that fragment actually opens it.
    """
    # Memory fragments carry visibility-specific wording.
    variant = "team" if visibility == ChatVisibility.SEARCH_SPACE else "private"
    parts: list[str] = []
    preamble = read_prompt_md("tools/_preamble.md")
    if preamble:
        parts.append(preamble + "\n")
    examples: list[str] = []
    for tool_name in MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED:
        # ``None`` means "no filter": document every known main-agent tool.
        if enabled_tool_names is not None and tool_name not in enabled_tool_names:
            continue
        instruction = read_prompt_md(_tool_fragment_path(tool_name, variant))
        if instruction:
            parts.append(instruction + "\n")
        example = read_prompt_md(_example_fragment_path(tool_name, variant))
        if example:
            examples.append(example + "\n")
    # Mention only disabled tools the main agent would otherwise document.
    known_disabled = (
        set(disabled_tool_names) & set(MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED)
        if disabled_tool_names
        else set()
    )
    if known_disabled:
        # Iterate the ordered tuple so the listing order is deterministic.
        disabled_list = ", ".join(
            _format_tool_label(n)
            for n in MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED
            if n in known_disabled
        )
        parts.append(
            "\n"
            "DISABLED TOOLS (by user, main-agent scope):\n"
            f"These SurfSense tools were disabled on the main agent for this session: {disabled_list}.\n"
            "You do NOT have access to them and MUST NOT claim you can use them.\n"
            "If the user still needs that capability, delegate with **task** if a subagent covers it,\n"
            "otherwise explain it is disabled on the main agent for this session.\n"
        )
    parts.append("\n</tools>\n")
    if examples:
        parts.append("<tool_call_examples>")
        parts.extend(examples)
        parts.append("</tool_call_examples>\n")
    return "".join(parts)

View file

@ -0,0 +1 @@
"""Markdown fragments for the **main-agent** system prompt only (`importlib.resources`)."""

View file

@ -0,0 +1,9 @@
You are SurfSense's **main agent**: you answer using the user's knowledge context,
lightweight research tools, and memory — and you **delegate** integrations and
specialized work via **task** (see `<tool_routing>` in this prompt).
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -0,0 +1,11 @@
You are SurfSense's **main agent** for this team space: you answer using shared
knowledge context, lightweight research tools, and memory — and you **delegate**
integrations and specialized work via **task** (see `<tool_routing>` in this prompt).
In this team thread, each message is prefixed with **[DisplayName of the author]**. Use this to attribute and reference the author of anything in the discussion (who asked a question, made a suggestion, or contributed an idea) and to cite who said what in your answers.
Today's date (UTC): {resolved_today}
When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.
NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.

View file

@ -0,0 +1,15 @@
<citation_instructions>
IMPORTANT: Citations are DISABLED for this configuration.
DO NOT include `[citation:…]` markers anywhere — even if tool descriptions or examples
mention them. Ignore citation-format reminders elsewhere in this prompt when they conflict
with this block.
Instead:
1. Answer in plain prose; optional markdown links to public URLs when sources are URLs.
2. Do NOT expose raw chunk IDs, document IDs, or internal IDs to the user.
3. Present indexed or doc-search facts naturally without attribution markers.
When answering from workspace or docs context: integrate facts cleanly without claiming
“this comes from chunk X”.
</citation_instructions>

View file

@ -0,0 +1,15 @@
<citation_instructions>
This block appears **before** `<tools>` so it wins over any tool-example wording below.
Apply chunk citations **only** when the runtime injects `<document>` / `<chunk id='…'>` blocks
(e.g. from SurfSense docs search or priority documents).
1. For each factual statement taken from those chunks, add `[citation:chunk_id]` using the **exact** `chunk_id` string from `<chunk id='…'>`.
2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated).
3. Never invent or normalize ids; if unsure, omit the citation.
4. Plain brackets only — no markdown links, no `([citation:…](url))`, no footnote numbering.
Chunk ids may be numeric, prefixed (e.g. `doc-45`), or URLs when the source is web-shaped — copy verbatim.
If no chunk-tagged documents appear in context this turn, do not fabricate citations.
</citation_instructions>

View file

@ -0,0 +1,13 @@
- User: "Check out https://dev.to/some-article"
- Call: `scrape_webpage(url="https://dev.to/some-article")`
- Respond with a structured analysis — key points, takeaways.
- User: "Read this article and summarize it for me: https://example.com/blog/ai-trends"
- Call: `scrape_webpage(url="https://example.com/blog/ai-trends")`
- Respond with a thorough summary using headings and bullet points.
- User: (after discussing https://example.com/stats) "Can you get the live data from that page?"
- Call: `scrape_webpage(url="https://example.com/stats")`
- IMPORTANT: Always attempt scraping first. Never refuse before trying the tool.
- User: "https://example.com/blog/weekend-recipes"
- Call: `scrape_webpage(url="https://example.com/blog/weekend-recipes")`
- When a user sends just a URL with no instructions, scrape it and provide a concise summary of the content.

View file

@ -0,0 +1,9 @@
- User: "How do I install SurfSense?"
- Call: `search_surfsense_docs(query="installation setup")`
- User: "What connectors does SurfSense support?"
- Call: `search_surfsense_docs(query="available connectors integrations")`
- User: "How do I set up the Notion connector?"
- Call: `search_surfsense_docs(query="Notion connector setup configuration")` (how-to docs). Changing data inside Notion itself → **task**.
- User: "How do I use Docker to run SurfSense?"
- Call: `search_surfsense_docs(query="Docker installation setup")`

View file

@ -0,0 +1,16 @@
- <user_name>Alex</user_name>, <user_memory> is empty. User: "I'm a space enthusiast, explain astrophage to me"
- The user casually shared a durable fact. Use their first name in the entry, short neutral heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n")
- User: "Remember that I prefer concise answers over detailed explanations"
- Durable preference. Merge with existing memory, add a new heading:
update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n")
- User: "I actually moved to Tokyo last month"
- Updated fact, date prefix reflects when recorded:
update_memory(updated_memory="## Interests & background\n...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...")
- User: "I'm a freelance photographer working on a nature documentary"
- Durable background info under a fitting heading:
update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n")
- User: "Always respond in bullet points"
- Standing instruction:
update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n")

View file

@ -0,0 +1,7 @@
- User: "Let's remember that we decided to do weekly standup meetings on Mondays"
- Durable team decision:
update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...")
- User: "Our office is in downtown Seattle, 5th floor"
- Durable team fact:
update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...")

View file

@ -0,0 +1,8 @@
- User: "What's the current USD to INR exchange rate?"
- Call: `web_search(query="current USD to INR exchange rate")`
- Answer from returned snippets or scrape a top URL if needed; use markdown links to sources.
- User: "What's the latest news about AI?"
- Call: `web_search(query="latest AI news today")`
- User: "What's the weather in New York?"
- Call: `web_search(query="weather New York today")`

View file

@ -0,0 +1,19 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected workspace
documents (when present), **search_surfsense_docs**, **web_search**, **scrape_webpage**,
or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual or informational questions from general knowledge unless the user
explicitly grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in their workspace/docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense (e.g. "what can you do?")
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions ("bullet-point this paragraph")
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -0,0 +1,19 @@
<knowledge_base_only_policy>
CRITICAL RULE — KNOWLEDGE BASE FIRST, NEVER DEFAULT TO GENERAL KNOWLEDGE:
- Ground factual answers in what you actually receive this turn: injected shared
workspace documents (when present), **search_surfsense_docs**, **web_search**,
**scrape_webpage**, or substantive results summarized from a **task** subagent you invoked.
- Do NOT answer factual questions from general knowledge unless a team member explicitly
grants permission after you say you did not find enough in those sources.
- If indexed/docs search returns nothing relevant AND **web_search** / **scrape_webpage**
(and **task**, if already tried appropriately) still do not supply an answer, you MUST:
1. Say you could not find enough in shared docs/tools output.
2. Ask: "Would you like me to answer from my general knowledge instead?"
3. ONLY then answer from general knowledge after they clearly say yes.
- This policy does NOT apply to:
* Casual conversation, greetings, or meta-questions about SurfSense
* Formatting or analysis of content already in the chat
* Clear rewrite/edit instructions
* Lightweight research with **web_search** / **scrape_webpage**
* Work that belongs on a specialist — use **task**; see `<tool_routing>`
</knowledge_base_only_policy>

View file

@ -0,0 +1,27 @@
<tool_routing>
Use **task** for anything beyond your direct SurfSense tools: calendar, mail,
chat, tickets, documents in third-party systems, connector-specific discovery,
deliverables (reports, podcasts, images, etc.), and other specialized routes.
The live list of specialists you may target with **task** for this workspace is in
`<registry_subagents>` (later in this prompt).
Your **direct** SurfSense tools are only: **update_memory**, **web_search**,
**scrape_webpage**, and **search_surfsense_docs**. The runtime may also attach
deep-agent helpers (e.g. todos, filesystem, **task** itself). Use **task** whenever
the user needs capabilities **not** listed in the `<tools>` section (that section appears
later in this system prompt, after citation rules).
Do not treat live third-party state as if it were already in the indexed knowledge
base; reach it via **task**.
Never emit more than one **task** tool call in the same turn. Bundle related work
for the same specialist into a single **task** invocation (the subagent itself can
call its own tools in parallel inside that one run). Parallel **task** calls would
fan out into multiple concurrent subagent runs whose human-approval interrupts
cannot be coordinated; one **task** at a time is required.
</tool_routing>
<!-- TODO: lift the single-task constraint once the runtime supports parallel task
interrupts end-to-end (multi-interrupt SSE + interrupt-id-keyed Command(resume)
+ keyed surfsense_resume_value side-channel). Until then this nudge is the only
guard; the parent graph's resume cannot address multiple pending interrupts. -->

View file

@ -0,0 +1,6 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the user (role, interests, preferences, projects,
background, or standing instructions)? If yes, you MUST call update_memory
alongside your normal response — do not defer this to a later turn.
</memory_protocol>

View file

@ -0,0 +1,6 @@
<memory_protocol>
IMPORTANT — After understanding each user message, ALWAYS check: does this message
reveal durable facts about the team (decisions, conventions, architecture, processes,
or key facts)? If yes, you MUST call update_memory alongside your normal response —
do not defer this to a later turn.
</memory_protocol>

View file

@ -0,0 +1,15 @@
<parameter_resolution>
You do **not** call connector-specific discovery tools yourself (accounts, channels,
Jira cloud IDs, Airtable bases, Slack channels, etc.). Those tools exist only on
**task** subagents.
When the user needs work inside a connected product, delegate with **task** and a
clear goal. If several Slack channels, Jira projects, calendar calendars, etc. could
match and only the integration can list them, **you must not** ask the human for
internal IDs (UUIDs, cloud IDs, opaque keys). The **task** subagent uses connector
tools to list candidates and either picks the only sensible match or asks the user
to choose using **normal labels** (e.g. channel display name, project title), not raw IDs.
If you already have plain-language choices from the user or from prior tool output,
you may pass them through to **task** without re-discovery.
</parameter_resolution>

View file

@ -0,0 +1,16 @@
<provider_hints>
You are running on an Anthropic Claude model (SurfSense **main agent**).
Structured reasoning:
- For non-trivial work, `<thinking>` / short `<plan>` before tool calls is fine.
Professional objectivity:
- Accuracy over flattery; verify with **search_surfsense_docs**, **web_search**, **scrape_webpage**, or **task** when unsure — don't invent connector access.
Task management:
- For 3+ steps, use todo tooling; update statuses promptly.
Tool calls:
- Parallelise independent calls; sequence only when outputs chain.
- Never pretend you can run connector-specific tools directly — route through **task** when needed.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a DeepSeek model (SurfSense **main agent**).
Reasoning hygiene (R1-aware):
- Keep internal scratch separate from the user-facing answer; don't leak chain-of-thought into tool arguments.
Output style:
- Concise; lead with the answer or the next action; avoid sycophantic openers.
Attribution:
- When citations are **enabled** and facts come from chunk-tagged context, follow the citation block above.
- When citations are **disabled**, do not use `[citation:…]`.
Tool calls:
- Parallelise independent calls.
- Prefer **search_surfsense_docs** for SurfSense docs/product questions before **web_search** when that fits the ask.
- Don't invent paths, chunk ids, or URLs — only values from tools or the user.
</provider_hints>

View file

@ -0,0 +1,18 @@
<provider_hints>
You are running on a Google Gemini model (SurfSense **main agent**).
Output style:
- Concise & direct. Fewer than ~3 lines of prose when the task allows (excluding tool output and code).
- No filler openers/closers — move straight to the answer or the tool call.
- GitHub-flavoured Markdown; monospace-friendly.
Workflow (Understand → Plan → Act → Verify):
1. **Understand:** parse the ask; use **search_surfsense_docs** / injected workspace context before guessing.
2. **Plan:** for multi-step work, a short plan first.
3. **Act:** only with tools you actually have on this agent (see `<tools>` and `<tool_routing>`). Connector work → **task**.
4. **Verify:** re-read or re-search only when it materially reduces risk.
Discipline:
- Do not imply access to connectors, MCP tools, or deliverable generators except via **task**.
- Path arguments for filesystem tools must be exact strings from tool results — never invent paths.
</provider_hints>

View file

@ -0,0 +1,16 @@
<provider_hints>
You are running on an xAI Grok model (SurfSense **main agent**).
Maximum terseness:
- Fewer than 4 lines unless detail is requested; skip preamble/postamble.
Tool discipline:
- Typically one investigative tool per turn unless several independent read-only queries are clearly needed; don't repeat identical calls.
Attribution:
- When citations are **enabled** (see citation block above) and you answer from chunk-tagged documents, use `[citation:chunk_id]` exactly as specified there.
- When citations are **disabled**, never emit `[citation:…]` — plain prose and links per tool guidance.
Style:
- No emojis unless asked; flat lists for short answers.
</provider_hints>

View file

@ -0,0 +1,21 @@
<provider_hints>
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+), SurfSense **main agent**.
Action bias:
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
Tool calls:
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
Language:
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
Discipline:
- Stay on track. Never give the user more than what they asked for.
- Fact-check with tools; don't fabricate chunk ids or connector outcomes.
- Keep it stupidly simple. Don't overcomplicate.
</provider_hints>

View file

@ -0,0 +1,20 @@
<provider_hints>
You are running on a classic OpenAI chat model (GPT-4 family), SurfSense **main agent**.
Persistence:
- Finish the user's request in the same turn when tools allow — don't stop at intent only.
- If a tool errors, fix arguments and retry once before giving up.
Planning:
- For 3+ steps, use the todo / planning tool; mark `in_progress` / `completed` promptly.
- One short sentence before non-trivial tool use is fine.
Output style:
- Conversational but professional; bullets for findings; fenced code with language tags when needed.
- Summarize tool output — don't paste walls of text.
Tool calls:
- Parallelise independent calls in one turn.
- Prefer **search_surfsense_docs** for SurfSense-product questions, **web_search** / **scrape_webpage**
for fresh public facts; integrations and heavy workflows → **task**.
</provider_hints>

View file

@ -0,0 +1,13 @@
<provider_hints>
You are running on an OpenAI Codex-class model (SurfSense **main agent**).
Output style:
- Concise; don't paste huge fetch blobs — summarize.
- When citations are **enabled** and you rely on chunk-tagged docs, references may use `[citation:chunk_id]` per the citation block above; when **disabled**, use prose and URLs only.
- Numbered lists work well when the user should reply with a single option index.
- No emojis; single-level bullets.
Tool calls:
- Parallelise independent calls; chain only when required.
- Don't ask permission for obvious safe defaults — state what you did.
</provider_hints>

View file

@ -0,0 +1,22 @@
<provider_hints>
You are running on an OpenAI reasoning model (GPT-5+ / o-series), SurfSense **main agent**.
Output style:
- Be terse and direct. Don't restate the user's request before answering.
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
Channels (for clients that support them):
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
Tool calls:
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
- Connector or integration execution belongs in **task**, not invented main-agent tools.
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
Autonomy:
- Persist until the task is fully resolved within the current turn whenever feasible — within tools you actually have; delegate the rest via **task**.
</provider_hints>

View file

@ -0,0 +1,9 @@
<tools>
You have access to the following **SurfSense** tools (main-agent scope only):
IMPORTANT: You can ONLY use the tools listed below. Anything else — connectors,
deliverables, or multi-step integration work — goes through **task**, not as a
tool in this list.
Do NOT claim you can use a capability if it is not listed here.

View file

@ -0,0 +1,10 @@
- scrape_webpage: Fetch and extract readable content from a single HTTP(S) URL.
- Use when the user wants the *actual page body* (article, table, dashboard snapshot), not just search snippets.
- Try the tool when a URL is given or referenced; don't refuse without attempting unless the URL is clearly unsafe/invalid.
- Args:
- url: Page to fetch
- max_length: Cap on returned characters (default: 50000)
- Returns: Title, metadata, and markdown-ish body.
- Summarize clearly afterward; link back with `[label](url)`.
- If indexed workspace material is insufficient and the user points at a public URL, scraping is appropriate — still not a substitute for **task** on private connectors.

View file

@ -0,0 +1,9 @@
- search_surfsense_docs: Search official SurfSense documentation (product help).
- Use when the user asks how SurfSense works, setup, connectors at a high level, configuration, etc.
- Not a substitute for **task** when they need actions inside Gmail/Slack/Jira/etc.
- Args:
- query: What to look up in SurfSense docs
- top_k: Number of chunks to retrieve (default: 10)
- Returns: Doc excerpts; chunk ids may appear for attribution — follow the **citation**
instructions block above when citations are enabled; otherwise summarize without `[citation:…]`.

View file

@ -0,0 +1,12 @@
- update_memory: Curate the **personal** long-term memory document for this user.
- Current memory (if any) appears in `<user_memory>` with usage vs limit.
- Call when the user asks to remember/forget, or shares durable facts/preferences/instructions.
- Use the first name from `<user_name>` when writing entries — write “Alex prefers…” not “The user prefers…”.
Do not store the name alone as a memory entry.
- Skip ephemeral chat noise (one-off q/a, greetings, session logistics).
- Args:
- updated_memory: FULL replacement markdown (merge and curate — don't only append).
- Formatting rules:
- Bullets: `- (YYYY-MM-DD) [marker] text` with markers `[fact]`, `[pref]`, `[instr]` (priority when trimming: instr > pref > fact).
- Each bullet under a short `##` heading; keep total size under the limit shown in `<user_memory>`.

View file

@ -0,0 +1,26 @@
- update_memory: Update the team's shared memory document for this search space.
- Your current team memory is already in <team_memory> in your context. The `chars`
and `limit` attributes show current usage and the maximum allowed size.
- This is the team's curated long-term memory — decisions, conventions, key facts.
- NEVER store personal memory in team memory (e.g. personal bio, individual
preferences, or user-only standing instructions).
- Call update_memory when:
* A team member explicitly asks to remember or forget something
* The conversation surfaces durable team decisions, conventions, or facts
that will matter in future conversations
- Do not store short-lived or ephemeral info: one-off questions, greetings,
session logistics, or things that only matter for the current task.
- Args:
- updated_memory: The FULL updated markdown document (not a diff).
Merge new facts with existing ones, update contradictions, remove outdated entries.
Treat every update as a curation pass — consolidate, don't just append.
- Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text
Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory.
- Keep it concise and well under the character limit shown in <team_memory>.
- Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and
natural. Organize by context — e.g. what the team decided, current architecture,
active processes. Create, split, or merge headings freely as the memory grows.
- Each entry MUST be a single bullet point. Be descriptive but concise — include relevant
details and context rather than just a few words.
- During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities.

View file

@ -0,0 +1,10 @@
- web_search: Live public-web search (whatever search backends the workspace configured).
- Use for current events, prices, weather, news, or anything needing fresh public web data.
- For those queries, call this tool rather than guessing from memory or claiming you lack network access.
- If results are thin, say so and offer to refine the query.
- Args:
- query: Specific search terms
- top_k: Max hits (default: 10, max: 50)
- If snippets are too shallow, follow up with **scrape_webpage** on the best URL.
- Present sources with readable markdown links `[label](url)` — never bare URLs.

View file

@ -0,0 +1,10 @@
"""Main-agent SurfSense tool allowlist."""
from __future__ import annotations
from .index import (
MAIN_AGENT_SURFSENSE_TOOL_NAMES,
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)
__all__ = ["MAIN_AGENT_SURFSENSE_TOOL_NAMES", "MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED"]

View file

@ -0,0 +1,17 @@
"""Main-agent SurfSense builtin tool names (not full ``new_chat``).
Connector integrations, MCP, deliverables, etc. are delegated via ``task`` subagents.
"""
from __future__ import annotations
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED: tuple[str, ...] = (
"search_surfsense_docs",
"web_search",
"scrape_webpage",
"update_memory",
)
MAIN_AGENT_SURFSENSE_TOOL_NAMES: frozenset[str] = frozenset(
MAIN_AGENT_SURFSENSE_TOOL_NAMES_ORDERED,
)

View file

@ -0,0 +1,7 @@
"""Multi-agent middleware stack assembly."""
from __future__ import annotations
from .stack import build_main_agent_deepagent_middleware
__all__ = ["build_main_agent_deepagent_middleware"]

View file

@ -0,0 +1,36 @@
"""Audit row per tool call (reversibility metadata)."""
from __future__ import annotations
import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ActionLogMiddleware
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from ..shared.flags import enabled
def build_action_log_mw(
    *,
    flags: AgentFeatureFlags,
    thread_id: int | None,
    search_space_id: int,
    user_id: str | None,
) -> ActionLogMiddleware | None:
    """Build the per-tool-call audit middleware, or ``None`` when disabled.

    Returns ``None`` when the ``enable_action_log`` feature flag is off or
    when there is no persisted thread to attach audit rows to
    (``thread_id is None``). Construction failures are logged and swallowed
    so a broken audit layer never takes down the chat turn itself.

    Args:
        flags: Resolved agent feature flags for this request.
        thread_id: Persisted chat-thread id, or ``None`` for ephemeral runs.
        search_space_id: Workspace the audited tool calls belong to.
        user_id: Acting user, or ``None`` (e.g. anonymous sessions).
    """
    if not enabled(flags, "enable_action_log") or thread_id is None:
        return None
    try:
        # Map tool name -> definition so the middleware can attach
        # reversibility metadata to each logged call.
        tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS}
        return ActionLogMiddleware(
            thread_id=thread_id,
            search_space_id=search_space_id,
            user_id=user_id,
            tool_definitions=tool_defs_by_name,
        )
    except Exception:  # pragma: no cover - defensive
        # Module-scoped logger (not the root logger) so the warning is
        # attributable to this module and respects app logging config.
        logging.getLogger(__name__).warning(
            "ActionLogMiddleware init failed; running without it.",
            exc_info=True,
        )
        return None

View file

@ -0,0 +1,16 @@
"""Anonymous document hydration from Redis (cloud only)."""
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import AnonymousDocumentMiddleware
def build_anonymous_doc_mw(
    *,
    filesystem_mode: FilesystemMode,
    anon_session_id: str | None,
) -> AnonymousDocumentMiddleware | None:
    """Return the anonymous-document hydration middleware, cloud mode only.

    Non-cloud filesystem modes have no Redis-backed anonymous documents, so
    the builder yields ``None`` for them.
    """
    if filesystem_mode == FilesystemMode.CLOUD:
        return AnonymousDocumentMiddleware(anon_session_id=anon_session_id)
    return None

View file

@ -0,0 +1,12 @@
"""Per-thread cooperative lock around the whole turn."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import BusyMutexMiddleware
from ..shared.flags import enabled
def build_busy_mutex_mw(flags: AgentFeatureFlags) -> BusyMutexMiddleware | None:
    """Return the per-thread turn lock when ``enable_busy_mutex`` is on, else ``None``."""
    if not enabled(flags, "enable_busy_mutex"):
        return None
    return BusyMutexMiddleware()

View file

@ -0,0 +1,26 @@
"""SubAgent ``task`` tool wiring required for HITL inside subagents.
Replaces upstream ``SubAgentMiddleware`` to:
- share the parent's checkpointer with each subagent,
- forward ``runtime.config`` (thread_id, recursion_limit, ) into nested invokes,
- bridge ``Command(resume=...)`` from the parent into the subagent via the
``config["configurable"]["surfsense_resume_value"]`` side-channel,
- target the resume at the captured interrupt id so a follow-up
``HumanInTheLoopMiddleware.after_model`` does not consume the same payload,
- re-raise any new subagent interrupt at the parent so the SSE stream surfaces it.
Module layout
-------------
- ``constants`` shared keys / limits.
- ``config`` RunnableConfig + side-channel resume read.
- ``resume`` pending-interrupt detection, fan-out, ``Command(resume=...)`` builder.
- ``propagation`` re-raise pending subagent interrupts at the parent.
- ``task_tool`` the ``task`` tool factory (sync + async).
- ``middleware`` :class:`SurfSenseCheckpointedSubAgentMiddleware` itself.
"""
from .middleware import SurfSenseCheckpointedSubAgentMiddleware
__all__ = ["SurfSenseCheckpointedSubAgentMiddleware"]

View file

@ -0,0 +1,44 @@
"""RunnableConfig wiring for nested subagent invocations.
Forwards the parent's ``runtime.config`` (thread_id, …) into the subagent and
exposes the side-channel ``stream_resume_chat`` uses to ferry resume payloads.
"""
from __future__ import annotations
from typing import Any
from langchain.tools import ToolRuntime
from .constants import DEFAULT_SUBAGENT_RECURSION_LIMIT
def subagent_invoke_config(runtime: ToolRuntime) -> dict[str, Any]:
    """Build the RunnableConfig for a nested subagent invoke.

    Copies the parent's ``runtime.config`` (thread_id, ...) and raises
    ``recursion_limit`` up to ``DEFAULT_SUBAGENT_RECURSION_LIMIT`` when the
    inherited value is missing, malformed, or smaller than that budget.
    """
    parent_cfg = runtime.config
    merged: dict[str, Any] = dict(parent_cfg) if parent_cfg else {}
    raw_limit = merged.get("recursion_limit")
    effective = 0
    if raw_limit is not None:
        try:
            effective = int(raw_limit)
        except (TypeError, ValueError):
            # Non-numeric junk is treated the same as "no limit set".
            effective = 0
    if effective < DEFAULT_SUBAGENT_RECURSION_LIMIT:
        merged["recursion_limit"] = DEFAULT_SUBAGENT_RECURSION_LIMIT
    return merged
def consume_surfsense_resume(runtime: ToolRuntime) -> Any:
    """Pop and return the queued resume payload, or ``None`` when absent.

    The payload is *removed* on read: sibling runtimes share the same
    ``configurable`` mapping by reference, so popping prevents a second
    consumer from seeing the same value.
    """
    cfg = runtime.config
    if not isinstance(cfg, dict):
        return None
    configurable = cfg.get("configurable")
    if not isinstance(configurable, dict):
        return None
    return configurable.pop("surfsense_resume_value", None)
def has_surfsense_resume(runtime: ToolRuntime) -> bool:
    """True iff a resume payload is queued on this runtime (non-destructive)."""
    cfg = runtime.config
    if isinstance(cfg, dict):
        configurable = cfg.get("configurable")
        if isinstance(configurable, dict):
            return "surfsense_resume_value" in configurable
    return False

View file

@ -0,0 +1,18 @@
"""Constants shared by the checkpointed subagent middleware."""
from __future__ import annotations
# Mirror of deepagents.middleware.subagents._EXCLUDED_STATE_KEYS.
EXCLUDED_STATE_KEYS = frozenset(
{
"messages",
"todos",
"structured_response",
"skills_metadata",
"memory_contents",
}
)
# Match the parent graph's budget; the LangGraph default of 25 trips on
# multi-step subagent runs.
DEFAULT_SUBAGENT_RECURSION_LIMIT = 10_000

View file

@ -0,0 +1,103 @@
"""SubAgent middleware that compiles each subagent against the parent checkpointer."""
from __future__ import annotations
from typing import Any, cast
from deepagents.backends.protocol import BackendFactory, BackendProtocol
from deepagents.middleware.subagents import (
TASK_SYSTEM_PROMPT,
CompiledSubAgent,
SubAgent,
SubAgentMiddleware,
)
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langchain.chat_models import init_chat_model
from langgraph.types import Checkpointer
from .task_tool import build_task_tool_with_parent_config
class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware):
    """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer."""

    def __init__(
        self,
        *,
        checkpointer: Checkpointer,
        backend: BackendProtocol | BackendFactory,
        subagents: list[SubAgent | CompiledSubAgent],
        system_prompt: str | None = TASK_SYSTEM_PROMPT,
        task_description: str | None = None,
    ) -> None:
        """Wire the ``task`` tool over *subagents*, sharing *checkpointer* with each.

        Args:
            checkpointer: Parent graph's checkpointer; threaded into every
                compiled subagent so interrupts inside a subagent persist.
            backend: Deepagents backend (or factory); stored for parity with
                the upstream middleware.
            subagents: Declarative ``SubAgent`` specs and/or pre-compiled
                ``CompiledSubAgent`` entries. Must be non-empty.
            system_prompt: Base ``task`` system prompt; the roster of
                available subagents is appended when both are present.
            task_description: Optional override for the ``task`` tool's
                description.

        Raises:
            ValueError: If *subagents* is empty.
        """
        self._surf_checkpointer = checkpointer
        # Deliberately invoke the *grandparent* initializer, skipping
        # SubAgentMiddleware.__init__: its setup is re-done below so that the
        # task tool and compiled subagents carry the parent checkpointer.
        super(SubAgentMiddleware, self).__init__()
        if not subagents:
            raise ValueError(
                "At least one subagent must be specified when using the new API"
            )
        self._backend = backend
        self._subagents = subagents
        subagent_specs = self._surf_compile_subagent_graphs()
        task_tool = build_task_tool_with_parent_config(subagent_specs, task_description)
        if system_prompt and subagent_specs:
            # Advertise the delegable specialists inside the task prompt,
            # matching the upstream "Available subagent types" convention.
            agents_desc = "\n".join(
                f"- {s['name']}: {s['description']}" for s in subagent_specs
            )
            self.system_prompt = (
                system_prompt + "\n\nAvailable subagent types:\n" + agents_desc
            )
        else:
            self.system_prompt = system_prompt
        self.tools = [task_tool]

    def _surf_compile_subagent_graphs(self) -> list[dict[str, Any]]:
        """Mirror of ``SubAgentMiddleware._get_subagents`` that threads the parent checkpointer.

        Returns:
            One ``{"name", "description", "runnable"}`` dict per subagent.

        Raises:
            ValueError: If a declarative spec omits ``model`` or ``tools``.
        """
        specs: list[dict[str, Any]] = []
        for spec in self._subagents:
            # A "runnable" key marks a pre-compiled subagent: pass it through
            # untouched (it manages its own graph and checkpointing).
            if "runnable" in spec:
                compiled = cast(CompiledSubAgent, spec)
                specs.append(
                    {
                        "name": compiled["name"],
                        "description": compiled["description"],
                        "runnable": compiled["runnable"],
                    }
                )
                continue
            if "model" not in spec:
                msg = f"SubAgent '{spec['name']}' must specify 'model'"
                raise ValueError(msg)
            if "tools" not in spec:
                msg = f"SubAgent '{spec['name']}' must specify 'tools'"
                raise ValueError(msg)
            model = spec["model"]
            if isinstance(model, str):
                # Model given as an identifier string — resolve it to a chat model.
                model = init_chat_model(model)
            middleware: list[Any] = list(spec.get("middleware", []))
            interrupt_on = spec.get("interrupt_on")
            if interrupt_on:
                # Per-tool human approval gates inside the subagent run.
                middleware.append(HumanInTheLoopMiddleware(interrupt_on=interrupt_on))
            specs.append(
                {
                    "name": spec["name"],
                    "description": spec["description"],
                    "runnable": create_agent(
                        model,
                        system_prompt=spec["system_prompt"],
                        tools=spec["tools"],
                        middleware=middleware,
                        name=spec["name"],
                        # Key difference from upstream: compile against the
                        # parent's checkpointer so nested interrupt/resume works.
                        checkpointer=self._surf_checkpointer,
                    ),
                }
            )
        return specs

View file

@ -0,0 +1,74 @@
"""Re-raise still-pending subagent interrupts at the parent graph level.
After ``subagent.[a]invoke(Command(resume=...))`` returns, the subagent may
still hold a pending interrupt (e.g. the LLM produced a follow-up tool call
that fired a fresh ``interrupt()``). The parent's pregel cannot see that
interrupt because it lives in a separate compiled graph; we re-raise it here
so the parent's SSE stream surfaces it as the next approval card.
"""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.runnables import Runnable
from langgraph.types import interrupt as _lg_interrupt
from .resume import get_first_pending_subagent_interrupt
logger = logging.getLogger(__name__)
def maybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Re-raise a still-pending subagent interrupt at the parent so the SSE stream surfaces it."""
    state_reader = getattr(subagent, "get_state", None)
    if not callable(state_reader):
        # Graphs without checkpointing expose no state API; nothing to do.
        return
    try:
        snapshot = state_reader(sub_config)
    except Exception:  # pragma: no cover - defensive
        logger.debug(
            "Subagent get_state failed during re-interrupt check",
            exc_info=True,
        )
        return
    _unused_id, pending = get_first_pending_subagent_interrupt(snapshot)
    if pending is None:
        return
    logger.info(
        "Re-raising subagent %r interrupt to parent (multi-step HITL)",
        subagent_type,
    )
    # Surface the subagent's pending interrupt in the parent pregel.
    _lg_interrupt(pending)
async def amaybe_propagate_subagent_interrupt(
    subagent: Runnable,
    sub_config: dict[str, Any],
    subagent_type: str,
) -> None:
    """Async counterpart of :func:`maybe_propagate_subagent_interrupt`."""
    state_reader = getattr(subagent, "aget_state", None)
    if not callable(state_reader):
        # Graphs without checkpointing expose no state API; nothing to do.
        return
    try:
        snapshot = await state_reader(sub_config)
    except Exception:  # pragma: no cover - defensive
        logger.debug(
            "Subagent aget_state failed during re-interrupt check",
            exc_info=True,
        )
        return
    _unused_id, pending = get_first_pending_subagent_interrupt(snapshot)
    if pending is None:
        return
    logger.info(
        "Re-raising subagent %r interrupt to parent (multi-step HITL)",
        subagent_type,
    )
    # Surface the subagent's pending interrupt in the parent pregel.
    _lg_interrupt(pending)

View file

@ -0,0 +1,76 @@
"""Resume-payload shaping and pending-interrupt detection for subagents.
Splits the work of "given a state snapshot and a parent-stashed resume value,
produce the right ``Command(resume=...)`` for the subagent" into pure helpers.
"""
from __future__ import annotations
from typing import Any
from langgraph.types import Command
def hitlrequest_action_count(pending_value: Any) -> int:
    """Bundle size for a LangChain ``HITLRequest`` payload; ``0`` for non-bundle interrupts."""
    if isinstance(pending_value, dict):
        actions = pending_value.get("action_requests")
        if isinstance(actions, list):
            return len(actions)
    # Scalar / non-dict interrupts are not bundles.
    return 0
def fan_out_decisions_to_match(resume_value: Any, expected_count: int) -> Any:
    """Legacy fallback: pad a 1-decision resume to N for an ``action_requests=N`` bundle.

    Modern frontend submits N decisions per bundle (one per action_request) so
    this is a no-op; kept for backwards compatibility with old in-flight
    threads or non-bundle clients that send a single decision.
    """
    if expected_count <= 1 or not isinstance(resume_value, dict):
        return resume_value
    decisions = resume_value.get("decisions")
    if not isinstance(decisions, list) or not decisions:
        return resume_value
    if len(decisions) >= expected_count:
        return resume_value
    # Repeat the last decision until the bundle size is matched.
    filler = [decisions[-1]] * (expected_count - len(decisions))
    return {**resume_value, "decisions": list(decisions) + filler}
def get_first_pending_subagent_interrupt(state: Any) -> tuple[str | None, Any]:
    """First pending ``(interrupt_id, value)``; ``(None, None)`` if no interrupt.

    Assumes at most one pending interrupt per snapshot (sequential tool nodes).
    Parallel tool nodes would need an id-aware lookup instead of first-wins.
    """
    if state is None:
        return None, None

    def _first_with_value(interrupts: Any) -> tuple[str | None, Any] | None:
        # First interrupt carrying a non-None value; id only kept when it's a str.
        for candidate in interrupts or ():
            value = getattr(candidate, "value", None)
            if value is None:
                continue
            raw_id = getattr(candidate, "id", None)
            return (raw_id if isinstance(raw_id, str) else None, value)
        return None

    found = _first_with_value(getattr(state, "interrupts", None))
    if found is not None:
        return found
    # Fall back to interrupts attached to pending sub-tasks.
    for sub_task in getattr(state, "tasks", None) or ():
        found = _first_with_value(getattr(sub_task, "interrupts", None))
        if found is not None:
            return found
    return None, None
def build_resume_command(resume_value: Any, pending_id: str | None) -> Command:
    """``Command(resume={id: value})`` when ``id`` is known, else fall back to scalar."""
    if pending_id is not None:
        # Target the specific pending interrupt by id.
        return Command(resume={pending_id: resume_value})
    return Command(resume=resume_value)

View file

@ -0,0 +1,238 @@
"""Build the ``task`` tool that invokes subagents with HITL bridging.
The tool's body is the only place where the parent and the subagent meet at
runtime: it reads the parent's stashed resume value, decides whether to send
fresh state or a targeted ``Command(resume=...)`` to the subagent, then
re-raises any new pending interrupt back to the parent.
"""
from __future__ import annotations
import logging
from typing import Annotated, Any
from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION
from langchain.tools import BaseTool, ToolRuntime
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import Runnable
from langchain_core.tools import StructuredTool
from langgraph.types import Command
from .config import (
consume_surfsense_resume,
has_surfsense_resume,
subagent_invoke_config,
)
from .constants import EXCLUDED_STATE_KEYS
from .propagation import (
amaybe_propagate_subagent_interrupt,
maybe_propagate_subagent_interrupt,
)
from .resume import (
build_resume_command,
fan_out_decisions_to_match,
get_first_pending_subagent_interrupt,
hitlrequest_action_count,
)
logger = logging.getLogger(__name__)
def build_task_tool_with_parent_config(
    subagents: list[dict[str, Any]],
    task_description: str | None = None,
) -> BaseTool:
    """Upstream ``_build_task_tool`` + parent ``runtime.config`` propagation + resume bridging.

    Builds a ``task`` StructuredTool (sync + async bodies) over the given
    subagent specs. Each invocation either starts the selected subagent fresh
    or, when the subagent holds a pending interrupt, forwards the parent's
    stashed resume decision via ``Command(resume=...)``.
    """
    # name -> compiled graph lookup shared by both the sync and async bodies.
    subagent_graphs: dict[str, Runnable] = {
        spec["name"]: spec["runnable"] for spec in subagents
    }
    subagent_description_str = "\n".join(
        f"- {s['name']}: {s['description']}" for s in subagents
    )
    # Three description modes: default template, caller-supplied template with
    # an {available_agents} placeholder, or a fully literal caller description.
    if task_description is None:
        description = TASK_TOOL_DESCRIPTION.format(
            available_agents=subagent_description_str
        )
    elif "{available_agents}" in task_description:
        description = task_description.format(available_agents=subagent_description_str)
    else:
        description = task_description

    def _return_command_with_state_update(result: dict, tool_call_id: str) -> Command:
        """Fold the subagent's final state into a parent ``Command`` update.

        The last message's text becomes a ToolMessage answering the parent's
        ``task`` tool call; the rest of the state (minus excluded keys) is
        merged into the parent state.
        """
        if "messages" not in result:
            msg = (
                "CompiledSubAgent must return a state containing a 'messages' key. "
                "Custom StateGraphs used with CompiledSubAgent should include 'messages' "
                "in their state schema to communicate results back to the main agent."
            )
            raise ValueError(msg)
        state_update = {k: v for k, v in result.items() if k not in EXCLUDED_STATE_KEYS}
        messages = result["messages"]
        if not messages:
            msg = (
                "CompiledSubAgent returned an empty 'messages' list. "
                "Subagents must produce at least one message so the parent has "
                "output to forward back to the user."
            )
            raise ValueError(msg)
        # ``or ""`` guards a message whose ``text`` is None/empty.
        last_text = getattr(messages[-1], "text", None) or ""
        message_text = last_text.rstrip()
        return Command(
            update={
                **state_update,
                "messages": [ToolMessage(message_text, tool_call_id=tool_call_id)],
            }
        )

    def _validate_and_prepare_state(
        subagent_type: str, description: str, runtime: ToolRuntime
    ) -> tuple[Runnable, dict]:
        """Resolve the subagent graph and seed its state from the parent's (minus excluded keys)."""
        subagent = subagent_graphs[subagent_type]
        subagent_state = {
            k: v for k, v in runtime.state.items() if k not in EXCLUDED_STATE_KEYS
        }
        subagent_state["messages"] = [HumanMessage(content=description)]
        return subagent, subagent_state

    def task(
        description: Annotated[
            str,
            "A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
        ],
        subagent_type: Annotated[
            str,
            "The type of subagent to use. Must be one of the available agent types listed in the tool description.",
        ],
        runtime: ToolRuntime,
    ) -> str | Command:
        """Sync tool body: invoke (or resume) the selected subagent and fold its state back."""
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
            )
        if not runtime.tool_call_id:
            raise ValueError("Tool call ID is required for subagent invocation")
        subagent, subagent_state = _validate_and_prepare_state(
            subagent_type, description, runtime
        )
        sub_config = subagent_invoke_config(runtime)
        # Resume bridge: forward the parent's stashed decision into the
        # subagent's pending ``interrupt()``, targeted by id.
        pending_id: str | None = None
        pending_value: Any = None
        get_state = getattr(subagent, "get_state", None)
        if callable(get_state):
            try:
                snapshot = get_state(sub_config)
                pending_id, pending_value = get_first_pending_subagent_interrupt(
                    snapshot
                )
            except Exception:
                # Fail loud if a resume is queued: silent fallback would
                # replay the original interrupt to the user.
                if has_surfsense_resume(runtime):
                    logger.exception(
                        "Subagent %r get_state raised with resume queued; re-raising.",
                        subagent_type,
                    )
                    raise
                logger.debug(
                    "Subagent get_state failed; falling back to fresh invoke",
                    exc_info=True,
                )
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
                # Bridge invariant: a queued resume must accompany any pending
                # subagent interrupt. Fall-through replay would silently re-prompt
                # the user; raise so the streaming layer surfaces a clear error.
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            # Legacy clients may send one decision for an N-action bundle.
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            result = subagent.invoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
            )
        else:
            result = subagent.invoke(subagent_state, config=sub_config)
        # A follow-up interrupt raised inside the subagent must surface at
        # the parent as the next approval card.
        maybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
        return _return_command_with_state_update(result, runtime.tool_call_id)

    async def atask(
        description: Annotated[
            str,
            "A detailed description of the task for the subagent to perform autonomously. Include all necessary context and specify the expected output format.",
        ],
        subagent_type: Annotated[
            str,
            "The type of subagent to use. Must be one of the available agent types listed in the tool description.",
        ],
        runtime: ToolRuntime,
    ) -> str | Command:
        """Async twin of ``task`` (same flow via ``aget_state`` / ``ainvoke``)."""
        if subagent_type not in subagent_graphs:
            allowed_types = ", ".join([f"`{k}`" for k in subagent_graphs])
            return (
                f"We cannot invoke subagent {subagent_type} because it does not exist, "
                f"the only allowed types are {allowed_types}"
            )
        if not runtime.tool_call_id:
            raise ValueError("Tool call ID is required for subagent invocation")
        subagent, subagent_state = _validate_and_prepare_state(
            subagent_type, description, runtime
        )
        sub_config = subagent_invoke_config(runtime)
        # Resume bridge — see ``task`` above.
        pending_id: str | None = None
        pending_value: Any = None
        aget_state = getattr(subagent, "aget_state", None)
        if callable(aget_state):
            try:
                snapshot = await aget_state(sub_config)
                pending_id, pending_value = get_first_pending_subagent_interrupt(
                    snapshot
                )
            except Exception:
                if has_surfsense_resume(runtime):
                    logger.exception(
                        "Subagent %r aget_state raised with resume queued; re-raising.",
                        subagent_type,
                    )
                    raise
                logger.debug(
                    "Subagent aget_state failed; falling back to fresh ainvoke",
                    exc_info=True,
                )
        if pending_value is not None:
            resume_value = consume_surfsense_resume(runtime)
            if resume_value is None:
                raise RuntimeError(
                    f"Subagent {subagent_type!r} has a pending interrupt but no "
                    "surfsense_resume_value on config; resume bridge is broken."
                )
            expected = hitlrequest_action_count(pending_value)
            resume_value = fan_out_decisions_to_match(resume_value, expected)
            result = await subagent.ainvoke(
                build_resume_command(resume_value, pending_id),
                config=sub_config,
            )
        else:
            result = await subagent.ainvoke(subagent_state, config=sub_config)
        await amaybe_propagate_subagent_interrupt(subagent, sub_config, subagent_type)
        return _return_command_with_state_update(result, runtime.tool_call_id)

    return StructuredTool.from_function(
        name="task",
        func=task,
        coroutine=atask,
        description=description,
    )

View file

@ -0,0 +1,50 @@
"""Spill + clear-tool-uses passes to keep payloads under budget."""
from __future__ import annotations
from collections.abc import Sequence
from typing import Any
from langchain_core.tools import BaseTool
from app.agents.multi_agent_chat.main_agent.context_prune.prune_tool_names import (
safe_exclude_tools,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import (
ClearToolUsesEdit,
SpillingContextEditingMiddleware,
SpillToBackendEdit,
)
from ..shared.flags import enabled
def build_context_editing_mw(
    *,
    flags: AgentFeatureFlags,
    max_input_tokens: int | None,
    tools: Sequence[BaseTool],
    backend_resolver: Any,
) -> SpillingContextEditingMiddleware | None:
    """Spill + clear-tool-uses passes to keep payloads under the token budget.

    Returns ``None`` when the feature flag is off or no input-token budget is
    known (``max_input_tokens`` falsy).
    """
    if not enabled(flags, "enable_context_editing") or not max_input_tokens:
        return None
    # Both edits share the same thresholds and exclusion list; compute them
    # once instead of duplicating the arithmetic and re-walking the tool list.
    trigger = int(max_input_tokens * 0.55)
    clear_at_least = int(max_input_tokens * 0.15)
    excluded = safe_exclude_tools(tools)
    spill_edit = SpillToBackendEdit(
        trigger=trigger,
        clear_at_least=clear_at_least,
        keep=5,
        exclude_tools=excluded,
        clear_tool_inputs=True,
    )
    clear_edit = ClearToolUsesEdit(
        trigger=trigger,
        clear_at_least=clear_at_least,
        keep=5,
        exclude_tools=excluded,
        clear_tool_inputs=True,
        placeholder="[cleared - older tool output trimmed for context]",
    )
    return SpillingContextEditingMiddleware(
        edits=[spill_edit, clear_edit],
        backend_resolver=backend_resolver,
    )

View file

@ -0,0 +1,13 @@
"""Drop duplicate HITL tool calls before execution."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware
def build_dedup_hitl_mw(tools: Sequence[BaseTool]) -> DedupHITLToolCallsMiddleware:
    """Middleware that drops duplicate HITL tool calls before execution."""
    agent_tools = list(tools)
    return DedupHITLToolCallsMiddleware(agent_tools=agent_tools)

View file

@ -0,0 +1,14 @@
"""Stop N identical tool calls in a row via interrupt."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import DoomLoopMiddleware
from ..shared.flags import enabled
def build_doom_loop_mw(flags: AgentFeatureFlags) -> DoomLoopMiddleware | None:
    """Interrupt after N identical tool calls in a row; ``None`` when the flag is off."""
    if not enabled(flags, "enable_doom_loop"):
        return None
    return DoomLoopMiddleware(threshold=3)

View file

@ -0,0 +1,23 @@
"""Commit staged cloud filesystem mutations to Postgres at end of turn."""
from __future__ import annotations
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeBasePersistenceMiddleware
def build_kb_persistence_mw(
    *,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
) -> KnowledgeBasePersistenceMiddleware | None:
    """End-of-turn commit of staged cloud filesystem mutations (cloud mode only)."""
    if filesystem_mode == FilesystemMode.CLOUD:
        return KnowledgeBasePersistenceMiddleware(
            search_space_id=search_space_id,
            created_by_id=user_id,
            filesystem_mode=filesystem_mode,
            thread_id=thread_id,
        )
    # Non-cloud filesystems have nothing to persist.
    return None

View file

@ -0,0 +1,27 @@
"""KB priority planner: <priority_documents> injection."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgePriorityMiddleware
def build_knowledge_priority_mw(
    *,
    llm: BaseChatModel,
    search_space_id: int,
    filesystem_mode: FilesystemMode,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
    mentioned_document_ids: list[int] | None,
) -> KnowledgePriorityMiddleware:
    """KB priority planner middleware that injects <priority_documents>."""
    config = {
        "llm": llm,
        "search_space_id": search_space_id,
        "filesystem_mode": filesystem_mode,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "mentioned_document_ids": mentioned_document_ids,
    }
    return KnowledgePriorityMiddleware(**config)

View file

@ -0,0 +1,23 @@
"""<workspace_tree> injection (cloud only)."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import KnowledgeTreeMiddleware
def build_knowledge_tree_mw(
    *,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    llm: BaseChatModel,
) -> KnowledgeTreeMiddleware | None:
    """<workspace_tree> injection middleware; only built for the cloud filesystem."""
    if filesystem_mode == FilesystemMode.CLOUD:
        return KnowledgeTreeMiddleware(
            search_space_id=search_space_id,
            filesystem_mode=filesystem_mode,
            llm=llm,
        )
    return None

View file

@ -0,0 +1,12 @@
"""Provider-compat: append a `_noop` tool when tools=[] but history has tool calls."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import NoopInjectionMiddleware
from ..shared.flags import enabled
def build_noop_injection_mw(flags: AgentFeatureFlags) -> NoopInjectionMiddleware | None:
    """Provider-compat ``_noop`` tool injection; gated on the compaction-v2 flag."""
    if not enabled(flags, "enable_compaction_v2"):
        return None
    return NoopInjectionMiddleware()

View file

@ -0,0 +1,12 @@
"""OTel spans on model and tool calls."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import OtelSpanMiddleware
from ..shared.flags import enabled
def build_otel_mw(flags: AgentFeatureFlags) -> OtelSpanMiddleware | None:
    """OTel spans on model and tool calls; ``None`` when the flag is off."""
    if not enabled(flags, "enable_otel"):
        return None
    return OtelSpanMiddleware()

View file

@ -0,0 +1,49 @@
"""Tail-of-stack plugin slot driven by env allowlist."""
from __future__ import annotations
import logging
from typing import Any
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.plugin_loader import (
PluginContext,
load_allowed_plugin_names_from_env,
load_plugin_middlewares,
)
from app.db import ChatVisibility
from ..shared.flags import enabled
def build_plugin_middlewares(
    *,
    flags: AgentFeatureFlags,
    search_space_id: int,
    user_id: str | None,
    visibility: ChatVisibility,
    llm: BaseChatModel,
) -> list[Any]:
    """Load env-allowlisted plugin middlewares; empty list when disabled or on failure."""
    if not enabled(flags, "enable_plugin_loader"):
        return []
    try:
        names = load_allowed_plugin_names_from_env()
        if not names:
            # Empty allowlist means the plugin slot is effectively off.
            return []
        context = PluginContext.build(
            search_space_id=search_space_id,
            user_id=user_id,
            thread_visibility=visibility,
            llm=llm,
        )
        return load_plugin_middlewares(context, allowed_plugin_names=names)
    except Exception:  # pragma: no cover - defensive
        logging.warning(
            "Plugin loader failed; continuing without plugins.",
            exc_info=True,
        )
        return []

View file

@ -0,0 +1,50 @@
"""Repair miscased / unknown tool names to the registered set or invalid_tool."""
from __future__ import annotations
from collections.abc import Sequence
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import ToolCallNameRepairMiddleware
from ..shared.flags import enabled
# deepagents-built-in tool names the repair pass treats as known.
_DEEPAGENT_BUILTIN_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "write_todos",
        "ls",
        "read_file",
        "write_file",
        "edit_file",
        "glob",
        "grep",
        "execute",
        "task",
        "mkdir",
        "cd",
        "pwd",
        "move_file",
        "rm",
        "rmdir",
        "list_tree",
        "execute_code",
    }
)


def build_repair_mw(
    *,
    flags: AgentFeatureFlags,
    tools: Sequence[BaseTool],
) -> ToolCallNameRepairMiddleware | None:
    """Repair miscased / unknown tool names; ``None`` when the flag is off."""
    if not enabled(flags, "enable_tool_call_repair"):
        return None
    # Known names = tools registered on this agent plus deepagents built-ins.
    known_names = {t.name for t in tools} | _DEEPAGENT_BUILTIN_TOOL_NAMES
    return ToolCallNameRepairMiddleware(
        registered_tool_names=known_names,
        fuzzy_match_threshold=None,
    )

View file

@ -0,0 +1,39 @@
"""LLM-based tool subset selection (only when >30 tools)."""
from __future__ import annotations
import logging
from collections.abc import Sequence
from langchain.agents.middleware import LLMToolSelectorMiddleware
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from ..shared.flags import enabled
def build_selector_mw(
    *,
    flags: AgentFeatureFlags,
    tools: Sequence[BaseTool],
) -> LLMToolSelectorMiddleware | None:
    """LLM-based tool subset selection, engaged only for large toolsets (>30).

    Returns ``None`` when the flag is off, the toolset is small, or the
    middleware cannot be constructed.
    """
    if not enabled(flags, "enable_llm_tool_selector") or len(tools) <= 30:
        return None
    # Hoisted: the original rebuilt {t.name for t in tools} once per
    # candidate name inside the comprehension.
    tool_names = {t.name for t in tools}
    always_include = [
        name
        for name in (
            "update_memory",
            "get_connected_accounts",
            "scrape_webpage",
        )
        if name in tool_names
    ]
    try:
        return LLMToolSelectorMiddleware(
            model="openai:gpt-4o-mini",
            max_tools=12,
            always_include=always_include,
        )
    except Exception:
        logging.warning("LLMToolSelectorMiddleware init failed; skipping.")
        return None

View file

@ -0,0 +1,39 @@
"""Skill discovery + injection."""
from __future__ import annotations
import logging
from deepagents.middleware.skills import SkillsMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import (
build_skills_backend_factory,
default_skills_sources,
)
from ..shared.flags import enabled
def build_skills_mw(
    *,
    flags: AgentFeatureFlags,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
) -> SkillsMiddleware | None:
    """Skill discovery + injection middleware; ``None`` when disabled or init fails."""
    if not enabled(flags, "enable_skills"):
        return None
    # Cloud mode scopes the skills backend to the search space; other modes
    # get an unscoped backend.
    scoped_space_id = (
        search_space_id if filesystem_mode == FilesystemMode.CLOUD else None
    )
    try:
        backend_factory = build_skills_backend_factory(search_space_id=scoped_space_id)
        return SkillsMiddleware(
            backend=backend_factory,
            sources=default_skills_sources(),
        )
    except Exception as exc:  # pragma: no cover - defensive
        logging.warning("SkillsMiddleware init failed; skipping: %s", exc)
        return None

View file

@ -0,0 +1,9 @@
"""Anthropic prompt caching annotations on system/tool/message blocks."""
from __future__ import annotations
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
def build_anthropic_cache_mw() -> AnthropicPromptCachingMiddleware:
    """Prompt-caching annotations; non-Anthropic models are ignored (``unsupported_model_behavior="ignore"``)."""
    middleware = AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore")
    return middleware

View file

@ -0,0 +1,14 @@
"""Context-window summarization with SurfSense protected sections."""
from __future__ import annotations
from typing import Any
from deepagents.backends import StateBackend
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.middleware import create_surfsense_compaction_middleware
def build_compaction_mw(llm: BaseChatModel) -> Any:
    """Context-window summarization middleware backed by the in-state backend class."""
    backend_cls = StateBackend
    return create_surfsense_compaction_middleware(llm, backend_cls)

View file

@ -0,0 +1,11 @@
"""File-intent classifier that gates strict write contracts."""
from __future__ import annotations
from langchain_core.language_models import BaseChatModel
from app.agents.new_chat.middleware import FileIntentMiddleware
def build_file_intent_mw(llm: BaseChatModel) -> FileIntentMiddleware:
    """File-intent classifier middleware gating strict write contracts."""
    classifier = FileIntentMiddleware(llm=llm)
    return classifier

View file

@ -0,0 +1,25 @@
"""SurfSense filesystem tools/middleware."""
from __future__ import annotations
from typing import Any
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import SurfSenseFilesystemMiddleware
def build_filesystem_mw(
    *,
    backend_resolver: Any,
    filesystem_mode: FilesystemMode,
    search_space_id: int,
    user_id: str | None,
    thread_id: int | None,
) -> SurfSenseFilesystemMiddleware:
    """SurfSense filesystem tools/middleware bound to the resolved backend."""
    config = {
        "backend": backend_resolver,
        "filesystem_mode": filesystem_mode,
        "search_space_id": search_space_id,
        "created_by_id": user_id,
        "thread_id": thread_id,
    }
    return SurfSenseFilesystemMiddleware(**config)

View file

@ -0,0 +1,10 @@
"""Single source of truth for the feature-flag predicate."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
def enabled(flags: AgentFeatureFlags, attr: str) -> bool:
    """``flags.<attr>`` is on AND the new-agent-stack kill switch is off."""
    feature_on = getattr(flags, attr)
    kill_switch = flags.disable_new_agent_stack
    return feature_on and not kill_switch

View file

@ -0,0 +1,19 @@
"""User/team memory injection prepended to the conversation."""
from __future__ import annotations
from app.agents.new_chat.middleware import MemoryInjectionMiddleware
from app.db import ChatVisibility
def build_memory_mw(
    *,
    user_id: str | None,
    search_space_id: int,
    visibility: ChatVisibility,
) -> MemoryInjectionMiddleware:
    """Middleware that prepends user/team memory to the conversation."""
    config = {
        "user_id": user_id,
        "search_space_id": search_space_id,
        "thread_visibility": visibility,
    }
    return MemoryInjectionMiddleware(**config)

View file

@ -0,0 +1,9 @@
"""Repair dangling tool-call sequences before each agent turn."""
from __future__ import annotations
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
def build_patch_tool_calls_mw() -> PatchToolCallsMiddleware:
    """Middleware repairing dangling tool-call sequences before each agent turn."""
    patcher = PatchToolCallsMiddleware()
    return patcher

View file

@ -0,0 +1,12 @@
"""Permission rulesets fanned out to parent / general-purpose / subagent stacks."""
from __future__ import annotations
from .context import PermissionContext, build_permission_context
from .middleware import build_full_permission_mw
# Public API of the permissions subpackage.
__all__ = [
    "PermissionContext",
    "build_full_permission_mw",
    "build_permission_context",
]

View file

@ -0,0 +1,107 @@
"""Derive shared permission context once; fan out to all three stack layers.
The context carries:
- ``rulesets``: full ask/deny/allow rules for the main-agent permission middleware.
- ``general_purpose_interrupt_on``: ``ask`` rules mirrored as deepagents
``interrupt_on`` so HITL still triggers from inside ``task`` runs (subagents
bypass the main-agent permission middleware).
- ``subagent_deny_mw``: a deny-only ``PermissionMiddleware`` instance shared
across the general-purpose and registry subagent stacks.
"""
from __future__ import annotations
from collections.abc import Sequence
from dataclasses import dataclass
from langchain_core.tools import BaseTool
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.filesystem_selection import FilesystemMode
from app.agents.new_chat.middleware import PermissionMiddleware
from app.agents.new_chat.permissions import Rule, Ruleset
from app.agents.new_chat.tools.registry import BUILTIN_TOOLS
from ..flags import enabled
@dataclass(frozen=True)
class PermissionContext:
    """Shared permission artifacts fanned out to parent / general-purpose / subagent stacks."""

    # Full ask/deny/allow rules for the main-agent permission middleware.
    rulesets: list[Ruleset]
    # ``ask`` rules mirrored as deepagents ``interrupt_on`` so HITL still
    # triggers inside ``task`` runs.
    general_purpose_interrupt_on: dict[str, bool]
    # Deny-only middleware shared by general-purpose and registry subagents.
    subagent_deny_mw: PermissionMiddleware | None
def build_permission_context(
    *,
    flags: AgentFeatureFlags,
    filesystem_mode: FilesystemMode,
    tools: Sequence[BaseTool],
    available_connectors: list[str] | None,
) -> PermissionContext:
    """Derive the shared permission context once for all three stack layers."""
    is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER
    permission_enabled = enabled(flags, "enable_permission")
    rulesets: list[Ruleset] = []
    # Base allow-all so the later ask/deny rules act as carve-outs.
    if permission_enabled or is_desktop_fs:
        rulesets.append(
            Ruleset(
                rules=[Rule(permission="*", pattern="*", action="allow")],
                origin="surfsense_defaults",
            )
        )
    # Desktop filesystems always require human confirmation for mutations.
    if is_desktop_fs:
        rulesets.append(
            Ruleset(
                rules=[
                    Rule(permission="rm", pattern="*", action="ask"),
                    Rule(permission="rmdir", pattern="*", action="ask"),
                    Rule(permission="move_file", pattern="*", action="ask"),
                    Rule(permission="edit_file", pattern="*", action="ask"),
                    Rule(permission="write_file", pattern="*", action="ask"),
                ],
                origin="desktop_safety",
            )
        )
    tool_names_in_use = {t.name for t in tools}
    # Synthesize deny rules for built-in tools whose required connector is
    # not available to this user.
    if permission_enabled:
        available_set = set(available_connectors or [])
        synthesized: list[Rule] = []
        for tool_def in BUILTIN_TOOLS:
            if tool_def.name not in tool_names_in_use:
                continue
            rc = tool_def.required_connector
            if rc and rc not in available_set:
                synthesized.append(
                    Rule(permission=tool_def.name, pattern="*", action="deny")
                )
        if synthesized:
            rulesets.append(Ruleset(rules=synthesized, origin="connector_synthesized"))
    # Mirror ``ask`` rules as deepagents ``interrupt_on`` so HITL still fires
    # inside ``task`` runs (subagents bypass the main permission middleware).
    general_purpose_interrupt_on: dict[str, bool] = {
        rule.permission: True
        for rs in rulesets
        for rule in rs.rules
        if rule.action == "ask" and rule.permission in tool_names_in_use
    }
    # Subagents only get the deny subset; ask behavior comes via interrupt_on.
    deny_rulesets = [
        Ruleset(
            rules=[r for r in rs.rules if r.action == "deny"],
            origin=rs.origin,
        )
        for rs in rulesets
    ]
    deny_rulesets = [rs for rs in deny_rulesets if rs.rules]
    subagent_deny_mw: PermissionMiddleware | None = (
        PermissionMiddleware(rulesets=deny_rulesets) if deny_rulesets else None
    )
    return PermissionContext(
        rulesets=rulesets,
        general_purpose_interrupt_on=general_purpose_interrupt_on,
        subagent_deny_mw=subagent_deny_mw,
    )

View file

@ -0,0 +1,10 @@
"""Main-agent permission middleware (full ask/deny/allow rules)."""
from __future__ import annotations
from app.agents.new_chat.middleware import PermissionMiddleware
from app.agents.new_chat.permissions import Ruleset
def build_full_permission_mw(rulesets: list[Ruleset]) -> PermissionMiddleware | None:
    """Main-agent permission middleware from the full rules; ``None`` when no rules exist."""
    if not rulesets:
        return None
    return PermissionMiddleware(rulesets=rulesets)

View file

@ -0,0 +1,7 @@
"""Resilience middleware shared as the same instances across parent / general-purpose / registry."""
from __future__ import annotations
from .bundle import ResilienceBundle, build_resilience_bundle
__all__ = ["ResilienceBundle", "build_resilience_bundle"]

View file

@ -0,0 +1,51 @@
"""Construct each resilience middleware once; same instances flow into every consumer."""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
from langchain.agents.middleware import (
ModelCallLimitMiddleware,
ToolCallLimitMiddleware,
)
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import RetryAfterMiddleware
from app.agents.new_chat.middleware.scoped_model_fallback import (
ScopedModelFallbackMiddleware,
)
from .fallback import build_fallback_mw
from .model_call_limit import build_model_call_limit_mw
from .retry import build_retry_mw
from .tool_call_limit import build_tool_call_limit_mw
@dataclass(frozen=True)
class ResilienceBundle:
    """Resilience middleware instances built once; ``None`` marks a disabled slot."""

    retry: RetryAfterMiddleware | None
    fallback: ScopedModelFallbackMiddleware | None
    model_call_limit: ModelCallLimitMiddleware | None
    tool_call_limit: ToolCallLimitMiddleware | None

    def as_list(self) -> list[Any]:
        """Enabled middlewares in stack order, skipping ``None`` slots."""
        candidates = (
            self.retry,
            self.fallback,
            self.model_call_limit,
            self.tool_call_limit,
        )
        return [mw for mw in candidates if mw is not None]
def build_resilience_bundle(flags: AgentFeatureFlags) -> ResilienceBundle:
    """Build each resilience middleware exactly once from the feature flags."""
    retry = build_retry_mw(flags)
    fallback = build_fallback_mw(flags)
    model_cap = build_model_call_limit_mw(flags)
    tool_cap = build_tool_call_limit_mw(flags)
    return ResilienceBundle(
        retry=retry,
        fallback=fallback,
        model_call_limit=model_cap,
        tool_call_limit=tool_cap,
    )

View file

@ -0,0 +1,27 @@
"""Switch to a fallback model on provider/network errors only."""
from __future__ import annotations
import logging
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware.scoped_model_fallback import (
ScopedModelFallbackMiddleware,
)
from ..flags import enabled
def build_fallback_mw(
    flags: AgentFeatureFlags,
) -> ScopedModelFallbackMiddleware | None:
    """Fallback-model middleware for provider/network errors; ``None`` when off or init fails."""
    if not enabled(flags, "enable_model_fallback"):
        return None
    try:
        fallback = ScopedModelFallbackMiddleware(
            "openai:gpt-4o-mini",
            "anthropic:claude-3-5-haiku-20241022",
        )
    except Exception:
        logging.warning("ScopedModelFallbackMiddleware init failed; skipping.")
        return None
    return fallback

View file

@ -0,0 +1,21 @@
"""Cap model calls per thread / per run to prevent runaway cost."""
from __future__ import annotations
from langchain.agents.middleware import ModelCallLimitMiddleware
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from ..flags import enabled
def build_model_call_limit_mw(
    flags: AgentFeatureFlags,
) -> ModelCallLimitMiddleware | None:
    """Per-thread / per-run model-call caps; ``None`` when the flag is off."""
    if enabled(flags, "enable_model_call_limit"):
        return ModelCallLimitMiddleware(
            thread_limit=120,
            run_limit=80,
            exit_behavior="end",
        )
    return None

View file

@ -0,0 +1,16 @@
"""Retry on transient model errors (e.g. Retry-After-bearing 429s)."""
from __future__ import annotations
from app.agents.new_chat.feature_flags import AgentFeatureFlags
from app.agents.new_chat.middleware import RetryAfterMiddleware
from ..flags import enabled
def build_retry_mw(flags: AgentFeatureFlags) -> RetryAfterMiddleware | None:
    """Transient-error retry middleware; ``None`` when the flag is off."""
    if not enabled(flags, "enable_retry_after"):
        return None
    return RetryAfterMiddleware(max_retries=3)

Some files were not shown because too many files have changed in this diff Show more