diff --git a/surfsense_backend/app/agents/new_chat/__init__.py b/surfsense_backend/app/agents/new_chat/__init__.py deleted file mode 100644 index 4b2eb89eb..000000000 --- a/surfsense_backend/app/agents/new_chat/__init__.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -SurfSense New Chat Agent Module. - -This module provides the SurfSense deep agent with configurable tools, -middleware, and preloaded knowledge-base filesystem behavior. - -Directory Structure: -- tools/: All agent tools (podcast, generate_image, web, memory, etc.) -- middleware/: Custom middleware (knowledge search, filesystem, dedup, etc.) -- chat_deepagent.py: Main agent factory -- system_prompt.py: System prompts and instructions -- context.py: Context schema for the agent -- checkpointer.py: LangGraph checkpointer setup -- llm_config.py: LLM configuration utilities -- utils.py: Shared utilities -""" - -# Agent factory -from .chat_deepagent import create_surfsense_deep_agent - -# Context -from .context import SurfSenseContextSchema - -# LLM config -from .llm_config import ( - create_chat_litellm_from_config, - load_global_llm_config_by_id, - load_llm_config_from_yaml, -) - -# Middleware -from .middleware import ( - DedupHITLToolCallsMiddleware, - KnowledgeBaseSearchMiddleware, - SurfSenseFilesystemMiddleware, -) - -# System prompt -from .system_prompt import ( - SURFSENSE_CITATION_INSTRUCTIONS, - SURFSENSE_SYSTEM_PROMPT, - build_surfsense_system_prompt, -) - -# Tools - registry exports -# Tools - factory exports (for direct use) -# Tools - knowledge base utilities -from .tools import ( - BUILTIN_TOOLS, - ToolDefinition, - build_tools, - create_generate_podcast_tool, - create_scrape_webpage_tool, - format_documents_for_context, - get_all_tool_names, - get_default_enabled_tools, - get_tool_by_name, - search_knowledge_base_async, -) - -__all__ = [ - # Tools registry - "BUILTIN_TOOLS", - # System prompt - "SURFSENSE_CITATION_INSTRUCTIONS", - "SURFSENSE_SYSTEM_PROMPT", - # Middleware - 
"DedupHITLToolCallsMiddleware", - "KnowledgeBaseSearchMiddleware", - # Context - "SurfSenseContextSchema", - "SurfSenseFilesystemMiddleware", - "ToolDefinition", - "build_surfsense_system_prompt", - "build_tools", - # LLM config - "create_chat_litellm_from_config", - # Tool factories - "create_generate_podcast_tool", - "create_scrape_webpage_tool", - # Agent factory - "create_surfsense_deep_agent", - # Knowledge base utilities - "format_documents_for_context", - "get_all_tool_names", - "get_default_enabled_tools", - "get_tool_by_name", - "load_global_llm_config_by_id", - "load_llm_config_from_yaml", - "search_knowledge_base_async", -] diff --git a/surfsense_backend/app/agents/new_chat/agent_cache.py b/surfsense_backend/app/agents/new_chat/agent_cache.py deleted file mode 100644 index d0500ce2b..000000000 --- a/surfsense_backend/app/agents/new_chat/agent_cache.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.agent_cache``. Re-exported here for the frozen -single-agent stack (``chat_deepagent``) until that stack is retired. -""" - -from app.agents.shared.agent_cache import ( - flags_signature, - get_cache, - reload_for_tests, - stable_hash, - system_prompt_hash, - tools_signature, -) - -__all__ = [ - "flags_signature", - "get_cache", - "reload_for_tests", - "stable_hash", - "system_prompt_hash", - "tools_signature", -] diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py deleted file mode 100644 index c00583524..000000000 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ /dev/null @@ -1,1080 +0,0 @@ -""" -SurfSense deep agent implementation. - -This module provides the factory function for creating SurfSense deep agents -with configurable tools via the tools registry and configurable prompts -via NewLLMConfig. 
- -We use ``create_agent`` (from langchain) rather than ``create_deep_agent`` -(from deepagents) so that the middleware stack is fully under our control. -This lets us swap in ``SurfSenseFilesystemMiddleware`` — a customisable -subclass of the default ``FilesystemMiddleware`` — while preserving every -other behaviour that ``create_deep_agent`` provides (todo-list, subagents, -summarisation, etc.). Prompt caching is configured at LLM-build time via -``apply_litellm_prompt_caching`` (LiteLLM-native, multi-provider) rather -than as a middleware. -""" - -import asyncio -import logging -import time -from collections.abc import Sequence -from typing import Any - -from deepagents import SubAgent, SubAgentMiddleware, __version__ as deepagents_version -from deepagents.backends import StateBackend -from deepagents.graph import BASE_AGENT_PROMPT -from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware -from deepagents.middleware.skills import SkillsMiddleware -from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT -from langchain.agents import create_agent -from langchain.agents.middleware import ( - LLMToolSelectorMiddleware, - ModelCallLimitMiddleware, - TodoListMiddleware, - ToolCallLimitMiddleware, -) -from langchain_core.language_models import BaseChatModel -from langchain_core.tools import BaseTool -from langgraph.types import Checkpointer -from sqlalchemy.ext.asyncio import AsyncSession - -from app.agents.new_chat.agent_cache import ( - flags_signature, - get_cache, - stable_hash, - system_prompt_hash, - tools_signature, -) -from app.agents.new_chat.context import SurfSenseContextSchema -from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags -from app.agents.new_chat.filesystem_backends import build_backend_resolver -from app.agents.new_chat.filesystem_selection import FilesystemMode, FilesystemSelection -from app.agents.new_chat.llm_config import AgentConfig -from app.agents.new_chat.middleware import ( - 
def _resolve_prompt_model_name(
    agent_config: AgentConfig | None,
    llm: BaseChatModel,
) -> str | None:
    """Resolve the model id to feed to provider-variant detection.

    Candidates are tried in this order (matching the
    ``params.get("base_model") or params.get("model", "")`` idiom used in
    ``llm_router_service.py``):

    1. ``agent_config.litellm_params["base_model"]`` — required for Azure
       deployments where ``model_name`` is the deployment slug, not the
       underlying family. Without this, a deployment named e.g.
       ``"prod-chat-001"`` would silently miss every provider regex.
    2. ``agent_config.model_name`` — the user's configured model id.
    3. ``getattr(llm, "model", None)`` — fallback for direct callers that
       don't supply an ``AgentConfig``.

    Every candidate is validated the same way: it must be a non-blank
    ``str``. The winner is returned with surrounding whitespace removed so
    a padded value such as ``" gpt-4o "`` still matches downstream
    provider patterns.

    Args:
        agent_config: Optional agent configuration; skipped when ``None``.
        llm: The chat model instance; only its ``model`` attribute is read.

    Returns:
        The first usable model id, or ``None`` when nothing is available
        (``compose_system_prompt`` treats that as the ``"default"``
        variant, i.e. no provider block is emitted).
    """
    candidates: list[object] = []
    if agent_config is not None:
        params = agent_config.litellm_params or {}
        candidates.append(params.get("base_model"))
        candidates.append(agent_config.model_name)
    candidates.append(getattr(llm, "model", None))

    for candidate in candidates:
        # Skip non-strings and blank strings uniformly for every source.
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                return cleaned
    return None
async def create_surfsense_deep_agent(
    llm: BaseChatModel,
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
    checkpointer: Checkpointer,
    user_id: str | None = None,
    thread_id: int | None = None,
    agent_config: AgentConfig | None = None,
    enabled_tools: list[str] | None = None,
    disabled_tools: list[str] | None = None,
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
    mentioned_document_ids: list[int] | None = None,
    anon_session_id: str | None = None,
    filesystem_selection: FilesystemSelection | None = None,
):
    """
    Create a SurfSense deep agent with configurable tools and prompts.

    The agent comes with built-in tools that can be configured:
    - generate_podcast: Generate audio podcasts from content
    - generate_image: Generate images from text descriptions using AI models
    - scrape_webpage: Extract content from webpages
    - update_memory: Update the user's personal or team memory document

    The agent also includes TodoListMiddleware by default (added to the
    middleware stack this module assembles; the graph is built with
    ``create_agent``, not ``create_deep_agent``), which provides:
    - write_todos: Create and update planning/todo lists for complex tasks

    The system prompt can be configured via agent_config:
    - Custom system instructions (or use defaults)
    - Citation toggle (enable/disable citation requirements)

    Args:
        llm: ChatLiteLLM instance for the agent's language model
        search_space_id: The user's search space ID
        db_session: Database session for tools that need DB access
        connector_service: Initialized connector service for knowledge base search
        checkpointer: LangGraph checkpointer for conversation state persistence.
                     Use AsyncPostgresSaver for production or MemorySaver for testing.
        user_id: The current user's UUID string (required for memory tools)
        thread_id: Chat thread ID. Used for the thread-scoped prompt-cache
                  key, handed to tools and middleware, and part of the
                  compiled-agent cache key.
        agent_config: Optional AgentConfig from NewLLMConfig for prompt configuration.
                     If None, uses default system prompt with citations enabled.
        enabled_tools: Explicit list of tool names to enable. If None, all default tools
                      are enabled. Use this to limit which tools are available.
        disabled_tools: List of tool names to disable. Applied after enabled_tools.
                       Use this to exclude specific tools from the defaults.
        additional_tools: Extra custom tools to add beyond the built-in ones.
                         These are always added regardless of enabled/disabled settings.
        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
                          Falls back to Chromium/Trafilatura if not provided.
        thread_visibility: Visibility of the thread. Tool dependencies, the
                          middleware build and the cache key fall back to
                          ``ChatVisibility.PRIVATE`` when this is None; the
                          system-prompt builders receive the value as passed.
        mentioned_document_ids: Document IDs mentioned for this turn. Forwarded
                          to the middleware build as a legacy fallback and
                          deliberately left out of the agent cache key.
        anon_session_id: Anonymous session identifier; forwarded to the
                        middleware build and included in the cache key.
        filesystem_selection: Filesystem backend selection. Defaults to
                        ``FilesystemSelection()``. ``search_space_id`` is only
                        handed to the backend resolver in
                        ``FilesystemMode.CLOUD``.

    Returns:
        CompiledStateGraph: The configured deep agent

    Examples:
        # Create agent with all default tools and default prompt
        agent = await create_surfsense_deep_agent(llm, search_space_id, db_session, ...)

        # Create agent with custom prompt configuration
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            agent_config=AgentConfig(
                provider="OPENAI",
                model_name="gpt-4",
                api_key="...",
                system_instructions="Custom instructions...",
                citations_enabled=False,
            )
        )

        # Create agent with only specific tools
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            enabled_tools=["scrape_webpage"]
        )

        # Create agent without podcast generation
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            disabled_tools=["generate_podcast"]
        )

        # Add custom tools
        agent = await create_surfsense_deep_agent(
            llm, search_space_id, db_session, ...,
            additional_tools=[my_custom_tool]
        )
    """
    _t_agent_total = time.perf_counter()

    # Layer thread-aware prompt caching onto the LLM. Idempotent with the
    # build-time call in ``llm_config.py``; this run merely adds
    # ``prompt_cache_key=f"surfsense-thread-{thread_id}"`` for OpenAI-family
    # configs now that ``thread_id`` is known. No-op when ``thread_id`` is
    # None or the provider is non-OpenAI-family.
    apply_litellm_prompt_caching(llm, agent_config=agent_config, thread_id=thread_id)

    filesystem_selection = filesystem_selection or FilesystemSelection()
    backend_resolver = build_backend_resolver(
        filesystem_selection,
        search_space_id=search_space_id
        if filesystem_selection.mode == FilesystemMode.CLOUD
        else None,
    )

    # Discover available connectors and document types for this search space.
    #
    # NOTE: These two calls cannot be parallelized via ``asyncio.gather``.
    # ``ConnectorService`` shares a single ``AsyncSession`` (``self.session``);
    # SQLAlchemy explicitly forbids concurrent operations on the same session
    # ("This session is provisioning a new connection; concurrent operations
    # are not permitted on the same session"). The Phase 1.4 in-process TTL
    # cache in ``connector_service`` already collapses the warm path to a
    # near-zero pair of dict lookups, so sequential awaits cost nothing in
    # the common case while remaining correct on cold cache misses.
    available_connectors: list[str] | None = None
    available_document_types: list[str] | None = None

    _t0 = time.perf_counter()
    try:
        try:
            connector_types_result = await connector_service.get_available_connectors(
                search_space_id
            )
            if connector_types_result:
                available_connectors = map_connectors_to_searchable_types(
                    connector_types_result
                )
        except Exception as e:
            logging.warning("Failed to discover available connectors: %s", e)

        try:
            available_document_types = (
                await connector_service.get_available_document_types(search_space_id)
            )
        except Exception as e:
            logging.warning("Failed to discover available document types: %s", e)
    except Exception as e:  # pragma: no cover - defensive outer guard
        # Lazy %-formatting, consistent with the inner guards above.
        logging.warning(
            "Failed to discover available connectors/document types: %s", e
        )
    _perf_log.info(
        "[create_agent] Connector/doc-type discovery in %.3fs",
        time.perf_counter() - _t0,
    )

    # Build dependencies dict for the tools registry
    visibility = thread_visibility or ChatVisibility.PRIVATE

    # Extract the model's context window so tools can size their output.
    _model_profile = getattr(llm, "profile", None)
    _max_input_tokens: int | None = (
        _model_profile.get("max_input_tokens")
        if isinstance(_model_profile, dict)
        else None
    )

    dependencies = {
        "search_space_id": search_space_id,
        "db_session": db_session,
        "connector_service": connector_service,
        "firecrawl_api_key": firecrawl_api_key,
        "user_id": user_id,
        "thread_id": thread_id,
        "thread_visibility": visibility,
        "available_connectors": available_connectors,
        "available_document_types": available_document_types,
        "max_input_tokens": _max_input_tokens,
        "llm": llm,
    }

    # Work on a copy so the caller's ``disabled_tools`` list is never mutated.
    modified_disabled_tools = list(disabled_tools) if disabled_tools else []
    modified_disabled_tools.extend(get_connector_gated_tools(available_connectors))

    # Remove direct KB search tool; KnowledgePriorityMiddleware now runs hybrid
    # search per turn and surfaces hits as a hint plus inline markers inside
    # lazy-loaded XML.
    if "search_knowledge_base" not in modified_disabled_tools:
        modified_disabled_tools.append("search_knowledge_base")

    # Build tools using the async registry (includes MCP tools)
    _t0 = time.perf_counter()
    tools = await build_tools_async(
        dependencies=dependencies,
        enabled_tools=enabled_tools,
        disabled_tools=modified_disabled_tools,
        additional_tools=list(additional_tools) if additional_tools else None,
    )

    # Register the ``invalid`` tool only when tool-call repair is on. It
    # is dispatched only when :class:`ToolCallNameRepairMiddleware`
    # rewrites a malformed call. We intentionally append it AFTER
    # ``build_tools_async`` so it never appears in the system-prompt
    # tool list (which is built from the registry, not the bound tool
    # list).
    _flags: AgentFeatureFlags = get_flags()
    if _flags.enable_tool_call_repair and INVALID_TOOL_NAME not in {
        t.name for t in tools
    }:
        tools = [*tools, invalid_tool]
    _perf_log.info(
        "[create_agent] build_tools_async in %.3fs (%d tools)",
        time.perf_counter() - _t0,
        len(tools),
    )

    # Build system prompt based on agent_config, scoped to the tools actually enabled
    _t0 = time.perf_counter()
    _enabled_tool_names = {t.name for t in tools}
    _user_disabled_tool_names = set(disabled_tools) if disabled_tools else set()

    # Collect generic MCP connector info so the system prompt can route queries
    # to their tools instead of falling back to "not in knowledge base".
    _mcp_connector_tools: dict[str, list[str]] = {}
    for t in tools:
        meta = getattr(t, "metadata", None) or {}
        if meta.get("mcp_is_generic") and meta.get("mcp_connector_name"):
            _mcp_connector_tools.setdefault(
                meta["mcp_connector_name"],
                [],
            ).append(t.name)

    if _mcp_connector_tools:
        _perf_log.info("MCP connector tool routing: %s", _mcp_connector_tools)

    # Both prompt builders take the same model id; resolve it once.
    _prompt_model_name = _resolve_prompt_model_name(agent_config, llm)
    if agent_config is not None:
        system_prompt = build_configurable_system_prompt(
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
            model_name=_prompt_model_name,
        )
    else:
        system_prompt = build_surfsense_system_prompt(
            thread_visibility=thread_visibility,
            enabled_tool_names=_enabled_tool_names,
            disabled_tool_names=_user_disabled_tool_names,
            mcp_connector_tools=_mcp_connector_tools,
            model_name=_prompt_model_name,
        )
    _perf_log.info(
        "[create_agent] System prompt built in %.3fs", time.perf_counter() - _t0
    )

    # Combine system_prompt with BASE_AGENT_PROMPT (same as create_deep_agent)
    final_system_prompt = system_prompt + "\n\n" + BASE_AGENT_PROMPT

    # The middleware stack — and especially ``SubAgentMiddleware`` — is *not*
    # cheap to build. ``SubAgentMiddleware.__init__`` calls ``create_agent``
    # synchronously to compile the general-purpose subagent's full state graph
    # (every tool + every middleware → pydantic schemas + langgraph compile).
    # On gpt-5.x agents that's roughly 1.5-2s of pure CPU work. If we run it
    # directly here it blocks the asyncio event loop for the whole streaming
    # task (and any other coroutine sharing this loop), which is why
    # "agent creation" wall-clock time used to stretch to ~3-4s. Move the
    # entire middleware build + main-graph compile into a single
    # ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
    # event loop stays responsive.
    #
    # PHASE 1: cache the resulting compiled graph. ``agent_cache`` is keyed
    # on every per-request value that any middleware in the stack closes
    # over in ``__init__`` — drop one and you risk leaking state across
    # threads. Hits collapse this whole block to a microsecond lookup;
    # misses pay the original CPU cost AND populate the cache.
    config_id = agent_config.config_id if agent_config is not None else None

    async def _build_agent() -> Any:
        return await asyncio.to_thread(
            _build_compiled_agent_blocking,
            llm=llm,
            tools=tools,
            final_system_prompt=final_system_prompt,
            backend_resolver=backend_resolver,
            filesystem_mode=filesystem_selection.mode,
            search_space_id=search_space_id,
            user_id=user_id,
            thread_id=thread_id,
            visibility=visibility,
            anon_session_id=anon_session_id,
            available_connectors=available_connectors,
            available_document_types=available_document_types,
            # ``mentioned_document_ids`` is consumed by
            # ``KnowledgePriorityMiddleware`` per turn via
            # ``runtime.context`` (Phase 1.5). We still pass the
            # caller-provided list here for the legacy fallback path
            # (cache disabled / context not propagated) — the middleware
            # drains its own copy after the first read so a cached graph
            # never replays stale mentions.
            mentioned_document_ids=mentioned_document_ids,
            max_input_tokens=_max_input_tokens,
            flags=_flags,
            checkpointer=checkpointer,
        )

    # Evaluate the cache gate once so the branch taken and the logged
    # ``cache=on/off`` label can never disagree.
    _cache_enabled = _flags.enable_agent_cache and not _flags.disable_new_agent_stack

    _t0 = time.perf_counter()
    if _cache_enabled:
        # Cache key components — order matters only for human readability;
        # the resulting hash is what's stored. Every component must
        # rotate on a real shape change AND stay stable across identical
        # invocations.
        cache_key = stable_hash(
            "v1",  # schema version of the key — bump if components change
            config_id,
            thread_id,
            user_id,
            search_space_id,
            visibility,
            filesystem_selection.mode,
            anon_session_id,
            tools_signature(
                tools,
                available_connectors=available_connectors,
                available_document_types=available_document_types,
            ),
            flags_signature(_flags),
            system_prompt_hash(final_system_prompt),
            _max_input_tokens,
            # ``mentioned_document_ids`` deliberately omitted — middleware
            # reads it from ``runtime.context`` (Phase 1.5).
        )
        agent = await get_cache().get_or_build(cache_key, builder=_build_agent)
    else:
        agent = await _build_agent()
    _perf_log.info(
        "[create_agent] Middleware stack + graph compiled in %.3fs (cache=%s)",
        time.perf_counter() - _t0,
        "on" if _cache_enabled else "off",
    )

    _perf_log.info(
        "[create_agent] Total agent creation in %.3fs",
        time.perf_counter() - _t_agent_total,
    )
    return agent
# Names of tools whose results are passed as ``exclude_tools`` to the
# context-editing edits, i.e. outputs considered too costly / lossy to
# discard. Keep the set conservative.
_PRUNE_PROTECTED_TOOL_NAMES: frozenset[str] = frozenset(
    {
        "generate_report",
        "generate_resume",
        "generate_podcast",
        "generate_video_presentation",
        "generate_image",
        # Read-heavy connector reads — recomputing them is expensive
        "read_email",
        "search_emails",
        # The fallback for malformed tool calls — keep its replies visible
        "invalid",
    }
)


def _safe_exclude_tools(tools: Sequence[BaseTool]) -> tuple[str, ...]:
    """Return ``exclude_tools`` derived from the actually-bound tool list.

    Intersects :data:`_PRUNE_PROTECTED_TOOL_NAMES` with the names of the
    bound tools so we never list tools that don't exist (would be a silent
    no-op).

    Args:
        tools: The tools bound to the agent; only ``.name`` is read.

    Returns:
        The protected tool names that are actually bound, in sorted order.
        Sorting makes the result reproducible: iterating the ``frozenset``
        directly yields an order that changes between processes when
        string hash randomization is active.
    """
    bound_names = {tool.name for tool in tools}
    return tuple(sorted(_PRUNE_PROTECTED_TOOL_NAMES & bound_names))
# Connector gating: any tool whose ``ToolDefinition.required_connector``
# isn't actually wired up gets a synthesized permission deny rule so
# execution attempts short-circuit with ``permission_denied`` instead of
# bubbling up provider-specific 401/404 errors. Mirrors OpenCode's
# ``Permission.disabled`` (declarative, per-tool gating) — replaces the
# legacy binary ``_CONNECTOR_TYPE_TO_SEARCHABLE`` substring-heuristic.
def _synthesize_connector_deny_rules(
    *,
    available_connectors: list[str] | None,
    enabled_tool_names: set[str],
) -> list[Rule]:
    """Produce one ``deny`` rule per bound tool whose connector is missing.

    ``ToolDefinition.required_connector`` in :data:`BUILTIN_TOOLS` is the
    source of truth. A definition contributes a rule only when all of the
    following hold:

    * its name is in ``enabled_tool_names`` (the tool is currently bound);
    * it declares a truthy ``required_connector``;
    * that connector is absent from ``available_connectors``.

    Args:
        available_connectors: Connector identifiers available to the search
            space; ``None`` is treated the same as an empty list.
        enabled_tool_names: Names of the tools bound to the agent.

    Returns:
        ``Rule(permission=<tool name>, pattern="*", action="deny")`` entries
        in ``BUILTIN_TOOLS`` iteration order; empty when nothing is gated.
    """
    connected = set(available_connectors or [])
    return [
        Rule(permission=definition.name, pattern="*", action="deny")
        for definition in BUILTIN_TOOLS
        if definition.name in enabled_tool_names
        and (needed := definition.required_connector)
        and needed not in connected
    ]
- gp_middleware = [ - TodoListMiddleware(), - _memory_middleware, - FileIntentMiddleware(llm=llm), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - create_surfsense_compaction_middleware(llm, StateBackend), - PatchToolCallsMiddleware(), - ] - - general_purpose_spec: SubAgent = { # type: ignore[typeddict-unknown-key] - **GENERAL_PURPOSE_SUBAGENT, - "model": llm, - "tools": tools, - "middleware": gp_middleware, - } - - # Specialized user-facing subagents (explore, report_writer, - # connector_negotiator). Registered through SubAgentMiddleware alongside - # the general-purpose spec so the parent's `task` tool can address them - # by name. Off by default until the flag flips so existing deployments - # don't see new agent types in the task tool description. - specialized_subagents: list[SubAgent] = [] - if flags.enable_specialized_subagents and not flags.disable_new_agent_stack: - try: - # Specialized subagents share the parent's filesystem + - # todo view so their system prompts (which promise - # ``read_file``, ``ls``, ``grep``, ``glob``, ``write_todos``) - # actually match runtime behavior. Build *fresh* instances - # rather than aliasing the parent's GP middleware to avoid - # subtle state coupling across compiled graphs. 
- subagent_extra_middleware: list = [ - TodoListMiddleware(), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - ] - specialized_subagents = build_specialized_subagents( - tools=tools, - model=llm, - extra_middleware=subagent_extra_middleware, - ) - logging.info( - "Specialized subagents registered for task tool: %s", - [s["name"] for s in specialized_subagents], - ) - except Exception as exc: # pragma: no cover - defensive - logging.warning( - "Specialized subagent build failed; running without them: %s", - exc, - ) - specialized_subagents = [] - - subagent_specs: list[SubAgent] = [general_purpose_spec, *specialized_subagents] - - # Main agent middleware - # Order: AnonDoc -> Tree -> Priority -> FileIntent -> Filesystem -> Persistence -> ... - # before_agent hooks run in declared order; later injections sit closer to - # the latest human turn. Tree (large + cacheable) is injected earliest so - # provider-side prefix caching has more material to hit; FileIntent (most - # actionable per-turn contract) is injected closest to the user message. - # - # ``wrap_model_call`` ordering: the FIRST middleware in the list is the - # OUTERMOST wrapper. To ensure prune executes before summarization, - # place ``SpillingContextEditingMiddleware`` before - # ``SurfSenseCompactionMiddleware``. Compaction is the canonical - # token-budget defense; the Bedrock buffer-empty defense is folded - # into ``SurfSenseCompactionMiddleware``. - summarization_mw = create_surfsense_compaction_middleware(llm, StateBackend) - _ = flags.enable_compaction_v2 # historical flag; retained for telemetry parity - - # ContextEditing prune. Trigger at 55% of ``max_input_tokens``, - # earlier than summarization (~85%). When disabled, no edit runs. 
- context_edit_mw = None - if ( - flags.enable_context_editing - and not flags.disable_new_agent_stack - and max_input_tokens - ): - spill_edit = SpillToBackendEdit( - trigger=int(max_input_tokens * 0.55), - clear_at_least=int(max_input_tokens * 0.15), - keep=5, - exclude_tools=_safe_exclude_tools(tools), - clear_tool_inputs=True, - ) - clear_edit = ClearToolUsesEdit( - trigger=int(max_input_tokens * 0.55), - clear_at_least=int(max_input_tokens * 0.15), - keep=5, - exclude_tools=_safe_exclude_tools(tools), - clear_tool_inputs=True, - placeholder="[cleared - older tool output trimmed for context]", - ) - context_edit_mw = SpillingContextEditingMiddleware( - edits=[spill_edit, clear_edit], - backend_resolver=backend_resolver, - ) - - # Resilience knobs: header-aware retry, model fallback, and - # per-thread / per-run call-count limits. The fallback / limit - # middlewares are vanilla LangChain primitives; ``RetryAfter`` is - # SurfSense's header-aware variant (see its module docstring). - retry_mw = ( - RetryAfterMiddleware(max_retries=3) - if flags.enable_retry_after and not flags.disable_new_agent_stack - else None - ) - # Fallback chain — primary is the agent's own model; we add cheap - # alternatives. Off by default; only the first call site that - # configures the chain via env should enable it. 
- fallback_mw: ScopedModelFallbackMiddleware | None = None - if flags.enable_model_fallback and not flags.disable_new_agent_stack: - try: - fallback_mw = ScopedModelFallbackMiddleware( - "openai:gpt-4o-mini", - "anthropic:claude-3-5-haiku-20241022", - ) - except Exception: - logging.warning("ScopedModelFallbackMiddleware init failed; skipping.") - fallback_mw = None - model_call_limit_mw = ( - ModelCallLimitMiddleware( - thread_limit=120, - run_limit=80, - exit_behavior="end", - ) - if flags.enable_model_call_limit and not flags.disable_new_agent_stack - else None - ) - tool_call_limit_mw = ( - ToolCallLimitMiddleware( - thread_limit=300, run_limit=80, exit_behavior="continue" - ) - if flags.enable_tool_call_limit and not flags.disable_new_agent_stack - else None - ) - - # Provider-compat ``_noop`` injection (mirrors OpenCode's - # ``llm.ts`` workaround for providers that reject empty assistant - # turns or alternating-role constraints). - noop_mw = ( - NoopInjectionMiddleware() - if flags.enable_compaction_v2 and not flags.disable_new_agent_stack - else None - ) - - # Tool-call name repair (lowercase + ``invalid`` fallback). - # - # ``registered_tool_names`` MUST cover every tool the model can legitimately - # call. That includes the bound ``tools`` list AND every tool provided by - # middleware in the stack — ``FilesystemMiddleware`` (read_file, ls, grep, - # glob, edit_file, write_file, execute), ``TodoListMiddleware`` - # (write_todos), ``SubAgentMiddleware`` (task), ``SkillsMiddleware`` (skill - # loaders), etc. If we only inspect ``tools`` here, every call to - # ``read_file`` / ``ls`` / ``grep`` from the model will be rewritten to - # ``invalid`` because the repair middleware doesn't recognize them. The - # built-in deepagents middleware aren't in scope yet at this point of the - # function but they're added unconditionally below, so we hard-code their - # canonical names alongside the dynamic ``tools`` set. 
- repair_mw = None - if flags.enable_tool_call_repair and not flags.disable_new_agent_stack: - registered_names: set[str] = {t.name for t in tools} - # Tools owned by the standard deepagents middleware stack and the - # SurfSense filesystem extension. - registered_names |= { - "write_todos", - "ls", - "read_file", - "write_file", - "edit_file", - "glob", - "grep", - "execute", - "task", - "mkdir", - "cd", - "pwd", - "move_file", - "rm", - "rmdir", - "list_tree", - "execute_code", - } - repair_mw = ToolCallNameRepairMiddleware( - registered_tool_names=registered_names, - # Disable fuzzy matching to avoid silent rewrites; the - # lowercase + ``invalid`` fallback alone covers >95% of - # observed model errors. - fuzzy_match_threshold=None, - ) - - # Doom-loop detector. Off by default until the frontend handles - # ``permission == "doom_loop"`` interrupts. - doom_loop_mw = ( - DoomLoopMiddleware(threshold=3) - if flags.enable_doom_loop and not flags.disable_new_agent_stack - else None - ) - - # PermissionMiddleware. Layers, earliest -> latest (last match wins, - # same evaluation order as OpenCode's ``permission/index.ts``): - # - # 1. ``surfsense_defaults`` — single ``allow */*`` rule. SurfSense - # already runs per-tool HITL (see ``tools/hitl.py``) for mutating - # connector tools, so we only want PermissionMiddleware to *deny* - # things the user has gated off; the default fallback in - # ``permissions.evaluate`` is ``ask``, which would double-prompt - # on every safe read-only call (``ls``, ``read_file``, ``grep``, - # ``glob``, ``web_search`` …) and, on resume, replay the previous - # reject decision into innocent calls. - # 2. ``desktop_safety`` — ``ask`` for destructive filesystem ops when - # the agent is operating against the user's real disk. 
Cloud mode - # has full revision-based revert via ``revert_service``, but - # desktop mode hits disk immediately with no undo, so an - # accidental ``rm`` / ``rmdir`` / ``move_file`` / ``edit_file`` / - # ``write_file`` is unrecoverable. This layer is forced on in - # desktop mode regardless of ``enable_permission`` because the - # safety net is non-negotiable. - # 3. ``connector_synthesized`` — deny rules for tools whose required - # connector is not connected to this space. Overrides #1/#2. - # 4. (future) user-defined rules from ``agent_permission_rules`` table - # via the Agent Permissions UI. Loaded last so they override all. - permission_mw: PermissionMiddleware | None = None - is_desktop_fs = filesystem_mode == FilesystemMode.DESKTOP_LOCAL_FOLDER - permission_enabled = flags.enable_permission and not flags.disable_new_agent_stack - # Build the middleware whenever it has work to do: either the user - # opted into the rule engine, OR we're in desktop mode and need the - # safety rules unconditionally. - if permission_enabled or is_desktop_fs: - rulesets: list[Ruleset] = [ - Ruleset( - rules=[Rule(permission="*", pattern="*", action="allow")], - origin="surfsense_defaults", - ), - ] - if is_desktop_fs: - rulesets.append( - Ruleset( - rules=[ - Rule(permission="rm", pattern="*", action="ask"), - Rule(permission="rmdir", pattern="*", action="ask"), - Rule(permission="move_file", pattern="*", action="ask"), - Rule(permission="edit_file", pattern="*", action="ask"), - Rule(permission="write_file", pattern="*", action="ask"), - ], - origin="desktop_safety", - ) - ) - if permission_enabled: - synthesized = _synthesize_connector_deny_rules( - available_connectors=available_connectors, - enabled_tool_names={t.name for t in tools}, - ) - rulesets.append(Ruleset(rules=synthesized, origin="connector_synthesized")) - permission_mw = PermissionMiddleware(rulesets=rulesets) - - # ActionLogMiddleware. Off by default until the ``agent_action_log`` - # table is migrated. 
When enabled, persists one row per tool call - # with optional reverse_descriptor for - # ``POST /api/threads/{thread_id}/revert/{action_id}``. Sits inside - # ``permission`` so denied calls aren't logged as completions. - action_log_mw: ActionLogMiddleware | None = None - if ( - flags.enable_action_log - and not flags.disable_new_agent_stack - and thread_id is not None - ): - try: - tool_defs_by_name = {td.name: td for td in BUILTIN_TOOLS} - action_log_mw = ActionLogMiddleware( - thread_id=thread_id, - search_space_id=search_space_id, - user_id=user_id, - tool_definitions=tool_defs_by_name, - ) - except Exception: # pragma: no cover - defensive - logging.warning( - "ActionLogMiddleware init failed; running without it.", - exc_info=True, - ) - action_log_mw = None - - # Per-thread busy mutex (refuse a second concurrent turn on the same - # thread; see :class:`BusyMutexMiddleware` docstring). - busy_mutex_mw: BusyMutexMiddleware | None = ( - BusyMutexMiddleware() - if flags.enable_busy_mutex and not flags.disable_new_agent_stack - else None - ) - - # OpenTelemetry spans (model.call + tool.call). Lives just inside - # BusyMutex so it spans every retry/fallback attempt of the current - # turn but never wraps a queued/blocked turn. - otel_mw: OtelSpanMiddleware | None = ( - OtelSpanMiddleware() - if flags.enable_otel and not flags.disable_new_agent_stack - else None - ) - - # Plugin entry-point loader. Off by default; opt-in via the - # ``SURFSENSE_ENABLE_PLUGIN_LOADER`` flag. The allowlist is read from - # the ``SURFSENSE_ALLOWED_PLUGINS`` env var (comma-separated). A future - # PR can wire it through ``global_llm_config.yaml``. 
- plugin_middlewares: list[Any] = [] - if flags.enable_plugin_loader and not flags.disable_new_agent_stack: - try: - allowed_names = load_allowed_plugin_names_from_env() - if allowed_names: - plugin_middlewares = load_plugin_middlewares( - PluginContext.build( - search_space_id=search_space_id, - user_id=user_id, - thread_visibility=visibility, - llm=llm, - ), - allowed_plugin_names=allowed_names, - ) - except Exception: # pragma: no cover - defensive - logging.warning( - "Plugin loader failed; continuing without plugins.", - exc_info=True, - ) - plugin_middlewares = [] - - # SkillsMiddleware (deepagents) loads built-in + space-authored - # skills via a CompositeBackend. Sources are layered: built-in first, - # space last, so a search-space-authored skill of the same name - # overrides the bundled one. - skills_mw: SkillsMiddleware | None = None - if flags.enable_skills and not flags.disable_new_agent_stack: - try: - skills_factory = build_skills_backend_factory( - search_space_id=search_space_id - if filesystem_mode == FilesystemMode.CLOUD - else None, - ) - skills_mw = SkillsMiddleware( - backend=skills_factory, - sources=default_skills_sources(), - ) - except Exception as exc: # pragma: no cover - defensive - logging.warning("SkillsMiddleware init failed; skipping: %s", exc) - skills_mw = None - - # LangChain's LLM-driven tool selection — only enabled for stacks - # large enough to need narrowing (>30 tools). 
- selector_mw: LLMToolSelectorMiddleware | None = None - if ( - flags.enable_llm_tool_selector - and not flags.disable_new_agent_stack - and len(tools) > 30 - ): - try: - selector_mw = LLMToolSelectorMiddleware( - model="openai:gpt-4o-mini", - max_tools=12, - always_include=[ - name - for name in ( - "update_memory", - "get_connected_accounts", - "scrape_webpage", - ) - if name in {t.name for t in tools} - ], - ) - except Exception: - logging.warning("LLMToolSelectorMiddleware init failed; skipping.") - selector_mw = None - - deepagent_middleware = [ - # BusyMutex is OUTERMOST: it must wrap the entire stream so no - # other turn can sneak in while this one is mid-flight. - busy_mutex_mw, - # OTel spans sit just inside BusyMutex so each retry attempt - # gets its own model.call / tool.call span. - otel_mw, - TodoListMiddleware(), - _memory_middleware, - AnonymousDocumentMiddleware( - anon_session_id=anon_session_id, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - KnowledgeTreeMiddleware( - search_space_id=search_space_id, - filesystem_mode=filesystem_mode, - llm=llm, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - KnowledgePriorityMiddleware( - llm=llm, - planner_llm=get_planner_llm(), - search_space_id=search_space_id, - filesystem_mode=filesystem_mode, - available_connectors=available_connectors, - available_document_types=available_document_types, - mentioned_document_ids=mentioned_document_ids, - ), - FileIntentMiddleware(llm=llm), - SurfSenseFilesystemMiddleware( - backend=backend_resolver, - filesystem_mode=filesystem_mode, - search_space_id=search_space_id, - created_by_id=user_id, - thread_id=thread_id, - ), - KnowledgeBasePersistenceMiddleware( - search_space_id=search_space_id, - created_by_id=user_id, - filesystem_mode=filesystem_mode, - thread_id=thread_id, - ) - if filesystem_mode == FilesystemMode.CLOUD - else None, - # Skill loader. 
Placed before SubAgentMiddleware so subagents - # inherit the same skill metadata (subagent specs reference the - # same source paths via ``default_skills_sources()``). - skills_mw, - SubAgentMiddleware(backend=StateBackend, subagents=subagent_specs), - # Tool selection (only when >30 tools and flag on). - selector_mw, - # Defensive caps, then prune, then summarize. - model_call_limit_mw, - tool_call_limit_mw, - context_edit_mw, - summarization_mw, - # Provider compatibility + retry chain — placed after prune/compact - # so retries happen on the already-trimmed payload. - noop_mw, - retry_mw, - fallback_mw, - # Coalesce a multi-text-block system message into one block - # immediately before the model call. Sits innermost on the - # system-message-mutation chain so it observes every appender - # (todo / filesystem / skills / subagents …) and prevents - # OpenRouter→Anthropic from redistributing ``cache_control`` - # across N blocks and tripping Anthropic's 4-breakpoint cap. - # See ``middleware/flatten_system.py`` for full rationale. - FlattenSystemMessageMiddleware(), - # Tool-call repair must run after model emits but before - # permission / dedup / doom-loop interpret the calls. - repair_mw, - # Permission deny/ask BEFORE the calls are forwarded to tool nodes. - permission_mw, - doom_loop_mw, - # Action log sits inside permission so denied calls don't appear - # as completions, and outside dedup so each unique tool invocation - # gets its own row. - action_log_mw, - PatchToolCallsMiddleware(), - DedupHITLToolCallsMiddleware(agent_tools=list(tools)), - # Plugin slot — sits at the tail so plugin-side transforms see the - # final tool result. Prompt caching is now applied at LLM build time - # via ``apply_litellm_prompt_caching`` (see prompt_caching.py), so no - # caching middleware is needed here. Multiple plugins run in declared - # order; loader filtered by the admin allowlist already. 
- *plugin_middlewares, - ] - deepagent_middleware = [m for m in deepagent_middleware if m is not None] - - agent = create_agent( - llm, - system_prompt=final_system_prompt, - tools=list(tools), - middleware=deepagent_middleware, - context_schema=SurfSenseContextSchema, - checkpointer=checkpointer, - ) - return agent.with_config( - { - "recursion_limit": 10_000, - "metadata": { - "ls_integration": "deepagents", - "versions": {"deepagents": deepagents_version}, - }, - } - ) diff --git a/surfsense_backend/app/agents/new_chat/connector_searchable_types.py b/surfsense_backend/app/agents/new_chat/connector_searchable_types.py deleted file mode 100644 index c6efb1a68..000000000 --- a/surfsense_backend/app/agents/new_chat/connector_searchable_types.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.connector_searchable_types``. Re-exported here for -the frozen single-agent stack (``chat_deepagent``) until that stack is retired. -""" - -from app.agents.shared.connector_searchable_types import ( - map_connectors_to_searchable_types, -) - -__all__ = ["map_connectors_to_searchable_types"] diff --git a/surfsense_backend/app/agents/new_chat/context.py b/surfsense_backend/app/agents/new_chat/context.py deleted file mode 100644 index f77b1709d..000000000 --- a/surfsense_backend/app/agents/new_chat/context.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Backward-compatible shim. - -The agent context schema moved to :mod:`app.agents.shared.context` as part of -promoting the shared agent toolkit out of ``new_chat`` into the cross-agent -kernel. Import from there directly; this re-export keeps the remaining -importers (the not-yet-retired single-agent stack and the ``new_chat`` package -__init__) working during the migration and will be removed with them. 
-""" - -from __future__ import annotations - -from app.agents.shared.context import ( - FileOperationContractState, - SurfSenseContextSchema, -) - -__all__ = [ - "FileOperationContractState", - "SurfSenseContextSchema", -] diff --git a/surfsense_backend/app/agents/new_chat/feature_flags.py b/surfsense_backend/app/agents/new_chat/feature_flags.py deleted file mode 100644 index 43e671952..000000000 --- a/surfsense_backend/app/agents/new_chat/feature_flags.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Backward-compatible shim. - -The agent feature-flag resolver moved to :mod:`app.agents.shared.feature_flags` -as part of promoting the shared agent toolkit out of ``new_chat`` into the -cross-agent kernel. Import from there directly; this re-export keeps the -not-yet-retired single-agent stack working during the migration and will be -removed with it. -""" - -from __future__ import annotations - -from app.agents.shared.feature_flags import ( - AgentFeatureFlags, - get_flags, - reload_for_tests, -) - -__all__ = [ - "AgentFeatureFlags", - "get_flags", - "reload_for_tests", -] diff --git a/surfsense_backend/app/agents/new_chat/filesystem_backends.py b/surfsense_backend/app/agents/new_chat/filesystem_backends.py deleted file mode 100644 index 1df374e5c..000000000 --- a/surfsense_backend/app/agents/new_chat/filesystem_backends.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.filesystem_backends``. Re-exported here for the -frozen single-agent stack (``chat_deepagent``) until that stack is retired. -""" - -from app.agents.shared.filesystem_backends import build_backend_resolver - -__all__ = ["build_backend_resolver"] diff --git a/surfsense_backend/app/agents/new_chat/filesystem_selection.py b/surfsense_backend/app/agents/new_chat/filesystem_selection.py deleted file mode 100644 index 32ea3d4dc..000000000 --- a/surfsense_backend/app/agents/new_chat/filesystem_selection.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Backward-compatible shim. 
- -The filesystem mode contracts moved to :mod:`app.agents.shared.filesystem_selection` -as part of promoting the shared agent toolkit out of ``new_chat`` into the -cross-agent kernel. Import from there directly; this re-export keeps the -not-yet-retired single-agent stack working during the migration and will be -removed with it. -""" - -from __future__ import annotations - -from app.agents.shared.filesystem_selection import ( - ClientPlatform, - FilesystemMode, - FilesystemSelection, - LocalFilesystemMount, -) - -__all__ = [ - "ClientPlatform", - "FilesystemMode", - "FilesystemSelection", - "LocalFilesystemMount", -] diff --git a/surfsense_backend/app/agents/new_chat/llm_config.py b/surfsense_backend/app/agents/new_chat/llm_config.py deleted file mode 100644 index 28bca9360..000000000 --- a/surfsense_backend/app/agents/new_chat/llm_config.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Backward-compatible shim. - -The LLM configuration layer now lives in the shared agent kernel at -``app.agents.shared.llm_config``. This module re-exports it so frozen -single-agent code (``chat_deepagent``) keeps working until that stack is -retired. 
-""" - -from __future__ import annotations - -from app.agents.shared.llm_config import ( - AgentConfig, - SanitizedChatLiteLLM, - create_chat_litellm_from_agent_config, - create_chat_litellm_from_config, - load_agent_config, - load_agent_llm_config_for_search_space, - load_global_llm_config_by_id, - load_llm_config_from_yaml, - load_new_llm_config_from_db, -) - -__all__ = [ - "AgentConfig", - "SanitizedChatLiteLLM", - "create_chat_litellm_from_agent_config", - "create_chat_litellm_from_config", - "load_agent_config", - "load_agent_llm_config_for_search_space", - "load_global_llm_config_by_id", - "load_llm_config_from_yaml", - "load_new_llm_config_from_db", -] diff --git a/surfsense_backend/app/agents/new_chat/middleware/__init__.py b/surfsense_backend/app/agents/new_chat/middleware/__init__.py deleted file mode 100644 index 5a7324e2e..000000000 --- a/surfsense_backend/app/agents/new_chat/middleware/__init__.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Backward-compatible shim package. - -The agent middleware now lives in the shared kernel at -``app.agents.shared.middleware``. This package re-exports it so frozen -single-agent code (``chat_deepagent`` and ``subagents/*``) keeps working -until that stack is retired. 
-""" - -from app.agents.shared.middleware import ( - ActionLogMiddleware, - AnonymousDocumentMiddleware, - BuiltinSkillsBackend, - BusyMutexMiddleware, - ClearToolUsesEdit, - DedupHITLToolCallsMiddleware, - DoomLoopMiddleware, - FileIntentMiddleware, - FlattenSystemMessageMiddleware, - KnowledgeBasePersistenceMiddleware, - KnowledgeBaseSearchMiddleware, - KnowledgePriorityMiddleware, - KnowledgeTreeMiddleware, - MemoryInjectionMiddleware, - NoopInjectionMiddleware, - OtelSpanMiddleware, - PermissionMiddleware, - RetryAfterMiddleware, - SearchSpaceSkillsBackend, - SpillingContextEditingMiddleware, - SpillToBackendEdit, - SurfSenseCompactionMiddleware, - SurfSenseFilesystemMiddleware, - ToolCallNameRepairMiddleware, - build_skills_backend_factory, - commit_staged_filesystem_state, - create_surfsense_compaction_middleware, - default_skills_sources, -) - -__all__ = [ - "ActionLogMiddleware", - "AnonymousDocumentMiddleware", - "BuiltinSkillsBackend", - "BusyMutexMiddleware", - "ClearToolUsesEdit", - "DedupHITLToolCallsMiddleware", - "DoomLoopMiddleware", - "FileIntentMiddleware", - "FlattenSystemMessageMiddleware", - "KnowledgeBasePersistenceMiddleware", - "KnowledgeBaseSearchMiddleware", - "KnowledgePriorityMiddleware", - "KnowledgeTreeMiddleware", - "MemoryInjectionMiddleware", - "NoopInjectionMiddleware", - "OtelSpanMiddleware", - "PermissionMiddleware", - "RetryAfterMiddleware", - "SearchSpaceSkillsBackend", - "SpillToBackendEdit", - "SpillingContextEditingMiddleware", - "SurfSenseCompactionMiddleware", - "SurfSenseFilesystemMiddleware", - "ToolCallNameRepairMiddleware", - "build_skills_backend_factory", - "commit_staged_filesystem_state", - "create_surfsense_compaction_middleware", - "default_skills_sources", -] diff --git a/surfsense_backend/app/agents/new_chat/middleware/permission.py b/surfsense_backend/app/agents/new_chat/middleware/permission.py deleted file mode 100644 index e8395cbdb..000000000 --- 
a/surfsense_backend/app/agents/new_chat/middleware/permission.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.middleware.permission``. Re-exported here for the -frozen single-agent stack (``chat_deepagent``/``subagents``). -""" - -from app.agents.shared.middleware.permission import ( - PatternResolver, - PermissionMiddleware, - _normalize_permission_decision, -) - -__all__ = [ - "PatternResolver", - "PermissionMiddleware", - "_normalize_permission_decision", -] diff --git a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py b/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py deleted file mode 100644 index d1f4fffef..000000000 --- a/surfsense_backend/app/agents/new_chat/middleware/scoped_model_fallback.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.middleware.scoped_model_fallback``. Re-exported here -for the frozen single-agent stack (``chat_deepagent``). -""" - -from app.agents.shared.middleware.scoped_model_fallback import ( - ScopedModelFallbackMiddleware, -) - -__all__ = ["ScopedModelFallbackMiddleware"] diff --git a/surfsense_backend/app/agents/new_chat/middleware/skills_backends.py b/surfsense_backend/app/agents/new_chat/middleware/skills_backends.py deleted file mode 100644 index 37d0c6680..000000000 --- a/surfsense_backend/app/agents/new_chat/middleware/skills_backends.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.middleware.skills_backends``. Re-exported here for -the frozen single-agent stack (``subagents/config``). 
-""" - -from app.agents.shared.middleware.skills_backends import ( - SKILLS_BUILTIN_PREFIX, - SKILLS_SPACE_PREFIX, - BuiltinSkillsBackend, - SearchSpaceSkillsBackend, - build_skills_backend_factory, - default_skills_sources, -) - -__all__ = [ - "SKILLS_BUILTIN_PREFIX", - "SKILLS_SPACE_PREFIX", - "BuiltinSkillsBackend", - "SearchSpaceSkillsBackend", - "build_skills_backend_factory", - "default_skills_sources", -] diff --git a/surfsense_backend/app/agents/new_chat/permissions.py b/surfsense_backend/app/agents/new_chat/permissions.py deleted file mode 100644 index 49ded62ce..000000000 --- a/surfsense_backend/app/agents/new_chat/permissions.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Backward-compatible shim. - -The permission evaluator now lives in the shared agent kernel at -``app.agents.shared.permissions``. This module re-exports it so frozen -single-agent code (``chat_deepagent`` and ``subagents/*``) keeps working -until that stack is retired. -""" - -from __future__ import annotations - -from app.agents.shared.permissions import ( - Rule, - RuleAction, - Ruleset, - aggregate_action, - evaluate, - evaluate_many, - wildcard_match, -) - -__all__ = [ - "Rule", - "RuleAction", - "Ruleset", - "aggregate_action", - "evaluate", - "evaluate_many", - "wildcard_match", -] diff --git a/surfsense_backend/app/agents/new_chat/plugin_loader.py b/surfsense_backend/app/agents/new_chat/plugin_loader.py deleted file mode 100644 index 332a1ec26..000000000 --- a/surfsense_backend/app/agents/new_chat/plugin_loader.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.plugin_loader``. Re-exported here for the frozen -single-agent stack (``chat_deepagent``) until that stack is retired. 
-""" - -from app.agents.shared.plugin_loader import ( - PLUGIN_ENTRY_POINT_GROUP, - PluginContext, - load_allowed_plugin_names_from_env, - load_plugin_middlewares, -) - -__all__ = [ - "PLUGIN_ENTRY_POINT_GROUP", - "PluginContext", - "load_allowed_plugin_names_from_env", - "load_plugin_middlewares", -] diff --git a/surfsense_backend/app/agents/new_chat/prompt_caching.py b/surfsense_backend/app/agents/new_chat/prompt_caching.py deleted file mode 100644 index bfe421db0..000000000 --- a/surfsense_backend/app/agents/new_chat/prompt_caching.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Backward-compatible shim. - -The LiteLLM prompt-caching helper now lives in the shared agent kernel at -``app.agents.shared.prompt_caching``. This module re-exports it so frozen -single-agent code (``chat_deepagent``) keeps working until that stack is -retired. -""" - -from __future__ import annotations - -from app.agents.shared.prompt_caching import apply_litellm_prompt_caching - -__all__ = ["apply_litellm_prompt_caching"] diff --git a/surfsense_backend/app/agents/new_chat/subagents/__init__.py b/surfsense_backend/app/agents/new_chat/subagents/__init__.py deleted file mode 100644 index bd1823b57..000000000 --- a/surfsense_backend/app/agents/new_chat/subagents/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Specialized user-facing subagents for the SurfSense agent. - -The :class:`deepagents.SubAgentMiddleware` already provides the -materialization machinery (each :class:`deepagents.SubAgent` typed-dict -spec is compiled into an ephemeral runnable invoked via the ``task`` -tool); what's specific to SurfSense is the *seeding* of those subagents -with declarative deny rules. - -Per-subagent permission rules are injected as a -:class:`PermissionMiddleware` entry inside the subagent's ``middleware`` -field. The auto-deny pattern (e.g. 
forbid ``task``/``todowrite`` -recursion, block write tools for read-only research roles) is borrowed -from OpenCode's ``packages/opencode/src/tool/task.ts``, which has -analogous logic for restricting child sessions. -""" - -from .config import ( - build_connector_negotiator_subagent, - build_explore_subagent, - build_report_writer_subagent, - build_specialized_subagents, -) -from .providers.linear import build_linear_specialist_subagent -from .providers.slack import build_slack_specialist_subagent - -__all__ = [ - "build_connector_negotiator_subagent", - "build_explore_subagent", - "build_linear_specialist_subagent", - "build_report_writer_subagent", - "build_slack_specialist_subagent", - "build_specialized_subagents", -] diff --git a/surfsense_backend/app/agents/new_chat/subagents/config.py b/surfsense_backend/app/agents/new_chat/subagents/config.py deleted file mode 100644 index 2cfd47441..000000000 --- a/surfsense_backend/app/agents/new_chat/subagents/config.py +++ /dev/null @@ -1,436 +0,0 @@ -"""Builders for specialized SurfSense subagents. - -Each subagent is built from three pieces: - -1. A name + description + system prompt (the user-facing contract for - when ``task`` should delegate to this role). -2. A filtered tool list (subset of the parent's bound tools). -3. A :class:`PermissionMiddleware` instance carrying a deny ruleset that - prevents the subagent from acting outside its scope (e.g. an - explore-only role cannot mutate state). - -Skill sources (``/skills/builtin/`` + ``/skills/space/``) are inherited -from the parent unconditionally — every subagent benefits from the same -authored guidance documents. 
-""" - -from __future__ import annotations - -import logging -from collections.abc import Iterable, Sequence -from typing import TYPE_CHECKING, Any - -from app.agents.new_chat.middleware.skills_backends import default_skills_sources -from app.agents.new_chat.permissions import Rule, Ruleset -from app.agents.new_chat.subagents.providers.linear import ( - build_linear_specialist_subagent, -) -from app.agents.new_chat.subagents.providers.slack import ( - build_slack_specialist_subagent, -) - -if TYPE_CHECKING: - from deepagents import SubAgent - from langchain_core.language_models import BaseChatModel - from langchain_core.tools import BaseTool - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Tool name constants -# --------------------------------------------------------------------------- - -# Read-only tools that ``explore`` is permitted to use. Names match the -# tools provided by the deepagents ``FilesystemMiddleware`` (``ls``, ``read_file``, -# ``glob``, ``grep``) plus the SurfSense-side read tools. -EXPLORE_READ_TOOLS: frozenset[str] = frozenset( - { - "web_search", - "scrape_webpage", - "read_file", - "ls", - "glob", - "grep", - } -) - -# Tools ``report_writer`` may call. The set is intentionally narrow so the -# subagent doesn't drift into tangential research; if richer source-gathering -# is needed, the parent should hand off to ``explore`` first. -REPORT_WRITER_TOOLS: frozenset[str] = frozenset( - { - "read_file", - "generate_report", - } -) - -# Wildcard patterns that match write tools we deny by default in read-only -# subagents. Anchored at start AND end via :func:`Rule` semantics. We use -# substring-style ``*verb*`` patterns because connector tool names typically -# put the verb in the middle (``linear_create_issue``, ``slack_send_message``, -# ``notion_update_page``); strict suffix patterns (``*_create``) miss those. 
-# -# A handful of canonical exact-match names is appended so that bare verbs -# (``edit``, ``write``) are also blocked even when a connector dropped the -# usual prefix. -WRITE_TOOL_DENY_PATTERNS: tuple[str, ...] = ( - "*create*", - "*update*", - "*delete*", - "*send*", - "*write*", - "*edit*", - "*move*", - "*mkdir*", - "*upload*", - "edit_file", - "write_file", - "move_file", - "mkdir", - "rm", - "rmdir", - "update_memory", - "update_memory_team", - "update_memory_private", -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -# Tool names that are NOT in the registry's ``tools`` list because they -# are provided dynamically by middleware at compile time. We don't pass -# them through ``_filter_tools`` (the actual ``BaseTool`` instances live -# inside the middleware), but we do exempt them from the "missing" warning -# below — operators were seeing spurious noise like -# ``missing: ['glob', 'grep', 'ls', 'read_file']`` even though those -# tools are reachable via :class:`SurfSenseFilesystemMiddleware` once the -# subagent is compiled. -_MIDDLEWARE_PROVIDED_TOOL_NAMES: frozenset[str] = frozenset( - { - "ls", - "read_file", - "write_file", - "edit_file", - "glob", - "grep", - "execute", - "write_todos", - "task", - } -) - - -def _filter_tools( - tools: Sequence[BaseTool], - allowed_names: Iterable[str], -) -> list[BaseTool]: - """Return only tools whose ``name`` appears in ``allowed_names``. - - Tools are looked up by exact name. Names matching - :data:`_MIDDLEWARE_PROVIDED_TOOL_NAMES` are intentionally absent from - ``tools`` (they're injected by middleware at compile time) and are - silently excluded from the "missing" warning so operators don't see - false positives every build. 
- """ - allowed = set(allowed_names) - selected = [t for t in tools if t.name in allowed] - missing = sorted( - (allowed - {t.name for t in selected}) - _MIDDLEWARE_PROVIDED_TOOL_NAMES - ) - if missing: - logger.info( - "Subagent build: %d/%d registry tools available; missing: %s", - len(selected), - len(allowed - _MIDDLEWARE_PROVIDED_TOOL_NAMES), - missing, - ) - return selected - - -def _read_only_deny_rules() -> list[Rule]: - """Synthesize a list of deny rules covering common write-tool patterns.""" - return [ - Rule(permission=pattern, pattern="*", action="deny") - for pattern in WRITE_TOOL_DENY_PATTERNS - ] - - -def _build_permission_middleware(deny_rules: list[Rule], origin: str): - """Construct a :class:`PermissionMiddleware` seeded with ``deny_rules``. - - Imported lazily because the middleware module pulls in interrupt/HITL - machinery we don't want at import time of this config file. - """ - from app.agents.new_chat.middleware.permission import PermissionMiddleware - - return PermissionMiddleware( - rulesets=[Ruleset(rules=deny_rules, origin=origin)], - ) - - -def _wrap_with_subagent_essentials( - custom_middleware: list, - *, - agent_tools: Sequence[BaseTool], - extra_middleware: Sequence[Any] | None = None, -): - """Compose the final middleware list for a specialized subagent. - - Order, outer to inner: - - 1. ``extra_middleware`` — provided by the caller (typically the parent - agent's ``SurfSenseFilesystemMiddleware`` and ``TodoListMiddleware``) - so the subagent inherits the parent's filesystem/todo view. These - run **before** the subagent-local middleware so their tools are - wired up before permissioning kicks in. - 2. ``custom_middleware`` — subagent-local rules (e.g. permission deny - lists). - 3. :class:`PatchToolCallsMiddleware` — normalizes tool-call shapes. - 4. :class:`DedupHITLToolCallsMiddleware` — collapses duplicate HITL - calls using metadata declared at registry time. 
- - Without ``extra_middleware`` the subagent will only have the registry - tools listed in its ``tools`` field — meaning ``read_file``, ``ls``, - ``grep``, etc. won't exist. Always pass ``extra_middleware`` from the - parent unless you specifically want a sandboxed subagent. - """ - from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware - - from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware - - return [ - *(extra_middleware or []), - *custom_middleware, - PatchToolCallsMiddleware(), - DedupHITLToolCallsMiddleware(agent_tools=list(agent_tools)), - ] - - -# --------------------------------------------------------------------------- -# System prompts -# --------------------------------------------------------------------------- - -EXPLORE_SYSTEM_PROMPT = """You are the **explore** subagent for SurfSense. - -## Your job -Conduct read-only research across the user's knowledge base, the web, and any documents the parent agent has surfaced. Return a synthesized answer with explicit citations — never speculate beyond the sources you have actually inspected. - -## Tools available -- `web_search` — only when the user's KB clearly does not contain the answer. -- `scrape_webpage` — to read a URL the user or the search results provided. -- `read_file`, `ls`, `glob`, `grep` — to inspect specific documents or trees the parent has flagged. - -## Rules -- Read-only. You cannot create, edit, delete, send, or move anything. -- Cite every claim. Use `[citation:chunk_id]` exactly as the chunk tag specifies. -- If a sub-question has no support in the inspected sources, say so explicitly. Do not fabricate. -- Return the most useful synthesis in your single final message. The parent agent will not be able to follow up. -""" - - -REPORT_WRITER_SYSTEM_PROMPT = """You are the **report_writer** subagent for SurfSense. - -## Your job -Produce a single high-quality report deliverable using `generate_report`. 
The parent has already gathered (or knows where to gather) the underlying sources. - -## Workflow -1. **Outline first.** Before calling `generate_report`, write a one-paragraph outline of the sections you plan to produce. Confirm the outline reflects the parent's instructions. -2. **Source resolution.** Decide whether to call `read_file` for any final-checks, or whether the parent's earlier tool calls already cover the source set. -3. **One report.** Call `generate_report` exactly once with `source_strategy` chosen per the topic and chat history (see the `report-writing` skill). -4. **Confirm.** End with a one-sentence summary in your final message — never paste the report back into chat; the artifact card renders itself. -""" - - -CONNECTOR_NEGOTIATOR_SYSTEM_PROMPT = """You are the **connector_negotiator** subagent for SurfSense. - -## Your job -Coordinate cross-connector workflows: chains where the result of one service's tool feeds into another's. Common shapes include "find Linear issues mentioned in last week's Slack messages", "draft a Gmail reply citing a Notion doc", or "list Linear tickets opened by the same person who filed Jira FOO-123". - -## Workflow -1. **Plan.** Identify the connector hops needed and the order they should run in. Write a short plan in your first message. -2. **Verify access.** Use `get_connected_accounts` to confirm the relevant connectors are actually wired up before issuing tool calls. If a connector is missing, stop and report — do not fabricate. -3. **Execute.** Run each hop, citing IDs (issue keys, message ts, page IDs) in your scratch notes so the parent can audit. -4. **Hand back.** Return a structured summary with the final answer plus the chain of evidence (issue → message → page, etc.). - -## Caveats -- If a hop fails, do not retry blindly — return the partial result and explain. -- Mutating tools (create, update, delete, send) require parent permission; you are NOT cleared to call them on your own. 
-""" - - -# --------------------------------------------------------------------------- -# Subagent builders -# --------------------------------------------------------------------------- - - -def build_explore_subagent( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> SubAgent: - """Build the read-only ``explore`` subagent spec. - - Pass ``extra_middleware`` (typically the parent's filesystem + todo - middleware) so the subagent can actually use ``read_file``, ``ls``, - ``grep``, ``glob`` — which its system prompt promises but which only - exist when their middleware is mounted. - """ - from deepagents import SubAgent # noqa: F401 (TypedDict for type clarity) - - selected_tools = _filter_tools(tools, EXPLORE_READ_TOOLS) - deny_rules = _read_only_deny_rules() - permission_mw = _build_permission_middleware(deny_rules, origin="subagent_explore") - - spec: dict = { - "name": "explore", - "description": ( - "Read-only research across the user's knowledge base and the web. " - "Use when the parent needs deeply-cited synthesis without " - "modifying anything." - ), - "system_prompt": EXPLORE_SYSTEM_PROMPT, - "tools": selected_tools, - "middleware": _wrap_with_subagent_essentials( - [permission_mw], - agent_tools=selected_tools, - extra_middleware=extra_middleware, - ), - "skills": default_skills_sources(), - } - if model is not None: - spec["model"] = model - return spec # type: ignore[return-value] - - -def build_report_writer_subagent( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> SubAgent: - """Build the ``report_writer`` subagent spec. - - Read-only deny ruleset still applies — the subagent should call - ``generate_report`` and nothing else mutating. ``generate_report`` - creates a report artifact via a backend service and is intentionally - **not** denied. 
- - Pass ``extra_middleware`` (typically the parent's filesystem + todo - middleware) so the subagent can run ``read_file`` for source-checks - before calling ``generate_report``. - """ - selected_tools = _filter_tools(tools, REPORT_WRITER_TOOLS) - deny_rules = _read_only_deny_rules() - permission_mw = _build_permission_middleware( - deny_rules, origin="subagent_report_writer" - ) - - spec: dict = { - "name": "report_writer", - "description": ( - "Produce a single Markdown report artifact via generate_report, " - "using the outline-then-fill protocol. Use when the parent has " - "decided a deliverable is needed." - ), - "system_prompt": REPORT_WRITER_SYSTEM_PROMPT, - "tools": selected_tools, - "middleware": _wrap_with_subagent_essentials( - [permission_mw], - agent_tools=selected_tools, - extra_middleware=extra_middleware, - ), - "skills": default_skills_sources(), - } - if model is not None: - spec["model"] = model - return spec # type: ignore[return-value] - - -def build_connector_negotiator_subagent( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> SubAgent: - """Build the ``connector_negotiator`` subagent spec. - - Inherits all MCP / connector tools the parent has plus - ``get_connected_accounts``. Read-only by default; permission rules deny - write/mutation patterns. The parent agent re-asks for permission if a - connector mutation is genuinely needed. - - Pass ``extra_middleware`` (typically the parent's filesystem + todo - middleware) so this subagent shares the parent's filesystem view when - citing evidence across hops. - """ - parent_tool_names = {t.name for t in tools} - allowed: set[str] = set() - if "get_connected_accounts" in parent_tool_names: - allowed.add("get_connected_accounts") - # Inherit anything that smells connector- or MCP-related but is not a - # bulk-write API. Heuristic: keep all parent tools; rely on the deny - # ruleset to block mutation patterns. 
This mirrors the plan: "all - # MCP/connector tools the parent has". - for name in parent_tool_names: - allowed.add(name) - selected_tools = _filter_tools(tools, allowed) - - deny_rules = _read_only_deny_rules() - permission_mw = _build_permission_middleware( - deny_rules, origin="subagent_connector_negotiator" - ) - - spec: dict = { - "name": "connector_negotiator", - "description": ( - "Coordinate read-only chains across connectors (Slack → Linear, " - "Notion → Gmail, etc.). Returns a structured summary with the " - "evidence chain. Cannot mutate connector state." - ), - "system_prompt": CONNECTOR_NEGOTIATOR_SYSTEM_PROMPT, - "tools": selected_tools, - "middleware": _wrap_with_subagent_essentials( - [permission_mw], - agent_tools=selected_tools, - extra_middleware=extra_middleware, - ), - "skills": default_skills_sources(), - } - if model is not None: - spec["model"] = model - return spec # type: ignore[return-value] - - -def build_specialized_subagents( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> list[SubAgent]: - """Return the canonical list of specialized subagents to register. - - Order matters only for the order they appear in the ``task`` tool - description — most useful first. 
- """ - return [ - build_explore_subagent( - tools=tools, model=model, extra_middleware=extra_middleware - ), - build_report_writer_subagent( - tools=tools, model=model, extra_middleware=extra_middleware - ), - build_linear_specialist_subagent( - tools=tools, model=model, extra_middleware=extra_middleware - ), - build_slack_specialist_subagent( - tools=tools, model=model, extra_middleware=extra_middleware - ), - build_connector_negotiator_subagent( - tools=tools, model=model, extra_middleware=extra_middleware - ), - ] diff --git a/surfsense_backend/app/agents/new_chat/subagents/constants.py b/surfsense_backend/app/agents/new_chat/subagents/constants.py deleted file mode 100644 index cb1da499b..000000000 --- a/surfsense_backend/app/agents/new_chat/subagents/constants.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Shared constants for provider subagent safety policies.""" - -from __future__ import annotations - -# Generic mutation-deny patterns for read-only specialist roles. -WRITE_TOOL_DENY_PATTERNS: tuple[str, ...] = ( - "*create*", - "*update*", - "*delete*", - "*send*", - "*write*", - "*edit*", - "*move*", - "*mkdir*", - "*upload*", - "edit_file", - "write_file", - "move_file", - "mkdir", - "update_memory", - "update_memory_team", - "update_memory_private", -) - -# Tools that mutate virtual KB filesystem or parent/global chat state. -# Provider specialists should not mutate these surfaces directly. -NON_PROVIDER_STATE_MUTATION_DENY: frozenset[str] = frozenset( - { - # Exact tool names from shared deny patterns. - *{name for name in WRITE_TOOL_DENY_PATTERNS if "*" not in name}, - # Additional non-provider state mutation controls. 
- "write_todos", - "task", - } -) diff --git a/surfsense_backend/app/agents/new_chat/subagents/providers/linear.py b/surfsense_backend/app/agents/new_chat/subagents/providers/linear.py deleted file mode 100644 index da332fe28..000000000 --- a/surfsense_backend/app/agents/new_chat/subagents/providers/linear.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Linear provider specialist subagent. - -This file is intentionally standalone so provider specialists can be reviewed -and evolved independently (one provider per file). -""" - -from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING, Any - -from app.agents.new_chat.permissions import Rule, Ruleset -from app.agents.new_chat.subagents.constants import NON_PROVIDER_STATE_MUTATION_DENY -from app.services.mcp_oauth.registry import ( - LINEAR_MCP_READONLY_TOOL_NAMES, - linear_mcp_original_tool_name, -) - -if TYPE_CHECKING: - from deepagents import SubAgent - from langchain_core.language_models import BaseChatModel - from langchain_core.tools import BaseTool - - -# Read vs write Linear MCP tools are defined in -# ``app.services.mcp_oauth.registry`` (``LINEAR_MCP_READONLY_TOOL_NAMES`` / -# ``LINEAR_MCP_WRITE_TOOL_NAMES``). Any other Linear-domain tool requires approval. - -LINEAR_SYSTEM_PROMPT = """You are the linear_specialist subagent for SurfSense. - -Role: -- You are the Linear domain specialist. Handle Linear-only requests accurately. - -Primary objective: -- Resolve the user's Linear task and return a concise, auditable result. - -Routing boundary: -- Use this subagent for Linear-domain tasks (issues, status, assignees, labels, - teams, and project references). -- If the task is primarily non-Linear or cross-connector orchestration, return - status=needs_input and hand control back to the parent with the exact next hop. - -Execution steps: -1) Verify Linear access first (use get_connected_accounts if needed). 
-2) Prefer read/list tools first to gather current issue facts before concluding. -3) Track key identifiers in your reasoning: issue ID, issue key, team ID, label ID. -4) If required identifiers are missing, ask the parent for exactly what is missing. -5) Return a compact result with findings + evidence references. - -Output format: -- status: success | needs_input | blocked | error -- summary: one short paragraph -- evidence: bullet list of concrete IDs / issue keys used -- next_step: one sentence (only when blocked or needs_input) - -Constraints: -- Do not invent issue keys, IDs, or workflow state names. -- Mutating Linear operations are allowed only with explicit approval. -- If Linear connector access is unavailable, stop and return status=blocked. -""" - - -def _select_linear_tools(tools: Sequence[BaseTool]) -> list[BaseTool]: - """Keep Linear tools plus minimal shared read utilities.""" - allowed_exact = { - "get_connected_accounts", - "read_file", - "ls", - "glob", - "grep", - } - selected: list[BaseTool] = [] - for tool in tools: - if tool.name in allowed_exact: - selected.append(tool) - continue - if linear_mcp_original_tool_name(tool.name) is not None: - selected.append(tool) - continue - if tool.name.startswith("linear_") or tool.name.endswith("_linear_issue"): - selected.append(tool) - return selected - - -def _is_linear_readonly_tool_name(name: str) -> bool: - """Return True when a tool name maps to a read-only Linear MCP operation.""" - base = linear_mcp_original_tool_name(name) - return base is not None and base in LINEAR_MCP_READONLY_TOOL_NAMES - - -def _is_linear_domain_tool_name(name: str) -> bool: - """Return True for Linear-domain tools handled by this specialist.""" - if linear_mcp_original_tool_name(name) is not None: - return True - return name.startswith("linear_") or name.endswith("_linear_issue") - - -def _permission_middleware(*, selected_tools: Sequence[BaseTool]) -> Any: - """Permission policy for Linear specialist.""" - from 
app.agents.new_chat.middleware.permission import PermissionMiddleware - - ask_tools = sorted( - { - tool.name - for tool in selected_tools - if _is_linear_domain_tool_name(tool.name) - and not _is_linear_readonly_tool_name(tool.name) - } - ) - rules: list[Rule] = [Rule(permission="*", pattern="*", action="allow")] - rules.extend( - Rule(permission=name, pattern="*", action="deny") - for name in NON_PROVIDER_STATE_MUTATION_DENY - ) - rules.extend(Rule(permission=name, pattern="*", action="ask") for name in ask_tools) - return PermissionMiddleware( - rulesets=[Ruleset(rules=rules, origin="subagent_linear_specialist")] - ) - - -def _wrap_subagent_middleware( - *, - selected_tools: Sequence[BaseTool], - extra_middleware: Sequence[Any] | None, -) -> list[Any]: - """Apply standard middleware chain used by other subagents.""" - from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware - - from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware - - return [ - *(extra_middleware or []), - _permission_middleware(selected_tools=selected_tools), - PatchToolCallsMiddleware(), - DedupHITLToolCallsMiddleware(agent_tools=list(selected_tools)), - ] - - -def build_linear_specialist_subagent( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> SubAgent: - """Build the ``linear_specialist`` provider subagent spec.""" - selected_tools = _select_linear_tools(tools) - spec: dict[str, Any] = { - "name": "linear_specialist", - "description": ( - "Linear operations specialist for issue and workflow requests, " - "with strict evidence tracking and approval-gated mutating operations." 
- ), - "system_prompt": LINEAR_SYSTEM_PROMPT, - "tools": selected_tools, - "middleware": _wrap_subagent_middleware( - selected_tools=selected_tools, - extra_middleware=extra_middleware, - ), - } - if model is not None: - spec["model"] = model - return spec # type: ignore[return-value] diff --git a/surfsense_backend/app/agents/new_chat/subagents/providers/slack.py b/surfsense_backend/app/agents/new_chat/subagents/providers/slack.py deleted file mode 100644 index 90ca80152..000000000 --- a/surfsense_backend/app/agents/new_chat/subagents/providers/slack.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Slack provider specialist subagent. - -This file is intentionally standalone so provider specialists can be reviewed -and evolved independently (one provider per file). -""" - -from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING, Any - -from app.agents.new_chat.permissions import Rule, Ruleset -from app.agents.new_chat.subagents.constants import NON_PROVIDER_STATE_MUTATION_DENY - -if TYPE_CHECKING: - from deepagents import SubAgent - from langchain_core.language_models import BaseChatModel - from langchain_core.tools import BaseTool - - -# Official references: -# - https://docs.slack.dev/ai/slack-mcp-server -# - https://www.npmjs.com/package/@modelcontextprotocol/server-slack -# -# Policy: only known read-only Slack tools are auto-allowed. Any other -# ``slack_*`` tool is treated as mutating and requires explicit approval. -SLACK_READONLY_TOOL_NAMES: frozenset[str] = frozenset( - { - # Slack-hosted MCP read tools - "slack_search_channels", - "slack_read_channel", - "slack_read_thread", - "slack_read_canvas", - "slack_read_user_profile", - # modelcontextprotocol/server-slack read tools - "slack_list_channels", - "slack_get_channel_history", - "slack_get_thread_replies", - "slack_get_users", - "slack_get_user_profile", - } -) - -SLACK_SYSTEM_PROMPT = """You are the slack_specialist subagent for SurfSense. 
- -Role: -- You are the Slack domain specialist. Handle Slack-only requests accurately. - -Primary objective: -- Resolve the user's Slack task and return a concise, auditable result. - -Routing boundary: -- Use this subagent for Slack-domain tasks (channels, threads, users, messages, - and Slack canvases). -- If the task is primarily non-Slack or cross-connector orchestration, return - status=needs_input and hand control back to the parent with the exact next hop. - -Execution steps: -1) Verify Slack access first (use get_connected_accounts if needed). -2) Prefer read/list tools first to gather facts before concluding. -3) Track key identifiers in your reasoning: channel ID, message ts, thread ts, user ID. -4) If required identifiers are missing, ask the parent for exactly what is missing. -5) Return a compact result with findings + evidence references. - -Output format: -- status: success | needs_input | blocked | error -- summary: one short paragraph -- evidence: bullet list of concrete IDs / timestamps used -- next_step: one sentence (only when blocked or needs_input) - -Constraints: -- Do not invent Slack IDs, channels, users, or message content. -- Mutating Slack operations are allowed only with explicit approval. -- If Slack connector access is unavailable, stop and return status=blocked. -""" - - -def _select_slack_tools(tools: Sequence[BaseTool]) -> list[BaseTool]: - """Keep Slack tools plus minimal shared read utilities.""" - allowed_exact = { - "get_connected_accounts", - "read_file", - "ls", - "glob", - "grep", - } - slack_prefix = "slack_" - selected: list[BaseTool] = [] - for tool in tools: - if tool.name in allowed_exact: - selected.append(tool) - continue - if tool.name.startswith(slack_prefix): - selected.append(tool) - return selected - - -def _permission_middleware(*, selected_tools: Sequence[BaseTool]) -> Any: - """Permission policy for Slack specialist. - - Intent: - - Allow Slack-domain operations by default. 
- - Gate Slack mutating operations behind approval (`ask`). - - Hard-deny non-Slack state mutations, especially KB virtual filesystem - mutation and parent-context mutation tools. - """ - from app.agents.new_chat.middleware.permission import PermissionMiddleware - - ask_tools = sorted( - { - tool.name - for tool in selected_tools - if tool.name.startswith("slack_") - and tool.name not in SLACK_READONLY_TOOL_NAMES - } - ) - rules: list[Rule] = [Rule(permission="*", pattern="*", action="allow")] - rules.extend( - Rule(permission=name, pattern="*", action="deny") - for name in NON_PROVIDER_STATE_MUTATION_DENY - ) - rules.extend(Rule(permission=name, pattern="*", action="ask") for name in ask_tools) - return PermissionMiddleware( - rulesets=[Ruleset(rules=rules, origin="subagent_slack_specialist")] - ) - - -def _wrap_subagent_middleware( - *, - selected_tools: Sequence[BaseTool], - extra_middleware: Sequence[Any] | None, -) -> list[Any]: - """Apply standard middleware chain used by other subagents.""" - from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware - - from app.agents.new_chat.middleware import DedupHITLToolCallsMiddleware - - return [ - *(extra_middleware or []), - _permission_middleware(selected_tools=selected_tools), - PatchToolCallsMiddleware(), - DedupHITLToolCallsMiddleware(agent_tools=list(selected_tools)), - ] - - -def build_slack_specialist_subagent( - *, - tools: Sequence[BaseTool], - model: BaseChatModel | None = None, - extra_middleware: Sequence[Any] | None = None, -) -> SubAgent: - """Build the ``slack_specialist`` provider subagent spec.""" - selected_tools = _select_slack_tools(tools) - spec: dict[str, Any] = { - "name": "slack_specialist", - "description": ( - "Slack operations specialist for any Slack-domain request " - "(channels, threads, users, and messages), with strict evidence " - "tracking and approval-gated mutating operations." 
- ), - "system_prompt": SLACK_SYSTEM_PROMPT, - "tools": selected_tools, - "middleware": _wrap_subagent_middleware( - selected_tools=selected_tools, - extra_middleware=extra_middleware, - ), - } - if model is not None: - spec["model"] = model - return spec # type: ignore[return-value] diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py deleted file mode 100644 index 363cf5507..000000000 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.system_prompt``. Re-exported here for the frozen -single-agent stack (``chat_deepagent``) until that stack is retired. -""" - -from app.agents.shared.system_prompt import ( - SURFSENSE_CITATION_INSTRUCTIONS, - SURFSENSE_NO_CITATION_INSTRUCTIONS, - SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE, - SURFSENSE_SYSTEM_PROMPT, - build_configurable_system_prompt, - build_surfsense_system_prompt, - compose_system_prompt, - detect_provider_variant, - get_default_system_instructions, -) - -__all__ = [ - "SURFSENSE_CITATION_INSTRUCTIONS", - "SURFSENSE_NO_CITATION_INSTRUCTIONS", - "SURFSENSE_SYSTEM_INSTRUCTIONS_TEMPLATE", - "SURFSENSE_SYSTEM_PROMPT", - "build_configurable_system_prompt", - "build_surfsense_system_prompt", - "compose_system_prompt", - "detect_provider_variant", - "get_default_system_instructions", -] diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py deleted file mode 100644 index 852fc813f..000000000 --- a/surfsense_backend/app/agents/new_chat/tools/__init__.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Backward-compatible shim package. - -The agent tools now live in the shared kernel at ``app.agents.shared.tools``. 
-This package re-exports the public surface (and keeps ``invalid_tool`` / -``registry`` submodule shims) so the frozen single-agent stack -(``new_chat.__init__`` and ``chat_deepagent``) keeps working until that stack is -retired. All live code imports from ``app.agents.shared.tools`` directly. -""" - -from app.agents.shared.tools import ( - BUILTIN_TOOLS, - CONNECTOR_DESCRIPTIONS, - ToolDefinition, - build_tools, - create_generate_image_tool, - create_generate_podcast_tool, - create_generate_video_presentation_tool, - create_scrape_webpage_tool, - create_update_memory_tool, - create_update_team_memory_tool, - format_documents_for_context, - get_all_tool_names, - get_default_enabled_tools, - get_tool_by_name, - search_knowledge_base_async, -) - -__all__ = [ - "BUILTIN_TOOLS", - "CONNECTOR_DESCRIPTIONS", - "ToolDefinition", - "build_tools", - "create_generate_image_tool", - "create_generate_podcast_tool", - "create_generate_video_presentation_tool", - "create_scrape_webpage_tool", - "create_update_memory_tool", - "create_update_team_memory_tool", - "format_documents_for_context", - "get_all_tool_names", - "get_default_enabled_tools", - "get_tool_by_name", - "search_knowledge_base_async", -] diff --git a/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py b/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py deleted file mode 100644 index cc7fe4c11..000000000 --- a/surfsense_backend/app/agents/new_chat/tools/invalid_tool.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.tools.invalid_tool``. Re-exported here for the -frozen single-agent stack (``chat_deepagent``) until that stack is retired. 
-""" - -from app.agents.shared.tools.invalid_tool import ( - INVALID_TOOL_DESCRIPTION, - INVALID_TOOL_NAME, - invalid_tool, -) - -__all__ = [ - "INVALID_TOOL_DESCRIPTION", - "INVALID_TOOL_NAME", - "invalid_tool", -] diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py deleted file mode 100644 index 9b5d92559..000000000 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Backward-compatible shim. - -Moved to ``app.agents.shared.tools.registry``. Re-exported here for the frozen -single-agent stack (``chat_deepagent``) until that stack is retired. -""" - -from app.agents.shared.tools.registry import ( - BUILTIN_TOOLS, - ToolDefinition, - build_tools_async, - get_connector_gated_tools, -) - -__all__ = [ - "BUILTIN_TOOLS", - "ToolDefinition", - "build_tools_async", - "get_connector_gated_tools", -] diff --git a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py index 80b9862e7..0bc614fb4 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py +++ b/surfsense_backend/tests/unit/agents/new_chat/middleware/test_scoped_model_fallback.py @@ -87,7 +87,7 @@ class RateLimitError(Exception): def _build_agent(primary: BaseChatModel, fallback: BaseChatModel): from langchain.agents import create_agent - from app.agents.new_chat.middleware.scoped_model_fallback import ( + from app.agents.shared.middleware.scoped_model_fallback import ( ScopedModelFallbackMiddleware, ) diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_resolve_prompt_model_name.py b/surfsense_backend/tests/unit/agents/new_chat/test_resolve_prompt_model_name.py deleted file mode 100644 index a9041f5a7..000000000 --- a/surfsense_backend/tests/unit/agents/new_chat/test_resolve_prompt_model_name.py +++ /dev/null @@ -1,117 +0,0 @@ 
-"""Tests for ``_resolve_prompt_model_name`` in :mod:`app.agents.new_chat.chat_deepagent`. - -The helper picks the model id fed to ``detect_provider_variant`` so the -right ```` block lands in the system prompt. The tests -below pin its preference order: - -1. ``agent_config.litellm_params["base_model"]`` (Azure-correct). -2. ``agent_config.model_name``. -3. ``getattr(llm, "model", None)``. - -Without (1) an Azure deployment named e.g. ``"prod-chat-001"`` would -silently miss every provider regex. -""" - -from __future__ import annotations - -import pytest - -from app.agents.new_chat.chat_deepagent import _resolve_prompt_model_name -from app.agents.shared.llm_config import AgentConfig - -pytestmark = pytest.mark.unit - - -def _make_cfg(**overrides) -> AgentConfig: - """Build an ``AgentConfig`` with sensible defaults for the helper test.""" - defaults = { - "provider": "OPENAI", - "model_name": "x", - "api_key": "k", - } - return AgentConfig(**{**defaults, **overrides}) - - -class _FakeLLM: - """Stand-in for a ``ChatLiteLLM`` / ``ChatLiteLLMRouter`` instance. - - The resolver only reads the ``.model`` attribute via ``getattr``, - matching the established idiom in ``knowledge_search.py`` / - ``stream_new_chat.py`` / ``document_summarizer.py``. - """ - - def __init__(self, model: str | None) -> None: - self.model = model - - -def test_prefers_litellm_params_base_model_over_deployment_name() -> None: - """Azure deployment slug must NOT shadow the underlying model family. - - This is the failure mode the helper exists to prevent: a deployment - named ``"azure/prod-chat-001"`` would not match any provider regex - on its own, but the family ``"gpt-4o"`` lives in - ``litellm_params["base_model"]`` and routes to ``openai_classic``. 
- """ - cfg = _make_cfg( - model_name="azure/prod-chat-001", - litellm_params={"base_model": "gpt-4o"}, - ) - assert _resolve_prompt_model_name(cfg, _FakeLLM("azure/prod-chat-001")) == "gpt-4o" - - -def test_falls_back_to_model_name_when_litellm_params_is_none() -> None: - cfg = _make_cfg( - model_name="anthropic/claude-3-5-sonnet", - litellm_params=None, - ) - got = _resolve_prompt_model_name(cfg, _FakeLLM("anthropic/claude-3-5-sonnet")) - assert got == "anthropic/claude-3-5-sonnet" - - -def test_handles_litellm_params_without_base_model_key() -> None: - cfg = _make_cfg( - model_name="openai/gpt-4o", - litellm_params={"temperature": 0.5}, - ) - assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o" - - -def test_ignores_blank_base_model() -> None: - """Whitespace-only ``base_model`` must not shadow ``model_name``.""" - cfg = _make_cfg( - model_name="openai/gpt-4o", - litellm_params={"base_model": " "}, - ) - assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o" - - -def test_ignores_non_string_base_model() -> None: - """Defensive: a non-string ``base_model`` should not crash the resolver.""" - cfg = _make_cfg( - model_name="openai/gpt-4o", - litellm_params={"base_model": 42}, - ) - assert _resolve_prompt_model_name(cfg, _FakeLLM("openai/gpt-4o")) == "openai/gpt-4o" - - -def test_falls_back_to_llm_model_when_no_agent_config() -> None: - """No ``agent_config`` -> use ``llm.model`` directly. Defensive path - for direct callers; production callers always supply a config.""" - assert ( - _resolve_prompt_model_name(None, _FakeLLM("openai/gpt-4o-mini")) - == "openai/gpt-4o-mini" - ) - - -def test_returns_none_when_nothing_available() -> None: - """``compose_system_prompt`` treats ``None`` as the ``"default"`` - variant and emits no provider block.""" - assert _resolve_prompt_model_name(None, _FakeLLM(None)) is None - - -def test_auto_mode_resolves_to_auto_string() -> None: - """Auto mode -> ``"auto"``. 
``detect_provider_variant("auto")`` - returns ``"default"``, which is correct: the child model isn't - known until the LiteLLM Router dispatches.""" - cfg = AgentConfig.from_auto_mode() - assert _resolve_prompt_model_name(cfg, _FakeLLM("auto")) == "auto" diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py b/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py deleted file mode 100644 index 79d517d9a..000000000 --- a/surfsense_backend/tests/unit/agents/new_chat/test_specialized_subagents.py +++ /dev/null @@ -1,337 +0,0 @@ -"""Tests for the specialized subagents (explore / report_writer / connector_negotiator).""" - -from __future__ import annotations - -from langchain_core.tools import tool - -from app.agents.shared.middleware.permission import PermissionMiddleware -from app.agents.new_chat.subagents import ( - build_connector_negotiator_subagent, - build_explore_subagent, - build_report_writer_subagent, - build_specialized_subagents, -) -from app.agents.new_chat.subagents.config import ( - EXPLORE_READ_TOOLS, - REPORT_WRITER_TOOLS, - WRITE_TOOL_DENY_PATTERNS, -) - -# --------------------------------------------------------------------------- -# Fake tools used to verify filtering & permission behavior -# --------------------------------------------------------------------------- - - -@tool -def web_search(query: str) -> str: - """Search the public web.""" - return "" - - -@tool -def scrape_webpage(url: str) -> str: - """Scrape a single webpage.""" - return "" - - -@tool -def read_file(path: str) -> str: - """Read a file.""" - return "" - - -@tool -def ls_tree(path: str) -> str: - """List a tree.""" - return "" - - -@tool -def grep(pattern: str) -> str: - """Grep.""" - return "" - - -@tool -def update_memory(content: str) -> str: - """Update the user's memory.""" - return "" - - -@tool -def edit_file(path: str, old: str, new: str) -> str: - """Edit a file.""" - return "" - - -@tool -def 
linear_create_issue(title: str) -> str: - """Create a Linear issue.""" - return "" - - -@tool -def slack_send_message(channel: str, text: str) -> str: - """Send a Slack message.""" - return "" - - -@tool -def get_connected_accounts() -> str: - """List connected accounts.""" - return "" - - -@tool -def generate_report(topic: str) -> str: - """Generate a report artifact.""" - return "" - - -ALL_TOOLS = [ - web_search, - scrape_webpage, - read_file, - ls_tree, - grep, - update_memory, - edit_file, - linear_create_issue, - slack_send_message, - get_connected_accounts, - generate_report, -] - - -class TestExploreSubagent: - def test_only_read_tools_are_exposed(self) -> None: - spec = build_explore_subagent(tools=ALL_TOOLS) - names = {t.name for t in spec["tools"]} # type: ignore[index] - assert names == EXPLORE_READ_TOOLS & {t.name for t in ALL_TOOLS} - assert "update_memory" not in names - assert "linear_create_issue" not in names - assert "edit_file" not in names - - def test_includes_permission_middleware_with_deny_rules(self) -> None: - spec = build_explore_subagent(tools=ALL_TOOLS) - permission_mws = [ - m - for m in spec["middleware"] - if isinstance(m, PermissionMiddleware) # type: ignore[index] - ] - assert len(permission_mws) == 1 - ruleset = permission_mws[0]._static_rulesets[0] - assert ruleset.origin == "subagent_explore" - deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"} - assert "update_memory" in deny_patterns - assert "edit_file" in deny_patterns - assert "*create*" in deny_patterns - assert "*send*" in deny_patterns - - def test_skills_inherits_default_sources(self) -> None: - spec = build_explore_subagent(tools=ALL_TOOLS) - assert spec["skills"] == ["/skills/builtin/", "/skills/space/"] # type: ignore[index] - - def test_name_and_description_match_contract(self) -> None: - spec = build_explore_subagent(tools=ALL_TOOLS) - assert spec["name"] == "explore" - assert "read-only" in spec["description"].lower() - - def 
test_includes_dedup_and_patch_middleware(self) -> None: - from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware - - from app.agents.shared.middleware import DedupHITLToolCallsMiddleware - - spec = build_explore_subagent(tools=ALL_TOOLS) - types = {type(m) for m in spec["middleware"]} # type: ignore[index] - assert PatchToolCallsMiddleware in types - assert DedupHITLToolCallsMiddleware in types - - -class TestReportWriterSubagent: - def test_exposes_only_report_writing_tools(self) -> None: - spec = build_report_writer_subagent(tools=ALL_TOOLS) - names = {t.name for t in spec["tools"]} # type: ignore[index] - assert names == REPORT_WRITER_TOOLS & {t.name for t in ALL_TOOLS} - assert "generate_report" in names - assert "read_file" in names - - def test_deny_rules_block_writes_but_allow_generate_report(self) -> None: - spec = build_report_writer_subagent(tools=ALL_TOOLS) - permission_mws = [ - m - for m in spec["middleware"] - if isinstance(m, PermissionMiddleware) # type: ignore[index] - ] - ruleset = permission_mws[0]._static_rulesets[0] - deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"} - assert "update_memory" in deny_patterns - # generate_report MUST not be denied — it's the whole point of the subagent. - assert "generate_report" not in deny_patterns - # No deny pattern should match `generate_report` either. - assert all( - not _wildcard_matches(pattern, "generate_report") - for pattern in deny_patterns - ) - - -class TestConnectorNegotiatorSubagent: - def test_inherits_all_parent_tools(self) -> None: - spec = build_connector_negotiator_subagent(tools=ALL_TOOLS) - names = {t.name for t in spec["tools"]} # type: ignore[index] - # Every parent tool is inherited; the deny ruleset enforces behavior - # at execution time instead of trimming the tool list. 
- assert names == {t.name for t in ALL_TOOLS} - - def test_get_connected_accounts_is_present(self) -> None: - spec = build_connector_negotiator_subagent(tools=ALL_TOOLS) - names = {t.name for t in spec["tools"]} # type: ignore[index] - assert "get_connected_accounts" in names - - def test_deny_ruleset_blocks_mutating_connector_tools(self) -> None: - spec = build_connector_negotiator_subagent(tools=ALL_TOOLS) - permission_mws = [ - m - for m in spec["middleware"] - if isinstance(m, PermissionMiddleware) # type: ignore[index] - ] - ruleset = permission_mws[0]._static_rulesets[0] - deny_patterns = {r.permission for r in ruleset.rules if r.action == "deny"} - # `linear_create_issue` matches the `*_create` deny pattern. - assert any(_wildcard_matches(p, "linear_create_issue") for p in deny_patterns) - assert any(_wildcard_matches(p, "slack_send_message") for p in deny_patterns) - - -class TestBuildSpecializedSubagents: - def test_returns_five_specs(self) -> None: - specs = build_specialized_subagents(tools=ALL_TOOLS) - names = [s["name"] for s in specs] # type: ignore[index] - assert names == [ - "explore", - "report_writer", - "linear_specialist", - "slack_specialist", - "connector_negotiator", - ] - - def test_all_specs_have_unique_names(self) -> None: - specs = build_specialized_subagents(tools=ALL_TOOLS) - names = [s["name"] for s in specs] # type: ignore[index] - assert len(set(names)) == len(names) - - def test_extra_middleware_is_prepended_to_each_spec(self) -> None: - """Sentinel middleware passed via ``extra_middleware`` must appear - in each subagent's ``middleware`` list, before the local rules. - - This guards against the regression where specialized subagents - promised filesystem tools (``read_file``, ``ls``, ``grep``) in - their system prompts but had no filesystem middleware mounted. 
- """ - - class _Sentinel: - pass - - sentinel = _Sentinel() - specs = build_specialized_subagents( - tools=ALL_TOOLS, extra_middleware=[sentinel] - ) - for spec in specs: - mws = spec["middleware"] # type: ignore[index] - assert sentinel in mws - # The sentinel must appear *before* the permission middleware - # (subagent-local rules), preserving the documented composition - # order: extra → custom → patch → dedup. - sentinel_idx = mws.index(sentinel) - perm_idx = next( - (i for i, m in enumerate(mws) if isinstance(m, PermissionMiddleware)), - None, - ) - assert perm_idx is not None - assert sentinel_idx < perm_idx - - -class TestFilterToolsWarningSuppression: - """Names provided by middleware (read_file, ls, grep, …) must not - trigger the spurious "missing" warning in :func:`_filter_tools`.""" - - def test_middleware_provided_names_are_silent(self, caplog) -> None: - import logging - - from app.agents.new_chat.subagents.config import _filter_tools - - with caplog.at_level( - logging.INFO, logger="app.agents.new_chat.subagents.config" - ): - # Allowed set asks for two registry tools (one present, one - # not) plus a bunch of middleware-provided names. - _filter_tools( - [web_search], - allowed_names={ - "web_search", - "scrape_webpage", # legitimately missing → should warn - "read_file", # mw-provided → suppressed - "ls", - "grep", - "glob", - "write_todos", - }, - ) - - warnings = [r.message for r in caplog.records if r.levelno >= logging.INFO] - # Exactly one warning, and it should mention scrape_webpage but not - # any middleware-provided name. Inspect the rendered "missing" - # list (between the brackets) so we don't false-match substrings - # like ``ls`` inside ``available``. 
- assert len(warnings) == 1, warnings - msg = warnings[0] - assert "scrape_webpage" in msg - bracket_section = msg.split("missing: ", 1)[1] - for noisy in ("read_file", "ls", "grep", "glob", "write_todos"): - assert f"'{noisy}'" not in bracket_section, msg - - -class TestDenyPatternsCoverage: - def test_deny_patterns_cover_canonical_write_tools(self) -> None: - canonical_writes = [ - "update_memory", - "edit_file", - "write_file", - "move_file", - "mkdir", - "linear_create_issue", - "linear_update_issue", - "linear_delete_issue", - "slack_send_message", - "create_index", - "update_account", - "delete_record", - "send_email", - ] - for tool_name in canonical_writes: - assert any( - _wildcard_matches(pattern, tool_name) - for pattern in WRITE_TOOL_DENY_PATTERNS - ), f"no deny pattern matches {tool_name!r}" - - def test_deny_patterns_do_not_match_safe_read_tools(self) -> None: - canonical_reads = [ - "read_file", - "ls_tree", - "grep", - "web_search", - "scrape_webpage", - "get_connected_accounts", - "generate_report", - ] - for tool_name in canonical_reads: - assert not any( - _wildcard_matches(pattern, tool_name) - for pattern in WRITE_TOOL_DENY_PATTERNS - ), f"deny pattern incorrectly matches read tool {tool_name!r}" - - -def _wildcard_matches(pattern: str, value: str) -> bool: - """Helper using the same matcher the rule evaluator does.""" - from app.agents.shared.permissions import wildcard_match - - return wildcard_match(value, pattern) diff --git a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py index 9b29fdd6a..8ff576e2d 100644 --- a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py +++ b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py @@ -436,39 +436,3 @@ def test_turn_status_sse_contract_exists(): assert 'type: "data-turn-status"' in state_source assert 'case "data-turn-status":' in pipeline_source assert "end_turn(str(chat_id))" in stream_source - - -def 
test_chat_deepagent_forwards_resolved_model_name_to_both_builders(): - """Regression guard: both system-prompt builders in chat_deepagent.py - must receive ``model_name=_resolve_prompt_model_name(...)`` so the - provider-variant dispatch can render the right ```` - block. Without this the prompt silently falls back to the empty - ``"default"`` variant — the original bug being fixed. - - This test mirrors :func:`test_stream_error_emission_keeps_machine_error_codes` - in style: it inspects module source text + a regex to enforce the - call-site shape, not just the wrapper layer (the wrappers already - forward ``model_name`` correctly, so testing them would not catch - the actual missed plumbing). - """ - import app.agents.new_chat.chat_deepagent as chat_deepagent_module - - source = inspect.getsource(chat_deepagent_module) - - # Helper itself must be defined. - assert "def _resolve_prompt_model_name(" in source - - # Both builder calls must forward the resolved model name. Match - # across newlines + whitespace because the kwargs are split over - # multiple lines. - pattern = re.compile( - r"build_(?:surfsense|configurable)_system_prompt\([^)]*" - r"model_name=_resolve_prompt_model_name\(", - re.DOTALL, - ) - matches = pattern.findall(source) - assert len(matches) == 2, ( - "Expected both system-prompt builder call sites to forward " - "`model_name=_resolve_prompt_model_name(...)`, found " - f"{len(matches)}" - )