Merge remote-tracking branch 'upstream/dev' into fix/sensitive-actions

2026-05-15 18:25:18 +02:00 · 2026-04-14 04:10:00 +05:30 · 2026-04-14 04:10:00 +05:30 · 136901276a
commit 136901276a
parent 3f68962772 7c4d1a6af6
126 changed files with 26088 additions and 380 deletions
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@ -7,10 +7,12 @@ This middleware customizes prompts and persists write/edit operations for
 from __future__ import annotations

 import asyncio
+import logging
 import re
 from datetime import UTC, datetime
 from typing import Annotated, Any

+from daytona.common.errors import DaytonaError
 from deepagents import FilesystemMiddleware
 from deepagents.backends.protocol import EditResult, WriteResult
 from deepagents.backends.utils import validate_path
@ -23,6 +25,11 @@ from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
 from sqlalchemy import delete, select

+from app.agents.new_chat.sandbox import (
+    _evict_sandbox_cache,
+    get_or_create_sandbox,
+    is_sandbox_enabled,
+)
 from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
 from app.indexing_pipeline.document_chunker import chunk_text
 from app.utils.document_converters import (
@ -31,6 +38,8 @@ from app.utils.document_converters import (
    generate_unique_identifier_hash,
 )

+logger = logging.getLogger(__name__)
+
 # =============================================================================
 # System Prompt (injected into every model call by wrap_model_call)
 # =============================================================================
@ -40,7 +49,7 @@ SURFSENSE_FILESYSTEM_SYSTEM_PROMPT = """## Following Conventions
 - Read files before editing — understand existing content before making changes.
 - Mimic existing style, naming conventions, and patterns.

-## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `save_document`
+## Filesystem Tools

 All file paths must start with a `/`.
 - ls: list files and directories at a given path.
@ -128,6 +137,24 @@ SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across fi
 Use this to locate relevant document files/chunks before reading full files.
 """

+SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes Python code in an isolated sandbox environment.
+
+Common data-science packages are pre-installed (pandas, numpy, matplotlib,
+scipy, scikit-learn).
+
+When to use this tool: use execute_code for numerical computation, data
+analysis, statistics, and any task that benefits from running Python code.
+Never perform arithmetic manually when this tool is available.
+
+Usage notes:
+- No outbound network access.
+- Returns combined stdout/stderr with exit code.
+- Use print() to produce output.
+- You can create files, run shell commands via subprocess or os.system(),
+  and use any standard library module.
+- Use the optional timeout parameter to override the default timeout.
+"""
+
 SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.

 This is an expensive operation — it creates a new Document record in the
@ -148,17 +175,36 @@ Args:
 class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
    """SurfSense-specific filesystem middleware with DB persistence for docs."""

+    _MAX_EXECUTE_TIMEOUT = 300
+
    def __init__(
        self,
        *,
        search_space_id: int | None = None,
        created_by_id: str | None = None,
+        thread_id: int | str | None = None,
        tool_token_limit_before_evict: int | None = 20000,
    ) -> None:
        self._search_space_id = search_space_id
        self._created_by_id = created_by_id
+        self._thread_id = thread_id
+        self._sandbox_available = is_sandbox_enabled() and thread_id is not None
+
+        system_prompt = SURFSENSE_FILESYSTEM_SYSTEM_PROMPT
+        if self._sandbox_available:
+            system_prompt += (
+                "\n- execute_code: run Python code in an isolated sandbox."
+                "\n\n## Code Execution"
+                "\n\nUse execute_code whenever a task benefits from running code."
+                " Never perform arithmetic manually."
+                "\n\nDocuments here are XML-wrapped markdown, not raw data files."
+                " To work with them programmatically, read the document first,"
+                " extract the data, write it as a clean file (CSV, JSON, etc.),"
+                " and then run your code against it."
+            )
+
        super().__init__(
-            system_prompt=SURFSENSE_FILESYSTEM_SYSTEM_PROMPT,
+            system_prompt=system_prompt,
            custom_tool_descriptions={
                "ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION,
                "read_file": SURFSENSE_READ_FILE_TOOL_DESCRIPTION,
@ -168,10 +214,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
                "grep": SURFSENSE_GREP_TOOL_DESCRIPTION,
            },
            tool_token_limit_before_evict=tool_token_limit_before_evict,
+            max_execute_timeout=self._MAX_EXECUTE_TIMEOUT,
        )
-        # Remove the execute tool (no sandbox backend)
        self.tools = [t for t in self.tools if t.name != "execute"]
        self.tools.append(self._create_save_document_tool())
+        if self._sandbox_available:
+            self.tools.append(self._create_execute_code_tool())

    @staticmethod
    def _run_async_blocking(coro: Any) -> Any:
@ -455,6 +503,108 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
            coroutine=async_save_document,
        )

+    def _create_execute_code_tool(self) -> BaseTool:
+        """Create execute_code tool backed by a Daytona sandbox."""
+
+        def sync_execute_code(
+            command: Annotated[
+                str, "Python code to execute. Use print() to see output."
+            ],
+            runtime: ToolRuntime[None, FilesystemState],
+            timeout: Annotated[
+                int | None,
+                "Optional timeout in seconds.",
+            ] = None,
+        ) -> str:
+            if timeout is not None:
+                if timeout < 0:
+                    return f"Error: timeout must be non-negative, got {timeout}."
+                if timeout > self._MAX_EXECUTE_TIMEOUT:
+                    return f"Error: timeout {timeout}s exceeds maximum ({self._MAX_EXECUTE_TIMEOUT}s)."
+            return self._run_async_blocking(
+                self._execute_in_sandbox(command, runtime, timeout)
+            )
+
+        async def async_execute_code(
+            command: Annotated[
+                str, "Python code to execute. Use print() to see output."
+            ],
+            runtime: ToolRuntime[None, FilesystemState],
+            timeout: Annotated[
+                int | None,
+                "Optional timeout in seconds.",
+            ] = None,
+        ) -> str:
+            if timeout is not None:
+                if timeout < 0:
+                    return f"Error: timeout must be non-negative, got {timeout}."
+                if timeout > self._MAX_EXECUTE_TIMEOUT:
+                    return f"Error: timeout {timeout}s exceeds maximum ({self._MAX_EXECUTE_TIMEOUT}s)."
+            return await self._execute_in_sandbox(command, runtime, timeout)
+
+        return StructuredTool.from_function(
+            name="execute_code",
+            description=SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION,
+            func=sync_execute_code,
+            coroutine=async_execute_code,
+        )
+
+    @staticmethod
+    def _wrap_as_python(code: str) -> str:
+        """Wrap Python code in a shell invocation for the sandbox."""
+        return f"python3 << 'PYEOF'\n{code}\nPYEOF"
+
+    async def _execute_in_sandbox(
+        self,
+        command: str,
+        runtime: ToolRuntime[None, FilesystemState],
+        timeout: int | None,
+    ) -> str:
+        """Core logic: get sandbox, sync files, run command, handle retries."""
+        assert self._thread_id is not None
+        command = self._wrap_as_python(command)
+
+        try:
+            return await self._try_sandbox_execute(command, runtime, timeout)
+        except (DaytonaError, Exception) as first_err:
+            logger.warning(
+                "Sandbox execute failed for thread %s, retrying: %s",
+                self._thread_id,
+                first_err,
+            )
+            _evict_sandbox_cache(self._thread_id)
+            try:
+                return await self._try_sandbox_execute(command, runtime, timeout)
+            except Exception:
+                logger.exception(
+                    "Sandbox retry also failed for thread %s", self._thread_id
+                )
+                return "Error: Code execution is temporarily unavailable. Please try again."
+
+    async def _try_sandbox_execute(
+        self,
+        command: str,
+        runtime: ToolRuntime[None, FilesystemState],
+        timeout: int | None,
+    ) -> str:
+        sandbox, is_new = await get_or_create_sandbox(self._thread_id)
+        # files = runtime.state.get("files") or {}
+        # await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
+        result = await sandbox.aexecute(command, timeout=timeout)
+        output = (result.output or "").strip()
+        if not output and result.exit_code == 0:
+            return (
+                "[Code executed successfully but produced no output. "
+                "Use print() to display results, then try again.]"
+            )
+        parts = [result.output]
+        if result.exit_code is not None:
+            status = "succeeded" if result.exit_code == 0 else "failed"
+            parts.append(f"\n[Command {status} with exit code {result.exit_code}]")
+        if result.truncated:
+            parts.append("\n[Output was truncated due to size limits]")
+        return "".join(parts)
+
    def _create_write_file_tool(self) -> BaseTool:
        """Create write_file — ephemeral for /documents/*, persisted otherwise."""
        tool_description = (
--- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
@ -774,11 +774,16 @@ class KnowledgeBaseSearchMiddleware(AgentMiddleware):  # type: ignore[type-arg]
        messages = state.get("messages") or []
        if not messages:
            return None
-        last_message = messages[-1]
-        if not isinstance(last_message, HumanMessage):
+
+        last_human = None
+        for msg in reversed(messages):
+            if isinstance(msg, HumanMessage):
+                last_human = msg
+                break
+        if last_human is None:
            return None

-        user_text = _extract_text_from_message(last_message).strip()
+        user_text = _extract_text_from_message(last_human).strip()
        if not user_text:
            return None