From 9e8ea1fd1c3a6bbb0899a9e5f92427fa94ada9ba Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 16:23:40 +0200
Subject: [PATCH 01/14] harden sandbox: security params, file sync, path
 traversal fix

---
 .../app/agents/new_chat/sandbox.py            | 111 ++++++++++++++----
 1 file changed, 89 insertions(+), 22 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/sandbox.py b/surfsense_backend/app/agents/new_chat/sandbox.py
index 8b634993b..79947de2b 100644
--- a/surfsense_backend/app/agents/new_chat/sandbox.py
+++ b/surfsense_backend/app/agents/new_chat/sandbox.py
@@ -42,7 +42,7 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
     """
 
     def execute(self, command: str, *, timeout: int | None = None) -> ExecuteResponse:
-        t = timeout if timeout is not None else self._timeout
+        t = timeout if timeout is not None else self._default_timeout
         result = self._sandbox.process.exec(command, timeout=t)
         return ExecuteResponse(
             output=result.result,
@@ -58,8 +58,10 @@ class _TimeoutAwareSandbox(DaytonaSandbox):
 
 _daytona_client: Daytona | None = None
 _sandbox_cache: dict[str, _TimeoutAwareSandbox] = {}
+_seeded_files: dict[str, dict[str, str]] = {}
 _SANDBOX_CACHE_MAX_SIZE = 20
 THREAD_LABEL_KEY = "surfsense_thread"
+SANDBOX_DOCUMENTS_ROOT = "/home/daytona/documents"
 
 
 def is_sandbox_enabled() -> bool:
@@ -78,14 +80,29 @@ def _get_client() -> Daytona:
     return _daytona_client
 
 
-def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
+def _sandbox_create_params(
+    labels: dict[str, str],
+) -> CreateSandboxFromSnapshotParams:
+    snapshot_id = os.environ.get("DAYTONA_SNAPSHOT_ID") or None
+    return CreateSandboxFromSnapshotParams(
+        language="python",
+        labels=labels,
+        snapshot=snapshot_id,
+        network_block_all=True,
+        auto_stop_interval=10,
+        auto_delete_interval=60,
+    )
+
+
+def _find_or_create(thread_id: str) -> tuple[_TimeoutAwareSandbox, bool]:
     """Find an existing sandbox for *thread_id*, or create a new one.
 
-    If an existing sandbox is found but is stopped/archived, it will be
-    restarted automatically before returning.
+    Returns a tuple of (sandbox, is_new) where *is_new* is True when a
+    fresh sandbox was created (first time or replacement after failure).
     """
     client = _get_client()
     labels = {THREAD_LABEL_KEY: thread_id}
+    is_new = False
 
     try:
         sandbox = client.find_one(labels=labels)
@@ -109,41 +126,39 @@ def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
                 sandbox.id,
                 sandbox.state,
             )
-            sandbox = client.create(
-                CreateSandboxFromSnapshotParams(language="python", labels=labels)
-            )
+            sandbox = client.create(_sandbox_create_params(labels))
+            is_new = True
             logger.info("Created replacement sandbox: %s", sandbox.id)
         elif sandbox.state != SandboxState.STARTED:
             sandbox.wait_for_sandbox_start(timeout=60)
 
     except Exception:
         logger.info("No existing sandbox for thread %s — creating one", thread_id)
-        sandbox = client.create(
-            CreateSandboxFromSnapshotParams(language="python", labels=labels)
-        )
+        sandbox = client.create(_sandbox_create_params(labels))
+        is_new = True
         logger.info("Created new sandbox: %s", sandbox.id)
 
-    return _TimeoutAwareSandbox(sandbox=sandbox)
+    return _TimeoutAwareSandbox(sandbox=sandbox), is_new
 
 
-async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
+async def get_or_create_sandbox(
+    thread_id: int | str,
+) -> tuple[_TimeoutAwareSandbox, bool]:
     """Get or create a sandbox for a conversation thread.
 
     Uses an in-process cache keyed by thread_id so subsequent messages
     in the same conversation reuse the sandbox object without an API call.
 
-    Args:
-        thread_id: The conversation thread identifier.
-
     Returns:
-        DaytonaSandbox connected to the sandbox.
+        Tuple of (sandbox, is_new). *is_new* is True when a fresh sandbox
+        was created, signalling that file tracking should be reset.
     """
     key = str(thread_id)
     cached = _sandbox_cache.get(key)
     if cached is not None:
         logger.info("Reusing cached sandbox for thread %s", key)
-        return cached
-    sandbox = await asyncio.to_thread(_find_or_create, key)
+        return cached, False
+    sandbox, is_new = await asyncio.to_thread(_find_or_create, key)
     _sandbox_cache[key] = sandbox
 
     if len(_sandbox_cache) > _SANDBOX_CACHE_MAX_SIZE:
@@ -151,12 +166,60 @@ async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
         _sandbox_cache.pop(oldest_key, None)
         logger.debug("Evicted oldest sandbox cache entry: %s", oldest_key)
 
-    return sandbox
+    return sandbox, is_new
+
+
+async def sync_files_to_sandbox(
+    thread_id: int | str,
+    files: dict[str, dict],
+    sandbox: _TimeoutAwareSandbox,
+    is_new: bool,
+) -> None:
+    """Upload new or changed virtual-filesystem files to the sandbox.
+
+    Compares *files* (from ``state["files"]``) against the ``_seeded_files``
+    tracking dict and uploads only what has changed.  When *is_new* is True
+    the tracking is reset so every file is re-uploaded.
+    """
+    key = str(thread_id)
+    if is_new:
+        _seeded_files.pop(key, None)
+
+    tracked = _seeded_files.get(key, {})
+    to_upload: list[tuple[str, bytes]] = []
+
+    for vpath, fdata in files.items():
+        modified_at = fdata.get("modified_at", "")
+        if tracked.get(vpath) == modified_at:
+            continue
+        content = "\n".join(fdata.get("content", []))
+        sandbox_path = f"{SANDBOX_DOCUMENTS_ROOT}{vpath}"
+        to_upload.append((sandbox_path, content.encode("utf-8")))
+
+    if not to_upload:
+        return
+
+    def _upload() -> None:
+        sandbox.upload_files(to_upload)
+
+    await asyncio.to_thread(_upload)
+
+    new_tracked = dict(tracked)
+    for vpath, fdata in files.items():
+        new_tracked[vpath] = fdata.get("modified_at", "")
+    _seeded_files[key] = new_tracked
+    logger.info("Synced %d file(s) to sandbox for thread %s", len(to_upload), key)
+
+
+def _evict_sandbox_cache(thread_id: int | str) -> None:
+    key = str(thread_id)
+    _sandbox_cache.pop(key, None)
+    _seeded_files.pop(key, None)
 
 
 async def delete_sandbox(thread_id: int | str) -> None:
     """Delete the sandbox for a conversation thread."""
-    _sandbox_cache.pop(str(thread_id), None)
+    _evict_sandbox_cache(thread_id)
 
     def _delete() -> None:
         client = _get_client()
@@ -193,7 +256,11 @@ def _get_sandbox_files_dir() -> Path:
 def _local_path_for(thread_id: int | str, sandbox_path: str) -> Path:
     """Map a sandbox-internal absolute path to a local filesystem path."""
     relative = sandbox_path.lstrip("/")
-    return _get_sandbox_files_dir() / str(thread_id) / relative
+    base = (_get_sandbox_files_dir() / str(thread_id)).resolve()
+    target = (base / relative).resolve()
+    if not target.is_relative_to(base):
+        raise ValueError(f"Path traversal blocked: {sandbox_path}")
+    return target
 
 
 def get_local_sandbox_file(thread_id: int | str, sandbox_path: str) -> bytes | None:
@@ -226,7 +293,7 @@ async def persist_and_delete_sandbox(
     Per-file errors are logged but do **not** prevent the sandbox from
     being deleted — freeing Daytona storage is the priority.
     """
-    _sandbox_cache.pop(str(thread_id), None)
+    _evict_sandbox_cache(thread_id)
 
     def _persist_and_delete() -> None:
         client = _get_client()

From a7e70020b1e9d1a053505cfe46b5d05369915537 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 16:23:47 +0200
Subject: [PATCH 02/14] add execute_code tool with sandbox integration

---
 .../agents/new_chat/middleware/filesystem.py  | 135 +++++++++++++++++-
 1 file changed, 132 insertions(+), 3 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
index d7697ef15..27bb5ce63 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@@ -7,10 +7,12 @@ This middleware customizes prompts and persists write/edit operations for
 from __future__ import annotations
 
 import asyncio
+import logging
 import re
 from datetime import UTC, datetime
 from typing import Annotated, Any
 
+from daytona.common.errors import DaytonaError
 from deepagents import FilesystemMiddleware
 from deepagents.backends.protocol import EditResult, WriteResult
 from deepagents.backends.utils import validate_path
@@ -23,6 +25,12 @@ from langchain_core.tools import BaseTool, StructuredTool
 from langgraph.types import Command
 from sqlalchemy import delete, select
 
+from app.agents.new_chat.sandbox import (
+    _evict_sandbox_cache,
+    get_or_create_sandbox,
+    is_sandbox_enabled,
+    sync_files_to_sandbox,
+)
 from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
 from app.indexing_pipeline.document_chunker import chunk_text
 from app.utils.document_converters import (
@@ -31,6 +39,8 @@ from app.utils.document_converters import (
     generate_unique_identifier_hash,
 )
 
+logger = logging.getLogger(__name__)
+
 # =============================================================================
 # System Prompt (injected into every model call by wrap_model_call)
 # =============================================================================
@@ -40,7 +50,7 @@ SURFSENSE_FILESYSTEM_SYSTEM_PROMPT = """## Following Conventions
 - Read files before editing — understand existing content before making changes.
 - Mimic existing style, naming conventions, and patterns.
 
-## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `save_document`
+## Filesystem Tools
 
 All file paths must start with a `/`.
 - ls: list files and directories at a given path.
@@ -128,6 +138,21 @@ SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across fi
 Use this to locate relevant document files/chunks before reading full files.
 """
 
+SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes a shell command in an isolated sandbox environment.
+
+The sandbox runs Python with common data-science packages pre-installed
+(pandas, numpy, matplotlib, scipy, scikit-learn).
+
+Knowledge base documents from your conversation are automatically available
+as XML files under /home/daytona/documents/.
+
+Usage notes:
+- Commands run in an isolated sandbox with no outbound network access.
+- Returns combined stdout/stderr output with exit code.
+- Use the optional timeout parameter to override the default timeout.
+- When issuing multiple commands, use ';' or '&&' to chain them.
+"""
+
 SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.
 
 This is an expensive operation — it creates a new Document record in the
@@ -148,17 +173,29 @@ Args:
 class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
     """SurfSense-specific filesystem middleware with DB persistence for docs."""
 
+    _MAX_EXECUTE_TIMEOUT = 300
+
     def __init__(
         self,
         *,
         search_space_id: int | None = None,
         created_by_id: str | None = None,
+        thread_id: int | str | None = None,
         tool_token_limit_before_evict: int | None = 20000,
     ) -> None:
         self._search_space_id = search_space_id
         self._created_by_id = created_by_id
+        self._thread_id = thread_id
+        self._sandbox_available = is_sandbox_enabled() and thread_id is not None
+
+        system_prompt = SURFSENSE_FILESYSTEM_SYSTEM_PROMPT
+        if self._sandbox_available:
+            system_prompt += (
+                "\n- execute_code: run shell commands in an isolated Python sandbox."
+            )
+
         super().__init__(
-            system_prompt=SURFSENSE_FILESYSTEM_SYSTEM_PROMPT,
+            system_prompt=system_prompt,
             custom_tool_descriptions={
                 "ls": SURFSENSE_LIST_FILES_TOOL_DESCRIPTION,
                 "read_file": SURFSENSE_READ_FILE_TOOL_DESCRIPTION,
@@ -168,10 +205,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
                 "grep": SURFSENSE_GREP_TOOL_DESCRIPTION,
             },
             tool_token_limit_before_evict=tool_token_limit_before_evict,
+            max_execute_timeout=self._MAX_EXECUTE_TIMEOUT,
         )
-        # Remove the execute tool (no sandbox backend)
         self.tools = [t for t in self.tools if t.name != "execute"]
         self.tools.append(self._create_save_document_tool())
+        if self._sandbox_available:
+            self.tools.append(self._create_execute_code_tool())
 
     @staticmethod
     def _run_async_blocking(coro: Any) -> Any:
@@ -455,6 +494,96 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
             coroutine=async_save_document,
         )
 
+    def _create_execute_code_tool(self) -> BaseTool:
+        """Create execute_code tool backed by a Daytona sandbox."""
+
+        def sync_execute_code(
+            command: Annotated[
+                str, "Shell command to execute in the sandbox environment."
+            ],
+            runtime: ToolRuntime[None, FilesystemState],
+            timeout: Annotated[
+                int | None,
+                "Optional timeout in seconds for this command.",
+            ] = None,
+        ) -> str:
+            if timeout is not None:
+                if timeout < 0:
+                    return f"Error: timeout must be non-negative, got {timeout}."
+                if timeout > self._MAX_EXECUTE_TIMEOUT:
+                    return f"Error: timeout {timeout}s exceeds maximum ({self._MAX_EXECUTE_TIMEOUT}s)."
+            return self._run_async_blocking(
+                self._execute_in_sandbox(command, runtime, timeout)
+            )
+
+        async def async_execute_code(
+            command: Annotated[
+                str, "Shell command to execute in the sandbox environment."
+            ],
+            runtime: ToolRuntime[None, FilesystemState],
+            timeout: Annotated[
+                int | None,
+                "Optional timeout in seconds for this command.",
+            ] = None,
+        ) -> str:
+            if timeout is not None:
+                if timeout < 0:
+                    return f"Error: timeout must be non-negative, got {timeout}."
+                if timeout > self._MAX_EXECUTE_TIMEOUT:
+                    return f"Error: timeout {timeout}s exceeds maximum ({self._MAX_EXECUTE_TIMEOUT}s)."
+            return await self._execute_in_sandbox(command, runtime, timeout)
+
+        return StructuredTool.from_function(
+            name="execute_code",
+            description=SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION,
+            func=sync_execute_code,
+            coroutine=async_execute_code,
+        )
+
+    async def _execute_in_sandbox(
+        self,
+        command: str,
+        runtime: ToolRuntime[None, FilesystemState],
+        timeout: int | None,
+    ) -> str:
+        """Core logic: get sandbox, sync files, run command, handle retries."""
+        assert self._thread_id is not None
+
+        try:
+            return await self._try_sandbox_execute(command, runtime, timeout)
+        except (DaytonaError, Exception) as first_err:
+            logger.warning(
+                "Sandbox execute failed for thread %s, retrying: %s",
+                self._thread_id,
+                first_err,
+            )
+            _evict_sandbox_cache(self._thread_id)
+            try:
+                return await self._try_sandbox_execute(command, runtime, timeout)
+            except Exception:
+                logger.exception(
+                    "Sandbox retry also failed for thread %s", self._thread_id
+                )
+                return "Error: Code execution is temporarily unavailable. Please try again."
+
+    async def _try_sandbox_execute(
+        self,
+        command: str,
+        runtime: ToolRuntime[None, FilesystemState],
+        timeout: int | None,
+    ) -> str:
+        sandbox, is_new = await get_or_create_sandbox(self._thread_id)
+        files = runtime.state.get("files") or {}
+        await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
+        result = await sandbox.aexecute(command, timeout=timeout)
+        parts = [result.output]
+        if result.exit_code is not None:
+            status = "succeeded" if result.exit_code == 0 else "failed"
+            parts.append(f"\n[Command {status} with exit code {result.exit_code}]")
+        if result.truncated:
+            parts.append("\n[Output was truncated due to size limits]")
+        return "".join(parts)
+
     def _create_write_file_tool(self) -> BaseTool:
         """Create write_file — ephemeral for /documents/*, persisted otherwise."""
         tool_description = (

From 823e3ebb1dc9d9a25b8e6f609ed4d2fc745ea086 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 16:23:53 +0200
Subject: [PATCH 03/14] pass thread_id to filesystem middleware

---
 surfsense_backend/app/agents/new_chat/chat_deepagent.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index 6ff98badf..9bf38cad6 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -439,6 +439,7 @@ async def create_surfsense_deep_agent(
         SurfSenseFilesystemMiddleware(
             search_space_id=search_space_id,
             created_by_id=user_id,
+            thread_id=thread_id,
         ),
         create_summarization_middleware(llm, StateBackend),
         PatchToolCallsMiddleware(),
@@ -466,6 +467,7 @@ async def create_surfsense_deep_agent(
         SurfSenseFilesystemMiddleware(
             search_space_id=search_space_id,
             created_by_id=user_id,
+            thread_id=thread_id,
         ),
         SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
         create_summarization_middleware(llm, StateBackend),

From 1e74c87aa14d63690523b82e77075ffdde26dfe8 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 16:23:58 +0200
Subject: [PATCH 04/14] add Daytona sandbox env vars to .env.example

---
 surfsense_backend/.env.example | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example
index 8c8587cea..1f998d01a 100644
--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@@ -197,6 +197,13 @@ LLAMA_CLOUD_API_KEY=llx-nnn
 # AZURE_DI_ENDPOINT=https://your-resource.cognitiveservices.azure.com/
 # AZURE_DI_KEY=your-key
 
+# Daytona Sandbox (isolated code execution)
+# DAYTONA_SANDBOX_ENABLED=FALSE
+# DAYTONA_API_KEY=your-daytona-api-key
+# DAYTONA_API_URL=https://app.daytona.io/api
+# DAYTONA_TARGET=us
+# DAYTONA_SNAPSHOT_ID=
+
 # OPTIONAL: Add these for LangSmith Observability
 LANGSMITH_TRACING=true
 LANGSMITH_ENDPOINT=https://api.smith.langchain.com

From c72b737a8179f1ee40f183f6ae62d0981cf2fba9 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 16:30:27 +0200
Subject: [PATCH 05/14] add snapshot creation script for surfsense-sandbox

---
 .../scripts/create_sandbox_snapshot.py        | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 surfsense_backend/scripts/create_sandbox_snapshot.py

diff --git a/surfsense_backend/scripts/create_sandbox_snapshot.py b/surfsense_backend/scripts/create_sandbox_snapshot.py
new file mode 100644
index 000000000..f1c1920db
--- /dev/null
+++ b/surfsense_backend/scripts/create_sandbox_snapshot.py
@@ -0,0 +1,46 @@
+"""Create the Daytona snapshot used by SurfSense sandboxes.
+
+Usage:
+    uv run python scripts/create_sandbox_snapshot.py
+
+Requires DAYTONA_API_KEY (and optionally DAYTONA_API_URL / DAYTONA_TARGET)
+to be set in the environment or in a .env file.
+"""
+
+import os
+import sys
+
+from daytona import CreateSnapshotParams, Daytona, DaytonaConfig, Image
+
+SNAPSHOT_NAME = "surfsense-sandbox"
+
+PACKAGES = [
+    "pandas",
+    "numpy",
+    "matplotlib",
+    "scipy",
+    "scikit-learn",
+]
+
+
+def main() -> None:
+    config = DaytonaConfig(
+        api_key=os.environ.get("DAYTONA_API_KEY", ""),
+        api_url=os.environ.get("DAYTONA_API_URL", "https://app.daytona.io/api"),
+        target=os.environ.get("DAYTONA_TARGET", "us"),
+    )
+    daytona = Daytona(config)
+
+    image = Image.debian_slim("3.12").pip_install(*PACKAGES)
+
+    print(f"Creating snapshot '{SNAPSHOT_NAME}' with packages: {', '.join(PACKAGES)}")
+    snapshot = daytona.snapshot.create(
+        CreateSnapshotParams(name=SNAPSHOT_NAME, image=image),
+        on_logs=lambda chunk: print(chunk, end=""),
+    )
+    print(f"\nSnapshot created: {snapshot.name}")
+    print(f"Set DAYTONA_SNAPSHOT_ID={snapshot.name} in your .env")
+
+
+if __name__ == "__main__":
+    sys.exit(main() or 0)

From c9e3b1a08aafe8bcbc27c356852a0cc17f05f0a4 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 18:03:16 +0200
Subject: [PATCH 06/14] fix KB search skipping when system message follows user
 message

---
 .../agents/new_chat/middleware/knowledge_search.py    | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
index 7b0dd2f71..06ed4ad80 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py
@@ -774,11 +774,16 @@ class KnowledgeBaseSearchMiddleware(AgentMiddleware):  # type: ignore[type-arg]
         messages = state.get("messages") or []
         if not messages:
             return None
-        last_message = messages[-1]
-        if not isinstance(last_message, HumanMessage):
+
+        last_human = None
+        for msg in reversed(messages):
+            if isinstance(msg, HumanMessage):
+                last_human = msg
+                break
+        if last_human is None:
             return None
 
-        user_text = _extract_text_from_message(last_message).strip()
+        user_text = _extract_text_from_message(last_human).strip()
         if not user_text:
             return None
 

From bb41c09eef421821ecfecdb6232cfd376769a97d Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 18:03:22 +0200
Subject: [PATCH 07/14] clean execute_code tool description, remove sandbox
 internals

---
 .../agents/new_chat/middleware/filesystem.py  | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
index 27bb5ce63..bba7ff6a8 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@@ -138,19 +138,22 @@ SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across fi
 Use this to locate relevant document files/chunks before reading full files.
 """
 
-SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes a shell command in an isolated sandbox environment.
+SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes a shell command in an isolated code execution environment.
 
-The sandbox runs Python with common data-science packages pre-installed
-(pandas, numpy, matplotlib, scipy, scikit-learn).
+Common data-science packages are pre-installed (pandas, numpy, matplotlib,
+scipy, scikit-learn). Documents from the conversation are automatically
+made available — run `ls` to discover them.
 
-Knowledge base documents from your conversation are automatically available
-as XML files under /home/daytona/documents/.
+When to use this tool: always use execute_code for any task involving
+numerical computation, data analysis, aggregation, or statistics. Write
+Python code to compute results and present the verified output. Never
+perform arithmetic manually when this tool is available.
 
 Usage notes:
-- Commands run in an isolated sandbox with no outbound network access.
-- Returns combined stdout/stderr output with exit code.
+- No outbound network access.
+- Returns combined stdout/stderr with exit code.
 - Use the optional timeout parameter to override the default timeout.
-- When issuing multiple commands, use ';' or '&&' to chain them.
+- Chain multiple commands with ';' or '&&'.
 """
 
 SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.
@@ -192,6 +195,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
         if self._sandbox_available:
             system_prompt += (
                 "\n- execute_code: run shell commands in an isolated Python sandbox."
+                " Prefer this tool for any numerical computation or data analysis."
             )
 
         super().__init__(

From 4bd9ff15eb1a4334a5f05734e9923194fe3bafaf Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 18:06:45 +0200
Subject: [PATCH 08/14] add /documents symlink to sandbox snapshot

---
 surfsense_backend/scripts/create_sandbox_snapshot.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/surfsense_backend/scripts/create_sandbox_snapshot.py b/surfsense_backend/scripts/create_sandbox_snapshot.py
index f1c1920db..e323d407a 100644
--- a/surfsense_backend/scripts/create_sandbox_snapshot.py
+++ b/surfsense_backend/scripts/create_sandbox_snapshot.py
@@ -31,7 +31,13 @@ def main() -> None:
     )
     daytona = Daytona(config)
 
-    image = Image.debian_slim("3.12").pip_install(*PACKAGES)
+    image = (
+        Image.debian_slim("3.12")
+        .pip_install(*PACKAGES)
+        # The agent's virtual filesystem serves documents at /documents/.
+        # This symlink lets code inside the sandbox use the same path.
+        .run("mkdir -p /home/daytona/documents && ln -sf /home/daytona/documents /documents")
+    )
 
     print(f"Creating snapshot '{SNAPSHOT_NAME}' with packages: {', '.join(PACKAGES)}")
     snapshot = daytona.snapshot.create(

From c4a5ac414462b082cd618cc98f25bf7ac0e0f750 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 18:11:40 +0200
Subject: [PATCH 09/14] delete existing snapshot before recreating

---
 surfsense_backend/scripts/create_sandbox_snapshot.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/surfsense_backend/scripts/create_sandbox_snapshot.py b/surfsense_backend/scripts/create_sandbox_snapshot.py
index e323d407a..6ca6188ea 100644
--- a/surfsense_backend/scripts/create_sandbox_snapshot.py
+++ b/surfsense_backend/scripts/create_sandbox_snapshot.py
@@ -39,6 +39,14 @@ def main() -> None:
         .run("mkdir -p /home/daytona/documents && ln -sf /home/daytona/documents /documents")
     )
 
+    try:
+        existing = daytona.snapshot.get(SNAPSHOT_NAME)
+        print(f"Deleting existing snapshot '{SNAPSHOT_NAME}'...")
+        daytona.snapshot.delete(existing)
+        print("Deleted.")
+    except Exception:
+        pass
+
     print(f"Creating snapshot '{SNAPSHOT_NAME}' with packages: {', '.join(PACKAGES)}")
     snapshot = daytona.snapshot.create(
         CreateSnapshotParams(name=SNAPSHOT_NAME, image=image),

From a71338e368d1d7765bad7cd0ac0f175c086d42b6 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 18:29:17 +0200
Subject: [PATCH 10/14] rewrite snapshot script: use run_commands, dotenv,
 idempotent delete

---
 .../scripts/create_sandbox_snapshot.py        | 88 +++++++++++++------
 1 file changed, 62 insertions(+), 26 deletions(-)

diff --git a/surfsense_backend/scripts/create_sandbox_snapshot.py b/surfsense_backend/scripts/create_sandbox_snapshot.py
index 6ca6188ea..97ed6dfe8 100644
--- a/surfsense_backend/scripts/create_sandbox_snapshot.py
+++ b/surfsense_backend/scripts/create_sandbox_snapshot.py
@@ -1,16 +1,32 @@
-"""Create the Daytona snapshot used by SurfSense sandboxes.
+"""Create the Daytona snapshot used by SurfSense code-execution sandboxes.
 
-Usage:
+Run from the backend directory:
+    cd surfsense_backend
     uv run python scripts/create_sandbox_snapshot.py
 
-Requires DAYTONA_API_KEY (and optionally DAYTONA_API_URL / DAYTONA_TARGET)
-to be set in the environment or in a .env file.
+Prerequisites:
+    - DAYTONA_API_KEY set in surfsense_backend/.env (or exported in shell)
+    - DAYTONA_API_URL=https://app.daytona.io/api
+    - DAYTONA_TARGET=us  (or eu)
+
+After this script succeeds, add to surfsense_backend/.env:
+    DAYTONA_SNAPSHOT_ID=surfsense-sandbox
 """
 
 import os
 import sys
+import time
+from pathlib import Path
 
-from daytona import CreateSnapshotParams, Daytona, DaytonaConfig, Image
+from dotenv import load_dotenv
+
+_here = Path(__file__).parent
+for candidate in [_here / "../surfsense_backend/.env", _here / ".env", _here / "../.env"]:
+    if candidate.exists():
+        load_dotenv(candidate)
+        break
+
+from daytona import CreateSnapshotParams, Daytona, Image  # noqa: E402
 
 SNAPSHOT_NAME = "surfsense-sandbox"
 
@@ -23,38 +39,58 @@ PACKAGES = [
 ]
 
 
-def main() -> None:
-    config = DaytonaConfig(
-        api_key=os.environ.get("DAYTONA_API_KEY", ""),
-        api_url=os.environ.get("DAYTONA_API_URL", "https://app.daytona.io/api"),
-        target=os.environ.get("DAYTONA_TARGET", "us"),
-    )
-    daytona = Daytona(config)
-
-    image = (
+def build_image() -> Image:
+    """Build the sandbox image with data-science packages and a /documents symlink."""
+    return (
         Image.debian_slim("3.12")
         .pip_install(*PACKAGES)
-        # The agent's virtual filesystem serves documents at /documents/.
-        # This symlink lets code inside the sandbox use the same path.
-        .run("mkdir -p /home/daytona/documents && ln -sf /home/daytona/documents /documents")
+        # Symlink /documents → /home/daytona/documents so the LLM can use
+        # the same /documents/ path it sees in the virtual filesystem.
+        .run_commands(
+            "mkdir -p /home/daytona/documents",
+            "ln -sfn /home/daytona/documents /documents",
+        )
     )
 
+
+def main() -> None:
+    api_key = os.environ.get("DAYTONA_API_KEY")
+    if not api_key:
+        print("ERROR: DAYTONA_API_KEY is not set.", file=sys.stderr)
+        print("Add it to surfsense_backend/.env or export it in your shell.", file=sys.stderr)
+        sys.exit(1)
+
+    daytona = Daytona()
+
     try:
         existing = daytona.snapshot.get(SNAPSHOT_NAME)
-        print(f"Deleting existing snapshot '{SNAPSHOT_NAME}'...")
+        print(f"Deleting existing snapshot '{SNAPSHOT_NAME}' …")
         daytona.snapshot.delete(existing)
-        print("Deleted.")
+        print(f"Deleted '{SNAPSHOT_NAME}'. Waiting for removal to propagate …")
+        for attempt in range(30):
+            time.sleep(2)
+            try:
+                daytona.snapshot.get(SNAPSHOT_NAME)
+            except Exception:
+                print(f"Confirmed '{SNAPSHOT_NAME}' is gone.\n")
+                break
+        else:
+            print(f"WARNING: '{SNAPSHOT_NAME}' may still exist after 60s. Proceeding anyway.\n")
     except Exception:
         pass
 
-    print(f"Creating snapshot '{SNAPSHOT_NAME}' with packages: {', '.join(PACKAGES)}")
-    snapshot = daytona.snapshot.create(
-        CreateSnapshotParams(name=SNAPSHOT_NAME, image=image),
-        on_logs=lambda chunk: print(chunk, end=""),
+    print(f"Building snapshot '{SNAPSHOT_NAME}' …")
+    print(f"Packages: {', '.join(PACKAGES)}\n")
+
+    daytona.snapshot.create(
+        CreateSnapshotParams(name=SNAPSHOT_NAME, image=build_image()),
+        on_logs=lambda chunk: print(chunk, end="", flush=True),
     )
-    print(f"\nSnapshot created: {snapshot.name}")
-    print(f"Set DAYTONA_SNAPSHOT_ID={snapshot.name} in your .env")
+
+    print(f"\n\nSnapshot '{SNAPSHOT_NAME}' is ready.")
+    print("\nAdd this to surfsense_backend/.env:")
+    print(f"    DAYTONA_SNAPSHOT_ID={SNAPSHOT_NAME}")
 
 
 if __name__ == "__main__":
-    sys.exit(main() or 0)
+    main()

From 5719a794a057958d1097b77dc22317aedbbf2e03 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 19:51:34 +0200
Subject: [PATCH 11/14] refine execute_code prompts: tool description + XML
 workflow guidance

---
 .../agents/new_chat/middleware/filesystem.py  | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
index bba7ff6a8..f10814f01 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@@ -141,13 +141,12 @@ Use this to locate relevant document files/chunks before reading full files.
 SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes a shell command in an isolated code execution environment.
 
 Common data-science packages are pre-installed (pandas, numpy, matplotlib,
-scipy, scikit-learn). Documents from the conversation are automatically
-made available — run `ls` to discover them.
+scipy, scikit-learn).
 
-When to use this tool: always use execute_code for any task involving
-numerical computation, data analysis, aggregation, or statistics. Write
-Python code to compute results and present the verified output. Never
-perform arithmetic manually when this tool is available.
+When to use this tool: use execute_code for numerical computation, data
+analysis, statistics, and any task that benefits from running shell
+commands or Python code. Never perform arithmetic manually when this tool
+is available.
 
 Usage notes:
 - No outbound network access.
@@ -195,7 +194,13 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
         if self._sandbox_available:
             system_prompt += (
                 "\n- execute_code: run shell commands in an isolated Python sandbox."
-                " Prefer this tool for any numerical computation or data analysis."
+                "\n\n## Code Execution"
+                "\n\nUse execute_code for numerical computation, data analysis, and"
+                " statistics — never do arithmetic manually."
+                "\n\nDocuments here are XML-wrapped markdown, not raw data files."
+                " To work with them programmatically, read the document first,"
+                " extract the data, write it as a clean file (CSV, JSON, etc.),"
+                " and then run your code against it."
             )
 
         super().__init__(

From 5ea2732bf281baf453d04402071f5ede85d39153 Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 20:53:17 +0200
Subject: [PATCH 12/14] make execute_code a Python tool: auto-wrap, print()
 nudge on empty output

---
 .../agents/new_chat/middleware/filesystem.py  | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
index f10814f01..09f69d07b 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@@ -138,21 +138,22 @@ SURFSENSE_GREP_TOOL_DESCRIPTION = """Search for a literal text pattern across fi
 Use this to locate relevant document files/chunks before reading full files.
 """
 
-SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes a shell command in an isolated code execution environment.
+SURFSENSE_EXECUTE_CODE_TOOL_DESCRIPTION = """Executes Python code in an isolated sandbox environment.
 
 Common data-science packages are pre-installed (pandas, numpy, matplotlib,
 scipy, scikit-learn).
 
 When to use this tool: use execute_code for numerical computation, data
-analysis, statistics, and any task that benefits from running shell
-commands or Python code. Never perform arithmetic manually when this tool
-is available.
+analysis, statistics, and any task that benefits from running Python code.
+Never perform arithmetic manually when this tool is available.
 
 Usage notes:
 - No outbound network access.
 - Returns combined stdout/stderr with exit code.
+- Use print() to produce output.
+- You can create files, run shell commands via subprocess or os.system(),
+  and use any standard library module.
 - Use the optional timeout parameter to override the default timeout.
-- Chain multiple commands with ';' or '&&'.
 """
 
 SURFSENSE_SAVE_DOCUMENT_TOOL_DESCRIPTION = """Permanently saves a document to the user's knowledge base.
@@ -193,10 +194,10 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
         system_prompt = SURFSENSE_FILESYSTEM_SYSTEM_PROMPT
         if self._sandbox_available:
             system_prompt += (
-                "\n- execute_code: run shell commands in an isolated Python sandbox."
+                "\n- execute_code: run Python code in an isolated sandbox."
                 "\n\n## Code Execution"
-                "\n\nUse execute_code for numerical computation, data analysis, and"
-                " statistics — never do arithmetic manually."
+                "\n\nUse execute_code whenever a task benefits from running code."
+                " Never perform arithmetic manually."
                 "\n\nDocuments here are XML-wrapped markdown, not raw data files."
                 " To work with them programmatically, read the document first,"
                 " extract the data, write it as a clean file (CSV, JSON, etc.),"
@@ -508,12 +509,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
 
         def sync_execute_code(
             command: Annotated[
-                str, "Shell command to execute in the sandbox environment."
+                str, "Python code to execute. Use print() to see output."
             ],
             runtime: ToolRuntime[None, FilesystemState],
             timeout: Annotated[
                 int | None,
-                "Optional timeout in seconds for this command.",
+                "Optional timeout in seconds.",
             ] = None,
         ) -> str:
             if timeout is not None:
@@ -527,12 +528,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
 
         async def async_execute_code(
             command: Annotated[
-                str, "Shell command to execute in the sandbox environment."
+                str, "Python code to execute. Use print() to see output."
             ],
             runtime: ToolRuntime[None, FilesystemState],
             timeout: Annotated[
                 int | None,
-                "Optional timeout in seconds for this command.",
+                "Optional timeout in seconds.",
             ] = None,
         ) -> str:
             if timeout is not None:
@@ -549,6 +550,22 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
             coroutine=async_execute_code,
         )
 
+    @staticmethod
+    def _wrap_as_python(code: str) -> str:
+        """Wrap Python code in a shell heredoc piped to ``python3``.
+
+        The heredoc delimiter is quoted, so the shell performs no expansion
+        on ``code``. Underscores are appended to the delimiter until no line
+        of ``code`` equals it; otherwise such a line would end the heredoc
+        early and the remaining lines would run as shell commands.
+        """
+        # The shell compares the delimiter against whole "\n"-separated lines.
+        code_lines = set(code.split("\n"))
+        delimiter = "PYEOF"
+        while delimiter in code_lines:
+            delimiter += "_"
+        return f"python3 << '{delimiter}'\n{code}\n{delimiter}"
+
     async def _execute_in_sandbox(
         self,
         command: str,
@@ -557,6 +563,7 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
     ) -> str:
         """Core logic: get sandbox, sync files, run command, handle retries."""
         assert self._thread_id is not None
+        command = self._wrap_as_python(command)
 
         try:
             return await self._try_sandbox_execute(command, runtime, timeout)
@@ -585,6 +592,12 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
         files = runtime.state.get("files") or {}
         await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
         result = await sandbox.aexecute(command, timeout=timeout)
+        output = (result.output or "").strip()
+        if not output and result.exit_code == 0:
+            return (
+                "[Code executed successfully but produced no output. "
+                "Use print() to display results, then try again.]"
+            )
         parts = [result.output]
         if result.exit_code is not None:
             status = "succeeded" if result.exit_code == 0 else "failed"

From facfb1f7e7b339bcce3ab5500056f3e12af4392f Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 20:59:31 +0200
Subject: [PATCH 13/14] disable file sync to sandbox, LLM handles data
 extraction itself

---
 .../app/agents/new_chat/middleware/filesystem.py             | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
index 09f69d07b..af5a6925b 100644
--- a/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
+++ b/surfsense_backend/app/agents/new_chat/middleware/filesystem.py
@@ -29,7 +29,6 @@ from app.agents.new_chat.sandbox import (
     _evict_sandbox_cache,
     get_or_create_sandbox,
     is_sandbox_enabled,
-    sync_files_to_sandbox,
 )
 from app.db import Chunk, Document, DocumentType, Folder, shielded_async_session
 from app.indexing_pipeline.document_chunker import chunk_text
@@ -589,8 +588,8 @@ class SurfSenseFilesystemMiddleware(FilesystemMiddleware):
         timeout: int | None,
     ) -> str:
         sandbox, is_new = await get_or_create_sandbox(self._thread_id)
-        files = runtime.state.get("files") or {}
-        await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
+        # files = runtime.state.get("files") or {}
+        # await sync_files_to_sandbox(self._thread_id, files, sandbox, is_new)
         result = await sandbox.aexecute(command, timeout=timeout)
         output = (result.output or "").strip()
         if not output and result.exit_code == 0:

From 9396ee9c85d6689462020d341f47ed6eb5f7a90d Mon Sep 17 00:00:00 2001
From: CREDO23 <thierrybakera12@gmail.com>
Date: Mon, 13 Apr 2026 21:57:06 +0200
Subject: [PATCH 14/14] fix: delete broken sandbox before creating replacement
 to prevent orphan accumulation

---
 surfsense_backend/app/agents/new_chat/sandbox.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/surfsense_backend/app/agents/new_chat/sandbox.py b/surfsense_backend/app/agents/new_chat/sandbox.py
index 79947de2b..614a1b1b9 100644
--- a/surfsense_backend/app/agents/new_chat/sandbox.py
+++ b/surfsense_backend/app/agents/new_chat/sandbox.py
@@ -126,6 +126,10 @@ def _find_or_create(thread_id: str) -> tuple[_TimeoutAwareSandbox, bool]:
                 sandbox.id,
                 sandbox.state,
             )
+            try:
+                client.delete(sandbox)
+            except Exception:
+                logger.debug("Could not delete broken sandbox %s", sandbox.id, exc_info=True)
             sandbox = client.create(_sandbox_create_params(labels))
             is_new = True
             logger.info("Created replacement sandbox: %s", sandbox.id)