Merge remote-tracking branch 'upstream/dev' into feat/document-test

2026-04-25 00:36:31 +02:00 · 2026-02-25 20:25:06 +05:30 · 2026-02-25 20:25:06 +05:30 · 93c0af475b
commit 93c0af475b
parent 853750e6e9 80be9f26c4
22 changed files with 4850 additions and 2822 deletions
--- a/Dockerfile.allinone
+++ b/Dockerfile.allinone
@ -260,6 +260,10 @@ ENV NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
 ENV NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL
 ENV NEXT_PUBLIC_ETL_SERVICE=DOCLING

+# Daytona Sandbox (cloud code execution — no local server needed)
+ENV DAYTONA_SANDBOX_ENABLED=FALSE
+# DAYTONA_API_KEY, DAYTONA_API_URL, DAYTONA_TARGET: set at runtime for production.
+
 # Electric SQL configuration (ELECTRIC_DATABASE_URL is built dynamically by entrypoint from these values)
 ENV ELECTRIC_DB_USER=electric
 ENV ELECTRIC_DB_PASSWORD=electric_password
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -65,6 +65,11 @@ services:
      - ELECTRIC_DB_PASSWORD=${ELECTRIC_DB_PASSWORD:-electric_password}
      - AUTH_TYPE=${AUTH_TYPE:-LOCAL}
      - NEXT_FRONTEND_URL=${NEXT_FRONTEND_URL:-http://localhost:3000}
+      # Daytona Sandbox – uncomment and set credentials to enable cloud code execution
+      # - DAYTONA_SANDBOX_ENABLED=TRUE
+      # - DAYTONA_API_KEY=${DAYTONA_API_KEY:-}
+      # - DAYTONA_API_URL=${DAYTONA_API_URL:-https://app.daytona.io/api}
+      # - DAYTONA_TARGET=${DAYTONA_TARGET:-us}
    depends_on:
      - db
      - redis
--- a/scripts/docker/entrypoint-allinone.sh
+++ b/scripts/docker/entrypoint-allinone.sh
@ -232,6 +232,7 @@ echo "  Auth Type:       ${NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE}"
 echo "  ETL Service:     ${NEXT_PUBLIC_ETL_SERVICE}"
 echo "  TTS Service:     ${TTS_SERVICE}"
 echo "  STT Service:     ${STT_SERVICE}"
+echo "  Daytona Sandbox: ${DAYTONA_SANDBOX_ENABLED:-FALSE}"
 echo "==========================================="
 echo ""

--- a/surfsense_backend/.env.example
+++ b/surfsense_backend/.env.example
@ -167,40 +167,14 @@ LANGSMITH_ENDPOINT=https://api.smith.langchain.com
 LANGSMITH_API_KEY=lsv2_pt_.....
 LANGSMITH_PROJECT=surfsense

-# Uvicorn Server Configuration
-# Full documentation for Uvicorn options can be found at: https://www.uvicorn.org/#command-line-options
-UVICORN_HOST="0.0.0.0"
-UVICORN_PORT=8000
-UVICORN_LOG_LEVEL=info
+# Agent Specific Configuration
+# Daytona Sandbox (secure cloud code execution for deep agent)
+# Set DAYTONA_SANDBOX_ENABLED=TRUE to give the agent an isolated execute tool
+DAYTONA_SANDBOX_ENABLED=FALSE
+DAYTONA_API_KEY=dtn_.....
+DAYTONA_API_URL=https://app.daytona.io/api
+DAYTONA_TARGET=us

-# OPTIONAL: Advanced Uvicorn Options (uncomment to use)
-# UVICORN_PROXY_HEADERS=false
-# UVICORN_FORWARDED_ALLOW_IPS="127.0.0.1"
-# UVICORN_WORKERS=1
-# UVICORN_ACCESS_LOG=true
-# UVICORN_LOOP="auto"
-# UVICORN_HTTP="auto"
-# UVICORN_WS="auto"
-# UVICORN_LIFESPAN="auto"
-# UVICORN_LOG_CONFIG=""
-# UVICORN_SERVER_HEADER=true
-# UVICORN_DATE_HEADER=true
-# UVICORN_LIMIT_CONCURRENCY=
-# UVICORN_LIMIT_MAX_REQUESTS=
-# UVICORN_TIMEOUT_KEEP_ALIVE=5
-# UVICORN_TIMEOUT_NOTIFY=30
-# UVICORN_SSL_KEYFILE=""
-# UVICORN_SSL_CERTFILE=""
-# UVICORN_SSL_KEYFILE_PASSWORD=""
-# UVICORN_SSL_VERSION=""
-# UVICORN_SSL_CERT_REQS=""
-# UVICORN_SSL_CA_CERTS=""
-# UVICORN_SSL_CIPHERS=""
-# UVICORN_HEADERS=""
-# UVICORN_USE_COLORS=true
-# UVICORN_UDS=""
-# UVICORN_FD=""
-# UVICORN_ROOT_PATH=""

 # ============================================================
 # Testing (optional — all have sensible defaults)
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -10,6 +10,7 @@ from collections.abc import Sequence
 from typing import Any

 from deepagents import create_deep_agent
+from deepagents.backends.protocol import SandboxBackendProtocol
 from langchain_core.language_models import BaseChatModel
 from langchain_core.tools import BaseTool
 from langgraph.types import Checkpointer
@ -128,6 +129,7 @@ async def create_surfsense_deep_agent(
    additional_tools: Sequence[BaseTool] | None = None,
    firecrawl_api_key: str | None = None,
    thread_visibility: ChatVisibility | None = None,
+    sandbox_backend: SandboxBackendProtocol | None = None,
 ):
    """
    Create a SurfSense deep agent with configurable tools and prompts.
@ -167,6 +169,9 @@ async def create_surfsense_deep_agent(
                         These are always added regardless of enabled/disabled settings.
        firecrawl_api_key: Optional Firecrawl API key for premium web scraping.
                          Falls back to Chromium/Trafilatura if not provided.
+        sandbox_backend: Optional sandbox backend (e.g. DaytonaSandbox) for
+                        secure code execution. When provided, the agent gets an
+                        isolated ``execute`` tool for running shell commands.

    Returns:
        CompiledStateGraph: The configured deep agent
@ -277,19 +282,26 @@ async def create_surfsense_deep_agent(
    )

    # Build system prompt based on agent_config
+    _sandbox_enabled = sandbox_backend is not None
    if agent_config is not None:
-        # Use configurable prompt with settings from NewLLMConfig
        system_prompt = build_configurable_system_prompt(
            custom_system_instructions=agent_config.system_instructions,
            use_default_system_instructions=agent_config.use_default_system_instructions,
            citations_enabled=agent_config.citations_enabled,
            thread_visibility=thread_visibility,
+            sandbox_enabled=_sandbox_enabled,
        )
    else:
        system_prompt = build_surfsense_system_prompt(
            thread_visibility=thread_visibility,
+            sandbox_enabled=_sandbox_enabled,
        )

+    # Build optional kwargs for the deep agent
+    deep_agent_kwargs: dict[str, Any] = {}
+    if sandbox_backend is not None:
+        deep_agent_kwargs["backend"] = sandbox_backend
+
    # Create the deep agent with system prompt and checkpointer
    # Note: TodoListMiddleware (write_todos) is included by default in create_deep_agent
    agent = create_deep_agent(
@ -298,6 +310,7 @@ async def create_surfsense_deep_agent(
        system_prompt=system_prompt,
        context_schema=SurfSenseContextSchema,
        checkpointer=checkpointer,
+        **deep_agent_kwargs,
    )

    return agent
--- a/surfsense_backend/app/agents/new_chat/sandbox.py
+++ b/surfsense_backend/app/agents/new_chat/sandbox.py
@ -0,0 +1,152 @@
+"""
+Daytona sandbox provider for SurfSense deep agent.
+
+Manages the lifecycle of sandboxed code execution environments.
+Each conversation thread gets its own isolated sandbox instance
+via the Daytona cloud API, identified by labels.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+
+from daytona import (
+    CreateSandboxFromSnapshotParams,
+    Daytona,
+    DaytonaConfig,
+    SandboxState,
+)
+from deepagents.backends.protocol import ExecuteResponse
+from langchain_daytona import DaytonaSandbox
+
+logger = logging.getLogger(__name__)
+
+
+class _TimeoutAwareSandbox(DaytonaSandbox):
+    """``DaytonaSandbox`` variant whose ``execute`` honours a per-command *timeout*.
+
+    The deepagents middleware passes a ``timeout`` keyword on each command.
+    The upstream ``langchain-daytona`` ``execute()`` ignores it, which makes
+    deepagents raise *"This sandbox backend does not support per-command
+    timeout overrides"* on every first call.  This subclass hands the value
+    straight to the Daytona SDK instead.
+    """
+
+    def execute(self, command: str, *, timeout: int | None = None) -> ExecuteResponse:
+        """Run *command* in the sandbox and wrap the SDK result.
+
+        Args:
+            command: Shell command passed to ``process.exec``.
+            timeout: Per-command timeout; ``self._timeout`` is used when omitted.
+
+        Returns:
+            ``ExecuteResponse`` with the command output and exit code;
+            ``truncated`` is always ``False``.
+        """
+        if timeout is None:
+            timeout = self._timeout
+        completed = self._sandbox.process.exec(command, timeout=timeout)
+        output, exit_code = completed.result, completed.exit_code
+        return ExecuteResponse(output=output, exit_code=exit_code, truncated=False)
+
+    async def aexecute(
+        self, command: str, *, timeout: int | None = None
+    ) -> ExecuteResponse:  # type: ignore[override]
+        """Async counterpart of :meth:`execute`; runs it in a worker thread."""
+        response = await asyncio.to_thread(self.execute, command, timeout=timeout)
+        return response
+
+
+# Daytona client cached at module level; populated on first use by _get_client().
+_daytona_client: Daytona | None = None
+# Label key under which each sandbox is tagged with its conversation thread id.
+THREAD_LABEL_KEY = "surfsense_thread"
+
+
+def is_sandbox_enabled() -> bool:
+    """Report whether ``DAYTONA_SANDBOX_ENABLED`` is set to ``TRUE`` (any letter case).
+
+    The variable defaults to ``"FALSE"`` when it is not present in the environment.
+    """
+    flag = os.environ.get("DAYTONA_SANDBOX_ENABLED", "FALSE")
+    return flag.upper() == "TRUE"
+
+
+def _get_client() -> Daytona:
+    """Return the module-wide Daytona client, building it on first use.
+
+    Connection settings come from ``DAYTONA_API_KEY``, ``DAYTONA_API_URL`` and
+    ``DAYTONA_TARGET``; they are read only when the client is first created,
+    and later calls reuse the cached instance.
+    """
+    global _daytona_client
+    if _daytona_client is not None:
+        return _daytona_client
+
+    env = os.environ
+    _daytona_client = Daytona(
+        DaytonaConfig(
+            api_key=env.get("DAYTONA_API_KEY", ""),
+            api_url=env.get("DAYTONA_API_URL", "https://app.daytona.io/api"),
+            target=env.get("DAYTONA_TARGET", "us"),
+        )
+    )
+    return _daytona_client
+
+
+def _find_or_create(thread_id: str) -> _TimeoutAwareSandbox:
+    """Find an existing sandbox for *thread_id*, or create a new one.
+
+    The sandbox is looked up by its ``THREAD_LABEL_KEY`` label.  What happens
+    next depends on the state of the sandbox that was found:
+
+    * stopped / stopping / archived: it is started (``timeout=60``);
+    * error / build-failed / destroyed: it is deleted on a best-effort basis
+      and replaced by a fresh sandbox carrying the same label;
+    * any other state except started: wait for it to start (``timeout=60``).
+
+    If the lookup fails, or an existing sandbox cannot be brought up, a new
+    sandbox is created instead.
+
+    Args:
+        thread_id: Conversation thread identifier, already converted to ``str``.
+
+    Returns:
+        A ``_TimeoutAwareSandbox`` wrapping the located or newly created sandbox.
+    """
+    client = _get_client()
+    labels = {THREAD_LABEL_KEY: thread_id}
+
+    def _create_sandbox():
+        # Single place that knows how a thread sandbox is provisioned.
+        return client.create(
+            CreateSandboxFromSnapshotParams(language="python", labels=labels)
+        )
+
+    # Only the lookup itself means "no sandbox yet".  Keeping it in its own
+    # ``try`` stops start/wait failures from being reported as a missing sandbox.
+    try:
+        sandbox = client.find_one(labels=labels)
+    except Exception:
+        logger.info("No existing sandbox for thread %s — creating one", thread_id)
+        logger.debug("Sandbox lookup for thread %s failed", thread_id, exc_info=True)
+        sandbox = _create_sandbox()
+        logger.info("Created new sandbox: %s", sandbox.id)
+        return _TimeoutAwareSandbox(sandbox=sandbox)
+
+    logger.info("Found existing sandbox %s (state=%s)", sandbox.id, sandbox.state)
+
+    if sandbox.state in (
+        SandboxState.ERROR,
+        SandboxState.BUILD_FAILED,
+        SandboxState.DESTROYED,
+    ):
+        logger.warning(
+            "Sandbox %s in unrecoverable state %s — creating a new one",
+            sandbox.id,
+            sandbox.state,
+        )
+        # Remove the broken sandbox so it no longer matches this thread's
+        # label; otherwise a later lookup could return it again and spawn
+        # yet another replacement.  Failure here must not block the new one.
+        try:
+            client.delete(sandbox)
+        except Exception:
+            logger.warning(
+                "Could not delete unrecoverable sandbox %s",
+                sandbox.id,
+                exc_info=True,
+            )
+        sandbox = _create_sandbox()
+        logger.info("Created replacement sandbox: %s", sandbox.id)
+        return _TimeoutAwareSandbox(sandbox=sandbox)
+
+    try:
+        if sandbox.state in (
+            SandboxState.STOPPED,
+            SandboxState.STOPPING,
+            SandboxState.ARCHIVED,
+        ):
+            logger.info("Starting stopped sandbox %s …", sandbox.id)
+            sandbox.start(timeout=60)
+            logger.info("Sandbox %s is now started", sandbox.id)
+        elif sandbox.state != SandboxState.STARTED:
+            sandbox.wait_for_sandbox_start(timeout=60)
+    except Exception:
+        # Same fallback as before (provision a new sandbox), but the real
+        # cause is now logged instead of being hidden behind a
+        # "no existing sandbox" message.
+        logger.warning(
+            "Sandbox %s could not be started — creating a new one",
+            sandbox.id,
+            exc_info=True,
+        )
+        sandbox = _create_sandbox()
+        logger.info("Created new sandbox: %s", sandbox.id)
+
+    return _TimeoutAwareSandbox(sandbox=sandbox)
+
+
+async def get_or_create_sandbox(thread_id: int | str) -> _TimeoutAwareSandbox:
+    """Return the sandbox bound to a conversation thread, creating it if needed.
+
+    The thread id is converted to ``str`` and used as the sandbox label, so
+    every message in one conversation resolves to the same label.  The
+    blocking lookup/creation is run in a worker thread.
+
+    Args:
+        thread_id: The conversation thread identifier.
+
+    Returns:
+        A ``_TimeoutAwareSandbox`` wrapping the thread's sandbox.
+    """
+    label_value = str(thread_id)
+    wrapped = await asyncio.to_thread(_find_or_create, label_value)
+    return wrapped
+
+
+async def delete_sandbox(thread_id: int | str) -> None:
+    """Best-effort removal of the sandbox labelled with *thread_id*.
+
+    A failing lookup or delete call is logged as a warning (with traceback)
+    rather than raised.  The blocking SDK calls run in a worker thread.
+    """
+
+    def _remove_blocking() -> None:
+        daytona = _get_client()
+        selector = {THREAD_LABEL_KEY: str(thread_id)}
+        try:
+            target = daytona.find_one(labels=selector)
+            daytona.delete(target)
+            logger.info("Sandbox deleted: %s", target.id)
+        except Exception:
+            logger.warning(
+                "Failed to delete sandbox for thread %s",
+                thread_id,
+                exc_info=True,
+            )
+
+    await asyncio.to_thread(_remove_blocking)
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -645,6 +645,87 @@ However, from your video learning, it's important to note that asyncio is not su
 </citation_instructions>
 """

+# Sandbox / code execution instructions — appended when sandbox backend is enabled.
+# Inspired by Claude's computer-use prompt, scoped to code execution & data analytics.
+SANDBOX_EXECUTION_INSTRUCTIONS = """
+<code_execution>
+You have access to a secure, isolated Linux sandbox environment for running code and shell commands.
+This gives you the `execute` tool alongside the standard filesystem tools (`ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`).
+
+## CRITICAL — CODE-FIRST RULE
+
+ALWAYS prefer executing code over giving a text-only response when the user's request involves ANY of the following:
+- **Creating a chart, plot, graph, or visualization** → Write Python code and generate the actual file. NEVER describe percentages or data in text and offer to "paste into Excel". Just produce the chart.
+- **Data analysis, statistics, or computation** → Write code to compute the answer. Do not do math by hand in text.
+- **Generating or transforming files** (CSV, PDF, images, etc.) → Write code to create the file.
+- **Running, testing, or debugging code** → Execute it in the sandbox.
+
+This applies even when you first retrieve data from the knowledge base. After `search_knowledge_base` returns relevant data, **immediately proceed to write and execute code** if the user's request matches any of the categories above. Do NOT stop at a text summary and wait for the user to ask you to "use Python" — that extra round-trip is a poor experience.
+
+Example (CORRECT):
+  User: "Create a pie chart of my benefits"
+  → 1. search_knowledge_base → retrieve benefits data
+  → 2. Immediately execute Python code (matplotlib) to generate the pie chart
+  → 3. Return the downloadable file + brief description
+
+Example (WRONG):
+  User: "Create a pie chart of my benefits"
+  → 1. search_knowledge_base → retrieve benefits data
+  → 2. Print a text table with percentages and ask the user if they want a chart ← NEVER do this
+
+## When to Use Code Execution
+
+Use the sandbox when the task benefits from actually running code rather than just describing it:
+- **Data analysis**: Load CSVs/JSON, compute statistics, filter/aggregate data, pivot tables
+- **Visualization**: Generate charts and plots (matplotlib, plotly, seaborn)
+- **Calculations**: Math, financial modeling, unit conversions, simulations
+- **Code validation**: Run and test code snippets the user provides or asks about
+- **File processing**: Parse, transform, or convert data files
+- **Quick prototyping**: Demonstrate working code for the user's problem
+- **Package exploration**: Install and test libraries the user is evaluating
+
+## When NOT to Use Code Execution
+
+Do not use the sandbox for:
+- Answering factual questions from your own knowledge
+- Summarizing or explaining concepts
+- Simple formatting or text generation tasks
+- Tasks that don't require running code to answer
+
+## Package Management
+
+- Use `pip install <package>` to install Python packages as needed
+- Common data/analytics packages (pandas, numpy, matplotlib, scipy, scikit-learn) may need to be installed on first use
+- Always verify a package installed successfully before using it
+
+## Working Guidelines
+
+- **Working directory**: The shell starts in the sandbox user's home directory (e.g. `/home/daytona`). Use **relative paths** or `/tmp/` for all files you create. NEVER write directly to `/home/` — that is the parent directory and is not writable. Use `pwd` if you need to discover the current working directory.
+- **Iterative approach**: For complex tasks, break work into steps — write code, run it, check output, refine
+- **Error handling**: If code fails, read the error, fix the issue, and retry. Don't just report the error without attempting a fix.
+- **Show results**: When generating plots or outputs, present the key findings directly in your response. For plots, save to a file and describe the results.
+- **Be efficient**: Install packages once per session. Combine related commands when possible.
+- **Large outputs**: If command output is very large, use `head`, `tail`, or save to a file and read selectively.
+
+## Sharing Generated Files
+
+When your code creates output files (images, CSVs, PDFs, etc.) in the sandbox:
+- **Print the absolute path** at the end of your script so the user can download the file. Example: `print("SANDBOX_FILE: /tmp/chart.png")`
+- **DO NOT call `display_image`** for files created inside the sandbox. Sandbox files are not accessible via public URLs, so `display_image` will always show "Image not available". The frontend automatically renders a download button from the `SANDBOX_FILE:` marker.
+- You can output multiple files, one per line: `print("SANDBOX_FILE: /tmp/report.csv")`, `print("SANDBOX_FILE: /tmp/chart.png")`
+- Always describe what the file contains in your response text so the user knows what they are downloading.
+- IMPORTANT: Every `execute` call that saves a file MUST print the `SANDBOX_FILE: <path>` marker. Without it the user cannot download the file.
+
+## Data Analytics Best Practices
+
+When the user asks you to analyze data:
+1. First, inspect the data structure (`head`, `shape`, `dtypes`, `describe()`)
+2. Clean and validate before computing (handle nulls, check types)
+3. Perform the analysis and present results clearly
+4. Offer follow-up insights or visualizations when appropriate
+</code_execution>
+"""
+
 # Anti-citation prompt - used when citations are disabled
 # This explicitly tells the model NOT to include citations
 SURFSENSE_NO_CITATION_INSTRUCTIONS = """
@ -670,6 +751,7 @@ Your goal is to provide helpful, informative answers in a clean, readable format
 def build_surfsense_system_prompt(
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
+    sandbox_enabled: bool = False,
 ) -> str:
    """
    Build the SurfSense system prompt with default settings.
@ -678,10 +760,12 @@ def build_surfsense_system_prompt(
    - Default system instructions
    - Tools instructions (always included)
    - Citation instructions enabled
+    - Sandbox execution instructions (when sandbox_enabled=True)

    Args:
        today: Optional datetime for today's date (defaults to current UTC date)
        thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None.
+        sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions).

    Returns:
        Complete system prompt string
@ -691,7 +775,13 @@ def build_surfsense_system_prompt(
    system_instructions = _get_system_instructions(visibility, today)
    tools_instructions = _get_tools_instructions(visibility)
    citation_instructions = SURFSENSE_CITATION_INSTRUCTIONS
-    return system_instructions + tools_instructions + citation_instructions
+    sandbox_instructions = SANDBOX_EXECUTION_INSTRUCTIONS if sandbox_enabled else ""
+    return (
+        system_instructions
+        + tools_instructions
+        + citation_instructions
+        + sandbox_instructions
+    )


 def build_configurable_system_prompt(
@ -700,14 +790,16 @@ def build_configurable_system_prompt(
    citations_enabled: bool = True,
    today: datetime | None = None,
    thread_visibility: ChatVisibility | None = None,
+    sandbox_enabled: bool = False,
 ) -> str:
    """
    Build a configurable SurfSense system prompt based on NewLLMConfig settings.

-    The prompt is composed of three parts:
+    The prompt is composed of up to four parts:
    1. System Instructions - either custom or default SURFSENSE_SYSTEM_INSTRUCTIONS
    2. Tools Instructions - always included (SURFSENSE_TOOLS_INSTRUCTIONS)
    3. Citation Instructions - either SURFSENSE_CITATION_INSTRUCTIONS or SURFSENSE_NO_CITATION_INSTRUCTIONS
+    4. Sandbox Execution Instructions - when sandbox_enabled=True

    Args:
        custom_system_instructions: Custom system instructions to use. If empty/None and
@ -719,6 +811,7 @@ def build_configurable_system_prompt(
                          anti-citation instructions (False).
        today: Optional datetime for today's date (defaults to current UTC date)
        thread_visibility: Optional; when provided, used for conditional prompt (e.g. private vs shared memory wording). Defaults to private behavior when None.
+        sandbox_enabled: Whether the sandbox backend is active (adds code execution instructions).

    Returns:
        Complete system prompt string
@ -727,7 +820,6 @@ def build_configurable_system_prompt(

    # Determine system instructions
    if custom_system_instructions and custom_system_instructions.strip():
-        # Use custom instructions, injecting the date placeholder if present
        system_instructions = custom_system_instructions.format(
            resolved_today=resolved_today
        )
@ -735,7 +827,6 @@ def build_configurable_system_prompt(
        visibility = thread_visibility or ChatVisibility.PRIVATE
        system_instructions = _get_system_instructions(visibility, today)
    else:
-        # No system instructions (edge case)
        system_instructions = ""

    # Tools instructions: conditional on thread_visibility (private vs shared memory wording)
@ -748,7 +839,14 @@ def build_configurable_system_prompt(
        else SURFSENSE_NO_CITATION_INSTRUCTIONS
    )

-    return system_instructions + tools_instructions + citation_instructions
+    sandbox_instructions = SANDBOX_EXECUTION_INSTRUCTIONS if sandbox_enabled else ""
+
+    return (
+        system_instructions
+        + tools_instructions
+        + citation_instructions
+        + sandbox_instructions
+    )


 def get_default_system_instructions() -> str:
--- a/surfsense_backend/app/routes/init.py
+++ b/surfsense_backend/app/routes/init.py
@ -36,6 +36,7 @@ from .podcasts_routes import router as podcasts_router
 from .public_chat_routes import router as public_chat_router
 from .rbac_routes import router as rbac_router
 from .reports_routes import router as reports_router
+from .sandbox_routes import router as sandbox_router
 from .search_source_connectors_routes import router as search_source_connectors_router
 from .search_spaces_routes import router as search_spaces_router
 from .slack_add_connector_route import router as slack_add_connector_router
@ -50,6 +51,7 @@ router.include_router(editor_router)
 router.include_router(documents_router)
 router.include_router(notes_router)
 router.include_router(new_chat_router)  # Chat with assistant-ui persistence
+router.include_router(sandbox_router)  # Sandbox file downloads (Daytona)
 router.include_router(chat_comments_router)
 router.include_router(podcasts_router)  # Podcast task status and audio
 router.include_router(reports_router)  # Report CRUD and export (PDF/DOCX)
--- a/surfsense_backend/app/routes/sandbox_routes.py
+++ b/surfsense_backend/app/routes/sandbox_routes.py
@ -0,0 +1,91 @@
+"""Routes for downloading files from Daytona sandbox environments."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi.responses import Response
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.future import select
+
+from app.db import NewChatThread, Permission, User, get_async_session
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+# Lower-case file extension (including the leading dot) -> Content-Type used
+# for downloads; extensions not listed here are handled by _guess_media_type.
+MIME_TYPES: dict[str, str] = {
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".svg": "image/svg+xml",
+    ".pdf": "application/pdf",
+    ".csv": "text/csv",
+    ".json": "application/json",
+    ".txt": "text/plain",
+    ".html": "text/html",
+    ".md": "text/markdown",
+    ".py": "text/x-python",
+    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    ".zip": "application/zip",
+}
+
+
+def _guess_media_type(filename: str) -> str:
+    """Look up the MIME type for *filename* from its last dot-extension.
+
+    The extension is lower-cased before the ``MIME_TYPES`` lookup; names
+    without a dot, or with an unlisted extension, yield
+    ``application/octet-stream``.
+    """
+    _, dot, suffix = filename.rpartition(".")
+    key = f".{suffix.lower()}" if dot else ""
+    return MIME_TYPES.get(key, "application/octet-stream")
+
+
+@router.get("/threads/{thread_id}/sandbox/download")
+async def download_sandbox_file(
+    thread_id: int,
+    path: str = Query(..., description="Absolute path of the file inside the sandbox"),
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """Download a file from the Daytona sandbox associated with a chat thread.
+
+    Args:
+        thread_id: Id of the chat thread whose sandbox holds the file.
+        path: Path of the file inside the sandbox (query parameter).
+        session: Database session used to load the thread and check permissions.
+        user: The authenticated user making the request.
+
+    Returns:
+        A ``Response`` carrying the file bytes as an attachment, with a media
+        type derived from the file extension.
+
+    Raises:
+        HTTPException: 404 when the sandbox feature is disabled, the thread
+            does not exist, or the file cannot be fetched from the sandbox.
+
+    NOTE(review): ``check_permission`` is called with ``CHATS_READ`` on the
+    thread's search space and presumably raises when access is denied —
+    confirm against its implementation.
+    """
+    from urllib.parse import quote
+
+    from app.agents.new_chat.sandbox import get_or_create_sandbox, is_sandbox_enabled
+
+    if not is_sandbox_enabled():
+        raise HTTPException(status_code=404, detail="Sandbox is not enabled")
+
+    result = await session.execute(
+        select(NewChatThread).filter(NewChatThread.id == thread_id)
+    )
+    thread = result.scalars().first()
+    if not thread:
+        raise HTTPException(status_code=404, detail="Thread not found")
+
+    await check_permission(
+        session,
+        user,
+        thread.search_space_id,
+        Permission.CHATS_READ.value,
+        "You don't have permission to access files in this thread",
+    )
+
+    try:
+        sandbox = await get_or_create_sandbox(thread_id)
+        raw_sandbox = sandbox._sandbox
+        content: bytes = await asyncio.to_thread(raw_sandbox.fs.download_file, path)
+    except Exception as exc:
+        # Full details stay in the server log; the client only gets a generic
+        # message so SDK/internal error text is not exposed.
+        logger.warning("Sandbox file download failed for %s: %s", path, exc)
+        raise HTTPException(
+            status_code=404, detail="Could not download file"
+        ) from exc
+
+    # A path ending in "/" would otherwise produce an empty filename.
+    filename = path.rsplit("/", 1)[-1] or "download"
+    media_type = _guess_media_type(filename)
+
+    # ``path`` is user-supplied, so the name must not be able to break out of
+    # the quoted header value (quotes, backslashes, control characters) or
+    # fail header encoding (non-ASCII).  Send a sanitised ASCII fallback plus
+    # the exact name percent-encoded in ``filename*``.
+    ascii_name = "".join(
+        ch if ch.isascii() and ch.isprintable() and ch not in '"\\' else "_"
+        for ch in filename
+    )
+    disposition = (
+        f'attachment; filename="{ascii_name}"; '
+        f"filename*=UTF-8''{quote(filename, safe='')}"
+    )
+
+    return Response(
+        content=content,
+        media_type=media_type,
+        headers={"Content-Disposition": disposition},
+    )
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -11,6 +11,7 @@ Supports loading LLM configurations from:

 import json
 import logging
+import re
 from collections.abc import AsyncGenerator
 from dataclasses import dataclass
 from typing import Any
@ -409,6 +410,21 @@ async def _stream_agent_events(
                    status="in_progress",
                    items=last_active_step_items,
                )
+            elif tool_name == "execute":
+                cmd = (
+                    tool_input.get("command", "")
+                    if isinstance(tool_input, dict)
+                    else str(tool_input)
+                )
+                display_cmd = cmd[:80] + ("…" if len(cmd) > 80 else "")
+                last_active_step_title = "Running command"
+                last_active_step_items = [f"$ {display_cmd}"]
+                yield streaming_service.format_thinking_step(
+                    step_id=tool_step_id,
+                    title="Running command",
+                    status="in_progress",
+                    items=last_active_step_items,
+                )
            else:
                last_active_step_title = f"Using {tool_name.replace('_', ' ')}"
                last_active_step_items = []
@ -625,6 +641,32 @@ async def _stream_agent_events(
                    status="completed",
                    items=completed_items,
                )
+            elif tool_name == "execute":
+                raw_text = (
+                    tool_output.get("result", "")
+                    if isinstance(tool_output, dict)
+                    else str(tool_output)
+                )
+                m = re.match(r"^Exit code:\s*(\d+)", raw_text)
+                exit_code_val = int(m.group(1)) if m else None
+                if exit_code_val is not None and exit_code_val == 0:
+                    completed_items = [
+                        *last_active_step_items,
+                        "Completed successfully",
+                    ]
+                elif exit_code_val is not None:
+                    completed_items = [
+                        *last_active_step_items,
+                        f"Exit code: {exit_code_val}",
+                    ]
+                else:
+                    completed_items = [*last_active_step_items, "Finished"]
+                yield streaming_service.format_thinking_step(
+                    step_id=original_step_id,
+                    title="Running command",
+                    status="completed",
+                    items=completed_items,
+                )
            elif tool_name == "ls":
                if isinstance(tool_output, dict):
                    ls_output = tool_output.get("result", "")
@ -818,6 +860,28 @@ async def _stream_agent_events(
                    if isinstance(tool_output, dict)
                    else {"result": tool_output},
                )
+            elif tool_name == "execute":
+                raw_text = (
+                    tool_output.get("result", "")
+                    if isinstance(tool_output, dict)
+                    else str(tool_output)
+                )
+                exit_code: int | None = None
+                output_text = raw_text
+                m = re.match(r"^Exit code:\s*(\d+)", raw_text)
+                if m:
+                    exit_code = int(m.group(1))
+                    om = re.search(r"\nOutput:\n([\s\S]*)", raw_text)
+                    output_text = om.group(1) if om else ""
+                thread_id_str = config.get("configurable", {}).get("thread_id", "")
+                yield streaming_service.format_tool_output_available(
+                    tool_call_id,
+                    {
+                        "exit_code": exit_code,
+                        "output": output_text,
+                        "thread_id": thread_id_str,
+                    },
+                )
            else:
                yield streaming_service.format_tool_output_available(
                    tool_call_id,
@ -982,6 +1046,22 @@ async def stream_new_chat(
        # Get the PostgreSQL checkpointer for persistent conversation memory
        checkpointer = await get_checkpointer()

+        # Optionally provision a sandboxed code execution environment
+        sandbox_backend = None
+        from app.agents.new_chat.sandbox import (
+            get_or_create_sandbox,
+            is_sandbox_enabled,
+        )
+
+        if is_sandbox_enabled():
+            try:
+                sandbox_backend = await get_or_create_sandbox(chat_id)
+            except Exception as sandbox_err:
+                logging.getLogger(__name__).warning(
+                    "Sandbox creation failed, continuing without execute tool: %s",
+                    sandbox_err,
+                )
+
        visibility = thread_visibility or ChatVisibility.PRIVATE
        agent = await create_surfsense_deep_agent(
            llm=llm,
@ -994,6 +1074,7 @@ async def stream_new_chat(
            agent_config=agent_config,
            firecrawl_api_key=firecrawl_api_key,
            thread_visibility=visibility,
+            sandbox_backend=sandbox_backend,
        )

        # Build input with message history
@ -1359,6 +1440,22 @@ async def stream_resume_chat(
            firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")

        checkpointer = await get_checkpointer()
+
+        sandbox_backend = None
+        from app.agents.new_chat.sandbox import (
+            get_or_create_sandbox,
+            is_sandbox_enabled,
+        )
+
+        if is_sandbox_enabled():
+            try:
+                sandbox_backend = await get_or_create_sandbox(chat_id)
+            except Exception as sandbox_err:
+                logging.getLogger(__name__).warning(
+                    "Sandbox creation failed, continuing without execute tool: %s",
+                    sandbox_err,
+                )
+
        visibility = thread_visibility or ChatVisibility.PRIVATE

        agent = await create_surfsense_deep_agent(
@ -1372,6 +1469,7 @@ async def stream_resume_chat(
            agent_config=agent_config,
            firecrawl_api_key=firecrawl_api_key,
            thread_visibility=visibility,
+            sandbox_backend=sandbox_backend,
        )

        # Release the transaction before streaming (same rationale as stream_new_chat).
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@ -66,6 +66,7 @@ dependencies = [
    "pypandoc_binary>=1.16.2",
    "typst>=0.14.0",
    "deepagents>=0.4.3",
+    "langchain-daytona>=0.0.2",
 ]

 [dependency-groups]
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
--- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx
@ -53,6 +53,7 @@ import {
 	DeleteNotionPageToolUI,
 	UpdateNotionPageToolUI,
 } from "@/components/tool-ui/notion";
+import { SandboxExecuteToolUI } from "@/components/tool-ui/sandbox-execute";
 import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage";
 import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user-memory";
 import { Skeleton } from "@/components/ui/skeleton";
@ -157,6 +158,7 @@ const TOOLS_WITH_UI = new Set([
 	"delete_linear_issue",
 	"create_google_drive_file",
 	"delete_google_drive_file",
+	"execute",
 	// "write_todos", // Disabled for now
 ]);

@ -1672,6 +1674,7 @@ export default function NewChatPage() {
 			<DeleteLinearIssueToolUI />
 			<CreateGoogleDriveFileToolUI />
 			<DeleteGoogleDriveFileToolUI />
+			<SandboxExecuteToolUI />
 			{/* <WriteTodosToolUI /> Disabled for now */}
 			<div className="flex h-[calc(100dvh-64px)] overflow-hidden">
 				<div className="flex-1 flex flex-col min-w-0 overflow-hidden">
--- a/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/team/page.tsx
@ -16,8 +16,8 @@ import {
 	Link2,
 	ShieldUser,
 	Trash2,
-	UserPlus,
 	User,
+	UserPlus,
 	Users,
 } from "lucide-react";
 import { motion } from "motion/react";
--- a/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx
+++ b/surfsense_web/components/assistant-ui/tooltip-icon-button.tsx
@ -1,7 +1,7 @@
 "use client";

 import { Slottable } from "@radix-ui/react-slot";
-import { type ComponentPropsWithRef, type ReactNode, forwardRef } from "react";
+import { type ComponentPropsWithRef, forwardRef, type ReactNode } from "react";
 import { Button } from "@/components/ui/button";
 import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
 import { cn } from "@/lib/utils";
--- a/surfsense_web/components/settings/roles-manager.tsx
+++ b/surfsense_web/components/settings/roles-manager.tsx
@ -7,6 +7,7 @@ import {
 	Edit2,
 	FileText,
 	Globe,
+	Logs,
 	type LucideIcon,
 	MessageCircle,
 	MessageSquare,
@ -14,7 +15,6 @@ import {
 	MoreHorizontal,
 	Plug,
 	Plus,
-	Logs,
 	Settings,
 	Shield,
 	Trash2,
@ -23,13 +23,13 @@ import {
 import { motion } from "motion/react";
 import { useCallback, useEffect, useMemo, useState } from "react";
 import { toast } from "sonner";
+import { myAccessAtom } from "@/atoms/members/members-query.atoms";
+import { permissionsAtom } from "@/atoms/permissions/permissions-query.atoms";
 import {
 	createRoleMutationAtom,
 	deleteRoleMutationAtom,
 	updateRoleMutationAtom,
 } from "@/atoms/roles/roles-mutation.atoms";
-import { permissionsAtom } from "@/atoms/permissions/permissions-query.atoms";
-import { myAccessAtom } from "@/atoms/members/members-query.atoms";
 import {
 	AlertDialog,
 	AlertDialogAction,
--- a/surfsense_web/components/tool-ui/google-drive/create-file.tsx
+++ b/surfsense_web/components/tool-ui/google-drive/create-file.tsx
@ -253,29 +253,31 @@ function ApprovalCard({
 				</div>
 			)}

-		{/* Display mode */}
-		{!isEditing && (
-			<div className="space-y-2 px-4 py-3 bg-card">
-				<div>
-					<p className="text-xs font-medium text-muted-foreground">Name</p>
-					<p className="text-sm text-foreground">{committedArgs?.name ?? args.name}</p>
-				</div>
-				<div>
-					<p className="text-xs font-medium text-muted-foreground">Type</p>
-					<p className="text-sm text-foreground">
-						{FILE_TYPE_LABELS[committedArgs?.file_type ?? args.file_type] ?? committedArgs?.file_type ?? args.file_type}
-					</p>
-				</div>
-				{(committedArgs?.content ?? args.content) && (
+			{/* Display mode */}
+			{!isEditing && (
+				<div className="space-y-2 px-4 py-3 bg-card">
 					<div>
-						<p className="text-xs font-medium text-muted-foreground">Content</p>
-						<p className="line-clamp-4 text-sm whitespace-pre-wrap text-foreground">
-							{committedArgs?.content ?? args.content}
+						<p className="text-xs font-medium text-muted-foreground">Name</p>
+						<p className="text-sm text-foreground">{committedArgs?.name ?? args.name}</p>
+					</div>
+					<div>
+						<p className="text-xs font-medium text-muted-foreground">Type</p>
+						<p className="text-sm text-foreground">
+							{FILE_TYPE_LABELS[committedArgs?.file_type ?? args.file_type] ??
+								committedArgs?.file_type ??
+								args.file_type}
 						</p>
 					</div>
-				)}
-			</div>
-		)}
+					{(committedArgs?.content ?? args.content) && (
+						<div>
+							<p className="text-xs font-medium text-muted-foreground">Content</p>
+							<p className="line-clamp-4 text-sm whitespace-pre-wrap text-foreground">
+								{committedArgs?.content ?? args.content}
+							</p>
+						</div>
+					)}
+				</div>
+			)}

 			{/* Edit mode */}
 			{isEditing && !decided && (
@ -341,26 +343,26 @@ function ApprovalCard({
 					</p>
 				) : isEditing ? (
 					<>
-					<Button
-						size="sm"
-						onClick={() => {
-							const finalArgs = buildFinalArgs();
-							setCommittedArgs(finalArgs);
-							setDecided("edit");
-							setIsEditing(false);
-							onDecision({
-								type: "edit",
-								edited_action: {
-									name: interruptData.action_requests[0].name,
-									args: finalArgs,
-								},
-							});
-						}}
-						disabled={!canApprove}
-					>
-						<CheckIcon />
-						Approve with Changes
-					</Button>
+						<Button
+							size="sm"
+							onClick={() => {
+								const finalArgs = buildFinalArgs();
+								setCommittedArgs(finalArgs);
+								setDecided("edit");
+								setIsEditing(false);
+								onDecision({
+									type: "edit",
+									edited_action: {
+										name: interruptData.action_requests[0].name,
+										args: finalArgs,
+									},
+								});
+							}}
+							disabled={!canApprove}
+						>
+							<CheckIcon />
+							Approve with Changes
+						</Button>
 						<Button
 							size="sm"
 							variant="outline"
@ -376,25 +378,25 @@ function ApprovalCard({
 				) : (
 					<>
 						{allowedDecisions.includes("approve") && (
-						<Button
-							size="sm"
-							onClick={() => {
-								const finalArgs = buildFinalArgs();
-								setCommittedArgs(finalArgs);
-								setDecided("approve");
-								onDecision({
-									type: "approve",
-									edited_action: {
-										name: interruptData.action_requests[0].name,
-										args: finalArgs,
-									},
-								});
-							}}
-							disabled={!canApprove}
-						>
-							<CheckIcon />
-							Approve
-						</Button>
+							<Button
+								size="sm"
+								onClick={() => {
+									const finalArgs = buildFinalArgs();
+									setCommittedArgs(finalArgs);
+									setDecided("approve");
+									onDecision({
+										type: "approve",
+										edited_action: {
+											name: interruptData.action_requests[0].name,
+											args: finalArgs,
+										},
+									});
+								}}
+								disabled={!canApprove}
+							>
+								<CheckIcon />
+								Approve
+							</Button>
 						)}
 						{canEdit && (
 							<Button size="sm" variant="outline" onClick={() => setIsEditing(true)}>
--- a/surfsense_web/components/tool-ui/google-drive/trash-file.tsx
+++ b/surfsense_web/components/tool-ui/google-drive/trash-file.tsx
@ -390,9 +390,7 @@ function WarningCard({ result }: { result: WarningResult }) {
 				</div>
 			</div>
 			<div className="space-y-2 px-4 py-3">
-				{result.message && (
-					<p className="text-sm text-muted-foreground">{result.message}</p>
-				)}
+				{result.message && <p className="text-sm text-muted-foreground">{result.message}</p>}
 				<p className="text-xs text-amber-600 dark:text-amber-500">{result.warning}</p>
 			</div>
 		</div>
--- a/surfsense_web/components/tool-ui/index.ts
+++ b/surfsense_web/components/tool-ui/index.ts
@ -78,6 +78,13 @@ export {
 	type SerializablePlan,
 	type TodoStatus,
 } from "./plan";
+export {
+	type ExecuteArgs,
+	ExecuteArgsSchema,
+	type ExecuteResult,
+	ExecuteResultSchema,
+	SandboxExecuteToolUI,
+} from "./sandbox-execute";
 export {
 	type ScrapeWebpageArgs,
 	ScrapeWebpageArgsSchema,
--- a/surfsense_web/components/tool-ui/sandbox-execute.tsx
+++ b/surfsense_web/components/tool-ui/sandbox-execute.tsx
@ -0,0 +1,420 @@
+"use client";
+
+import { makeAssistantToolUI } from "@assistant-ui/react";
+import {
+	AlertCircleIcon,
+	CheckCircle2Icon,
+	ChevronRightIcon,
+	DownloadIcon,
+	FileIcon,
+	Loader2Icon,
+	TerminalIcon,
+	XCircleIcon,
+} from "lucide-react";
+import { useCallback, useMemo, useState } from "react";
+import { z } from "zod";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible";
+import { getBearerToken } from "@/lib/auth-utils";
+import { BACKEND_URL } from "@/lib/env-config";
+import { cn } from "@/lib/utils";
+
+// ============================================================================
+// Zod Schemas
+// ============================================================================
+
+// Arguments of the `execute` tool call: the shell command and an optional timeout.
+const ExecuteArgsSchema = z.object({
+	command: z.string(),
+	timeout: z.number().nullish(),
+});
+
+// Result payload of the `execute` tool. Every field is optional/nullable.
+// - `result` / `output`: command text; `result` is preferred when both are set.
+// - `exit_code`: structured exit status, when provided.
+// - `error`: error message; takes precedence over any output when parsing.
+// - `thread_id`: used to build the sandbox file download URL.
+const ExecuteResultSchema = z.object({
+	result: z.string().nullish(),
+	exit_code: z.number().nullish(),
+	output: z.string().nullish(),
+	error: z.string().nullish(),
+	status: z.string().nullish(),
+	thread_id: z.string().nullish(),
+});
+
+// ============================================================================
+// Types
+// ============================================================================
+
+type ExecuteArgs = z.infer<typeof ExecuteArgsSchema>;
+type ExecuteResult = z.infer<typeof ExecuteResultSchema>;
+
+// A file announced by the command through a `SANDBOX_FILE:` marker line.
+interface SandboxFile {
+	// Path exactly as printed after the marker (trimmed).
+	path: string;
+	// Last `/`-separated segment of `path`; used as the download file name.
+	name: string;
+}
+
+// Normalised view of an execute result, ready for rendering.
+interface ParsedOutput {
+	// Exit status, or null when none could be determined.
+	exitCode: number | null;
+	// Output text before marker lines are stripped.
+	output: string;
+	// Text shown to the user (`SANDBOX_FILE:` lines removed for non-error results).
+	displayOutput: string;
+	// True when the text contains the "[Output was truncated" marker.
+	truncated: boolean;
+	// True for explicit errors and for non-zero exit codes.
+	isError: boolean;
+	// Files extracted from `SANDBOX_FILE:` marker lines.
+	files: SandboxFile[];
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+const SANDBOX_FILE_RE = /^SANDBOX_FILE:\s*(.+)$/gm;
+
+/**
+ * Collect the files a command announced through `SANDBOX_FILE: <path>` marker lines.
+ *
+ * Each distinct path is returned once, in order of first appearance, so `path` can
+ * safely be used as a unique key (for example a React list key) even when the
+ * command printed the same marker several times.
+ *
+ * @param text Raw command output to scan.
+ * @returns One entry per distinct non-empty path; `name` is the last `/`-separated
+ *          segment, falling back to the full path when that segment is empty.
+ */
+function extractSandboxFiles(text: string): SandboxFile[] {
+	const seen = new Set<string>();
+	const files: SandboxFile[] = [];
+	// matchAll iterates over a private copy of the regex, so the shared global
+	// pattern's lastIndex is never mutated and needs no manual reset.
+	for (const match of text.matchAll(SANDBOX_FILE_RE)) {
+		const filePath = match[1].trim();
+		if (!filePath || seen.has(filePath)) continue;
+		seen.add(filePath);
+		// split().pop() yields the whole string when there is no "/" and an empty
+		// string for a trailing "/", in which case the full path is used instead.
+		const name = filePath.split("/").pop() || filePath;
+		files.push({ path: filePath, name });
+	}
+	return files;
+}
+
+/**
+ * Produce display text from command output by dropping `SANDBOX_FILE:` marker
+ * lines, collapsing runs of three or more newlines to a single blank line, and
+ * trimming surrounding whitespace.
+ */
+function stripSandboxFileLines(text: string): string {
+	const withoutMarkers = text.replace(/^SANDBOX_FILE:\s*.+$/gm, "");
+	const collapsed = withoutMarkers.replace(/\n{3,}/g, "\n\n");
+	return collapsed.trim();
+}
+
+/**
+ * Normalise an `execute` tool result into the shape the UI renders.
+ *
+ * Resolution order:
+ * 1. `error` is set: error state showing only the error text.
+ * 2. Structured `exit_code` is present: use it with `result`/`output` as the text.
+ * 3. Text starts with `Exit code: N` (N may be negative): parse the code and take
+ *    everything after the first `\nOutput:\n` as the output.
+ * 4. Text starts with `Error:`: error state showing the raw text.
+ * 5. Anything else: plain output with no exit code.
+ *
+ * @param result Tool result; `result` is preferred over `output` when both are set.
+ * @returns Parsed output; `isError` is true for explicit errors and non-zero exit codes.
+ */
+function parseExecuteResult(result: ExecuteResult): ParsedOutput {
+	const raw = result.result || result.output || "";
+	// The truncation marker is looked up in the full raw text so it is detected
+	// regardless of which branch below ends up handling the result.
+	const truncated = raw.includes("[Output was truncated");
+
+	// Error results carry no files and are shown verbatim.
+	const asError = (message: string): ParsedOutput => ({
+		exitCode: null,
+		output: message,
+		displayOutput: message,
+		truncated: false,
+		isError: true,
+		files: [],
+	});
+
+	// Regular results: pull out announced files and hide their marker lines.
+	const withOutput = (exitCode: number | null, output: string): ParsedOutput => ({
+		exitCode,
+		output,
+		displayOutput: stripSandboxFileLines(output),
+		truncated,
+		isError: exitCode !== null && exitCode !== 0,
+		files: extractSandboxFiles(output),
+	});
+
+	if (result.error) {
+		return asError(result.error);
+	}
+
+	if (result.exit_code !== undefined && result.exit_code !== null) {
+		return withOutput(result.exit_code, raw);
+	}
+
+	// Accept an optional minus sign so negative exit codes are still recognised
+	// as failures instead of falling through to the plain-output branch.
+	const exitMatch = raw.match(/^Exit code:\s*(-?\d+)/);
+	if (exitMatch) {
+		const outputMatch = raw.match(/\nOutput:\n([\s\S]*)/);
+		return withOutput(parseInt(exitMatch[1], 10), outputMatch ? outputMatch[1] : "");
+	}
+
+	if (raw.startsWith("Error:")) {
+		return asError(raw);
+	}
+
+	return withOutput(null, raw);
+}
+
+/**
+ * Return `command` unchanged when it is at most `maxLen` characters long;
+ * otherwise keep its first `maxLen` characters and append "…".
+ */
+function truncateCommand(command: string, maxLen = 80): string {
+	const fits = command.length <= maxLen;
+	return fits ? command : command.slice(0, maxLen) + "…";
+}
+
+// ============================================================================
+// Download helper
+// ============================================================================
+
+/**
+ * Fetch a file from a thread's sandbox and hand it to the browser as a download.
+ *
+ * The file is requested from the backend with the current bearer token, wrapped in
+ * an object URL, and saved by clicking a temporary anchor element.
+ *
+ * @param threadId Thread whose sandbox holds the file.
+ * @param filePath Path of the file inside the sandbox (sent as the `path` query parameter).
+ * @param fileName Name suggested to the browser for the saved file.
+ * @throws Error when the backend answers with a non-OK status.
+ */
+async function downloadSandboxFile(threadId: string, filePath: string, fileName: string) {
+	const token = getBearerToken();
+	// Both dynamic parts are encoded so neither can alter the request path or query.
+	const url = `${BACKEND_URL}/api/v1/threads/${encodeURIComponent(threadId)}/sandbox/download?path=${encodeURIComponent(filePath)}`;
+	const res = await fetch(url, {
+		headers: { Authorization: `Bearer ${token || ""}` },
+	});
+	if (!res.ok) {
+		// statusText can be empty (e.g. over HTTP/2), so always include the numeric status.
+		const reason = res.statusText ? `${res.status} ${res.statusText}` : `${res.status}`;
+		throw new Error(`Download failed: ${reason}`);
+	}
+	const blob = await res.blob();
+	const blobUrl = URL.createObjectURL(blob);
+	const a = document.createElement("a");
+	try {
+		a.href = blobUrl;
+		a.download = fileName;
+		document.body.appendChild(a);
+		a.click();
+	} finally {
+		// Always detach the temporary anchor and release the blob, even if the click throws.
+		a.remove();
+		URL.revokeObjectURL(blobUrl);
+	}
+}
+
+// ============================================================================
+// Sub-Components
+// ============================================================================
+
+/** Placeholder card with a spinner and the (possibly shortened) command. */
+function ExecuteLoading({ command }: { command: string }) {
+	return (
+		<div className="my-4 flex max-w-lg items-center gap-3 rounded-xl border border-border bg-card px-4 py-3">
+			<Loader2Icon className="size-4 shrink-0 animate-spin text-muted-foreground" />
+			<code className="truncate text-sm text-muted-foreground font-mono">
+				{truncateCommand(command)}
+			</code>
+		</div>
+	);
+}
+
+/** Error card showing an "Execution failed" heading, the command, and the error message. */
+function ExecuteErrorState({ command, error }: { command: string; error: string }) {
+	return (
+		<div className="my-4 max-w-lg overflow-hidden rounded-xl border border-destructive/20 bg-destructive/5 p-4">
+			<div className="flex items-center gap-3">
+				<div className="flex size-9 shrink-0 items-center justify-center rounded-lg bg-destructive/10">
+					<AlertCircleIcon className="size-4 text-destructive" />
+				</div>
+				<div className="min-w-0 flex-1">
+					<p className="text-sm font-medium text-destructive">Execution failed</p>
+					<code className="mt-0.5 block truncate text-xs text-muted-foreground font-mono">
+						$ {command}
+					</code>
+					<p className="mt-1 text-xs text-muted-foreground">{error}</p>
+				</div>
+			</div>
+		</div>
+	);
+}
+
+/** Muted card showing the command struck through, used for cancelled tool calls. */
+function ExecuteCancelledState({ command }: { command: string }) {
+	return (
+		<div className="my-4 max-w-lg rounded-xl border border-muted p-4 text-muted-foreground">
+			<p className="flex items-center gap-2 font-mono text-sm">
+				<TerminalIcon className="size-4" />
+				<span className="line-through truncate">$ {command}</span>
+			</p>
+		</div>
+	);
+}
+
+/**
+ * Button that downloads one sandbox file.
+ *
+ * While the download is in flight the button is disabled and shows a spinner;
+ * a failure message is rendered inline next to the file name.
+ */
+function SandboxFileDownload({ file, threadId }: { file: SandboxFile; threadId: string }) {
+	const [downloading, setDownloading] = useState(false);
+	const [error, setError] = useState<string | null>(null);
+
+	const handleDownload = useCallback(async () => {
+		setDownloading(true);
+		// Clear any error left over from a previous attempt.
+		setError(null);
+		try {
+			await downloadSandboxFile(threadId, file.path, file.name);
+		} catch (e) {
+			setError(e instanceof Error ? e.message : "Download failed");
+		} finally {
+			setDownloading(false);
+		}
+	}, [threadId, file.path, file.name]);
+
+	return (
+		<Button
+			variant="ghost"
+			size="sm"
+			className="h-8 gap-2 rounded-lg bg-zinc-800/60 hover:bg-zinc-700/60 text-zinc-200 text-xs font-mono px-3"
+			onClick={handleDownload}
+			disabled={downloading}
+		>
+			{downloading ? (
+				<Loader2Icon className="size-3.5 animate-spin" />
+			) : (
+				<DownloadIcon className="size-3.5" />
+			)}
+			<FileIcon className="size-3 text-zinc-400" />
+			<span className="truncate max-w-[200px]">{file.name}</span>
+			{error && <span className="text-destructive text-[10px] ml-1">{error}</span>}
+		</Button>
+	);
+}
+
+/**
+ * Collapsible card for a finished command.
+ *
+ * The header shows the shortened command, a file-count badge (only while
+ * collapsed), and an exit-code badge. The expandable body shows the full command
+ * when it is long or multi-line, the display output, a truncation note, and
+ * download buttons for announced files. The trigger is disabled when there is
+ * nothing to expand.
+ */
+function ExecuteCompleted({
+	command,
+	parsed,
+	threadId,
+}: {
+	command: string;
+	parsed: ParsedOutput;
+	threadId: string | null;
+}) {
+	const [open, setOpen] = useState(false);
+	// A command counts as "long" when the header would truncate it or it spans lines.
+	const isLongCommand = command.length > 80 || command.includes("\n");
+	const hasTextContent = parsed.displayOutput.trim().length > 0 || isLongCommand;
+	// Files are only offered when a thread id is available for the download.
+	const hasFiles = parsed.files.length > 0 && !!threadId;
+	const hasContent = hasTextContent || hasFiles;
+
+	// Exit-code badge; omitted entirely when no exit code is known.
+	const exitBadge = useMemo(() => {
+		if (parsed.exitCode === null) return null;
+		const success = parsed.exitCode === 0;
+		return (
+			<Badge
+				variant={success ? "secondary" : "destructive"}
+				className={cn(
+					"ml-auto gap-1 text-[10px] px-1.5 py-0",
+					success &&
+						"bg-emerald-500/10 text-emerald-600 dark:text-emerald-400 border-emerald-500/20"
+				)}
+			>
+				{success ? <CheckCircle2Icon className="size-3" /> : <XCircleIcon className="size-3" />}
+				{parsed.exitCode}
+			</Badge>
+		);
+	}, [parsed.exitCode]);
+
+	return (
+		<div className="my-4 max-w-lg">
+			<Collapsible open={open} onOpenChange={setOpen}>
+				<CollapsibleTrigger
+					className={cn(
+						"flex w-full items-center gap-2 rounded-xl border bg-card px-4 py-2.5 text-left transition-colors hover:bg-accent/50",
+						open && "rounded-b-none border-b-0",
+						parsed.isError && "border-destructive/20"
+					)}
+					disabled={!hasContent}
+				>
+					<ChevronRightIcon
+						className={cn(
+							"size-3.5 shrink-0 text-muted-foreground transition-transform duration-200",
+							open && "rotate-90",
+							!hasContent && "invisible"
+						)}
+					/>
+					<TerminalIcon className="size-3.5 shrink-0 text-muted-foreground" />
+					<code className="min-w-0 flex-1 truncate text-sm font-mono">
+						{truncateCommand(command)}
+					</code>
+					{hasFiles && !open && (
+						<Badge
+							variant="outline"
+							className="gap-1 text-[10px] px-1.5 py-0 border-blue-500/30 text-blue-500"
+						>
+							<FileIcon className="size-2.5" />
+							{parsed.files.length}
+						</Badge>
+					)}
+					{exitBadge}
+				</CollapsibleTrigger>
+
+				<CollapsibleContent>
+					<div
+						className={cn(
+							"rounded-b-xl border border-t-0 bg-zinc-950 dark:bg-zinc-900/60 px-4 py-3 space-y-3",
+							parsed.isError && "border-destructive/20"
+						)}
+					>
+						{isLongCommand && (
+							<div>
+								<p className="mb-1.5 text-[10px] font-medium uppercase tracking-wider text-zinc-500">
+									Command
+								</p>
+								<pre className="max-h-60 overflow-auto whitespace-pre-wrap break-all rounded-md bg-zinc-900/80 dark:bg-zinc-800/40 px-3 py-2 text-xs font-mono text-emerald-400 leading-relaxed">
+									{command}
+								</pre>
+							</div>
+						)}
+						{parsed.displayOutput.trim().length > 0 && (
+							<div>
+								{(isLongCommand || hasFiles) && (
+									<p className="mb-1.5 text-[10px] font-medium uppercase tracking-wider text-zinc-500">
+										Output
+									</p>
+								)}
+								<pre className="max-h-80 overflow-auto whitespace-pre-wrap break-all text-xs font-mono text-zinc-300 leading-relaxed">
+									{parsed.displayOutput}
+								</pre>
+							</div>
+						)}
+						{parsed.truncated && (
+							<p className="text-[10px] text-zinc-500 italic">
+								Output was truncated due to size limits
+							</p>
+						)}
+						{hasFiles && threadId && (
+							<div>
+								<p className="mb-1.5 text-[10px] font-medium uppercase tracking-wider text-zinc-500">
+									Files
+								</p>
+								<div className="flex flex-wrap gap-2">
+									{parsed.files.map((file) => (
+										<SandboxFileDownload key={file.path} file={file} threadId={threadId} />
+									))}
+								</div>
+							</div>
+						)}
+					</div>
+				</CollapsibleContent>
+			</Collapsible>
+		</div>
+	);
+}
+
+// ============================================================================
+// Tool UI
+// ============================================================================
+
+/**
+ * Tool UI registered for the `execute` tool.
+ *
+ * Rendering by state:
+ * - running / requires-action, or no result yet: loading card;
+ * - incomplete + cancelled: cancelled card;
+ * - incomplete + error: error card with the status error (or a generic message);
+ * - result carrying only `error`: error card;
+ * - otherwise: the parsed result in the collapsible completed card.
+ */
+export const SandboxExecuteToolUI = makeAssistantToolUI<ExecuteArgs, ExecuteResult>({
+	toolName: "execute",
+	render: function SandboxExecuteUI({ args, result, status }) {
+		// Fall back to an ellipsis when the command is empty or not yet available.
+		const command = args.command || "…";
+
+		if (status.type === "running" || status.type === "requires-action") {
+			return <ExecuteLoading command={command} />;
+		}
+
+		if (status.type === "incomplete") {
+			if (status.reason === "cancelled") {
+				return <ExecuteCancelledState command={command} />;
+			}
+			if (status.reason === "error") {
+				return (
+					<ExecuteErrorState
+						command={command}
+						error={typeof status.error === "string" ? status.error : "An error occurred"}
+					/>
+				);
+			}
+			// Other incomplete reasons fall through to the result handling below.
+		}
+
+		if (!result) {
+			return <ExecuteLoading command={command} />;
+		}
+
+		if (result.error && !result.result && !result.output) {
+			return <ExecuteErrorState command={command} error={result.error} />;
+		}
+
+		const parsed = parseExecuteResult(result);
+		// thread_id is needed for file downloads; without it no download buttons are shown.
+		const threadId = result.thread_id || null;
+		return <ExecuteCompleted command={command} parsed={parsed} threadId={threadId} />;
+	},
+});
+
+export { ExecuteArgsSchema, ExecuteResultSchema, type ExecuteArgs, type ExecuteResult };
--- a/surfsense_web/components/ui/expanded-gif-overlay.tsx
+++ b/surfsense_web/components/ui/expanded-gif-overlay.tsx
@ -40,7 +40,7 @@ function ExpandedGifOverlay({
 				className="max-h-[90vh] max-w-[90vw] cursor-pointer rounded-2xl shadow-2xl"
 			/>
 		</motion.div>,
-		document.body,
+		document.body
 	);
 }

--- a/surfsense_web/components/ui/hero-carousel.tsx
+++ b/surfsense_web/components/ui/hero-carousel.tsx
@ -18,8 +18,7 @@ const carouselItems = [
 	},
 	{
 		title: "Search & Citation",
-		description:
-			"Ask questions and get cited responses from your knowledge base.",
+		description: "Ask questions and get cited responses from your knowledge base.",
 		src: "/homepage/hero_tutorial/BSNCGif.gif",
 	},
 	{
@ -121,9 +120,7 @@ function HeroCarouselCard({
 						<h3 className="truncate text-base font-semibold text-neutral-900 sm:text-xl dark:text-white">
 							{title}
 						</h3>
-						<p className="text-sm text-neutral-500 dark:text-neutral-400">
-							{description}
-						</p>
+						<p className="text-sm text-neutral-500 dark:text-neutral-400">{description}</p>
 					</div>
 				</div>
 				<div
@ -141,11 +138,7 @@ function HeroCarouselCard({
 							className="w-full rounded-lg sm:rounded-xl"
 						/>
 					) : frozenFrame ? (
-						<img
-							src={frozenFrame}
-							alt={title}
-							className="w-full rounded-lg sm:rounded-xl"
-						/>
+						<img src={frozenFrame} alt={title} className="w-full rounded-lg sm:rounded-xl" />
 					) : (
 						<div className="aspect-video w-full rounded-lg bg-neutral-100 sm:rounded-xl dark:bg-neutral-800" />
 					)}
@ -174,7 +167,7 @@ function HeroCarousel() {
 			directionRef.current = newIndex >= activeIndex ? "forward" : "backward";
 			setActiveIndex(newIndex);
 		},
-		[activeIndex],
+		[activeIndex]
 	);

 	useEffect(() => {
@ -246,7 +239,7 @@ function HeroCarousel() {
 				blur: t * 6,
 			};
 		},
-		[activeIndex, cardWidth, baseOffset, stackGap],
+		[activeIndex, cardWidth, baseOffset, stackGap]
 	);

 	return (
@ -287,18 +280,18 @@ function HeroCarousel() {
 									transition={{ duration: 0.7, ease: [0.32, 0.72, 0, 1] }}
 								>
 									<motion.div
-									animate={{ filter: `blur(${style.blur}px)` }}
-									transition={{ duration: 0.7, ease: [0.32, 0.72, 0, 1] }}
-								>
-									<HeroCarouselCard
-										index={i}
-										title={item.title}
-										description={item.description}
-										src={item.src}
-										isActive={i === activeIndex}
-										onExpandedChange={setIsGifExpanded}
-									/>
-								</motion.div>
+										animate={{ filter: `blur(${style.blur}px)` }}
+										transition={{ duration: 0.7, ease: [0.32, 0.72, 0, 1] }}
+									>
+										<HeroCarouselCard
+											index={i}
+											title={item.title}
+											description={item.description}
+											src={item.src}
+											isActive={i === activeIndex}
+											onExpandedChange={setIsGifExpanded}
+										/>
+									</motion.div>
 									<motion.div
 										className="pointer-events-none absolute inset-0 rounded-2xl bg-black sm:rounded-3xl"
 										animate={{ opacity: style.overlayOpacity }}